Get rid of the slices used for children/localGrads (replace them with fixed lChild/rChild and lLocalGrad/rLocalGrad fields)
This commit is contained in:
84
microgopt.go
84
microgopt.go
@@ -284,22 +284,19 @@ func gpt(tokenId int, posId int, keys [][][]*value, values [][][]*value) []*valu
|
|||||||
type value struct {
|
type value struct {
|
||||||
data float64
|
data float64
|
||||||
grad float64 // implicitly 0 to start
|
grad float64 // implicitly 0 to start
|
||||||
children []*value
|
lChild *value
|
||||||
localGrads []*value
|
rChild *value
|
||||||
}
|
lLocalGrad *value
|
||||||
|
rLocalGrad *value
|
||||||
// this lets us build a set-like map with our Values.
|
|
||||||
// If the slices were removed from the struct, that would make this method irrelevant.
|
|
||||||
func (v *value) toKey() string {
|
|
||||||
k := fmt.Sprintf("%+v", v)
|
|
||||||
return k
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *value) Add(other *value) *value {
|
func (v *value) Add(other *value) *value {
|
||||||
return &value{
|
return &value{
|
||||||
data: v.data + other.data,
|
data: v.data + other.data,
|
||||||
children: []*value{v, other},
|
lChild: v,
|
||||||
localGrads: []*value{{data: 1.0}, {data: 1.0}},
|
rChild: other,
|
||||||
|
lLocalGrad: &value{data: 1.0},
|
||||||
|
rLocalGrad: &value{data: 1.0},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -314,22 +311,20 @@ func (v *value) Div(other *value) *value {
|
|||||||
func (v *value) Mul(other *value) *value {
|
func (v *value) Mul(other *value) *value {
|
||||||
// note the swap here: children are stored as v, other but grads are other, v
|
// note the swap here: v is stored as rChild and other as lChild, and each child's local grad is the other operand's data
|
||||||
return &value{
|
return &value{
|
||||||
data: v.data * other.data,
|
data: v.data * other.data,
|
||||||
children: []*value{v, other},
|
rChild: v,
|
||||||
localGrads: []*value{
|
lChild: other,
|
||||||
{data: other.data},
|
rLocalGrad: &value{data: other.data},
|
||||||
{data: v.data},
|
lLocalGrad: &value{data: v.data},
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *value) Pow(other *value) *value {
|
func (v *value) Pow(other *value) *value {
|
||||||
return &value{
|
return &value{
|
||||||
data: math.Pow(v.data, other.data),
|
data: math.Pow(v.data, other.data),
|
||||||
children: []*value{v},
|
rChild: v,
|
||||||
localGrads: []*value{
|
rLocalGrad: other.Mul(&value{data: math.Pow(v.data, other.Sub(&value{data: 1}).data)}),
|
||||||
other.Mul(&value{data: math.Pow(v.data, other.Sub(&value{data: 1}).data)}),
|
}
|
||||||
}}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *value) Neg() *value {
|
func (v *value) Neg() *value {
|
||||||
@@ -338,45 +333,41 @@ func (v *value) Neg() *value {
|
|||||||
|
|
||||||
func (v *value) Log() *value {
|
func (v *value) Log() *value {
|
||||||
return &value{
|
return &value{
|
||||||
data: math.Log(v.data),
|
data: math.Log(v.data),
|
||||||
children: []*value{v},
|
rChild: v,
|
||||||
localGrads: []*value{
|
rLocalGrad: (&value{data: 1}).Div(v),
|
||||||
(&value{data: 1}).Div(v),
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *value) Exp() *value {
|
func (v *value) Exp() *value {
|
||||||
return &value{
|
return &value{
|
||||||
data: math.Exp(v.data),
|
data: math.Exp(v.data),
|
||||||
children: []*value{v},
|
rChild: v,
|
||||||
localGrads: []*value{
|
rLocalGrad: &value{data: math.Exp(v.data)},
|
||||||
{data: math.Exp(v.data)},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *value) Relu() *value {
|
func (v *value) Relu() *value {
|
||||||
return &value{
|
return &value{
|
||||||
data: max(v.data, 0),
|
data: max(v.data, 0),
|
||||||
children: []*value{v},
|
rChild: v,
|
||||||
localGrads: []*value{
|
rLocalGrad: &value{data: btof(v.data > 0)},
|
||||||
{data: btof(v.data > 0)},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v *value) Backward() {
|
func (v *value) Backward() {
|
||||||
topo := []*value{}
|
topo := []*value{}
|
||||||
visited := map[string]struct{}{}
|
visited := map[*value]struct{}{}
|
||||||
|
|
||||||
var buildTopo func(v *value)
|
var buildTopo func(v *value)
|
||||||
buildTopo = func(v *value) {
|
buildTopo = func(v *value) {
|
||||||
k := v.toKey()
|
if _, ok := visited[v]; !ok {
|
||||||
if _, ok := visited[k]; !ok {
|
visited[v] = struct{}{}
|
||||||
visited[k] = struct{}{}
|
if v.rChild != nil {
|
||||||
for _, child := range v.children {
|
buildTopo(v.rChild)
|
||||||
buildTopo(child)
|
}
|
||||||
|
if v.lChild != nil {
|
||||||
|
buildTopo(v.lChild)
|
||||||
}
|
}
|
||||||
topo = append(topo, v)
|
topo = append(topo, v)
|
||||||
}
|
}
|
||||||
@@ -384,8 +375,11 @@ func (v *value) Backward() {
|
|||||||
buildTopo(v)
|
buildTopo(v)
|
||||||
v.grad = 1.0
|
v.grad = 1.0
|
||||||
for _, v := range slices.Backward(topo) {
|
for _, v := range slices.Backward(topo) {
|
||||||
for i := range v.children {
|
if v.rChild != nil {
|
||||||
v.children[i].grad += v.localGrads[i].data * v.grad
|
v.rChild.grad += v.rLocalGrad.data * v.grad
|
||||||
|
}
|
||||||
|
if v.lChild != nil {
|
||||||
|
v.lChild.grad += v.lLocalGrad.data * v.grad
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user