Replace the children/localGrads slices with fixed lChild/rChild and lLocalGrad/rLocalGrad fields

This commit is contained in:
2026-03-07 22:59:54 -05:00
parent 730b59797a
commit aabd0087d5

View File

@@ -284,22 +284,19 @@ func gpt(tokenId int, posId int, keys [][][]*value, values [][][]*value) []*valu
type value struct { type value struct {
data float64 data float64
grad float64 // implicitly 0 to start grad float64 // implicitly 0 to start
children []*value lChild *value
localGrads []*value rChild *value
} lLocalGrad *value
rLocalGrad *value
// this lets us build a set-like map with our Values.
// If the slices were removed from the struct, that would make this method irrelevant.
func (v *value) toKey() string {
k := fmt.Sprintf("%+v", v)
return k
} }
func (v *value) Add(other *value) *value { func (v *value) Add(other *value) *value {
return &value{ return &value{
data: v.data + other.data, data: v.data + other.data,
children: []*value{v, other}, lChild: v,
localGrads: []*value{{data: 1.0}, {data: 1.0}}, rChild: other,
lLocalGrad: &value{data: 1.0},
rLocalGrad: &value{data: 1.0},
} }
} }
@@ -314,22 +311,20 @@ func (v *value) Div(other *value) *value {
func (v *value) Mul(other *value) *value { func (v *value) Mul(other *value) *value {
// note the swap here: children are stored as v, other but grads are other, v // note the swap here: children are stored as v, other but grads are other, v
return &value{ return &value{
data: v.data * other.data, data: v.data * other.data,
children: []*value{v, other}, rChild: v,
localGrads: []*value{ lChild: other,
{data: other.data}, rLocalGrad: &value{data: other.data},
{data: v.data}, lLocalGrad: &value{data: v.data},
},
} }
} }
func (v *value) Pow(other *value) *value { func (v *value) Pow(other *value) *value {
return &value{ return &value{
data: math.Pow(v.data, other.data), data: math.Pow(v.data, other.data),
children: []*value{v}, rChild: v,
localGrads: []*value{ rLocalGrad: other.Mul(&value{data: math.Pow(v.data, other.Sub(&value{data: 1}).data)}),
other.Mul(&value{data: math.Pow(v.data, other.Sub(&value{data: 1}).data)}), }
}}
} }
func (v *value) Neg() *value { func (v *value) Neg() *value {
@@ -338,45 +333,41 @@ func (v *value) Neg() *value {
func (v *value) Log() *value { func (v *value) Log() *value {
return &value{ return &value{
data: math.Log(v.data), data: math.Log(v.data),
children: []*value{v}, rChild: v,
localGrads: []*value{ rLocalGrad: (&value{data: 1}).Div(v),
(&value{data: 1}).Div(v),
},
} }
} }
func (v *value) Exp() *value { func (v *value) Exp() *value {
return &value{ return &value{
data: math.Exp(v.data), data: math.Exp(v.data),
children: []*value{v}, rChild: v,
localGrads: []*value{ rLocalGrad: &value{data: math.Exp(v.data)},
{data: math.Exp(v.data)},
},
} }
} }
func (v *value) Relu() *value { func (v *value) Relu() *value {
return &value{ return &value{
data: max(v.data, 0), data: max(v.data, 0),
children: []*value{v}, rChild: v,
localGrads: []*value{ rLocalGrad: &value{data: btof(v.data > 0)},
{data: btof(v.data > 0)},
},
} }
} }
func (v *value) Backward() { func (v *value) Backward() {
topo := []*value{} topo := []*value{}
visited := map[string]struct{}{} visited := map[*value]struct{}{}
var buildTopo func(v *value) var buildTopo func(v *value)
buildTopo = func(v *value) { buildTopo = func(v *value) {
k := v.toKey() if _, ok := visited[v]; !ok {
if _, ok := visited[k]; !ok { visited[v] = struct{}{}
visited[k] = struct{}{} if v.rChild != nil {
for _, child := range v.children { buildTopo(v.rChild)
buildTopo(child) }
if v.lChild != nil {
buildTopo(v.lChild)
} }
topo = append(topo, v) topo = append(topo, v)
} }
@@ -384,8 +375,11 @@ func (v *value) Backward() {
buildTopo(v) buildTopo(v)
v.grad = 1.0 v.grad = 1.0
for _, v := range slices.Backward(topo) { for _, v := range slices.Backward(topo) {
for i := range v.children { if v.rChild != nil {
v.children[i].grad += v.localGrads[i].data * v.grad v.rChild.grad += v.rLocalGrad.data * v.grad
}
if v.lChild != nil {
v.lChild.grad += v.lLocalGrad.data * v.grad
} }
} }
} }