diff --git a/microgopt.go b/microgopt.go index da04113..ed4198b 100644 --- a/microgopt.go +++ b/microgopt.go @@ -104,7 +104,7 @@ func Run(docs []string) { n := min(blockSize, len(tokens)-1) // Forward the token sequence through the model, building up the computation graph all the way to the loss - keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer) + keys, values := mkDeepSlice(), mkDeepSlice() losses := []*value{} for posId := range n { tokenId, targetId := tokens[posId], tokens[posId+1] @@ -138,7 +138,7 @@ func Run(docs []string) { temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high fmt.Println("\n--- inference (new, hallucinated names) ---") for sampleIdx := range 20 { - keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer) + keys, values := mkDeepSlice(), mkDeepSlice() tokenId := BOS sample := []rune{} for posId := range blockSize { @@ -390,7 +390,7 @@ func (v *value) Backward() { } } -func mkDeepSlice(size int) [][][]*value { +func mkDeepSlice() [][][]*value { a := make([][][]*value, 1, 10) a[0] = make([][]*value, 0, 10) return a diff --git a/readme.md b/readme.md index b9a8f93..16ab3a8 100644 --- a/readme.md +++ b/readme.md @@ -13,3 +13,4 @@ Differences between the Go and the Python, as well as notes more generally: * The Value struct has actual tests confirming the backward propagation logic. * When writing the Value struct and its methods, I accidentally swapped the order of the values in the `localGrads` slice in `Mul` and tore my hair out trying to figure out where the bug was. When I broke down and asked copilot to "compare these two implementations and tell me how they differ," it managed to find the error -- but also reported three non-existent differences and told me that `slices.Backward()` doesn't exist. * Initial pass translating the linear algebra functions has me worried that all those value structs aren't going to be very fast... +* Had to implement weighted random choice. 
Doing so was relatively straightforward; it's a neat algorithm.