some cleanup

This commit is contained in:
2026-03-07 22:25:44 -05:00
parent b78f3c42a0
commit 2976011682
2 changed files with 4 additions and 3 deletions

View File

@@ -104,7 +104,7 @@ func Run(docs []string) {
n := min(blockSize, len(tokens)-1)
// Forward the token sequence through the model, building up the computation graph all the way to the loss
keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
keys, values := mkDeepSlice(), mkDeepSlice()
losses := []*value{}
for posId := range n {
tokenId, targetId := tokens[posId], tokens[posId+1]
@@ -138,7 +138,7 @@ func Run(docs []string) {
temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high
fmt.Println("\n--- inference (new, hallucinated names) ---")
for sampleIdx := range 20 {
keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
keys, values := mkDeepSlice(), mkDeepSlice()
tokenId := BOS
sample := []rune{}
for posId := range blockSize {
@@ -390,7 +390,7 @@ func (v *value) Backward() {
}
}
func mkDeepSlice(size int) [][][]*value {
func mkDeepSlice() [][][]*value {
a := make([][][]*value, 1, 10)
a[0] = make([][]*value, 0, 10)
return a

View File

@@ -13,3 +13,4 @@ Differences between the Go and the Python, as well as notes more generally:
* The Value struct has actual tests confirming the backward propagation logic.
* When writing the Value struct and its methods, I accidentally swapped the order of the values in the `localGrads` slice in `Mul` and tore my hair out trying to figure out where the bug was. When I broke down and asked Copilot to "compare these two implementations and tell me how they differ," it managed to find the error -- but it also reported three non-existent differences and told me that `slices.Backward()` doesn't exist.
* The initial pass at translating the linear algebra functions has me worried that all those value structs aren't going to be very fast...
* Had to implement weighted random choice. <https://cybernetist.com/2019/01/24/random-weighted-draws-in-go/> made that relatively straightforward; it's a neat algorithm.