diff --git a/microgopt.go b/microgopt.go
index c0794a2..da04113 100644
--- a/microgopt.go
+++ b/microgopt.go
@@ -88,8 +88,8 @@ func Run(docs []string) {
 	// "Let there be Adam, the blessed optimizer and its buffers"
 	learningRate, beta1, beta2, epsAdam := 0.01, 0.85, 0.99, 1e-8
-	m := slices.Repeat([]float64{}, len(params)) // first moment buffer
-	v := slices.Repeat([]float64{}, len(params)) // second moment buffer
+	m := make([]float64, len(params)) // first moment buffer
+	v := make([]float64, len(params)) // second moment buffer
 
 	// Repeat in sequence
 	numSteps := 1000 // number of training steps
 
@@ -115,10 +115,9 @@ func Run(docs []string) {
 		}
 		lossSum := &value{}
 		for _, l := range losses {
-			lossSum.Add(l)
+			lossSum = lossSum.Add(l)
 		}
-		loss := lossSum.Mul(&value{data: float64(1 / n)}) // final average loss over the document sequence. May yours be low.
-
+		loss := (&value{data: 1 / float64(n)}).Mul(lossSum) // final average loss over the document sequence. May yours be low.
 		// Backward the loss, calculating the gradients with respect to all model parameters
 		loss.Backward()
 
@@ -132,12 +131,12 @@ func Run(docs []string) {
 			p.data = p.data - (lrt*m_hat)/(math.Pow(v_hat, 0.5)+epsAdam)
 			p.grad = 0.0
 		}
-		fmt.Printf("step %4d / %4d | loss %.4f\n", step+1, numSteps, loss.data)
+		fmt.Printf("step %4d / %4d | loss %.4f\r", step+1, numSteps, loss.data)
 	}
 
 	// Inference: may the model babble back to us
 	temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high
-	fmt.Println("--- inference (new, hallucinated names) ---")
+	fmt.Println("\n--- inference (new, hallucinated names) ---")
 	for sampleIdx := range 20 {
 		keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
 		tokenId := BOS
@@ -149,13 +148,13 @@ func Run(docs []string) {
 				probs[i] = l.Div(&value{data: temperature})
 			}
 			probs = softMax(probs)
-			tokenId := RouletteDraw(probs)
+			tokenId := choose(probs)
 			if tokenId == BOS {
 				break
 			}
 			sample = append(sample, uchars[tokenId])
 		}
-		fmt.Printf("sample %2d: %s\n", sampleIdx, string(sample))
+		fmt.Printf("sample %2d: %s\n", sampleIdx+1, string(sample))
 	}
 }
 
@@ -173,9 +172,11 @@ func genMatrix(out, in int) [][]*value {
 func linear(x []*value, w [][]*value) []*value {
 	r := []*value{}
 	for _, wo := range w {
+		s := &value{data: 0.0}
 		for i := range wo {
-			r = append(r, wo[i].Mul(x[i]))
+			s = s.Add(wo[i].Mul(x[i]))
 		}
+		r = append(r, s)
 	}
 	return r
 }
@@ -391,14 +392,13 @@ func (v *value) Backward() {
 
 func mkDeepSlice(size int) [][][]*value {
 	a := make([][][]*value, 1, 10)
-	a[0] = make([][]*value, 1, 10)
-	a[0][0] = make([]*value, 1, 10)
+	a[0] = make([][]*value, 0, 10)
 	return a
 }
 
 // implement our own weighted random chooser
 // based on https://cybernetist.com/2019/01/24/random-weighted-draws-in-go/ but without the dependency on gonum
-func RouletteDraw(p []*value) int {
+func choose(p []*value) int {
 	// Initialization: create the discrete CDF
 	cdf := make([]float64, len(p))
 	for i, v := range p {