diff --git a/microgopt.go b/microgopt.go
index c0794a2..da04113 100644
--- a/microgopt.go
+++ b/microgopt.go
@@ -88,8 +88,8 @@ func Run(docs []string) {
 	// "Let there be Adam, the blessed optimizer and its buffers"
 	learningRate, beta1, beta2, epsAdam := 0.01, 0.85, 0.99, 1e-8
-	m := slices.Repeat([]float64{}, len(params)) // first moment buffer
-	v := slices.Repeat([]float64{}, len(params)) // second moment buffer
+	m := make([]float64, len(params)) // first moment buffer
+	v := make([]float64, len(params)) // second moment buffer
 
 	// Repeat in sequence
 	numSteps := 1000 // number of training steps
 
@@ -115,10 +115,9 @@ func Run(docs []string) {
 		}
 		lossSum := &value{}
 		for _, l := range losses {
-			lossSum.Add(l)
+			lossSum = lossSum.Add(l)
 		}
-		loss := lossSum.Mul(&value{data: float64(1 / n)}) // final average loss over the document sequence. May yours be low.
-
+		loss := (&value{data: 1 / float64(n)}).Mul(lossSum) // final average loss over the document sequence. May yours be low.
 		// Backward the loss, calculating the gradients with respect to all model parameters
 		loss.Backward()
 
@@ -132,12 +131,12 @@ func Run(docs []string) {
 			p.data = p.data - (lrt*m_hat)/(math.Pow(v_hat, 0.5)+epsAdam)
 			p.grad = 0.0
 		}
-		fmt.Printf("step %4d / %4d | loss %.4f\n", step+1, numSteps, loss.data)
+		fmt.Printf("step %4d / %4d | loss %.4f\r", step+1, numSteps, loss.data)
 	}
 
 	// Inference: may the model babble back to us
 	temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high
-	fmt.Println("--- inference (new, hallucinated names) ---")
+	fmt.Println("\n--- inference (new, hallucinated names) ---")
 	for sampleIdx := range 20 {
 		keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
 		tokenId := BOS
@@ -149,13 +148,13 @@ func Run(docs []string) {
 				probs[i] = l.Div(&value{data: temperature})
 			}
 			probs = softMax(probs)
-			tokenId := RouletteDraw(probs)
+			tokenId := choose(probs)
 			if tokenId == BOS {
 				break
 			}
 			sample = append(sample, uchars[tokenId])
 		}
-		fmt.Printf("sample %2d: %s\n", sampleIdx, string(sample))
+		fmt.Printf("sample %2d: %s\n", sampleIdx+1, string(sample))
 	}
 }
 
@@ -173,9 +172,11 @@ func genMatrix(out, in int) [][]*value {
 func linear(x []*value, w [][]*value) []*value {
 	r := []*value{}
 	for _, wo := range w {
+		s := &value{data: 0.0}
 		for i := range wo {
-			r = append(r, wo[i].Mul(x[i]))
+			s = s.Add(wo[i].Mul(x[i]))
 		}
+		r = append(r, s)
 	}
 	return r
 }
@@ -391,14 +392,13 @@ func (v *value) Backward() {
 
 func mkDeepSlice(size int) [][][]*value {
 	a := make([][][]*value, 1, 10)
-	a[0] = make([][]*value, 1, 10)
-	a[0][0] = make([]*value, 1, 10)
+	a[0] = make([][]*value, 0, 10)
 	return a
 }
 
 // implement our own weighted random chooser
 // based on https://cybernetist.com/2019/01/24/random-weighted-draws-in-go/ but without the dependency on gonum
-func RouletteDraw(p []*value) int {
+func choose(p []*value) int {
 	// Initialization: create the discrete CDF
 	cdf := make([]float64, len(p))
 	for i, v := range p {