fix all the compilation issues
This commit is contained in:
26
microgopt.go
26
microgopt.go
@@ -88,8 +88,8 @@ func Run(docs []string) {
|
|||||||
|
|
||||||
// "Let there be Adam, the blessed optimizer and its buffers"
|
// "Let there be Adam, the blessed optimizer and its buffers"
|
||||||
learningRate, beta1, beta2, epsAdam := 0.01, 0.85, 0.99, 1e-8
|
learningRate, beta1, beta2, epsAdam := 0.01, 0.85, 0.99, 1e-8
|
||||||
m := slices.Repeat([]float64{}, len(params)) // first moment buffer
|
m := make([]float64, len(params)) // first moment buffer
|
||||||
v := slices.Repeat([]float64{}, len(params)) // second moment buffer
|
v := make([]float64, len(params)) // second moment buffer
|
||||||
|
|
||||||
// Repeat in sequence
|
// Repeat in sequence
|
||||||
numSteps := 1000 // number of training steps
|
numSteps := 1000 // number of training steps
|
||||||
@@ -115,10 +115,9 @@ func Run(docs []string) {
|
|||||||
}
|
}
|
||||||
lossSum := &value{}
|
lossSum := &value{}
|
||||||
for _, l := range losses {
|
for _, l := range losses {
|
||||||
lossSum.Add(l)
|
lossSum = lossSum.Add(l)
|
||||||
}
|
}
|
||||||
loss := lossSum.Mul(&value{data: float64(1 / n)}) // final average loss over the document sequence. May yours be low.
|
loss := (&value{data: 1 / float64(n)}).Mul(lossSum) // final average loss over the document sequence. May yours be low.
|
||||||
|
|
||||||
// Backward the loss, calculating the gradients with respect to all model parameters
|
// Backward the loss, calculating the gradients with respect to all model parameters
|
||||||
loss.Backward()
|
loss.Backward()
|
||||||
|
|
||||||
@@ -132,12 +131,12 @@ func Run(docs []string) {
|
|||||||
p.data = p.data - (lrt*m_hat)/(math.Pow(v_hat, 0.5)+epsAdam)
|
p.data = p.data - (lrt*m_hat)/(math.Pow(v_hat, 0.5)+epsAdam)
|
||||||
p.grad = 0.0
|
p.grad = 0.0
|
||||||
}
|
}
|
||||||
fmt.Printf("step %4d / %4d | loss %.4f\n", step+1, numSteps, loss.data)
|
fmt.Printf("step %4d / %4d | loss %.4f\r", step+1, numSteps, loss.data)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Inference: may the model babble back to us
|
// Inference: may the model babble back to us
|
||||||
temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high
|
temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high
|
||||||
fmt.Println("--- inference (new, hallucinated names) ---")
|
fmt.Println("\n--- inference (new, hallucinated names) ---")
|
||||||
for sampleIdx := range 20 {
|
for sampleIdx := range 20 {
|
||||||
keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
|
keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
|
||||||
tokenId := BOS
|
tokenId := BOS
|
||||||
@@ -149,13 +148,13 @@ func Run(docs []string) {
|
|||||||
probs[i] = l.Div(&value{data: temperature})
|
probs[i] = l.Div(&value{data: temperature})
|
||||||
}
|
}
|
||||||
probs = softMax(probs)
|
probs = softMax(probs)
|
||||||
tokenId := RouletteDraw(probs)
|
tokenId := choose(probs)
|
||||||
if tokenId == BOS {
|
if tokenId == BOS {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
sample = append(sample, uchars[tokenId])
|
sample = append(sample, uchars[tokenId])
|
||||||
}
|
}
|
||||||
fmt.Printf("sample %2d: %s\n", sampleIdx, string(sample))
|
fmt.Printf("sample %2d: %s\n", sampleIdx+1, string(sample))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -173,9 +172,11 @@ func genMatrix(out, in int) [][]*value {
|
|||||||
func linear(x []*value, w [][]*value) []*value {
|
func linear(x []*value, w [][]*value) []*value {
|
||||||
r := []*value{}
|
r := []*value{}
|
||||||
for _, wo := range w {
|
for _, wo := range w {
|
||||||
|
s := &value{data: 0.0}
|
||||||
for i := range wo {
|
for i := range wo {
|
||||||
r = append(r, wo[i].Mul(x[i]))
|
s = s.Add(wo[i].Mul(x[i]))
|
||||||
}
|
}
|
||||||
|
r = append(r, s)
|
||||||
}
|
}
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
@@ -391,14 +392,13 @@ func (v *value) Backward() {
|
|||||||
|
|
||||||
func mkDeepSlice(size int) [][][]*value {
|
func mkDeepSlice(size int) [][][]*value {
|
||||||
a := make([][][]*value, 1, 10)
|
a := make([][][]*value, 1, 10)
|
||||||
a[0] = make([][]*value, 1, 10)
|
a[0] = make([][]*value, 0, 10)
|
||||||
a[0][0] = make([]*value, 1, 10)
|
|
||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
// implement our own weighted random chooser
|
// implement our own weighted random chooser
|
||||||
// based on https://cybernetist.com/2019/01/24/random-weighted-draws-in-go/ but without the dependency on gonum
|
// based on https://cybernetist.com/2019/01/24/random-weighted-draws-in-go/ but without the dependency on gonum
|
||||||
func RouletteDraw(p []*value) int {
|
func choose(p []*value) int {
|
||||||
// Initialization: create the discrete CDF
|
// Initialization: create the discrete CDF
|
||||||
cdf := make([]float64, len(p))
|
cdf := make([]float64, len(p))
|
||||||
for i, v := range p {
|
for i, v := range p {
|
||||||
|
|||||||
Reference in New Issue
Block a user