some cleanup
This commit is contained in:
@@ -104,7 +104,7 @@ func Run(docs []string) {
|
|||||||
n := min(blockSize, len(tokens)-1)
|
n := min(blockSize, len(tokens)-1)
|
||||||
|
|
||||||
// Forward the token sequence through the model, building up the computation graph all the way to the loss
|
// Forward the token sequence through the model, building up the computation graph all the way to the loss
|
||||||
keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
|
keys, values := mkDeepSlice(), mkDeepSlice()
|
||||||
losses := []*value{}
|
losses := []*value{}
|
||||||
for posId := range n {
|
for posId := range n {
|
||||||
tokenId, targetId := tokens[posId], tokens[posId+1]
|
tokenId, targetId := tokens[posId], tokens[posId+1]
|
||||||
@@ -138,7 +138,7 @@ func Run(docs []string) {
|
|||||||
temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high
|
temperature := 0.5 // in (0, 1], control the "creativity" of generated text, low to high
|
||||||
fmt.Println("\n--- inference (new, hallucinated names) ---")
|
fmt.Println("\n--- inference (new, hallucinated names) ---")
|
||||||
for sampleIdx := range 20 {
|
for sampleIdx := range 20 {
|
||||||
keys, values := mkDeepSlice(nLayer), mkDeepSlice(nLayer)
|
keys, values := mkDeepSlice(), mkDeepSlice()
|
||||||
tokenId := BOS
|
tokenId := BOS
|
||||||
sample := []rune{}
|
sample := []rune{}
|
||||||
for posId := range blockSize {
|
for posId := range blockSize {
|
||||||
@@ -390,7 +390,7 @@ func (v *value) Backward() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func mkDeepSlice(size int) [][][]*value {
|
func mkDeepSlice() [][][]*value {
|
||||||
a := make([][][]*value, 1, 10)
|
a := make([][][]*value, 1, 10)
|
||||||
a[0] = make([][]*value, 0, 10)
|
a[0] = make([][]*value, 0, 10)
|
||||||
return a
|
return a
|
||||||
|
|||||||
@@ -13,3 +13,4 @@ Differences between the Go and the Python, as well as notes more generally:
|
|||||||
* The Value struct has actual tests confirming the backward propagation logic.
|
* The Value struct has actual tests confirming the backward propagation logic.
|
||||||
* When writing the Value struct and its methods, I accidentally swapped the order of the values in the `localGrads` slice in `Mul` and tore my hair out trying to figure out where the bug was. When I broke down and asked Copilot to "compare these two implementations and tell me how they differ," it managed to find the error -- but also reported three non-existent differences and told me that `slices.Backward()` doesn't exist (it does, as of Go 1.23).
|
* When writing the Value struct and its methods, I accidentally swapped the order of the values in the `localGrads` slice in `Mul` and tore my hair out trying to figure out where the bug was. When I broke down and asked Copilot to "compare these two implementations and tell me how they differ," it managed to find the error -- but also reported three non-existent differences and told me that `slices.Backward()` doesn't exist (it does, as of Go 1.23).
|
||||||
* Initial pass translating the linear algebra functions has me worried that all those value structs aren't going to be very fast...
|
* Initial pass translating the linear algebra functions has me worried that all those value structs aren't going to be very fast...
|
||||||
|
* Had to implement weighted random choice by hand, since Go's standard library has no built-in weighted sampling. <https://cybernetist.com/2019/01/24/random-weighted-draws-in-go/> made that relatively straightforward; it's a neat algorithm.
|
||||||
|
|||||||
Reference in New Issue
Block a user