From 327458e0c168624026d353417db4dd906659db6b Mon Sep 17 00:00:00 2001 From: Branden J Brown Date: Sun, 22 Dec 2024 17:40:39 -0500 Subject: [PATCH] brain/*: remove speaking Fixes #94. --- brain/brain.go | 5 - brain/braintest/braintest_test.go | 47 --- brain/builder.go | 50 ---- brain/builder_test.go | 116 -------- brain/kvbrain/speak.go | 120 -------- brain/kvbrain/speak_test.go | 143 ---------- brain/learn_test.go | 5 - brain/speak.go | 30 -- brain/speak_test.go | 4 - brain/sqlbrain/speak.go | 144 ---------- brain/sqlbrain/speak_test.go | 457 ------------------------------ 11 files changed, 1121 deletions(-) delete mode 100644 brain/builder.go delete mode 100644 brain/builder_test.go diff --git a/brain/brain.go b/brain/brain.go index 65238c7..d162a29 100644 --- a/brain/brain.go +++ b/brain/brain.go @@ -31,11 +31,6 @@ type Interface interface { // will be the same on each iteration and must not retain them. Think(ctx context.Context, tag string, prefix []string) iter.Seq[func(id, suf *[]byte) error] - // Speak generates a full message and appends it to w. - // - // The prompt is in reverse order and has entropy reduction applied. - Speak(ctx context.Context, tag string, prompt []string, w *Builder) error - // Forget forgets everything learned from a single given message. // If nothing has been learned from the message, it must prevent anything // from being learned from a message with that ID. diff --git a/brain/braintest/braintest_test.go b/brain/braintest/braintest_test.go index 18ef6cf..a64bac5 100644 --- a/brain/braintest/braintest_test.go +++ b/brain/braintest/braintest_test.go @@ -3,7 +3,6 @@ package braintest_test import ( "context" "iter" - "math/rand/v2" "slices" "strings" "sync" @@ -88,52 +87,6 @@ func (m *membrain) Think(ctx context.Context, tag string, prompt []string) iter. } } -func (m *membrain) Speak(ctx context.Context, tag string, prompt []string, w *brain.Builder) error { - m.mu.Lock() - defer m.mu.Unlock() - var s string - if len(prompt) == 0 { - u := slices.Clone(m.tups[tag].tups[""]) - d := 0 - for k, v := range u { - if m.tups[tag].forgort[v[0]] { - u[d], u[k] = u[k], u[d] - d++ - } - } - u = u[d:] - if len(u) == 0 { - return nil - } - t := u[rand.IntN(len(u))] - w.Append(t[0], []byte(t[1])) - s = brain.ReduceEntropy(t[1]) - } else { - s = brain.ReduceEntropy(prompt[len(prompt)-1]) - } - for range 256 { - u := slices.Clone(m.tups[tag].tups[s]) - d := 0 - for k, v := range u { - if m.tups[tag].forgort[v[0]] { - u[d], u[k] = u[k], u[d] - d++ - } - } - u = u[d:] - if len(u) == 0 { - break - } - t := u[rand.IntN(len(u))] - if t[1] == "" { - break - } - w.Append(t[0], []byte(t[1])) - s = brain.ReduceEntropy(t[1]) - } - return nil -} - func TestTests(t *testing.T) { braintest.Test(context.Background(), t, func(ctx context.Context) brain.Interface { return new(membrain) }) } diff --git a/brain/builder.go b/brain/builder.go deleted file mode 100644 index 547e011..0000000 --- a/brain/builder.go +++ /dev/null @@ -1,50 +0,0 @@ -package brain - -import "slices" - -// Builder builds a spoken message along with its message trace. -type Builder struct { - w []byte - id []string -} - -// Append adds a term to the builder. -func (b *Builder) Append(id string, term []byte) { - b.w = append(b.w, term...) - k, ok := slices.BinarySearch(b.id, id) - if !ok { - b.id = slices.Insert(b.id, k, id) - } -} - -// prompt adds a term without an ID. -func (b *Builder) prompt(term string) { - b.w = append(b.w, term...) -} - -// grow reserves sufficient space to append at least n bytes without reallocating. -func (b *Builder) grow(n int) { - if cap(b.w)-len(b.w) >= n { - return - } - t := make([]byte, len(b.w), len(b.w)+n) - copy(t, b.w) - b.w = t -} - -// String returns the built message. -func (b *Builder) String() string { - return string(b.w) -} - -// Trace returns a direct reference to the message trace. -func (b *Builder) Trace() []string { - return b.id -} - -// Reset restores the builder to an empty state. -func (b *Builder) Reset() { - b.w = b.w[:0] - clear(b.id) // allow held strings to release - b.id = b.id[:0] -} diff --git a/brain/builder_test.go b/brain/builder_test.go deleted file mode 100644 index 2eda322..0000000 --- a/brain/builder_test.go +++ /dev/null @@ -1,116 +0,0 @@ -package brain_test - -import ( - "math/rand/v2" - "slices" - "strconv" - "testing" - - "github.com/zephyrtronium/robot/brain" -) - -func TestBuilder(t *testing.T) { - cases := []struct { - name string - terms [][2]string - want string - trace []string - }{ - { - name: "empty", - terms: nil, - want: "", - trace: nil, - }, - { - name: "single", - terms: [][2]string{ - {"bocchi", "ryo"}, - }, - want: "ryo", - trace: []string{ - "bocchi", - }, - }, - { - name: "multi", - terms: [][2]string{ - {"bocchi", "ryo"}, - {"nijika", "kita"}, - }, - want: "ryokita", - trace: []string{ - "bocchi", - "nijika", - }, - }, - { - name: "order", - terms: [][2]string{ - {"nijika", "ryo"}, - {"bocchi", "kita"}, - }, - want: "ryokita", - trace: []string{ - "bocchi", - "nijika", - }, - }, - { - name: "dedup", - terms: [][2]string{ - {"bocchi", "ryo"}, - {"bocchi", "kita"}, - }, - want: "ryokita", - trace: []string{ - "bocchi", - }, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - var b brain.Builder - for _, t := range c.terms { - b.Append(t[0], []byte(t[1])) - } - got := b.String() - trace := b.Trace() - if got != c.want { - t.Errorf("wrong string: want %q, got %q", c.want, got) - } - if !slices.Equal(trace, c.trace) { - t.Errorf("wrong trace: want %q, got %q", c.trace, trace) - } - b.Reset() - got = b.String() - trace = b.Trace() - if got != "" { - t.Errorf("string %q not empty after reset", got) - } - if len(trace) != 0 { - t.Errorf("trace %q not empty after reset", trace) - } - }) - } -} - -func BenchmarkBuilder(b *testing.B) { - var ids [256]string - var words [256][]byte - for i := range ids { - ids[i] = strconv.FormatUint(rand.Uint64()>>(i/16), 2) - words[i] = []byte(ids[i]) - } - var m brain.Builder - b.ReportAllocs() - b.ResetTimer() - for range b.N { - m.Reset() - u := rand.Uint64() - m.Append(ids[byte(u>>0)], words[byte(u>>8)]) - m.Append(ids[byte(u>>16)], words[byte(u>>24)]) - m.Append(ids[byte(u>>32)], words[byte(u>>40)]) - m.Append(ids[byte(u>>48)], words[byte(u>>56)]) - } -} diff --git a/brain/kvbrain/speak.go b/brain/kvbrain/speak.go index 23374a3..590ae59 100644 --- a/brain/kvbrain/speak.go +++ b/brain/kvbrain/speak.go @@ -5,17 +5,10 @@ import ( "context" "fmt" "iter" - "math/rand/v2" "github.com/dgraph-io/badger/v4" - - "github.com/zephyrtronium/robot/brain" - "github.com/zephyrtronium/robot/deque" - "github.com/zephyrtronium/robot/tpool" ) -var prependerPool tpool.Pool[deque.Deque[string]] - func (br *Brain) Think(ctx context.Context, tag string, prompt []string) iter.Seq[func(id *[]byte, suf *[]byte) error] { return func(yield func(func(id *[]byte, suf *[]byte) error) bool) { erf := func(err error) { yield(func(id, suf *[]byte) error { return err }) } @@ -79,116 +72,3 @@ func (br *Brain) Think(ctx context.Context, tag string, prompt []string) iter.Se } } } - -// Speak generates a full message and appends it to w. -// The prompt is in reverse order and has entropy reduction applied. -func (br *Brain) Speak(ctx context.Context, tag string, prompt []string, w *brain.Builder) error { - search := prependerPool.Get().Prepend(prompt...) - defer func() { prependerPool.Put(search.Reset()) }() - - tb := hashTag(make([]byte, 0, tagHashLen), tag) - b := make([]byte, 0, 128) - var id string - opts := badger.DefaultIteratorOptions - // We don't actually need to iterate over values, only the single value - // that we decide to use per suffix. So, we can disable value prefetch. - opts.PrefetchValues = false - opts.Prefix = hashTag(nil, tag) - for range 1024 { - var err error - var l int - b = append(b[:0], tb...) - b, id, l, err = br.next(b, search.Slice(), opts) - if err != nil { - return err - } - if len(b) == 0 { - break - } - w.Append(id, b) - search = search.DropEnd(search.Len() - l - 1).Prepend(brain.ReduceEntropy(string(b))) - } - return nil -} - -// next finds a single token to continue a prompt. -// The returned values are, in order, -// b with its contents replaced with the new term, -// the ID of the message used for the term, -// the number of terms of the prompt which matched to produce the new term, -// and any error. -// If the returned term is the empty string, generation should end. -func (br *Brain) next(b []byte, prompt []string, opts badger.IteratorOptions) ([]byte, string, int, error) { - // These definitions are outside the loop to ensure we don't bias toward - // smaller contexts. - var ( - key []byte - skip brain.Skip - n uint64 - ) - b = appendPrefix(b, prompt) - if len(prompt) == 0 { - // If we have no prompt, then we want to make sure we select only - // options that start a message. - b = append(b, '\xff') - } - for { - var seen uint64 - err := br.knowledge.View(func(txn *badger.Txn) error { - it := txn.NewIterator(opts) - defer it.Close() - it.Seek(b) - for it.ValidForPrefix(b) { - if n == 0 { - item := it.Item() - // TODO(zeph): for #43, check deleted uuids so we never - // pick a message that has been deleted - key = item.KeyCopy(key[:0]) - n = skip.N(rand.Uint64(), rand.Uint64()) - } - it.Next() - n-- - seen++ - } - return nil - }) - if err != nil { - return nil, "", len(prompt), fmt.Errorf("couldn't read knowledge: %w", err) - } - // Try to lose context. - // We want to do so when we have a long context and almost no options, - // or at random with even a short context. - // Note that in the latter case we use a 1/2 chance; it seems high, but - // n.b. the caller will recover the last token that we discard. - if len(prompt) > 4 && seen <= 2 || len(prompt) > 2 && rand.Uint32()&1 == 0 { - // We haven't seen enough options, and we have context we could - // lose. Do so and try again from the beginning. - prompt = prompt[:len(prompt)-1] - b = appendPrefix(b[:tagHashLen], prompt) - continue - } - if key == nil { - // We never saw any options. Since we always select the first, this - // means there were no options. Don't look for nothing in the DB. - return b[:0], "", len(prompt), nil - } - err = br.knowledge.View(func(txn *badger.Txn) error { - item, err := txn.Get(key) - if err != nil { - return fmt.Errorf("couldn't get item for key %q: %w", key, err) - } - b, err = item.ValueCopy(b[:0]) - if err != nil { - return fmt.Errorf("couldn't get value for key %q: %w", key, err) - } - return nil - }) - // The id is everything after the first byte following the hash for - // empty prefixes, and everything after the first \xff\xff otherwise. - id := key[tagHashLen+1:] - if len(prompt) > 0 { - _, id, _ = bytes.Cut(key, []byte{0xff, 0xff}) - } - return b, string(id), len(prompt), err - } -} diff --git a/brain/kvbrain/speak_test.go b/brain/kvbrain/speak_test.go index 5453050..94a4017 100644 --- a/brain/kvbrain/speak_test.go +++ b/brain/kvbrain/speak_test.go @@ -3,7 +3,6 @@ package kvbrain import ( "context" "errors" - "maps" "slices" "testing" @@ -174,148 +173,6 @@ func TestThink(t *testing.T) { } } -func TestSpeak(t *testing.T) { - uu := ":)" - cases := []struct { - name string - kvs [][2]string - prompt []string - want []string - }{ - { - name: "empty", - kvs: nil, - want: []string{ - // Even with no thoughts head empty, we expect to get empty, - // non-error results when we speak. Our test currently records - // what it gets as a joined string for convenience, so we want - // an empty string in here, even though we really should be - // getting an empty slice. - "", - }, - }, - { - name: "single", - kvs: [][2]string{ - {mkey("kessoku", "\xff", uu), "bocchi "}, - {mkey("kessoku", "bocchi \xff\xff", uu), ""}, - }, - want: []string{ - "bocchi ", - }, - }, - { - name: "longer", - kvs: [][2]string{ - {mkey("kessoku", "\xff", uu), "bocchi "}, - {mkey("kessoku", "bocchi \xff\xff", uu), "ryou "}, - {mkey("kessoku", "ryou \xffbocchi \xff\xff", uu), "nijika "}, - {mkey("kessoku", "nijika \xffryou \xffbocchi \xff\xff", uu), "kita "}, - {mkey("kessoku", "kita \xffnijika \xffryou \xffbocchi \xff\xff", uu), ""}, - }, - want: []string{ - "bocchi ryou nijika kita ", - }, - }, - { - name: "entropy", - kvs: [][2]string{ - {mkey("kessoku", "\xff", uu), "BOCCHI "}, - {mkey("kessoku", "bocchi \xff\xff", uu), "RYOU "}, - {mkey("kessoku", "ryou \xffbocchi \xff\xff", uu), "NIJIKA "}, - {mkey("kessoku", "nijika \xffryou \xffbocchi \xff\xff", uu), "KITA "}, - {mkey("kessoku", "kita \xffnijika \xffryou \xffbocchi \xff\xff", uu), ""}, - }, - want: []string{ - "BOCCHI RYOU NIJIKA KITA ", - }, - }, - { - name: "prompted", - kvs: [][2]string{ - {mkey("kessoku", "\xff", uu), "bocchi "}, - {mkey("kessoku", "bocchi \xff\xff", uu), "ryou "}, - {mkey("kessoku", "ryou \xffbocchi \xff\xff", uu), "nijika "}, - {mkey("kessoku", "nijika \xffryou \xffbocchi \xff\xff", uu), "kita "}, - {mkey("kessoku", "kita \xffnijika \xffryou \xffbocchi \xff\xff", uu), ""}, - }, - prompt: []string{"bocchi "}, - want: []string{ - "ryou nijika kita ", - }, - }, - { - name: "prompted-entropy", - kvs: [][2]string{ - {mkey("kessoku", "\xff", uu), "BOCCHI "}, - {mkey("kessoku", "bocchi \xff\xff", uu), "RYOU "}, - {mkey("kessoku", "ryou \xffbocchi \xff\xff", uu), "NIJIKA "}, - {mkey("kessoku", "nijika \xffryou \xffbocchi \xff\xff", uu), "KITA "}, - {mkey("kessoku", "kita \xffnijika \xffryou \xffbocchi \xff\xff", uu), ""}, - }, - prompt: []string{"bocchi "}, - want: []string{ - "RYOU NIJIKA KITA ", - }, - }, - { - name: "uniform", - kvs: [][2]string{ - {mkey("kessoku", "\xff", "1"), "bocchi "}, - {mkey("kessoku", "bocchi \xff\xff", "1"), "ryou "}, - {mkey("kessoku", "ryou \xffbocchi \xff\xff", "1"), ""}, - {mkey("kessoku", "\xff", "2"), "bocchi "}, - {mkey("kessoku", "bocchi \xff\xff", "2"), "nijika "}, - {mkey("kessoku", "nijika \xffbocchi \xff\xff", "2"), ""}, - {mkey("kessoku", "\xff", "3"), "bocchi "}, - {mkey("kessoku", "bocchi \xff\xff", "3"), "kita "}, - {mkey("kessoku", "kita \xffbocchi \xff\xff", "3"), ""}, - }, - want: []string{ - "bocchi ryou ", - "bocchi nijika ", - "bocchi kita ", - }, - }, - // TODO(zeph): test tag isolation - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - t.Parallel() - ctx := context.Background() - db, err := badger.Open(badger.DefaultOptions("").WithInMemory(true).WithLogger(nil)) - if err != nil { - t.Fatal(err) - } - db.Update(func(txn *badger.Txn) error { - var err error - for _, item := range c.kvs { - err = errors.Join(err, txn.Set([]byte(item[0]), []byte(item[1]))) - } - return err - }) - br := New(db) - want := make(map[string]bool, len(c.want)) - for _, v := range c.want { - want[v] = true - } - got := make(map[string]bool, len(c.want)) - var w brain.Builder - for range 256 { - w.Reset() - err := br.Speak(ctx, "kessoku", slices.Clone(c.prompt), &w) - if err != nil { - t.Errorf("failed to speak: %v", err) - } - got[w.String()] = true - } - if !maps.Equal(want, got) { - t.Errorf("wrong results: want %v, got %v", want, got) - } - }) - } -} - func BenchmarkSpeak(b *testing.B) { new := func(ctx context.Context, b *testing.B) brain.Interface { db, err := badger.Open(badger.DefaultOptions(b.TempDir()).WithLogger(nil).WithCompression(options.None).WithBloomFalsePositive(1.0 / 32).WithNumMemtables(16).WithLevelSizeMultiplier(4)) diff --git a/brain/learn_test.go b/brain/learn_test.go index 50196a8..708f994 100644 --- a/brain/learn_test.go +++ b/brain/learn_test.go @@ -28,11 +28,6 @@ func (t *testLearner) Forget(ctx context.Context, tag, id string) error { return nil } -// Speak implements brain.Brain. -func (t *testLearner) Speak(ctx context.Context, tag string, prompt []string, w *brain.Builder) error { - panic("unimplemented") -} - // Think implements brain.Interface. func (t *testLearner) Think(ctx context.Context, tag string, prefix []string) iter.Seq[func(id *[]byte, suf *[]byte) error] { panic("unimplemented") diff --git a/brain/speak.go b/brain/speak.go index 781cc21..7f60a9e 100644 --- a/brain/speak.go +++ b/brain/speak.go @@ -6,7 +6,6 @@ import ( "fmt" "math/rand/v2" "slices" - "strings" "github.com/zephyrtronium/robot/deque" "github.com/zephyrtronium/robot/tpool" @@ -16,37 +15,8 @@ var ( tokensPool tpool.Pool[[]string] prependerPool tpool.Pool[deque.Deque[string]] bytesPool tpool.Pool[[]byte] - builderPool = tpool.Pool[*Builder]{New: func() any { return new(Builder) }} ) -// Speak produces a new message and the trace of messages used to form it -// from the given prompt. -// If the brain does not produce any terms, the result is the empty string -// regardless of the prompt, with no error. -func Speak(ctx context.Context, s Interface, tag, prompt string) (string, []string, error) { - w := builderPool.Get() - toks := tokens(tokensPool.Get(), prompt) - defer func() { - w.Reset() - builderPool.Put(w) - tokensPool.Put(toks[:0]) - }() - w.grow(len(prompt) + 1) - for i, t := range toks { - w.prompt(t) - toks[i] = ReduceEntropy(t) - } - slices.Reverse(toks) - err := s.Speak(ctx, tag, toks, w) - if err != nil { - return "", nil, fmt.Errorf("couldn't speak: %w", err) - } - if len(w.Trace()) == 0 { - return "", nil, nil - } - return strings.TrimSpace(w.String()), slices.Clone(w.Trace()), nil -} - // Think produces a new message and the trace of message IDs used to form it // from the given prompt. // If the brain does not produce any terms, the result is the empty string diff --git a/brain/speak_test.go b/brain/speak_test.go index ac69eea..cd6e801 100644 --- a/brain/speak_test.go +++ b/brain/speak_test.go @@ -47,10 +47,6 @@ func (t *testThinker) Think(ctx context.Context, tag string, prefix []string) it } } -func (t *testThinker) Speak(ctx context.Context, tag string, prompt []string, w *brain.Builder) error { - panic("TODO: remove") -} - // Forget implements brain.Brain. func (t *testThinker) Forget(ctx context.Context, tag string, id string) error { panic("unimplemented") diff --git a/brain/sqlbrain/speak.go b/brain/sqlbrain/speak.go index bddeb3a..036be63 100644 --- a/brain/sqlbrain/speak.go +++ b/brain/sqlbrain/speak.go @@ -4,17 +4,10 @@ import ( "context" "fmt" "iter" - "math/rand/v2" "zombiezen.com/go/sqlite" - - "github.com/zephyrtronium/robot/brain" - "github.com/zephyrtronium/robot/deque" - "github.com/zephyrtronium/robot/tpool" ) -var prependerPool tpool.Pool[deque.Deque[string]] - func (br *Brain) Think(ctx context.Context, tag string, prompt []string) iter.Seq[func(id *[]byte, suf *[]byte) error] { return func(yield func(func(id, suf *[]byte) error) bool) { erf := func(err error) { yield(func(id, suf *[]byte) error { return err }) } @@ -69,106 +62,6 @@ func bytecol(d []byte, s *sqlite.Stmt, col int) []byte { return d[:s.ColumnBytes(col, d[:n])] } -// Speak generates a full message and appends it to w. -// The prompt is in reverse order and has entropy reduction applied. -func (br *Brain) Speak(ctx context.Context, tag string, prompt []string, w *brain.Builder) error { - search := prependerPool.Get().Append("").Prepend(prompt...) - defer func() { prependerPool.Put(search.Reset()) }() - - conn, err := br.db.Take(ctx) - defer br.db.Put(conn) - if err != nil { - return fmt.Errorf("couldn't get connection to speak: %w", err) - } - - b := make([]byte, 0, 128) - for range 1024 { - var err error - var l int - var id string - b, id, l, err = next(conn, tag, b, search.Slice()) - if err != nil { - return err - } - if len(b) == 0 { - break - } - w.Append(id, b) - search = search.DropEnd(search.Len() - l - 1).Prepend(brain.ReduceEntropy(string(b))) - } - return nil -} - -func next(conn *sqlite.Conn, tag string, b []byte, prompt []string) ([]byte, string, int, error) { - var id string - if len(prompt) == 0 { - var err error - b, id, err = first(conn, tag, b) - return b, id, 0, err - } - st, err := conn.Prepare(`SELECT id, suffix FROM knowledge WHERE tag = :tag AND prefix >= :lower AND prefix < :upper AND LIKELY(deleted IS NULL)`) - if err != nil { - return b[:0], "", len(prompt), fmt.Errorf("couldn't prepare term selection: %w", err) - } - st.SetText(":tag", tag) - w := make([]byte, 0, 32) - var ( - d []byte - skip brain.Skip - t uint64 - ) - for { - var seen uint64 - b = prefix(b[:0], prompt) - b, d = searchbounds(b) - st.SetBytes(":lower", b) - st.SetBytes(":upper", d) - sel: - for { - for t > 0 { - ok, err := st.Step() - if err != nil { - return b[:0], "", len(prompt), fmt.Errorf("couldn't step term selection: %w", err) - } - if !ok { - break sel - } - seen++ - t-- - } - ok, err := st.Step() - if err != nil { - return b[:0], "", len(prompt), fmt.Errorf("couldn't step term selection: %w", err) - } - if !ok { - break - } - id = st.ColumnText(0) - n := st.ColumnLen(1) - if cap(w) < n { - w = make([]byte, n) - } - w = w[:st.ColumnBytes(1, w[:n])] - t = skip.N(rand.Uint64(), rand.Uint64()) - } - // Try to lose context. - // We want to do so when we have a long context and almost no options, - // or at random with even a short context. - // Note that in the latter case we use a 1/2 chance; it seems high, but - // n.b. the caller will recover the last token that we discard. - if len(prompt) > 4 && seen <= 2 || len(prompt) > 2 && rand.Uint32()&1 == 0 { - prompt = prompt[:len(prompt)-1] - if err := st.Reset(); err != nil { - return b[:0], "", len(prompt), fmt.Errorf("couldn't reset term selection: %w", err) - } - continue - } - // Note that this also handles the case where there were no results. - b = append(b[:0], w...) - return b, id, len(prompt), nil - } -} - // searchbounds produces the lower and upper bounds for a search by prefix. // The upper bound is always a slice of the lower bound's underlying array. func searchbounds(prefix []byte) (lower, upper []byte) { @@ -182,40 +75,3 @@ func searchbounds(prefix []byte) (lower, upper []byte) { } return lower, upper } - -func first(conn *sqlite.Conn, tag string, b []byte) ([]byte, string, error) { - var id string - b = b[:0] // in case we get no rows - s, err := conn.Prepare(`SELECT id, suffix FROM knowledge WHERE tag = :tag AND prefix = x'00' AND LIKELY(deleted IS NULL)`) - if err != nil { - return b, "", fmt.Errorf("couldn't prepare first term selection: %w", err) - } - s.SetText(":tag", tag) - var skip brain.Skip -sel: - for { - ok, err := s.Step() - if err != nil { - return b[:0], "", fmt.Errorf("couldn't step first term selection: %w", err) - } - if !ok { - break - } - id = s.ColumnText(0) - n := s.ColumnLen(1) - if cap(b) < n { - b = make([]byte, n) - } - b = b[:s.ColumnBytes(1, b[:n])] - for range skip.N(rand.Uint64(), rand.Uint64()) { - ok, err := s.Step() - if err != nil { - return b[:0], "", fmt.Errorf("couldn't step first term selection: %w", err) - } - if !ok { - break sel - } - } - } - return b, id, nil -} diff --git a/brain/sqlbrain/speak_test.go b/brain/sqlbrain/speak_test.go index 4885d00..d5f09ef 100644 --- a/brain/sqlbrain/speak_test.go +++ b/brain/sqlbrain/speak_test.go @@ -466,463 +466,6 @@ func TestThink(t *testing.T) { } } -func TestSpeak(t *testing.T) { - cases := []struct { - name string - know []know - tag string - prompt []string - want []string - }{ - { - name: "empty", - know: nil, - tag: "kessoku", - prompt: nil, - // We should only ever get nil from the brain, - // but that converts to the empty string. - want: []string{""}, - }, - { - name: "empty-tagged", - know: []know{ - { - tag: "kessoku", - prefix: "\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - prefix: "bocchi \x00\x00", - suffix: "", - }, - }, - tag: "sickhack", - prompt: nil, - // We should only ever get nil from the brain, - // but that converts to the empty string. - want: []string{""}, - }, - { - name: "empty-prompted", - know: []know{ - { - tag: "kessoku", - prefix: "\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - prefix: "bocchi \x00\x00", - suffix: "", - }, - }, - tag: "kessoku", - prompt: []string{"kikuri "}, - // We should only ever get nil from the brain, - // but that converts to the empty string. - want: []string{""}, - }, - { - name: "single", - know: []know{ - { - tag: "kessoku", - prefix: "\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - prefix: "bocchi \x00\x00", - suffix: "", - }, - }, - tag: "kessoku", - prompt: nil, - want: []string{"bocchi "}, - }, - { - name: "several", - know: []know{ - { - tag: "kessoku", - prefix: "\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - prefix: "bocchi \x00\x00", - suffix: "ryo ", - }, - { - tag: "kessoku", - prefix: "ryo \x00bocchi \x00\x00", - suffix: "nijika ", - }, - { - tag: "kessoku", - prefix: "nijika \x00ryo \x00bocchi \x00\x00", - suffix: "kita ", - }, - { - tag: "kessoku", - prefix: "kita \x00nijika \x00ryo \x00bocchi \x00\x00", - suffix: "", - }, - }, - tag: "kessoku", - prompt: nil, - want: []string{"bocchi ryo nijika kita "}, - }, - { - name: "multi", - know: []know{ - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - prefix: "bocchi \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "ryo ", - }, - { - tag: "kessoku", - prefix: "ryo \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "nijika ", - }, - { - tag: "kessoku", - prefix: "nijika \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "kita ", - }, - { - tag: "kessoku", - prefix: "kita \x00member \x00\x00", - suffix: "", - }, - }, - tag: "kessoku", - prompt: nil, - want: []string{"member bocchi ", "member ryo ", "member nijika ", "member kita "}, - }, - { - name: "multi-tagged", - know: []know{ - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - prefix: "bocchi \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "ryo ", - }, - { - tag: "kessoku", - prefix: "ryo \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "nijika ", - }, - { - tag: "kessoku", - prefix: "nijika \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "kita ", - }, - { - tag: "kessoku", - prefix: "kita \x00member \x00\x00", - suffix: "", - }, - { - tag: "sickhack", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "sickhack", - prefix: "member \x00\x00", - suffix: "kikuri ", - }, - { - tag: "sickhack", - prefix: "kikuri \x00member \x00\x00", - suffix: "", - }, - { - tag: "sickhack", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "sickhack", - prefix: "member \x00\x00", - suffix: "eliza ", - }, - { - tag: "sickhack", - prefix: "eliza \x00member \x00\x00", - suffix: "", - }, - { - tag: "sickhack", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "sickhack", - prefix: "member \x00\x00", - suffix: "shima ", - }, - { - tag: "sickhack", - prefix: "shima \x00member \x00\x00", - suffix: "", - }, - }, - tag: "sickhack", - prompt: nil, - want: []string{"member kikuri ", "member eliza ", "member shima "}, - }, - { - name: "forgort", - know: []know{ - { - tag: "kessoku", - prefix: "", - suffix: "member", - deleted: ref("FORGET"), - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - prefix: "bocchi \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "ryo ", - }, - { - tag: "kessoku", - prefix: "ryo \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00\x00", - suffix: "nijika ", - }, - { - tag: "kessoku", - prefix: "nijika \x00member \x00\x00", - suffix: "", - }, - { - tag: "kessoku", - prefix: "\x00", - suffix: "member ", - }, - { - tag: "kessoku", - prefix: "member \x00", - suffix: "kita ", - deleted: ref("FORGET"), - }, - { - tag: "kessoku", - prefix: "kita \x00member \x00", - suffix: "", - deleted: ref("FORGET"), - }, - }, - tag: "kessoku", - prompt: nil, - want: []string{"member bocchi ", "member ryo ", "member nijika "}, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - t.Parallel() - ctx := context.Background() - db := testDB(ctx) - br, err := sqlbrain.Open(ctx, db) - if err != nil { - t.Fatalf("couldn't open brain: %v", err) - } - conn, err := db.Take(ctx) - defer db.Put(conn) - if err != nil { - t.Fatalf("couldn't get conn: %v", err) - } - insert(t, conn, c.know, nil) - slices.Sort(c.want) - got := make([]string, 0, len(c.want)) - var w brain.Builder - for range 10000 { - w.Reset() - err := br.Speak(ctx, c.tag, c.prompt, &w) - if err != nil { - t.Errorf("couldn't speak: %v", err) - } - s := w.String() - k, ok := slices.BinarySearch(got, s) - if !ok { - got = slices.Insert(got, k, s) - if len(got) == len(c.want) { - break - } - } - } - if !slices.Equal(c.want, got) { - t.Errorf("wrong results:\nwant %q\ngot %q", c.want, got) - } - }) - } -} - -func TestSpeakInitialContext(t *testing.T) { - // Test that the same token appearing at the beginning and end of - // non-singleton messages can't cause us to generate singletons. - t.Parallel() - ctx := context.Background() - db := testDB(ctx) - br, err := sqlbrain.Open(ctx, db) - if err != nil { - t.Fatalf("couldn't open brain: %v", err) - } - conn, err := db.Take(ctx) - defer db.Put(conn) - if err != nil { - t.Fatalf("couldn't get conn: %v", err) - } - k := []know{ - { - tag: "kessoku", - id: "1", - prefix: "\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - id: "1", - prefix: "bocchi \x00\x00", - suffix: "ryo ", - }, - { - tag: "kessoku", - id: "1", - prefix: "ryo \x00bocchi \x00\x00", - suffix: "bocchi ", - }, - { - tag: "kessoku", - id: "1", - prefix: "bocchi \x00ryo \x00bocchi \x00\x00", - suffix: "", - }, - } - insert(t, conn, k, nil) - var w brain.Builder - for range 100 { - w.Reset() - err := br.Speak(ctx, "kessoku", nil, &w) - if err != nil { - t.Errorf("couldn't speak: %v", err) - } - s := w.String() - if want := "bocchi ryo bocchi "; s != want { - t.Errorf("wrong result: should always say %q but got %q", want, s) - } - } -} - func insert(t *testing.T, conn *sqlite.Conn, know []know, msgs []msg) { t.Helper() for _, v := range know {