From 5026f4f68e92fd3ac609f1e8f0d469ede7fa4fa7 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 12 Nov 2024 13:30:17 -0800 Subject: [PATCH 1/7] Add interfaces that allow for multiple implementations of prolly.Map, prolly.MutableMap, and tree.Map --- .../doltcore/doltdb/durable/artifact_index.go | 2 +- go/libraries/doltcore/doltdb/durable/index.go | 27 +++- go/libraries/doltcore/doltdb/durable/table.go | 6 +- .../doltcore/sqle/enginetest/validation.go | 16 +- .../doltcore/sqle/index/dolt_index.go | 2 +- .../sqle/writer/prolly_index_writer.go | 10 +- .../writer/prolly_index_writer_keyless.go | 4 +- .../sqle/writer/prolly_table_writer.go | 8 +- go/store/prolly/address_map.go | 2 +- go/store/prolly/artifact_map.go | 29 +++- go/store/prolly/commit_closure.go | 2 +- go/store/prolly/map_interface.go | 54 +++++++ go/store/prolly/shim/shim.go | 43 ++++-- go/store/prolly/tree/map.go | 22 ++- go/store/prolly/tree/mutable_map.go | 23 ++- go/store/prolly/tuple_map.go | 5 + go/store/prolly/tuple_mutable_map.go | 146 ++++++++++++------ 17 files changed, 297 insertions(+), 104 deletions(-) create mode 100644 go/store/prolly/map_interface.go diff --git a/go/libraries/doltcore/doltdb/durable/artifact_index.go b/go/libraries/doltcore/doltdb/durable/artifact_index.go index 0378bd6278f..f91bd636e5b 100644 --- a/go/libraries/doltcore/doltdb/durable/artifact_index.go +++ b/go/libraries/doltcore/doltdb/durable/artifact_index.go @@ -46,7 +46,7 @@ func RefFromArtifactIndex(ctx context.Context, vrw types.ValueReadWriter, idx Ar panic("TODO") case types.Format_DOLT: - b := shim.ValueFromArtifactMap(idx.(prollyArtifactIndex).index) + b := shim.ValueFromMap(idx.(prollyArtifactIndex).index) return refFromNomsValue(ctx, vrw, b) default: diff --git a/go/libraries/doltcore/doltdb/durable/index.go b/go/libraries/doltcore/doltdb/durable/index.go index 9897bf893be..bb022c863d3 100644 --- a/go/libraries/doltcore/doltdb/durable/index.go +++ b/go/libraries/doltcore/doltdb/durable/index.go @@ -87,7 +87,7 @@ func RefFromIndex(ctx context.Context, vrw types.ValueReadWriter, idx Index) (ty return refFromNomsValue(ctx, vrw, idx.(nomsIndex).index) case types.Format_DOLT: - b := shim.ValueFromMap(idx.(prollyIndex).index) + b := shim.ValueFromMap(MapFromIndex(idx)) return refFromNomsValue(ctx, vrw, b) default: @@ -112,11 +112,11 @@ func indexFromAddr(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeS return IndexFromNomsMap(v.(types.Map), vrw, ns), nil case types.Format_DOLT: - pm, err := shim.MapFromValue(v, sch, ns, isKeylessTable) + m, err := shim.MapInterfaceFromValue(v, sch, ns, isKeylessTable) if err != nil { return nil, err } - return IndexFromProllyMap(pm), nil + return IndexFromMapInterface(m), nil default: return nil, errNbfUnknown @@ -238,11 +238,32 @@ func ProllyMapFromIndex(i Index) prolly.Map { return i.(prollyIndex).index } +// MapFromIndex unwraps the Index and returns the underlying prolly.Map or prolly.ProximityMap. +func MapFromIndex(i Index) prolly.MapInterfaceWithMutable { + switch indexType := i.(type) { + case prollyIndex: + return indexType.index + } + return i.(prollyIndex).index +} + // IndexFromProllyMap wraps a prolly.Map and returns it as an Index. func IndexFromProllyMap(m prolly.Map) Index { return prollyIndex{index: m} } +// IndexFromMapInterface wraps a prolly.MapInterface and returns it as an Index. +func IndexFromMapInterface(m prolly.MapInterface) Index { + switch m := m.(type) { + case prolly.Map: + return IndexFromProllyMap(m) + case prolly.ProximityMap: + return IndexFromProximityMap(m) + default: + panic("unknown map type") + } +} + var _ Index = prollyIndex{} // HashOf implements Index. diff --git a/go/libraries/doltcore/doltdb/durable/table.go b/go/libraries/doltcore/doltdb/durable/table.go index 3d1637980f3..0e7977a6486 100644 --- a/go/libraries/doltcore/doltdb/durable/table.go +++ b/go/libraries/doltcore/doltdb/durable/table.go @@ -850,11 +850,11 @@ func (t doltDevTable) GetTableRows(ctx context.Context) (Index, error) { if err != nil { return nil, err } - m, err := shim.MapFromValue(types.SerialMessage(rowbytes), sch, t.ns, false) + m, err := shim.MapInterfaceFromValue(types.SerialMessage(rowbytes), sch, t.ns, false) if err != nil { return nil, err } - return IndexFromProllyMap(m), nil + return IndexFromMapInterface(m), nil } func (t doltDevTable) GetTableRowsWithDescriptors(ctx context.Context, kd, vd val.TupleDesc) (Index, error) { @@ -863,7 +863,7 @@ func (t doltDevTable) GetTableRowsWithDescriptors(ctx context.Context, kd, vd va if err != nil { return nil, err } - return IndexFromProllyMap(m), nil + return IndexFromMapInterface(m), nil } func (t doltDevTable) SetTableRows(ctx context.Context, rows Index) (Table, error) { diff --git a/go/libraries/doltcore/sqle/enginetest/validation.go b/go/libraries/doltcore/sqle/enginetest/validation.go index 6521743f292..28bed245cd1 100644 --- a/go/libraries/doltcore/sqle/enginetest/validation.go +++ b/go/libraries/doltcore/sqle/enginetest/validation.go @@ -67,8 +67,8 @@ var validationStages = []validator{ // validateChunkReferences checks for dangling chunks. func validateChunkReferences(ctx context.Context, db sqle.Database) error { validateIndex := func(ctx context.Context, idx durable.Index) error { - pm := durable.ProllyMapFromIndex(idx) - return pm.WalkNodes(ctx, func(ctx context.Context, nd tree.Node) error { + m := durable.MapFromIndex(idx) + return m.WalkNodes(ctx, func(ctx context.Context, nd tree.Node) error { if nd.Size() <= 0 { return fmt.Errorf("encountered nil tree.Node") } @@ -113,7 +113,7 @@ func validateSecondaryIndexes(ctx context.Context, db sqle.Database) error { if err != nil { return false, err } - primary := durable.ProllyMapFromIndex(rows) + primary := durable.MapFromIndex(rows) for _, def := range sch.Indexes().AllIndexes() { set, err := t.GetIndexSet(ctx) @@ -124,7 +124,7 @@ func validateSecondaryIndexes(ctx context.Context, db sqle.Database) error { if err != nil { return true, err } - secondary := durable.ProllyMapFromIndex(idx) + secondary := durable.MapFromIndex(idx) err = validateIndexConsistency(ctx, sch, def, primary, secondary) if err != nil { @@ -140,7 +140,7 @@ func validateIndexConsistency( ctx context.Context, sch schema.Schema, def schema.Index, - primary, secondary prolly.Map, + primary, secondary prolly.MapInterface, ) error { if schema.IsKeyless(sch) { return validateKeylessIndex(ctx, sch, def, primary, secondary) @@ -151,7 +151,7 @@ func validateIndexConsistency( // printIndexContents prints the contents of |prollyMap| to stdout. Intended for use debugging // index consistency issues. -func printIndexContents(ctx context.Context, prollyMap prolly.Map) { +func printIndexContents(ctx context.Context, prollyMap prolly.MapInterface) { fmt.Printf("Secondary index contents:\n") kd := prollyMap.KeyDesc() iterAll, _ := prollyMap.IterAll(ctx) @@ -164,7 +164,7 @@ func printIndexContents(ctx context.Context, prollyMap prolly.Map) { } } -func validateKeylessIndex(ctx context.Context, sch schema.Schema, def schema.Index, primary, secondary prolly.Map) error { +func validateKeylessIndex(ctx context.Context, sch schema.Schema, def schema.Index, primary, secondary prolly.MapInterface) error { // Full-Text indexes do not make use of their internal map, so we may safely skip this check if def.IsFullText() { return nil @@ -239,7 +239,7 @@ func validateKeylessIndex(ctx context.Context, sch schema.Schema, def schema.Ind } } -func validatePkIndex(ctx context.Context, sch schema.Schema, def schema.Index, primary, secondary prolly.Map) error { +func validatePkIndex(ctx context.Context, sch schema.Schema, def schema.Index, primary, secondary prolly.MapInterface) error { // Full-Text indexes do not make use of their internal map, so we may safely skip this check if def.IsFullText() { return nil diff --git a/go/libraries/doltcore/sqle/index/dolt_index.go b/go/libraries/doltcore/sqle/index/dolt_index.go index 67687bb1967..a519656c9e7 100644 --- a/go/libraries/doltcore/sqle/index/dolt_index.go +++ b/go/libraries/doltcore/sqle/index/dolt_index.go @@ -1095,7 +1095,7 @@ var sharePool = pool.NewBuffPool() func maybeGetKeyBuilder(idx durable.Index) *val.TupleBuilder { if types.IsFormat_DOLT(idx.Format()) { - kd, _ := durable.ProllyMapFromIndex(idx).Descriptors() + kd, _ := durable.MapFromIndex(idx).Descriptors() return val.NewTupleBuilder(kd) } return nil diff --git a/go/libraries/doltcore/sqle/writer/prolly_index_writer.go b/go/libraries/doltcore/sqle/writer/prolly_index_writer.go index 423b91cfcb6..af3928f877d 100644 --- a/go/libraries/doltcore/sqle/writer/prolly_index_writer.go +++ b/go/libraries/doltcore/sqle/writer/prolly_index_writer.go @@ -68,7 +68,7 @@ func getPrimaryKeylessProllyWriter(ctx context.Context, t *doltdb.Table, schStat type indexWriter interface { Name() string - Map(ctx context.Context) (prolly.Map, error) + Map(ctx context.Context) (prolly.MapInterface, error) ValidateKeyViolations(ctx context.Context, sqlRow sql.Row) error Insert(ctx context.Context, sqlRow sql.Row) error Delete(ctx context.Context, sqlRow sql.Row) error @@ -101,7 +101,7 @@ func (m prollyIndexWriter) Name() string { return "" } -func (m prollyIndexWriter) Map(ctx context.Context) (prolly.Map, error) { +func (m prollyIndexWriter) Map(ctx context.Context) (prolly.MapInterface, error) { return m.mut.Map(ctx) } @@ -247,7 +247,7 @@ func (m prollyIndexWriter) uniqueKeyError(ctx context.Context, keyStr string, ke type prollySecondaryIndexWriter struct { name string - mut *prolly.MutableMap + mut prolly.MutableMapInterface unique bool prefixLengths []uint16 @@ -273,8 +273,8 @@ func (m prollySecondaryIndexWriter) Name() string { return m.name } -func (m prollySecondaryIndexWriter) Map(ctx context.Context) (prolly.Map, error) { - return m.mut.Map(ctx) +func (m prollySecondaryIndexWriter) Map(ctx context.Context) (prolly.MapInterface, error) { + return m.mut.MapInterface(ctx) } func (m prollySecondaryIndexWriter) ValidateKeyViolations(ctx context.Context, sqlRow sql.Row) error { diff --git a/go/libraries/doltcore/sqle/writer/prolly_index_writer_keyless.go b/go/libraries/doltcore/sqle/writer/prolly_index_writer_keyless.go index 3a03399f0b6..83c438e2e61 100644 --- a/go/libraries/doltcore/sqle/writer/prolly_index_writer_keyless.go +++ b/go/libraries/doltcore/sqle/writer/prolly_index_writer_keyless.go @@ -41,7 +41,7 @@ func (k prollyKeylessWriter) Name() string { return k.name } -func (k prollyKeylessWriter) Map(ctx context.Context) (prolly.Map, error) { +func (k prollyKeylessWriter) Map(ctx context.Context) (prolly.MapInterface, error) { return k.mut.Map(ctx) } @@ -202,7 +202,7 @@ func (writer prollyKeylessSecondaryWriter) Name() string { } // Map implements the interface indexWriter. -func (writer prollyKeylessSecondaryWriter) Map(ctx context.Context) (prolly.Map, error) { +func (writer prollyKeylessSecondaryWriter) Map(ctx context.Context) (prolly.MapInterface, error) { return writer.mut.Map(ctx) } diff --git a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go index 30c8ddf15ca..d7f05c14532 100644 --- a/go/libraries/doltcore/sqle/writer/prolly_table_writer.go +++ b/go/libraries/doltcore/sqle/writer/prolly_table_writer.go @@ -78,14 +78,14 @@ func getSecondaryProllyIndexWriters(ctx context.Context, t *doltdb.Table, schSta if err != nil { return nil, err } - idxMap := durable.ProllyMapFromIndex(idxRows) + idxMap := durable.MapFromIndex(idxRows) keyDesc, _ := idxMap.Descriptors() // mapping from secondary index key to primary key writers[defName] = prollySecondaryIndexWriter{ name: defName, - mut: idxMap.Mutate(), + mut: idxMap.MutateInterface(), unique: def.IsUnique, prefixLengths: def.PrefixLengths, idxCols: def.Count, @@ -337,7 +337,7 @@ func (w *prollyTableWriter) table(ctx context.Context) (t *doltdb.Table, err err return nil, err } - t, err = w.tbl.UpdateRows(ctx, durable.IndexFromProllyMap(pm)) + t, err = w.tbl.UpdateRows(ctx, durable.IndexFromMapInterface(pm)) if err != nil { return nil, err } @@ -353,7 +353,7 @@ func (w *prollyTableWriter) table(ctx context.Context) (t *doltdb.Table, err err if err != nil { return nil, err } - idx := durable.IndexFromProllyMap(sm) + idx := durable.IndexFromMapInterface(sm) s, err = s.PutIndex(ctx, wrSecondary.Name(), idx) if err != nil { diff --git a/go/store/prolly/address_map.go b/go/store/prolly/address_map.go index 3dffb18d133..7cae8d1a6f9 100644 --- a/go/store/prolly/address_map.go +++ b/go/store/prolly/address_map.go @@ -134,7 +134,7 @@ func (c AddressMap) Editor() AddressMapEditor { } type AddressMapEditor struct { - addresses tree.MutableMap[stringSlice, address, lexicographic] + addresses tree.MutableMap[stringSlice, address, lexicographic, tree.StaticMap[stringSlice, address, lexicographic]] } func (wr AddressMapEditor) Add(ctx context.Context, name string, addr hash.Hash) error { diff --git a/go/store/prolly/artifact_map.go b/go/store/prolly/artifact_map.go index c77dfad8c2d..05e63ef62c4 100644 --- a/go/store/prolly/artifact_map.go +++ b/go/store/prolly/artifact_map.go @@ -57,6 +57,20 @@ type ArtifactMap struct { valDesc val.TupleDesc } +func (m ArtifactMap) Has(ctx context.Context, key val.Tuple) (ok bool, err error) { + return m.tuples.Has(ctx, key) +} + +func (m ArtifactMap) ValDesc() val.TupleDesc { + return m.valDesc +} + +func (m ArtifactMap) KeyDesc() val.TupleDesc { + return m.keyDesc +} + +var _ MapInterface = (*ArtifactMap)(nil) + // NewArtifactMap creates an artifact map based on |srcKeyDesc| which is the key descriptor for // the corresponding row map. func NewArtifactMap(node tree.Node, ns tree.NodeStore, srcKeyDesc val.TupleDesc) ArtifactMap { @@ -169,8 +183,13 @@ func (m ArtifactMap) Editor() *ArtifactsEditor { } } -// IterAll returns an iterator for all artifacts. -func (m ArtifactMap) IterAll(ctx context.Context) (ArtifactIter, error) { +// IterAll returns an MapIter for all artifacts. +func (m ArtifactMap) IterAll(ctx context.Context) (MapIter, error) { + return m.tuples.IterAll(ctx) +} + +// IterAllArtifacts returns an iterator for all artifacts. +func (m ArtifactMap) IterAllArtifacts(ctx context.Context) (ArtifactIter, error) { numPks := m.srcKeyDesc.Count() tb := val.NewTupleBuilder(m.srcKeyDesc) itr, err := m.tuples.IterAll(ctx) @@ -279,7 +298,7 @@ func (m ArtifactMap) CountOfTypes(ctx context.Context, artTypes ...ArtifactType) } func (m ArtifactMap) iterAllOfType(ctx context.Context, artType ArtifactType) (artifactTypeIter, error) { - itr, err := m.IterAll(ctx) + itr, err := m.IterAllArtifacts(ctx) if err != nil { return artifactTypeIter{}, err } @@ -287,7 +306,7 @@ func (m ArtifactMap) iterAllOfType(ctx context.Context, artType ArtifactType) (a } func (m ArtifactMap) iterAllOfTypes(ctx context.Context, artTypes ...ArtifactType) (multiArtifactTypeItr, error) { - itr, err := m.IterAll(ctx) + itr, err := m.IterAllArtifacts(ctx) if err != nil { return multiArtifactTypeItr{}, err } @@ -429,7 +448,7 @@ func (wr *ArtifactsEditor) Flush(ctx context.Context) (ArtifactMap, error) { } return ArtifactMap{ - tuples: m.tuples, + tuples: m, srcKeyDesc: wr.srcKeyDesc, keyDesc: wr.mut.keyDesc, valDesc: wr.mut.valDesc, diff --git a/go/store/prolly/commit_closure.go b/go/store/prolly/commit_closure.go index 0835069b5b6..866d1f4b67a 100644 --- a/go/store/prolly/commit_closure.go +++ b/go/store/prolly/commit_closure.go @@ -148,7 +148,7 @@ func (c CommitClosure) AsHashSet(ctx context.Context) (hash.HashSet, error) { } type CommitClosureEditor struct { - closure tree.MutableMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering] + closure tree.MutableMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering, tree.StaticMap[CommitClosureKey, CommitClosureValue, commitClosureKeyOrdering]] } type CommitClosureKey []byte diff --git a/go/store/prolly/map_interface.go b/go/store/prolly/map_interface.go new file mode 100644 index 00000000000..c80559ed64c --- /dev/null +++ b/go/store/prolly/map_interface.go @@ -0,0 +1,54 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prolly + +import ( + "context" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/pool" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/val" +) + +// MapInterface is a common interface for prolly-tree based maps. +type MapInterface interface { + Node() tree.Node + NodeStore() tree.NodeStore + Count() (int, error) + HashOf() hash.Hash + WalkNodes(ctx context.Context, cb tree.NodeCb) error + Descriptors() (val.TupleDesc, val.TupleDesc) + IterAll(ctx context.Context) (MapIter, error) + Pool() pool.BuffPool + Has(ctx context.Context, key val.Tuple) (ok bool, err error) + ValDesc() val.TupleDesc + KeyDesc() val.TupleDesc +} + +type MapInterfaceWithMutable interface { + MapInterface + MutateInterface() MutableMapInterface +} + +type MutableMapInterface interface { + NodeStore() tree.NodeStore + Put(ctx context.Context, key, value val.Tuple) error + Delete(ctx context.Context, key val.Tuple) error + Checkpoint(ctx context.Context) error + Revert(ctx context.Context) + HasEdits() bool + IterRange(ctx context.Context, rng Range) (MapIter, error) + MapInterface(ctx context.Context) (MapInterface, error) +} diff --git a/go/store/prolly/shim/shim.go b/go/store/prolly/shim/shim.go index 6ddc47d9cd6..2ebd29ecbe5 100644 --- a/go/store/prolly/shim/shim.go +++ b/go/store/prolly/shim/shim.go @@ -15,27 +15,29 @@ package shim import ( + "fmt" + "github.com/dolthub/dolt/go/gen/fb/serial" "github.com/dolthub/dolt/go/libraries/doltcore/schema" "github.com/dolthub/dolt/go/store/prolly" "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" "github.com/dolthub/dolt/go/store/val" + "github.com/dolthub/go-mysql-server/sql/expression/function/vector" ) func NodeFromValue(v types.Value) (tree.Node, error) { return tree.NodeFromBytes(v.(types.SerialMessage)) } -func ValueFromMap(m prolly.Map) types.Value { - return tree.ValueFromNode(m.Node()) -} - -func ValueFromArtifactMap(m prolly.ArtifactMap) types.Value { +func ValueFromMap(m prolly.MapInterface) types.Value { return tree.ValueFromNode(m.Node()) } func MapFromValue(v types.Value, sch schema.Schema, ns tree.NodeStore, isKeylessSecondary bool) (prolly.Map, error) { - root, err := NodeFromValue(v) + root, fileId, err := NodeFromValue(v) + if fileId == serial.VectorIndexNodeFileID { + return prolly.Map{}, fmt.Errorf("can't make a prolly.Map from a vector index node") + } if err != nil { return prolly.Map{}, err } @@ -47,10 +49,33 @@ func MapFromValue(v types.Value, sch schema.Schema, ns tree.NodeStore, isKeyless return prolly.NewMap(root, ns, kd, vd), nil } -func MapFromValueWithDescriptors(v types.Value, kd, vd val.TupleDesc, ns tree.NodeStore) (prolly.Map, error) { - root, err := NodeFromValue(v) +func MapInterfaceFromValue(v types.Value, sch schema.Schema, ns tree.NodeStore, isKeylessSecondary bool) (prolly.MapInterface, error) { + root, fileId, err := NodeFromValue(v) + if err != nil { + return nil, err + } + kd := sch.GetKeyDescriptor() + if isKeylessSecondary { + kd = prolly.AddHashToSchema(kd) + } + vd := sch.GetValueDescriptor() + switch fileId { + case serial.VectorIndexNodeFileID: + return prolly.NewProximityMap(nil, ns, root, kd, vd, vector.DistanceL2Squared{}), nil + default: + return prolly.NewMap(root, ns, kd, vd), nil + } +} + +func MapFromValueWithDescriptors(v types.Value, kd, vd val.TupleDesc, ns tree.NodeStore) (prolly.MapInterface, error) { + root, fileId, err := NodeFromValue(v) if err != nil { return prolly.Map{}, err } - return prolly.NewMap(root, ns, kd, vd), nil + switch fileId { + case serial.VectorIndexNodeFileID: + return prolly.NewProximityMap(nil, ns, root, kd, vd, vector.DistanceL2Squared{}), nil + default: + return prolly.NewMap(root, ns, kd, vd), nil + } } diff --git a/go/store/prolly/tree/map.go b/go/store/prolly/tree/map.go index ae87e1cda30..555d235d39e 100644 --- a/go/store/prolly/tree/map.go +++ b/go/store/prolly/tree/map.go @@ -37,6 +37,16 @@ type StaticMap[K, V ~[]byte, O Ordering[K]] struct { Order O } +type MapInterface[K, V ~[]byte, O Ordering[K]] interface { + Get(ctx context.Context, query K, cb KeyValueFn[K, V]) (err error) + GetPrefix(ctx context.Context, query K, prefixOrder O, cb KeyValueFn[K, V]) (err error) + Has(ctx context.Context, query K) (ok bool, err error) + HasPrefix(ctx context.Context, query K, prefixOrder O) (ok bool, err error) + GetRoot() Node + GetNodeStore() NodeStore + IterKeyRange(ctx context.Context, start, stop K) (*OrderedTreeIter[K, V], error) +} + // DiffOrderedTrees invokes `cb` for each difference between `from` and `to. If `considerAllRowsModified` // is true, then a key that exists in both trees will be considered a modification even if the bytes are the same. // This is used when `from` and `to` have different schemas. @@ -193,6 +203,14 @@ func VisitMapLevelOrder[K, V ~[]byte, O Ordering[K]]( return err } +func (t StaticMap[K, V, O]) GetRoot() Node { + return t.Root +} + +func (t StaticMap[K, V, O]) GetNodeStore() NodeStore { + return t.NodeStore +} + func (t StaticMap[K, V, O]) Count() (int, error) { return t.Root.TreeCount() } @@ -205,8 +223,8 @@ func (t StaticMap[K, V, O]) HashOf() hash.Hash { return t.Root.HashOf() } -func (t StaticMap[K, V, O]) Mutate() MutableMap[K, V, O] { - return MutableMap[K, V, O]{ +func (t StaticMap[K, V, O]) Mutate() MutableMap[K, V, O, StaticMap[K, V, O]] { + return MutableMap[K, V, O, StaticMap[K, V, O]]{ Edits: skip.NewSkipList(func(left, right []byte) int { return t.Order.Compare(left, right) }), diff --git a/go/store/prolly/tree/mutable_map.go b/go/store/prolly/tree/mutable_map.go index d5456f6d73c..8fd7598f51a 100644 --- a/go/store/prolly/tree/mutable_map.go +++ b/go/store/prolly/tree/mutable_map.go @@ -16,27 +16,26 @@ package tree import ( "context" - "github.com/dolthub/dolt/go/store/skip" ) // MutableMap is a mutable prolly Static with ordered elements. -type MutableMap[K, V ~[]byte, O Ordering[K]] struct { +type MutableMap[K, V ~[]byte, O Ordering[K], M MapInterface[K, V, O]] struct { Edits *skip.List - Static StaticMap[K, V, O] + Static M } -func (m MutableMap[K, V, O]) Put(_ context.Context, key K, value V) error { +func (m MutableMap[K, V, O, M]) Put(_ context.Context, key K, value V) error { m.Edits.Put(key, value) return nil } -func (m MutableMap[K, V, O]) Delete(_ context.Context, key K) error { +func (m MutableMap[K, V, O, M]) Delete(_ context.Context, key K) error { m.Edits.Put(key, nil) return nil } -func (m MutableMap[K, V, O]) Get(ctx context.Context, key K, cb KeyValueFn[K, V]) (err error) { +func (m MutableMap[K, V, O, M]) Get(ctx context.Context, key K, cb KeyValueFn[K, V]) (err error) { value, ok := m.Edits.Get(key) if ok { if value == nil { @@ -48,7 +47,7 @@ func (m MutableMap[K, V, O]) Get(ctx context.Context, key K, cb KeyValueFn[K, V] return m.Static.Get(ctx, key, cb) } -func (m MutableMap[K, V, O]) GetPrefix(ctx context.Context, key K, prefixOrder O, cb KeyValueFn[K, V]) (err error) { +func (m MutableMap[K, V, O, M]) GetPrefix(ctx context.Context, key K, prefixOrder O, cb KeyValueFn[K, V]) (err error) { iter := m.Edits.GetIterFromSeekFn(func(k []byte) (advance bool) { if k != nil { // seek until |k| >= |key| advance = prefixOrder.Compare(k, key) < 0 @@ -65,7 +64,7 @@ func (m MutableMap[K, V, O]) GetPrefix(ctx context.Context, key K, prefixOrder O return m.Static.GetPrefix(ctx, key, prefixOrder, cb) } -func (m MutableMap[K, V, O]) Has(ctx context.Context, key K) (present bool, err error) { +func (m MutableMap[K, V, O, M]) Has(ctx context.Context, key K) (present bool, err error) { value, ok := m.Edits.Get(key) if ok { present = value != nil @@ -74,7 +73,7 @@ func (m MutableMap[K, V, O]) Has(ctx context.Context, key K) (present bool, err return m.Static.Has(ctx, key) } -func (m MutableMap[K, V, O]) HasPrefix(ctx context.Context, key K, prefixOrder O) (present bool, err error) { +func (m MutableMap[K, V, O, M]) HasPrefix(ctx context.Context, key K, prefixOrder O) (present bool, err error) { iter := m.Edits.GetIterFromSeekFn(func(k []byte) (advance bool) { if k != nil { // seek until |k| >= |key| advance = prefixOrder.Compare(k, key) < 0 @@ -89,14 +88,14 @@ func (m MutableMap[K, V, O]) HasPrefix(ctx context.Context, key K, prefixOrder O return m.Static.HasPrefix(ctx, key, prefixOrder) } -func (m MutableMap[K, V, O]) Copy() MutableMap[K, V, O] { - return MutableMap[K, V, O]{ +func (m MutableMap[K, V, O, M]) Copy() MutableMap[K, V, O, M] { + return MutableMap[K, V, O, M]{ Edits: m.Edits.Copy(), Static: m.Static, } } -func (m MutableMap[K, V, O]) Mutations() MutationIter { +func (m MutableMap[K, V, O, M]) Mutations() MutationIter { return orderedListIter[K, V]{iter: m.Edits.IterAtStart()} } diff --git a/go/store/prolly/tuple_map.go b/go/store/prolly/tuple_map.go index bdca03c622c..1f2243544af 100644 --- a/go/store/prolly/tuple_map.go +++ b/go/store/prolly/tuple_map.go @@ -213,6 +213,11 @@ func (m Map) Mutate() *MutableMap { return newMutableMap(m) } +// MutateInterface makes a MutableMap from a Map. +func (m Map) MutateInterface() MutableMapInterface { + return newMutableMap(m) +} + // Rewriter returns a mutator that intends to rewrite this map with the key and value descriptors provided. func (m Map) Rewriter(kd, vd val.TupleDesc) *MutableMap { return newMutableMapWithDescriptors(m, kd, vd) diff --git a/go/store/prolly/tuple_mutable_map.go b/go/store/prolly/tuple_mutable_map.go index a90f9439bfb..2a65cb06a59 100644 --- a/go/store/prolly/tuple_mutable_map.go +++ b/go/store/prolly/tuple_mutable_map.go @@ -32,22 +32,42 @@ const ( // MutableMap is an ordered collection of val.Tuple backed by a Prolly Tree. // Writes to the map are queued in a skip.List and periodically flushed when // the maximum number of pending writes is exceeded. -type MutableMap struct { + +type GenericMutableMap[MapType MapInterface, TreeMap tree.MapInterface[val.Tuple, val.Tuple, val.TupleDesc]] struct { // tuples contains the primary Prolly Tree and skip.List for this map. - tuples tree.MutableMap[val.Tuple, val.Tuple, val.TupleDesc] + tuples tree.MutableMap[val.Tuple, val.Tuple, val.TupleDesc, TreeMap] // stash, if not nil, contains a previous checkpoint of this map. // stashes are created when a MutableMap has been check-pointed, but // the number of in-memory pending writes exceeds, maxPending. // In this case we stash a copy MutableMap containing the checkpoint, // flush the pending writes and continue accumulating - stash *tree.MutableMap[val.Tuple, val.Tuple, val.TupleDesc] + stash *tree.MutableMap[val.Tuple, val.Tuple, val.TupleDesc, TreeMap] // keyDesc and valDesc are tuples descriptors for the map. keyDesc, valDesc val.TupleDesc // buffer size maxPending int + flusher MutableMapFlusher[MapType, TreeMap] +} + +type MutableMap = GenericMutableMap[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]] + +// MapInterface materializes all pending and applied mutations in the GenericMutableMap, producing the resulting MapInterface. +func (mut *GenericMutableMap[M, T]) MapInterface(ctx context.Context) (MapInterface, error) { + return mut.Map(ctx) +} + +// TreeMap materializes all pending and applied mutations in the GenericMutableMap, producing the resulting tree.MapInterface. +func (mut *GenericMutableMap[M, T]) TreeMap(ctx context.Context) (T, error) { + return mut.flusher.ApplyMutations(ctx, mut.NodeStore(), mut.tuples.Static.GetRoot(), mut.keyDesc, mut.tuples.Mutations()) +} + +// Map materializes all pending and applied mutations in the GenericMutableMap, producing the specific MapInterface implementation +// that the struct has been specialized with. +func (mut *GenericMutableMap[M, T]) Map(ctx context.Context) (M, error) { + return mut.flusher.Map(ctx, mut) } // newMutableMap returns a new MutableMap. @@ -57,6 +77,7 @@ func newMutableMap(m Map) *MutableMap { keyDesc: m.keyDesc, valDesc: m.valDesc, maxPending: defaultMaxPending, + flusher: ProllyFlusher{}, } } @@ -68,49 +89,28 @@ func newMutableMapWithDescriptors(m Map, kd, vd val.TupleDesc) *MutableMap { keyDesc: kd, valDesc: vd, maxPending: defaultMaxPending, + flusher: ProllyFlusher{}, } } -// Map materializes all pending and applied mutations in the MutableMap. -func (mut *MutableMap) Map(ctx context.Context) (Map, error) { - s := message.NewProllyMapSerializer(mut.valDesc, mut.NodeStore().Pool()) - return mut.flushWithSerializer(ctx, s) -} - -func (mut *MutableMap) flushWithSerializer(ctx context.Context, s message.Serializer) (Map, error) { - sm := mut.tuples.Static - fn := tree.ApplyMutations[val.Tuple, val.TupleDesc, message.Serializer] - - root, err := fn(ctx, sm.NodeStore, sm.Root, mut.keyDesc, s, mut.tuples.Mutations()) - if err != nil { - return Map{}, err - } - - return Map{ - tuples: tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ - Root: root, - NodeStore: sm.NodeStore, - Order: sm.Order, - }, - keyDesc: mut.keyDesc, - valDesc: mut.valDesc, - }, nil +func (mut *GenericMutableMap[M, T]) flushWithSerializer(ctx context.Context, s message.Serializer) (T, error) { + return mut.flusher.ApplyMutationsWithSerializer(ctx, mut.NodeStore(), mut.tuples.Static.GetRoot(), mut.keyDesc, s, mut.tuples.Mutations()) } // WithMaxPending returns a MutableMap with a new pending buffer size. -func (mut *MutableMap) WithMaxPending(max int) *MutableMap { +func (mut *GenericMutableMap[M, T]) WithMaxPending(max int) *GenericMutableMap[M, T] { ret := *mut ret.maxPending = max return &ret } // NodeStore returns the map's NodeStore -func (mut *MutableMap) NodeStore() tree.NodeStore { - return mut.tuples.Static.NodeStore +func (mut *GenericMutableMap[M, T]) NodeStore() tree.NodeStore { + return mut.tuples.Static.GetNodeStore() } // Put adds the Tuple pair |key|, |value| to the MutableMap. -func (mut *MutableMap) Put(ctx context.Context, key, value val.Tuple) error { +func (mut *GenericMutableMap[M, T]) Put(ctx context.Context, key, value val.Tuple) error { if err := mut.tuples.Put(ctx, key, value); err != nil { return err } @@ -121,32 +121,32 @@ func (mut *MutableMap) Put(ctx context.Context, key, value val.Tuple) error { } // Delete deletes the pair keyed by |key| from the MutableMap. -func (mut *MutableMap) Delete(ctx context.Context, key val.Tuple) error { +func (mut *GenericMutableMap[M, T]) Delete(ctx context.Context, key val.Tuple) error { return mut.tuples.Delete(ctx, key) } // Get fetches the Tuple pair keyed by |key|, if it exists, and passes it to |cb|. // If the |key| is not present in the MutableMap, a nil Tuple pair is passed to |cb|. -func (mut *MutableMap) Get(ctx context.Context, key val.Tuple, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) { +func (mut *GenericMutableMap[M, T]) Get(ctx context.Context, key val.Tuple, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) { return mut.tuples.Get(ctx, key, cb) } -func (mut *MutableMap) GetPrefix(ctx context.Context, key val.Tuple, prefixDesc val.TupleDesc, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) { +func (mut *GenericMutableMap[M, T]) GetPrefix(ctx context.Context, key val.Tuple, prefixDesc val.TupleDesc, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) { return mut.tuples.GetPrefix(ctx, key, prefixDesc, cb) } // Has returns true if |key| is present in the MutableMap. -func (mut *MutableMap) Has(ctx context.Context, key val.Tuple) (ok bool, err error) { +func (mut *GenericMutableMap[M, T]) Has(ctx context.Context, key val.Tuple) (ok bool, err error) { return mut.tuples.Has(ctx, key) } // HasPrefix returns true if a key with a matching prefix to |key| is present in the MutableMap. -func (mut *MutableMap) HasPrefix(ctx context.Context, key val.Tuple, prefixDesc val.TupleDesc) (ok bool, err error) { +func (mut *GenericMutableMap[M, T]) HasPrefix(ctx context.Context, key val.Tuple, prefixDesc val.TupleDesc) (ok bool, err error) { return mut.tuples.HasPrefix(ctx, key, prefixDesc) } // Checkpoint records a checkpoint that can be reverted to. -func (mut *MutableMap) Checkpoint(context.Context) error { +func (mut *GenericMutableMap[M, T]) Checkpoint(context.Context) error { // discard previous stash, if one exists mut.stash = nil mut.tuples.Edits.Checkpoint() @@ -154,7 +154,7 @@ func (mut *MutableMap) Checkpoint(context.Context) error { } // Revert discards writes made since the last checkpoint. -func (mut *MutableMap) Revert(context.Context) { +func (mut *GenericMutableMap[M, T]) Revert(context.Context) { // if we've accumulated a large number of writes // since we check-pointed, our last checkpoint // may be stashed in a separate tree.MutableMap @@ -165,7 +165,7 @@ func (mut *MutableMap) Revert(context.Context) { mut.tuples.Edits.Revert() } -func (mut *MutableMap) flushPending(ctx context.Context) error { +func (mut *GenericMutableMap[M, T]) flushPending(ctx context.Context) error { stash := mut.stash // if our in-memory edit set contains a checkpoint, we // must stash a copy of |mut.tuples| we can revert to. @@ -174,18 +174,18 @@ func (mut *MutableMap) flushPending(ctx context.Context) error { cp.Edits.Revert() stash = &cp } - sm, err := mut.Map(ctx) + sm, err := mut.flusher.ApplyMutations(ctx, mut.NodeStore(), mut.tuples.Static.GetRoot(), mut.keyDesc, mut.tuples.Mutations()) if err != nil { return err } - mut.tuples.Static = sm.tuples + mut.tuples.Static = sm mut.tuples.Edits.Truncate() // reuse skip list mut.stash = stash return nil } // IterAll returns a mutableMapIter that iterates over the entire MutableMap. -func (mut *MutableMap) IterAll(ctx context.Context) (MapIter, error) { +func (mut *GenericMutableMap[M, T]) IterAll(ctx context.Context) (MapIter, error) { rng := Range{Fields: nil, Desc: mut.keyDesc} return mut.IterRange(ctx, rng) } @@ -193,13 +193,13 @@ func (mut *MutableMap) IterAll(ctx context.Context) (MapIter, error) { // IterKeyRange iterates over a physical key range defined by |start| and // |stop|. If |start| and/or |stop| is nil, the range will be open // towards that end. -func (mut *MutableMap) IterKeyRange(ctx context.Context, start, stop val.Tuple) (MapIter, error) { +func (mut *GenericMutableMap[M, T]) IterKeyRange(ctx context.Context, start, stop val.Tuple) (MapIter, error) { return mut.tuples.Static.IterKeyRange(ctx, start, stop) } // IterRange returns a MapIter that iterates over a Range. -func (mut *MutableMap) IterRange(ctx context.Context, rng Range) (MapIter, error) { - treeIter, err := treeIterFromRange(ctx, mut.tuples.Static.Root, mut.tuples.Static.NodeStore, rng) +func (mut *GenericMutableMap[M, T]) IterRange(ctx context.Context, rng Range) (MapIter, error) { + treeIter, err := treeIterFromRange(ctx, mut.tuples.Static.GetRoot(), mut.tuples.Static.GetNodeStore(), rng) if err != nil { return nil, err } @@ -216,12 +216,12 @@ func (mut *MutableMap) IterRange(ctx context.Context, rng Range) (MapIter, error // HasEdits returns true when the MutableMap has performed at least one Put or Delete operation. This does not indicate // whether the materialized map contains different values to the contained unedited map. -func (mut *MutableMap) HasEdits() bool { +func (mut *GenericMutableMap[M, T]) HasEdits() bool { return mut.tuples.Edits.Count() > 0 } // Descriptors returns the key and value val.TupleDesc. -func (mut *MutableMap) Descriptors() (val.TupleDesc, val.TupleDesc) { +func (mut *GenericMutableMap[M, T]) Descriptors() (val.TupleDesc, val.TupleDesc) { return mut.keyDesc, mut.valDesc } @@ -308,3 +308,55 @@ func debugFormat(ctx context.Context, m *MutableMap) (string, error) { sb.WriteString("\t}\n}\n") return sb.String(), nil } + +type ProllyFlusher struct{} + +func (f ProllyFlusher) Map(ctx context.Context, mut *GenericMutableMap[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]]) (Map, error) { + treeMap, err := f.TreeMap(ctx, mut) + if err != nil { + return Map{}, err + } + return Map{ + tuples: treeMap, + keyDesc: mut.keyDesc, + valDesc: mut.valDesc, + }, nil +} + +// TreeMap materializes all pending and applied mutations in the MutableMap. +func (f ProllyFlusher) TreeMap(ctx context.Context, mut *MutableMap) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) { + s := message.NewProllyMapSerializer(mut.valDesc, mut.NodeStore().Pool()) + return mut.flushWithSerializer(ctx, s) +} + +var _ MutableMapFlusher[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]] = ProllyFlusher{} + +func (f ProllyFlusher) ApplyMutations( + ctx context.Context, + ns tree.NodeStore, + root tree.Node, + order val.TupleDesc, + edits tree.MutationIter, +) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) { + serializer := message.NewVectorIndexSerializer(ns.Pool()) + return f.ApplyMutationsWithSerializer(ctx, ns, root, order, serializer, edits) +} + +func (f ProllyFlusher) ApplyMutationsWithSerializer( + ctx context.Context, + ns tree.NodeStore, + root tree.Node, + order val.TupleDesc, + serializer message.Serializer, + edits tree.MutationIter, +) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) { + newRoot, err := tree.ApplyMutations(ctx, ns, root, order, serializer, edits) + if err != nil { + return tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{}, err + } + return tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ + Root: newRoot, + NodeStore: ns, + Order: order, + }, nil +} From 6269494ea27996f7ae9821faa1f13239b9fd6b7c Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 12 Nov 2024 13:20:29 -0800 Subject: [PATCH 2/7] Add MutableMapFlusher for Prolly Map --- go/store/prolly/mutable_map_flusher.go | 45 ++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 go/store/prolly/mutable_map_flusher.go diff --git a/go/store/prolly/mutable_map_flusher.go b/go/store/prolly/mutable_map_flusher.go new file mode 100644 index 00000000000..882a5c03be3 --- /dev/null +++ b/go/store/prolly/mutable_map_flusher.go @@ -0,0 +1,45 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prolly + +import ( + "context" + "github.com/dolthub/dolt/go/store/prolly/message" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/val" +) + +// MutableMapFlusher provides methods for flushing the edits in a MutableMap, producing a new static MapInterface +// containing the edits. +type MutableMapFlusher[MapType MapInterface, TreeMap tree.MapInterface[val.Tuple, val.Tuple, val.TupleDesc]] interface { + ApplyMutations( + ctx context.Context, + ns tree.NodeStore, + root tree.Node, + order val.TupleDesc, + edits tree.MutationIter, + ) (TreeMap, error) + + ApplyMutationsWithSerializer( + ctx context.Context, + ns tree.NodeStore, + root tree.Node, + order val.TupleDesc, + serializer message.Serializer, + edits tree.MutationIter, + ) (TreeMap, error) + + Map(ctx context.Context, m *GenericMutableMap[MapType, TreeMap]) (MapType, error) +} From 8529de5adea7b763c1d6adbc62bcb4898f934615 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 12 Nov 2024 16:51:16 -0800 Subject: [PATCH 3/7] Amend Flusher interface. --- go/libraries/doltcore/doltdb/durable/index.go | 2 -- go/store/prolly/mutable_map_flusher.go | 13 ++------- go/store/prolly/shim/shim.go | 26 ++++------------- go/store/prolly/tuple_mutable_map.go | 29 +++++++------------ 4 files changed, 17 insertions(+), 53 deletions(-) diff --git a/go/libraries/doltcore/doltdb/durable/index.go b/go/libraries/doltcore/doltdb/durable/index.go index bb022c863d3..3f5ffc3dc6c 100644 --- a/go/libraries/doltcore/doltdb/durable/index.go +++ b/go/libraries/doltcore/doltdb/durable/index.go @@ -257,8 +257,6 @@ func IndexFromMapInterface(m prolly.MapInterface) Index { switch m := m.(type) { case prolly.Map: return IndexFromProllyMap(m) - case prolly.ProximityMap: - return IndexFromProximityMap(m) default: panic("unknown map type") } diff --git a/go/store/prolly/mutable_map_flusher.go b/go/store/prolly/mutable_map_flusher.go index 882a5c03be3..e3b5ca1584d 100644 --- a/go/store/prolly/mutable_map_flusher.go +++ b/go/store/prolly/mutable_map_flusher.go @@ -24,21 +24,12 @@ import ( // MutableMapFlusher provides methods for flushing the edits in a MutableMap, producing a new static MapInterface // containing the edits. type MutableMapFlusher[MapType MapInterface, TreeMap tree.MapInterface[val.Tuple, val.Tuple, val.TupleDesc]] interface { - ApplyMutations( - ctx context.Context, - ns tree.NodeStore, - root tree.Node, - order val.TupleDesc, - edits tree.MutationIter, - ) (TreeMap, error) + ApplyMutations(ctx context.Context, m *GenericMutableMap[MapType, TreeMap]) (TreeMap, error) ApplyMutationsWithSerializer( ctx context.Context, - ns tree.NodeStore, - root tree.Node, - order val.TupleDesc, serializer message.Serializer, - edits tree.MutationIter, + m *GenericMutableMap[MapType, TreeMap], ) (TreeMap, error) Map(ctx context.Context, m *GenericMutableMap[MapType, TreeMap]) (MapType, error) diff --git a/go/store/prolly/shim/shim.go b/go/store/prolly/shim/shim.go index 2ebd29ecbe5..d6a53d56522 100644 --- a/go/store/prolly/shim/shim.go +++ b/go/store/prolly/shim/shim.go @@ -15,14 +15,11 @@ package shim import ( - "fmt" - "github.com/dolthub/dolt/go/gen/fb/serial" "github.com/dolthub/dolt/go/libraries/doltcore/schema" "github.com/dolthub/dolt/go/store/prolly" "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" "github.com/dolthub/dolt/go/store/val" - "github.com/dolthub/go-mysql-server/sql/expression/function/vector" ) func NodeFromValue(v types.Value) (tree.Node, error) { @@ -34,10 +31,7 @@ func ValueFromMap(m prolly.MapInterface) types.Value { } func MapFromValue(v types.Value, sch schema.Schema, ns tree.NodeStore, isKeylessSecondary bool) (prolly.Map, error) { - root, fileId, err := NodeFromValue(v) - if fileId == serial.VectorIndexNodeFileID { - return prolly.Map{}, fmt.Errorf("can't make a prolly.Map from a vector index node") - } + root, err := NodeFromValue(v) if err != nil { return prolly.Map{}, err } @@ -50,7 +44,7 @@ func MapFromValue(v types.Value, sch schema.Schema, ns tree.NodeStore, isKeyless } func MapInterfaceFromValue(v types.Value, sch schema.Schema, ns tree.NodeStore, isKeylessSecondary bool) (prolly.MapInterface, error) { - root, fileId, err := NodeFromValue(v) + root, err := NodeFromValue(v) if err != nil { return nil, err } @@ -59,23 +53,13 @@ func MapInterfaceFromValue(v types.Value, sch schema.Schema, ns tree.NodeStore, kd = prolly.AddHashToSchema(kd) } vd := sch.GetValueDescriptor() - switch fileId { - case serial.VectorIndexNodeFileID: - return prolly.NewProximityMap(nil, ns, root, kd, vd, vector.DistanceL2Squared{}), nil - default: - return prolly.NewMap(root, ns, kd, vd), nil - } + return prolly.NewMap(root, ns, kd, vd), nil } func MapFromValueWithDescriptors(v types.Value, kd, vd val.TupleDesc, ns tree.NodeStore) (prolly.MapInterface, error) { - root, fileId, err := NodeFromValue(v) + root, err := NodeFromValue(v) if err != nil { return prolly.Map{}, err } - switch fileId { - case serial.VectorIndexNodeFileID: - return prolly.NewProximityMap(nil, ns, root, kd, vd, vector.DistanceL2Squared{}), nil - default: - return prolly.NewMap(root, ns, kd, vd), nil - } + return prolly.NewMap(root, ns, kd, vd), nil } diff --git a/go/store/prolly/tuple_mutable_map.go b/go/store/prolly/tuple_mutable_map.go index 2a65cb06a59..a7d910e5c85 100644 --- a/go/store/prolly/tuple_mutable_map.go +++ b/go/store/prolly/tuple_mutable_map.go @@ -61,7 +61,7 @@ func (mut *GenericMutableMap[M, T]) MapInterface(ctx context.Context) (MapInterf // TreeMap materializes all pending and applied mutations in the GenericMutableMap, producing the resulting tree.MapInterface. func (mut *GenericMutableMap[M, T]) TreeMap(ctx context.Context) (T, error) { - return mut.flusher.ApplyMutations(ctx, mut.NodeStore(), mut.tuples.Static.GetRoot(), mut.keyDesc, mut.tuples.Mutations()) + return mut.flusher.ApplyMutations(ctx, mut) } // Map materializes all pending and applied mutations in the GenericMutableMap, producing the specific MapInterface implementation @@ -94,7 +94,7 @@ func newMutableMapWithDescriptors(m Map, kd, vd val.TupleDesc) *MutableMap { } func (mut *GenericMutableMap[M, T]) flushWithSerializer(ctx context.Context, s message.Serializer) (T, error) { - return mut.flusher.ApplyMutationsWithSerializer(ctx, mut.NodeStore(), mut.tuples.Static.GetRoot(), mut.keyDesc, s, mut.tuples.Mutations()) + return mut.flusher.ApplyMutationsWithSerializer(ctx, s, mut) } // WithMaxPending returns a MutableMap with a new pending buffer size. @@ -174,7 +174,7 @@ func (mut *GenericMutableMap[M, T]) flushPending(ctx context.Context) error { cp.Edits.Revert() stash = &cp } - sm, err := mut.flusher.ApplyMutations(ctx, mut.NodeStore(), mut.tuples.Static.GetRoot(), mut.keyDesc, mut.tuples.Mutations()) + sm, err := mut.flusher.ApplyMutations(ctx, mut) if err != nil { return err } @@ -331,32 +331,23 @@ func (f ProllyFlusher) TreeMap(ctx context.Context, mut *MutableMap) (tree.Stati var _ MutableMapFlusher[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]] = ProllyFlusher{} -func (f ProllyFlusher) ApplyMutations( - ctx context.Context, - ns tree.NodeStore, - root tree.Node, - order val.TupleDesc, - edits tree.MutationIter, -) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) { - serializer := message.NewVectorIndexSerializer(ns.Pool()) - return f.ApplyMutationsWithSerializer(ctx, ns, root, order, serializer, edits) +func (f ProllyFlusher) ApplyMutations(ctx context.Context, m *GenericMutableMap[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]]) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) { + serializer := message.NewProllyMapSerializer(m.valDesc, m.NodeStore().Pool()) + return f.ApplyMutationsWithSerializer(ctx, serializer, m) } func (f ProllyFlusher) ApplyMutationsWithSerializer( ctx context.Context, - ns tree.NodeStore, - root tree.Node, - order val.TupleDesc, serializer message.Serializer, - edits tree.MutationIter, + m *GenericMutableMap[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]], ) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) { - newRoot, err := tree.ApplyMutations(ctx, ns, root, order, serializer, edits) + newRoot, err := tree.ApplyMutations(ctx, m.NodeStore(), m.tuples.Static.GetRoot(), m.keyDesc, serializer, m.tuples.Mutations()) if err != nil { return tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{}, err } return tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ Root: newRoot, - NodeStore: ns, - Order: order, + NodeStore: m.NodeStore(), + Order: m.keyDesc, }, nil } From 6b2e8a076bd0071c6e1785b679b07f068ebef043 Mon Sep 17 00:00:00 2001 From: nicktobey Date: Wed, 13 Nov 2024 01:01:36 +0000 Subject: [PATCH 4/7] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/store/prolly/map_interface.go | 1 + go/store/prolly/mutable_map_flusher.go | 1 + go/store/prolly/tree/mutable_map.go | 1 + 3 files changed, 3 insertions(+) diff --git a/go/store/prolly/map_interface.go b/go/store/prolly/map_interface.go index c80559ed64c..f0d2587f557 100644 --- a/go/store/prolly/map_interface.go +++ b/go/store/prolly/map_interface.go @@ -16,6 +16,7 @@ package prolly import ( "context" + "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/pool" "github.com/dolthub/dolt/go/store/prolly/tree" diff --git a/go/store/prolly/mutable_map_flusher.go b/go/store/prolly/mutable_map_flusher.go index e3b5ca1584d..5be08e0b37d 100644 --- a/go/store/prolly/mutable_map_flusher.go +++ b/go/store/prolly/mutable_map_flusher.go @@ -16,6 +16,7 @@ package prolly import ( "context" + "github.com/dolthub/dolt/go/store/prolly/message" "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/val" diff --git a/go/store/prolly/tree/mutable_map.go b/go/store/prolly/tree/mutable_map.go index 8fd7598f51a..2130c91fc66 100644 --- a/go/store/prolly/tree/mutable_map.go +++ b/go/store/prolly/tree/mutable_map.go @@ -16,6 +16,7 @@ package tree import ( "context" + "github.com/dolthub/dolt/go/store/skip" ) From bbebb1ff4989ba2226b1688ea641741d73667160 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 12 Nov 2024 17:25:18 -0800 Subject: [PATCH 5/7] Make sure we create a flusher when creating a MutableMap --- go/store/prolly/artifact_map.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/store/prolly/artifact_map.go b/go/store/prolly/artifact_map.go index 05e63ef62c4..47094057259 100644 --- a/go/store/prolly/artifact_map.go +++ b/go/store/prolly/artifact_map.go @@ -176,6 +176,7 @@ func (m ArtifactMap) Editor() *ArtifactsEditor { keyDesc: m.keyDesc, valDesc: m.valDesc, maxPending: artifactMapPendingBufferSize, + flusher: ProllyFlusher{}, }, artKB: val.NewTupleBuilder(artKD), artVB: val.NewTupleBuilder(artVD), From 1f11ab6e078af86115e7de779665986df58c2084 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 13 Nov 2024 13:32:18 -0800 Subject: [PATCH 6/7] Improve docstrings for prolly map functions. --- go/libraries/doltcore/doltdb/durable/index.go | 2 +- go/store/prolly/artifact_map.go | 2 +- go/store/prolly/map_interface.go | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/go/libraries/doltcore/doltdb/durable/index.go b/go/libraries/doltcore/doltdb/durable/index.go index 3f5ffc3dc6c..aa765fd8be8 100644 --- a/go/libraries/doltcore/doltdb/durable/index.go +++ b/go/libraries/doltcore/doltdb/durable/index.go @@ -238,7 +238,7 @@ func ProllyMapFromIndex(i Index) prolly.Map { return i.(prollyIndex).index } -// MapFromIndex unwraps the Index and returns the underlying prolly.Map or prolly.ProximityMap. +// MapFromIndex unwraps the Index and returns the underlying map as an interface. func MapFromIndex(i Index) prolly.MapInterfaceWithMutable { switch indexType := i.(type) { case prollyIndex: diff --git a/go/store/prolly/artifact_map.go b/go/store/prolly/artifact_map.go index 47094057259..7601b014d67 100644 --- a/go/store/prolly/artifact_map.go +++ b/go/store/prolly/artifact_map.go @@ -184,7 +184,7 @@ func (m ArtifactMap) Editor() *ArtifactsEditor { } } -// IterAll returns an MapIter for all artifacts. +// IterAll returns a MapIter for all artifacts. func (m ArtifactMap) IterAll(ctx context.Context) (MapIter, error) { return m.tuples.IterAll(ctx) } diff --git a/go/store/prolly/map_interface.go b/go/store/prolly/map_interface.go index f0d2587f557..ca48ef60bfb 100644 --- a/go/store/prolly/map_interface.go +++ b/go/store/prolly/map_interface.go @@ -38,11 +38,14 @@ type MapInterface interface { KeyDesc() val.TupleDesc } +// MapInterface is a common interface for prolly-tree based maps that can be used as the basis of a mutable map that +// implements MutableMapInterface. type MapInterfaceWithMutable interface { MapInterface MutateInterface() MutableMapInterface } +// MutableMapInterface is a common interface for prolly-tree based maps that can be mutated. type MutableMapInterface interface { NodeStore() tree.NodeStore Put(ctx context.Context, key, value val.Tuple) error From 20280fd0862e2c23dbaa5a530c28a52919e0f20b Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 13 Nov 2024 13:47:12 -0800 Subject: [PATCH 7/7] Improve MutableMapFlusher interface --- go/store/prolly/mutable_map_flusher.go | 4 ++-- go/store/prolly/tuple_mutable_map.go | 21 ++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/go/store/prolly/mutable_map_flusher.go b/go/store/prolly/mutable_map_flusher.go index 5be08e0b37d..7826e1bea61 100644 --- a/go/store/prolly/mutable_map_flusher.go +++ b/go/store/prolly/mutable_map_flusher.go @@ -25,8 +25,6 @@ import ( // MutableMapFlusher provides methods for flushing the edits in a MutableMap, producing a new static MapInterface // containing the edits. type MutableMapFlusher[MapType MapInterface, TreeMap tree.MapInterface[val.Tuple, val.Tuple, val.TupleDesc]] interface { - ApplyMutations(ctx context.Context, m *GenericMutableMap[MapType, TreeMap]) (TreeMap, error) - ApplyMutationsWithSerializer( ctx context.Context, serializer message.Serializer, @@ -34,4 +32,6 @@ type MutableMapFlusher[MapType MapInterface, TreeMap tree.MapInterface[val.Tuple ) (TreeMap, error) Map(ctx context.Context, m *GenericMutableMap[MapType, TreeMap]) (MapType, error) + + GetDefaultSerializer(ctx context.Context, mut *GenericMutableMap[MapType, TreeMap]) message.Serializer } diff --git a/go/store/prolly/tuple_mutable_map.go b/go/store/prolly/tuple_mutable_map.go index a7d910e5c85..47bacfbbc3d 100644 --- a/go/store/prolly/tuple_mutable_map.go +++ b/go/store/prolly/tuple_mutable_map.go @@ -59,11 +59,6 @@ func (mut *GenericMutableMap[M, T]) MapInterface(ctx context.Context) (MapInterf return mut.Map(ctx) } -// TreeMap materializes all pending and applied mutations in the GenericMutableMap, producing the resulting tree.MapInterface. -func (mut *GenericMutableMap[M, T]) TreeMap(ctx context.Context) (T, error) { - return mut.flusher.ApplyMutations(ctx, mut) -} - // Map materializes all pending and applied mutations in the GenericMutableMap, producing the specific MapInterface implementation // that the struct has been specialized with. func (mut *GenericMutableMap[M, T]) Map(ctx context.Context) (M, error) { @@ -174,7 +169,8 @@ func (mut *GenericMutableMap[M, T]) flushPending(ctx context.Context) error { cp.Edits.Revert() stash = &cp } - sm, err := mut.flusher.ApplyMutations(ctx, mut) + serializer := mut.flusher.GetDefaultSerializer(ctx, mut) + sm, err := mut.flusher.ApplyMutationsWithSerializer(ctx, serializer, mut) if err != nil { return err } @@ -311,8 +307,13 @@ func debugFormat(ctx context.Context, m *MutableMap) (string, error) { type ProllyFlusher struct{} +func (f ProllyFlusher) GetDefaultSerializer(ctx context.Context, mut *GenericMutableMap[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]]) message.Serializer { + return message.NewProllyMapSerializer(mut.valDesc, mut.NodeStore().Pool()) +} + func (f ProllyFlusher) Map(ctx context.Context, mut *GenericMutableMap[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]]) (Map, error) { - treeMap, err := f.TreeMap(ctx, mut) + s := f.GetDefaultSerializer(ctx, mut) + treeMap, err := f.ApplyMutationsWithSerializer(ctx, s, mut) if err != nil { return Map{}, err } @@ -323,12 +324,6 @@ func (f ProllyFlusher) Map(ctx context.Context, mut *GenericMutableMap[Map, tree }, nil } -// TreeMap materializes all pending and applied mutations in the MutableMap. -func (f ProllyFlusher) TreeMap(ctx context.Context, mut *MutableMap) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) { - s := message.NewProllyMapSerializer(mut.valDesc, mut.NodeStore().Pool()) - return mut.flushWithSerializer(ctx, s) -} - var _ MutableMapFlusher[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]] = ProllyFlusher{} func (f ProllyFlusher) ApplyMutations(ctx context.Context, m *GenericMutableMap[Map, tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]]) (tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc], error) {