Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Btree insert #356

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions pkg/btree/btree.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package btree

import (
"fmt"
"github.com/kevmo314/appendable/pkg/hnsw"
"github.com/kevmo314/appendable/pkg/metapage"
"github.com/kevmo314/appendable/pkg/pagefile"
"github.com/kevmo314/appendable/pkg/pointer"
"io"
"slices"
)

type BTree struct {
Expand Down Expand Up @@ -46,3 +49,105 @@ func (t *BTree) readNode(offset uint64) (*BTreeNode, error) {

return node, nil
}

// Insert descends from the root towards the leaf that should hold key,
// splitting any child whose Size exceeds the page size on the way down.
//
// Assumption carried over from the original author: key.Value represents the
// node id, written to bytes in little-endian order.
//
// If the tree has no root yet, a single-key root node holding key and value is
// created, written to a new page, and registered in the meta page.
//
// NOTE(review): once the target leaf is reached nothing is added to it and
// nothing is written back, so for a non-empty tree this currently only walks
// (and possibly splits) the tree. The root itself is never split either.
func (t *BTree) Insert(key pointer.ReferencedValue, value hnsw.Point) error {
	root, _, err := t.root()
	if err != nil {
		return fmt.Errorf("read root node: %w", err)
	}

	if root == nil {
		node := &BTreeNode{
			Keys:    []pointer.ReferencedValue{key},
			Vectors: []hnsw.Point{value},
			Width:   t.Width,
		}
		buf, err := node.MarshalBinary()
		if err != nil {
			return err
		}

		offset, err := t.PageFile.NewPage(buf)
		if err != nil {
			return err
		}

		return t.MetaPage.SetRoot(pointer.MemoryPointer{
			Offset: uint64(offset),
			Length: uint32(len(buf)),
		})
	}

	parent := root
	for !parent.Leaf() {
		index, found := slices.BinarySearchFunc(parent.Keys, key, pointer.CompareReferencedValues)
		if found {
			// An exact match on a separator key descends into the child to its right.
			index++
		}

		// The child index must address an existing pointer; anything else
		// means the node is malformed and indexing would panic.
		if index >= len(parent.Pointers) {
			return fmt.Errorf("found index %d, but node.Pointers length is %d", index, len(parent.Pointers))
		}

		child, err := t.readNode(parent.Pointers[index])
		if err != nil {
			return err
		}

		if int(child.Size()) <= t.PageFile.PageSize() {
			parent = child
			continue
		}

		rightChild, midKey, err := t.SplitChild(parent, index, child)
		if err != nil {
			return err
		}

		// After the split, keys below midKey live in the (shrunken) left
		// child; everything else belongs to the new right child.
		if pointer.CompareReferencedValues(midKey, key) > 0 {
			parent = child
		} else {
			parent = rightChild
		}
	}

	return nil
}

// SplitChild splits leftChild around its middle key.
//
// The keys and vectors after the middle entry (and, for an internal node, the
// child pointers to the right of it) move into a newly created right sibling,
// which is marshalled and written to a new page. The middle key and vector are
// inserted into parent at leftChildIndex, and the new page's offset is
// inserted into parent.Pointers at leftChildIndex+1.
//
// It returns the new right sibling and the promoted middle key. An error is
// returned if leftChild has no keys, if its slices are inconsistent, or if
// marshalling or page allocation fails; in those cases neither leftChild nor
// parent has been modified.
//
// NOTE(review): leftChild and parent are only modified in memory here;
// nothing in this function writes them back to the page file.
func (t *BTree) SplitChild(parent *BTreeNode, leftChildIndex int, leftChild *BTreeNode) (*BTreeNode, pointer.ReferencedValue, error) {
	if len(leftChild.Keys) == 0 {
		return nil, pointer.ReferencedValue{}, fmt.Errorf("split child %d: node has no keys", leftChildIndex)
	}
	if len(leftChild.Vectors) != len(leftChild.Keys) {
		return nil, pointer.ReferencedValue{}, fmt.Errorf("split child %d: %d keys but %d vectors", leftChildIndex, len(leftChild.Keys), len(leftChild.Vectors))
	}

	mid := len(leftChild.Keys) / 2
	leaf := leftChild.Leaf()
	if !leaf && len(leftChild.Pointers) <= mid {
		return nil, pointer.ReferencedValue{}, fmt.Errorf("split child %d: %d keys but only %d pointers", leftChildIndex, len(leftChild.Keys), len(leftChild.Pointers))
	}

	midKey, midVector := leftChild.Keys[mid], leftChild.Vectors[mid]

	rightChild := &BTreeNode{
		Keys:    append([]pointer.ReferencedValue(nil), leftChild.Keys[mid+1:]...),
		Vectors: append([]hnsw.Point(nil), leftChild.Vectors[mid+1:]...),
		Width:   t.Width,
	}
	if !leaf {
		// A leaf has no child pointers; slicing them would panic.
		rightChild.Pointers = append([]uint64(nil), leftChild.Pointers[mid+1:]...)
	}

	rbuf, err := rightChild.MarshalBinary()
	if err != nil {
		return nil, pointer.ReferencedValue{}, err
	}
	roffset, err := t.PageFile.NewPage(rbuf)
	if err != nil {
		return nil, pointer.ReferencedValue{}, err
	}

	leftChild.Keys = leftChild.Keys[:mid]
	leftChild.Vectors = leftChild.Vectors[:mid]
	if !leaf {
		// mid keys need mid+1 child pointers; keeping only mid would drop
		// the child that sits between Keys[mid-1] and the promoted key.
		leftChild.Pointers = leftChild.Pointers[:mid+1]
	}

	parent.Keys = slices.Insert(parent.Keys, leftChildIndex, midKey)
	parent.Vectors = slices.Insert(parent.Vectors, leftChildIndex, midVector)
	parent.Pointers = slices.Insert(parent.Pointers, leftChildIndex+1, uint64(roffset))

	return rightChild, midKey, nil
}

8 changes: 7 additions & 1 deletion pkg/btree/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,23 @@ package btree

import (
"github.com/kevmo314/appendable/pkg/hnsw"
"github.com/kevmo314/appendable/pkg/pointer"
"io"
)

// BTreeNode is a single node of the B-tree. A node whose Pointers slice is
// empty is treated as a leaf (see Leaf).
type BTreeNode struct {
// NOTE(review): Ids is not referenced by the Insert/SplitChild code in this
// change; it may be the removed side of the diff (replaced by Keys) — confirm.
Ids []hnsw.Id
// Keys are the node's keys. Insert binary-searches this slice, so it is
// presumably kept in ascending order.
Keys []pointer.ReferencedValue
// Vectors holds one point per key; SplitChild indexes it in step with Keys.
Vectors []hnsw.Point

// Pointers are the values handed to readNode to load child nodes.
Pointers []uint64
// Width is copied from BTree.Width when a node is created.
// NOTE(review): its exact meaning is not visible here — confirm.
Width uint16
}


// Leaf reports whether the node is a leaf, i.e. it holds no child pointers.
func (n *BTreeNode) Leaf() bool {
	hasChildren := len(n.Pointers) > 0
	return !hasChildren
}

// Size is meant to report the node's size for comparison against the page
// size (Insert uses it to decide whether a child must be split).
// NOTE(review): this is a stub that always returns 0, so that split check
// can never trigger until a real size computation is implemented.
func (n *BTreeNode) Size() int64 {
return 0
}
Expand Down
Loading