Skip to content

Commit

Permalink
restructuring the tree so that the root no longer contains metadata.
Browse files Browse the repository at this point in the history
instead, a new header node contains all metadata, and a pointer to the
root.
  • Loading branch information
Rosnec committed May 3, 2014
1 parent 2a82c6f commit 71e4211
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 104 deletions.
115 changes: 63 additions & 52 deletions src/b_plus_tree/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,13 @@
(defn find
"Returns the value associated with key by traversing the entire tree, or
nil if not found."
([key page-size raf]
(let [root (b-plus-tree.io/read-root page-size raf)]
(when-let [record (find-type key #{:record} root raf)]
(:data record)))))
([key raf {cnt :count, size :key-size, root-ptr :root
:as header}]
(when-not (or (zero? cnt)
(> (count key) size))
(let [root (b-plus-tree.io/read-node root-ptr raf)]
(when-let [record (find-type key #{:record} root raf)]
(:data record))))))

(defn insert-record
"Inserts a record into the given leaf node and writes changes to file.
Expand All @@ -96,43 +99,50 @@
(b-plus-tree.io/write-node record raf)
(+ next-free page-size))))

;; NOTE: this is a stub. The single arity below has an empty body, so calling
;; it currently returns nil; the behavior described in the docstring is not
;; implemented in this definition.
(defn insert
"Inserts a key-value pair into the B+ Tree. Returns a map which maps pointer
offsets to the nodes located there, for all nodes which are altered."
([key val cache raf
;; The header map is destructured by keyword. Within this arity the locals
;; `key`, `val` and `count` shadow the clojure.core functions of the same name.
{:keys [count free order key-size val-size page-size root] :as header}]))


; problem: I am re-writing the root on disc, but then using the same
; in-memory root every time
(defn insert
"Inserts key-value pair into the B+ Tree. Returns the new record if
(comment
(defn insert
"Inserts key-value pair into the B+ Tree. Returns the new record if
successful, or nil if key already exists."
([key val order page-size raf]
(let [root (b-plus-tree.io/read-root page-size raf)
free (:free root)
; find the leaf to insert into, while building a stack of
; parent pointers
[leaf stack]
(loop [node root
free free
stack []]
(let [stack (conj stack node)]
(if (b-plus-tree.nodes/leaf? node)
; found leaf
[node stack]
; keep searching
(recur (next-node key node raf) free stack))))]
(when-not (find-record key leaf raf)
; record doesn't exist already, so we can insert
(let [free
(if-not (b-plus-tree.nodes/full? leaf order)
(insert-record key val
(assoc leaf
:free free)
free page-size raf)
; placeholder
free)
new-root (assoc (if (= :root-leaf (:type leaf))
leaf
root)
:free free)]
(when-not (= :root-leaf (:type leaf))
(b-plus-tree.io/write-node (assoc root
:free free))))))))
([key val order page-size raf]
(let [root (b-plus-tree.io/read-root page-size raf)
free (:free root)
; find the leaf to insert into, while building a stack of
; parent pointers
[leaf stack]
(loop [node root
stack []]
(let [stack (conj stack node)]
(if (b-plus-tree.nodes/leaf? node)
; found leaf
[node stack]
; keep searching
(recur (next-node key node raf) stack))))]
(when-not (find-record key leaf raf)
; record doesn't exist already, so we can insert
(let [free
(if-not (b-plus-tree.nodes/full? leaf order)
(insert-record key val
(assoc leaf
:free free)
free page-size raf)
; placeholder
free)
new-root (assoc (if (= :root-leaf (:type leaf))
leaf
root)
:free free)]
(when-not (= :root-leaf (:type leaf))
(b-plus-tree.io/write-node (assoc root
:free free)))))))))

(defn traverse
"Returns a lazy sequence of the key value pairs contained in the B+ Tree,
Expand Down Expand Up @@ -170,17 +180,18 @@
(take-while (fn [[k v]] (-> k (compare stop) neg?))
(traverse leaf start page-size raf))))

(defn find-slice
""
([start page-size raf]
(when-let [leaf (find-type start
:leaf
(b-plus-tree.io/read-root page-size raf)
raf)]
(traverse start leaf page-size raf)))
([start stop page-size raf]
(when-let [leaf (find-type start
:leaf
(b-plus-tree.io/read-root page-size raf)
raf)]
(traverse start stop leaf page-size raf))))
(comment "work in progress"
(defn find-slice
""
([start page-size raf]
(when-let [leaf (find-type start
:leaf
(b-plus-tree.io/read-root page-size raf)
raf)]
(traverse start leaf page-size raf)))
([start stop page-size raf]
(when-let [leaf (find-type start
:leaf
(b-plus-tree.io/read-root page-size raf)
raf)]
(traverse start stop leaf page-size raf)))))
100 changes: 91 additions & 9 deletions src/b_plus_tree/io.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,90 @@
"Operations for B+ Tree I/O."
(:require [gloss core io]
[b-plus-tree.nodes :as nodes]
[b-plus-tree.util :as util]
[b-plus-tree.util :refer [dbg verbose]]))

(defn header-size
  "Returns the byte count of an encoded header node, measured by encoding a
  header whose fields are all zero with the nodes/header-node codec."
  ([]
     (let [field-names  [:count :free :order :key-size :val-size :page-size :root]
           blank-header (zipmap field-names (repeat 0))]
       (-> (gloss.io/encode nodes/header-node blank-header)
           gloss.core/byte-count))))

(defn max-node-size
  "Returns the byte count of an encoded :internal node that holds (dec order)
  keys generated by util/unique-strings for key-size, each mapped to a
  placeholder pointer, plus a :last pointer.
  NOTE(review): presumably intended as an upper bound on node size for the
  given order and key-size -- confirm against the nodes/node codec."
  ([order key-size]
     (let [sample-keys (util/unique-strings (dec order) key-size)
           ;; pair every sample key with a placeholder pointer 0, 1, 2, ...
           key-ptrs    (into (sorted-map) (map vector sample-keys (range)))
           sample-node {:type     :internal
                        :key-ptrs key-ptrs
                        :last     order}]
       (-> (gloss.io/encode nodes/node sample-node)
           gloss.core/byte-count))))

(defn max-record-size
  "Returns the byte count of an encoded :record node whose :data is a string
  of val-size characters."
  ([val-size]
     (let [filler        (apply str (repeat val-size \a))
           sample-record {:type :record
                          :data filler}]
       (-> (gloss.io/encode nodes/node sample-record)
           gloss.core/byte-count))))

(defn min-page-size
  "Returns the largest of the three sizes computed by max-node-size,
  max-record-size and header-size for the given parameters."
  ([order key-size val-size]
     (let [node-bytes   (max-node-size order key-size)
           record-bytes (max-record-size val-size)
           header-bytes (header-size)]
       (max node-bytes record-bytes header-bytes))))

(defn check-parameters
  "Returns true when page-size is at least the minimum page size computed by
  min-page-size for order, key-size and val-size; otherwise false."
  ([order key-size val-size page-size]
     (let [required (min-page-size order key-size val-size)]
       (<= required page-size))))

(defn new-tree
  "Creates a new tree file at filename and writes an initial header to it.

  With four arguments the page size defaults to the result of min-page-size
  for order, key-size and val-size. With five arguments page-size is
  validated through check-parameters first.

  The initial header has :count 0, :free set to page-size and :root -1.

  Throws clojure.lang.ExceptionInfo when page-size is insufficient, or when
  the file at filename already has a non-zero length. The ex-data map carries
  the offending parameters."
  ([filename order key-size val-size]
     (let [page-size (min-page-size order key-size val-size)]
       (new-tree filename order key-size val-size page-size)))
  ([filename order key-size val-size page-size]
     (when-not (check-parameters order key-size val-size page-size)
       ;; ex-info requires a data map; the one-argument call made previously
       ;; raised an ArityException instead of the intended error.
       (throw (ex-info "Insufficient page size."
                       {:order     order
                        :key-size  key-size
                        :val-size  val-size
                        :page-size page-size})))
     (let [header (gloss.io/encode nodes/header-node
                                   {:count 0,
                                    :free page-size,
                                    :order order,
                                    :key-size key-size,
                                    :val-size val-size,
                                    :page-size page-size,
                                    :root -1})]
       (with-open [raf (new java.io.RandomAccessFile filename "rwd")]
         ;; Opening in "rwd" mode creates the file when it is missing, so a
         ;; positive length is what signals pre-existing content.
         (if (pos? (.length raf))
           (throw (ex-info "File already exists."
                           {:filename filename}))
           (.write raf
                   (.array (gloss.io/contiguous header))))))))

(defn read-header
  "Seeks to offset 0 of the RandomAccessFile, reads (header-size) bytes and
  returns them decoded with the nodes/header-node codec."
  ([raf]
     ;; the header lives at the very start of the file
     (.seek raf 0)
     (let [n-bytes (header-size)
           buffer  (byte-array n-bytes)]
       (.readFully raf buffer)
       (->> buffer
            (gloss.io/decode nodes/header-node)))))

(defn write-header
  "Seeks to offset 0 of the RandomAccessFile and writes the header map there,
  encoded with the nodes/header-node codec."
  ([header raf]
     (.seek raf 0)
     (let [payload (-> (gloss.io/encode nodes/header-node header)
                       gloss.io/contiguous
                       .array)]
       (.write raf payload))))

(defn read-node
"Reads the node stored in the RandomAccessFile at the given offset."
([offset raf]
Expand All @@ -14,19 +96,19 @@
(assoc (gloss.io/decode nodes/node (gloss.io/to-byte-buffer node-bytes))
:offset offset))))

(defn read-root
"Reads the root node from the RandomAccessFile"
([page-size raf]
(if (zero? (.length raf))
(b-plus-tree.nodes/new-root page-size)
(read-node 0 raf))))
(comment
(defn read-root
"Reads the root node from the RandomAccessFile"
([page-size raf]
(if (zero? (.length raf))
(b-plus-tree.nodes/new-root page-size)
(read-node 0 raf)))))

(defn write-node
"Writes the node to the RandomAccessFile at the given offset. Returns the
offset of the file after writing."
([node raf]
(let [offset (:offset node)
encoded-node (gloss.io/encode nodes/node node)
([{:keys [offset] :as node} raf]
(let [encoded-node (gloss.io/encode nodes/node node)
size (gloss.core/byte-count encoded-node)]
(comment
(doall
Expand Down
14 changes: 10 additions & 4 deletions src/b_plus_tree/nodes.clj
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,20 @@ fields."
:ptrs (vals key-ptrs))
(dissoc :key-ptrs))))

;; Codec for the tree's header record: an ordered map of the fields below,
;; encoded in the order listed. `raf-offset` is a frame defined elsewhere in
;; this namespace (not visible here).
;; NOTE(review): field meanings below are inferred from how io/new-tree and
;; core/find use the header -- confirm.
(gloss.core/defcodec header-node
(gloss.core/ordered-map
;; presumably the number of entries in the tree; find returns nil when zero
:count :int32
;; presumably the next free file offset; new-tree initialises it to page-size
:free raf-offset
:order :int16
:key-size :int32
:val-size :int32
:page-size :int32
;; pointer to the root node; new-tree writes -1 for a tree with no root yet
:root raf-offset))

(def root-leaf-node
(gloss.core/compile-frame
(gloss.core/ordered-map
:type :root-leaf
:page-size :int32
:free raf-offset
:keys key-list
:ptrs child-list)
node-unmap
Expand All @@ -50,8 +58,6 @@ fields."
(gloss.core/compile-frame
(gloss.core/ordered-map
:type :root-nonleaf
:page-size :int32
:free raf-offset
:keys key-list
:ptrs child-list
:last raf-offset)
Expand Down
12 changes: 12 additions & 0 deletions src/b_plus_tree/util.clj
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,15 @@
"Returns true if item is in coll, otherwise false."
([coll item]
(some #(= item %) coll)))

(defn unique-strings
  "Returns a lazy seq of distinct strings made by formatting 0, 1, 2, ...
  with a java.text.DecimalFormat whose pattern is `length` zeros, i.e. each
  number is zero-padded to at least `length` digits. With one argument the
  seq is unbounded; with two arguments only the first n strings are returned.

  Note: the pattern only sets a minimum width, so once the counter needs more
  than `length` digits the strings become longer than `length`."
  ([length]
     (let [pattern   (apply str (repeat length 0))
           formatter (new java.text.DecimalFormat pattern)]
       (map #(.format formatter %) (iterate inc 0))))
  ([n length]
     (take n (unique-strings length))))
2 changes: 0 additions & 2 deletions test/b_plus_tree/io_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
(testing "basic read/write operations for all node types"
(let [fname "/tmp/RAF"
nodes [{:type :root-leaf,
:free -1,
:key-ptrs (sorted-map "a" 1, "b" 2, "c" 3),
:offset 0}
{:type :root-nonleaf,
:free -1,
:key-ptrs (sorted-map "a" 5, "b" 4, "c" 6),
:last 1,
:offset 4000}
Expand Down
Loading

0 comments on commit 71e4211

Please sign in to comment.