diff --git a/src/b_plus_tree/core.clj b/src/b_plus_tree/core.clj index 3be024f..a33ce75 100644 --- a/src/b_plus_tree/core.clj +++ b/src/b_plus_tree/core.clj @@ -79,10 +79,13 @@ (defn find "Returns the value associated with key by traversing the entire tree, or nil if not found." - ([key page-size raf] - (let [root (b-plus-tree.io/read-root page-size raf)] - (when-let [record (find-type key #{:record} root raf)] - (:data record))))) + ([key raf {cnt :count, size :key-size, root-ptr :root + :as header}] + (when-not (or (zero? cnt) + (> (count key) size)) + (let [root (b-plus-tree.io/read-node root-ptr raf)] + (when-let [record (find-type key #{:record} root raf)] + (:data record)))))) (defn insert-record "Inserts a record into the given leaf node and writes changes to file. @@ -96,43 +99,50 @@ (b-plus-tree.io/write-node record raf) (+ next-free page-size)))) +(defn insert + "Inserts a key-value pair into the B+ Tree. Returns a map which maps pointer + offsets to the nodes located there, for all nodes which are altered." + ([key val cache raf + {:keys [count free order key-size val-size page-size root] :as header}])) + + ; problem: I am re-writing the root on disc, but then using the same ; in-memory root every time -(defn insert - "Inserts key-value pair into the B+ Tree. Returns the new record if +(comment + (defn insert + "Inserts key-value pair into the B+ Tree. Returns the new record if successful, or nil if key already exists." - ([key val order page-size raf] - (let [root (b-plus-tree.io/read-root page-size raf) - free (:free root) - ; find the leaf to insert into, while building a stack of - ; parent pointers - [leaf stack] - (loop [node root - free free - stack []] - (let [stack (conj stack node)] - (if (b-plus-tree.nodes/leaf? 
node) - ; found leaf - [node stack] - ; keep searching - (recur (next-node key node raf) free stack))))] - (when-not (find-record key leaf raf) - ; record doesn't exist already, so we can insert - (let [free - (if-not (b-plus-tree.nodes/full? leaf order) - (insert-record key val - (assoc leaf - :free free) - free page-size raf) - ; placeholder - free) - new-root (assoc (if (= :root-leaf (:type leaf)) - leaf - root) - :free free)] - (when-not (= :root-leaf (:type leaf)) - (b-plus-tree.io/write-node (assoc root - :free free)))))))) + ([key val order page-size raf] + (let [root (b-plus-tree.io/read-root page-size raf) + free (:free root) + ; find the leaf to insert into, while building a stack of + ; parent pointers + [leaf stack] + (loop [node root + stack []] + (let [stack (conj stack node)] + (if (b-plus-tree.nodes/leaf? node) + ; found leaf + [node stack] + ; keep searching + (recur (next-node key node raf) stack))))] + (when-not (find-record key leaf raf) + ; record doesn't exist already, so we can insert + (let [free + (if-not (b-plus-tree.nodes/full? 
leaf order) + (insert-record key val + (assoc leaf + :free free) + free page-size raf) + ; placeholder + free) + new-root (assoc (if (= :root-leaf (:type leaf)) + leaf + root) + :free free)] + (when-not (= :root-leaf (:type leaf)) + (b-plus-tree.io/write-node (assoc root + :free free))))))))) (defn traverse "Returns a lazy sequence of the key value pairs contained in the B+ Tree, @@ -170,17 +180,18 @@ (take-while (fn [[k v]] (-> k (compare stop) neg?)) (traverse leaf start page-size raf)))) -(defn find-slice - "" - ([start page-size raf] - (when-let [leaf (find-type start - :leaf - (b-plus-tree.io/read-root page-size raf) - raf)] - (traverse start leaf page-size raf))) - ([start stop page-size raf] - (when-let [leaf (find-type start - :leaf - (b-plus-tree.io/read-root page-size raf) - raf)] - (traverse start stop leaf page-size raf)))) +(comment "work in progress" + (defn find-slice + "" + ([start page-size raf] + (when-let [leaf (find-type start + :leaf + (b-plus-tree.io/read-root page-size raf) + raf)] + (traverse start leaf page-size raf))) + ([start stop page-size raf] + (when-let [leaf (find-type start + :leaf + (b-plus-tree.io/read-root page-size raf) + raf)] + (traverse start stop leaf page-size raf))))) diff --git a/src/b_plus_tree/io.clj b/src/b_plus_tree/io.clj index 75b0453..9c25cad 100644 --- a/src/b_plus_tree/io.clj +++ b/src/b_plus_tree/io.clj @@ -2,8 +2,90 @@ "Operations for B+ Tree I/O." 
(:require [gloss core io] [b-plus-tree.nodes :as nodes] + [b-plus-tree.util :as util] [b-plus-tree.util :refer [dbg verbose]])) +(defn header-size + ([] + (gloss.core/byte-count + (gloss.io/encode nodes/header-node + {:count 0 + :free 0 + :order 0 + :key-size 0 + :val-size 0 + :page-size 0 + :root 0})))) + +(defn max-node-size + ([order key-size] + (gloss.core/byte-count + (gloss.io/encode nodes/node + {:type :internal + :key-ptrs (apply sorted-map + (interleave + (util/unique-strings (dec order) + key-size) + (range))) + :last order})))) + +(defn max-record-size + ([val-size] + (gloss.core/byte-count + (gloss.io/encode nodes/node + {:type :record + :data (apply str + (repeat val-size \a))})))) + +(defn min-page-size + ([order key-size val-size] + (max (max-node-size order key-size) + (max-record-size val-size) + (header-size)))) + +(defn check-parameters + ([order key-size val-size page-size] + (>= page-size + (min-page-size order key-size val-size)))) + +(defn new-tree + "Creates a new B+ Tree file containing only the encoded header node. Without an explicit page-size, the minimum page size for the given order, key-size and val-size is used. Throws ex-info if page-size is below that minimum or if the file already has content." + ([filename order key-size val-size] + (let [page-size (min-page-size order key-size val-size)] + (new-tree filename order key-size val-size page-size))) + ([filename order key-size val-size page-size] + (when-not (check-parameters order key-size val-size page-size) + (throw (ex-info "Insufficient page size." {:page-size page-size, :order order, :key-size key-size, :val-size val-size}))) + (let [header (gloss.io/encode nodes/header-node + {:count 0, + :free page-size, + :order order, + :key-size key-size, + :val-size val-size, + :page-size page-size, + :root -1})] + (with-open [raf (new java.io.RandomAccessFile filename "rwd")] + (if (pos? (.length raf)) + (throw (ex-info "File already exists." {:filename filename})) + (.write raf + (.array (gloss.io/contiguous header)))))))) + +(defn read-header + "Reads the header from the RandomAccessFile." 
+ ([raf] + (.seek raf 0) ; go to head of file + (let [header-bytes (byte-array (header-size))] + (.readFully raf header-bytes) + (gloss.io/decode nodes/header-node header-bytes)))) + +(defn write-header + "Writes the header to the RandomAccessFile." + ([header raf] + (.seek raf 0) + (.write raf + (.array (gloss.io/contiguous (gloss.io/encode nodes/header-node + header)))))) + (defn read-node "Reads the node stored in the RandomAccessFile at the given offset." ([offset raf] @@ -14,19 +96,19 @@ (assoc (gloss.io/decode nodes/node (gloss.io/to-byte-buffer node-bytes)) :offset offset)))) -(defn read-root - "Reads the root node from the RandomAccessFile" - ([page-size raf] - (if (zero? (.length raf)) - (b-plus-tree.nodes/new-root page-size) - (read-node 0 raf)))) +(comment + (defn read-root + "Reads the root node from the RandomAccessFile" + ([page-size raf] + (if (zero? (.length raf)) + (b-plus-tree.nodes/new-root page-size) + (read-node 0 raf))))) (defn write-node "Writes the node to the RandomAccessFile at the given offset. Returns the offset of the file after writing." - ([node raf] - (let [offset (:offset node) - encoded-node (gloss.io/encode nodes/node node) + ([{:keys [offset] :as node} raf] + (let [encoded-node (gloss.io/encode nodes/node node) size (gloss.core/byte-count encoded-node)] (comment (doall diff --git a/src/b_plus_tree/nodes.clj b/src/b_plus_tree/nodes.clj index 764fdda..559ac55 100644 --- a/src/b_plus_tree/nodes.clj +++ b/src/b_plus_tree/nodes.clj @@ -35,12 +35,20 @@ fields." :ptrs (vals key-ptrs)) (dissoc :key-ptrs)))) +(gloss.core/defcodec header-node + (gloss.core/ordered-map + :count :int32 + :free raf-offset + :order :int16 + :key-size :int32 + :val-size :int32 + :page-size :int32 + :root raf-offset)) + (def root-leaf-node (gloss.core/compile-frame (gloss.core/ordered-map :type :root-leaf - :page-size :int32 - :free raf-offset :keys key-list :ptrs child-list) node-unmap @@ -50,8 +58,6 @@ fields." 
(gloss.core/compile-frame (gloss.core/ordered-map :type :root-nonleaf - :page-size :int32 - :free raf-offset :keys key-list :ptrs child-list :last raf-offset) diff --git a/src/b_plus_tree/util.clj b/src/b_plus_tree/util.clj index 61ca836..242fca9 100644 --- a/src/b_plus_tree/util.clj +++ b/src/b_plus_tree/util.clj @@ -56,3 +56,15 @@ "Returns true if item is in coll, otherwise false." ([coll item] (some #(= item %) coll))) + +(defn unique-strings + "Returns a seq of unique strings of the given length. " + ([length] + (let [formatter (new java.text.DecimalFormat + (apply str (repeat length 0))) + step (fn step [n] + (cons (.format formatter n) + (-> n inc step lazy-seq)))] + (lazy-seq (step 0)))) + ([n length] + (take n (unique-strings length)))) diff --git a/test/b_plus_tree/io_test.clj b/test/b_plus_tree/io_test.clj index 0b18bbd..163cab9 100644 --- a/test/b_plus_tree/io_test.clj +++ b/test/b_plus_tree/io_test.clj @@ -8,11 +8,9 @@ (testing "basic read/write operations for all node types" (let [fname "/tmp/RAF" nodes [{:type :root-leaf, - :free -1, :key-ptrs (sorted-map "a" 1, "b" 2, "c" 3), :offset 0} {:type :root-nonleaf, - :free -1, :key-ptrs (sorted-map "a" 5, "b" 4, "c" 6), :last 1, :offset 4000} diff --git a/test/b_plus_tree/lookup_test.clj b/test/b_plus_tree/lookup_test.clj index e894253..715b07a 100644 --- a/test/b_plus_tree/lookup_test.clj +++ b/test/b_plus_tree/lookup_test.clj @@ -4,70 +4,77 @@ [b-plus-tree core io nodes]) (:use clojure.test)) +(def header-node + {:count 6 + :free 1500 + :order 3 + :key-size 1 + :val-size 16 + :page-size 100 + :root 100}) + (def root-node - [{:type :root-nonleaf - :free -1 - :page-size 100 - :key-ptrs (sorted-map "c" 100) - :last 200 - :offset 0}]) + {:type :root-nonleaf + :key-ptrs (sorted-map "c" 200) + :last 300 + :offset 100}) (def internal-nodes [{:type :internal - :key-ptrs (sorted-map "b" 300) - :last 400 - :offset 100} + :key-ptrs (sorted-map "b" 400) + :last 500 + :offset 200} {:type :internal - :key-ptrs 
(sorted-map "d" 500, - "e" 600) - :last 700 - :offset 200}]) + :key-ptrs (sorted-map "d" 600, + "e" 700) + :last 800 + :offset 300}]) (def leaf-nodes [{:type :leaf - :key-ptrs (sorted-map "a" 800) - :next 400 - :offset 300} - {:type :leaf - :key-ptrs (sorted-map "b" 900) + :key-ptrs (sorted-map "a" 900) :next 500 :offset 400} {:type :leaf - :key-ptrs (sorted-map "c" 1000) + :key-ptrs (sorted-map "b" 1000) :next 600 :offset 500} {:type :leaf - :key-ptrs (sorted-map "d" 1100) + :key-ptrs (sorted-map "c" 1100) :next 700 :offset 600} {:type :leaf - :key-ptrs (sorted-map "e" 1200, - "f" 1300) + :key-ptrs (sorted-map "d" 1200) + :next 800 + :offset 700} + {:type :leaf + :key-ptrs (sorted-map "e" 1300, + "f" 1400) :next -1 - :offset 700}]) + :offset 800}]) (def record-nodes [{:type :record :data "http://www.a.com" - :offset 800} + :offset 900} {:type :record :data "http://www.b.com" - :offset 900} + :offset 1000} {:type :record :data "http://www.c.com" - :offset 1000} + :offset 1100} {:type :record :data "http://www.d.com" - :offset 1100} + :offset 1200} {:type :record :data "http://www.e.com" - :offset 1200} + :offset 1300} {:type :record :data "http://www.f.com" - :offset 1300}]) + :offset 1400}]) (def nodes - (concat root-node internal-nodes leaf-nodes record-nodes)) + (concat [root-node] internal-nodes leaf-nodes record-nodes)) (defn populate-file "Writes all nodes to file" @@ -79,14 +86,16 @@ (deftest find (testing "finding all records" (with-open [raf (new java.io.RandomAccessFile "/tmp/raf" "rwd")] + (b-plus-tree.io/write-header header-node raf) (populate-file nodes raf) - (doseq [[k v] {"a" "http://www.a.com", - "b" "http://www.b.com", - "c" "http://www.c.com", - "d" "http://www.d.com", - "e" "http://www.e.com", - "f" "http://www.f.com"}] - (is (= (b-plus-tree.core/find k 100 raf) v)))) + (let [header (b-plus-tree.io/read-header raf)] + (doseq [[k v] {"a" "http://www.a.com", + "b" "http://www.b.com", + "c" "http://www.c.com", + "d" "http://www.d.com", + "e" 
"http://www.e.com", + "f" "http://www.f.com"}] + (is (= (b-plus-tree.core/find k raf header) v))))) (io/delete-file "/tmp/RAF" true))) (deftest find-record-test