diff --git a/project.clj b/project.clj
index 44eb94f..4cedc9a 100644
--- a/project.clj
+++ b/project.clj
@@ -1,4 +1,4 @@
-(defproject b-plus-tree "0.1.4"
+(defproject b-plus-tree "0.1.5"
   :description "A B+ Tree implemented in Clojure."
   :url "https://github.com/Rosnec/b-plus-tree"
   :license {:name "MIT License"
diff --git a/src/b_plus_tree/core.clj b/src/b_plus_tree/core.clj
index e6a9051..a229f6c 100644
--- a/src/b_plus_tree/core.clj
+++ b/src/b_plus_tree/core.clj
@@ -153,7 +153,7 @@
        [nil, cache])))
 
 (defn find-stack
-  "Returns the value associated with key by traversing the entire tree, or
+  "Returns the record associated with key by traversing the entire tree, or
   nil if not found, building a stack of visited nodes during the process."
   ([key raf {cnt :count, size :key-size, root-ptr :root :as header}
     & {:keys [cache]
@@ -165,7 +165,7 @@
            (find-type-stack key #{:record} root [] raf
                             :cache cache)]
 ;       (println "herp" [record stack cache])
-       [(when record (:data record)), stack, cache])
+       [record, stack, cache])
      [nil, [], cache])))
 
 (defn insert-record
@@ -208,9 +208,13 @@
                          :cache cache)
         leaf (last stack)]
     (cond
-     ; record already exists, do nothing
-     record ;[header, cache]
-     (throw (ex-info "repeat" {}))
+     ; record already exists, overwrite
+     record
+     [header, (cache-node (assoc record
+                                 :data val
+                                 :altered? true)
+                          raf
+                          cache)]
 
      ; leaf is full, split
      (b-plus-tree.nodes/full? leaf order)
@@ -243,6 +247,27 @@
         (recur (next keyvals) raf header {:cache cache}))
       [header cache])))
 
+(defn map-subset
+  "Returns true if every key/value pair in m is found in the B+ Tree."
+  ([m raf header
+    & {:keys [cache]
+       :or {cache {}}}]
+     (every? (fn [[k v]]
+               (= v (first (find k raf header :cache cache))))
+             m)))
+
+(defn map-equals
+  "Returns true if the B+ Tree holds exactly the entries of m, i.e. the
+  header's :count equals (count m) and m is a subset of the tree."
+  ([m raf
+    {size :count
+     :as header}
+    & {:keys [cache]
+       :or {cache {}}}]
+     (and (= size (count m))
+          (map-subset m raf header
+                      :cache cache))))
+
 ; problem: I am re-writing the root on disc, but then using the same
 ; in-memory root every time
 (comment
diff --git a/src/b_plus_tree/io.clj b/src/b_plus_tree/io.clj
index 40a6fdc..b77db96 100644
--- a/src/b_plus_tree/io.clj
+++ b/src/b_plus_tree/io.clj
@@ -135,7 +135,5 @@
 (defn write-cache
   ([cache raf]
      (let [nodes (vals cache)
-           _ (println "nodes:" nodes)
-           altered-nodes (filter :altered? nodes)
-           _ (println "altered:" altered-nodes)]
+           altered-nodes (filter :altered? nodes)]
        (doall (map #(write-node % raf) altered-nodes)))))
diff --git a/test/b_plus_tree/insert_test.clj b/test/b_plus_tree/insert_test.clj
index 404876a..abe8907 100644
--- a/test/b_plus_tree/insert_test.clj
+++ b/test/b_plus_tree/insert_test.clj
@@ -28,17 +28,24 @@
       (b-plus-tree.io/new-tree "/tmp/RAF" order key-size val-size)
       (with-open [raf (new java.io.RandomAccessFile "/tmp/RAF" "rwd")]
         (let [header (b-plus-tree.io/read-header raf)
-              keyvals (apply sorted-map (map str (-> order (* 2) range)))
+              keyvals1 (apply sorted-map (map str (-> order (* 2) range)))
+              keyvals2 (reduce (fn [m [k v]] (assoc m k (str v 2)))
+                               (sorted-map)
+                               keyvals1)
               [header cache]
-              (b-plus-tree.core/insert-all keyvals raf header)]
+              (b-plus-tree.core/insert-all keyvals1 raf header)]
+          ; confirming that all entries can be found in the cache
+          (is (b-plus-tree.core/map-equals keyvals1 raf header
+                                           :cache cache))
+          ; writing cache to disc
           (b-plus-tree.io/write-cache cache raf)
-          (loop [keyvals keyvals]
-            (if-let [entry (first keyvals)]
-              (let [[key val] entry
-                    [cached-data cache] (b-plus-tree.core/find key raf header
-                                                               :cache cache)
-                    [uncached-data cache] (b-plus-tree.core/find key
-                                                                 raf header)]
-                (is (= val cached-data uncached-data))
-                (recur (next keyvals)))))))
+          ; confirming that all entries can be found on disc
+          (is (b-plus-tree.core/map-equals keyvals1 raf header))
+          ; overwriting all entries, and running the same checks
+          (let [[header cache]
+                (b-plus-tree.core/insert-all keyvals2 raf header)]
+            (is (b-plus-tree.core/map-equals keyvals2 raf header
+                                             :cache cache))
+            (b-plus-tree.io/write-cache cache raf)
+            (is (b-plus-tree.core/map-equals keyvals2 raf header)))))
       (io/delete-file "/tmp/RAF" true)))