From 8aaa3c08b1bbfa5c11482c068644e52bc79ad29e Mon Sep 17 00:00:00 2001
From: Ian Eure
Date: Sat, 24 Aug 2013 13:02:00 -0700
Subject: [PATCH 1/2] Add support for explaining queries.

---
Review notes (text in this section is not part of the commit message):
 - The three-argument arity of document->map now forwards the caller's
   highlighter. As submitted it delegated with (constantly nil), which
   discarded the highlighter it had just been given.
 - search now passes the hit's document id, (.doc hit), to
   IndexSearcher.explain. As submitted it passed the hit's position within
   the result page, which is not a document id. The map-indexed wrapper that
   produced that position is no longer needed and has been dropped, so the
   `for` binding is unchanged from the pre-image.
 - Leading whitespace of context lines was reconstructed; apply with
   `git apply --ignore-whitespace` if a strict match fails. The blob ids on
   the `index` lines are those of the original submission.

 src/clucy/core.clj       | 38 ++++++++++++++++++++++++++++----------
 test/clucy/test/core.clj | 15 +++++++++++++-
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/src/clucy/core.clj b/src/clucy/core.clj
index 42d8ed9..3b444de 100644
--- a/src/clucy/core.clj
+++ b/src/clucy/core.clj
@@ -12,7 +12,8 @@
            (org.apache.lucene.queryparser.classic QueryParser)
            (org.apache.lucene.search BooleanClause BooleanClause$Occur
                                      BooleanQuery IndexSearcher Query ScoreDoc
-                                     Scorer TermQuery)
+                                     Scorer TermQuery
+                                     Explanation)
            (org.apache.lucene.search.highlight Highlighter QueryScorer
                                                SimpleHTMLFormatter)
            (org.apache.lucene.util Version AttributeSource)
@@ -170,6 +171,17 @@
          (meta s))
       document))

+(defn has-children? [exp]
+  (if-let [details (.getDetails exp)]
+    (> (alength details) 0)))
+
+(defn explain->map [^Explanation exp]
+  (conj {:description (.getDescription exp)
+         :value (.getValue exp)
+         :match? (.isMatch exp)}
+        (when (has-children? exp)
+          {:children (map explain->map (.getDetails exp))})))
+
 (defn add
   "Add hash-maps to the search index."
   [index & items]
@@ -213,19 +225,23 @@
   ([^Document document score]
      (document->map document score (constantly nil)))
   ([^Document document score highlighter]
+     (document->map document score highlighter nil))
+  ([^Document document score highlighter explanation]
      (let [m (into {} (for [^Field f (.getFields document)]
                         [(keyword (.name f)) (.stringValue f)]))
            fragments (highlighter m) ; so that we can highlight :_content
            m (dissoc m :_content)]
        (with-meta
          m
-         (-> (into {}
-                   (for [^Field f (.getFields document)
-                         :let [field-type (.fieldType f)]]
-                     [(keyword (.name f)) {:stored (.stored field-type)
-                                           :tokenized (.tokenized field-type)}]))
-             (assoc :_fragments fragments :_score score)
-             (dissoc :_content))))))
+         (conj
+          (-> (into {}
+                    (for [^Field f (.getFields document)
+                          :let [field-type (.fieldType f)]]
+                      [(keyword (.name f)) {:stored (.stored field-type)
+                                            :tokenized (.tokenized field-type)}]))
+              (assoc :_fragments fragments :_score score)
+              (dissoc :_content))
+          (when explanation {:explain (explain->map explanation)}))))))

 (defn- make-highlighter
   "Create a highlighter function which will take a map and return highlighted
@@ -257,7 +273,7 @@ fragments."
 (defn search
   "Search the supplied index with a query string."
   [index query max-results
-   & {:keys [highlight default-field default-operator page results-per-page]
+   & {:keys [highlight default-field default-operator page results-per-page explain]
       :or {page 0 results-per-page max-results}}]
   (if (every? false? [default-field *content*])
     (throw (Exception. "No default search field specified"))
@@ -275,13 +291,15 @@ fragments."
                 start (* page results-per-page)
                 end (min (+ start results-per-page) (.totalHits hits))]
             (doall
              (with-meta (for [hit (map (partial aget (.scoreDocs hits))
                                        (range start end))]
                           (document->map
                            (.doc ^IndexSearcher searcher
                                  (.doc ^ScoreDoc hit))
                            (.score ^ScoreDoc hit)
-                           highlighter))
+                           highlighter
+                           (when explain
+                             (.explain ^IndexSearcher searcher query (.doc ^ScoreDoc hit)))))
                {:_total-hits (.totalHits hits)
                 :_max-score (.getMaxScore hits)}))))))

diff --git a/test/clucy/test/core.clj b/test/clucy/test/core.clj
index 07bc0a4..8b9a849 100644
--- a/test/clucy/test/core.clj
+++ b/test/clucy/test/core.clj
@@ -69,4 +69,17 @@
       (is (== 1 (count (search index "m*" 10 :page 1 :results-per-page 3))))
       (is (empty? (intersection
                    (set (search index "m*" 10 :page 0 :results-per-page 3))
-                   (set (search index "m*" 10 :page 1 :results-per-page 3))))))))
+                   (set (search index "m*" 10 :page 1 :results-per-page 3)))))))
+
+  (testing "Explanations"
+    (let [i (memory-index)]
+      (apply add i people)
+      (let [exp (-> (search i "Miles" 10 :explain true)
+                    (first)
+                    (meta)
+                    (:explain))]
+        (is (map? exp))
+        (is (:match? exp))
+        (is (string? (:description exp)))
+        (is (float? (:value exp)))
+        (is (seq? (:children exp)))))))

From 63e801d48a23e54513107692ba305f7d6b8f7105 Mon Sep 17 00:00:00 2001
From: Ian Eure
Date: Sat, 24 Aug 2013 13:02:10 -0700
Subject: [PATCH 2/2] Add field boosting.

---
Review notes (text in this section is not part of the commit message):
 - NOTE(review): the "Boosting" test attaches its :boost metadata under the
   key :name, but both test documents only contain :planet and :designer.
   Please confirm the boost is actually applied to an indexed field, so the
   assertion is not passing for an unrelated reason.
 - The code in this patch is unchanged from the submission.

 src/clucy/core.clj       |  9 ++++++---
 test/clucy/test/core.clj | 11 ++++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/clucy/core.clj b/src/clucy/core.clj
index 3b444de..d4e5efc 100644
--- a/src/clucy/core.clj
+++ b/src/clucy/core.clj
@@ -120,9 +120,12 @@

   ([document key value meta-map]
      (.add ^Document document
-           (Field. (as-str key)
-                   value
-                   (make-field-type meta-map)))))
+           (let [field (Field. (as-str key)
+                               value
+                               (make-field-type meta-map))]
+             (if-let [boost (:boost meta-map)]
+               (.setBoost field boost))
+             field))))

 (defn- map-stored
   "Returns a hash-map containing all of the values in the map that
diff --git a/test/clucy/test/core.clj b/test/clucy/test/core.clj
index 8b9a849..2fbbe62 100644
--- a/test/clucy/test/core.clj
+++ b/test/clucy/test/core.clj
@@ -82,4 +82,13 @@
         (is (:match? exp))
         (is (string? (:description exp)))
         (is (float? (:value exp)))
-        (is (seq? (:children exp)))))))
+        (is (seq? (:children exp))))))
+
+  (testing "Boosting"
+    (let [i (memory-index)]
+      (add i
+           (with-meta {:planet "Earth Mk. II" :designer "Slartibartfast"}
+             {:name {:boost 0.0}})
+           (with-meta {:planet "Earth" :designer "Slartibartfast"}
+             {:name {:boost 1.0}}))
+      (is (= "Earth" (:planet (first (search i "Slartibartfast" 2))))))))