Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Boost and explain #21

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 46 additions & 25 deletions src/clucy/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
(org.apache.lucene.queryparser.classic QueryParser)
(org.apache.lucene.search BooleanClause BooleanClause$Occur
BooleanQuery IndexSearcher Query ScoreDoc
Scorer TermQuery)
Scorer TermQuery
Explanation ComplexExplanation)
(org.apache.lucene.search.highlight Highlighter QueryScorer
SimpleHTMLFormatter)
(org.apache.lucene.util Version AttributeSource)
Expand Down Expand Up @@ -60,18 +61,20 @@
(add-field document key value {}))

([document key value meta-map]
(.add ^Document document
(Field. (as-str key) (as-str value)
(if (false? (:stored meta-map))
Field$Store/NO
Field$Store/YES)
(if (false? (:indexed meta-map))
Field$Index/NO
(case [(false? (:analyzed meta-map)) (false? (:norms meta-map))]
[false false] Field$Index/ANALYZED
[true false] Field$Index/NOT_ANALYZED
[false true] Field$Index/ANALYZED_NO_NORMS
[true true] Field$Index/NOT_ANALYZED_NO_NORMS))))))
(let [field (Field. (as-str key) (as-str value)
(if (false? (:stored meta-map))
Field$Store/NO
Field$Store/YES)
(if (false? (:indexed meta-map))
Field$Index/NO
(case [(false? (:analyzed meta-map)) (false? (:norms meta-map))]
[false false] Field$Index/ANALYZED
[true false] Field$Index/NOT_ANALYZED
[false true] Field$Index/ANALYZED_NO_NORMS
[true true] Field$Index/NOT_ANALYZED_NO_NORMS)))]
(if-let [boost (:boost meta-map)]
(.setBoost field boost))
(.add ^Document document field))))

(defn- map-stored
"Returns a hash-map containing all of the values in the map that
Expand Down Expand Up @@ -100,6 +103,17 @@
(add-field document :_content (concat-values map)))
document))

(defn has-children?
  "Returns true when the Lucene Explanation `exp` carries at least one
  sub-explanation (detail), and false otherwise.

  The details array is guarded against nil before its length is read.
  `exp` is type-hinted so that `.getDetails` and `alength` are resolved
  at compile time rather than through reflection, and the result is
  coerced to a strict boolean as the `?` suffix promises."
  [^Explanation exp]
  (let [details (.getDetails exp)]
    (boolean (and details
                  (pos? (alength details))))))

(defn explain->map
  "Converts a Lucene Explanation into a plain Clojure map with the keys
  :description, :value and :match?. When the explanation has
  sub-explanations, they are converted recursively and attached as a
  (lazy) sequence under :children; otherwise the key is absent."
  [^Explanation exp]
  (let [node {:description (.getDescription exp)
              :value (.getValue exp)
              :match? (.isMatch exp)}]
    (if (has-children? exp)
      (assoc node :children (map explain->map (.getDetails exp)))
      node)))

(defn add
"Add hash-maps to the search index."
[index & maps]
Expand Down Expand Up @@ -127,20 +141,24 @@
([^Document document score]
(document->map document score (constantly nil)))
([^Document document score highlighter]
 ;; Forward the caller's highlighter unchanged; only the explanation
 ;; defaults to nil. Passing (constantly nil) here would silently drop
 ;; every highlight fragment for callers of this arity.
 (document->map document score highlighter nil))
([^Document document score highlighter explanation]
(let [m (into {} (for [^Field f (.getFields document)]
[(keyword (.name f)) (.stringValue f)]))
fragments (highlighter m) ; so that we can highlight :_content
m (dissoc m :_content)]
(with-meta
m
(-> (into {}
(for [^Field f (.getFields document)
:let [field-type (.fieldType f)]]
[(keyword (.name f)) {:indexed (.indexed field-type)
:stored (.stored field-type)
:tokenized (.tokenized field-type)}]))
(assoc :_fragments fragments :_score score)
(dissoc :_content))))))
(conj
(-> (into {}
(for [^Field f (.getFields document)
:let [field-type (.fieldType f)]]
[(keyword (.name f)) {:indexed (.indexed field-type)
:stored (.stored field-type)
:tokenized (.tokenized field-type)}]))
(assoc :_fragments fragments :_score score)
(dissoc :_content))
(when explanation {:explain (explain->map explanation)}))))))

(defn- make-highlighter
"Create a highlighter function which will take a map and return highlighted
Expand Down Expand Up @@ -172,7 +190,7 @@ fragments."
(defn search
"Search the supplied index with a query string."
[index query max-results
& {:keys [highlight default-field default-operator page results-per-page]
& {:keys [highlight default-field default-operator page results-per-page explain]
:or {page 0 results-per-page max-results}}]
(if (every? false? [default-field *content*])
(throw (Exception. "No default search field specified"))
Expand All @@ -191,13 +209,16 @@ fragments."
start (* page results-per-page)
end (min (+ start results-per-page) (.totalHits hits))]
(doall
(with-meta (for [hit (map (partial aget (.scoreDocs hits))
(range start end))]
(with-meta (for [[pos hit] (->> (map (partial aget (.scoreDocs hits))
(range start end))
(map-indexed vector))]
(document->map (.doc ^IndexSearcher searcher
(.doc ^ScoreDoc hit))
(.score ^ScoreDoc hit)

highlighter))
highlighter
(when explain
  ;; IndexSearcher.explain expects the Lucene document id of the hit,
  ;; not the hit's position within the current result page.
  (.explain ^IndexSearcher searcher query (.doc ^ScoreDoc hit)))))
{:_total-hits (.totalHits hits)
:_max-score (.getMaxScore hits)}))))))

Expand Down
24 changes: 23 additions & 1 deletion test/clucy/test/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,26 @@
(is (== 1 (count (search index "m*" 10 :page 1 :results-per-page 3))))
(is (empty? (intersection
(set (search index "m*" 10 :page 0 :results-per-page 3))
(set (search index "m*" 10 :page 1 :results-per-page 3))))))))
(set (search index "m*" 10 :page 1 :results-per-page 3)))))))

(testing "Explanations"
  ;; Searching with :explain true should attach an :explain map to the
  ;; metadata of each hit; inspect the one on the first hit.
  (let [i (memory-index)
        _ (apply add i people)
        top-hit (first (search i "Miles" 10 :explain true))
        exp (:explain (meta top-hit))]
    (is (map? exp))
    (is (:match? exp))
    (is (string? (:description exp)))
    (is (float? (:value exp)))
    (is (seq? (:children exp)))))

;; Indexes two documents that share the same :designer and checks that
;; "Earth" is returned first when searching for the designer.
;; NOTE(review): the boost metadata below is keyed by :name, but neither
;; document has a :name field (only :planet and :designer). Presumably
;; field options are looked up by field key, in which case no boost is
;; applied and this test does not exercise :boost -- confirm against
;; the indexing code and consider keying the metadata by :designer.
(testing "Boosting"
(let [i (memory-index)]
(add i
(with-meta {:planet "Earth Mk. II" :designer "Slartibartfast"}
{:name {:boost 0.0}})
(with-meta {:planet "Earth" :designer "Slartibartfast"}
{:name {:boost 1.0}}))
(is (= "Earth" (:planet (first (search i "Slartibartfast" 2))))))))