diff --git a/geoportal/doc/HierarchyAggregation.md b/geoportal/doc/HierarchyAggregation.md new file mode 100644 index 000000000..f41caa97b --- /dev/null +++ b/geoportal/doc/HierarchyAggregation.md @@ -0,0 +1,50 @@ +# Creating a Hierarchy Search Panel + +Terminology is often organized in a hierarchy, but this is not expressed by a simple keyword list. +Sometimes metadata creators express such paths in their keywords, e.g. + `Science Domain > Earth Sciences > Geology` +The HierarchyAggregation generates tree widgets in the search panel to allow +for searching of such hierarchies. +The ISO metadata evaluator has been modified to add a term to ```keywords_hier``` when a keyword contains '>'. + +HierarchyAggregation requires the use of fields that have been indexed by a path_hierarchy analyzer. +In order for the filter to display, there must be one top level term, defined by rootTerm. + +* If the field is populated with an uncontrolled set of keywords, there may be multiple + top level terms. In order to handle this, there are two options: + * if rootTerm is '', then all terms will be given a path under a common root (in the interface, not in the index) + * set rootTerm to a top level term, and only that branch will be shown. +* Use multiple HierarchyAggregation widgets to display terms from different top level terms. + +# Preparing for Use +In order to use HierarchyAggregation, you will probably need to reindex or reharvest records. + +http://{{host}}:{{port}}/geoportal/rest/metadata/reindex?fromIndexName=metadata&toIndexName=metadata_v2 + +For large collections, it is suggested that you log into the terminal and use a command line call to ensure that +all records are indexed. + +Here are the steps to use screen and curl. Screen allows you to log out. You could use & to shift it to the background. 
+ +``` +# create screen +screen -S reindex +curl -u username:password "http://host:8080/geoportal_reindex/rest/metadata/reindex?fromIndexName=metadata_v5&toIndexName=metadata_v1" +# detach +ctrl-a ctrl-d +# list sessions +screen -ls +# attach +screen -R somenumber.reindex +# when done +exit +``` +then realias +http://{{host}}:{{port}}/geoportal/rest/metadata/realias?IndexName=metadata_v2 + +Note in {tomcat}/logs/geoportal.log, there may be Reindex issues: + +```2019-02-15 00:02:01,197 ERROR [com.esri.geoportal.lib.elastic.http.request.ReindexRequest] - Reindex issue: metadata_v5->metadata_v1 id=42c8a4c212fb46e885d5bde421e0d22b``` + + +```grep 'Reindex issue' /opt/tomcat/logs/geoportal.log``` diff --git a/geoportal/src/main/resources/config/elastic-mappings.json b/geoportal/src/main/resources/config/elastic-mappings.json index 4b311f0df..0e7e5e440 100644 --- a/geoportal/src/main/resources/config/elastic-mappings.json +++ b/geoportal/src/main/resources/config/elastic-mappings.json @@ -1,6 +1,18 @@ { "settings": { "analysis": { + "tokenizer": { + "hierarchy_tokenizer": { + "type": "path_hierarchy", + "delimiter": ">", + "reverse": false + }, + "category_tokenizer": { + "type": "path_hierarchy", + "delimiter": "/", + "reverse": false + } + }, "analyzer": { "default": { "tokenizer": "standard", @@ -9,12 +21,31 @@ "case_insensitive_sort": { "tokenizer": "keyword", "filter": ["lowercase"] + }, + "category_analyzer": { + "tokenizer": "category_tokenizer", + "filter": [ + "trim", + "hierarchy_stop" + ] + }, + "hierarchy_analyzer": { + "tokenizer": "hierarchy_tokenizer", + "filter": [ + "trim", + "hierarchy_stop" + ] } }, "filter": { "english_stemmer": { "type": "stemmer", "name": "english" + }, + "hierarchy_stop": { + "type": "stop", + "ignore_case": true, + "stopwords": ["category > unassigned", "unassigned"] } } } @@ -191,6 +222,28 @@ "doc_values": false } } + }, + { + "_cat": { + "match": "*_cat", + "mapping": { + "type": "text", + "fielddata": true, + "analyzer": 
"category_analyzer", + "search_analyzer": "category_analyzer" + } + } + }, + { + "_hier": { + "match": "*_hier", + "mapping": { + "type": "text", + "fielddata": true, + "analyzer": "hierarchy_analyzer", + "search_analyzer": "hierarchy_analyzer" + } + } } ] } diff --git a/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js b/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js index 3d3247bab..1404b4eac 100644 --- a/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js +++ b/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js @@ -19,6 +19,7 @@ G.evaluators.iso = { evaluate: function(task) { this.evalBase(task); + this.evalKeywords(task); this.evalService(task); this.evalSpatial(task); this.evalTemporal(task); @@ -35,7 +36,7 @@ G.evaluators.iso = { G.evalProp(task,item,root,"fileid","gmd:fileIdentifier/gco:CharacterString"); G.evalProp(task,item,iden,"title","gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString"); G.evalProp(task,item,iden,"description","gmd:abstract/gco:CharacterString"); - G.evalProps(task,item,root,"keywords_s","//gmd:MD_TopicCategoryCode | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor"); + // G.evalProps(task,item,root,"keywords_s","//gmd:MD_TopicCategoryCode | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor"); G.evalProp(task,item,iden,"thumbnail_s","gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileName/gco:CharacterString"); G.evalProps(task,item,root,"contact_organizations_s","//gmd:CI_ResponsibleParty/gmd:organisationName/gco:CharacterString"); G.evalProps(task,item,root,"contact_people_s","//gmd:CI_ResponsibleParty/gmd:individualName/gco:CharacterString"); @@ -77,7 +78,31 @@ G.evaluators.iso = { 
G.evalProps(task,item,root,"apiso_Classification_s","//gmd:resourceConstraints/gmd:MD_SecurityConstraints/gmd:classification/gmd:MD_ClassificationCode/@codeListValue"); G.writeProp(item,"apiso_HasSecurityConstraints_b",G.hasNode(task,root,"//gmd:resourceConstraints")); }, + evalKeywords: function(task){ + /* Add hierarchical keywords to keywords_hier, all to keywords_s + Looks for NOAA type keywords + "Earth Science > Oceans > Ocean Circulation > Fronts" + */ + var self = this; + var item = task.item, root = task.root; + G.forEachNode(task,root,"//gmd:MD_TopicCategoryCode | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/*/text() ", + function(node){ + print("evalKeywords "+ G.getNodeText(node) ); + var kw = G.getNodeText(node); + if (typeof kw === "undefined" || kw === null) return; + if (typeof kw === "string" && kw.length === 0) return; + if (kw.length >0) + { + print("evalKeywords is text"+ node ); + if (kw.indexOf(">") !== -1) { + G.writeMultiProp(task.item, "keywords_S", kw); + G.writeMultiProp(task.item, "keywords_hier", kw); + } + G.writeMultiProp(task.item, "keywords_s", kw); + } + }); + }, evalInspire: function(task) { var item = task.item, root = task.root; G.evalProps(task,item,root,"apiso_InspireSpatialDataThemes_s","//gmd:title[gco:CharacterString='GEMET - INSPIRE themes, version 1.0']/../../../gmd:keyword/gco:CharacterString"); diff --git a/geoportal/src/main/webapp/app/main/SearchPanel.js b/geoportal/src/main/webapp/app/main/SearchPanel.js index 15dd61649..16abe1d76 100644 --- a/geoportal/src/main/webapp/app/main/SearchPanel.js +++ b/geoportal/src/main/webapp/app/main/SearchPanel.js @@ -25,6 +25,7 @@ define(["dojo/_base/declare", "app/search/TemporalFilter", "app/search/TermsAggregation", "app/search/NumericFilter", + "app/search/HierarchyAggregation", "app/search/AppliedFilters", "app/search/ResultsPane", "app/search/OpenSearchLinksPane" diff --git a/geoportal/src/main/webapp/app/main/templates/SearchPanel.html 
b/geoportal/src/main/webapp/app/main/templates/SearchPanel.html index ac7f5b61d..54a55ca1c 100644 --- a/geoportal/src/main/webapp/app/main/templates/SearchPanel.html +++ b/geoportal/src/main/webapp/app/main/templates/SearchPanel.html @@ -55,11 +55,27 @@
- -
-
-
+
+
+ + +
+
+ +
Method (Other)" + , "Property > Property (Other)" + , "Process > Process (Other)" + , "Material > Material (Other)" + ,"Category > Method (Other)" + , "Category > Property (Other)" + , "Category > Process (Other)" + , "Category > Material (Other)" + ,"Realm > Realm (Other)" + , "Category > Realm (Other)" + ,"Science Domain > Science Domain (Other)" + ,"Category > Science Domain (Other)" + ], + stopFullMatch: [ + "Category > Organization > Tectonics and Structural Geology, Department of Earth and Ocean Sciences, University of South Carolina > Lamont Doherty Earth Observatory" + , "Category > Property > Measure > Topographic" + ], + _initialSettings: null, + + postCreate: function() { + this.inherited(arguments); + + this._initialSettings = { + label: this.label, + field: this.field + }; + if (this.props) { + this._initialSettings.props = lang.clone(this.props); + } + if (this.rootTerm.length === 0 ) { + this.addRootTerm = true; + this.rootTerm = this.defaultRootTerm; + } + //if (this.allowSettings === null) { + // if (AppContext.appConfig.search && !!AppContext.appConfig.search.allowSettings) { + // this.allowSettings = true; + // } + //} + //if (this.allowSettings) { + // var link = this.dropPane.addSettingsLink(); + // link.onclick = lang.hitch(this,function(e) { + // var d = new TermsAggregationSettings({ + // targetWidget: this + // }); + // d.showDialog(); + // }); + //} + }, + + postMixInProperties: function() { + this.inherited(arguments); + if (typeof this.label === "undefined" || this.label === null || this.label.length === 0) { + this.label = this.field; + } + }, + + addEntry: function(term,count,missingVal) { + var v = term+" ("+count+")"; + var tipPattern = i18n.search.appliedFilters.tipPattern; + var tip = tipPattern.replace("{type}",this.label).replace("{value}",term); + var query = {"term": {}}; + query.term[this.field] = term; + if (typeof missingVal === "string" && missingVal.length > 0 && missingVal === term) { + query = {"missing": {"field": 
this.field}}; + } + var qClause = new QClause({ + label: term, + tip: tip, + parentQComponent: this, + removable: true, + scorable: true, + query: query + }); + var nd = domConstruct.create("div",{},this.categoryNode); + var link = domConstruct.create("a",{ + href: "#", + onclick: lang.hitch(this,function() { + this.pushQClause(qClause,true); + }) + },nd); + this.setNodeText(link,v); + }, + + hasField: function() { + return (typeof this.field !== "undefined" && this.field !== null && this.field.length > 0); + }, + + /* SearchComponent API ============================================= */ + + appendQueryParams: function(params) { + if (!this.hasField()) return; + this.appendQClauses(params); + + if (!params.aggregations) params.aggregations = {}; + var key = this.getAggregationKey(); + if (this.addRootTerm && (this.selectedTerm === null || this.defaultRootTerm === this.selectedTerm) ) { + var clause = '' ; + } else { + clause = this.rootTerm ; + } + if (this.selectedTerm != null ) clause = this.selectedTerm; + var props = {"field":this.field,"include" : clause+".*", }; + if (typeof this.props !== "undefined" && this.props !== null) { + delete this.props.field; // TODO ?? 
+ lang.mixin(props,this.props); + } + params.aggregations[key] = {"terms":props}; + }, + + processResults: function(searchResponse) { + domConstruct.empty(this.categoryNode); + var key = this.getAggregationKey(); + this.treeData = []; + + if (searchResponse.aggregations) { + var data = searchResponse.aggregations[key]; + if (data && data.buckets) { + + var v, missingVal = null; + if (this.props && typeof this.props.missing === "string") { + v = lang.trim(this.props.missing); + if (v.length > 0) missingVal = v; + } + if (this.addRootTerm ){ + var item = { + id: this.defaultRootTerm, + parent: parent, + name: v, + term: this.defaultRootTerm, + key: this.defaultRootTerm, + type: 'cat', + count: 0, + count_children: 0 + }; + this.treeData.push(item); + } + + array.forEach(data.buckets, function (entry) { + // this.addEntry(entry.key,entry.doc_count,missingVal); + if (this.addRootTerm){ + var hid = this.defaultRootTerm + ' > ' + entry.key; + } else { + hid = entry.key; + } + // ideally we would use Array.find + var stop = array.filter(this.stopTreeMatch, function(item){ + return entry.key.trim() == item; + }); + if (stop.length>0) { + return; + } + stop = array.filter(this.stopTreeMatch, function(item){ + return entry.key.trim().startsWith(item); + }); + if (stop.length>0){ + return; + } + var split = hid.lastIndexOf(">"); + var parent = null; + var term = entry.key.trim(); + + if (split > 0) { + parent = hid.substring(0, hid.lastIndexOf(">")).trim(); + term = hid.substring(split + 1).trim(); + } + if (!this.stopTerms[term]) { + var v = term + " (" + entry.doc_count + ")"; + var item = { + id: hid, + parent: parent, + name: v, + term: term, + key: entry.key, + type: 'cat', + count: entry.doc_count, + count_children: entry.doc_count + }; + //catStore.put(item); + this.treeData.push(item); + } + }, this); + var mySortOptions = { sort: [{attribute:"key",}]}; + this.treeData = SimpleQueryEngine({ },mySortOptions)(this.treeData); + var catStore = new Memory({ + data: 
this.treeData, + getChildren: function (object) { + return this.query({parent: object.id}); + }, + + }); + array.forEach( + catStore.query( + // null + // ,{ + // sort: function (a, b) { + // console.info ( a.id > b.id ); + // console.info (a.id); + // console.info (b.id); + // return a.id > b.id ? -1 : 1; + // } + // } + ), + function (entry) { + if (entry.parent) { + var parent = array.filter( + this.treeData, + function(item){ + return item.id == entry.parent; + } + ); + + if (parent.length >0) { + parent[0].count_children = parent[0].count_children + entry.count; + var count = parent[0].count_children > parent[0].count ? parent[0].count_children : parent[0].count; + var v = parent[0].term + " (" + count + ")"; + + // var v = parent[0].term + " (" + parent[0].count + "/" + parent[0].count_children + ")"; + parent[0].name = v; + } + } + + + }, this); + } + } + + try { + // catStore.fetch(mySortOptions); + + + var catModel = new ObjectStoreModel({ + store: catStore, + query: {id: this.rootTerm,} + }); + + + + if (catStore.data.length > 0) { + var tree = new Tree({ + model: catModel, + open: this.open, + showRoot: this.showRoot, + onClick: lang.hitch(this, function (item) { + var query = {"term": {}}; + query.term[this.field] = item.key; + var tip = item.key; + var qClause = new QClause({ + label: item.key, + tip: tip, + parentQComponent: this, + removable: true, + scorable: true, + query: query + }); + this.pushQClause(qClause, true); + }) + }); + tree.placeAt(this.categoryNode); + } + } catch (e) { + + console.log("tree warining. 
No items for base term"); + this.setNodeText(this.categoryNode, "(No Items)"); + + } + + } + + + }); + + return oThisClass; +}); \ No newline at end of file diff --git a/geoportal/src/main/webapp/app/search/templates/HierarchyAggregation.html b/geoportal/src/main/webapp/app/search/templates/HierarchyAggregation.html new file mode 100644 index 000000000..e82c198d3 --- /dev/null +++ b/geoportal/src/main/webapp/app/search/templates/HierarchyAggregation.html @@ -0,0 +1,12 @@ +
+
+ + + +
+
+
\ No newline at end of file