diff --git a/geoportal/doc/HierarchyAggregation.md b/geoportal/doc/HierarchyAggregation.md new file mode 100644 index 000000000..f41caa97b --- /dev/null +++ b/geoportal/doc/HierarchyAggregation.md @@ -0,0 +1,50 @@ +# Creating a Hierarchy Search Panel + +Terminology is often in a hierarchy, but this is not expressed by a simple keyword list. +Sometimes metadata creators may express such paths in their keywords, e.g. + `Science Domain > Earth Sciences > Geology` +The HierarchyAggregation allows for the generation of tree widgets in the search panel to allow +for searching of such hierarchies. +The ISO metadata evaluator has been modified to add a term to ```keywords_hier``` when a keyword contains '>'. + +HierarchyAggregation requires the use of fields that have been indexed by the path_hierarchy analyzer. +In order for the filter to display there must be one top level term defined by rootPath. + +* If the field is populated with an uncontrolled set of keywords, there may be multiple + top level terms. In order to handle this, there are two options: + * if rootPath is '', then all terms will be given a path (in the interface, not in the index) + * set rootPath to a top level term, and only that branch will be shown. +* Use multiple HierarchyAggregation to display terms from different top level terms. + +# Preparing for Use +In order to use HierarchyAggregation, you will probably need to reindex, or reharvest records. + +http://{{host}}:{{port}}/geoportal/rest/metadata/reindex?fromIndexName=metadata&toIndexName=metadata_v2 + +For large collections, it is suggested that you log into the terminal and use a command line call to ensure that +all records are indexed. + +Here are the steps to use screen and curl. Screen allows you to log out. You could use & to shift it to the background.
+ +``` +# create screen +screen -S reindex +curl -u username:password "http://host:8080/geoportal_reindex/rest/metadata/reindex?fromIndexName=metadata_v5&toIndexName=metadata_v1" +# detach +ctrl-a ctrl-d +# jobs +screen -ls +# attach +screen -R somenumber.reindex +# when done +exit +``` +then realias +http://{{host}}:{{port}}/geoportal/rest/metadata/realias?IndexName=metadata_v2 + +Note in {tomcat}/logs/geoportal.log, there may be Reindex issues: + +```2019-02-15 00:02:01,197 ERROR [com.esri.geoportal.lib.elastic.http.request.ReindexRequest] - Reindex issue: metadata_v5->metadata_v1 id=42c8a4c212fb46e885d5bde421e0d22b``` + + +```grep 'Reindex issue' /opt/tomcat/logs/geoportal.log``` diff --git a/geoportal/src/main/resources/config/elastic-mappings.json b/geoportal/src/main/resources/config/elastic-mappings.json index 4b311f0df..0e7e5e440 100644 --- a/geoportal/src/main/resources/config/elastic-mappings.json +++ b/geoportal/src/main/resources/config/elastic-mappings.json @@ -1,6 +1,18 @@ { "settings": { "analysis": { + "tokenizer": { + "hierarchy_tokenizer": { + "type": "path_hierarchy", + "delimiter": ">", + "reverse": false + }, + "category_tokenizer": { + "type": "path_hierarchy", + "delimiter": "/", + "reverse": false + } + }, "analyzer": { "default": { "tokenizer": "standard", @@ -9,12 +21,31 @@ "case_insensitive_sort": { "tokenizer": "keyword", "filter": ["lowercase"] + }, + "category_analyzer": { + "tokenizer": "category_tokenizer", + "filter": [ + "trim", + "hierarchy_stop" + ] + }, + "hierarchy_analyzer": { + "tokenizer": "hierarchy_tokenizer", + "filter": [ + "trim", + "hierarchy_stop" + ] } }, "filter": { "english_stemmer": { "type": "stemmer", "name": "english" + }, + "hierarchy_stop": { + "type": "stop", + "ignore_case": true, + "stopwords": ["category > unassigned", "unassigned"] } } } @@ -191,6 +222,28 @@ "doc_values": false } } + }, + { + "_cat": { + "match": "*_cat", + "mapping": { + "type": "text", + "fielddata": true, + "analyzer": 
"category_analyzer", + "search_analyzer": "category_analyzer" + } + } + }, + { + "_hier": { + "match": "*_hier", + "mapping": { + "type": "text", + "fielddata": true, + "analyzer": "hierarchy_analyzer", + "search_analyzer": "hierarchy_analyzer" + } + } } ] } diff --git a/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js b/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js index 3d3247bab..1404b4eac 100644 --- a/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js +++ b/geoportal/src/main/resources/metadata/js/EvaluatorFor_ISO.js @@ -19,6 +19,7 @@ G.evaluators.iso = { evaluate: function(task) { this.evalBase(task); + this.evalKeywords(task); this.evalService(task); this.evalSpatial(task); this.evalTemporal(task); @@ -35,7 +36,7 @@ G.evaluators.iso = { G.evalProp(task,item,root,"fileid","gmd:fileIdentifier/gco:CharacterString"); G.evalProp(task,item,iden,"title","gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString"); G.evalProp(task,item,iden,"description","gmd:abstract/gco:CharacterString"); - G.evalProps(task,item,root,"keywords_s","//gmd:MD_TopicCategoryCode | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor"); + // G.evalProps(task,item,root,"keywords_s","//gmd:MD_TopicCategoryCode | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gmx:Anchor"); G.evalProp(task,item,iden,"thumbnail_s","gmd:graphicOverview/gmd:MD_BrowseGraphic/gmd:fileName/gco:CharacterString"); G.evalProps(task,item,root,"contact_organizations_s","//gmd:CI_ResponsibleParty/gmd:organisationName/gco:CharacterString"); G.evalProps(task,item,root,"contact_people_s","//gmd:CI_ResponsibleParty/gmd:individualName/gco:CharacterString"); @@ -77,7 +78,31 @@ G.evaluators.iso = { 
G.evalProps(task,item,root,"apiso_Classification_s","//gmd:resourceConstraints/gmd:MD_SecurityConstraints/gmd:classification/gmd:MD_ClassificationCode/@codeListValue"); G.writeProp(item,"apiso_HasSecurityConstraints_b",G.hasNode(task,root,"//gmd:resourceConstraints")); }, + evalKeywords: function(task){ + /* Write every keyword to keywords_s; keywords containing '>' also go to keywords_hier. + Looks for NOAA type keywords, e.g. + "Earth Science > Oceans > Ocean Circulation > Fronts" + */ + var self = this; + var item = task.item, root = task.root; + G.forEachNode(task,root,"//gmd:MD_TopicCategoryCode | //gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/*/text() ", + function(node){ + print("evalKeywords "+ G.getNodeText(node) ); + var kw = G.getNodeText(node); + if (typeof kw === "undefined" || kw === null) return; + if (typeof kw === "string" && kw.length === 0) return; + if (kw.length >0) + { + print("evalKeywords is text"+ node ); + if (kw.indexOf(">") !== -1) { + // A '>' marks a hierarchical path, so index it under keywords_hier as well. + G.writeMultiProp(task.item, "keywords_hier", kw); + } + G.writeMultiProp(task.item, "keywords_s", kw); + } + }); + }, evalInspire: function(task) { var item = task.item, root = task.root; G.evalProps(task,item,root,"apiso_InspireSpatialDataThemes_s","//gmd:title[gco:CharacterString='GEMET - INSPIRE themes, version 1.0']/../../../gmd:keyword/gco:CharacterString"); diff --git a/geoportal/src/main/webapp/app/main/SearchPanel.js b/geoportal/src/main/webapp/app/main/SearchPanel.js index 15dd61649..16abe1d76 100644 --- a/geoportal/src/main/webapp/app/main/SearchPanel.js +++ b/geoportal/src/main/webapp/app/main/SearchPanel.js @@ -25,6 +25,7 @@ define(["dojo/_base/declare", "app/search/TemporalFilter", "app/search/TermsAggregation", "app/search/NumericFilter", + "app/search/HierarchyAggregation", "app/search/AppliedFilters", "app/search/ResultsPane", "app/search/OpenSearchLinksPane" diff --git a/geoportal/src/main/webapp/app/main/templates/SearchPanel.html 
b/geoportal/src/main/webapp/app/main/templates/SearchPanel.html index ac7f5b61d..54a55ca1c 100644 --- a/geoportal/src/main/webapp/app/main/templates/SearchPanel.html +++ b/geoportal/src/main/webapp/app/main/templates/SearchPanel.html @@ -55,11 +55,27 @@