From c3f677c84d31326e230b02f7b09ade5ac0a32b37 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Mon, 5 Feb 2024 22:06:14 +0100 Subject: [PATCH 1/4] update docker compose to use standard SOLR not the ontoportal configured --- Gemfile | 4 ++-- Gemfile.lock | 10 +++++----- docker-compose.yml | 33 +++++---------------------------- rakelib/docker_based_test.rake | 13 +++++++++++++ 4 files changed, 25 insertions(+), 35 deletions(-) diff --git a/Gemfile b/Gemfile index 82554462..54388b7e 100644 --- a/Gemfile +++ b/Gemfile @@ -44,12 +44,12 @@ gem 'haml', '~> 5.2.2' # pin see https://github.com/ncbo/ontologies_api/pull/107 gem 'redcarpet' # NCBO gems (can be from a local dev path or from rubygems/git) -gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'development' +gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'feature/add-model-based-search' gem 'ncbo_annotator', git: 'https://github.com/ontoportal-lirmm/ncbo_annotator.git', branch: 'master' gem 'ncbo_cron', git: 'https://github.com/ontoportal-lirmm/ncbo_cron.git', branch: 'master' gem 'ncbo_ontology_recommender', git: 'https://github.com/ncbo/ncbo_ontology_recommender.git', branch: 'master' gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master' -gem 'ontologies_linked_data', git: 'https://github.com/ontoportal-lirmm/ontologies_linked_data.git', branch: 'development' +gem 'ontologies_linked_data', git: 'https://github.com/ontoportal-lirmm/ontologies_linked_data.git', branch: 'feature/enhance-search' group :development do # bcrypt_pbkdf and ed35519 is required for capistrano deployments when using ed25519 keys; see https://github.com/miloserdow/capistrano-deploy/issues/42 diff --git a/Gemfile.lock b/Gemfile.lock index 5849b1c8..0b6b3962 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,8 +11,8 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: f8aeea33e270960afb530064d616eaf8343b5473 - branch: development + revision: 8308441f435f7dd09e22afb22c817f229a585c5a + branch: feature/add-model-based-search specs: goo (0.0.2) addressable (~> 2.8) @@ -54,8 +54,8 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git - revision: 0de5f3b2db02a5d2c008ace73a34d586c24e67a7 - branch: development + revision: ae998b8694fb6bd3fd3de598b638c5b52884cbd8 + branch: feature/enhance-search specs: ontologies_linked_data (0.0.1) activesupport @@ -252,7 +252,7 @@ GEM multipart-post (2.3.0) mutex_m (0.2.0) net-http-persistent (2.9.4) - net-imap (0.4.9.1) + net-imap (0.4.10) date net-protocol net-pop (0.1.2) diff --git a/docker-compose.yml b/docker-compose.yml index 4d03843d..adc0891c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -61,34 +61,11 @@ services: profiles: - fs - solr-term-ut: - image: solr:8 - volumes: - - ./test/solr/configsets:/configsets:ro - ports: - - "8983:8983" - command: [ "solr-precreate", "term_search_core1", "/configsets/term_search" ] - healthcheck: - test: [ "CMD-SHELL", "curl -sf http://localhost:8983/solr/term_search_core1/admin/ping?wt=json | grep -iq '\"status\":\"OK\"}' || exit 1" ] - start_period: 5s - interval: 10s - timeout: 5s - retries: 5 - - solr-prop-ut: - image: solr:8 - volumes: - - ./test/solr/configsets:/configsets:ro - ports: - - "8984:8983" - command: [ "solr-precreate", "prop_search_core1", "/configsets/property_search" ] - healthcheck: - test: [ "CMD-SHELL", "curl -sf http://localhost:8983/solr/prop_search_core1/admin/ping?wt=json | grep -iq '\"status\":\"OK\"}' || exit 1" ] - start_period: 5s - interval: 10s - timeout: 5s - retries: 5 - + solr-ut: + image: solr:8 + ports: + - 8983:8983 + command: bin/solr start -cloud -f agraph-ut: image: franzinc/agraph:v8.0.0.rc1 platform: linux/amd64 diff --git a/rakelib/docker_based_test.rake b/rakelib/docker_based_test.rake index d9b334f4..52af504c 100644 --- a/rakelib/docker_based_test.rake +++ b/rakelib/docker_based_test.rake @@ -5,6 +5,19 @@ namespace :test do namespace :docker do task :up do system("docker compose up -d") || abort("Unable to start docker containers") + unless system("curl -sf http://localhost:8983/solr || exit 1") + printf("waiting for Solr container to initialize") + sec = 0 + until system("curl -sf http://localhost:8983/solr || exit 1") do + sleep(1) + printf(".") + sec += 1 + if sec > 30 + abort(" Solr container hasn't initialized properly") + end + end + printf("\n") + end end task :down do #system("docker compose --profile fs --profile ag stop") From d9bbd407ccda2b7a9766a77bad2e1ab73da159a0 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Mon, 5 Feb 2024 22:07:27 +0100 Subject: [PATCH 2/4] update term search to use the new Schema API and remove config files --- config/solr/solr.xml | 60 - config/solr/term_search/enumsconfig.xml | 12 - .../term_search/mapping-ISOLatin1Accent.txt | 246 ---- config/solr/term_search/schema.xml | 1222 ---------------- config/solr/term_search/solrconfig.xml | 1299 ----------------- helpers/properties_search_helper.rb | 6 +- helpers/search_helper.rb | 14 +- test/controllers/test_search_controller.rb | 10 +- 8 files changed, 15 insertions(+), 2854 deletions(-) delete mode 100644 config/solr/solr.xml delete mode 100644 config/solr/term_search/enumsconfig.xml delete mode 100644 config/solr/term_search/mapping-ISOLatin1Accent.txt delete mode 100644 config/solr/term_search/schema.xml delete mode 100644 config/solr/term_search/solrconfig.xml diff --git a/config/solr/solr.xml b/config/solr/solr.xml deleted file mode 100644 index d9d089e4..00000000 --- a/config/solr/solr.xml +++ /dev/null @@ -1,60 +0,0 @@ - - - - - - - - ${solr.max.booleanClauses:500000} - ${solr.sharedLib:} - ${solr.allowPaths:} - - - - ${host:} - ${solr.port.advertise:0} - ${hostContext:solr} - - ${genericCoreNodeNames:true} - - ${zkClientTimeout:30000} - ${distribUpdateSoTimeout:600000} - ${distribUpdateConnTimeout:60000} - ${zkCredentialsProvider:org.apache.solr.common.cloud.DefaultZkCredentialsProvider} - ${zkACLProvider:org.apache.solr.common.cloud.DefaultZkACLProvider} - - - - - ${socketTimeout:600000} - ${connTimeout:60000} - ${solr.shardsWhitelist:} - - - - - diff --git a/config/solr/term_search/enumsconfig.xml b/config/solr/term_search/enumsconfig.xml deleted file mode 100644 index 72e7b7d3..00000000 --- a/config/solr/term_search/enumsconfig.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - ONTOLOGY - VALUE_SET_COLLECTION - - - ANNOTATION - DATATYPE - OBJECT - - \ No newline at end of file diff --git a/config/solr/term_search/mapping-ISOLatin1Accent.txt b/config/solr/term_search/mapping-ISOLatin1Accent.txt deleted file mode 100644 index ede77425..00000000 --- a/config/solr/term_search/mapping-ISOLatin1Accent.txt +++ /dev/null @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) - -# example: -# "À" => "A" -# "\u00C0" => "A" -# "\u00C0" => "\u0041" -# "ß" => "ss" -# "\t" => " " -# "\n" => "" - -# À => A -"\u00C0" => "A" - -# Á => A -"\u00C1" => "A" - -#  => A -"\u00C2" => "A" - -# à => A -"\u00C3" => "A" - -# Ä => A -"\u00C4" => "A" - -# Å => A -"\u00C5" => "A" - -# Æ => AE -"\u00C6" => "AE" - -# Ç => C -"\u00C7" => "C" - -# È => E -"\u00C8" => "E" - -# É => E -"\u00C9" => "E" - -# Ê => E -"\u00CA" => "E" - -# Ë => E -"\u00CB" => "E" - -# Ì => I -"\u00CC" => "I" - -# Í => I -"\u00CD" => "I" - -# Î => I -"\u00CE" => "I" - -# Ï => I -"\u00CF" => "I" - -# IJ => IJ -"\u0132" => "IJ" - -# Ð => D -"\u00D0" => "D" - -# Ñ => N -"\u00D1" => "N" - -# Ò => O -"\u00D2" => "O" - -# Ó => O -"\u00D3" => "O" - -# Ô => O -"\u00D4" => "O" - -# Õ => O -"\u00D5" => "O" - -# Ö => O -"\u00D6" => "O" - -# Ø => O -"\u00D8" => "O" - -# Œ => OE -"\u0152" => "OE" - -# Þ -"\u00DE" => "TH" - -# Ù => U -"\u00D9" => "U" - -# Ú => U -"\u00DA" => "U" - -# Û => U -"\u00DB" => "U" - -# Ü => U -"\u00DC" => "U" - -# Ý => Y -"\u00DD" => "Y" - -# Ÿ => Y -"\u0178" => "Y" - -# à => a -"\u00E0" => "a" - -# á => a -"\u00E1" => "a" - -# â => a -"\u00E2" => "a" - -# ã => a -"\u00E3" => "a" - -# ä => a -"\u00E4" => "a" - -# å => a -"\u00E5" => "a" - -# æ => ae -"\u00E6" => "ae" - -# ç => c -"\u00E7" => "c" - -# è => e -"\u00E8" => "e" - -# é => e -"\u00E9" => "e" - -# ê => e -"\u00EA" => "e" - -# ë => e -"\u00EB" => "e" - -# ì => i -"\u00EC" => "i" - -# í => i -"\u00ED" => "i" - -# î => i -"\u00EE" => "i" - -# ï => i -"\u00EF" => "i" - -# ij => ij -"\u0133" => "ij" - -# ð => d -"\u00F0" => "d" - -# ñ => n -"\u00F1" => "n" - -# ò => o -"\u00F2" => "o" - -# ó => o -"\u00F3" => "o" - -# ô => o -"\u00F4" => "o" - -# õ => o -"\u00F5" => "o" - -# ö => o -"\u00F6" => "o" - -# ø => o -"\u00F8" => "o" - -# œ => oe -"\u0153" => "oe" - -# ß => ss -"\u00DF" => "ss" - -# þ => th -"\u00FE" => "th" - -# ù => u -"\u00F9" => "u" - -# ú => u -"\u00FA" => "u" - -# û => u -"\u00FB" => "u" - -# ü => u -"\u00FC" => "u" - -# ý => y -"\u00FD" => "y" - -# ÿ => y -"\u00FF" => "y" - -# ff => ff -"\uFB00" => "ff" - -# fi => fi -"\uFB01" => "fi" - -# fl => fl -"\uFB02" => "fl" - -# ffi => ffi -"\uFB03" => "ffi" - -# ffl => ffl -"\uFB04" => "ffl" - -# ſt => ft -"\uFB05" => "ft" - -# st => st -"\uFB06" => "st" diff --git a/config/solr/term_search/schema.xml b/config/solr/term_search/schema.xml deleted file mode 100644 index fa95e127..00000000 --- a/config/solr/term_search/schema.xml +++ /dev/null @@ -1,1222 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/config/solr/term_search/solrconfig.xml b/config/solr/term_search/solrconfig.xml deleted file mode 100644 index 771a0f32..00000000 --- a/config/solr/term_search/solrconfig.xml +++ /dev/null @@ -1,1299 +0,0 @@ - - - - - - - - - 8.8.2 - - - - - - - - - - - ${solr.data.dir:} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.lock.type:native} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.ulog.dir:} - ${solr.ulog.numVersionBuckets:65536} - - - - - ${solr.autoCommit.maxTime:15000} - false - - - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - - - - - - - - ${solr.max.booleanClauses:500000} - - - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - - - - - - - - false - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - explicit - 10 - - - - - - - - - - - - - - - - explicit - json - true - - - - - - _text_ - - - - - - - - - text_general - - - - - - default - _text_ - solr.DirectSolrSpellChecker - - internal - - 0.5 - - 2 - - 1 - - 5 - - 4 - - 0.01 - - - - - - - - - - - - default - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - spellcheck - - - - - - - - - - true - false - - - terms - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - 10 - .,!? - - - - - - - WORD - - - en - US - - - - - - - - - - - - [^\w-\.] - _ - - - - - - - yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z - yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z - yyyy-MM-dd HH:mm[:ss[.SSS]][z - yyyy-MM-dd HH:mm[:ss[,SSS]][z - [EEE, ]dd MMM yyyy HH:mm[:ss] z - EEEE, dd-MMM-yy HH:mm:ss z - EEE MMM ppd HH:mm:ss [z ]yyyy - - - - - java.lang.String - text_general - - *_str - 256 - - - true - - - java.lang.Boolean - booleans - - - java.util.Date - pdates - - - java.lang.Long - java.lang.Integer - plongs - - - java.lang.Number - pdoubles - - - - - - - - - - - - - - - - - - - - text/plain; charset=UTF-8 - - - - - - - - - - - - - - diff --git a/helpers/properties_search_helper.rb b/helpers/properties_search_helper.rb index c3567edd..28ff8186 100644 --- a/helpers/properties_search_helper.rb +++ b/helpers/properties_search_helper.rb @@ -30,10 +30,10 @@ def get_properties_search_query(text, params) if params[SearchHelper::EXACT_MATCH_PARAM] == "true" query = "\"#{solr_escape(text)}\"" - params["qf"] = "resource_id^20 labelExact^10 labelGeneratedExact^8" - params["hl.fl"] = "resource_id labelExact labelGeneratedExact" + params["qf"] = "resource_id^20 label_Exact^10 labelGenerated_Exact^8" + params["hl.fl"] = "resource_id label_Exact labelGenerated_Exact" else - params["qf"] = "labelExact^100 labelGeneratedExact^80 labelSuggestEdge^50 labelSuggestNgram label labelGenerated resource_id" + params["qf"] = "label_Exact^100 labelGenerated_Exact^80 label_SuggestEdge^50 labelGenerated_SuggestEdge^40 labelGenerated resource_id" query = solr_escape(text) # double quote the query if it is a URL (ID searches) query = "\"#{query}\"" if text =~ /\A#{URI::regexp(['http', 'https'])}\z/ diff --git a/helpers/search_helper.rb b/helpers/search_helper.rb index 071000d9..abe5c3e5 100644 --- a/helpers/search_helper.rb +++ b/helpers/search_helper.rb @@ -78,7 +78,7 @@ def get_term_search_query(text, params={}) raise error 400, "The search query must be provided via /search?q=[&page=&pagesize=]" else text = '' - params['sort'] = 'prefLabelExact asc, submissionAcronym asc' if sort == 'prefLabel' + params['sort'] = 'prefLabel_Exact asc, submissionAcronym asc' if sort == 'prefLabel' end end @@ -101,15 +101,15 @@ def get_term_search_query(text, params={}) if params[EXACT_MATCH_PARAM] == "true" query = "\"#{solr_escape(text)}\"" - params["qf"] = "resource_id^20 prefLabelExact#{lang_suffix }^10 synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}" - params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^20 prefLabel_Exact#{lang_suffix}^10 synonym_Exact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabel_Exact#{lang_suffix} synonym_Exact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*' text.gsub!(/\*+$/, '') query = "\"#{solr_escape(text)}\"" params["qt"] = "/suggest_ncbo" - params["qf"] = "prefLabelExact#{lang_suffix }^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" + params["qf"] = " prefLabel_Exact#{lang_suffix}^100 prefLabel_SuggestEdge#{lang_suffix}^50 synonym#{lang_suffix}_SuggestEdge^10 prefLabel#{lang_suffix}_SuggestNgram synonym#{lang_suffix}_SuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" params["pf"] = "prefLabelSuggest^50" - params["hl.fl"] = "prefLabelExact#{lang_suffix } prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "prefLabel_Exact#{lang_suffix} prefLabel_SuggestEdge#{lang_suffix} synonym_SuggestEdge#{lang_suffix} prefLabel_SuggestNgram#{lang_suffix} synonym_SuggestNgram#{lang_suffix} resource_id #{QUERYLESS_FIELDS_STR}" else if text.strip.empty? query = '*' @@ -117,9 +117,9 @@ def get_term_search_query(text, params={}) query = solr_escape(text) end - params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix }^90 prefLabel#{lang_suffix }^70 synonymExact#{lang_suffix }^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^100 prefLabel_Exact#{lang_suffix}^90 prefLabel#{lang_suffix}^70 synonym_Exact#{lang_suffix}^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}" params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true" - params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } prefLabel#{lang_suffix } synonymExact#{lang_suffix } synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabel_Exact#{lang_suffix} prefLabel#{lang_suffix } synonym_Exact#{lang_suffix} synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}" params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true" end diff --git a/test/controllers/test_search_controller.rb b/test/controllers/test_search_controller.rb index 228d83db..17ca5ebd 100644 --- a/test/controllers/test_search_controller.rb +++ b/test/controllers/test_search_controller.rb @@ -59,8 +59,8 @@ def self.after_suite @@test_pc_child.delete LinkedData::SampleData::Ontology.delete_ontologies_and_submissions @@test_user.delete - LinkedData::Models::Ontology.indexClear - LinkedData::Models::Ontology.indexCommit + LinkedData::Models::Class.indexClear + LinkedData::Models::Class.indexCommit end def test_search @@ -221,9 +221,9 @@ def test_multilingual_search doc = res["collection"].select{|doc| doc["@id"].to_s.eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first refute_nil doc - #res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80}, :main) - #refute_equal 0, res["response"]["numFound"] - #refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first + res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80}) + refute_equal 0, res["response"]["numFound"] + refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first get "/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr" res = MultiJson.load(last_response.body) From 35a28229e13a685a9dade1ddc3f7f47e45d7256a Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Mon, 5 Feb 2024 22:08:30 +0100 Subject: [PATCH 3/4] update properties search to use the new Schema API & remove config files --- .github/workflows/ruby-unit-tests.yml | 2 - config/environments/test.rb | 4 +- config/solr/property_search/enumsconfig.xml | 12 - .../mapping-ISOLatin1Accent.txt | 246 ---- config/solr/property_search/schema.xml | 1179 --------------- config/solr/property_search/solrconfig.xml | 1299 ----------------- controllers/properties_search_controller.rb | 2 +- .../test_properties_search_controller.rb | 4 +- 8 files changed, 5 insertions(+), 2743 deletions(-) delete mode 100644 config/solr/property_search/enumsconfig.xml delete mode 100644 config/solr/property_search/mapping-ISOLatin1Accent.txt delete mode 100644 config/solr/property_search/schema.xml delete mode 100644 config/solr/property_search/solrconfig.xml diff --git a/.github/workflows/ruby-unit-tests.yml b/.github/workflows/ruby-unit-tests.yml index 2e5c21cb..16d8357e 100644 --- a/.github/workflows/ruby-unit-tests.yml +++ b/.github/workflows/ruby-unit-tests.yml @@ -15,8 +15,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up solr configsets - run: ./test/solr/generate_ncbo_configsets.sh - name: Install Dependencies run: sudo apt-get update && sudo apt-get -y install raptor2-utils - name: Set up JDK 11 diff --git a/config/environments/test.rb b/config/environments/test.rb index cbfc0ada..0b68cc3b 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -8,8 +8,8 @@ GOO_HOST = ENV.include?("GOO_HOST") ? ENV["GOO_HOST"] : "localhost" REDIS_HOST = ENV.include?("REDIS_HOST") ? ENV["REDIS_HOST"] : "localhost" REDIS_PORT = ENV.include?("REDIS_PORT") ? ENV["REDIS_PORT"] : 6379 -SOLR_TERM_SEARCH_URL = ENV.include?("SOLR_TERM_SEARCH_URL") ? ENV["SOLR_TERM_SEARCH_URL"] : "http://localhost:8983/solr/term_search_core1" -SOLR_PROP_SEARCH_URL = ENV.include?("SOLR_PROP_SEARCH_URL") ? ENV["SOLR_PROP_SEARCH_URL"] : "http://localhost:8984/solr/prop_search_core1" +SOLR_TERM_SEARCH_URL = ENV.include?("SOLR_TERM_SEARCH_URL") ? ENV["SOLR_TERM_SEARCH_URL"] : "http://localhost:8983/solr" +SOLR_PROP_SEARCH_URL = ENV.include?("SOLR_PROP_SEARCH_URL") ? ENV["SOLR_PROP_SEARCH_URL"] : "http://localhost:8983/solr" MGREP_HOST = ENV.include?("MGREP_HOST") ? ENV["MGREP_HOST"] : "localhost" MGREP_PORT = ENV.include?("MGREP_PORT") ? ENV["MGREP_PORT"] : 55556 GOO_SLICES = ENV["GOO_SLICES"] || 500 diff --git a/config/solr/property_search/enumsconfig.xml b/config/solr/property_search/enumsconfig.xml deleted file mode 100644 index 72e7b7d3..00000000 --- a/config/solr/property_search/enumsconfig.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - ONTOLOGY - VALUE_SET_COLLECTION - - - ANNOTATION - DATATYPE - OBJECT - - \ No newline at end of file diff --git a/config/solr/property_search/mapping-ISOLatin1Accent.txt b/config/solr/property_search/mapping-ISOLatin1Accent.txt deleted file mode 100644 index ede77425..00000000 --- a/config/solr/property_search/mapping-ISOLatin1Accent.txt +++ /dev/null @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) - -# example: -# "À" => "A" -# "\u00C0" => "A" -# "\u00C0" => "\u0041" -# "ß" => "ss" -# "\t" => " " -# "\n" => "" - -# À => A -"\u00C0" => "A" - -# Á => A -"\u00C1" => "A" - -#  => A -"\u00C2" => "A" - -# à => A -"\u00C3" => "A" - -# Ä => A -"\u00C4" => "A" - -# Å => A -"\u00C5" => "A" - -# Æ => AE -"\u00C6" => "AE" - -# Ç => C -"\u00C7" => "C" - -# È => E -"\u00C8" => "E" - -# É => E -"\u00C9" => "E" - -# Ê => E -"\u00CA" => "E" - -# Ë => E -"\u00CB" => "E" - -# Ì => I -"\u00CC" => "I" - -# Í => I -"\u00CD" => "I" - -# Î => I -"\u00CE" => "I" - -# Ï => I -"\u00CF" => "I" - -# IJ => IJ -"\u0132" => "IJ" - -# Ð => D -"\u00D0" => "D" - -# Ñ => N -"\u00D1" => "N" - -# Ò => O -"\u00D2" => "O" - -# Ó => O -"\u00D3" => "O" - -# Ô => O -"\u00D4" => "O" - -# Õ => O -"\u00D5" => "O" - -# Ö => O -"\u00D6" => "O" - -# Ø => O -"\u00D8" => "O" - -# Œ => OE -"\u0152" => "OE" - -# Þ -"\u00DE" => "TH" - -# Ù => U -"\u00D9" => "U" - -# Ú => U -"\u00DA" => "U" - -# Û => U -"\u00DB" => "U" - -# Ü => U -"\u00DC" => "U" - -# Ý => Y -"\u00DD" => "Y" - -# Ÿ => Y -"\u0178" => "Y" - -# à => a -"\u00E0" => "a" - -# á => a -"\u00E1" => "a" - -# â => a -"\u00E2" => "a" - -# ã => a -"\u00E3" => "a" - -# ä => a -"\u00E4" => "a" - -# å => a -"\u00E5" => "a" - -# æ => ae -"\u00E6" => "ae" - -# ç => c -"\u00E7" => "c" - -# è => e -"\u00E8" => "e" - -# é => e -"\u00E9" => "e" - -# ê => e -"\u00EA" => "e" - -# ë => e -"\u00EB" => "e" - -# ì => i -"\u00EC" => "i" - -# í => i -"\u00ED" => "i" - -# î => i -"\u00EE" => "i" - -# ï => i -"\u00EF" => "i" - -# ij => ij -"\u0133" => "ij" - -# ð => d -"\u00F0" => "d" - -# ñ => n -"\u00F1" => "n" - -# ò => o -"\u00F2" => "o" - -# ó => o -"\u00F3" => "o" - -# ô => o -"\u00F4" => "o" - -# õ => o -"\u00F5" => "o" - -# ö => o -"\u00F6" => "o" - -# ø => o -"\u00F8" => "o" - -# œ => oe -"\u0153" => "oe" - -# ß => ss -"\u00DF" => "ss" - -# þ => th -"\u00FE" => "th" - -# ù => u -"\u00F9" => "u" - -# ú => u -"\u00FA" => "u" - -# û => u -"\u00FB" => "u" - -# ü => u -"\u00FC" => "u" - -# ý => y -"\u00FD" => "y" - -# ÿ => y -"\u00FF" => "y" - -# ff => ff -"\uFB00" => "ff" - -# fi => fi -"\uFB01" => "fi" - -# fl => fl -"\uFB02" => "fl" - -# ffi => ffi -"\uFB03" => "ffi" - -# ffl => ffl -"\uFB04" => "ffl" - -# ſt => ft -"\uFB05" => "ft" - -# st => st -"\uFB06" => "st" diff --git a/config/solr/property_search/schema.xml b/config/solr/property_search/schema.xml deleted file mode 100644 index 20824ea6..00000000 --- a/config/solr/property_search/schema.xml +++ /dev/null @@ -1,1179 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/config/solr/property_search/solrconfig.xml b/config/solr/property_search/solrconfig.xml deleted file mode 100644 index 771a0f32..00000000 --- a/config/solr/property_search/solrconfig.xml +++ /dev/null @@ -1,1299 +0,0 @@ - - - - - - - - - 8.8.2 - - - - - - - - - - - ${solr.data.dir:} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.lock.type:native} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ${solr.ulog.dir:} - ${solr.ulog.numVersionBuckets:65536} - - - - - ${solr.autoCommit.maxTime:15000} - false - - - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - - - - - - - - ${solr.max.booleanClauses:500000} - - - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - - - - - - - - false - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - explicit - 10 - - - - - - - - - - - - - - - - explicit - json - true - - - - - - _text_ - - - - - - - - - text_general - - - - - - default - _text_ - solr.DirectSolrSpellChecker - - internal - - 0.5 - - 2 - - 1 - - 5 - - 4 - - 0.01 - - - - - - - - - - - - default - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - spellcheck - - - - - - - - - - true - false - - - terms - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - 10 - .,!? - - - - - - - WORD - - - en - US - - - - - - - - - - - - [^\w-\.] - _ - - - - - - - yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z - yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z - yyyy-MM-dd HH:mm[:ss[.SSS]][z - yyyy-MM-dd HH:mm[:ss[,SSS]][z - [EEE, ]dd MMM yyyy HH:mm[:ss] z - EEEE, dd-MMM-yy HH:mm:ss z - EEE MMM ppd HH:mm:ss [z ]yyyy - - - - - java.lang.String - text_general - - *_str - 256 - - - true - - - java.lang.Boolean - booleans - - - java.util.Date - pdates - - - java.lang.Long - java.lang.Integer - plongs - - - java.lang.Number - pdoubles - - - - - - - - - - - - - - - - - - - - text/plain; charset=UTF-8 - - - - - - - - - - - - - - diff --git a/controllers/properties_search_controller.rb b/controllers/properties_search_controller.rb index 29d6b772..6c5b6cdf 100644 --- a/controllers/properties_search_controller.rb +++ b/controllers/properties_search_controller.rb @@ -22,7 +22,7 @@ def process_search(params=nil) # puts "Properties query: #{query}, params: #{params}" set_page_params(params) docs = Array.new - resp = LinkedData::Models::Class.search(query, params, :property) + resp = LinkedData::Models::OntologyProperty.search(query, params) total_found = resp["response"]["numFound"] add_matched_fields(resp, Sinatra::Helpers::SearchHelper::MATCH_TYPE_LABEL) ontology_rank = LinkedData::Models::Ontology.rank diff --git a/test/controllers/test_properties_search_controller.rb b/test/controllers/test_properties_search_controller.rb index 75a36a48..6c99fc40 100644 --- a/test/controllers/test_properties_search_controller.rb +++ b/test/controllers/test_properties_search_controller.rb @@ -28,8 +28,8 @@ def self.before_suite def self.after_suite LinkedData::SampleData::Ontology.delete_ontologies_and_submissions - LinkedData::Models::Ontology.indexClear(:property) - LinkedData::Models::Ontology.indexCommit(nil, :property) + LinkedData::Models::OntologyProperty.indexClear + LinkedData::Models::OntologyProperty.indexCommit end def test_property_search From 35e9cb7c69390d78ba3add3ed0638a5591af567a Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Fri, 9 Feb 2024 02:53:15 +0100 Subject: [PATCH 4/4] update class and properties schema to use the existent dynamic names --- Gemfile.lock | 16 ++++++++-------- helpers/properties_search_helper.rb | 6 +++--- helpers/search_helper.rb | 14 +++++++------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0b6b3962..f11c5252 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,7 +11,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: 8308441f435f7dd09e22afb22c817f229a585c5a + revision: 9d5c23d170e5739d3635861de1af7eec3b988009 branch: feature/add-model-based-search specs: goo (0.0.2) @@ -37,7 +37,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ncbo_cron.git - revision: f33a917aae2000270d5b560cadf7044bff822779 + revision: 9ec0147203007cc368a5119ffe1a019fa8701c14 branch: master specs: ncbo_cron (0.0.1) @@ -54,7 +54,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git - revision: ae998b8694fb6bd3fd3de598b638c5b52884cbd8 + revision: db96af1c8ad925a9392e15fbebee9cfebcc7e608 branch: feature/enhance-search specs: ontologies_linked_data (0.0.1) @@ -201,7 +201,7 @@ GEM grpc (~> 1.27) googleapis-common-protos-types (1.11.0) google-protobuf (~> 3.18) - googleauth (1.9.2) + googleauth (1.10.0) faraday (>= 1.0, < 3.a) google-cloud-env (~> 2.1) jwt (>= 1.4, < 3.0) @@ -242,14 +242,14 @@ GEM method_source (1.0.0) mime-types (3.5.2) mime-types-data (~> 3.2015) - mime-types-data (3.2023.1205) + mime-types-data (3.2024.0206) mini_mime (1.1.5) minitest (4.7.5) minitest-stub_any_instance (1.0.3) mlanett-redis-lock (0.2.7) redis multi_json (1.15.0) - multipart-post (2.3.0) + multipart-post (2.4.0) mutex_m (0.2.0) net-http-persistent (2.9.4) net-imap (0.4.10) @@ -313,7 +313,7 @@ GEM declarative (< 0.1.0) trailblazer-option (>= 0.1.1, < 0.2.0) uber (< 0.2.0) - request_store (1.5.1) + request_store (1.6.0) rack (>= 1.4) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) @@ -378,7 +378,7 @@ GEM unicorn (>= 4, < 7) uuid (2.3.9) macaddr (~> 1.0) - webmock (3.19.1) + webmock (3.20.0) addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) diff --git a/helpers/properties_search_helper.rb b/helpers/properties_search_helper.rb index 28ff8186..c4295749 100644 --- a/helpers/properties_search_helper.rb +++ b/helpers/properties_search_helper.rb @@ -30,10 +30,10 @@ def get_properties_search_query(text, params) if params[SearchHelper::EXACT_MATCH_PARAM] == "true" query = "\"#{solr_escape(text)}\"" - params["qf"] = "resource_id^20 label_Exact^10 labelGenerated_Exact^8" - params["hl.fl"] = "resource_id label_Exact labelGenerated_Exact" + params["qf"] = "resource_id^20 labelExact^10 labelGeneratedExact^8" + params["hl.fl"] = "resource_id labelExact labelGeneratedExact" else - params["qf"] = "label_Exact^100 labelGenerated_Exact^80 label_SuggestEdge^50 labelGenerated_SuggestEdge^40 labelGenerated resource_id" + params["qf"] = "labelExact^100 labelGeneratedExact^80 labelSuggestEdge^50 labelGeneratedSuggestEdge^40 labelGenerated resource_id" query = solr_escape(text) # double quote the query if it is a URL (ID searches) query = "\"#{query}\"" if text =~ /\A#{URI::regexp(['http', 'https'])}\z/ diff --git a/helpers/search_helper.rb b/helpers/search_helper.rb index abe5c3e5..276ea3e0 100644 --- a/helpers/search_helper.rb +++ b/helpers/search_helper.rb @@ -78,7 +78,7 @@ def get_term_search_query(text, params={}) raise error 400, "The search query must be provided via /search?q=[&page=&pagesize=]" else text = '' - params['sort'] = 'prefLabel_Exact asc, submissionAcronym asc' if sort == 'prefLabel' + params['sort'] = 'prefLabelExact asc, submissionAcronym asc' if sort == 'prefLabel' end end @@ -101,15 +101,15 @@ def get_term_search_query(text, params={}) if params[EXACT_MATCH_PARAM] == "true" query = "\"#{solr_escape(text)}\"" - params["qf"] = "resource_id^20 prefLabel_Exact#{lang_suffix}^10 synonym_Exact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" - params["hl.fl"] = "resource_id prefLabel_Exact#{lang_suffix} synonym_Exact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^20 prefLabel#{lang_suffix}^10 synonymExact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix} synonymExact#{lang_suffix} #{QUERYLESS_FIELDS_STR}" elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*' text.gsub!(/\*+$/, '') query = "\"#{solr_escape(text)}\"" params["qt"] = "/suggest_ncbo" - params["qf"] = " prefLabel_Exact#{lang_suffix}^100 prefLabel_SuggestEdge#{lang_suffix}^50 synonym#{lang_suffix}_SuggestEdge^10 prefLabel#{lang_suffix}_SuggestNgram synonym#{lang_suffix}_SuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" + params["qf"] = " prefLabelExact#{lang_suffix}^100 prefLabelSuggestEdge#{lang_suffix}^50 synonym#{lang_suffix}SuggestEdge^10 prefLabel#{lang_suffix}SuggestNgram synonym#{lang_suffix}SuggestNgram resource_id #{QUERYLESS_FIELDS_STR}" params["pf"] = "prefLabelSuggest^50" - params["hl.fl"] = "prefLabel_Exact#{lang_suffix} prefLabel_SuggestEdge#{lang_suffix} synonym_SuggestEdge#{lang_suffix} prefLabel_SuggestNgram#{lang_suffix} synonym_SuggestNgram#{lang_suffix} resource_id #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "prefLabelExact#{lang_suffix} prefLabelSuggestEdge#{lang_suffix} synonymSuggestEdge#{lang_suffix} prefLabelSuggestNgram#{lang_suffix} synonymSuggestNgram#{lang_suffix} resource_id #{QUERYLESS_FIELDS_STR}" else if text.strip.empty? query = '*' @@ -117,9 +117,9 @@ def get_term_search_query(text, params={}) query = solr_escape(text) end - params["qf"] = "resource_id^100 prefLabel_Exact#{lang_suffix}^90 prefLabel#{lang_suffix}^70 synonym_Exact#{lang_suffix}^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}" + params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix}^90 prefLabel#{lang_suffix}^70 synonymExact#{lang_suffix}^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}" params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true" - params["hl.fl"] = "resource_id prefLabel_Exact#{lang_suffix} prefLabel#{lang_suffix } synonym_Exact#{lang_suffix} synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}" + params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix} prefLabel#{lang_suffix } synonymExact#{lang_suffix} synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}" params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true" end