From e378e64a093f4fd2529b4d34d30a9f902da75f20 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Tue, 27 Feb 2024 20:25:25 +0100 Subject: [PATCH] implement search ontologies content search endpoint --- Gemfile | 4 +- Gemfile.lock | 63 ++++++++++++------- controllers/search_controller.rb | 62 +++++++++++------- docker-compose.yml | 2 +- .../test_search_models_controller.rb | 58 ++++++++++++++++- 5 files changed, 143 insertions(+), 46 deletions(-) diff --git a/Gemfile b/Gemfile index fe1838c7..007533c4 100644 --- a/Gemfile +++ b/Gemfile @@ -14,7 +14,7 @@ gem 'sinatra', '~> 1.0' gem 'sinatra-advanced-routes' gem 'sinatra-contrib', '~> 1.0' gem 'request_store' - +gem 'addressable', '~> 2.8' # Rack middleware gem 'ffi' gem 'rack-accept', '~> 0.4' @@ -74,5 +74,5 @@ group :test do gem 'rack-test' gem 'simplecov', require: false gem 'simplecov-cobertura' # for codecov.io - gem 'webmock' + gem 'webmock', '~> 3.19.1' end \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index 3b5d8dfc..63fdd262 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,13 +11,16 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: ec99ed8d742fc8d458911cb0e74fa23d31cdd158 + revision: 10b90c17af12c71bfc95bfb4fc0bba5e47ff77af branch: feature/add-model-based-search specs: goo (0.0.2) addressable (~> 2.8) pry - rdf (= 1.0.8) + rdf (= 3.2.11) + rdf-raptor + rdf-rdfxml + rdf-vocab redis rest-client rsolr @@ -54,7 +57,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git - revision: 4773e7f90a1afba33f3d84d4b05105826465d8a9 + revision: 4e74b918119a246b908634c152b22a3f732d6abd branch: feature/index-ontology-agent-metadata specs: ontologies_linked_data (0.0.1) @@ -74,7 +77,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: aed51baf4106fd0f3d0e3f9238f0aad9406aa3f0 + revision: 180c818f7715baac64b2699bb452ef5c756f62c5 branch: master specs: sparql-client (1.0.1) @@ -109,6 +112,7 @@ GEM airbrussh (1.5.1) sshkit (>= 1.6.1, != 1.7.0) backports (3.24.1) + base64 (0.2.0) bcrypt (3.1.20) bcrypt_pbkdf (1.1.0) bigdecimal (1.4.2) @@ -171,15 +175,15 @@ GEM grpc (~> 1.59) get_process_mem (0.2.7) ffi (~> 1.0) - google-analytics-data (0.5.0) + google-analytics-data (0.6.0) google-analytics-data-v1beta (>= 0.11, < 2.a) google-cloud-core (~> 1.6) - google-analytics-data-v1beta (0.11.2) + google-analytics-data-v1beta (0.12.0) gapic-common (>= 0.21.1, < 2.a) google-cloud-errors (~> 1.0) - google-apis-analytics_v3 (0.14.0) - google-apis-core (>= 0.12.0, < 2.a) - google-apis-core (0.13.0) + google-apis-analytics_v3 (0.15.0) + google-apis-core (>= 0.14.0, < 2.a) + google-apis-core (0.14.0) addressable (~> 2.5, >= 2.5.1) googleauth (~> 1.9) httpclient (>= 2.8.1, < 3.a) @@ -195,11 +199,11 @@ GEM google-cloud-errors (1.3.1) google-protobuf (3.25.3-x86_64-darwin) google-protobuf (3.25.3-x86_64-linux) - googleapis-common-protos (1.4.0) - google-protobuf (~> 3.14) - googleapis-common-protos-types (~> 1.2) - grpc (~> 1.27) - googleapis-common-protos-types (1.12.0) + googleapis-common-protos (1.5.0) + google-protobuf (~> 3.18) + googleapis-common-protos-types (~> 1.7) + grpc (~> 1.41) + googleapis-common-protos-types (1.13.0) google-protobuf (~> 3.18) googleauth (1.11.0) faraday (>= 1.0, < 3.a) @@ -208,16 +212,17 @@ GEM multi_json (~> 1.11) os (>= 0.9, < 2.0) signet (>= 0.16, < 2.a) - grpc (1.61.0-x86_64-darwin) + grpc (1.62.0-x86_64-darwin) google-protobuf (~> 3.25) googleapis-common-protos-types (~> 1.0) - grpc (1.61.0-x86_64-linux) + grpc (1.62.0-x86_64-linux) google-protobuf (~> 3.25) googleapis-common-protos-types (~> 1.0) haml (5.2.2) temple (>= 0.8.0) tilt hashdiff (1.1.0) + htmlentities (4.3.4) http-accept (1.7.0) http-cookie (1.0.5) domain_name (~> 0.5) @@ -228,9 +233,11 @@ GEM json-schema (2.8.1) addressable (>= 2.4) json_pure (2.7.1) - jwt (2.7.1) + jwt (2.8.0) + base64 kgio (2.11.4) libxml-ruby (5.0.2) + link_header (0.0.8) logger (1.6.0) macaddr (1.7.2) systemu (~> 2.6.5) @@ -297,8 +304,21 @@ GEM rack-timeout (0.6.3) raindrops (0.20.1) rake (10.5.0) - rdf (1.0.8) - addressable (>= 2.2) + rdf (3.2.11) + link_header (~> 0.0, >= 0.0.8) + rdf-raptor (3.2.0) + ffi (~> 1.15) + rdf (~> 3.2) + rdf-rdfxml (3.2.2) + builder (~> 3.2) + htmlentities (~> 4.3) + rdf (~> 3.2) + rdf-xsd (~> 3.2) + rdf-vocab (3.2.7) + rdf (~> 3.2, >= 3.2.4) + rdf-xsd (3.2.1) + rdf (~> 3.2) + rexml (~> 3.2) redcarpet (3.6.0) redis (4.8.1) redis-activesupport (5.3.0) @@ -378,7 +398,7 @@ GEM unicorn (>= 4, < 7) uuid (2.3.9) macaddr (~> 1.0) - webmock (3.20.0) + webmock (3.19.1) addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) @@ -389,6 +409,7 @@ PLATFORMS DEPENDENCIES activesupport (~> 3.2) + addressable (~> 2.8) bcrypt_pbkdf (>= 1.0, < 2.0) bigdecimal (= 1.4.2) capistrano (~> 3) @@ -438,7 +459,7 @@ DEPENDENCIES sparql-client! unicorn unicorn-worker-killer - webmock + webmock (~> 3.19.1) BUNDLED WITH 2.4.21 diff --git a/controllers/search_controller.rb b/controllers/search_controller.rb index 22dbc1e9..63c2226b 100644 --- a/controllers/search_controller.rb +++ b/controllers/search_controller.rb @@ -31,18 +31,18 @@ class SearchController < ApplicationController 'resource_model:"ontology_submission"', 'submissionStatus_txt:ERROR_* OR submissionStatus_txt:"RDF" OR submissionStatus_txt:"UPLOADED"', "ontology_viewingRestriction_t:#{visibility}", - groups.map{|x| "ontology_group_txt:\"http://data.bioontology.org/groups/#{x.upcase}\""}.join(' OR '), - categories.map{|x| "ontology_hasDomain_txt:\"http://data.bioontology.org/categories/#{x.upcase}\""}.join(' OR '), - languages.map{|x| "naturalLanguage_txt:\"#{x.downcase}\""}.join(' OR '), + groups.map { |x| "ontology_group_txt:\"http://data.bioontology.org/groups/#{x.upcase}\"" }.join(' OR '), + categories.map { |x| "ontology_hasDomain_txt:\"http://data.bioontology.org/categories/#{x.upcase}\"" }.join(' OR '), + languages.map { |x| "naturalLanguage_txt:\"#{x.downcase}\"" }.join(' OR '), ] fq << "!ontology_viewOf_t:*" unless show_views - fq << format.map{|x| "hasOntologyLanguage_t:\"http://data.bioontology.org/ontology_formats/#{x}\""}.join(' OR ') unless format.blank? + fq << format.map { |x| "hasOntologyLanguage_t:\"http://data.bioontology.org/ontology_formats/#{x}\"" }.join(' OR ') unless format.blank? - fq << status.map{|x| "status_t:#{x}"}.join(' OR ') unless status.blank? - fq << is_of_type.map{|x| "isOfType_t:#{x}"}.join(' OR ') unless is_of_type.blank? - fq << has_format.map{|x| "hasFormalityLevel_t:#{x}"}.join(' OR ') unless has_format.blank? + fq << status.map { |x| "status_t:#{x}" }.join(' OR ') unless status.blank? + fq << is_of_type.map { |x| "isOfType_t:#{x}" }.join(' OR ') unless is_of_type.blank? + fq << has_format.map { |x| "hasFormalityLevel_t:#{x}" }.join(' OR ') unless has_format.blank? fq.reject!(&:blank?) @@ -63,8 +63,7 @@ class SearchController < ApplicationController page_size: page_size, sort: sort }) - - #resp = Ontology.search(query, search_params) + total_found = page_data.aggregate ontology_rank = LinkedData::Models::Ontology.rank docs = {} @@ -77,7 +76,7 @@ class SearchController < ApplicationController old_id = old_resource_id.split('/').last.to_i rescue 0 if acronym.blank? || old_id && id && (id <= old_id) - total_found-= 1 + total_found -= 1 next end @@ -85,17 +84,40 @@ class SearchController < ApplicationController acronyms_ids[acronym] = resource_id doc["ontology_rank"] = ontology_rank.dig(doc["ontology_acronym_text"], :normalizedScore) || 0.0 - docs[resource_id] = doc + docs[resource_id] = doc end docs = docs.values - docs.sort! {|a, b| [b["score"], b["ontology_rank"]] <=> [a["score"], a["ontology_rank"]]} unless params[:sort].present? + docs.sort! { |a, b| [b["score"], b["ontology_rank"]] <=> [a["score"], a["ontology_rank"]] } unless params[:sort].present? page = page_object(docs, total_found) reply 200, page end + + get '/content' do + query = params[:query] || params[:q] + page, page_size = page_params + ontologies = params.fetch("ontologies", "").split(',') + qf = params.fetch("qf", "") + + fq = [] + + fq << ontologies.map { |x| "ontology_t:\"#{x}\"" }.join(' OR ') unless ontologies.blank? + + + conn = SOLR::SolrConnector.new(Goo.search_conf, :ontology_data) + + resp = conn.search(query, fq: fq, qf: qf, + page: page, page_size: page_size) + + total_found = resp["response"]["numFound"] + docs = resp["response"]["docs"] + + + reply 200,page_object(docs, total_found) + end end namespace "/agents" do @@ -104,7 +126,7 @@ class SearchController < ApplicationController page, page_size = page_params type = params[:agentType].blank? ? nil : params[:agentType] - fq = "agentType_t:#{type}" if type + fq = "agentType_t:#{type}" if type qf = [ "acronymSuggestEdge^25 nameSuggestEdge^15 emailSuggestEdge^15 identifiersSuggestEdge^10 ", # start of the word first @@ -118,7 +140,6 @@ class SearchController < ApplicationController sort = "score desc, acronym_sort asc, name_sort asc" end - reply 200, search(LinkedData::Models::Agent, query, fq: fq, qf: qf, @@ -132,7 +153,7 @@ class SearchController < ApplicationController def search(model, query, params = {}) query = query.blank? ? "*" : query - resp = model.search(query, search_params(params)) + resp = model.search(query, search_params(params)) total_found = resp["response"]["numFound"] docs = resp["response"]["docs"] @@ -140,7 +161,7 @@ def search(model, query, params = {}) page_object(docs, total_found) end - def search_params(defType: "edismax", fq: , qf: , stopwords: "true", lowercaseOperators: "true", page: , page_size: , fl: '*,score', sort: ) + def search_params(defType: "edismax", fq:, qf:, stopwords: "true", lowercaseOperators: "true", page:, page_size:, fl: '*,score', sort:) { defType: defType, fq: fq, @@ -154,8 +175,7 @@ def search_params(defType: "edismax", fq: , qf: , stopwords: "true", lowercaseOp } end - - def process_search(params=nil) + def process_search(params = nil) params ||= @params text = params["q"] @@ -191,13 +211,13 @@ def process_search(params=nil) unless params['sort'] if !text.nil? && text[-1] == '*' - docs.sort! {|a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]]} + docs.sort! { |a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]] } else - docs.sort! {|a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]]} + docs.sort! { |a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]] } end end - #need to return a Page object + # need to return a Page object page = page_object(docs, total_found) reply 200, page diff --git a/docker-compose.yml b/docker-compose.yml index b6b8102b..370615a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -87,7 +87,7 @@ services: # volumes: #- solr_data:/var/solr/data agraph-ut: - image: franzinc/agraph:v8.0.0.rc1 + image: franzinc/agraph:v8.1.0 platform: linux/amd64 environment: - AGRAPH_SUPER_USER=test diff --git a/test/controllers/test_search_models_controller.rb b/test/controllers/test_search_models_controller.rb index 6f39e974..851c7a31 100644 --- a/test/controllers/test_search_models_controller.rb +++ b/test/controllers/test_search_models_controller.rb @@ -18,7 +18,7 @@ def test_show_all_collection get '/admin/search/collections' assert last_response.ok? res = MultiJson.load(last_response.body) - assert_equal res["collections"], Goo.search_connections.keys.map(&:to_s) + assert_equal res["collections"].sort, Goo.search_connections.keys.map(&:to_s).sort end def test_collection_schema @@ -341,4 +341,60 @@ def test_agents_search agents = MultiJson.load(last_response.body) assert_equal agent_org.id.to_s, agents["collection"].first["id"] end + + def test_search_data + count, acronyms, bro = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({ + process_submission: true, + process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false}, + acronym: "BROSEARCHTEST", + name: "BRO Search Test", + file_path: "./test/data/ontology_files/BRO_v3.2.owl", + ont_count: 1, + submission_count: 1, + ontology_type: "VALUE_SET_COLLECTION" + }) + + count, acronyms, mccl = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({ + process_submission: true, + process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false}, + acronym: "MCCLSEARCHTEST", + name: "MCCL Search Test", + file_path: "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl", + ont_count: 1, + submission_count: 1 + }) + + + subs = LinkedData::Models::OntologySubmission.all + count = [] + subs.each do |s| + s.bring_remaining + s.index_all_data(Logger.new($stdout)) + count << Goo.sparql_query_client.query("SELECT (COUNT( DISTINCT ?id) as ?c) FROM <#{s.id}> WHERE {?id ?p ?v}") + .first[:c] + .to_i + end + + get "/search/ontologies/content?q=*" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_equal count.sum, res['totalCount'] + + + get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0,BROSEARCHTEST-0" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_equal count.sum, res['totalCount'] + + get "/search/ontologies/content?q=*&ontologies=BROSEARCHTEST-0" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_includes count, res['totalCount'] + + get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_includes count, res['totalCount'] + + end end