diff --git a/Gemfile b/Gemfile
index 1977a788..a1f48928 100644
--- a/Gemfile
+++ b/Gemfile
@@ -47,12 +47,12 @@ gem 'haml', '~> 5.2.2' # pin see https://github.com/ncbo/ontologies_api/pull/107
 gem 'redcarpet'
 
 # NCBO gems (can be from a local dev path or from rubygems/git)
-gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'development'
 gem 'ncbo_annotator', git: 'https://github.com/ontoportal-lirmm/ncbo_annotator.git', branch: 'development'
 gem 'ncbo_cron', git: 'https://github.com/ontoportal-lirmm/ncbo_cron.git', branch: 'master'
 gem 'ncbo_ontology_recommender', git: 'https://github.com/ncbo/ncbo_ontology_recommender.git', branch: 'master'
 gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master'
-gem 'ontologies_linked_data', git: 'https://github.com/ontoportal-lirmm/ontologies_linked_data.git', branch: 'master'
+gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'development'
+gem 'ontologies_linked_data', git: 'https://github.com/ontoportal-lirmm/ontologies_linked_data.git', branch: 'development'
 
 group :development do
   # bcrypt_pbkdf and ed25519 is required for capistrano deployments when using ed25519 keys; see https://github.com/miloserdow/capistrano-deploy/issues/42
@@ -77,5 +77,5 @@ group :test do
   gem 'rack-test'
   gem 'simplecov', require: false
   gem 'simplecov-cobertura' # for codecov.io
-  gem 'webmock'
+  gem 'webmock', '~> 3.19.1'
 end
\ No newline at end of file
diff --git a/Gemfile.lock b/Gemfile.lock
index cf14b8df..e1fe41a7 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -11,7 +11,7 @@ GIT
 
 GIT
   remote: https://github.com/ontoportal-lirmm/goo.git
-  revision: 3f8b1f0b62c4334306f9ed5cb7b17a1b645e7db3
+  revision: 0e554fce49713ce4d5a742a06c2fb59a547caf47
   branch: development
   specs:
     goo (0.0.2)
@@ -57,7 +57,7 @@ GIT
 
 GIT
   remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git
-  revision: 337dce98ec27627d14a440ff2a6ed09483cdac12
+  revision: 026c9c46baf6c0d638568528c2adcb7bcb1c2796
   branch: development
   specs:
     ontologies_linked_data (0.0.1)
@@ -183,7 +183,7 @@ GEM
       google-cloud-errors (~> 1.0)
     google-apis-analytics_v3 (0.15.0)
       google-apis-core (>= 0.14.0, < 2.a)
-    google-apis-core (0.14.0)
+    google-apis-core (0.14.1)
       addressable (~> 2.5, >= 2.5.1)
       googleauth (~> 1.9)
       httpclient (>= 2.8.1, < 3.a)
@@ -203,7 +203,7 @@ GEM
       google-protobuf (~> 3.18)
       googleapis-common-protos-types (~> 1.7)
       grpc (~> 1.41)
-    googleapis-common-protos-types (1.13.0)
+    googleapis-common-protos-types (1.14.0)
       google-protobuf (~> 3.18)
     googleauth (1.11.0)
       faraday (>= 1.0, < 3.a)
@@ -229,13 +229,13 @@ GEM
     httpclient (2.8.3)
     i18n (0.9.5)
       concurrent-ruby (~> 1.0)
-    json (2.7.1)
+    json (2.7.2)
     json-ld (3.0.2)
       multi_json (~> 1.12)
       rdf (>= 2.2.8, < 4.0)
     json-schema (2.8.1)
       addressable (>= 2.4)
-    json_pure (2.7.1)
+    json_pure (2.7.2)
     jwt (2.8.1)
       base64
     kgio (2.11.4)
@@ -273,11 +273,11 @@ GEM
       net-ssh (>= 2.6.5, < 8.0.0)
     net-sftp (4.0.0)
       net-ssh (>= 5.0.0, < 8.0.0)
-    net-smtp (0.4.0.1)
+    net-smtp (0.5.0)
       net-protocol
-    net-ssh (7.2.1)
+    net-ssh (7.2.3)
     netrc (0.11.0)
-    newrelic_rpm (9.7.1)
+    newrelic_rpm (9.8.0)
     oj (2.18.5)
     omni_logger (0.1.4)
       logger
@@ -289,7 +289,7 @@ GEM
     pry (0.14.2)
      coderay (~> 1.1)
       method_source (~> 1.0)
-    public_suffix (5.0.4)
+    public_suffix (5.0.5)
     rack (1.6.13)
     rack-accept (0.4.5)
       rack (>= 0.4)
@@ -346,7 +346,7 @@ GEM
       netrc (~> 0.8)
     retriable (3.1.2)
     rexml (3.2.6)
-    rsolr (2.5.0)
+    rsolr (2.6.0)
       builder (>= 2.1.2)
       faraday (>= 0.9, < 3, != 2.0.0)
     ruby-xxHash (0.4.0.2)
@@ -381,7 +381,8 @@ GEM
       rack-test
       sinatra (~> 1.4.0)
       tilt (>= 1.3, < 3)
-    sshkit (1.22.0)
+    sshkit (1.22.1)
+      base64
+      mutex_m
       net-scp (>= 1.1.2)
       net-sftp (>= 2.1.2)
@@ -402,12 +403,13 @@ GEM
       unicorn (>= 4, < 7)
     uuid (2.3.9)
       macaddr (~> 1.0)
-    webmock (3.23.0)
+    webmock (3.19.1)
       addressable (>= 2.8.0)
       crack (>= 0.3.2)
       hashdiff (>= 0.4.0, < 2.0.0)
 
 PLATFORMS
+  x86_64-darwin-23
   x86_64-linux
 
 DEPENDENCIES
@@ -463,7 +465,7 @@ DEPENDENCIES
   sparql-client!
   unicorn
   unicorn-worker-killer
-  webmock
+  webmock (~> 3.19.1)
 
 BUNDLED WITH
-   2.3.23
+   2.4.22
diff --git a/controllers/admin_controller.rb b/controllers/admin_controller.rb
index 747def93..70b94411 100644
--- a/controllers/admin_controller.rb
+++ b/controllers/admin_controller.rb
@@ -127,6 +127,79 @@ class AdminController < ApplicationController
     halt 204
   end
 
+  namespace "/search" do
+    get '/collections' do
+      conn = SOLR::SolrConnector.new(Goo.search_conf, '')
+      collections = { collections: conn.fetch_all_collections }
+      reply(200, collections)
+    end
+
+    get '/collections/:collection/schema' do
+      collection = params[:collection].to_sym
+      conn = SOLR::SolrConnector.new(Goo.search_conf, collection)
+      collection_schema = conn.fetch_schema
+
+      reply(200, collection_schema)
+    end
+
+    post '/collections/:collection/schema/init' do
+      collection = params[:collection].to_sym
+      conn = SOLR::SolrConnector.new(Goo.search_conf, collection)
+      collection_schema = conn.init_schema
+      reply(200, collection_schema)
+    end
+
+    post '/collections/:collection/search' do
+      collection = params[:collection].to_sym
+
+      search_keys = %w[defType fq qf sort start rows fl stopwords lowercaseOperators]
+
+      search_params = params.select { |key, _| search_keys.include?(key) }
+      search_query = params[:query] || params[:q]
+      search_query = search_query.blank? ? '*' : search_query
+      conn = SOLR::SolrConnector.new(Goo.search_conf, collection)
+      reply(200, conn.search(search_query, search_params).to_h)
+    end
+
+    post '/index_batch/:model_name' do
+      error 500, "model_name parameter not set" if params["model_name"].blank?
+
+      model = Goo.model_by_name(params["model_name"].to_sym)
+      error 500, "#{params["model_name"]} is not indexable" if model.nil? || !model.index_enabled?
+
+      all_attrs = get_attributes_to_include([:all], model)
+
+      collections = model.where.include(all_attrs).all
+
+      indexed = []
+      not_indexed = []
+      collections.each do |m|
+        begin
+          response = m.index.dig("responseHeader", "status")
+          if response.eql?(0)
+            indexed << m.id
+          else
+            not_indexed << m.id
+          end
+        rescue StandardError
+          not_indexed << m.id
+        end
+      end
+
+      if !indexed.empty?
+        msg = "Batch indexing for #{model.model_name} completed for"
+
+        if not_indexed.empty?
+          msg += " all models"
+        else
+          msg += " #{indexed.join(', ')} and not for the following #{not_indexed.join(', ')}, see logs for more details"
+        end
+        reply(200, msg)
+      else
+        reply(500, "Batch indexing for #{model.model_name} failed")
+      end
+    end
+  end
+
   private
 
   def process_long_operation(timeout, args)
diff --git a/controllers/search_controller.rb b/controllers/search_controller.rb
index 3bc1c13f..9a354f08 100644
--- a/controllers/search_controller.rb
+++ b/controllers/search_controller.rb
@@ -5,16 +5,178 @@ class SearchController < ApplicationController
   namespace "/search" do
     # execute a search query
     get do
-      process_search()
+      process_search
     end
 
     post do
-      process_search()
+      process_search
+    end
+
+    namespace "/ontologies" do
+      get do
+        query = params[:query] || params[:q]
+        groups = params.fetch("groups", "").split(',')
+        categories = params.fetch("hasDomain", "").split(',')
+        languages = params.fetch("languages", "").split(',')
+        status = params.fetch("status", "").split(',')
+        format = params.fetch("hasOntologyLanguage", "").split(',')
+        is_of_type = params.fetch("isOfType", "").split(',')
+        has_format = params.fetch("hasFormat", "").split(',')
+        visibility = params["visibility"]&.presence || "public"
+        show_views = params["show_views"] == 'true'
+        sort = params.fetch("sort", "score desc, ontology_name_sort asc, ontology_acronym_sort asc")
+        page, page_size = page_params
+
+        fq = [
+          'resource_model:"ontology_submission"',
+          'submissionStatus_txt:ERROR_* OR submissionStatus_txt:"RDF" OR submissionStatus_txt:"UPLOADED"',
+          "ontology_viewingRestriction_t:#{visibility}",
+          groups.map { |x| "ontology_group_txt:\"http://data.bioontology.org/groups/#{x.upcase}\"" }.join(' OR '),
+          categories.map { |x| "ontology_hasDomain_txt:\"http://data.bioontology.org/categories/#{x.upcase}\"" }.join(' OR '),
+          languages.map { |x| "naturalLanguage_txt:\"#{x.downcase}\"" }.join(' OR '),
+        ]
+
+        fq << "!ontology_viewOf_t:*" unless show_views
+
+        fq << format.map { |x| "hasOntologyLanguage_t:\"http://data.bioontology.org/ontology_formats/#{x}\"" }.join(' OR ') unless format.blank?
+
+        fq << status.map { |x| "status_t:#{x}" }.join(' OR ') unless status.blank?
+        fq << is_of_type.map { |x| "isOfType_t:#{x}" }.join(' OR ') unless is_of_type.blank?
+        fq << has_format.map { |x| "hasFormalityLevel_t:#{x}" }.join(' OR ') unless has_format.blank?
+
+        fq.reject!(&:blank?)
+
+        if params[:qf]
+          qf = params[:qf]
+        else
+          qf = [
+            "ontology_acronymSuggestEdge^25 ontology_nameSuggestEdge^15 descriptionSuggestEdge^10 ", # start of the word first
+            "ontology_acronym_text^15 ontology_name_text^10 description_text^5 ", # full word match
+            "ontology_acronymSuggestNgram^2 ontology_nameSuggestNgram^1.5 descriptionSuggestNgram" # substring match last
+          ].join(' ')
+        end
+
+        page_data = search(Ontology, query, {
+          fq: fq,
+          qf: qf,
+          page: page,
+          page_size: page_size,
+          sort: sort
+        })
+
+        total_found = page_data.aggregate
+        ontology_rank = LinkedData::Models::Ontology.rank
+        docs = {}
+        acronyms_ids = {}
+        page_data.each do |doc|
+          resource_id = doc["resource_id"]
+          id = doc["submissionId_i"]
+          acronym = doc["ontology_acronym_text"]
+          old_resource_id = acronyms_ids[acronym]
+          old_id = old_resource_id.split('/').last.to_i rescue 0
+
+          if acronym.blank? || old_id && id && (id <= old_id)
+            total_found -= 1
+            next
+          end
+
+          docs.delete(old_resource_id)
+          acronyms_ids[acronym] = resource_id
+
+          doc["ontology_rank"] = ontology_rank.dig(doc["ontology_acronym_text"], :normalizedScore) || 0.0
+          docs[resource_id] = doc
+        end
+
+        docs = docs.values
+
+        docs.sort! { |a, b| [b["score"], b["ontology_rank"]] <=> [a["score"], a["ontology_rank"]] } unless params[:sort].present?
+
+        page = page_object(docs, total_found)
+
+        reply 200, page
+      end
+
+      get '/content' do
+        query = params[:query] || params[:q]
+        page, page_size = page_params
+        ontologies = params.fetch("ontologies", "").split(',')
+        types = params.fetch("types", "").split(',')
+        qf = params.fetch("qf", "")
+
+        fq = []
+
+        fq << ontologies.map { |x| "ontology_t:\"#{x}\"" }.join(' OR ') unless ontologies.blank?
+        fq << types.map { |x| "type_t:\"#{x}\" OR type_txt:\"#{x}\"" }.join(' OR ') unless types.blank?
+
+        conn = SOLR::SolrConnector.new(Goo.search_conf, :ontology_data)
+        resp = conn.search(query, fq: fq, qf: qf, defType: "edismax",
+                           start: (page - 1) * page_size, rows: page_size)
+
+        total_found = resp["response"]["numFound"]
+        docs = resp["response"]["docs"]
+
+        reply 200, page_object(docs, total_found)
+      end
+    end
+
+    namespace "/agents" do
+      get do
+        query = params[:query] || params[:q]
+        page, page_size = page_params
+        type = params[:agentType].blank? ? nil : params[:agentType]
+
+        fq = "agentType_t:#{type}" if type
+
+        qf = [
+          "acronymSuggestEdge^25 nameSuggestEdge^15 emailSuggestEdge^15 identifiersSuggestEdge^10 ", # start of the word first
+          "identifiers_texts^20 acronym_text^15 name_text^10 email_text^10 ", # full word match
+          "acronymSuggestNgram^2 nameSuggestNgram^1.5 email_text^1" # substring match last
+        ].join(' ')
+
+        if params[:sort]
+          sort = "#{params[:sort]} asc, score desc"
+        else
+          sort = "score desc, acronym_sort asc, name_sort asc"
+        end
+
+        reply 200, search(LinkedData::Models::Agent,
+                          query,
+                          fq: fq, qf: qf,
+                          page: page, page_size: page_size,
+                          sort: sort)
+      end
     end
 
     private
 
-    def process_search(params=nil)
+    def search(model, query, params = {})
+      query = query.blank? ? "*" : query
+
+      resp = model.search(query, search_params(params))
+
+      total_found = resp["response"]["numFound"]
+      docs = resp["response"]["docs"]
+
+      page_object(docs, total_found)
+    end
+
+    def search_params(defType: "edismax", fq:, qf:, stopwords: "true", lowercaseOperators: "true", page:, page_size:, fl: '*,score', sort:)
+      {
+        defType: defType,
+        fq: fq,
+        qf: qf,
+        sort: sort,
+        start: (page - 1) * page_size,
+        rows: page_size,
+        fl: fl,
+        stopwords: stopwords,
+        lowercaseOperators: lowercaseOperators,
+      }
+    end
+
+    def process_search(params = nil)
       params ||= @params
 
       text = params["q"]
@@ -50,13 +212,13 @@ def process_search(params=nil)
 
     unless params['sort']
       if !text.nil? && text[-1] == '*'
-        docs.sort! {|a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]]}
+        docs.sort! { |a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]] }
       else
-        docs.sort! {|a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]]}
+        docs.sort! { |a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]] }
       end
     end
 
-    #need to return a Page object
+    # need to return a Page object
     page = page_object(docs, total_found)
 
     reply 200, page
diff --git a/docker-compose.yml b/docker-compose.yml
index 8a940f8e..370615a6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,10 +1,5 @@
 x-app: &app
-  build:
-    context: .
-    args:
-      RUBY_VERSION: '2.7'
-  # Increase the version number in the image tag every time Dockerfile or its arguments is changed
-  image: ontologies_ld-dev:0.0.2
+  image: agroportal/ontologies_api:development
   environment: &env
     # default bundle config resolves to /usr/local/bundle/config inside of the container
     # we are setting it to local app directory if we need to use 'bundle config local'
@@ -13,15 +8,15 @@ x-app: &app
     COVERAGE: 'true' # enable simplecov code coverage
     REDIS_HOST: redis-ut
     REDIS_PORT: 6379
-    SOLR_TERM_SEARCH_URL: http://solr-term-ut:8983/solr/term_search_core1
-    SOLR_PROP_SEARCH_URL: http://solr-prop-ut:8984/solr/prop_search_core1
+    SOLR_TERM_SEARCH_URL: http://solr-ut:8983/solr
+    SOLR_PROP_SEARCH_URL: http://solr-ut:8983/solr
   stdin_open: true
   tty: true
   command: /bin/bash
   volumes:
     # bundle volume for hosting gems installed by bundle; it speeds up gem install in local development
     - bundle:/srv/ontoportal/bundle
-    - .:/srv/ontoportal/ontologies_linked_data
+    - .:/srv/ontoportal/ontologies_api
     # mount directory containing development version of the gems if you need to use 'bundle config local'
     #- /Users/alexskr/ontoportal:/Users/alexskr/ontoportal
   depends_on: &depends_on
@@ -33,6 +28,27 @@ x-app: &app
       condition: service_healthy
 
 services:
+  api:
+    <<: *app
+    env_file:
+      .env
+    environment:
+      <<: *env
+      GOO_BACKEND_NAME: 4store
+      GOO_PORT: 9000
+      GOO_HOST: 4store-ut
+      GOO_PATH_QUERY: /sparql/
+      GOO_PATH_DATA: /data/
+      GOO_PATH_UPDATE: /update/
+    profiles:
+      - 4store
+    depends_on:
+      - solr-ut
+      - redis-ut
+      - mgrep-ut
+      - 4store-ut
+    ports:
+      - "9393:9393"
 
   mgrep-ut:
     image: ontoportal/mgrep-ncbo:0.1
@@ -52,20 +68,24 @@ services:
   4store-ut:
     image: bde2020/4store
+    volumes:
+      - 4store:/var/lib/4store
     command: >
-      bash -c "4s-backend-setup --segments 4 ontoportal_kb
-      && 4s-backend ontoportal_kb
-      && 4s-httpd -D -s-1 -p 9000 ontoportal_kb"
+      bash -c "if [ ! -d '/var/lib/4store/ontoportal_kb' ]; then 4s-backend-setup --segments 4 ontoportal_kb; fi ; 4s-backend ontoportal_kb ; 4s-httpd -D -s-1 -p 9000 ontoportal_kb"
+
     ports:
       - "9000:9000"
     profiles:
       - fs
+      - 4store
 
   solr-ut:
-    image: solr:8
-    ports:
-      - 8983:8983
-    command: bin/solr start -cloud -f
+      image: solr:8
+      ports:
+        - 8983:8983
+      command: bin/solr start -cloud -f
+      # volumes:
+      #- solr_data:/var/solr/data
 
   agraph-ut:
     image: franzinc/agraph:v8.1.0
     platform: linux/amd64
@@ -133,4 +153,6 @@ services:
 
 volumes:
   bundle:
-  agdata:
\ No newline at end of file
+  agdata:
+  4store:
+  #solr_data:
\ No newline at end of file
diff --git a/test/controllers/test_agents_controller.rb b/test/controllers/test_agents_controller.rb
index de36bc36..658ef38b 100644
--- a/test/controllers/test_agents_controller.rb
+++ b/test/controllers/test_agents_controller.rb
@@ -168,24 +168,9 @@ def test_delete_agent
   end
 
   private
+
   def _agent_data(type: 'organization')
-    schema_agencies = LinkedData::Models::AgentIdentifier::IDENTIFIER_SCHEMES.keys
-    users = LinkedData::Models::User.all
-    users = [LinkedData::Models::User.new(username: "tim", email: "tim@example.org", password: "password").save] if users.empty?
-    test_identifiers = 5.times.map { |i| { notation: rand.to_s[2..11], schemaAgency: schema_agencies.sample.to_s } }
-    user = users.sample.id.to_s
-
-    i = rand.to_s[2..11]
-    return {
-      agentType: type,
-      name: "name #{i}",
-      homepage: "home page #{i}",
-      acronym: "acronym #{i}",
-      email: "email_#{i}@test.com",
-      identifiers: test_identifiers.sample(2).map { |x| x.merge({ creator: user }) },
-      affiliations: [],
-      creator: user
-    }
+    agent_data(type: type)
   end
 
   def _find_agent(name)
diff --git a/test/controllers/test_search_controller.rb b/test/controllers/test_search_controller.rb
index 17ca5ebd..7549ca3e 100644
--- a/test/controllers/test_search_controller.rb
+++ b/test/controllers/test_search_controller.rb
@@ -3,6 +3,11 @@
 class TestSearchController < TestCase
 
   def self.before_suite
+    LinkedData::Models::Ontology.indexClear
+    LinkedData::Models::Agent.indexClear
+    LinkedData::Models::Class.indexClear
+    LinkedData::Models::OntologyProperty.indexClear
+
     count, acronyms, bro = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
       process_submission: true,
       acronym: "BROSEARCHTEST",
@@ -59,8 +64,10 @@ def self.after_suite
     @@test_pc_child.delete
     LinkedData::SampleData::Ontology.delete_ontologies_and_submissions
     @@test_user.delete
+    LinkedData::Models::Ontology.indexClear
+    LinkedData::Models::Agent.indexClear
     LinkedData::Models::Class.indexClear
-    LinkedData::Models::Class.indexCommit
+    LinkedData::Models::OntologyProperty.indexClear
   end
 
   def test_search
@@ -257,4 +264,5 @@ def test_multilingual_search
   end
 
+
 end
diff --git a/test/controllers/test_search_models_controller.rb b/test/controllers/test_search_models_controller.rb
new file mode 100644
index 00000000..851c7a31
--- /dev/null
+++ b/test/controllers/test_search_models_controller.rb
@@ -0,0 +1,400 @@
+require_relative '../test_case'
+
+class TestSearchModelsController < TestCase
+
+  def self.after_suite
+    LinkedData::SampleData::Ontology.delete_ontologies_and_submissions
+    LinkedData::Models::Ontology.indexClear
+    LinkedData::Models::Agent.indexClear
+    LinkedData::Models::Class.indexClear
+    LinkedData::Models::OntologyProperty.indexClear
+  end
+
+  def setup
+    self.class.after_suite
+  end
+
+  def test_show_all_collection
+    get '/admin/search/collections'
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    assert_equal res["collections"].sort, Goo.search_connections.keys.map(&:to_s).sort
+  end
+
+  def test_collection_schema
+    get '/admin/search/collections'
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    collection = res["collections"].first
+    refute_nil collection
+    get "/admin/search/collections/#{collection}/schema"
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    fields = res["fields"].map { |x| x["name"] }
+    assert_includes fields, 'id'
+    assert_includes fields, 'resource_id'
+    assert_includes fields, 'resource_model'
+  end
+
+  def test_collection_search
+
+    count, acronyms, bro = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
+      process_submission: false,
+      acronym: "BROSEARCHTEST",
+      name: "BRO Search Test",
+      file_path: "./test/data/ontology_files/BRO_v3.2.owl",
+      ont_count: 1,
+      submission_count: 1,
+      ontology_type: "VALUE_SET_COLLECTION"
+    })
+    collection = 'ontology_metadata'
+    post "/admin/search/collections/#{collection}/search", { q: "" }
+
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    assert_equal 2, res['response']['numFound']
+  end
+
+  def test_ontology_metadata_search
+    count, acronyms, bro = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
+      process_submission: false,
+      acronym: "BROSEARCHTEST",
+      name: "BRO Search Test",
+      file_path: "./test/data/ontology_files/BRO_v3.2.owl",
+      ont_count: 1,
+      submission_count: 1,
+      ontology_type: "VALUE_SET_COLLECTION"
+    })
+
+    count, acronyms, mccl = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
+      process_submission: false,
+      acronym: "MCCLSEARCHTEST",
+      name: "MCCL Search Test",
+      file_path: "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl",
+      ont_count: 1,
+      submission_count: 1
+    })
+
+    # Search ACRONYM
+    ## full word
+    get '/search/ontologies?query=BROSEARCHTEST-0'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+
+    ### start
+    get '/search/ontologies?query=BROSEARCHTEST'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+
+    ## part of the word
+    get '/search/ontologies?query=BRO'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+
+    # Search name
+    ## full word
+    ### start
+    get '/search/ontologies?query=MCCL Search'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 'MCCLSEARCHTEST-0', response.first['ontology_acronym_text']
+    ### in the middle
+    get '/search/ontologies?query=Search Test'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 2, response.size
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+    assert_equal 'MCCLSEARCHTEST-0', response.last['ontology_acronym_text']
+    ## part of the word
+    ### start
+    get '/search/ontologies?query=MCCL Sea'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 'MCCLSEARCHTEST-0', response.first['ontology_acronym_text']
+    ### in the middle
+    get '/search/ontologies?query=Sea'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 2, response.size
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+    assert_equal 'MCCLSEARCHTEST-0', response.last['ontology_acronym_text']
+
+    ## full text
+    get '/search/ontologies?query=MCCL Search Test'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 'MCCLSEARCHTEST-0', response.first['ontology_acronym_text']
+
+    # Search description
+    ## full word
+    ### start
+    get '/search/ontologies?query=Description'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 2, response.size
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+    assert_equal 'MCCLSEARCHTEST-0', response.last['ontology_acronym_text']
+
+    ### in the middle
+    get '/search/ontologies?query=1'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 2, response.size
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+    assert_equal 'MCCLSEARCHTEST-0', response.last['ontology_acronym_text']
+
+    ## part of the word
+    ### start
+    get '/search/ontologies?query=Desc'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 2, response.size
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+    assert_equal 'MCCLSEARCHTEST-0', response.last['ontology_acronym_text']
+
+    ### full text
+    get '/search/ontologies?query=Description 1'
+    response = MultiJson.load(last_response.body)["collection"]
+    assert_equal 2, response.size
+    assert_equal 'BROSEARCHTEST-0', response.first['ontology_acronym_text']
+    assert_equal 'MCCLSEARCHTEST-0', response.last['ontology_acronym_text']
+  end
+
+  def test_ontology_metadata_filters
+    num_onts_created, created_ont_acronyms, ontologies = create_ontologies_and_submissions(ont_count: 10, submission_count: 1)
+
+    group1 = LinkedData::Models::Group.find('group-1').first || LinkedData::Models::Group.new(acronym: 'group-1', name: "Test Group 1").save
+    group2 = LinkedData::Models::Group.find('group-2').first || LinkedData::Models::Group.new(acronym: 'group-2', name: "Test Group 2").save
+    category1 = LinkedData::Models::Category.find('category-1').first || LinkedData::Models::Category.new(acronym: 'category-1', name: "Test Category 1").save
+    category2 = LinkedData::Models::Category.find('category-2').first || LinkedData::Models::Category.new(acronym: 'category-2', name: "Test Category 2").save
+
+    ontologies1 = ontologies[0..5].each do |o|
+      o.bring_remaining
+      o.group = [group1]
+      o.hasDomain = [category1]
+      o.save
+    end
+
+    ontologies2 = ontologies[6..8].each do |o|
+      o.bring_remaining
+      o.group = [group2]
+      o.hasDomain = [category2]
+      o.save
+    end
+
+    # test filter by group and category
+    get "/search/ontologies?page=1&pagesize=100&groups=#{group1.acronym}"
+    assert last_response.ok?
+    assert_equal ontologies1.size, MultiJson.load(last_response.body)["collection"].length
+    get "/search/ontologies?page=1&pagesize=100&groups=#{group2.acronym}"
+    assert last_response.ok?
+    assert_equal ontologies2.size, MultiJson.load(last_response.body)["collection"].length
+
+    get "/search/ontologies?page=1&pagesize=100&groups=#{group1.acronym},#{group2.acronym}"
+    assert last_response.ok?
+    assert_equal ontologies1.size + ontologies2.size, MultiJson.load(last_response.body)["collection"].length
+
+    get "/search/ontologies?page=1&pagesize=100&hasDomain=#{category1.acronym}"
+    assert last_response.ok?
+    assert_equal ontologies1.size, MultiJson.load(last_response.body)["collection"].length
+
+    get "/search/ontologies?page=1&pagesize=100&hasDomain=#{category2.acronym}"
+    assert last_response.ok?
+    assert_equal ontologies2.size, MultiJson.load(last_response.body)["collection"].length
+
+    get "/search/ontologies?page=1&pagesize=100&hasDomain=#{category2.acronym},#{category1.acronym}"
+    assert last_response.ok?
+    assert_equal ontologies1.size + ontologies2.size, MultiJson.load(last_response.body)["collection"].length
+
+    get "/search/ontologies?page=1&pagesize=100&hasDomain=#{category2.acronym}&groups=#{group1.acronym}"
+    assert last_response.ok?
+    assert_equal 0, MultiJson.load(last_response.body)["collection"].length
+    get "/search/ontologies?page=1&pagesize=100&hasDomain=#{category2.acronym}&groups=#{group2.acronym}"
+    assert last_response.ok?
+    assert_equal ontologies2.size, MultiJson.load(last_response.body)["collection"].length
+
+    ontologies3 = ontologies[9]
+    ontologies3.bring_remaining
+    ontologies3.group = [group1, group2]
+    ontologies3.hasDomain = [category1, category2]
+    ontologies3.name = "name search test"
+    ontologies3.save
+
+    ontologies.first.name = "sort by test"
+    ontologies.first.save
+    sub = ontologies.first.latest_submission(status: :any).bring_remaining
+    sub.status = 'retired'
+    sub.description = "234"
+    sub.creationDate = DateTime.yesterday.to_datetime
+    sub.hasOntologyLanguage = LinkedData::Models::OntologyFormat.find('SKOS').first
+    sub.save
+
+    # test search with sort
+    get "/search/ontologies?page=1&pagesize=100&q=tes&sort=ontology_name_sort asc"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    refute_empty submissions["collection"]
+    assert_equal ontologies.map { |x| x.bring(:name).name }.sort, submissions["collection"].map { |x| x["ontology_name_text"] }
+
+    get "/search/ontologies?page=1&pagesize=100&q=tes&sort=creationDate_dt desc"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    refute_empty submissions["collection"]
+    assert_equal ontologies.map { |x| x.latest_submission(status: :any).bring(:creationDate).creationDate.to_s.split('T').first }.sort.reverse,
+                 submissions["collection"].map { |x| x["creationDate_dt"].split('T').first }
+
+    # test search with format
+    get "/search/ontologies?page=1&pagesize=100&q=tes&hasOntologyLanguage=SKOS"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    refute_empty submissions["collection"]
+    assert_equal 1, submissions["collection"].size
+
+    get "/search/ontologies?page=1&pagesize=100&q=tes&hasOntologyLanguage=OWL"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    refute_empty submissions["collection"]
+    assert_equal ontologies.size - 1, submissions["collection"].size
+
+    # test ontology filter with submission filter attributes
+    get "/search/ontologies?page=1&pagesize=100&q=tes&groups=group-2&hasDomain=category-2&hasOntologyLanguage=OWL"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    refute_empty submissions["collection"]
+    assert_equal ontologies2.size + 1, submissions["collection"].size
+
+    # test ontology filter with status
+    get "/search/ontologies?page=1&pagesize=100&status=retired"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    refute_empty submissions["collection"]
+    assert_equal 1, submissions["collection"].size
+
+    get "/search/ontologies?page=1&pagesize=100&status=alpha,beta,production"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    refute_empty submissions["collection"]
+    assert_equal ontologies.size - 1, submissions["collection"].size
+
+    get "/search/ontologies?page=1&pagesize=100&q=234"
+    assert last_response.ok?
+    submissions = MultiJson.load(last_response.body)
+    assert_equal "http://data.bioontology.org/ontologies/TEST-ONT-0/submissions/1", submissions["collection"].first["id"]
+  end
+
+  def test_agents_search
+    agents_tmp = [agent_data(type: 'organization'), agent_data(type: 'organization'), agent_data(type: 'person')]
+    agents_tmp.each do |a|
+      post "/agents", MultiJson.dump(a), "CONTENT_TYPE" => "application/json"
+      assert last_response.status == 201
+    end
+
+    agent_person = LinkedData::Models::Agent.where(agentType: 'person').all.first.bring_remaining
+    agent_org = LinkedData::Models::Agent.where(agentType: 'organization').all.first.bring_remaining
+
+    get "/search/agents?&q=name"
+    assert last_response.ok?
+    agents = MultiJson.load(last_response.body)
+    assert_equal 3, agents["totalCount"]
+
+    get "/search/agents?&q=name&agentType=organization"
+    assert last_response.ok?
+    agents = MultiJson.load(last_response.body)
+    assert_equal 2, agents["totalCount"]
+
+    get "/search/agents?&q=name&agentType=person"
+    assert last_response.ok?
+    agents = MultiJson.load(last_response.body)
+    assert_equal 1, agents["totalCount"]
+
+    get "/search/agents?&q=#{agent_person.name}"
+    assert last_response.ok?
+    agents = MultiJson.load(last_response.body)
+    assert_equal agent_person.id.to_s, agents["collection"].first["id"]
+
+    get "/search/agents?&q=#{agent_org.acronym}"
+    assert last_response.ok?
+    agents = MultiJson.load(last_response.body)
+    assert_equal agent_org.id.to_s, agents["collection"].first["id"]
+
+    get "/search/agents?&q=#{agent_org.identifiers.first.id.split('/').last}"
+    assert last_response.ok?
+    agents = MultiJson.load(last_response.body)
+    assert_equal agent_org.id.to_s, agents["collection"].first["id"]
+  end
+
+  def test_search_data
+    count, acronyms, bro = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
+      process_submission: true,
+      process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false },
+      acronym: "BROSEARCHTEST",
+      name: "BRO Search Test",
+      file_path: "./test/data/ontology_files/BRO_v3.2.owl",
+      ont_count: 1,
+      submission_count: 1,
+      ontology_type: "VALUE_SET_COLLECTION"
+    })
+
+    count, acronyms, mccl = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({
+      process_submission: true,
+      process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false },
+      acronym: "MCCLSEARCHTEST",
+      name: "MCCL Search Test",
+      file_path: "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl",
+      ont_count: 1,
+      submission_count: 1
+    })
+
+    subs = LinkedData::Models::OntologySubmission.all
+    count = []
+    subs.each do |s|
+      s.bring_remaining
+      s.index_all_data(Logger.new($stdout))
+      count << Goo.sparql_query_client.query("SELECT (COUNT( DISTINCT ?id) as ?c) FROM <#{s.id}> WHERE {?id ?p ?v}")
+                  .first[:c]
+                  .to_i
+    end
+
+    get "/search/ontologies/content?q=*"
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    assert_equal count.sum, res['totalCount']
+
+    get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0,BROSEARCHTEST-0"
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    assert_equal count.sum, res['totalCount']
+
+    get "/search/ontologies/content?q=*&ontologies=BROSEARCHTEST-0"
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    assert_includes count, res['totalCount']
+
+    get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0"
+    assert last_response.ok?
+    res = MultiJson.load(last_response.body)
+    assert_includes count, res['totalCount']
+  end
+end
diff --git a/test/test_case.rb b/test/test_case.rb
index d58bcace..06bbc99f 100644
--- a/test/test_case.rb
+++ b/test/test_case.rb
@@ -155,6 +155,27 @@ def create_ontologies_and_submissions(options = {})
     LinkedData::SampleData::Ontology.create_ontologies_and_submissions(options)
   end
 
+  def agent_data(type: 'organization')
+    schema_agencies = LinkedData::Models::AgentIdentifier::IDENTIFIER_SCHEMES.keys
+    users = LinkedData::Models::User.all
+    users = [LinkedData::Models::User.new(username: "tim", email: "tim@example.org", password: "password").save] if users.empty?
+    test_identifiers = 5.times.map { |i| { notation: rand.to_s[2..11], schemaAgency: schema_agencies.sample.to_s } }
+    user = users.sample.id.to_s
+
+    i = rand.to_s[2..11]
+    return {
+      agentType: type,
+      name: "name #{i}",
+      homepage: "home page #{i}",
+      acronym: "acronym #{i}",
+      email: "email_#{i}@test.com",
+      identifiers: test_identifiers.sample(2).map { |x| x.merge({ creator: user }) },
+      affiliations: [],
+      creator: user
+    }
+  end
+
   ##
   # Delete all ontologies and their submissions
   def delete_ontologies_and_submissions
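
Usage sketch (not part of the patch): the lines below exercise the routes added above in the same Rack::Test style as the test suite; the acronyms, group, and query values are illustrative placeholders, not fixtures from this changeset.

  # All values below are hypothetical; substitute any indexed acronym/group/agent.
  get '/admin/search/collections'                                  # list Solr collections
  get '/search/ontologies?query=BRO&groups=UMLS&show_views=true'   # ontology metadata search with a group filter
  get '/search/ontologies/content?q=melanoma&ontologies=BRO,MCCL'  # indexed ontology data, scoped to two ontologies
  get '/search/agents?q=name&agentType=person'                     # agent search filtered by type
  puts MultiJson.load(last_response.body)['totalCount']            # paginated responses expose totalCount/collection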