From 1e083cc7216cf1be031b7e93728f43ae4a5ab917 Mon Sep 17 00:00:00 2001 From: Adam Coffman Date: Fri, 1 Apr 2022 11:30:58 -0500 Subject: [PATCH 1/3] unscope scrapers from module --- server/app/scrapers/asco.rb | 146 ++++++++++---------- server/app/scrapers/asco_query_response.rb | 52 ++++--- server/app/scrapers/asco_record_response.rb | 50 ++++--- server/app/scrapers/pub_med.rb | 100 +++++++------- server/app/scrapers/pub_med_response.rb | 144 ++++++++++--------- 5 files changed, 241 insertions(+), 251 deletions(-) diff --git a/server/app/scrapers/asco.rb b/server/app/scrapers/asco.rb index 3cd172825..73ebc4ef8 100644 --- a/server/app/scrapers/asco.rb +++ b/server/app/scrapers/asco.rb @@ -1,100 +1,98 @@ require 'net/http' require 'uri' -module Scrapers - class Asco - def self.run - ActiveRecord::Base.transaction do - ::Source.all.each do |source| - populate_source_fields(source) - end +class Asco + def self.run + ActiveRecord::Base.transaction do + ::Source.all.each do |source| + populate_source_fields(source) end end + end - def self.get_citations_from_asco_abstract_id(id) - resp = call_asco_query_api_by_asco_abstract_id(id) - resp.citations - end + def self.get_citations_from_asco_abstract_id(id) + resp = call_asco_query_api_by_asco_abstract_id(id) + resp.citations + end - def self.get_citation_from_asco_id(id) - resp = call_asco_query_api_by_asco_id(id) + def self.get_citation_from_asco_id(id) + resp = call_asco_query_api_by_asco_id(id) + if resp.citations.any? + resp.citations.first[:citation] + else + resp = call_asco_query_stage_api_by_asco_id(id) if resp.citations.any? resp.citations.first[:citation] else - resp = call_asco_query_stage_api_by_asco_id(id) - if resp.citations.any? - resp.citations.first[:citation] - else - nil - end + nil end end + end - def self.populate_source_fields(source) - record_resp = call_asco_abstract_api(source.citation_id) - query_resp = call_asco_query_api_by_asco_id(source.citation_id) - if not query_resp.citations.any? - query_resp = call_asco_query_stage_api_by_asco_id(source.citation_id) - end - source.description = get_citation_from_asco_id(source.citation_id) - source.asco_presenter = record_resp.presenter - source.asco_abstract_id = record_resp.asco_abstract_id - source.publication_year = query_resp.publication_year - source.journal = record_resp.journal - source.name = record_resp.article_title - source.abstract = record_resp.abstract - source.full_journal_title = 'Journal of Clinical Oncology' - nct_id = record_resp.nct_id - if not nct_id.empty? - source.clinical_trials << ::ClinicalTrial.where(nct_id: nct_id).first_or_create - end - source.save + def self.populate_source_fields(source) + record_resp = call_asco_abstract_api(source.citation_id) + query_resp = call_asco_query_api_by_asco_id(source.citation_id) + if not query_resp.citations.any? + query_resp = call_asco_query_stage_api_by_asco_id(source.citation_id) end - - def self.call_asco_query_api_by_asco_id(asco_id) - http_resp = Util.make_get_request(query_url_for_asco_id(asco_id)) - AscoQueryResponse.new(http_resp) + source.description = get_citation_from_asco_id(source.citation_id) + source.asco_presenter = record_resp.presenter + source.asco_abstract_id = record_resp.asco_abstract_id + source.publication_year = query_resp.publication_year + source.journal = record_resp.journal + source.name = record_resp.article_title + source.abstract = record_resp.abstract + source.full_journal_title = 'Journal of Clinical Oncology' + nct_id = record_resp.nct_id + if not nct_id.empty? + source.clinical_trials << ::ClinicalTrial.where(nct_id: nct_id).first_or_create end + source.save + end - def self.call_asco_query_api_by_asco_abstract_id(asco_abstract_id) - http_resp = Util.make_get_request(query_url_for_asco_abstract_id(asco_abstract_id)) - AscoQueryResponse.new(http_resp) - end + def self.call_asco_query_api_by_asco_id(asco_id) + http_resp = Util.make_get_request(query_url_for_asco_id(asco_id)) + AscoQueryResponse.new(http_resp) + end - def self.call_asco_query_stage_api_by_asco_id(asco_id) - http_resp = Util.make_get_request(query_stage_url_for_asco_id(asco_id)) - AscoQueryResponse.new(http_resp) - end + def self.call_asco_query_api_by_asco_abstract_id(asco_abstract_id) + http_resp = Util.make_get_request(query_url_for_asco_abstract_id(asco_abstract_id)) + AscoQueryResponse.new(http_resp) + end - def self.call_asco_record_api(asco_id) - http_resp = Util.make_get_request(record_url_for_asco_id(asco_id)) - AscoRecordResponse.new(http_resp) - end + def self.call_asco_query_stage_api_by_asco_id(asco_id) + http_resp = Util.make_get_request(query_stage_url_for_asco_id(asco_id)) + AscoQueryResponse.new(http_resp) + end - def self.call_asco_abstract_api(asco_id) - http_resp = Util.make_get_request(abstract_url_for_asco_id(asco_id)) - AscoRecordResponse.new(http_resp) - end + def self.call_asco_record_api(asco_id) + http_resp = Util.make_get_request(record_url_for_asco_id(asco_id)) + AscoRecordResponse.new(http_resp) + end - private - def self.query_url_for_asco_id(asco_id) - "https://solr.asco.org/solr/ml/select?_format=json&wt=json&q=(_id:#{asco_id})" - end + def self.call_asco_abstract_api(asco_id) + http_resp = Util.make_get_request(abstract_url_for_asco_id(asco_id)) + AscoRecordResponse.new(http_resp) + end - def self.query_stage_url_for_asco_id(asco_id) - "https://stage-solr.asco.org/solr/ml/select?_format=json&wt=json&q=(_id:#{asco_id})" - end + private + def self.query_url_for_asco_id(asco_id) + "https://solr.asco.org/solr/ml/select?_format=json&wt=json&q=(_id:#{asco_id})" + end - def self.query_url_for_asco_abstract_id(asco_abstract_id) - "https://solr.asco.org/solr/ml/select?_format=json&wt=json&q=(AbstID:#{asco_abstract_id})" - end + def self.query_stage_url_for_asco_id(asco_id) + "https://stage-solr.asco.org/solr/ml/select?_format=json&wt=json&q=(_id:#{asco_id})" + end - def self.record_url_for_asco_id(asco_id) - "https://ml-couch.asco.org/records/#{asco_id}" - end + def self.query_url_for_asco_abstract_id(asco_abstract_id) + "https://solr.asco.org/solr/ml/select?_format=json&wt=json&q=(AbstID:#{asco_abstract_id})" + end - def self.abstract_url_for_asco_id(asco_id) - "https://ml-couch.asco.org/abstracts/#{asco_id}" - end + def self.record_url_for_asco_id(asco_id) + "https://ml-couch.asco.org/records/#{asco_id}" + end + + def self.abstract_url_for_asco_id(asco_id) + "https://ml-couch.asco.org/abstracts/#{asco_id}" end end diff --git a/server/app/scrapers/asco_query_response.rb b/server/app/scrapers/asco_query_response.rb index f4b843c32..5fcdb20bc 100644 --- a/server/app/scrapers/asco_query_response.rb +++ b/server/app/scrapers/asco_query_response.rb @@ -1,34 +1,32 @@ -module Scrapers - class AscoQueryResponse - attr_reader :json - def initialize(response_body) - @json = JSON.parse(response_body) - end +class AscoQueryResponse + attr_reader :json + def initialize(response_body) + @json = JSON.parse(response_body) + end - def citations - json['response']['docs'].each_with_object([]) do |element, o| - o.append({ - citation: [author_for_element(element), element['Year'], element['MeetingName'], "Abstract #{element['AbstID']}"].join(', '), - citation_id: element['_id'], - source_type: 'ASCO', - status: 'new', - }) - end + def citations + json['response']['docs'].each_with_object([]) do |element, o| + o.append({ + citation: [author_for_element(element), element['Year'], element['MeetingName'], "Abstract #{element['AbstID']}"].join(', '), + citation_id: element['_id'], + source_type: 'ASCO', + status: 'new', + }) end + end - def publication_year - json['response']['docs'].first['Year'] - end + def publication_year + json['response']['docs'].first['Year'] + end - private - def author_for_element(elem) - if elem['FirstAuthor'].present? - elem['FirstAuthor'] - elsif elem['Authors'].present? - elem['Authors'].first - else - '' - end + private + def author_for_element(elem) + if elem['FirstAuthor'].present? + elem['FirstAuthor'] + elsif elem['Authors'].present? + elem['Authors'].first + else + '' end end end diff --git a/server/app/scrapers/asco_record_response.rb b/server/app/scrapers/asco_record_response.rb index a8d1b7682..771271fa1 100644 --- a/server/app/scrapers/asco_record_response.rb +++ b/server/app/scrapers/asco_record_response.rb @@ -1,36 +1,34 @@ require 'htmlentities' -module Scrapers - class AscoRecordResponse - attr_reader :json - def initialize(response_body) - @json = JSON.parse(response_body) - end +class AscoRecordResponse + attr_reader :json + def initialize(response_body) + @json = JSON.parse(response_body) + end - def presenter - json['FirstAuthor'] - end + def presenter + json['FirstAuthor'] + end - def asco_abstract_id - json['abstractId'] - end + def asco_abstract_id + json['abstractId'] + end - def nct_id - json['ClinicalTrialRegistryNumber'] - end + def nct_id + json['ClinicalTrialRegistryNumber'] + end - def article_title - json['Title'] - end + def article_title + json['Title'] + end - def journal - json['SiteCitation'] - end + def journal + json['SiteCitation'] + end - def abstract - sanitizer = Rails::Html::FullSanitizer.new - decoder = HTMLEntities.new - decoder.decode(sanitizer.sanitize(json['Body']).strip) - end + def abstract + sanitizer = Rails::Html::FullSanitizer.new + decoder = HTMLEntities.new + decoder.decode(sanitizer.sanitize(json['Body']).strip) end end diff --git a/server/app/scrapers/pub_med.rb b/server/app/scrapers/pub_med.rb index 891be3c58..adb6926d2 100644 --- a/server/app/scrapers/pub_med.rb +++ b/server/app/scrapers/pub_med.rb @@ -1,63 +1,61 @@ require 'net/http' require 'uri' -module Scrapers - class PubMed - def self.run - ActiveRecord::Base.transaction do - ::Source.all.each do |source| - populate_source_fields(source) - end +class PubMed + def self.run + ActiveRecord::Base.transaction do + ::Source.all.each do |source| + populate_source_fields(source) end end + end - def self.get_citation_from_pubmed_id(pubmed_id) - resp = call_pubmed_api(pubmed_id) - resp.citation - end + def self.get_citation_from_pubmed_id(pubmed_id) + resp = call_pubmed_api(pubmed_id) + resp.citation + end - def self.populate_source_fields(source) - resp = call_pubmed_api(source.citation_id) - source.description = resp.citation - resp.authors.each do |author| - author_obj = Author.where( - last_name: author[:last_name], - fore_name: author[:fore_name] - ).first_or_create - AuthorsSource.where( - source: source, - author: author_obj, - author_position: author[:author_position] - ).first_or_create - end - if pmc_id = resp.pmc_id - source.pmc_id = pmc_id - source.open_access = true - end - (day, month, year) = resp.publication_date - source.publication_day = day - source.publication_month = month - source.publication_year = year - source.journal = resp.journal - source.name = resp.article_title - source.full_journal_title = resp.full_journal_title - source.abstract = resp.abstract - source.is_review = resp.is_review? - clinical_trials = resp.clinical_trial_ids.uniq.map do |nct_id| - ::ClinicalTrial.where(nct_id: nct_id).first_or_create - end - source.clinical_trials = clinical_trials - source.save + def self.populate_source_fields(source) + resp = call_pubmed_api(source.citation_id) + source.description = resp.citation + resp.authors.each do |author| + author_obj = Author.where( + last_name: author[:last_name], + fore_name: author[:fore_name] + ).first_or_create + AuthorsSource.where( + source: source, + author: author_obj, + author_position: author[:author_position] + ).first_or_create end - - def self.call_pubmed_api(pubmed_id) - http_resp = Util.make_get_request(url_for_pubmed_id(pubmed_id)) - PubMedResponse.new(http_resp) + if pmc_id = resp.pmc_id + source.pmc_id = pmc_id + source.open_access = true end - - private - def self.url_for_pubmed_id(pubmed_id) - "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=#{pubmed_id}&retmode=xml" + (day, month, year) = resp.publication_date + source.publication_day = day + source.publication_month = month + source.publication_year = year + source.journal = resp.journal + source.name = resp.article_title + source.full_journal_title = resp.full_journal_title + source.abstract = resp.abstract + source.is_review = resp.is_review? + clinical_trials = resp.clinical_trial_ids.uniq.map do |nct_id| + ::ClinicalTrial.where(nct_id: nct_id).first_or_create end + source.clinical_trials = clinical_trials + source.save + end + + def self.call_pubmed_api(pubmed_id) + http_resp = Util.make_get_request(url_for_pubmed_id(pubmed_id)) + PubMedResponse.new(http_resp) + end + + private + def self.url_for_pubmed_id(pubmed_id) + "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=#{pubmed_id}&retmode=xml" end end diff --git a/server/app/scrapers/pub_med_response.rb b/server/app/scrapers/pub_med_response.rb index 332ba8644..665d9424c 100644 --- a/server/app/scrapers/pub_med_response.rb +++ b/server/app/scrapers/pub_med_response.rb @@ -1,95 +1,93 @@ -module Scrapers - class PubMedResponse - attr_reader :xml - def initialize(response_body) - @xml = Nokogiri::XML(response_body) - end +class PubMedResponse + attr_reader :xml + def initialize(response_body) + @xml = Nokogiri::XML(response_body) + end - def citation - [first_author, year, journal].compact.join(', ') - end + def citation + [first_author, year, journal].compact.join(', ') + end - def authors - xml.xpath('//AuthorList/Author').to_a.each.with_index(1).map do |author, i| - { - fore_name: author.xpath('ForeName').text, - last_name: author.xpath('LastName').text, - author_position: i - } - end + def authors + xml.xpath('//AuthorList/Author').to_a.each.with_index(1).map do |author, i| + { + fore_name: author.xpath('ForeName').text, + last_name: author.xpath('LastName').text, + author_position: i + } end + end - def pmc_id - xpath_contents_or_nil("//PubmedData/ArticleIdList/ArticleId[@IdType='pmc']") - end + def pmc_id + xpath_contents_or_nil("//PubmedData/ArticleIdList/ArticleId[@IdType='pmc']") + end - def abstract - xpath_contents_or_nil('//Abstract/AbstractText') - end + def abstract + xpath_contents_or_nil('//Abstract/AbstractText') + end - def first_author - xpath_contents_or_nil('//AuthorList/Author[1]/LastName') do |author_name| - if xml.xpath('//AuthorList/Author').size > 1 - author_name + " et al." - else - author_name - end + def first_author + xpath_contents_or_nil('//AuthorList/Author[1]/LastName') do |author_name| + if xml.xpath('//AuthorList/Author').size > 1 + author_name + " et al." + else + author_name end end + end - def publication_date - [day, month, year] - end + def publication_date + [day, month, year] + end - def year - xpath_contents_or_nil('//Journal/JournalIssue/PubDate/Year') - end + def year + xpath_contents_or_nil('//Journal/JournalIssue/PubDate/Year') + end - def month - monthname = xpath_contents_or_nil('//Journal/JournalIssue/PubDate/Month') - if monthname - Date::ABBR_MONTHNAMES.index(monthname) - else - nil - end + def month + monthname = xpath_contents_or_nil('//Journal/JournalIssue/PubDate/Month') + if monthname + Date::ABBR_MONTHNAMES.index(monthname) + else + nil end + end - def day - xpath_contents_or_nil('//Journal/JournalIssue/PubDate/Day') - end + def day + xpath_contents_or_nil('//Journal/JournalIssue/PubDate/Day') + end - def journal - xpath_contents_or_nil('//Journal/ISOAbbreviation') - end + def journal + xpath_contents_or_nil('//Journal/ISOAbbreviation') + end - def full_journal_title - xpath_contents_or_nil('//Journal/Title') - end + def full_journal_title + xpath_contents_or_nil('//Journal/Title') + end - def article_title - xpath_contents_or_nil('//Article/ArticleTitle') - end + def article_title + xpath_contents_or_nil('//Article/ArticleTitle') + end - def is_review? - (xml.xpath('//PublicationTypeList/PublicationType') || []) - .map(&:text) - .any? { |x| x == 'Review' } - end + def is_review? + (xml.xpath('//PublicationTypeList/PublicationType') || []) + .map(&:text) + .any? { |x| x == 'Review' } + end - def clinical_trial_ids - (xml.xpath("//DataBankList/DataBank[DataBankName='ClinicalTrials.gov']/AccessionNumberList/AccessionNumber") || []) - .map(&:text) - end + def clinical_trial_ids + (xml.xpath("//DataBankList/DataBank[DataBankName='ClinicalTrials.gov']/AccessionNumberList/AccessionNumber") || []) + .map(&:text) + end - private - def xpath_contents_or_nil(path) - if (node = xml.xpath(path).text).blank? - nil - elsif block_given? - yield node - else - node - end + private + def xpath_contents_or_nil(path) + if (node = xml.xpath(path).text).blank? + nil + elsif block_given? + yield node + else + node end end end From 52cb5905758d7b4d714d760bed9f2eb483501a05 Mon Sep 17 00:00:00 2001 From: Adam Coffman Date: Wed, 6 Apr 2022 14:28:03 -0500 Subject: [PATCH 2/3] generate and push prod build on release publish --- .github/workflows/build_frontend.yml | 2 +- .github/workflows/release.yml | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/build_frontend.yml b/.github/workflows/build_frontend.yml index a3c32c220..468c1f71f 100644 --- a/.github/workflows/build_frontend.yml +++ b/.github/workflows/build_frontend.yml @@ -8,7 +8,7 @@ on: type: string jobs: - build_staging: + build_frontend: runs-on: ubuntu-latest steps: - name: Checkout Code diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..e0a9b16ee --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,19 @@ +name: Build and deploy production + +on: + release: + types: [published] +jobs: + build-production: + uses: griffithlab/civic-v2/.github/workflows/build_frontend.yml@main + with: + branch: release + deploy-staging: + uses: griffithlab/civic-v2/.github/workflows/deploy.yml@main + needs: build-production + with: + environment: production + secrets: + SSH_HOST: ${{ secrets.CIVIC_V2_PRODUCTION_SSH_HOST }} + SSH_KEY: ${{ secrets.CIVIC_V2_PRODUCTION_SSH_KEY }} + From f5ca1ff1da06b345971717f068a9a1034ca5deeb Mon Sep 17 00:00:00 2001 From: Adam Coffman Date: Wed, 6 Apr 2022 14:28:21 -0500 Subject: [PATCH 3/3] increase db connection pool size --- server/config/database.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/config/database.yml b/server/config/database.yml index 26b973fa7..1aad9fe38 100644 --- a/server/config/database.yml +++ b/server/config/database.yml @@ -18,7 +18,7 @@ test: production: <<: *default - pool: 20 + pool: 40 username: <%= Rails.application.credentials.dig(:database, :username) %> password: <%= Rails.application.credentials.dig(:database, :password) %>