Skip to content

Commit

Permalink
fix: use docm snapshot data (#533)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Nov 22, 2024
1 parent ebe99dc commit 957eeac
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 126 deletions.
29 changes: 0 additions & 29 deletions server/lib/genome/importers/api_importers/docm/api_client.rb

This file was deleted.

97 changes: 0 additions & 97 deletions server/lib/genome/importers/api_importers/docm/importer.rb

This file was deleted.

95 changes: 95 additions & 0 deletions server/lib/genome/importers/file_importers/docm.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
module Genome
  module Importers
    module FileImporters
      module Docm
        # Loads DoCM drug claims, gene claims, interaction claims and their
        # attributes/publications from a directory of CSV snapshot files.
        #
        # Files read (all with a header row, comma-separated), relative to the
        # snapshot root:
        #   drug_claim.csv, gene_claim.csv, interaction_claim.csv,
        #   interaction_claim_attributes.csv, interaction_claim_publications.csv
        #
        # Columns are addressed by position, not by header name.
        class Importer < Genome::Importers::Base
          # NOTE(review): nothing in this class assigns @file_path (the
          # constructor stores the root in @tsv_root); kept for interface
          # compatibility -- confirm whether Base or callers rely on it.
          attr_reader :file_path

          # @param tsv_root_path [String, nil] directory holding the CSV
          #   snapshot files; +nil+ selects the bundled 'lib/data/docm/' copy.
          #   A trailing slash is optional.
          def initialize(tsv_root_path = nil)
            @tsv_root = tsv_root_path.nil? ? 'lib/data/docm/' : tsv_root_path
            @source_db_name = 'DoCM'
            # Lookup tables filled while importing:
            #   claim name => claim record, and [gene claim, drug claim] => interaction claim.
            @drug_claims = {}
            @gene_claims = {}
            @interaction_claims = {}
          end

          # Creates every claim type in dependency order: interaction claims
          # are resolved through the drug/gene lookup tables, so those must be
          # populated first.
          def create_claims
            create_drug_claims
            create_gene_claims
            create_interaction_claims
          end

          private

          # Creates (once) the Source record describing DoCM, then tags it with
          # the 'interaction' source type and saves it.
          def create_new_source
            @source ||= Source.create(
              {
                base_url: 'http://docm.info/',
                site_url: 'http://docm.info/',
                citation: 'Ainscough BJ, Griffith M, Coffman AC, Wagner AH, Kunisaki J, Choudhary MN, McMichael JF, Fulton RS, Wilson RK, Griffith OL, Mardis ER. DoCM: a database of curated mutations in cancer. Nat Methods. 2016 Sep 29;13(10):806-7. doi: 10.1038/nmeth.4000. PMID: 27684579; PMCID: PMC5317181.',
                citation_short: 'Ainscough BJ, et al. DoCM: a database of curated mutations in cancer. Nat Methods. 2016 Sep 29;13(10):806-7.',
                pmid: '27684579',
                pmcid: 'PMC5317181',
                doi: '10.1038/nmeth.4000',
                source_db_version: '2024-10-02',
                source_trust_level_id: SourceTrustLevel.EXPERT_CURATED,
                source_db_name:,
                full_name: 'Database of Curated Mutations',
                license: License::CC_BY_4_0,
                license_link: 'https://github.com/griffithlab/docm/blob/c8d2a8723f505689074d07841931475b9b7e914c/app/views/static/about.html.haml#L86'
              }
            )
            @source.source_types << SourceType.find_by(type: 'interaction')
            @source.save
          end

          # Creates one drug claim per row of drug_claim.csv, keyed by the
          # first column (the drug claim name). Other columns are ignored.
          def create_drug_claims
            each_snapshot_row('drug_claim.csv') do |row|
              @drug_claims[row[0]] = create_drug_claim(row[0])
            end
          end

          # Creates one gene claim per row of gene_claim.csv, keyed by the
          # first column (the gene claim name). The nomenclature is always
          # GeneNomenclature::NCBI_NAME; other columns are ignored.
          def create_gene_claims
            each_snapshot_row('gene_claim.csv') do |row|
              @gene_claims[row[0]] = create_gene_claim(row[0], GeneNomenclature::NCBI_NAME)
            end
          end

          # Creates interaction claims, then attaches their attributes and
          # publications. Rows naming a drug or gene that was not loaded, or
          # (for attributes/publications) an interaction that was not created,
          # are skipped.
          def create_interaction_claims
            # Columns: drug name, gene name.
            each_snapshot_row('interaction_claim.csv') do |row|
              gc = @gene_claims[row[1]]
              dc = @drug_claims[row[0]]
              next if gc.nil? || dc.nil?

              @interaction_claims[[gc, dc]] = create_interaction_claim(gc, dc)
            end

            # Columns: attribute name, attribute value, drug name, gene name.
            each_snapshot_row('interaction_claim_attributes.csv') do |row|
              ic = find_interaction_claim(row[2], row[3])
              next if ic.nil?

              create_interaction_claim_attribute(ic, row[0], row[1])
            end

            # Columns: first is the publication identifier passed on as-is;
            # third and fourth are drug name and gene name.
            each_snapshot_row('interaction_claim_publications.csv') do |row|
              ic = find_interaction_claim(row[2], row[3])
              next if ic.nil?

              create_interaction_claim_publication(ic, row[0])
            end
          end

          # Yields each data row of +filename+ inside the snapshot root.
          # File.join is used so the root works with or without a trailing
          # separator.
          def each_snapshot_row(filename, &block)
            CSV.foreach(File.join(@tsv_root, filename), headers: true, col_sep: ',', &block)
          end

          # Returns the interaction claim previously created for the given
          # drug/gene names, or nil when either claim or the interaction itself
          # is unknown.
          def find_interaction_claim(drug_name, gene_name)
            gc = @gene_claims[gene_name]
            dc = @drug_claims[drug_name]
            return nil if gc.nil? || dc.nil?

            @interaction_claims[[gc, dc]]
          end
        end
      end
    end
  end
end
41 changes: 41 additions & 0 deletions server/lib/genome/importers/file_importers/docm.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- It's a little tricky to grab data from the final DOCM dump;
-- instead, these queries help us produce dumps from our last extraction in DGIdb

-- gene claim
-- One row per DoCM gene claim: name, nomenclature.
-- The source filter is qualified with the `s` alias, like the other queries
-- in this file, so it cannot become ambiguous if gene_claims ever gains a
-- column of the same name.
SELECT gc.name, gc.nomenclature
FROM gene_claims gc
LEFT JOIN sources s ON gc.source_id = s.id
WHERE s.source_db_name = 'DoCM';

-- drug claim
-- One row per DoCM drug claim: name, nomenclature.
SELECT
    dc.name,
    dc.nomenclature
FROM drug_claims AS dc
    LEFT JOIN sources AS s
        ON dc.source_id = s.id
WHERE s.source_db_name = 'DoCM';

-- interaction claim
-- One row per DoCM interaction claim: drug claim name, gene claim name.
SELECT
    dc.name,
    gc.name
FROM interaction_claims AS ic
    LEFT JOIN sources AS s
        ON ic.source_id = s.id
    LEFT JOIN drug_claims AS dc
        ON ic.drug_claim_id = dc.id
    LEFT JOIN gene_claims AS gc
        ON ic.gene_claim_id = gc.id
WHERE s.source_db_name = 'DoCM';

-- interaction claim attributes
-- One row per attribute of a DoCM interaction claim:
-- attribute name, attribute value, drug claim name, gene claim name.
-- The WHERE clause on `s` discards attribute rows that the RIGHT JOIN kept
-- without a matching interaction claim, since their source columns are NULL.
SELECT
    ica.name,
    ica.value,
    dc.name,
    gc.name
FROM interaction_claims AS ic
    LEFT JOIN sources AS s
        ON ic.source_id = s.id
    LEFT JOIN drug_claims AS dc
        ON ic.drug_claim_id = dc.id
    LEFT JOIN gene_claims AS gc
        ON ic.gene_claim_id = gc.id
    RIGHT JOIN interaction_claim_attributes AS ica
        ON ic.id = ica.interaction_claim_id
WHERE s.source_db_name = 'DoCM';

-- interaction claim publications
-- One row per publication linked to a DoCM interaction claim:
-- pmid, citation, drug claim name, gene claim name.
-- The WHERE clause on `s` discards link rows that the RIGHT JOIN kept
-- without a matching interaction claim, since their source columns are NULL.
SELECT
    p.pmid,
    p.citation,
    dc.name,
    gc.name
FROM interaction_claims AS ic
    LEFT JOIN sources AS s
        ON ic.source_id = s.id
    LEFT JOIN drug_claims AS dc
        ON ic.drug_claim_id = dc.id
    LEFT JOIN gene_claims AS gc
        ON ic.gene_claim_id = gc.id
    RIGHT JOIN interaction_claims_publications AS icp
        ON icp.interaction_claim_id = ic.id
    LEFT JOIN publications AS p
        ON p.id = icp.publication_id
WHERE s.source_db_name = 'DoCM';

0 comments on commit 957eeac

Please sign in to comment.