Skip to content

Commit

Permalink
WIP #50
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede committed May 13, 2024
1 parent 8480173 commit 65657f9
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 2 deletions.
9 changes: 9 additions & 0 deletions update/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,15 @@
"output_file": "triplets.csv",
},
),
Task(
name="urls_formatters",
f=download_query_as_csv.run,
group=DownloadGroup,
params={
"query_file": "update/queries/urls_formatters.rq",
"output_file": "urls_formatters.csv",
},
),
Task(
name="generate_database_chemo",
f=generate_database_chemo.run,
Expand Down
4 changes: 2 additions & 2 deletions update/generate_database_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def run(path: Path) -> None:
# structures.append({"id": struct, "smiles": smiles})
logging.info(" Processed structures")

# TODO add all structure IDs (See #50)
# TODO add all IDs and formatters (See #50)

descriptors_dict = {}
with open(path / "descriptors_rdkit.csv", "r") as f:
Expand Down Expand Up @@ -158,7 +158,7 @@ def run(path: Path) -> None:

# Eventually TODO add taxa_names_com

# TODO add all taxon IDs (See #50)
# TODO add all IDs and formatters (See #50)

taxon_ranks_dict = {}
with open(path / "ranks_names.csv", "r") as f:
Expand Down
46 changes: 46 additions & 0 deletions update/queries/urls_formatters.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# urls_formatters.rq
#
# Purpose: for every Wikidata property listed in the VALUES block below, fetch
# the value(s) of its P1630 statement (the "formatter URL" referred to in the
# comment inside the WHERE clause).
#
# Result shape: `SELECT *` projects every variable bound in the pattern, i.e.
# ?property and ?formatter. A property carrying several P1630 values produces
# one row per value; a property with no P1630 value produces no row at all.
#
# To support a new identifier, add its property to the VALUES list together
# with a trailing comment giving the category (STRUCTURE / TAXON / REFERENCE)
# and the name of the external resource.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
# Namespace of the bigdata (Blazegraph) query hints used at the end of the query.
PREFIX hint: <http://www.bigdata.com/queryHints#>

SELECT * WHERE {
# All properties we use with a formatter URL
# (listed as wd: entities because the property itself is the subject of the
# P1630 triple pattern below)
VALUES ?property {
wd:P231 # STRUCTURE CAS
wd:P233 # STRUCTURE SMILES (canonical)
wd:P234 # STRUCTURE InChI
wd:P235 # STRUCTURE InChIKey
wd:P356 # REFERENCE DOI
wd:P592 # STRUCTURE ChEMBL
wd:P638 # STRUCTURE PDB structure
wd:P661 # STRUCTURE ChemSpider
wd:P662 # STRUCTURE PubChem CID
wd:P683 # STRUCTURE ChEBI
wd:P665 # STRUCTURE KEGG
wd:P685 # TAXON NCBI
wd:P815 # TAXON ITIS
wd:P830 # TAXON EOL
wd:P846 # TAXON GBIF
wd:P850 # TAXON WoRMS
wd:P960 # TAXON TROPICOS
wd:P961 # TAXON IPNI
wd:P2017 # STRUCTURE SMILES (isomeric)
wd:P2057 # STRUCTURE HMDB
wd:P2064 # STRUCTURE KNApSAcK
wd:P2084 # STRUCTURE ZINC
wd:P2877 # STRUCTURE SureChEMBL
wd:P3151 # TAXON iNat
wd:P3636 # STRUCTURE PDB ligand
wd:P4964 # STRUCTURE SPLASH
wd:P5037 # TAXON PoWO
wd:P5055 # TAXON IRMNG
wd:P6689 # STRUCTURE MassBank
wd:P7715 # TAXON WFO
wd:P7746 # STRUCTURE NPAtlas
wd:P8533 # STRUCTURE SMARTS
wd:P9157 # TAXON OTL
wd:P9405 # STRUCTURE NMRShiftDB
wd:P10718 # STRUCTURE CXSMILES
wd:P11375 # STRUCTURE CSD
}
# Bind the formatter URL of each property. The trailing `hint:Prior` statement
# is a query hint that applies to the triple pattern immediately before it, so
# the two statements must stay adjacent and in this order.
# NOTE(review): rangeSafe is presumably only relevant to range FILTERs, and this
# query has none - confirm whether the hint is still needed here.
?property wdt:P1630 ?formatter. hint:Prior hint:rangeSafe TRUE.
}

0 comments on commit 65657f9

Please sign in to comment.