Skip to content

Commit

Permalink
add chemical types and equivalences to MetaboLights ingest
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesamcl committed May 31, 2024
1 parent 6ff5ae3 commit 2c5541f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
20 changes: 15 additions & 5 deletions 01_ingest/grebi_ingest_metabolights/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ def parse_metabolights_xml(xml_content):

entries = []
for entry in root.findall(".//entry"):
entry_id = entry.get("id")
entry_data = {
"id": entry.get("id"),
"grebi:type": "metabolights:Study",
"id": entry_id,
"grebi:name": entry.find("name").text if entry.find("name") is not None else None,
"grebi:description": entry.find("description").text if entry.find("description") is not None else None
}
Expand All @@ -30,11 +30,21 @@ def parse_metabolights_xml(xml_content):
for field in entry.findall(".//field"):
field_name = f"metabolights:{field.get('name')}"
if field_name in entry_data:
if not isinstance(entry_data[field_name], list):
entry_data[field_name] = [entry_data[field_name]]
entry_data[field_name].append(field.text)
else:
entry_data[field_name] = field.text
entry_data[field_name] = [field.text]

if entry_id.startswith("MTBLS"):
entry_data["grebi:type"]="metabolights:Study"
elif entry_id.startswith("MTBLC"):
entry_data["grebi:type"]="metabolights:Chemical"
entry_data["grebi:equivalentTo"]=entry_data["metabolights:ref"]
if "metabolights:inchi" in entry_data:
entry_data["grebi:equivalentTo"]=entry_data["grebi:equivalentTo"]+entry_data["metabolights:inchi"]
if "metabolights:formula" in entry_data:
entry_data["grebi:equivalentTo"]=entry_data["grebi:equivalentTo"]+entry_data["metabolights:formula"]
else:
assert False

entries.append(entry_data)

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ EBI Codon HPC pipeline for building integrated knowledge graphs from [EMBL-EBI r
* [OLS](https://www.ebi.ac.uk/ols4)
* [Reactome](https://reactome.org/)
* [OpenTargets](https://www.opentargets.org/)
* [MetaboLights](https://www.ebi.ac.uk/metabolights)

GrEBI also imports complementary datasets, so far:

Expand Down

0 comments on commit 2c5541f

Please sign in to comment.