Skip to content

Commit

Permalink
ENH Update drug categorization logic
Browse files Browse the repository at this point in the history
- Navigate up entire ARO tree for genes so drugs of gene families can be included
- Antibiotic mixture ARO mappings are excluded, except in cases of antibiotic + adjuvant mixtures
- Use 'has_part' relationship to handle antibiotic mixtures and give drug class rather than 'antibiotic mixture'
  • Loading branch information
Vedanth-Ramji committed Jun 17, 2024
1 parent bb97433 commit 7d2224c
Show file tree
Hide file tree
Showing 19 changed files with 1,389 additions and 1,348 deletions.
65 changes: 53 additions & 12 deletions argnorm/drug_categorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,30 @@
from . import lib

# Ontology object returned by lib.get_aro_ontology(); created once at import
# time and shared by every function in this module.
ARO = lib.get_aro_ontology()

# Relationship objects are resolved once here and later used as keys into
# `term.relationships` when walking the ontology.
confers_resistance_to_drug_class_rel = ARO.get_relationship('confers_resistance_to_drug_class')
confers_resistance_to_antibiotic_rel = ARO.get_relationship('confers_resistance_to_antibiotic')
has_part_rel = ARO.get_relationship('has_part')
# NOTE: the variable name swaps the word order of the relationship id
# ('is_small_molecule_inhibitor'); the string passed to get_relationship is
# the one that matters.
is_small_inhibitor_molecule_rel = ARO.get_relationship('is_small_molecule_inhibitor')

def navigate_superclasses(super_classes_list: List[str]) -> List[str]:
    """
    Helper that picks the drug-class terms out of a list of ARO terms.

    A term is kept when its direct parents (distance 1) are exactly
    'antibiotic molecule' (ARO:1000003) and the term itself is not
    'antibiotic mixture' (ARO:3000707).

    Parameters:
        super_classes_list:
            Terms to inspect, typically the superclasses of a drug.
            NOTE(review): despite the ``List[str]`` annotation, every element
            must be a term object exposing ``.id`` and ``.superclasses(distance)``.

    Returns:
        output (list[str]):
            ARO ids of the matching terms, in input order; duplicates in the
            input are kept.
    """
    antibiotic_molecule_id = 'ARO:1000003'
    antibiotic_mixture_id = 'ARO:3000707'

    output = []

    for super_class in super_classes_list:
        # 'antibiotic mixture' is itself a child of 'antibiotic molecule' but
        # must never be reported as a drug class by this helper.
        if super_class.id == antibiotic_mixture_id:
            continue

        # The first element is skipped (presumably the term itself), leaving
        # only the direct parents. Parents are compared by id, which avoids
        # re-fetching the 'antibiotic molecule' term from the ontology on
        # every iteration.
        parent_ids = [parent.id for parent in list(super_class.superclasses(1))[1:]]

        if parent_ids == [antibiotic_molecule_id]:
            output.append(super_class.id)

    return output

def confers_resistance_to(aro_num: str) -> List[str]:
'''
Expand All @@ -16,20 +38,29 @@ def confers_resistance_to(aro_num: str) -> List[str]:
target (list[str]):
A list with ARO number of the drugs/antibiotics to which the input gene confers resistance to.
'''

antibiotic_molecule_node = [ARO['ARO:1000003'], ARO['ARO:1000001']]
# Some gene superclasses map to drugs that are immediate children of 'antibiotic molecule'.
# These are kept aside and only used as a fallback when no other drugs are found,
# because the same information is recovered later when drugs are mapped to drug classes.
backup_drugs = []
target = set()

for term in ARO[aro_num].superclasses():
for drug in term.relationships.get(confers_resistance_to_drug_class_rel, []):
target.add(drug.id)
if list(ARO[drug.id].superclasses())[1:] == antibiotic_molecule_node:
backup_drugs.append(drug.id)
else:
target.add(drug.id)

for drug in term.relationships.get(confers_resistance_to_antibiotic_rel, []):
target.add(drug.id)
if list(ARO[drug.id].superclasses())[1:] == antibiotic_molecule_node:
backup_drugs.append(drug.id)
else:
target.add(drug.id)

if target:
break
if target == set():
target.update(backup_drugs)

return sorted(target)
return sorted(list(target))

def drugs_to_drug_classes(drugs_list: List[str]) -> List[str]:
'''
Expand All @@ -50,11 +81,21 @@ def drugs_to_drug_classes(drugs_list: List[str]) -> List[str]:
for drug in drugs_list:
drug_instance = ARO[drug]
drug_instance_superclasses = list(drug_instance.superclasses())
superclasses_len = len(drug_instance_superclasses)
drug_classes += navigate_superclasses(drug_instance_superclasses)

has_part_nodes = drug_instance.relationships.get(has_part_rel, [])
if has_part_nodes:
for hast_part_node in has_part_nodes:
hast_part_node_superclasses = list(hast_part_node.superclasses())[1:]

for super_class in hast_part_node_superclasses:
super_class_categories = list(super_class.superclasses())
drug_classes += navigate_superclasses(super_class_categories)

if hast_part_node.relationships.get(is_small_inhibitor_molecule_rel, []):
drug_classes.append('ARO:3000707')

if superclasses_len >= 3:
drug_classes.append(drug_instance_superclasses[superclasses_len - 3].id)
else:
drug_classes.append(drug_instance_superclasses[0].id)
if drug_classes == []:
drug_classes.append(drug_instance.id)

return sorted(drug_classes)
448 changes: 224 additions & 224 deletions outputs/hamronized/abricate.argannot.tsv

Large diffs are not rendered by default.

318 changes: 159 additions & 159 deletions outputs/hamronized/abricate.megares.tsv

Large diffs are not rendered by default.

238 changes: 119 additions & 119 deletions outputs/hamronized/abricate.ncbi.tsv

Large diffs are not rendered by default.

542 changes: 271 additions & 271 deletions outputs/hamronized/abricate.resfinder.tsv

Large diffs are not rendered by default.

294 changes: 147 additions & 147 deletions outputs/hamronized/abricate.resfinderfg.tsv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion outputs/hamronized/amrfinderplus.ncbi.orfs.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ amrfinderplus.ncbi.orfs.tsv tet(Q) tetracycline resistance ribosomal protection
amrfinderplus.ncbi.orfs.tsv bexA multidrug efflux MATE transporter BexA NCBI Reference Gene Database 2023-Nov-01 BAB64566.1 amrfinderplus 3.10.30 gene_presence_detected EFFLUX 80.36 EFFLUX 18 1085 356 k119_41685 443 - 92.98 ARO:3003953 ARO:0000045,ARO:3000662 ARO:0000001,ARO:3005386
amrfinderplus.ncbi.orfs.tsv lnu(C) lincosamide nucleotidyltransferase Lnu(C) NCBI Reference Gene Database 2023-Nov-01 WP_063851341.1 amrfinderplus 3.10.30 gene_presence_detected LINCOSAMIDE 100.0 LINCOSAMIDE 234 725 164 k119_46979 164 - 97.56 ARO:3002837 ARO:0000046 ARO:0000017
amrfinderplus.ncbi.orfs.tsv sat4 streptothricin N-acetyltransferase Sat4 NCBI Reference Gene Database 2023-Nov-01 WP_000627290.1 amrfinderplus 3.10.30 gene_presence_detected STREPTOTHRICIN 86.11 STREPTOTHRICIN 8 472 155 k119_47732 180 - 100.0 ARO:3002897 ARO:0000012 ARO:3000034
amrfinderplus.ncbi.orfs.tsv aph(3')-IIIa aminoglycoside O-phosphotransferase APH(3')-IIIa NCBI Reference Gene Database 2023-Nov-01 WP_001096887.1 amrfinderplus 3.10.30 gene_presence_detected AMIKACIN/KANAMYCIN 100.0 AMINOGLYCOSIDE 207 998 264 k119_48139 264 - 100.0 ARO:3002647 ARO:0000005,ARO:0000013,ARO:0000021,ARO:0000024,ARO:0000049,ARO:3000652,ARO:3000655,ARO:3000657,ARO:3000658 ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:3000707
amrfinderplus.ncbi.orfs.tsv aph(3')-IIIa aminoglycoside O-phosphotransferase APH(3')-IIIa NCBI Reference Gene Database 2023-Nov-01 WP_001096887.1 amrfinderplus 3.10.30 gene_presence_detected AMIKACIN/KANAMYCIN 100.0 AMINOGLYCOSIDE 207 998 264 k119_48139 264 - 100.0 ARO:3002647 ARO:0000005,ARO:0000013,ARO:0000021,ARO:0000024,ARO:0000049,ARO:3000652,ARO:3000655,ARO:3000657,ARO:3000658 ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016
amrfinderplus.ncbi.orfs.tsv aadS aminoglycoside 6-adenylyltransferase AadS NCBI Reference Gene Database 2023-Nov-01 WP_003013318.1 amrfinderplus 3.10.30 gene_presence_detected STREPTOMYCIN 100.0 AMINOGLYCOSIDE 34628 35488 287 k119_48233 287 + 100.0 ARO:3004683 ARO:0000040 ARO:0000016
amrfinderplus.ncbi.orfs.tsv tet(X2) tetracycline-inactivating monooxygenase Tet(X2) NCBI Reference Gene Database 2023-Nov-01 WP_008651082.1 amrfinderplus 3.10.30 gene_presence_detected TETRACYCLINE 100.0 TETRACYCLINE 12370 13533 388 k119_48273 388 + 99.74 ARO:3000205 ARO:0000030,ARO:0000051,ARO:0000069,ARO:3000152,ARO:3000528,ARO:3000667,ARO:3000668 ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050
amrfinderplus.ncbi.orfs.tsv tet(O) tetracycline resistance ribosomal protection protein Tet(O) NCBI Reference Gene Database 2023-Nov-01 WP_014636291.1 amrfinderplus 3.10.30 gene_presence_detected TETRACYCLINE 100.0 TETRACYCLINE 978 2894 639 k119_60190 639 + 99.22 ARO:3000190 ARO:0000051,ARO:0000069,ARO:3000152,ARO:3000528,ARO:3000667,ARO:3000668 ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050
Expand Down
Loading

0 comments on commit 7d2224c

Please sign in to comment.