Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BioPortal API key parameter + Minor fixes (closes #61) #62

Merged
merged 2 commits into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
long_description=long_description,
long_description_content_type='text/markdown',
author='Center for Computational Biomedicine, Harvard Medical School',
author_email='[email protected]',
author_email='[email protected]',
classifiers=[
'Development Status :: 4 - Beta',
'License :: OSI Approved :: MIT License',
Expand Down
17 changes: 16 additions & 1 deletion test/simple_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,19 @@ def test_mapping_zooma_ontologies(self):
assert df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()
assert df_zooma[self.MAPPED_TERM_CURIE_COLUMN].str.contains("NCIT:").any()

def test_mapping_bioportal_ontologies_no_apikey(self):
    # Map to multiple ontologies with the BioPortal Annotator mapper while deliberately
    # omitting the `bioportal_apikey` argument; the test expects an empty data frame back.
    print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper...")
    terms = ["asthma", "location", "food allergy"]
    result = text2term.map_terms(
        terms,
        target_ontology="EFO,NCIT",
        mapper=Mapper.BIOPORTAL,
        term_type=OntologyTermType.ANY,
    )
    assert result.empty is True

def test_mapping_bioportal_ontologies(self):
# Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper
print("Test mapping a list of terms to multiple ontologies using the BioPortal Annotator mapper...")
df_bioportal = text2term.map_terms(["asthma", "location", "food allergy"], target_ontology="EFO,NCIT",
mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY)
mapper=Mapper.BIOPORTAL, term_type=OntologyTermType.ANY,
bioportal_apikey="8f0cbe43-2906-431a-9572-8600d3f4266e")
print(f"{df_bioportal}\n")
assert df_bioportal.size > 0
assert df_bioportal[self.MAPPED_TERM_CURIE_COLUMN].str.contains("EFO:").any()
Expand Down Expand Up @@ -207,6 +215,13 @@ def test_mapping_with_min_score_filter(self):
term_type=OntologyTermType.ANY, min_score=min_score)
assert (df_leven[self.MAPPING_SCORE_COLUMN] >= min_score).all()

def test_mapping_with_min_score_filter_empty_results(self):
    # With a minimum score this high, the test expects every candidate mapping to be
    # filtered out, so the returned data frame must be empty.
    self.ensure_cache_exists("EFO", self.EFO_URL)
    print("Test mapping to EFO using TFIDF similarity metric and min_score filter that results in no mappings...")
    score_threshold = 0.99
    filtered = text2term.map_terms(
        ["carbon monoxide"],
        target_ontology="EFO",
        use_cache=True,
        mapper=Mapper.TFIDF,
        term_type=OntologyTermType.ANY,
        min_score=score_threshold,
    )
    assert filtered.empty is True

def test_include_unmapped_terms(self):
self.ensure_cache_exists("EFO", self.EFO_URL)
df = text2term.map_terms(["asthma", "margarita"], target_ontology="EFO", use_cache=True, mapper=Mapper.TFIDF,
Expand Down
7 changes: 5 additions & 2 deletions text2term/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='A tool for mapping free-text descriptions of (biomedical) '
'entities to controlled terms in an ontology')
'entities to ontology terms')
parser.add_argument("-s", "--source", required=True, type=str,
help="Input file containing 'source' terms to map to ontology terms: list of terms or CSV file")
parser.add_argument("-t", "--target", required=True, type=str,
Expand Down Expand Up @@ -42,6 +42,8 @@
help="Define whether to map to ontology classes, properties, or both")
parser.add_argument('-u', "--incl_unmapped", required=False, default=False, action="store_true",
help="Include all unmapped terms in the output")
parser.add_argument('-bp', "--bioportal_apikey", required=False, type=str, default="",
help="BioPortal API Key to use along with the BioPortal mapper option")

arguments = parser.parse_args()
if not os.path.exists(arguments.source):
Expand All @@ -63,4 +65,5 @@
excl_deprecated=arguments.excl_deprecated, mapper=mapper, max_mappings=arguments.top_mappings,
min_score=arguments.min_score, base_iris=iris, save_graphs=arguments.save_term_graphs,
save_mappings=True, separator=arguments.separator, use_cache=cache_exists(target),
term_type=arguments.term_type, incl_unmapped=arguments.incl_unmapped)
term_type=arguments.term_type, incl_unmapped=arguments.incl_unmapped,
bioportal_apikey=arguments.bioportal_apikey)
2 changes: 1 addition & 1 deletion text2term/config.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = "4.2.0"
VERSION = "4.2.1"
17 changes: 12 additions & 5 deletions text2term/t2t.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_deprecated=False, max_mappings=3,
min_score=0.3, mapper=Mapper.TFIDF, output_file='', save_graphs=False, save_mappings=False,
source_terms_ids=(), separator=',', use_cache=False, term_type=OntologyTermType.CLASS,
incl_unmapped=False):
incl_unmapped=False, bioportal_apikey=""):
"""
Maps the terms in the given list to the specified target ontology.

Expand Down Expand Up @@ -75,6 +75,8 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_
The type(s) of ontology terms to map to, which can be 'class' or 'property' or 'any'
incl_unmapped : bool
Include unmapped terms in the output data frame
bioportal_apikey : str
BioPortal API Key to use along with the BioPortal mapper option

Returns
----------
Expand All @@ -101,8 +103,9 @@ def map_terms(source_terms, target_ontology, base_iris=(), csv_columns=(), excl_
# Run the mapper
LOGGER.info(f"Mapping {len(source_terms)} source terms to {target_ontology}")
mappings_df = _do_mapping(source_terms, source_terms_ids, target_terms, mapper, max_mappings, min_score, tags,
incl_unmapped)
mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3)
incl_unmapped, bioportal_apikey)
if not mappings_df.empty:
mappings_df["Mapping Score"] = mappings_df["Mapping Score"].astype(float).round(decimals=3)
if save_mappings:
_save_mappings(mappings_df, output_file, min_score, mapper, target_ontology, base_iris,
excl_deprecated, max_mappings, term_type, source_terms, incl_unmapped)
Expand Down Expand Up @@ -194,7 +197,8 @@ def _load_ontology(ontology, iris, exclude_deprecated, use_cache=False, term_typ
return onto_terms


def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped):
def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappings, min_score, tags, incl_unmapped,
bioportal_apikey):
to_map, tags = _process_tags(source_terms, tags)
start = time.time()
if mapper == Mapper.TFIDF:
Expand All @@ -204,7 +208,10 @@ def _do_mapping(source_terms, source_term_ids, ontology_terms, mapper, max_mappi
term_mapper = ZoomaMapper()
mappings_df = term_mapper.map(to_map, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
elif mapper == Mapper.BIOPORTAL:
term_mapper = BioPortalAnnotatorMapper("8f0cbe43-2906-431a-9572-8600d3f4266e")
if bioportal_apikey == "":
LOGGER.error("A BioPortal API Key must be specified via the parameter `bioportal_apikey`")
return pd.DataFrame()
term_mapper = BioPortalAnnotatorMapper(bioportal_apikey)
mappings_df = term_mapper.map(to_map, source_term_ids, ontologies=ontology_terms, max_mappings=max_mappings)
elif mapper in {Mapper.LEVENSHTEIN, Mapper.JARO, Mapper.JARO_WINKLER, Mapper.INDEL, Mapper.FUZZY, Mapper.JACCARD}:
term_mapper = SyntacticMapper(ontology_terms)
Expand Down
Loading