diff --git a/.github/workflows/github_test.yml b/.github/workflows/github_test.yml index bab1088..e1fc875 100644 --- a/.github/workflows/github_test.yml +++ b/.github/workflows/github_test.yml @@ -1,8 +1,8 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Tests -run-name: Unit Testing on ${{ github.event_name }} +name: CI Tests +run-name: CI Testing on ${{ github.event_name }} on: push: diff --git a/README.md b/README.md index 90808cc..5b5e1b6 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ arXiv preprint + PyPI Installs + Total Downloads ## Installation diff --git a/requirements.txt b/requirements.txt index cf8334d..f932ccf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ -Owlready2~=0.44 +owlready2~=0.46 argparse~=1.4.0 -pandas~=2.0.3 -numpy~=1.24.2 -gensim~=4.3.0 -scipy~=1.10.1 -scikit-learn~=1.2.1 -setuptools~=68.2.2 -requests~=2.31.0 +pandas~=2.2.2 +numpy~=1.26.4 +gensim~=4.3.2 +scipy~=1.12.0 +scikit-learn~=1.5.1 +setuptools~=70.2.0 +requests~=2.32.3 tqdm~=4.66.1 -sparse_dot_topn~=0.3.4 -bioregistry~=0.10.6 +sparse-dot-topn~=1.1.3 +bioregistry~=0.11.10 nltk~=3.8.1 -rapidfuzz~=2.13.7 +rapidfuzz~=3.9.4 shortuuid~=1.0.11 -myst_parser~=2.0.0 \ No newline at end of file +myst-parser~=2.0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 7292f02..d6d71c5 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages from text2term.config import VERSION -description = 'A tool for mapping free-text descriptions of (biomedical) entities to controlled terms in ontologies' +description = 'a tool for mapping free-text descriptions of entities to ontology terms' long_description = open('README.md').read() with open('requirements.txt') as f: @@ -24,9 +24,13 @@ 'Development Status :: 4 - Beta', 'License :: OSI 
Approved :: MIT License', 'Operating System :: OS Independent', + 'Environment :: Console', 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Topic :: Scientific/Engineering' ], python_requires=">=3.9", diff --git a/text2term/__main__.py b/text2term/__main__.py index 9560fac..ed94223 100644 --- a/text2term/__main__.py +++ b/text2term/__main__.py @@ -4,7 +4,6 @@ from t2t import map_terms, cache_ontology from onto_cache import cache_exists from mapper import Mapper -from term import OntologyTermType if __name__ == "__main__": parser = argparse.ArgumentParser(description='A tool for mapping free-text descriptions of (biomedical) ' @@ -25,7 +24,7 @@ "that contains the terms to map, optionally followed by the name of the column that " "contains identifiers for the terms (eg 'my_terms,my_term_ids')") parser.add_argument("-sep", "--separator", required=False, type=str, default=',', - help="Specifies the cell separator to be used when reading a non-comma-separated tabular file") + help="Specifies the cell separator to be used when reading a table") parser.add_argument("-top", "--top_mappings", required=False, type=int, default=3, help="Maximum number of top-ranked mappings returned per source term (default=3)") parser.add_argument("-min", "--min_score", required=False, type=float, default=0.5, @@ -38,9 +37,9 @@ parser.add_argument("-g", "--save_term_graphs", required=False, default=False, action="store_true", help="Save vis.js graphs representing the neighborhood of each ontology term (default=False)") parser.add_argument("-c", "--store_in_cache", required=False, type=str, default="", - help="Store the target ontology into local cache under acronym") + help="Cache the target ontology using the name given here") parser.add_argument("-type", "--term_type", required=False, type=str, 
default="class", - help="Define whether to return ontology classes, properties, or both") + help="Define whether to map to ontology classes, properties, or both") parser.add_argument('-u', "--incl_unmapped", required=False, default=False, action="store_true", help="Include all unmapped terms in the output") diff --git a/text2term/config.py b/text2term/config.py index 60e3b39..773464b 100644 --- a/text2term/config.py +++ b/text2term/config.py @@ -1 +1 @@ -VERSION = "4.1.4" +VERSION = "4.2.0" diff --git a/text2term/syntactic_mapper.py b/text2term/syntactic_mapper.py index 5316303..ea84dbe 100644 --- a/text2term/syntactic_mapper.py +++ b/text2term/syntactic_mapper.py @@ -79,7 +79,7 @@ def compare_levenshtein(self, s1, s2): Calculates the normalized Levenshtein distance between s1 and s2. :return similarity between s1 and s2 as a float between 0 and 1 """ - similarity = rapidfuzz.string_metric.normalized_levenshtein(s1, s2)/100 + similarity = rapidfuzz.distance.Levenshtein.normalized_similarity(s1, s2) return similarity def compare_jaro(self, s1, s2): @@ -87,7 +87,7 @@ def compare_jaro(self, s1, s2): Calculates the Jaro similarity between s1 and s2. :return similarity between s1 and s2 as a float between 0 and 1 """ - similarity = rapidfuzz.string_metric.jaro_similarity(s1, s2)/100 + similarity = rapidfuzz.distance.Jaro.normalized_similarity(s1, s2) return similarity def compare_jarowinkler(self, s1, s2): @@ -95,16 +95,15 @@ def compare_jarowinkler(self, s1, s2): Calculates the Jaro-Winkler similarity between s1 and s2. :return similarity between s1 and s2 as a float between 0 and 1 """ - similarity = rapidfuzz.string_metric.jaro_winkler_similarity(s1, s2)/100 + similarity = rapidfuzz.distance.JaroWinkler.normalized_similarity(s1, s2) return similarity def compare_indel(self, s1, s2): """ Calculates the normalized Indel distance between s1 and s2. 
- See: https://maxbachmann.github.io/RapidFuzz/Usage/fuzz.html#ratio :return similarity between s1 and s2 as a float between 0 and 1 """ - similarity = rapidfuzz.fuzz.ratio(s1, s2)/100 + similarity = rapidfuzz.distance.Indel.normalized_similarity(s1, s2) return similarity def compare_fuzzy_ratio(self, s1, s2):