Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: update models to vrs 2.0.0 community review ballot #449

Merged
merged 5 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ dependencies = [
"uvicorn",
"click",
"boto3",
"ga4gh.vrs~=2.0.0a8",
"ga4gh.vrs==2.0.0a13",
]
dynamic = ["version"]

[project.optional-dependencies]
etl = [
"disease-normalizer[etl]~=0.5.0",
"disease-normalizer[etl]~=0.7.0",
"owlready2",
"rdflib",
"wikibaseintegrator>=0.12.0",
Expand Down
2 changes: 1 addition & 1 deletion src/therapy/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _normalize_disease(self, query: str) -> str | None:
if term in self._disease_cache:
return self._disease_cache[term]
response = self.disease_normalizer.normalize(term)
normalized_id = response.normalized_id
normalized_id = response.disease.primaryCode.root if response.disease else None
self._disease_cache[term] = normalized_id
if normalized_id is None:
_logger.warning("Failed to normalize disease term: %s", query)
Expand Down
4 changes: 2 additions & 2 deletions src/therapy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def custom_openapi() -> dict:
"Return merged strongest-match concept for query string " "provided by user."
)
merged_matches_summary = (
"Given query, provide merged normalized record as a " "Therapeutic Agent."
"Given query, provide merged normalized record as a Therapy Mappable Concept."
)
merged_response_descr = "A response to a validly-formed query."
normalize_q_descr = "Therapy to normalize."
Expand Down Expand Up @@ -148,7 +148,7 @@ def normalize(
:param q: therapy search term
:param bool infer_namespace: if True, try to infer namespace from query term.
:returns: JSON response with matching normalized record provided as a
Therapeutic Agent, and source metadata
Therapy Mappable Concept, and source metadata
"""
try:
response = query_handler.normalize(html.unescape(q), infer_namespace)
Expand Down
145 changes: 97 additions & 48 deletions src/therapy/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,23 @@
from typing import Any, TypeVar

from botocore.exceptions import ClientError
from ga4gh.core import domain_models, entity_models
from disease.schemas import NAMESPACE_TO_SYSTEM_URI as DISEASE_NAMESPACE_TO_SYSTEM_URI
from disease.schemas import NamespacePrefix as DiseaseNamespacePrefix
from ga4gh.core.models import (
Coding,
ConceptMapping,
Extension,
MappableConcept,
Relation,
code,
)
from uvicorn.config import logger

from therapy import NAMESPACE_LUIS, PREFIX_LOOKUP, SOURCES
from therapy.database import AbstractDatabase
from therapy.schemas import (
NAMESPACE_TO_SYSTEM_URI,
SYSTEM_URI_TO_NAMESPACE,
BaseNormalizationService,
HasIndication,
MatchesNormalized,
Expand Down Expand Up @@ -350,20 +361,17 @@ def _add_merged_meta(self, response: NormalizationService) -> NormalizationServi
:return: completed response object.
"""
sources_meta = {}
therapeutic_agent = response.therapeutic_agent
sources = [response.normalized_id.split(":")[0]] # type: ignore[union-attr]
if therapeutic_agent.mappings: # type: ignore[union-attr]
sources += [m.coding.system for m in therapeutic_agent.mappings] # type: ignore[union-attr]
therapy = response.therapy

sources = []
for m in therapy.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system)
if ns in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns])

for src in sources:
try:
src_name = SourceName(PREFIX_LOOKUP[src])
except KeyError:
# not an imported source
continue
else:
if src_name not in sources_meta:
sources_meta[src_name] = self.db.get_source_metadata(src_name)
if src not in sources_meta:
sources_meta[src] = self.db.get_source_metadata(src)
response.source_meta_ = sources_meta # type: ignore[assignment]
return response

Expand All @@ -377,42 +385,88 @@ def _record_order(self, record: dict) -> tuple[int, str]:
source_rank = SourcePriority[src]
return source_rank, record["concept_id"]

def _add_therapeutic_agent(
def _add_therapy(
self,
response: NormalizationService,
record: dict,
match_type: MatchType,
) -> NormalizationService:
"""Format received DB record as therapeutic agent and update response object.
"""Format received DB record as Mappable Concept and update response object.
:param NormalizationService response: in-progress response object
:param Dict record: record as stored in DB
:param str query: query string from user request
:param MatchType match_type: type of match achieved
:return: completed response object ready to return to user
"""
therapeutic_agent_obj = domain_models.TherapeuticAgent(
id=f"normalize.therapy.{record['concept_id']}", label=record.get("label")

def _create_concept_mapping(
concept_id: str,
relation: Relation,
ns_to_system_uri: dict[str, str],
ns_prefix: NamespacePrefix | DiseaseNamespacePrefix,
) -> ConceptMapping:
"""Create concept mapping for therapy or disease identifier

``system`` will use OBO Foundry persistent URL (PURL), source homepage, or
namespace prefix, in that order of preference, if available.

:param concept_id: Concept identifier represented as a curie
:param relation: SKOS mapping relationship (e.g. exactMatch or relatedMatch)
:param ns_to_system_uri: Dictionary containing mapping from namespace to
system URI
:param ns_prefix: Namespace prefix enum
:return: Concept mapping for therapy or disease identifier
"""
source = concept_id.split(":")[0]

try:
source = ns_prefix(source)
except ValueError:
try:
source = ns_prefix(source.upper())
except ValueError as e:
err_msg = f"Namespace prefix not supported: {source}"
raise ValueError(err_msg) from e

system = ns_to_system_uri.get(source, source)

return ConceptMapping(
coding=Coding(code=code(concept_id), system=system), relation=relation
)

therapy_obj = MappableConcept(
id=f"normalize.therapy.{record['concept_id']}",
primaryCode=code(root=record["concept_id"]),
conceptType="Therapy",
label=record.get("label"),
)

# mappings
mappings = [
_create_concept_mapping(
concept_id=record["concept_id"],
relation=Relation.EXACT_MATCH,
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
ns_prefix=NamespacePrefix,
)
]
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = []
for source_id in source_ids:
system, code = source_id.split(":")
mappings.append(
entity_models.ConceptMapping(
coding=entity_models.Coding(
code=entity_models.Code(code), system=system.lower()
),
relation=entity_models.Relation.RELATED_MATCH,
)
mappings.extend(
_create_concept_mapping(
concept_id=source_id,
relation=Relation.RELATED_MATCH,
ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
ns_prefix=NamespacePrefix,
)
for source_id in source_ids
)
if mappings:
therapeutic_agent_obj.mappings = mappings
therapy_obj.mappings = mappings

extensions = []
if "aliases" in record:
therapeutic_agent_obj.alternativeLabels = record["aliases"]
extensions.append(Extension(name="aliases", value=record["aliases"]))

extensions = []
if any(
filter(
lambda f: f in record,
Expand All @@ -435,49 +489,44 @@ def _add_therapeutic_agent(
indication = self._get_indication(ind_db)

if indication.normalized_disease_id:
system, code = indication.normalized_disease_id.split(":")
mappings = [
entity_models.ConceptMapping(
coding=entity_models.Coding(
code=entity_models.Code(code), system=system.lower()
),
relation=entity_models.Relation.RELATED_MATCH,
_create_concept_mapping(
concept_id=indication.normalized_disease_id,
relation=Relation.RELATED_MATCH,
ns_to_system_uri=DISEASE_NAMESPACE_TO_SYSTEM_URI,
ns_prefix=DiseaseNamespacePrefix,
)
]
else:
mappings = []
ind_disease_obj = domain_models.Disease(
ind_disease_obj = MappableConcept(
id=indication.disease_id,
conceptType="Disease",
label=indication.disease_label,
mappings=mappings or None,
)

if indication.supplemental_info:
ind_disease_obj.extensions = [
entity_models.Extension(name=k, value=v)
Extension(name=k, value=v)
for k, v in indication.supplemental_info.items()
]
inds_list.append(ind_disease_obj.model_dump(exclude_none=True))
if inds_list:
approv_value["has_indication"] = inds_list

approv = entity_models.Extension(
name="regulatory_approval", value=approv_value
)
approv = Extension(name="regulatory_approval", value=approv_value)
extensions.append(approv)

trade_names = record.get("trade_names")
if trade_names:
extensions.append(
entity_models.Extension(name="trade_names", value=trade_names)
)
extensions.append(Extension(name="trade_names", value=trade_names))

if extensions:
therapeutic_agent_obj.extensions = extensions
therapy_obj.extensions = extensions

response.match_type = match_type
response.normalized_id = record["concept_id"]
response.therapeutic_agent = therapeutic_agent_obj
response.therapy = therapy_obj
return self._add_merged_meta(response)

def _resolve_merge(
Expand Down Expand Up @@ -537,7 +586,7 @@ def normalize(self, query: str, infer: bool = True) -> NormalizationService:
response = NormalizationService(**self._prepare_normalized_response(query))

return self._perform_normalized_lookup(
response, query, infer, self._add_therapeutic_agent
response, query, infer, self._add_therapy
)

def _construct_drug_match(self, record: dict) -> Therapy:
Expand Down
66 changes: 58 additions & 8 deletions src/therapy/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum, IntEnum
from typing import Any, Literal

from ga4gh.core import domain_models
from ga4gh.core.models import MappableConcept
from pydantic import BaseModel, ConfigDict, StrictBool, constr

from therapy import __version__
Expand Down Expand Up @@ -258,6 +258,44 @@ class NamespacePrefix(Enum):
WIKIDATA = "wikidata"


# Namespace prefix -> system URI. Each value is either an OBO Foundry persistent
# URL (PURL) or the source's homepage.
#
# NOTE: two pairs of entries below share a URI (CHEMIDPLUS/CASREGISTRY and
# IUPHAR_LIGAND/GUIDETOPHARMACOLOGY), so a URI -> namespace reverse lookup built
# from this table can keep only one namespace per shared URI.
# NOTE(review): if those enum members are aliases of one another (same value),
# the paired entries collapse to a single key -- confirm against NamespacePrefix.
NAMESPACE_TO_SYSTEM_URI: dict[NamespacePrefix, str] = {
NamespacePrefix.ATC: "https://www.who.int/tools/atc-ddd-toolkit/atc-classification/",
NamespacePrefix.CHEBI: "http://purl.obolibrary.org/obo/chebi.owl",
NamespacePrefix.CHEMBL: "https://www.ebi.ac.uk/chembl/",
NamespacePrefix.CHEMIDPLUS: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
# Same URI as CHEMIDPLUS above.
NamespacePrefix.CASREGISTRY: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
NamespacePrefix.CVX: "https://www2a.cdc.gov/vaccines/iis/iisstandards/vaccines.asp?rpt=cvx",
NamespacePrefix.DRUGBANK: "https://go.drugbank.com",
NamespacePrefix.DRUGCENTRAL: "https://drugcentral.org",
NamespacePrefix.DRUGSATFDA_ANDA: "https://www.fda.gov/drugs/types-applications/abbreviated-new-drug-application-anda",
NamespacePrefix.DRUGSATFDA_NDA: "https://www.fda.gov/drugs/types-applications/new-drug-application-nda",
NamespacePrefix.HEMONC: "https://hemonc.org",
NamespacePrefix.INCHIKEY: "https://www.chemspider.com",
NamespacePrefix.IUPHAR_LIGAND: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
# Same URI as IUPHAR_LIGAND above.
NamespacePrefix.GUIDETOPHARMACOLOGY: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
NamespacePrefix.MMSL: "https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html",
NamespacePrefix.MSH: "https://id.nlm.nih.gov/mesh/",
# OBO Foundry PURL (as is CHEBI above); the remaining entries are source pages.
NamespacePrefix.NCIT: "http://purl.obolibrary.org/obo/ncit.owl",
NamespacePrefix.NDC: "https://dps.fda.gov/ndc",
NamespacePrefix.PUBCHEMCOMPOUND: "https://pubchem.ncbi.nlm.nih.gov/docs/compounds",
NamespacePrefix.PUBCHEMSUBSTANCE: "https://pubchem.ncbi.nlm.nih.gov/docs/substances",
NamespacePrefix.RXNORM: "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
NamespacePrefix.SPL: "https://www.fda.gov/industry/fda-data-standards-advisory-board/structured-product-labeling-resources",
NamespacePrefix.UMLS: "https://www.nlm.nih.gov/research/umls/index.html",
NamespacePrefix.UNII: "https://precision.fda.gov/uniisearch",
NamespacePrefix.UNIPROT: "https://www.uniprot.org",
NamespacePrefix.USP: "https://www.usp.org/health-quality-safety/compendial-nomenclature",
NamespacePrefix.VANDF: "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF",
NamespacePrefix.WIKIDATA: "https://www.wikidata.org",
}

# Reverse lookup: system URI -> namespace prefix value (the enum member's
# ``.value``). When several namespaces map to the same URI, the one listed last
# in NAMESPACE_TO_SYSTEM_URI is the one kept here.
SYSTEM_URI_TO_NAMESPACE = {
    uri: prefix.value for prefix, uri in NAMESPACE_TO_SYSTEM_URI.items()
}


class DataLicenseAttributes(BaseModel):
"""Define constraints for data license attributes."""

Expand Down Expand Up @@ -484,8 +522,7 @@ class UnmergedNormalizationService(BaseNormalizationService):
class NormalizationService(BaseNormalizationService):
"""Response containing one or more merged records and source data."""

normalized_id: str | None = None
therapeutic_agent: domain_models.TherapeuticAgent | None = None
therapy: MappableConcept | None = None
source_meta_: dict[SourceName, SourceMeta] | None = None

model_config = ConfigDict(
Expand All @@ -494,18 +531,31 @@ class NormalizationService(BaseNormalizationService):
"query": "cisplatin",
"warnings": None,
"match_type": 80,
"normalized_id": "rxcui:2555",
"therapeutic_agent": {
"type": "TherapeuticAgent",
"therapy": {
"conceptType": "Therapy",
"primaryCode": "rxcui:2555",
"id": "normalize.therapy.rxcui:2555",
"label": "cisplatin",
"mappings": [
{
"coding": {"code": "C376", "system": "ncit"},
"coding": {
"code": "2555",
"system": "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "C376",
"system": "http://purl.obolibrary.org/obo/ncit.owl",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "15663-27-1", "system": "chemidplus"},
"coding": {
"code": "15663-27-1",
"system": "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
},
"relation": "relatedMatch",
},
{
Expand Down
Loading
Loading