cancervariants · korikuzma · Jan 2, 2025 · Dec 20, 2024 · Dec 31, 2024 · Dec 31, 2024
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,13 +29,13 @@ dependencies = [
     "uvicorn",
     "click",
     "boto3",
-    "ga4gh.vrs~=2.0.0a8",
+    "ga4gh.vrs==2.0.0a13",
 ]
 dynamic = ["version"]
 
 [project.optional-dependencies]
 etl = [
-    "disease-normalizer[etl]~=0.5.0",
+    "disease-normalizer[etl]~=0.7.0",
     "owlready2",
     "rdflib",
     "wikibaseintegrator>=0.12.0",

diff --git a/src/therapy/etl/base.py b/src/therapy/etl/base.py
@@ -286,7 +286,7 @@ def _normalize_disease(self, query: str) -> str | None:
         if term in self._disease_cache:
             return self._disease_cache[term]
         response = self.disease_normalizer.normalize(term)
-        normalized_id = response.normalized_id
+        normalized_id = response.disease.primaryCode.root if response.disease else None
         self._disease_cache[term] = normalized_id
         if normalized_id is None:
             _logger.warning("Failed to normalize disease term: %s", query)

diff --git a/src/therapy/main.py b/src/therapy/main.py
@@ -74,7 +74,7 @@ def custom_openapi() -> dict:
     "Return merged strongest-match concept for query string " "provided by user."
 )
 merged_matches_summary = (
-    "Given query, provide merged normalized record as a " "Therapeutic Agent."
+    "Given query, provide merged normalized record as a Therapy Mappable Concept."
 )
 merged_response_descr = "A response to a validly-formed query."
 normalize_q_descr = "Therapy to normalize."
@@ -148,7 +148,7 @@ def normalize(
     :param q: therapy search term
     :param bool infer_namespace: if True, try to infer namespace from query term.
     :returns: JSON response with matching normalized record provided as a
-    Therapeutic Agent, and source metadata
+    Therapy Mappable Concept, and source metadata
     """
     try:
         response = query_handler.normalize(html.unescape(q), infer_namespace)

diff --git a/src/therapy/query.py b/src/therapy/query.py
@@ -7,12 +7,23 @@
 from typing import Any, TypeVar
 
 from botocore.exceptions import ClientError
-from ga4gh.core import domain_models, entity_models
+from disease.schemas import NAMESPACE_TO_SYSTEM_URI as DISEASE_NAMESPACE_TO_SYSTEM_URI
+from disease.schemas import NamespacePrefix as DiseaseNamespacePrefix
+from ga4gh.core.models import (
+    Coding,
+    ConceptMapping,
+    Extension,
+    MappableConcept,
+    Relation,
+    code,
+)
 from uvicorn.config import logger
 
 from therapy import NAMESPACE_LUIS, PREFIX_LOOKUP, SOURCES
 from therapy.database import AbstractDatabase
 from therapy.schemas import (
+    NAMESPACE_TO_SYSTEM_URI,
+    SYSTEM_URI_TO_NAMESPACE,
     BaseNormalizationService,
     HasIndication,
     MatchesNormalized,
@@ -350,20 +361,17 @@ def _add_merged_meta(self, response: NormalizationService) -> NormalizationServi
         :return: completed response object.
         """
         sources_meta = {}
-        therapeutic_agent = response.therapeutic_agent
-        sources = [response.normalized_id.split(":")[0]]  # type: ignore[union-attr]
-        if therapeutic_agent.mappings:  # type: ignore[union-attr]
-            sources += [m.coding.system for m in therapeutic_agent.mappings]  # type: ignore[union-attr]
+        therapy = response.therapy
+
+        sources = []
+        for m in therapy.mappings or []:
+            ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system)
+            if ns in PREFIX_LOOKUP:
+                sources.append(PREFIX_LOOKUP[ns])
 
         for src in sources:
-            try:
-                src_name = SourceName(PREFIX_LOOKUP[src])
-            except KeyError:
-                # not an imported source
-                continue
-            else:
-                if src_name not in sources_meta:
-                    sources_meta[src_name] = self.db.get_source_metadata(src_name)
+            if src not in sources_meta:
+                sources_meta[src] = self.db.get_source_metadata(src)
         response.source_meta_ = sources_meta  # type: ignore[assignment]
         return response
 
@@ -377,42 +385,88 @@ def _record_order(self, record: dict) -> tuple[int, str]:
         source_rank = SourcePriority[src]
         return source_rank, record["concept_id"]
 
-    def _add_therapeutic_agent(
+    def _add_therapy(
         self,
         response: NormalizationService,
         record: dict,
         match_type: MatchType,
     ) -> NormalizationService:
-        """Format received DB record as therapeutic agent and update response object.
+        """Format received DB record as Mappable Concept and update response object.
         :param NormalizationService response: in-progress response object
         :param Dict record: record as stored in DB
         :param str query: query string from user request
         :param MatchType match_type: type of match achieved
         :return: completed response object ready to return to user
         """
-        therapeutic_agent_obj = domain_models.TherapeuticAgent(
-            id=f"normalize.therapy.{record['concept_id']}", label=record.get("label")
+
+        def _create_concept_mapping(
+            concept_id: str,
+            relation: Relation,
+            ns_to_system_uri: dict[NamespacePrefix | DiseaseNamespacePrefix, str],
+            ns_prefix: type[NamespacePrefix | DiseaseNamespacePrefix],
+        ) -> ConceptMapping:
+            """Create concept mapping for therapy or disease identifier
+
+            ``system`` is the URI registered for the namespace in
+            ``ns_to_system_uri``; otherwise the namespace prefix value is used.
+
+            :param concept_id: Concept identifier represented as a curie
+            :param relation: SKOS mapping relationship
+            :param ns_to_system_uri: Mapping from namespace prefix to system URI
+            :param ns_prefix: Namespace prefix enum class used to validate the prefix
+            :return: Concept mapping for therapy or disease identifier
+            :raise ValueError: if the curie prefix is not a member of ``ns_prefix``
+            """
+            prefix = concept_id.split(":")[0]
+
+            try:
+                source = ns_prefix(prefix)
+            except ValueError:
+                try:
+                    source = ns_prefix(prefix.upper())  # retry upper-cased
+                except ValueError as e:
+                    err_msg = f"Namespace prefix not supported: {prefix}"
+                    raise ValueError(err_msg) from e
+
+            # Fall back to the prefix value (a str) rather than the enum member
+            system = ns_to_system_uri.get(source, source.value)
+            return ConceptMapping(
+                coding=Coding(code=code(concept_id), system=system), relation=relation
+            )
+
+        therapy_obj = MappableConcept(
+            id=f"normalize.therapy.{record['concept_id']}",
+            primaryCode=code(root=record["concept_id"]),
+            conceptType="Therapy",
+            label=record.get("label"),
         )
 
+        # mappings
+        mappings = [
+            _create_concept_mapping(
+                concept_id=record["concept_id"],
+                relation=Relation.EXACT_MATCH,
+                ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
+                ns_prefix=NamespacePrefix,
+            )
+        ]
         source_ids = record.get("xrefs", []) + record.get("associated_with", [])
-        mappings = []
-        for source_id in source_ids:
-            system, code = source_id.split(":")
-            mappings.append(
-                entity_models.ConceptMapping(
-                    coding=entity_models.Coding(
-                        code=entity_models.Code(code), system=system.lower()
-                    ),
-                    relation=entity_models.Relation.RELATED_MATCH,
-                )
+        mappings.extend(
+            _create_concept_mapping(
+                concept_id=source_id,
+                relation=Relation.RELATED_MATCH,
+                ns_to_system_uri=NAMESPACE_TO_SYSTEM_URI,
+                ns_prefix=NamespacePrefix,
             )
+            for source_id in source_ids
+        )
         if mappings:
-            therapeutic_agent_obj.mappings = mappings
+            therapy_obj.mappings = mappings
 
+        extensions = []
         if "aliases" in record:
-            therapeutic_agent_obj.alternativeLabels = record["aliases"]
+            extensions.append(Extension(name="aliases", value=record["aliases"]))
 
-        extensions = []
         if any(
             filter(
                 lambda f: f in record,
@@ -435,49 +489,44 @@ def _add_therapeutic_agent(
                 indication = self._get_indication(ind_db)
 
                 if indication.normalized_disease_id:
-                    system, code = indication.normalized_disease_id.split(":")
                     mappings = [
-                        entity_models.ConceptMapping(
-                            coding=entity_models.Coding(
-                                code=entity_models.Code(code), system=system.lower()
-                            ),
-                            relation=entity_models.Relation.RELATED_MATCH,
+                        _create_concept_mapping(
+                            concept_id=indication.normalized_disease_id,
+                            relation=Relation.RELATED_MATCH,
+                            ns_to_system_uri=DISEASE_NAMESPACE_TO_SYSTEM_URI,
+                            ns_prefix=DiseaseNamespacePrefix,
                         )
                     ]
                 else:
                     mappings = []
-                ind_disease_obj = domain_models.Disease(
+                ind_disease_obj = MappableConcept(
                     id=indication.disease_id,
+                    conceptType="Disease",
                     label=indication.disease_label,
                     mappings=mappings or None,
                 )
 
                 if indication.supplemental_info:
                     ind_disease_obj.extensions = [
-                        entity_models.Extension(name=k, value=v)
+                        Extension(name=k, value=v)
                         for k, v in indication.supplemental_info.items()
                     ]
                 inds_list.append(ind_disease_obj.model_dump(exclude_none=True))
             if inds_list:
                 approv_value["has_indication"] = inds_list
 
-            approv = entity_models.Extension(
-                name="regulatory_approval", value=approv_value
-            )
+            approv = Extension(name="regulatory_approval", value=approv_value)
             extensions.append(approv)
 
         trade_names = record.get("trade_names")
         if trade_names:
-            extensions.append(
-                entity_models.Extension(name="trade_names", value=trade_names)
-            )
+            extensions.append(Extension(name="trade_names", value=trade_names))
 
         if extensions:
-            therapeutic_agent_obj.extensions = extensions
+            therapy_obj.extensions = extensions
 
         response.match_type = match_type
-        response.normalized_id = record["concept_id"]
-        response.therapeutic_agent = therapeutic_agent_obj
+        response.therapy = therapy_obj
         return self._add_merged_meta(response)
 
     def _resolve_merge(
@@ -537,7 +586,7 @@ def normalize(self, query: str, infer: bool = True) -> NormalizationService:
         response = NormalizationService(**self._prepare_normalized_response(query))
 
         return self._perform_normalized_lookup(
-            response, query, infer, self._add_therapeutic_agent
+            response, query, infer, self._add_therapy
         )
 
     def _construct_drug_match(self, record: dict) -> Therapy:

diff --git a/src/therapy/schemas.py b/src/therapy/schemas.py
@@ -4,7 +4,7 @@
 from enum import Enum, IntEnum
 from typing import Any, Literal
 
-from ga4gh.core import domain_models
+from ga4gh.core.models import MappableConcept
 from pydantic import BaseModel, ConfigDict, StrictBool, constr
 
 from therapy import __version__
@@ -258,6 +258,44 @@ class NamespacePrefix(Enum):
     WIKIDATA = "wikidata"
 
 
+# Source to URI. Will use OBO Foundry persistent URL (PURL) or source homepage
+NAMESPACE_TO_SYSTEM_URI: dict[NamespacePrefix, str] = {
+    NamespacePrefix.ATC: "https://www.who.int/tools/atc-ddd-toolkit/atc-classification/",
+    NamespacePrefix.CHEBI: "http://purl.obolibrary.org/obo/chebi.owl",
+    NamespacePrefix.CHEMBL: "https://www.ebi.ac.uk/chembl/",
+    NamespacePrefix.CHEMIDPLUS: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
+    NamespacePrefix.CASREGISTRY: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
+    NamespacePrefix.CVX: "https://www2a.cdc.gov/vaccines/iis/iisstandards/vaccines.asp?rpt=cvx",
+    NamespacePrefix.DRUGBANK: "https://go.drugbank.com",
+    NamespacePrefix.DRUGCENTRAL: "https://drugcentral.org",
+    NamespacePrefix.DRUGSATFDA_ANDA: "https://www.fda.gov/drugs/types-applications/abbreviated-new-drug-application-anda",
+    NamespacePrefix.DRUGSATFDA_NDA: "https://www.fda.gov/drugs/types-applications/new-drug-application-nda",
+    NamespacePrefix.HEMONC: "https://hemonc.org",
+    NamespacePrefix.INCHIKEY: "https://www.chemspider.com",
+    NamespacePrefix.IUPHAR_LIGAND: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
+    NamespacePrefix.GUIDETOPHARMACOLOGY: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
+    NamespacePrefix.MMSL: "https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html",
+    NamespacePrefix.MSH: "https://id.nlm.nih.gov/mesh/",
+    NamespacePrefix.NCIT: "http://purl.obolibrary.org/obo/ncit.owl",
+    NamespacePrefix.NDC: "https://dps.fda.gov/ndc",
+    NamespacePrefix.PUBCHEMCOMPOUND: "https://pubchem.ncbi.nlm.nih.gov/docs/compounds",
+    NamespacePrefix.PUBCHEMSUBSTANCE: "https://pubchem.ncbi.nlm.nih.gov/docs/substances",
+    NamespacePrefix.RXNORM: "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
+    NamespacePrefix.SPL: "https://www.fda.gov/industry/fda-data-standards-advisory-board/structured-product-labeling-resources",
+    NamespacePrefix.UMLS: "https://www.nlm.nih.gov/research/umls/index.html",
+    NamespacePrefix.UNII: "https://precision.fda.gov/uniisearch",
+    NamespacePrefix.UNIPROT: "https://www.uniprot.org",
+    NamespacePrefix.USP: "https://www.usp.org/health-quality-safety/compendial-nomenclature",
+    NamespacePrefix.VANDF: "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF",
+    NamespacePrefix.WIKIDATA: "https://www.wikidata.org",
+}
+
+# System URI to namespace value; a URI shared by several prefixes keeps the last
+SYSTEM_URI_TO_NAMESPACE = {
+    system_uri: ns.value for ns, system_uri in NAMESPACE_TO_SYSTEM_URI.items()
+}
+
+
 class DataLicenseAttributes(BaseModel):
     """Define constraints for data license attributes."""
 
@@ -484,8 +522,7 @@ class UnmergedNormalizationService(BaseNormalizationService):
 class NormalizationService(BaseNormalizationService):
     """Response containing one or more merged records and source data."""
 
-    normalized_id: str | None = None
-    therapeutic_agent: domain_models.TherapeuticAgent | None = None
+    therapy: MappableConcept | None = None
     source_meta_: dict[SourceName, SourceMeta] | None = None
 
     model_config = ConfigDict(
@@ -494,18 +531,31 @@ class NormalizationService(BaseNormalizationService):
                 "query": "cisplatin",
                 "warnings": None,
                 "match_type": 80,
-                "normalized_id": "rxcui:2555",
-                "therapeutic_agent": {
-                    "type": "TherapeuticAgent",
+                "therapy": {
+                    "conceptType": "Therapy",
+                    "primaryCode": "rxcui:2555",
                     "id": "normalize.therapy.rxcui:2555",
                     "label": "cisplatin",
                     "mappings": [
                         {
-                            "coding": {"code": "C376", "system": "ncit"},
+                            "coding": {
+                                "code": "2555",
+                                "system": "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
+                            },
+                            "relation": "exactMatch",
+                        },
+                        {
+                            "coding": {
+                                "code": "C376",
+                                "system": "http://purl.obolibrary.org/obo/ncit.owl",
+                            },
                             "relation": "relatedMatch",
                         },
                         {
-                            "coding": {"code": "15663-27-1", "system": "chemidplus"},
+                            "coding": {
+                                "code": "15663-27-1",
+                                "system": "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
+                            },
                             "relation": "relatedMatch",
                         },
                         {