feat!: remove search result formatting options

cancervariants · Sep 26, 2023 · commit 7631ceb
1 parent 46e91c5

Showing 7 changed files with 88 additions and 169 deletions.
diff --git a/gene/main.py b/gene/main.py
@@ -42,8 +42,6 @@
 read_query_summary = "Given query, provide best-matching source records."
 response_description = "A response to a validly-formed query"
 q_descr = "Gene to normalize."
-keyed_descr = """Optional. If true, return response as key-value pairs of
-              sources to source matches. False by default."""
 incl_descr = """Optional. Comma-separated list of source names to include in
              response. Will exclude all other sources. Returns HTTP status code
              422: Unprocessable Entity if both 'incl' and 'excl' parameters
@@ -68,15 +66,12 @@
 )
 def search(
     q: str = Query(..., description=q_descr),  # noqa: D103
-    keyed: Optional[bool] = Query(False, description=keyed_descr),
     incl: Optional[str] = Query(None, description=incl_descr),
     excl: Optional[str] = Query(None, description=excl_descr),
 ) -> SearchService:
     """Return strongest match concepts to query string provided by user.
 
     :param str q: gene search term
-    :param Optional[bool] keyed: if true, response is structured as key/value
-        pair of sources to source match lists.
     :param Optional[str] incl: comma-separated list of sources to include,
         with all others excluded. Raises HTTPException if both `incl` and
         `excl` are given.
@@ -86,10 +81,10 @@ def search(
     :return: JSON response with matched records and source metadata
     """
     try:
-        resp = query_handler.search(html.unescape(q), keyed=keyed, incl=incl, excl=excl)
+        resp = query_handler.search(html.unescape(q), incl=incl, excl=excl)
     except InvalidParameterException as e:
         raise HTTPException(status_code=422, detail=str(e))
-
+
     return resp
 
 

diff --git a/gene/query.py b/gene/query.py
@@ -223,9 +223,9 @@ def _post_process_resp(self, resp: Dict) -> Dict:
                     records = sorted(records, key=lambda k: k.match_type, reverse=True)
         return resp
 
-    def _response_keyed(self, query: str, sources: Set[str]) -> Dict:
-        """Return response as dict where key is source name and value
-        is a list of records. Corresponds to `keyed=true` API parameter.
+    def _get_search_response(self, query: str, sources: Set[str]) -> Dict:
+        """Return response as dict where key is source name and value is a list of
+        records.
 
         :param query: string to match against
         :param sources: sources to match from
@@ -275,28 +275,6 @@ def _response_keyed(self, query: str, sources: Set[str]) -> Dict:
         # remaining sources get no match
         return self._post_process_resp(resp)
 
-    def _response_list(self, query: str, sources: Set[str]) -> Dict:
-        """Return response as list, where the first key-value in each item
-        is the source name. Corresponds to `keyed=false` API parameter.
-
-        :param query: string to match against
-        :param sources: sources to match from
-        :return: completed response object to return to client
-        """
-        response_dict = self._response_keyed(query, sources)
-        source_list = []
-        for src_name in response_dict["source_matches"].keys():
-            src = {
-                "source": src_name,
-            }
-            to_merge = response_dict["source_matches"][src_name]
-            src.update(to_merge)
-
-            source_list.append(src)
-        response_dict["source_matches"] = source_list
-
-        return response_dict
-
     @staticmethod
     def _get_service_meta() -> ServiceMeta:
         """Return metadata about gene-normalizer service.
@@ -308,7 +286,6 @@ def _get_service_meta() -> ServiceMeta:
     def search(
         self,
         query_str: str,
-        keyed: bool = False,
         incl: str = "",
         excl: str = "",
         **params,
@@ -323,8 +300,6 @@ def search(
         'ncbigene:673'
 
         :param query_str: query, a string, to search for
-        :param keyed: if true, return response as dict keying source names to source
-            objects; otherwise, return list of source objects
         :param incl: str containing comma-separated names of sources to use. Will
             exclude all other sources. Case-insensitive.
         :param excl: str containing comma-separated names of source to exclude. Will
@@ -375,10 +350,7 @@ def search(
 
         query_str = query_str.strip()
 
-        if keyed:
-            resp = self._response_keyed(query_str, query_sources)
-        else:
-            resp = self._response_list(query_str, query_sources)
+        resp = self._get_search_response(query_str, query_sources)
 
         resp["service_meta_"] = self._get_service_meta()
         return SearchService(**resp)

diff --git a/gene/schemas.py b/gene/schemas.py
@@ -297,62 +297,75 @@ class SourceMeta(BaseModel):
     )
 
 
-class MatchesKeyed(BaseModel):
-    """Container for matching information from an individual source.
-    Used when matches are requested as an object, not an array.
-    """
+class SourceSearchMatches(BaseModel):
+    """Container for matching information from an individual source."""
 
     records: List[Gene] = []
     source_meta_: SourceMeta
 
     model_config = ConfigDict(
         json_schema_extra={
             "example": {
-                "records": [],
-                "source_meta_": {
-                    "data_license": "custom",
-                    "data_license_url": "https://www.ncbi.nlm.nih.gov/home/about/policies/",  # noqa: E501
-                    "version": "20201215",
-                    "data_url": "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/",
-                    "rdp_url": "https://reusabledata.org/ncbi-gene.html",
-                    "data_license_attributes": {
-                        "non_commercial": False,
-                        "share_alike": False,
-                        "attribution": False,
-                    },
-                    "genome_assemblies": [],
+                "query": "ensembl:ENSG00000157764",
+                "warnings": [],
+                "source_matches": {
+                    "Ensembl": {
+                        "records": [
+                            {
+                                "concept_id": "ensembl:ENSG00000157764",
+                                "symbol": "BRAF",
+                                "symbol_status": None,
+                                "label": "B-Raf proto-oncogene, serine/threonine kinase",
+                                "strand": "-",
+                                "location_annotations": [],
+                                "locations": [
+                                    {
+                                        "id": "ga4gh:SL.iwWw9B3tkU3TCLF3d8xu4zSQBhpDZfJ6",
+                                        "label": None,
+                                        "extensions": None,
+                                        "type": "SequenceLocation",
+                                        "digest": None,
+                                        "sequenceReference": {
+                                            "id": None,
+                                            "label": None,
+                                            "extensions": None,
+                                            "type": "SequenceReference",
+                                            "digest": None,
+                                            "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
+                                            "residueAlphabet": None,
+                                        },
+                                        "start": 140719326,
+                                        "end": 140924929,
+                                    }
+                                ],
+                                "aliases": [],
+                                "previous_symbols": [],
+                                "xrefs": ["hgnc:1097"],
+                                "associated_with": [],
+                                "gene_type": "protein_coding",
+                                "match_type": 100,
+                            }
+                        ],
+                        "source_meta_": {
+                            "data_license": "custom",
+                            "data_license_url": "https://useast.ensembl.org/info/about/legal/disclaimer.html",
+                            "version": "110",
+                            "data_url": "ftp://ftp.ensembl.org/pub/current_gff3/homo_sapiens/Homo_sapiens.GRCh38.110.gff3.gz",
+                            "rdp_url": None,
+                            "data_license_attributes": {
+                                "non_commercial": False,
+                                "attribution": False,
+                                "share_alike": False,
+                            },
+                            "genome_assemblies": ["GRCh38"],
+                        },
+                    }
                 },
-            }
-        }
-    )
-
-
-class MatchesListed(BaseModel):
-    """Container for matching information from an individual source.
-    Used when matches are requested as an array, not an object.
-    """
-
-    source: SourceName
-    records: List[Gene] = []
-    source_meta_: SourceMeta
-
-    model_config = ConfigDict(
-        json_schema_extra={
-            "example": {
-                "source": "NCBI",
-                "records": [],
-                "source_meta_": {
-                    "data_license": "custom",
-                    "data_license_url": "https://www.ncbi.nlm.nih.gov/home/about/policies/",  # noqa: E501
-                    "version": "20201215",
-                    "data_url": "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/",
-                    "rdp_url": "https://reusabledata.org/ncbi-gene.html",
-                    "data_license_attributes": {
-                        "non_commercial": False,
-                        "share_alike": False,
-                        "attribution": False,
-                    },
-                    "genome_assemblies": [],
+                "service_meta_": {
+                    "name": "gene-normalizer",
+                    "version": "0.3.0-dev0",
+                    "response_datetime": "2023-09-26 15:23:18.837074",
+                    "url": "https://github.com/cancervariants/gene-normalization",
                 },
             }
         }
@@ -386,58 +399,10 @@ class SearchService(BaseModel):
 
     query: StrictStr
     warnings: List[Dict] = []
-    source_matches: Union[Dict[SourceName, MatchesKeyed], List[MatchesListed]]
+    source_matches: Dict[SourceName, SourceSearchMatches]
     service_meta_: ServiceMeta
 
-    model_config = ConfigDict(
-        json_schema_extra={
-            "example": {
-                "query": "BRAF",
-                "warnings": [],
-                "source_matches": [
-                    {
-                        "source": "Ensembl",
-                        "records": [
-                            {
-                                "label": None,
-                                "concept_id": "ensembl:ENSG00000157764",
-                                "symbol": "BRAF",
-                                "previous_symbols": [],
-                                "aliases": [],
-                                "xrefs": [],
-                                "symbol_status": None,
-                                "strand": "-",
-                                "locations": [],
-                                "location_annotations": [],
-                                "associated_with": [],
-                                "gene_type": None,
-                                "match_type": 100,
-                            }
-                        ],
-                        "source_meta_": {
-                            "data_license": "custom",
-                            "data_license_url": "https://uswest.ensembl.org/info/about/legal/index.html",  # noqa: E501
-                            "version": "102",
-                            "data_url": "http://ftp.ensembl.org/pub/",
-                            "rdp_url": None,
-                            "data_license_attributes": {
-                                "non_commercial": False,
-                                "share_alike": False,
-                                "attribution": False,
-                            },
-                            "genome_assemblies": ["GRCh38"],
-                        },
-                    }
-                ],
-                "service_meta_": {
-                    "name": "gene-normalizer",
-                    "version": __version__,
-                    "response_datetime": "2022-03-23 15:57:14.180908",
-                    "url": "https://github.com/cancervariants/gene-normalization",
-                },
-            }
-        }
-    )
+    model_config = ConfigDict(json_schema_extra={"example": {}})
 
 
 class GeneTypeFieldName(str, Enum):

diff --git a/tests/unit/test_ensembl_source.py b/tests/unit/test_ensembl_source.py
@@ -14,7 +14,7 @@ def __init__(self):
             self.query_handler = QueryHandler(database)
 
         def search(self, query_str, incl="ensembl"):
-            resp = self.query_handler.search(query_str, keyed=True, incl=incl)
+            resp = self.query_handler.search(query_str, incl=incl)
             return resp.source_matches[SourceName.ENSEMBL]
 
     e = QueryGetter()

diff --git a/tests/unit/test_hgnc_source.py b/tests/unit/test_hgnc_source.py
@@ -16,7 +16,7 @@ def __init__(self):
             self.query_handler = QueryHandler(database)
 
         def search(self, query_str, incl="hgnc"):
-            resp = self.query_handler.search(query_str, keyed=True, incl=incl)
+            resp = self.query_handler.search(query_str, incl=incl)
             return resp.source_matches[SourceName.HGNC]
 
     h = QueryGetter()

diff --git a/tests/unit/test_ncbi_source.py b/tests/unit/test_ncbi_source.py
@@ -34,7 +34,7 @@ def __init__(self):
             self.query_handler = QueryHandler(database)
 
         def search(self, query_str, incl="ncbi"):
-            resp = self.query_handler.search(query_str, keyed=True, incl=incl)
+            resp = self.query_handler.search(query_str, incl=incl)
             return resp.source_matches[SourceName.NCBI]
 
     n = QueryGetter()