Skip to content

Commit

Permalink
feat!: remove search result formatting options
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Sep 26, 2023
1 parent 46e91c5 commit 7631ceb
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 169 deletions.
9 changes: 2 additions & 7 deletions gene/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@
read_query_summary = "Given query, provide best-matching source records."
response_description = "A response to a validly-formed query"
q_descr = "Gene to normalize."
keyed_descr = """Optional. If true, return response as key-value pairs of
sources to source matches. False by default."""
incl_descr = """Optional. Comma-separated list of source names to include in
response. Will exclude all other sources. Returns HTTP status code
422: Unprocessable Entity if both 'incl' and 'excl' parameters
Expand All @@ -68,15 +66,12 @@
)
def search(
q: str = Query(..., description=q_descr), # noqa: D103
keyed: Optional[bool] = Query(False, description=keyed_descr),
incl: Optional[str] = Query(None, description=incl_descr),
excl: Optional[str] = Query(None, description=excl_descr),
) -> SearchService:
"""Return strongest match concepts to query string provided by user.
:param str q: gene search term
:param Optional[bool] keyed: if true, response is structured as key/value
pair of sources to source match lists.
:param Optional[str] incl: comma-separated list of sources to include,
with all others excluded. Raises HTTPException if both `incl` and
`excl` are given.
Expand All @@ -86,10 +81,10 @@ def search(
:return: JSON response with matched records and source metadata
"""
try:
resp = query_handler.search(html.unescape(q), keyed=keyed, incl=incl, excl=excl)
resp = query_handler.search(html.unescape(q), incl=incl, excl=excl)
except InvalidParameterException as e:
raise HTTPException(status_code=422, detail=str(e))

breakpoint()
return resp


Expand Down
36 changes: 4 additions & 32 deletions gene/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,9 @@ def _post_process_resp(self, resp: Dict) -> Dict:
records = sorted(records, key=lambda k: k.match_type, reverse=True)
return resp

def _response_keyed(self, query: str, sources: Set[str]) -> Dict:
"""Return response as dict where key is source name and value
is a list of records. Corresponds to `keyed=true` API parameter.
def _get_search_response(self, query: str, sources: Set[str]) -> Dict:
"""Return response as dict where key is source name and value is a list of
records.
:param query: string to match against
:param sources: sources to match from
Expand Down Expand Up @@ -275,28 +275,6 @@ def _response_keyed(self, query: str, sources: Set[str]) -> Dict:
# remaining sources get no match
return self._post_process_resp(resp)

def _response_list(self, query: str, sources: Set[str]) -> Dict:
"""Return response as list, where the first key-value in each item
is the source name. Corresponds to `keyed=false` API parameter.
:param query: string to match against
:param sources: sources to match from
:return: completed response object to return to client
"""
response_dict = self._response_keyed(query, sources)
source_list = []
for src_name in response_dict["source_matches"].keys():
src = {
"source": src_name,
}
to_merge = response_dict["source_matches"][src_name]
src.update(to_merge)

source_list.append(src)
response_dict["source_matches"] = source_list

return response_dict

@staticmethod
def _get_service_meta() -> ServiceMeta:
"""Return metadata about gene-normalizer service.
Expand All @@ -308,7 +286,6 @@ def _get_service_meta() -> ServiceMeta:
def search(
self,
query_str: str,
keyed: bool = False,
incl: str = "",
excl: str = "",
**params,
Expand All @@ -323,8 +300,6 @@ def search(
'ncbigene:673'
:param query_str: query, a string, to search for
:param keyed: if true, return response as dict keying source names to source
objects; otherwise, return list of source objects
:param incl: str containing comma-separated names of sources to use. Will
exclude all other sources. Case-insensitive.
:param excl: str containing comma-separated names of source to exclude. Will
Expand Down Expand Up @@ -375,10 +350,7 @@ def search(

query_str = query_str.strip()

if keyed:
resp = self._response_keyed(query_str, query_sources)
else:
resp = self._response_list(query_str, query_sources)
resp = self._get_search_response(query_str, query_sources)

resp["service_meta_"] = self._get_service_meta()
return SearchService(**resp)
Expand Down
161 changes: 63 additions & 98 deletions gene/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,62 +297,75 @@ class SourceMeta(BaseModel):
)


class MatchesKeyed(BaseModel):
"""Container for matching information from an individual source.
Used when matches are requested as an object, not an array.
"""
class SourceSearchMatches(BaseModel):
"""Container for matching information from an individual source."""

records: List[Gene] = []
source_meta_: SourceMeta

model_config = ConfigDict(
json_schema_extra={
"example": {
"records": [],
"source_meta_": {
"data_license": "custom",
"data_license_url": "https://www.ncbi.nlm.nih.gov/home/about/policies/", # noqa: E501
"version": "20201215",
"data_url": "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/",
"rdp_url": "https://reusabledata.org/ncbi-gene.html",
"data_license_attributes": {
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
"genome_assemblies": [],
"query": "ensembl:ENSG00000157764",
"warnings": [],
"source_matches": {
"Ensembl": {
"records": [
{
"concept_id": "ensembl:ENSG00000157764",
"symbol": "BRAF",
"symbol_status": None,
"label": "B-Raf proto-oncogene, serine/threonine kinase",
"strand": "-",
"location_annotations": [],
"locations": [
{
"id": "ga4gh:SL.iwWw9B3tkU3TCLF3d8xu4zSQBhpDZfJ6",
"label": None,
"extensions": None,
"type": "SequenceLocation",
"digest": None,
"sequenceReference": {
"id": None,
"label": None,
"extensions": None,
"type": "SequenceReference",
"digest": None,
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
"residueAlphabet": None,
},
"start": 140719326,
"end": 140924929,
}
],
"aliases": [],
"previous_symbols": [],
"xrefs": ["hgnc:1097"],
"associated_with": [],
"gene_type": "protein_coding",
"match_type": 100,
}
],
"source_meta_": {
"data_license": "custom",
"data_license_url": "https://useast.ensembl.org/info/about/legal/disclaimer.html",
"version": "110",
"data_url": "ftp://ftp.ensembl.org/pub/current_gff3/homo_sapiens/Homo_sapiens.GRCh38.110.gff3.gz",
"rdp_url": None,
"data_license_attributes": {
"non_commercial": False,
"attribution": False,
"share_alike": False,
},
"genome_assemblies": ["GRCh38"],
},
}
},
}
}
)


class MatchesListed(BaseModel):
"""Container for matching information from an individual source.
Used when matches are requested as an array, not an object.
"""

source: SourceName
records: List[Gene] = []
source_meta_: SourceMeta

model_config = ConfigDict(
json_schema_extra={
"example": {
"source": "NCBI",
"records": [],
"source_meta_": {
"data_license": "custom",
"data_license_url": "https://www.ncbi.nlm.nih.gov/home/about/policies/", # noqa: E501
"version": "20201215",
"data_url": "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/",
"rdp_url": "https://reusabledata.org/ncbi-gene.html",
"data_license_attributes": {
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
"genome_assemblies": [],
"service_meta_": {
"name": "gene-normalizer",
"version": "0.3.0-dev0",
"response_datetime": "2023-09-26 15:23:18.837074",
"url": "https://github.com/cancervariants/gene-normalization",
},
}
}
Expand Down Expand Up @@ -386,58 +399,10 @@ class SearchService(BaseModel):

query: StrictStr
warnings: List[Dict] = []
source_matches: Union[Dict[SourceName, MatchesKeyed], List[MatchesListed]]
source_matches: Dict[SourceName, SourceSearchMatches]
service_meta_: ServiceMeta

model_config = ConfigDict(
json_schema_extra={
"example": {
"query": "BRAF",
"warnings": [],
"source_matches": [
{
"source": "Ensembl",
"records": [
{
"label": None,
"concept_id": "ensembl:ENSG00000157764",
"symbol": "BRAF",
"previous_symbols": [],
"aliases": [],
"xrefs": [],
"symbol_status": None,
"strand": "-",
"locations": [],
"location_annotations": [],
"associated_with": [],
"gene_type": None,
"match_type": 100,
}
],
"source_meta_": {
"data_license": "custom",
"data_license_url": "https://uswest.ensembl.org/info/about/legal/index.html", # noqa: E501
"version": "102",
"data_url": "http://ftp.ensembl.org/pub/",
"rdp_url": None,
"data_license_attributes": {
"non_commercial": False,
"share_alike": False,
"attribution": False,
},
"genome_assemblies": ["GRCh38"],
},
}
],
"service_meta_": {
"name": "gene-normalizer",
"version": __version__,
"response_datetime": "2022-03-23 15:57:14.180908",
"url": "https://github.com/cancervariants/gene-normalization",
},
}
}
)
model_config = ConfigDict(json_schema_extra={"example": {}})


class GeneTypeFieldName(str, Enum):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_ensembl_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self):
self.query_handler = QueryHandler(database)

def search(self, query_str, incl="ensembl"):
resp = self.query_handler.search(query_str, keyed=True, incl=incl)
resp = self.query_handler.search(query_str, incl=incl)
return resp.source_matches[SourceName.ENSEMBL]

e = QueryGetter()
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_hgnc_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self):
self.query_handler = QueryHandler(database)

def search(self, query_str, incl="hgnc"):
resp = self.query_handler.search(query_str, keyed=True, incl=incl)
resp = self.query_handler.search(query_str, incl=incl)
return resp.source_matches[SourceName.HGNC]

h = QueryGetter()
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_ncbi_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(self):
self.query_handler = QueryHandler(database)

def search(self, query_str, incl="ncbi"):
resp = self.query_handler.search(query_str, keyed=True, incl=incl)
resp = self.query_handler.search(query_str, incl=incl)
return resp.source_matches[SourceName.NCBI]

n = QueryGetter()
Expand Down
Loading

0 comments on commit 7631ceb

Please sign in to comment.