Skip to content

Commit

Permalink
queriesTester and multi_variants modification
Browse files Browse the repository at this point in the history
  • Loading branch information
mbaudis committed Dec 13, 2024
1 parent 84d4420 commit b363a17
Show file tree
Hide file tree
Showing 14 changed files with 150 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ export function DatasetResultBox({ data: responseSet, responseMeta, query }) {
) : null}
</div>
<div className="column is-one-third">
{info.counts.variants > 0 ? (
{info.counts.variants > 0 && query.referenceName ? (
<div>
<UCSCRegion query={query} />
</div>
Expand Down
8 changes: 5 additions & 3 deletions beaconplusWeb/src/config/beaconSearchParameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ parameters:
geneId:
label: Gene Symbol
placeholder: MYCN
infoText: One or more comma concatenated gene symbols (HUGO), e.g. "TP53,CDKN2A".
infoText: A gene symbol (HUGO), e.g. "TP53" or "CDKN2A".
isHidden: true
aminoacidChange:
label: Aminoacid alteration
Expand Down Expand Up @@ -209,7 +209,8 @@ parameters:
create bracket requests or (experimentally) fusion requests (where the 2nd
region denotes the "mateName" chromosome of the fusion partner).
Example here would be the use of "8q24,14q32" to search for fusion events
involving MYC and IGH (_i.e._ the typical "Burkitt lymphoma fusion").
involving MYC and IGH (_i.e._ the typical "Burkitt lymphoma fusion").<br/>
This is *not* a standard Beacon option.
isHidden: true
variantQueryDigests:
label: Variation Shorthand(s)
Expand All @@ -220,7 +221,8 @@ parameters:
* define a variant request in a concatenated `referenceName:start--end:variantType`
or `referenceName:start:referenceBases>alternateBases` format
* optionally comma-concatenate 2 or more of such strings, to identify samples
carrying matches for all of these variations
carrying matches for all of these variations<br/>
This is *not* a standard Beacon option.
isHidden: true
variantMinLength:
label: Min Variant Length
Expand Down
2 changes: 0 additions & 2 deletions beaconplusWeb/src/site-specific/searchExamples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,6 @@
description: |
This example shows a short form version of the CDKN2A deletion example.
parameters:
cytoBands:
isHidden: true
variantQueryDigests:
isHidden: false
defaultValue: "9:21000001-21975098--21967753-24000000:DEL"
Expand Down
10 changes: 4 additions & 6 deletions bycon/config/argument_definitions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,8 @@ $defs:
The maximum variant length in bases e.g. for CNV queries.
gene_id:
type: array
items:
type: string
pattern: '^\w+?(\w+?(\-\w+?)?)?$'
type: string
pattern: '^\w+?(\w+?(\-\w+?)?)?$'
db_key: Null
cmdFlags:
- --geneId
Expand Down Expand Up @@ -328,9 +326,9 @@ $defs:
- --variantInternalId
description: 'An id value used for all variant instances of the same composition; a kind of `digest`'

################################################################################
##############################################################################
# non-standard parameters
################################################################################
##############################################################################

accessid:
type: string
Expand Down
9 changes: 5 additions & 4 deletions bycon/config/variant_request_definitions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ request_pars:
- mate_end
- variant_min_length
- variant_max_length

# BeaconPlus
multi_request_pars:
- gene_id
- aminoacid_change
- genomic_allele_short_form

# BeaconPlus
multi_request_pars:
- cyto_bands
- variant_query_digests

Expand Down Expand Up @@ -109,8 +109,9 @@ request_types:
# - assembly_id
- start
- reference_name
- reference_bases
- alternate_bases
optional:
- reference_bases

variantTypeFilteredRequest:
description: >-
Expand Down
2 changes: 1 addition & 1 deletion bycon/lib/query_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def __run_multi_variants_query(self):
if len(queries) < 1:
return

prdbug(f' ====== {BYC["response_entity"]} =====')
prdbug(f' ====== {BYC.get("response_entity_id", "biosample")} =====')
res_e_id = BYC.get("response_entity_id", "biosample")
res_e_coll = BYC["response_entity"].get("collection", "biosamples")
id_k = f'{res_e_id}_id'
Expand Down
78 changes: 37 additions & 41 deletions bycon/lib/query_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,33 +185,25 @@ def __query_from_variant_pars(self):
# -------------------------------------------------------------------------#

def __preprocess_variant_pars(self):
self.variant_multi_pars = BYC_PARS.get("variant_multi_pars", [])
v_p_s = self.variant_request_definitions.get("request_pars", [])
v_mp_s = self.variant_request_definitions.get("multi_request_pars", [])

vips = []
v_m_ps = BYC_PARS.get("variant_multi_pars", [])

v_p_s = self.variant_request_definitions.get("request_pars", [])
# standard pars
s_q_p_0 = {}
if (v_s_s := v_p_s & BYC_PARS.keys()):
for v_p in v_s_s:
v_v = BYC_PARS.get(v_p)
s_q_p_0.update({ v_p: v_v })
BYC_VARGS.update({ v_p: v_v })
prdbug(f'...__preprocess_variant_pars: {v_p} {v_v}')

for v_mp in v_mp_s:
if len(v_mp_vs := BYC_PARS.get(v_mp, [])) > 0:
for v in v_mp_vs:
if len(s_q_p_0.keys()) > 0:
t_p = s_q_p_0.copy()
t_p.update({v_mp: v})
vips.append(self.__parse_variant_parameters(t_p))
else:
vips.append({v_mp: v})
if (len(vips)) < 1:
if len(s_q_p_0.keys()) > 0:
vips.append(self.__parse_variant_parameters(s_q_p_0))
s_q_p_0.update({ v_p: BYC_PARS.get(v_p) })
if len(s_q_p_0.keys()) > 0:
v_m_ps.append(s_q_p_0)

vips = []
for v in v_m_ps:
vp = {}
if (v_s_s := v_p_s & v.keys()):
for v_p in v_s_s:
vp.update({ v_p: v.get(v_p) })
if len(vp.keys()) > 0:
vips.append(self.__parse_variant_parameters(vp))

self.variant_multi_pars = vips

Expand All @@ -225,17 +217,17 @@ def __parse_variant_parameters(self, variant_pars):

# value checks
v_p_c = { }

for p_k, v_p in variant_pars.items():
v_p = variant_pars[ p_k ]
v_p_k = humps.decamelize(p_k)
if "variant_type" in v_p_k:
v_s_c = VariantTypes().variantStateChildren(v_p)
v_p_c[ v_p_k ] = { "$in": v_s_c }
elif "reference_name" in v_p_k or "mate_name" in v_p_k:
v_p_c[ v_p_k ] = self.ChroNames.refseq(v_p)
else:
v_p_c[ v_p_k ] = v_p
v_p_c.update({v_p_k: { "$in": v_s_c }})
continue
if "reference_name" in v_p_k or "mate_name" in v_p_k:
v_p_c.update({v_p_k: self.ChroNames.refseq(v_p)})
continue

v_p_c.update({v_p_k: v_p})

return v_p_c

Expand Down Expand Up @@ -388,10 +380,10 @@ def __create_geneVariantRequest_query(self, v_pars):
"end": [ gene.get("end", 1) ]
}
# TODO: global variant parameters by definition file
g_p_s = ["variant_type", "variant_min_length", "variant_max_length"]
for g_p in g_p_s:
if g_p in BYC_VARGS:
v_pars.update( { g_p: BYC_VARGS[g_p] } )
# g_p_s = ["variant_type", "variant_min_length", "variant_max_length"]
# for g_p in g_p_s:
# if g_p in BYC_VARGS:
# v_pars.update( { g_p: BYC_VARGS[g_p] } )
q_t = self.__create_variantRangeRequest_query(v_pars)
prdbug(f'...geneVariantRequest query result: {q_t}')

Expand Down Expand Up @@ -477,10 +469,10 @@ def __create_cytoBandRequest_query(self, v_pars):
} )
# TODO: other global parameters (length etc.)
# TODO: global variant parameters by definition file
g_p_s = ["variant_type", "variant_min_length", "variant_max_length"]
for g_p in g_p_s:
if g_p in BYC_VARGS:
v_pars.update( { g_p: BYC_VARGS[g_p] } )
# g_p_s = ["variant_type", "variant_min_length", "variant_max_length"]
# for g_p in g_p_s:
# if g_p in BYC_VARGS:
# v_pars.update( { g_p: BYC_VARGS[g_p] } )
self.variant_request_type = "variantRangeRequest"
q = self.__create_variantRangeRequest_query(v_pars)
return q
Expand Down Expand Up @@ -722,10 +714,14 @@ def __query_from_filters(self):
# TODO: needs a general solution; so far for the iso age w/
# pre-calculated days field...
if "alphanumeric" in f_info.get("ft_type", "ontology"):
f_class, comp, val = re.match(r'^(\w+):([<>=]+?)(\w[\w.]+?)$', f_info["id"]).group(1, 2, 3)
if "iso8601duration" in f_info.get("format", "___none___"):
val = days_from_iso8601duration(val)
f_lists[f_entity][f_field].append(self.__mongo_comparator_query(comp, val))
prdbug(f'__query_from_filters ... alphanumeric: {f_info["id"]}')
if re.match(r'^(\w+):([<>=]+?)?(\w[\w.]+?)$', f_info["id"]):
f_class, comp, val = re.match(r'^(\w+):([<>=]+?)?(\w[\w.]+?)$', f_info["id"]).group(1, 2, 3)
if "iso8601duration" in f_info.get("format", "___none___"):
val = days_from_iso8601duration(val)
f_lists[f_entity][f_field].append(self.__mongo_comparator_query(comp, val))
else:
f_lists[f_entity][f_field].append(f_info["id"])
elif f_desc is True:
if f_neg is True:
f_lists[f_entity][f_field].append({'$nin': f_info["child_terms"]})
Expand Down
6 changes: 2 additions & 4 deletions docs/generated/argument_definitions.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,8 @@ The minimal variant length in bases e.g. for CNV queries.
The maximum variant length in bases e.g. for CNV queries.

#### `gene_id`
**type:** array
**items:**
- `type`: `string`
- `pattern`: `^\w+?(\w+?(\-\w+?)?)?$`
**type:** string
**pattern:** `^\w+?(\w+?(\-\w+?)?)?$`
**db_key:** None
**cmdFlags:** `--geneId`
**description:**
Expand Down
6 changes: 6 additions & 0 deletions housekeepers/config/mongodb_indexer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,13 @@ indexed_special_dbs:
location.start: 1
location.end: 1
variant_state.id: 1
type: compound
variantallelerangequery:
db_key:
location.sequence_id: 1
location.start: 1
sequence: 1
location.end: 1
type: compound

_byconHousekeepingDB:
Expand Down
57 changes: 57 additions & 0 deletions housekeepers/queriesTester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python3

from bycon import *
from pymongo import MongoClient

from bycon import byconServiceLibs
from service_helpers import assertSingleDatasetOrExit


"""
"""

################################################################################
################################################################################
################################################################################

# Run every query definition found under `BYC["test_queries"]` against the
# single selected dataset and print the number of analysis hits per query.
#
# Each definition is a mapping of request parameters; the special key
# `filters` holds a list of filter ids, every other key is written verbatim
# into `BYC_PARS`. Since the query machinery reads its input from the
# `BYC` / `BYC_PARS` globals, they are populated before and cleaned up after
# each individual query.

ds_id = assertSingleDatasetOrExit()
BYC_PARS.update({"response_entity_path_id": "analyses"})
set_entities()

for qek, qev in BYC.get("test_queries", {}).items():
    # inject the parameters of this test query into the globals
    for p, v in qev.items():
        if p == "filters":
            # an empty `filters:` entry in the YAML is loaded as `None`
            f_l = [{"id": f} for f in (v or [])]
            if f_l:
                BYC.update({"BYC_FILTERS": f_l})
        else:
            BYC_PARS.update({p: v})

    BRS = ByconResultSets()
    r_c = BRS.get_record_queries()
    ds_results = BRS.datasetsResults()

    # clean out those globals for next run
    # filters are tricky since they have a default `[]` value
    # and have been pre-parsed into BYC_FILTERS at the stage of
    # `ByconResultSets()` (_i.e._ embedded `ByconQuery()`)
    for p in qev:
        if p == "filters":
            BYC_PARS.update({"filters": []})
        else:
            # tolerate parameters already removed during query processing
            BYC_PARS.pop(p, None)
    # always reset, so a filters-only query cannot leak into the next one
    BYC.update({"BYC_FILTERS": []})

    if not (ds := ds_results.get(ds_id)):
        print(f'ERROR - no {qek} data for {ds_id}')
        prjsonnice(r_c)
        continue
    if BYC.get("DEBUG_MODE"):
        print(f'############################### {qek} ###############################')
        prjsonnice(r_c)

    # NOTE(review): assumes the dataset result always carries an
    # "analyses.id" entry for the "analyses" response path - confirm.
    print(f'==> {qek} with {ds["analyses.id"].get("target_count")} analysis hits')

34 changes: 32 additions & 2 deletions local/test_queries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,35 @@ CDKN2AcnvQuery:
start: [21000001,21975098]
end: [21967753,23000000]
variant_type: EFO:0030067

EIF4A1snvQuery:
reference_name: refseq:NC_000017.11
start: [7577120]
variant_type: SO:0001059
alternate_bases: A
reference_bases: G

EIF4A1rangeQuery:
reference_name: refseq:NC_000017.11
start: [7572825]
end: [7579005]
variant_type: SO:0001059

geneMatchQuery:
gene_id: CDK2
variant_min_length: 100000
variant_max_length: 2000000
variant_type: EFO:0030070

HeLaIdentifierQuery:
filters:
- cellosaurus:CVCL_0030

CDKN2AshortFormExample:
filters:
- NCIT:C3058
variant_query_digests: 9:21000001-21975098--21967753-24000000:DEL
variant_query_digests: [9:21000001-21975098--21967753-24000000:DEL]

fusionExample:
variant_type: SO:0000806
reference_name: refseq:NC_000008.11
Expand All @@ -36,10 +42,34 @@ fusionExample:
end: [47300000]
mate_start: [26200000]
mate_end: [35600000]

ClinicalFiltersQuery:
filters:
- NCIT:C48786
- NCIT:C20197

AgeAtDXquery:
filters:
- ageAtDiagnosis:<=P18Y
- ageAtDiagnosis:>=P65Y
- ageAtDiagnosis:<=P69Y

MultiAlleleCNVquery:
variant_multi_pars:
- reference_name: refseq:NC_000017.11
start: [7673801]
alternate_bases: A
- reference_name: refseq:NC_000017.11
start: [5000000, 7687480]
end: [7668422, 9000000]
variant_type: EFO:0030067

DoubleGeneDeletion:
variant_multi_pars:
- reference_name: refseq:NC_000009.12
start: [21500001, 21975098]
end: [21967753, 22500000]
variant_type: EFO:0030067
- reference_name: refseq:NC_000017.11
start: [5000000, 7687480]
end: [7668422, 9000000]
variant_type: EFO:0030067
Binary file modified rsrc/mongodump/_byconServicesDB.tar.gz
Binary file not shown.
Binary file modified rsrc/mongodump/examplez.tar.gz
Binary file not shown.
Loading

0 comments on commit b363a17

Please sign in to comment.