Skip to content

Commit

Permalink
2.0.5
Browse files Browse the repository at this point in the history
mainly examples & methods for this
  • Loading branch information
mbaudis committed Oct 28, 2024
1 parent a12e50b commit 114659a
Show file tree
Hide file tree
Showing 71 changed files with 636 additions and 14,642 deletions.
21 changes: 4 additions & 17 deletions bycon/byconServiceLibs/bycon_bundler.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,37 +441,24 @@ def __isetBundlesFromCollationParameters(self):
# return

fmap_name = "frequencymap"

query = CollationQuery().getQuery()

# id_q = {}
# if len(filters) > 0:
# fids = [x.get("id", "___none___") for x in filters]
# id_q = {"id": {"$in": fids}}
# elif len(self.collation_types) > 0:
# id_q = {"collation_type": {"$in": self.collation_types}}


prdbug(f'... __isetBundlesFromCollationParameters query {query}')

mongo_client = MongoClient(host=DB_MONGOHOST)
for ds_id in datset_ids:
coll_db = mongo_client[ds_id]
coll_ids = coll_db[ "collations" ].distinct("id", query)
for f_val in coll_ids:
f_q = { "id": f_val }
if not (collation_f := coll_db["frequencymaps"].find_one(f_q)):
continue
if not (collation_c := coll_db["collations"].find_one(f_q)):
continue
for collation_f in coll_db["frequencymaps" ].find(query):
if not fmap_name in collation_f:
continue
fmap_count = collation_f[ fmap_name ].get("cnv_analyses", 0)
if fmap_count < self.min_number:
continue
r_o = {
"dataset_id": ds_id,
"group_id": f_val,
"label": re.sub(r';', ',', collation_c["label"]),
"group_id": collation_f.get("id", ""),
"label": re.sub(r';', ',', collation_f.get("label", "")),
"sample_count": fmap_count,
"frequencymap_samples": collation_f[ fmap_name ].get("frequencymap_samples", fmap_count),
"interval_frequencies": collation_f[ fmap_name ]["intervals"] }
Expand Down
1 change: 0 additions & 1 deletion bycon/byconServiceLibs/datatable_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import csv, re, requests
# from attrdictionary import AttrDict
from random import sample as randomSamples

# bycon
from bycon import RefactoredValues, prdbug, prdlhead, prjsonnice, BYC, BYC_PARS, ENV
Expand Down
4 changes: 2 additions & 2 deletions bycon/byconServiceLibs/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def read_tsv_to_dictlist(filepath, max_count=0):
for l in data:
dictlist.append(dict(l))
if 0 < max_count < len(dictlist):
dictlist = random_samples(dictlist, k=max_count)
dictlist = random_samples(dictlist, max_count)

return dictlist, fieldnames

Expand All @@ -76,7 +76,7 @@ def read_www_tsv_to_dictlist(www, max_count=0):
dictlist.append(dict(l))

if 0 < max_count < len(dictlist):
dictlist = random_samples(dictlist, k=max_count)
dictlist = random_samples(dictlist, max_count)

return dictlist, fieldnames

Expand Down
4 changes: 2 additions & 2 deletions bycon/byconServiceLibs/ontology_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from random import sample
from random import sample as random_samples
from progress.bar import Bar

from pymongo import MongoClient
Expand Down Expand Up @@ -213,7 +213,7 @@ def __create_ontology_maps(self):
for k, v in keyed_maps.items():
examples = self.bios_coll.distinct("notes", v["local_query"])
s_no = min(10, len(examples))
e = sample(examples, s_no)
e = random_samples(examples, s_no)
e = [t for t in e if len(t) > 2]
v.update({"examples": e})
if len(v.get("errors", 0)) > 0:
Expand Down
5 changes: 3 additions & 2 deletions bycon/byconServiceLibs/service_helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import re, time, base36
import re, time, base36, datetime
from humps import decamelize
from os import path
from pathlib import Path

from bycon import load_yaml_empty_fallback, BYC, BYC_PARS, ENV
from bycon import load_yaml_empty_fallback, BYC, BYC_PARS, ENV, prdbug

################################################################################

Expand Down Expand Up @@ -68,6 +68,7 @@ def open_text_streaming(filename="data.pgxseg"):

def close_text_streaming():
    """
    Finish a streamed text response: emit a trailing newline, log the
    closing wall-clock time, and terminate the process.

    This helper never returns — `exit()` stops the script, so it must be
    the last call made after all streamed content has been written.
    """
    print()
    # NOTE(review): prdbug presumably emits only when debugging is enabled
    # — confirm against its definition in the bycon package.
    prdbug(f'... closing text streaming at {datetime.datetime.now().strftime("%H:%M:%S")}')
    exit()


Expand Down
4 changes: 2 additions & 2 deletions bycon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
LIB_PATH = path.join( pkg_path, "lib")

# path of the calling script is used to point to a local config directory
__caller_path = path.dirname( path.abspath(sys.argv[0]))
LOC_PATH = path.join(__caller_path, pardir, "local")
CALLER_PATH = path.dirname( path.abspath(sys.argv[0]))
LOC_PATH = path.join(CALLER_PATH, pardir, "local")

REQUEST_PATH_ROOT = "beacon"

Expand Down
8 changes: 0 additions & 8 deletions bycon/config/argument_definitions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -502,14 +502,6 @@ outputfile:
- --outputfile
description: output file where supported (cmd line)

randno:
type: integer
cmdFlags:
- -r
- --randno
description: random number to limit processing, where supported
default: 0

min_number:
type: integer
cmdFlags:
Expand Down
66 changes: 38 additions & 28 deletions bycon/config/datatable_mappings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ ordered_metadata_core:
- individual_id
- experiment_title
- biosample_name
- biosample_notes
- notes
- histological_diagnosis_id
- histological_diagnosis_label
- experiment_id
Expand All @@ -86,6 +86,8 @@ ordered_metadata_core:
- pathological_stage_label
- tumor_grade_id
- tumor_grade_label
- ethnicity_id
- ethnicity_label
- index_disease_followup_time
- index_disease_followup_state_id
- index_disease_followup_state_label
Expand All @@ -95,6 +97,21 @@ ordered_metadata_core:
- pipeline_ref
- analysis_operation_id
- analysis_operation_label
- geoprov_id


ordered_variants_core:
- analysis_id
- biosample_id
- individual_id
- reference_name
- start
- end
- variant_state_id
- variant_state_label
- reference_sequence
- sequence


definitions:

Expand Down Expand Up @@ -140,14 +157,6 @@ definitions:
pipeline_ref:
db_key: pipeline_ref
beacon_model_path: analyses.pipelineRef

# In Beacon but not used by us
# library_source_id:
# beacon_model_path: runs.librarySource.id
# default: "GENEPIO:0001966"
# library_source_label:
# beacon_model_path: runs.librarySource.label
# default: "genomic source"

# bycon & data management specials
analysis_operation_id:
Expand Down Expand Up @@ -184,23 +193,6 @@ definitions:
- T48_Xba_051011
data_provenance:
db_key: info.data_provenance
geoprov_city:
type: string
db_key: geo_location.properties.city
indexed: True
geoprov_country:
type: string
db_key: geo_location.properties.country
indexed: True
geoprov_iso_alpha3:
type: string
db_key: geo_location.properties.ISO3166alpha3
indexed: True
geoprov_long_lat:
type: array
items:
type: number
db_key: geo_location.geometry.coordinates


#------------------------------------------------------------------------------#
Expand Down Expand Up @@ -391,23 +383,31 @@ definitions:
- pgx:cohort-TCGA
- pgx:cohort-TCGAcancers
- pgx:cohort-arraymap
geoprov_id:
type: string
db_key: geo_location.properties.id
indexed: True
geoprov_city:
type: string
db_key: geo_location.properties.city
indexed: True
computed: True
geoprov_country:
type: string
db_key: geo_location.properties.country
indexed: True
computed: True
geoprov_iso_alpha3:
type: string
db_key: geo_location.properties.ISO3166alpha3
indexed: True
computed: True
geoprov_long_lat:
type: array
items:
type: number
type: number
db_key: geo_location.geometry.coordinates
computed: True

# special export labels
group_id:
Expand Down Expand Up @@ -505,7 +505,16 @@ definitions:
auxiliary_disease_notes:
type: string
db_key: auxiliary_disease.notes

ethnicity_id:
type: string
db_key: ethnicity.id
beacon_model_path: individuals.ethnicity.id
indexed: True
ethnicity_label:
type: string
db_key: ethnicity.label
beacon_model_path: individuals.ethnicity.label
indexed: True

#------------------------------------------------------------------------------#

Expand Down Expand Up @@ -547,6 +556,7 @@ definitions:
type: string
db_key: location.sequence_id
indexed: True
computed: True
reference_name:
type: string
db_key: location.chromosome
Expand Down
1 change: 0 additions & 1 deletion bycon/config/entity_defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ info:
- "service-info"
- "service_info"
response_entity_id: info
collection: Null
response_schema: beaconInfoResponse
bycon_response_class: BeaconInfoResponse
beacon_schema:
Expand Down
28 changes: 25 additions & 3 deletions bycon/lib/beacon_response_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,6 @@ def __init__(self):
self.result_sets = list() # data rewrapped into the resultSets list
self.flattened_data = list() # data from all resultSets as flat list
self.entity_defaults = BYC.get("entity_defaults", {})
self.data_collection = BYC["response_entity"].get("collection", "biosamples")
self.response_entity_id = BYC.get("response_entity_id", "biosample")
self.limit = BYC_PARS.get("limit")
self.skip = BYC_PARS.get("skip")
Expand All @@ -713,6 +712,11 @@ def __init__(self):
# ----------------------------- public ------------------------------------#
# -------------------------------------------------------------------------#

def get_record_queries(self):
    # Accessor for the query objects assembled elsewhere in this class;
    # returned as-is, without copying.
    return self.record_queries


# -------------------------------------------------------------------------#
def get_populated_result_sets(self):
self.__retrieve_datasets_data()
self.__retrieve_variants_data()
Expand Down Expand Up @@ -740,6 +744,24 @@ def datasetsResults(self):
return self.datasets_results


# -------------------------------------------------------------------------#

def dataset_results_individual_ids(self, ds_id="___none___"):
    """
    Collect the unique `individual_id` values from the retrieved records
    of one dataset.

    Parameters:
        ds_id: id of the dataset to read; the sentinel default
               "___none___" fails the lookup and yields an empty result.

    Returns:
        List of distinct individual ids; empty list (after appending to
        BYC["ERRORS"]) when `ds_id` is missing from the retrieved data.
    """
    individual_ids = set()
    # force retrieval at the "individual" entity level before reading results
    self.response_entity_id = "individual"
    self.__retrieve_datasets_data()
    if ds_id not in self.datasets_data:
        # fix: message previously named `dataset_results_biosample_ids`
        # (copy-paste from the sibling method)
        BYC["ERRORS"].append("no correct dataset id provided to `dataset_results_individual_ids`")
        return []

    for record in self.datasets_data[ds_id]:
        # records without an individual_id (or with a falsy one) are skipped
        if (ind_id := record.get("individual_id")):
            individual_ids.add(ind_id)

    return list(individual_ids)


# -------------------------------------------------------------------------#
# ----------------------------- private -----------------------------------#
# -------------------------------------------------------------------------#
Expand Down Expand Up @@ -809,7 +831,7 @@ def __retrieve_datasets_results(self):
# -------------------------------------------------------------------------#

def __retrieve_datasets_data(self):
if "variants" in self.data_collection:
if "variant" in self.response_entity_id.lower():
return

e_d_s = BYC["entity_defaults"].get(self.response_entity_id, {})
Expand Down Expand Up @@ -846,7 +868,7 @@ def __retrieve_datasets_data(self):
# -------------------------------------------------------------------------#

def __retrieve_variants_data(self):
if not "variants" in self.data_collection:
if not "variant" in self.response_entity_id.lower():
return

ds_v_start = datetime.datetime.now()
Expand Down
2 changes: 1 addition & 1 deletion bycon/lib/query_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def __loop_multivars(self):
queries.append(q)
continue

prdbug(f'??? queries: {queries}')
prdbug(f'__loop_multivars queries: {queries}')

return queries

Expand Down
22 changes: 14 additions & 8 deletions bycon/lib/service_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ def set_entities():
This function evaluates the definitions for the entities and their selection
by path elements (including aliases) or parameters and updates the global
BYC definitions.
As approximation in a script one can override the original selection by providing
a `--responseEntityPathId analyses` (or "individuals" etc.) parameter or forcing
```
BYC_PARS.update({"response_entity_path_id":"analyses"})
set_entities()
```
"""
b_e_d = BYC.get("entity_defaults", {})
a_defs = BYC.get("argument_definitions", {})
Expand All @@ -61,16 +68,15 @@ def set_entities():
# it should only apply to special cases (e.g. overriding the standard
# biosample table export in services with individuals) or for command
# line testing
if (e_p_id := BYC_PARS.get("request_entity_path_id", "___none___")) in dealiased_path_ids.keys():
BYC.update({"request_entity_path_id": e_p_id})
if (e_p_id := BYC_PARS.get("response_entity_path_id", "___none___")) in dealiased_path_ids.keys():
BYC.update({"response_entity_path_id": e_p_id})
if (q_p_id := BYC_PARS.get("request_entity_path_id", "___none___")) in dealiased_path_ids.keys():
BYC.update({"request_entity_path_id": q_p_id})
if (p_p_id := BYC_PARS.get("response_entity_path_id", "___none___")) in dealiased_path_ids.keys():
BYC.update({"response_entity_path_id": p_p_id})

p_i_d = BYC.get("request_entity_path_id", "___none___")
if p_i_d not in dealiased_path_ids.keys():
if (p_i_d := BYC.get("request_entity_path_id", "___none___")) not in dealiased_path_ids.keys():
p_i_d = "info"
rp_i_d = BYC.get("response_entity_path_id", "___none___")
if rp_i_d not in dealiased_path_ids.keys():

if (rp_i_d := BYC.get("response_entity_path_id", "___none___")) not in dealiased_path_ids.keys():
rp_i_d = p_i_d

# after settling the paths we can get the entity ids
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
},
"examples": [
{
"id": "pgxcohort-arraymap",
"id": "pgx:cohort-arraymap",
"label": "arrayMap collection"
}
]
Expand Down
Loading

0 comments on commit 114659a

Please sign in to comment.