From 70ec92fb6003230dc7f7d0ddea264f4d32ff6074 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Tue, 10 Sep 2019 21:09:03 -0700 Subject: [PATCH 1/5] -> Add 'case insensitive' flag to BQ filter builder, default to TRUE --- cohorts/utils.py | 14 +++++++------- google_helpers/bigquery/bq_support.py | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index ae091bcc..84b6d0f4 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -166,7 +166,7 @@ def create_cohort(user, filters=None, name=None, source_id=None): # Get samples and cases by querying BQ tables -def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters='OR', long_form=False): +def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters='OR', long_form=False, case_insens=True): comb_mut_filters = comb_mut_filters.upper() @@ -292,7 +292,7 @@ def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters=' joins = "" if len(data_type_filters) > 0: - data_type_where_clause = BigQuerySupport.build_bq_filter_and_params(data_type_filters) + data_type_where_clause = BigQuerySupport.build_bq_filter_and_params(data_type_filters, case_insens=case_insens) data_avail_sample_subquery = (data_avail_sample_query % data_avail_table) + ' WHERE ' + \ data_type_where_clause['filter_string'] parameters += data_type_where_clause['parameters'] @@ -304,9 +304,9 @@ def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters=' # Send in a type schema for Biospecimen, because sample_type is an integer encoded as a string, # so detection will not work properly type_schema = {x['name']: x['type'] for x in biospec_fields} - where_clause['biospec'] = BigQuerySupport.build_bq_filter_and_params(filters['biospec'], field_prefix='bs.', type_schema=type_schema) + where_clause['biospec'] = BigQuerySupport.build_bq_filter_and_params(filters['biospec'], field_prefix='bs.', type_schema=type_schema, 
case_insens=case_insens) if len(list(filters['clin'].keys())): - where_clause['clin'] = BigQuerySupport.build_bq_filter_and_params(filters['clin'], field_prefix='cl.') + where_clause['clin'] = BigQuerySupport.build_bq_filter_and_params(filters['clin'], field_prefix='cl.', case_insens=case_insens) mut_query_job = None @@ -344,12 +344,12 @@ def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters=' this_filter = {} this_filter[filter] = build_queries[build]['raw_filters'][filter] build_queries[build]['filter_str_params'].append(BigQuerySupport.build_bq_filter_and_params( - this_filter, comb_mut_filters, build+'_{}'.format(str(filter_num)) + this_filter, comb_mut_filters, build+'_{}'.format(str(filter_num)), case_insens=case_insens )) filter_num += 1 elif comb_mut_filters == 'OR': build_queries[build]['filter_str_params'].append(BigQuerySupport.build_bq_filter_and_params( - build_queries[build]['raw_filters'], comb_mut_filters, build + build_queries[build]['raw_filters'], comb_mut_filters, build, case_insens=case_insens )) # Create the queries and their parameters @@ -391,7 +391,7 @@ def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters=' any_filter = {} any_filter[filter] = build_queries[build]['not_any'][not_any] filter_str_param = BigQuerySupport.build_bq_filter_and_params( - any_filter,param_suffix=build+'_any_{}'.format(any_count) + any_filter,param_suffix=build+'_any_{}'.format(any_count), case_insens=case_insens ) build_queries[build]['filter_str_params'].append(filter_str_param) diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index 51826263..a9e5b56d 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -505,7 +505,7 @@ def insert_job_batch_and_get_results(cls, query_set): # # TODO: add support for DATES @staticmethod - def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with_count_toggle=False, 
field_prefix=None, type_schema=None): + def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with_count_toggle=False, field_prefix=None, type_schema=None, case_insens=True): result = { 'filter_string': '', 'parameters': [] @@ -609,7 +609,7 @@ def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with # Scalar param query_param['parameterValue']['value'] = values[0] if query_param['parameterType']['type'] == 'STRING': - if '%' in values[0]: + if '%' in values[0] or case_insens: filter_string += "LOWER({}{}) LIKE LOWER(@{})".format('' if not field_prefix else field_prefix, attr, param_name) else: filter_string += "{}{} = @{}".format('' if not field_prefix else field_prefix, attr, From 5fe0cdf56272d71c727f42eb8e710f7aba353147 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Thu, 19 Sep 2019 11:09:25 -0700 Subject: [PATCH 2/5] -> Remove unneeded/duplicated settings variables --- accounts/service_obj.py | 2 +- google_helpers/resourcemanager_service.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/accounts/service_obj.py b/accounts/service_obj.py index a11d2396..4e35cc71 100644 --- a/accounts/service_obj.py +++ b/accounts/service_obj.py @@ -225,7 +225,7 @@ def get_django_values(): try: from django.conf import settings values.append(settings.CLIENT_EMAIL) - values.append(settings.WEB_CLIENT_ID) + values.append(settings.GOOGLE_CLIENT_ID) except Exception as e: logger.error("Could not read Service Account settings from Django configuration.") logger.exception(e) diff --git a/google_helpers/resourcemanager_service.py b/google_helpers/resourcemanager_service.py index 3e17d43b..ed094b53 100644 --- a/google_helpers/resourcemanager_service.py +++ b/google_helpers/resourcemanager_service.py @@ -36,6 +36,6 @@ def get_special_crm_resource(): has the Browser (or Viewer, Editor, Owner) role on the other project. 
""" credentials = GoogleCredentials.from_stream( - settings.USER_GCP_ACCESS_CREDENTIALS).create_scoped(CRM_SCOPES) + settings.MONITORING_SA_ACCESS_CREDENTIALS).create_scoped(CRM_SCOPES) service = build_with_retries('cloudresourcemanager', 'v1beta1', credentials, 2) return service From af0a607250c5512f9f4fc1576ab5a5e7bf08303f Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Wed, 25 Sep 2019 11:19:30 -0700 Subject: [PATCH 3/5] -> Support case insensitivity in multi-value filters as well --- google_helpers/bigquery/bq_support.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index a9e5b56d..a38afb96 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -654,9 +654,9 @@ def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with query_param['parameterType']['arrayType'] = { 'type': parameter_type } - query_param['parameterValue'] = {'arrayValues': [{'value': x} for x in values]} + query_param['parameterValue'] = {'arrayValues': [{'value': x.lower() if parameter_type == 'STRING' else x} for x in values]} - filter_string += "{}{} IN UNNEST(@{})".format('' if not field_prefix else field_prefix, attr, param_name) + filter_string += "LOWER({}{}) IN UNNEST(@{})".format('' if not field_prefix else field_prefix, attr, param_name) if with_count_toggle: filter_string = "({}) OR @{}_filtering = 'not_filtering'".format(filter_string,param_name) From 2593554bec09bd6ab7d438296133f5eaa9c207db Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Wed, 25 Sep 2019 11:25:04 -0700 Subject: [PATCH 4/5] -> Support case insensitivity in multi-value filters as well --- google_helpers/bigquery/bq_support.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index a9e5b56d..a38afb96 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -654,9 +654,9 @@ def build_bq_filter_and_params(filters, comb_with='AND', param_suffix=None, with query_param['parameterType']['arrayType'] = { 'type': parameter_type } - query_param['parameterValue'] = {'arrayValues': [{'value': x} for x in values]} + query_param['parameterValue'] = {'arrayValues': [{'value': x.lower() if parameter_type == 'STRING' else x} for x in values]} - filter_string += "{}{} IN UNNEST(@{})".format('' if not field_prefix else field_prefix, attr, param_name) + filter_string += "LOWER({}{}) IN UNNEST(@{})".format('' if not field_prefix else field_prefix, attr, param_name) if with_count_toggle: filter_string = "({}) OR @{}_filtering = 'not_filtering'".format(filter_string,param_name) From d21cce6729606a3dee1b28c37cf683377934cbbc Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Wed, 25 Sep 2019 16:55:54 -0700 Subject: [PATCH 5/5] -> Enable case insensitivity in cohort creation --- cohorts/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 84b6d0f4..adda3af7 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -34,7 +34,7 @@ from django.core.exceptions import ObjectDoesNotExist -def create_cohort(user, filters=None, name=None, source_id=None): +def create_cohort(user, filters=None, name=None, source_id=None, case_insens=True): if not filters and not name: # Can't save/edit a cohort when nothing is being changed! 
@@ -64,7 +64,7 @@ def create_cohort(user, filters=None, name=None, source_id=None): if source_progs: source_prog_filters = {x: {} for x in source_progs if x not in list(barcodes.keys())} if len(source_prog_filters): - source_prog_barcodes = get_sample_case_list_bq(source_id, source_prog_filters, long_form=True) + source_prog_barcodes = get_sample_case_list_bq(source_id, source_prog_filters, long_form=True, case_insens=case_insens) for prog in source_prog_barcodes: barcodes[prog] = source_prog_barcodes[prog]