From edc85bf3c3acdffd71dd0edbe6bc0be31b38a7d8 Mon Sep 17 00:00:00 2001 From: wlongabaugh Date: Wed, 17 Apr 2019 12:15:20 -0700 Subject: [PATCH 01/19] Adding signed URL capabilities --- accounts/dcf_support.py | 31 ++++++++++++++++++++++++++++++- accounts/dcf_views.py | 2 +- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/accounts/dcf_support.py b/accounts/dcf_support.py index b6a04003..86b7ea0b 100755 --- a/accounts/dcf_support.py +++ b/accounts/dcf_support.py @@ -35,6 +35,7 @@ DCF_GOOGLE_SA_VERIFY_URL = settings.DCF_GOOGLE_SA_VERIFY_URL DCF_GOOGLE_SA_MONITOR_URL = settings.DCF_GOOGLE_SA_MONITOR_URL DCF_GOOGLE_SA_URL = settings.DCF_GOOGLE_SA_URL +DCF_URL_URL = settings.DCF_URL_URL class DCFCommFailure(Exception): """Thrown if we have problems communicating with DCF """ @@ -678,6 +679,34 @@ def _write_dataset_summary(dataset_info, dataset_id, phs_map): return is_ok, combo_msg +def get_signed_url_from_dcf(user_id, file_uuid): + """ + :raise TokenFailure: + :raise InternalTokenError: + :raise DCFCommFailure: + :raise RefreshTokenExpired: + """ + # + # Get a signed URL for a file ID. + # + + try: + resp = _dcf_call('{}/{}'.format(DCF_URL_URL, file_uuid), user_id) + except (TokenFailure, InternalTokenError, RefreshTokenExpired, DCFCommFailure) as e: + logger.error("[ERROR] Attempt to contact DCF for signed URL failed (user {})".format(user_id)) + raise e + except Exception as e: + logger.error("[ERROR] Attempt to contact DCF for signed URL failed (user {})".format(user_id)) + raise e + + result = { + 'uri': resp.text, + 'code': resp.status_code + } + + return result + + def verify_sa_at_dcf(user_id, gcp_id, service_account_id, datasets, phs_map, sa_in_use): """ :raise TokenFailure: @@ -1143,7 +1172,7 @@ def refresh_at_dcf(user_id): resp = None # - # Call DCF to drop the linkage. Note that this will immediately remove them from controlled access. + # Call DCF to refresh the linkage. # try: diff --git a/accounts/dcf_views.py b/accounts/dcf_views.py index 07e3857d..fffa92d5 100755 --- a/accounts/dcf_views.py +++ b/accounts/dcf_views.py @@ -79,7 +79,7 @@ def oauth2_login(request): # Found that 'user' scope had to be included to be able to do the user query on callback, and the data scope # to do data queries. Starting to recognize a pattern here... - oauth = OAuth2Session(client_id, redirect_uri=full_callback, scope=['openid', 'user', 'google_service_account']) + oauth = OAuth2Session(client_id, redirect_uri=full_callback, scope=['openid', 'user', 'data', 'google_service_account']) authorization_url, state = oauth.authorization_url(DCF_AUTH_URL) # stash the state string in the session! From 2f27aa4220bb11bdf511f0705302cbfe04ab97ad Mon Sep 17 00:00:00 2001 From: eleeisb Date: Wed, 29 May 2019 12:21:52 -0700 Subject: [PATCH 02/19] Adding compute_service.py --- google_helpers/compute_service.py | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 google_helpers/compute_service.py diff --git a/google_helpers/compute_service.py b/google_helpers/compute_service.py new file mode 100644 index 00000000..85ddfe22 --- /dev/null +++ b/google_helpers/compute_service.py @@ -0,0 +1,42 @@ +""" + +Copyright 2019, Institute for Systems Biology + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from oauth2client.client import GoogleCredentials
+from django.conf import settings
+import httplib2
+# from .utils import build_with_retries
+
+from googleapiclient.discovery import build
+
+COMPUTE_SCOPES = ['https://www.googleapis.com/auth/compute',
+                  'https://www.googleapis.com/auth/cloud-platform']
+
+
+# def get_crm_resource():
+#     """
+#     Returns: a Cloud Resource Manager service client for calling the API.
+#     """
+#     credentials = GoogleCredentials.get_application_default()
+#     service = build_with_retries('cloudresourcemanager', 'v1beta1', credentials, 2)
+#     return service
+
+def get_compute_resource():
+    credentials = GoogleCredentials.from_stream(settings.GOOGLE_APPLICATION_CREDENTIALS).create_scoped(COMPUTE_SCOPES)
+    http = credentials.authorize(httplib2.Http())
+    service = build('compute', 'v1', http=http, cache_discovery=False)
+    return service

From c38584a581bd6140e8e635b0f44f5dd53db14d8d Mon Sep 17 00:00:00 2001
From: "S. Paquette"
Date: Mon, 10 Jun 2019 18:14:48 -0700
Subject: [PATCH 03/19] -> Get list of user's GCPs (for use by the API)

---
 accounts/utils.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/accounts/utils.py b/accounts/utils.py
index af5e36a0..df3ee485 100644
--- a/accounts/utils.py
+++ b/accounts/utils.py
@@ -296,3 +296,24 @@ def unreg_gcp(user, gcp_id):
             status=500
 
     return response, status
+
+
+def get_user_gcps(user, gcp_id=None):
+    gcps = []
+    gcp_list = None
+
+    try:
+        if gcp_id:
+            gcp_list = GoogleProject.objects.filter(user=user, active=1, project_id=gcp_id)
+        else:
+            gcp_list = GoogleProject.objects.filter(user=user, active=1)
+
+        for gcp in gcp_list:
+            gcps.append({'gcp_id': gcp.project_id, 'gcp_name': gcp.project_name, 'users': [x.email for x in gcp.users_set.all()]})
+
+    except Exception as e:
+        logger.error("[ERROR] While fetching the GCP project list for user {}:".format(user.id))
+        logger.exception(e)
+
+    return gcps
+

From e41b9ae18f10c8686cc0d36fa78380883f05932b Mon Sep 17 00:00:00 2001
From: "S. 
Paquette" Date: Tue, 11 Jun 2019 12:36:19 -0700 Subject: [PATCH 04/19] -> Adding in #2601 fix --- accounts/views.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/accounts/views.py b/accounts/views.py index 1ac6dc70..cf3d87b3 100755 --- a/accounts/views.py +++ b/accounts/views.py @@ -249,12 +249,13 @@ def register_gcp(request, user_id): try: if request.POST: project_id = request.POST.get('gcp_id', None) - gcp = GoogleProject.objects.get(project_id=project_id) register_users = request.POST.getlist('register_users') is_refresh = bool(request.POST.get('is_refresh', '') == 'true') register, status = register_or_refresh_gcp(User.objects.get(id=user_id), project_id, register_users, is_refresh) + gcp = GoogleProject.objects.get(project_id=project_id, active=1) + if status == 200: if 'message' in register: messages.info(request, register['message']) @@ -273,7 +274,7 @@ def register_gcp(request, user_id): except Exception as e: logger.error("[ERROR] While {} a Google Cloud Project:".format("refreshing" if is_refresh else "registering")) if type(e) is ObjectDoesNotExist: - logger.error("GCP {} was not found.".format(project_id)) + logger.error("GCP {} was not found post-registration.".format(project_id)) else: logger.exception(e) messages.error(request, "There was an error while attempting to register/refresh this Google Cloud Project - please contact feedback@isb-cgc.org.") From f715b9a05edb5d37c46d9ca4aca06f77bffc7361 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Wed, 12 Jun 2019 12:51:09 -0700 Subject: [PATCH 05/19] -> In Python 3, must decode request.body to unicode first --- cohorts/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cohorts/views.py b/cohorts/views.py index b49c15ca..3a9621ac 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -690,7 +690,9 @@ def validate_barcodes(request): if debug: logger.debug('Called {}'.format(sys._getframe().f_code.co_name)) try: - barcodes = json.loads(request.body)['barcodes'] + body_unicode = request.body.decode('utf-8') + body = json.loads(body_unicode) + barcodes = body['barcodes'] status = 500 From a6e596cb0e6b5a3d9b524028bcd0d383cf3f296d Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Thu, 20 Jun 2019 12:59:29 -0700 Subject: [PATCH 06/19] -> Put return inside try --- cohorts/metadata_helpers.py | 68 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 35f94c5f..0338b5d7 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1388,25 +1388,25 @@ def get_full_sample_metadata(barcodes): db = None cursor = None - barcodes_by_program = {} - - for barcode in barcodes: - dash = barcode.find("-") - if dash >= 0: - prog = barcode[0:dash] - if prog not in ['TCGA', 'TARGET']: + try: + barcodes_by_program = {} + + for barcode in barcodes: + dash = barcode.find("-") + if dash >= 0: + prog = barcode[0:dash] + if prog not in ['TCGA', 'TARGET']: + prog = 'CCLE' + else: prog = 'CCLE' - else: - prog = 'CCLE' - if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + if prog not in barcodes_by_program: + barcodes_by_program[prog] = () + barcodes_by_program[prog] += (barcode,) - programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) + programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) - items = {} + items = {} - try: db = get_sql_connection() cursor = db.cursor() @@ -1454,6 +1454,8 @@ def get_full_sample_metadata(barcodes): result['total_found'] += 1 result['samples'] = [item for item in list(items.values())] + return result + except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) @@ -1461,8 +1463,6 @@ def get_full_sample_metadata(barcodes): if cursor: cursor.close() if db and db.open: db.close() - return result - def get_full_case_metadata(barcodes): if debug: logger.debug('Called ' + sys._getframe().f_code.co_name) @@ -1472,25 +1472,25 @@ def get_full_case_metadata(barcodes): db = None cursor = None - barcodes_by_program = {} - - for barcode in barcodes: - dash = barcode.find("-") - if dash >= 0: - prog = barcode[0:dash] - if prog not in ['TCGA','TARGET']: + try: + barcodes_by_program = {} + + for barcode in barcodes: + dash = barcode.find("-") + if dash >= 0: + prog = barcode[0:dash] + if prog not in ['TCGA', 'TARGET']: + prog = 'CCLE' + else: prog = 'CCLE' - else: - prog = 'CCLE' - if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + if prog not in barcodes_by_program: + barcodes_by_program[prog] = () + barcodes_by_program[prog] += (barcode,) - programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()),active=True,is_public=True) + programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) - items = {} + items = {} - try: db = get_sql_connection() cursor = db.cursor() @@ -1545,6 +1545,8 @@ def get_full_case_metadata(barcodes): result['total_found'] += 1 result['cases'] = [item for item in list(items.values())] + return result + except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) @@ -1552,8 +1554,6 @@ def get_full_case_metadata(barcodes): if cursor: cursor.close() if db and db.open: db.close() - return result - def get_sample_metadata(barcode): if debug: logger.debug('Called ' + sys._getframe().f_code.co_name) From 2015b8f8b7234b86fccc95a46487dc6273fd0dec Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Fri, 21 Jun 2019 11:15:05 -0700 Subject: [PATCH 07/19] -> Don't try to query if nothing was found in the prior query --- cohorts/metadata_helpers.py | 38 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 0338b5d7..2a846acf 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1433,26 +1433,27 @@ def get_full_sample_metadata(barcodes): 'data_details': {} } - for build in program_data_tables: - cursor.execute(""" - SELECT md.sample_barcode as sb, md.* - FROM {} md - WHERE md.sample_barcode IN ({}) AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} - ) + if len(list(items.keys())): + for build in program_data_tables: + cursor.execute(""" + SELECT md.sample_barcode as sb, md.* + FROM {} md + WHERE md.sample_barcode IN ({}) AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL + """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), + barcodes_by_program[program.name]) + + fields = cursor.description + for row in cursor.fetchall(): + if not build.build in items[row[0]]['data_details']: + items[row[0]]['data_details'][build.build] = [] + items[row[0]]['data_details'][build.build].append( + {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} + ) - # TODO: Once we have aliquots in the database again, add those here + # TODO: Once we have aliquots in the database again, add those here - result['total_found'] += 1 - result['samples'] = [item for item in list(items.values())] + result['total_found'] += 1 + result['samples'] = [item for item in list(items.values())] return result @@ -1460,6 +1461,7 @@ def get_full_sample_metadata(barcodes): logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) finally: + logger.info("[STATUS] Closing connection in sample metadata.") if cursor: cursor.close() if db and db.open: db.close() From ca5a4dc3157efb7799493dbfaeeacc871137c46d Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Fri, 21 Jun 2019 11:15:46 -0700 Subject: [PATCH 08/19] -> Same, but for cases. 
--- cohorts/metadata_helpers.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 2a846acf..45af0f89 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1526,26 +1526,27 @@ def get_full_case_metadata(barcodes): for row in cursor.fetchall(): items[row[0]]['samples'].append(row[1]) - for build in program_data_tables: - cursor.execute(""" - SELECT md.case_barcode as cb, md.* - FROM {} md - WHERE md.case_barcode IN ({}) AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} - ) + if len(list(items.keys())): + for build in program_data_tables: + cursor.execute(""" + SELECT md.case_barcode as cb, md.* + FROM {} md + WHERE md.case_barcode IN ({}) AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) + """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), + barcodes_by_program[program.name]) + + fields = cursor.description + for row in cursor.fetchall(): + if not build.build in items[row[0]]['data_details']: + items[row[0]]['data_details'][build.build] = [] + items[row[0]]['data_details'][build.build].append( + {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} + ) - # TODO: Once we have aliquots in the database again, add those here + # TODO: Once we have aliquots in the database again, add those here - result['total_found'] += 1 - result['cases'] = [item for item in list(items.values())] + result['total_found'] += 1 + result['cases'] = [item for item in list(items.values())] return result From 9d7eec357672485cf67e7dce690c232965477fc8 Mon Sep 17 00:00:00 2001 From: wlongabaugh Date: Fri, 21 Jun 2019 14:46:55 -0700 Subject: [PATCH 09/19] Logging to trace DCF issue PXP-3304 --- accounts/dcf_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts/dcf_support.py b/accounts/dcf_support.py index 5fdfe1e7..abcb0a78 100755 --- a/accounts/dcf_support.py +++ b/accounts/dcf_support.py @@ -741,7 +741,7 @@ def verify_sa_at_dcf(user_id, gcp_id, service_account_id, datasets, phs_map, sa_ try: # DCF requires this to be in the header. OAuth2 library glues this onto the auth header stuff: headers = {'Content-Type': 'application/json'} - + logger.info("[INFO] DCF verification request: {} {}".format(json_dumps(sa_data, service_account_id))) resp = _dcf_call(full_url, user_id, mode=use_mode, post_body=json_dumps(sa_data), headers=headers) except (TokenFailure, InternalTokenError, RefreshTokenExpired, DCFCommFailure) as e: logger.error("[ERROR] Attempt to contact DCF for SA verification failed (user {})".format(user_id)) From 6f1d31e278aa590fbeb43599049ad1e1331a184b Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 24 Jun 2019 14:29:45 -0700 Subject: [PATCH 10/19] -> Added in some new BQ helper methods: wait and fetch, filling gap between insert and insert-and-fetch; batch insert, wait, and fetch -> Swapped full case metadata to BQ --- cohorts/metadata_helpers.py | 103 ++++++++++++++++---------- google_helpers/bigquery/bq_support.py | 78 ++++++++++++++++--- 2 files changed, 130 insertions(+), 51 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 45af0f89..89a9c962 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1486,62 +1486,84 @@ def get_full_case_metadata(barcodes): else: prog = 'CCLE' if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + barcodes_by_program[prog] = [] + barcodes_by_program[prog].append(barcode) programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) items = {} - db = get_sql_connection() - cursor = db.cursor() - for program in programs: program_tables = program.get_metadata_tables() program_data_tables = program.get_data_tables() + + bq_search = BigQuerySupport.build_bq_filter_and_params({'case_barcode': barcodes_by_program[program.name]}) - cursor.execute(""" + case_job = BigQuerySupport.insert_query_job(""" SELECT clin.case_barcode as cb, clin.* - FROM {} clin - WHERE clin.case_barcode IN ({}) AND clin.endpoint_type = 'current' - """.format(program_tables.clin_table, ",".join(["%s"]*(len(barcodes_by_program[program.name])))), barcodes_by_program[program.name]) + FROM `{}` clin + WHERE {} + """.format("{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.clin_bq_table), + bq_search['filter_str']), bq_search['parameters']) - fields = cursor.description - skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'cb'] + bq_results = BigQuerySupport.wait_for_done_and_get_results(case_job['jobReference']) + result_schema = BigQuerySupport.get_result_schema(case_job['jobReference']) - for row in cursor.fetchall(): - items[row[0]] = { - 'case_barcode': row[0], - 'clinical_data': {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip}, + skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'cb', 'summary_file_count'] + + for row in bq_results: + items[row['f'][0]['v']] = { + 'case_barcode': row['f'][0]['v'], 'samples': [], - 'data_details': {} + 'data_details': {}, + 'clinlical_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } - cursor.execute(""" - SELECT case_barcode, sample_barcode - FROM {} - WHERE case_barcode IN ({}) AND endpoint_type = 'current' - """.format(program_tables.biospec_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), barcodes_by_program[program.name]) - - for row in cursor.fetchall(): - items[row[0]]['samples'].append(row[1]) - if len(list(items.keys())): - for build in program_data_tables: - cursor.execute(""" - SELECT md.case_barcode as cb, md.* - FROM {} md - WHERE md.case_barcode IN ({}) AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - 
items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} - ) + queries = [] + + for build_table in program_data_tables: + queries.append({ + 'query': """ + SELECT md.case_barcode as cb, md.* + FROM `{}` md + WHERE {} AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) + """.format( + "{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, program_data_tables.bq_dataset, program_data_tables.data_table), + bq_search['filter_str']), + 'parameters': bq_search['parameters'], + 'query_type': 'data_details', + 'build': build_table.build + }) + + queries.append({ + 'query': """ + SELECT case_barcode, sample_barcode + FROM {} + WHERE {} + """.format("{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.biospec_bq_table, + bq_search['filter_str'])), + 'parameters': bq_search['parameters'], + 'query_type': 'samples' + }) + + results = BigQuerySupport.insert_job_batch_and_get_results(queries) + + for result in results: + bq_results = result['bq_results'] + if result['query_type'] == 'samples': + for row in bq_results: + items[row['f'][0]['v']]['samples'].append(row['f'][1]['v']) + else: + for row in bq_results: + if 'data_details' not in items[row['f'][0]['v']] or result['build'] not in items[row['f'][0]['v']]['data_details']: + items[row['f'][0]['v']]['data_details'][result['build']] = [] + items[row['f'][0]['v']]['data_details'][result['build']].append({ + result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip + }) # TODO: Once we have aliquots in the database again, add those here @@ -1554,6 +1576,7 @@ def get_full_case_metadata(barcodes): logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) finally: + logger.info("[STATUS] Closing connection in case metadata.") if cursor: cursor.close() if db and db.open: db.close() diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index 3534f125..3995775e 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -302,16 +302,7 @@ def execute_query(self, query, parameters=None, write_disposition='WRITE_EMPTY', 'total_bytes_processed': query_job['statistics']['query']['totalBytesProcessed'] } - job_is_done = self.bq_service.jobs().get(projectId=self.executing_project, - jobId=job_id).execute(num_retries=5) - - retries = 0 - - while (job_is_done and not job_is_done['status']['state'] == 'DONE') and retries < BQ_ATTEMPT_MAX: - retries += 1 - sleep(1) - job_is_done = self.bq_service.jobs().get(projectId=self.executing_project, - jobId=job_id).execute(num_retries=5) + job_is_done = self.await_job_is_done(query_job) # Parse the final disposition if job_is_done and job_is_done['status']['state'] == 'DONE': @@ -333,6 +324,20 @@ def execute_query(self, query, parameters=None, write_disposition='WRITE_EMPTY', return query_results + # Check for a job's status for the maximum number of attempts, return the final resulting response + def await_job_is_done(self, query_job): + done = self.job_is_done(query_job) + retries = 0 + + while not done and retries < BQ_ATTEMPT_MAX: + retries += 1 + sleep(1) + done = self.job_is_done(query_job) + + return self.bq_service.jobs().get( + projectId=self.executing_project, jobId=query_job['jobReference']['jobId'] + ).execute(num_retries=5) + 
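+    # Usage sketch (hypothetical caller, not part of this changeset):
+    #   job = bqs.insert_bq_query_job(query, params)         # submit without blocking
+    #   final = bqs.await_job_is_done(job)                   # poll until DONE or BQ_ATTEMPT_MAX is hit
+    #   rows = bqs.fetch_job_results(final['jobReference'])  # page through the results
+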
# Check to see if query job is done def job_is_done(self, query_job): job_is_done = self.bq_service.jobs().get(projectId=self.executing_project, @@ -404,12 +409,20 @@ def estimate_query_cost(cls, query, parameters=None): bqs = cls(None, None, None) return bqs.execute_query(query, parameters, cost_est=True) - # Given a BQ service and a job reference, fetch out the results + # Given a job reference, fetch out the results @classmethod def get_job_results(cls, job_reference): bqs = cls(None, None, None) return bqs.fetch_job_results(job_reference) + # Given a job reference for a running job, await the completion, + # then fetch and return the results + @classmethod + def wait_for_done_and_get_results(cls, job_reference): + bqs = cls(None, None, None) + check_done = bqs.await_job_is_done(job_reference) + return bqs.fetch_job_results(check_done['jobReference']) + # Given a BQ service and a job reference, fetch out the results @classmethod def get_job_resource(cls, job_id, project_id): @@ -430,6 +443,49 @@ def get_table_schema(cls, projectId, datasetId, tableId): return [{'name': x['name'], 'type': x['type']} for x in table['schema']['fields']] + @classmethod + def get_result_schema(cls, job_ref): + bqs = cls(None, None, None) + results = bqs.bq_service.jobs.getQueryResults(**job_ref).execute(num_retries=5) + + return results['schema'] + + # Method for submitting a group of jobs and awaiting the results of the whole set + @classmethod + def insert_job_batch_and_get_results(cls, query_set): + bqs = cls(None, None, None) + submitted_job_set = {} + for query in query_set: + job_obj = bqs.insert_bq_query_job(query['query'],query['parameters']) + query['job_id'] = job_obj['jobReference']['jobId'] + submitted_job_set[job_obj['jobReference']['jobId']] = job_obj + + not_done = True + still_checking = True + num_retries = 0 + + while still_checking and not_done: + not_done = False + for job in submitted_job_set: + if not BigQuerySupport.check_job_is_done(submitted_job_set[job]['jobReference']): + not_done = True + if not_done: + sleep(1) + num_retries += 1 + still_checking = (num_retries < settings.BQ_MAX_ATTEMPTS) + + if not_done: + logger.warn("[WARNING] Not all of the queries completed!") + + for query in query_set: + if bqs.job_is_done(submitted_job_set[query['job_id']]['jobReference']): + query['bq_results'] = bqs.fetch_job_results(submitted_job_set[query['job_id']]['jobReference']) + query['result_schema'] = BigQuerySupport.get_result_schema(query['job_id']['jobReference']) + else: + query['bq_results'] = None + + return query_set + # Builds a BQ API v2 QueryParameter set and WHERE clause string from a set of filters of the form: # { # 'field_name': [,...] From 3647048a5166cf122e113ca7ba890cead859b3c8 Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 24 Jun 2019 19:33:02 -0700 Subject: [PATCH 11/19] -> Added in some new BQ helper methods: wait and fetch, filling gap between insert and insert-and-fetch; batch insert, wait, and fetch -> Swapped full case metadata to BQ --- cohorts/metadata_helpers.py | 121 ++++++++++++++++---------- google_helpers/bigquery/bq_support.py | 19 ++-- projects/models.py | 3 +- 3 files changed, 90 insertions(+), 53 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 89a9c962..8e8f0055 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1400,8 +1400,8 @@ def get_full_sample_metadata(barcodes): else: prog = 'CCLE' if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + barcodes_by_program[prog] = [] + barcodes_by_program[prog].append(barcode) programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) @@ -1414,41 +1414,67 @@ def get_full_sample_metadata(barcodes): program_tables = program.get_metadata_tables() program_data_tables = program.get_data_tables() - cursor.execute(""" + search_clause = BigQuerySupport.build_bq_filter_and_params({'sample_barcode': barcodes_by_program[program.name]}) + + sample_job = BigQuerySupport.insert_query_job(""" SELECT biospec.sample_barcode as sb, biospec.case_barcode as cb, biospec.* - FROM {} biospec - WHERE biospec.sample_barcode IN ({}) AND biospec.endpoint_type = 'current' - """.format(program_tables.biospec_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) + FROM `{}` biospec + WHERE {} + """.format( + "{}.{}.{}".format(settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.biospec_bq_table,), + search_clause['filter_string'] + ), search_clause['parameters']) - fields = cursor.description - skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'sb', 'cb'] + bq_results = BigQuerySupport.wait_for_done_and_get_results(sample_job) + result_schema = BigQuerySupport.get_result_schema(sample_job['jobReference']) - for row in cursor.fetchall(): - items[row[0]] = { - 'sample_barcode': row[0], - 'case_barcode': row[1], - 'biospecimen_data': {fields[index][0]: column for index, column in enumerate(row) if - fields[index][0] not in skip}, - 'data_details': {} + skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'sb', 'cb', 'case_barcode'] + + for row in bq_results: + items[row['f'][0]['v']] = { + 'sample_barcode': row['f'][0]['v'], + 'case_barcode': row['f'][1]['v'], + 'data_details': { + x.build: 'NONE_FOUND' for x in program_data_tables + }, + 'biospecimen_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } if len(list(items.keys())): - for build in program_data_tables: - cursor.execute(""" - SELECT md.sample_barcode as sb, md.* - FROM {} md - WHERE md.sample_barcode IN ({}) AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not 
in skip} - ) + queries = [] + + for build_table in program_data_tables: + logger.info(str(build_table)) + queries.append({ + 'query': """ + #standardSQL + SELECT md.sample_barcode as sb, md.* + FROM `{}` md + WHERE {} AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL + """.format( + "{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, build_table.bq_dataset, + build_table.data_table.lower()), + search_clause['filter_string']), + 'parameters': search_clause['parameters'], + 'build': build_table.build + }) + + results = BigQuerySupport.insert_job_batch_and_get_results(queries) + + for bq_result in results: + result_schema = bq_result['result_schema'] + bq_results = bq_result['bq_results'] + if not bq_results or not result_schema: + logger.warn("[WARNING] Results not received for this query:") + logger.warn("{}".format(bq_result['query'])) + continue + for row in bq_results: + if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': + items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] + items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ + result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip + }) # TODO: Once we have aliquots in the database again, add those here @@ -1500,14 +1526,15 @@ def get_full_case_metadata(barcodes): bq_search = BigQuerySupport.build_bq_filter_and_params({'case_barcode': barcodes_by_program[program.name]}) case_job = BigQuerySupport.insert_query_job(""" + #standardSQL SELECT clin.case_barcode as cb, clin.* FROM `{}` clin WHERE {} """.format("{}.{}.{}".format( settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.clin_bq_table), - bq_search['filter_str']), bq_search['parameters']) + bq_search['filter_string']), bq_search['parameters']) - bq_results = BigQuerySupport.wait_for_done_and_get_results(case_job['jobReference']) + bq_results = BigQuerySupport.wait_for_done_and_get_results(case_job) result_schema = BigQuerySupport.get_result_schema(case_job['jobReference']) skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'cb', 'summary_file_count'] @@ -1516,7 +1543,9 @@ def get_full_case_metadata(barcodes): items[row['f'][0]['v']] = { 'case_barcode': row['f'][0]['v'], 'samples': [], - 'data_details': {}, + 'data_details': { + x.build: 'NONE_FOUND' for x in program_data_tables + }, 'clinlical_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } @@ -1524,15 +1553,17 @@ def get_full_case_metadata(barcodes): queries = [] for build_table in program_data_tables: + logger.info(str(build_table)) queries.append({ 'query': """ + #standardSQL SELECT md.case_barcode as cb, md.* FROM `{}` md WHERE {} AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) """.format( "{}.{}.{}".format( - settings.BIGQUERY_DATA_PROJECT_ID, program_data_tables.bq_dataset, program_data_tables.data_table), - bq_search['filter_str']), + settings.BIGQUERY_DATA_PROJECT_ID, build_table.bq_dataset, build_table.data_table.lower()), + bq_search['filter_string']), 'parameters': bq_search['parameters'], 'query_type': 'data_details', 'build': build_table.build @@ -1540,28 +1571,30 @@ def get_full_case_metadata(barcodes): queries.append({ 'query': """ + #standardSQL SELECT case_barcode, sample_barcode - FROM {} + FROM `{}` WHERE {} """.format("{}.{}.{}".format( settings.BIGQUERY_DATA_PROJECT_ID, 
program_tables.bq_dataset, program_tables.biospec_bq_table, - bq_search['filter_str'])), + ), bq_search['filter_string']), 'parameters': bq_search['parameters'], 'query_type': 'samples' }) results = BigQuerySupport.insert_job_batch_and_get_results(queries) - for result in results: - bq_results = result['bq_results'] - if result['query_type'] == 'samples': + for bq_result in results: + result_schema = bq_result['result_schema'] + bq_results = bq_result['bq_results'] + if bq_result['query_type'] == 'samples': for row in bq_results: items[row['f'][0]['v']]['samples'].append(row['f'][1]['v']) else: for row in bq_results: - if 'data_details' not in items[row['f'][0]['v']] or result['build'] not in items[row['f'][0]['v']]['data_details']: - items[row['f'][0]['v']]['data_details'][result['build']] = [] - items[row['f'][0]['v']]['data_details'][result['build']].append({ + if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': + items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] + items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip }) diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index 3995775e..51826263 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -347,6 +347,7 @@ def job_is_done(self, query_job): # Fetch the results of a job based on the reference provided def fetch_job_results(self, job_ref): + logger.info(str(job_ref)) result = [] page_token = None @@ -399,9 +400,9 @@ def insert_query_job(cls, query, parameters=None): # Check the status of a BQ job @classmethod - def check_job_is_done(cls, job_ref): + def check_job_is_done(cls, query_job): bqs = cls(None, None, None) - return bqs.job_is_done(job_ref) + return bqs.job_is_done(query_job) # Do a 'dry run' query, which estimates the cost @classmethod @@ -418,9 +419,9 @@ def get_job_results(cls, job_reference): # Given a job reference for a running job, await the completion, # then fetch and return the results @classmethod - def wait_for_done_and_get_results(cls, job_reference): + def wait_for_done_and_get_results(cls, query_job): bqs = cls(None, None, None) - check_done = bqs.await_job_is_done(job_reference) + check_done = bqs.await_job_is_done(query_job) return bqs.fetch_job_results(check_done['jobReference']) # Given a BQ service and a job reference, fetch out the results @@ -446,13 +447,14 @@ def get_table_schema(cls, projectId, datasetId, tableId): @classmethod def get_result_schema(cls, job_ref): bqs = cls(None, None, None) - results = bqs.bq_service.jobs.getQueryResults(**job_ref).execute(num_retries=5) + results = bqs.bq_service.jobs().getQueryResults(**job_ref).execute(num_retries=5) return results['schema'] # Method for submitting a group of jobs and awaiting the results of the whole set @classmethod def insert_job_batch_and_get_results(cls, query_set): + logger.info(str(query_set)) bqs = cls(None, None, None) submitted_job_set = {} for query in query_set: @@ -467,7 +469,7 @@ def insert_job_batch_and_get_results(cls, query_set): while still_checking and not_done: not_done = False for job in submitted_job_set: - if not BigQuerySupport.check_job_is_done(submitted_job_set[job]['jobReference']): + if not BigQuerySupport.check_job_is_done(submitted_job_set[job]): not_done = True if not_done: sleep(1) @@ -478,11 +480,12 @@ def 
insert_job_batch_and_get_results(cls, query_set): logger.warn("[WARNING] Not all of the queries completed!") for query in query_set: - if bqs.job_is_done(submitted_job_set[query['job_id']]['jobReference']): + if bqs.job_is_done(submitted_job_set[query['job_id']]): query['bq_results'] = bqs.fetch_job_results(submitted_job_set[query['job_id']]['jobReference']) - query['result_schema'] = BigQuerySupport.get_result_schema(query['job_id']['jobReference']) + query['result_schema'] = BigQuerySupport.get_result_schema(submitted_job_set[query['job_id']]['jobReference']) else: query['bq_results'] = None + query['result_schema'] = None return query_set diff --git a/projects/models.py b/projects/models.py index cfac5081..3e7e496f 100644 --- a/projects/models.py +++ b/projects/models.py @@ -231,7 +231,8 @@ class Meta(object): verbose_name_plural = "Public Data Tables" def __str__(self): - return self.program__name + " " + self.build + " Data Tables" + return "{} [{}] Data Tables".format(self.program.name,self.build) + class Public_Annotation_Tables(models.Model): program = models.ForeignKey(Program, null=False) From 36633b14d1b6ca739ba06b9cfdb03d526d66e0b6 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 24 Jun 2019 19:33:59 -0700 Subject: [PATCH 12/19] -> Default to empty data details --- cohorts/metadata_helpers.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 8e8f0055..d99eda22 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1435,7 +1435,7 @@ def get_full_sample_metadata(barcodes): 'sample_barcode': row['f'][0]['v'], 'case_barcode': row['f'][1]['v'], 'data_details': { - x.build: 'NONE_FOUND' for x in program_data_tables + x.build: [] for x in program_data_tables }, 'biospecimen_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } @@ -1470,8 +1470,6 @@ def get_full_sample_metadata(barcodes): logger.warn("{}".format(bq_result['query'])) continue for row in bq_results: - if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': - items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip }) @@ -1544,7 +1542,7 @@ def get_full_case_metadata(barcodes): 'case_barcode': row['f'][0]['v'], 'samples': [], 'data_details': { - x.build: 'NONE_FOUND' for x in program_data_tables + x.build: [] for x in program_data_tables }, 'clinlical_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } @@ -1592,8 +1590,6 @@ def get_full_case_metadata(barcodes): items[row['f'][0]['v']]['samples'].append(row['f'][1]['v']) else: for row in bq_results: - if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': - items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip }) From 4b97307bbeb5dea1ebaedfa070e9415273ab0e4c Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 24 Jun 2019 20:02:00 -0700 Subject: [PATCH 13/19] -> No more db calls in those methods --- cohorts/metadata_helpers.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index d99eda22..66b87ff8 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1385,8 +1385,6 @@ def get_full_sample_metadata(barcodes): result = { 'total_found': 0 } - db = None - cursor = None try: barcodes_by_program = {} @@ -1484,10 +1482,6 @@ def get_full_sample_metadata(barcodes): except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) - finally: - logger.info("[STATUS] Closing connection in sample metadata.") - if cursor: cursor.close() - if db and db.open: db.close() def get_full_case_metadata(barcodes): @@ -1495,8 +1489,6 @@ def get_full_case_metadata(barcodes): result = { 'total_found': 0 } - db = None - cursor = None try: barcodes_by_program = {} @@ -1604,10 +1596,6 @@ def get_full_case_metadata(barcodes): except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) - finally: - logger.info("[STATUS] Closing connection in case metadata.") - if cursor: cursor.close() - if db and db.open: db.close() def get_sample_metadata(barcode): From ba71f964588872e73699b0a66fa393bec87c69c9 Mon Sep 17 00:00:00 2001 From: wlongabaugh Date: Mon, 8 Jul 2019 12:17:57 -0700 Subject: [PATCH 14/19] Logging had bad parentheses --- accounts/dcf_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts/dcf_support.py b/accounts/dcf_support.py index abcb0a78..63bbc6e0 100755 --- a/accounts/dcf_support.py +++ b/accounts/dcf_support.py @@ -741,7 +741,7 @@ def verify_sa_at_dcf(user_id, gcp_id, service_account_id, datasets, phs_map, sa_ try: # DCF requires this to be in the header. OAuth2 library glues this onto the auth header stuff: headers = {'Content-Type': 'application/json'} - logger.info("[INFO] DCF verification request: {} {}".format(json_dumps(sa_data, service_account_id))) + logger.info("[INFO] DCF verification request: {} {}".format(json_dumps(sa_data), service_account_id)) resp = _dcf_call(full_url, user_id, mode=use_mode, post_body=json_dumps(sa_data), headers=headers) except (TokenFailure, InternalTokenError, RefreshTokenExpired, DCFCommFailure) as e: logger.error("[ERROR] Attempt to contact DCF for SA verification failed (user {})".format(user_id)) From bacd3434cb9e317e35913ab0f4f31526299ee2bd Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 13:17:41 -0700 Subject: [PATCH 15/19] -> Rename 'desc' to description which is more consistent --- cohorts/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 905ad1c7..9dd90e29 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -31,9 +31,9 @@ from django.conf import settings -def create_cohort(user, filters=None, name=None, desc=None, source_id=None): +def create_cohort(user, filters=None, name=None, description=None, source_id=None): - if not filters and not name and not desc: + if not filters and not name and not description: # Can't save/edit a cohort when nothing is being changed! 
return None @@ -46,7 +46,7 @@ def create_cohort(user, filters=None, name=None, desc=None, source_id=None): if source and not filters or (len(filters) <= 0): # If we're only changing the name and/or desc, just edit the cohort and update it - source.update(name=name, description=desc) + source.update(name=name, description=description) return { 'cohort_id': source.id } # Make and save cohort @@ -75,7 +75,7 @@ def create_cohort(user, filters=None, name=None, desc=None, source_id=None): } # Create new cohort - cohort = Cohort.objects.create(name=name, description=desc) + cohort = Cohort.objects.create(name=name, description=description) cohort.save() # Set permission for user to be owner From 1512ab5250fe9be17c117c2c40d01f060aec08c7 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 13:45:42 -0700 Subject: [PATCH 16/19] -> Update is for a QuerySet, not a single object... --- cohorts/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 9dd90e29..1e04e591 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -41,7 +41,7 @@ def create_cohort(user, filters=None, name=None, description=None, source_id=Non source_progs = None if source_id: - source = Cohort.objects.filter(id=source_id).first() + source = Cohort.objects.filter(id=source_id) source_progs = source.get_programs() if source and not filters or (len(filters) <= 0): From ab36588825fa51dd9f121895401697dc121c8163 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 13:49:18 -0700 Subject: [PATCH 17/19] -> ...but we need a single object for OTHER things. --- cohorts/utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 1e04e591..58f5fa67 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -41,13 +41,14 @@ def create_cohort(user, filters=None, name=None, description=None, source_id=Non source_progs = None if source_id: - source = Cohort.objects.filter(id=source_id) - source_progs = source.get_programs() - - if source and not filters or (len(filters) <= 0): - # If we're only changing the name and/or desc, just edit the cohort and update it - source.update(name=name, description=description) - return { 'cohort_id': source.id } + if not filters or (len(filters) <= 0): + source = Cohort.objects.filter(id=source_id).first() + # If we're only changing the name and/or desc, just edit the cohort and update it + source.update(name=name, description=description) + return { 'cohort_id': source.id } + else: + source = Cohort.objects.filter(id=source_id).first() + source_progs = source.get_programs() # Make and save cohort From 1f30ea6737ae60de155ddb6467462df9959c765e Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 15 Jul 2019 13:52:37 -0700 Subject: [PATCH 18/19] -> Apply Occam's razor --- cohorts/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 58f5fa67..1b593948 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -41,14 +41,17 @@ def create_cohort(user, filters=None, name=None, description=None, source_id=Non source_progs = None if source_id: - if not filters or (len(filters) <= 0): - source = Cohort.objects.filter(id=source_id).first() - # If we're only changing the name and/or desc, just edit the cohort and update it - source.update(name=name, description=description) - return { 'cohort_id': source.id } - else: - source = Cohort.objects.filter(id=source_id).first() - source_progs = source.get_programs() + source = Cohort.objects.filter(id=source_id).first() + source_progs = source.get_programs() + + if source and not filters or (len(filters) <= 0): + # If we're only changing the name and/or desc, just edit the cohort and return + if name: + source.name = name + if description: + source.description = description + source.save() + return { 'cohort_id': source.id } # Make and save cohort From 6222240eaf633a5d2ca9c5ad51c987f01bccb078 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 14:50:01 -0700 Subject: [PATCH 19/19] -> Require any string filter to have a value --- cohorts/metadata_helpers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 66b87ff8..62443f5e 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1739,7 +1739,15 @@ def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters=' if key_field_type not in field_types: invalid_keys.append(key_split) else: - filters[field_types[key_field_type]['type']][key_field] = inc_filters[prog][key_split] + # Check to make sure any string values aren't empty strings - if they are, it's invalid. + vals = inc_filters[prog][key_split] + if not isinstance(vals, list): + vals = [inc_filters[prog][key_split]] + for val in vals: + if isinstance(val, str) and not len(val): + invalid_keys.append(key_split) + else: + filters[field_types[key_field_type]['type']][key_field] = inc_filters[prog][key_split] if len(invalid_keys) > 0: raise Exception("Improper filter(s) supplied for program {}: '{}'".format(prog, ("', '".join(invalid_keys))))