From edc85bf3c3acdffd71dd0edbe6bc0be31b38a7d8 Mon Sep 17 00:00:00 2001 From: wlongabaugh Date: Wed, 17 Apr 2019 12:15:20 -0700 Subject: [PATCH 01/19] Adding signed URL capabilities --- accounts/dcf_support.py | 31 ++++++++++++++++++++++++++++++- accounts/dcf_views.py | 2 +- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/accounts/dcf_support.py b/accounts/dcf_support.py index b6a04003..86b7ea0b 100755 --- a/accounts/dcf_support.py +++ b/accounts/dcf_support.py @@ -35,6 +35,7 @@ DCF_GOOGLE_SA_VERIFY_URL = settings.DCF_GOOGLE_SA_VERIFY_URL DCF_GOOGLE_SA_MONITOR_URL = settings.DCF_GOOGLE_SA_MONITOR_URL DCF_GOOGLE_SA_URL = settings.DCF_GOOGLE_SA_URL +DCF_URL_URL = settings.DCF_URL_URL class DCFCommFailure(Exception): """Thrown if we have problems communicating with DCF """ @@ -678,6 +679,34 @@ def _write_dataset_summary(dataset_info, dataset_id, phs_map): return is_ok, combo_msg +def get_signed_url_from_dcf(user_id, file_uuid): + """ + :raise TokenFailure: + :raise InternalTokenError: + :raise DCFCommFailure: + :raise RefreshTokenExpired: + """ + # + # Get a signed URL for a file ID. + # + + try: + resp = _dcf_call('{}/{}'.format(DCF_URL_URL, file_uuid), user_id) + except (TokenFailure, InternalTokenError, RefreshTokenExpired, DCFCommFailure) as e: + logger.error("[ERROR] Attempt to contact DCF for signed URL failed (user {})".format(user_id)) + raise e + except Exception as e: + logger.error("[ERROR] Attempt to contact DCF for signed URL failed (user {})".format(user_id)) + raise e + + result = { + 'uri': resp.text, + 'code': resp.status_code + } + + return result + + def verify_sa_at_dcf(user_id, gcp_id, service_account_id, datasets, phs_map, sa_in_use): """ :raise TokenFailure: @@ -1143,7 +1172,7 @@ def refresh_at_dcf(user_id): resp = None # - # Call DCF to drop the linkage. Note that this will immediately remove them from controlled access. + # Call DCF to refresh the linkage. # try: diff --git a/accounts/dcf_views.py b/accounts/dcf_views.py index 07e3857d..fffa92d5 100755 --- a/accounts/dcf_views.py +++ b/accounts/dcf_views.py @@ -79,7 +79,7 @@ def oauth2_login(request): # Found that 'user' scope had to be included to be able to do the user query on callback, and the data scope # to do data queries. Starting to recognize a pattern here... - oauth = OAuth2Session(client_id, redirect_uri=full_callback, scope=['openid', 'user', 'google_service_account']) + oauth = OAuth2Session(client_id, redirect_uri=full_callback, scope=['openid', 'user', 'data', 'google_service_account']) authorization_url, state = oauth.authorization_url(DCF_AUTH_URL) # stash the state string in the session! From 2f27aa4220bb11bdf511f0705302cbfe04ab97ad Mon Sep 17 00:00:00 2001 From: eleeisb Date: Wed, 29 May 2019 12:21:52 -0700 Subject: [PATCH 02/19] Adding compute_service.py --- google_helpers/compute_service.py | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 google_helpers/compute_service.py diff --git a/google_helpers/compute_service.py b/google_helpers/compute_service.py new file mode 100644 index 00000000..85ddfe22 --- /dev/null +++ b/google_helpers/compute_service.py @@ -0,0 +1,42 @@ +""" + +Copyright 2019, Institute for Systems Biology + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from oauth2client.client import GoogleCredentials
+from django.conf import settings
+import httplib2
+# from .utils import build_with_retries
+
+from googleapiclient.discovery import build
+
+COMPUTE_SCOPES = ['https://www.googleapis.com/auth/compute',
+                  'https://www.googleapis.com/auth/cloud-platform']
+
+
+# def get_crm_resource():
+#     """
+#     Returns: a Cloud Resource Manager service client for calling the API.
+#     """
+#     credentials = GoogleCredentials.get_application_default()
+#     service = build_with_retries('cloudresourcemanager', 'v1beta1', credentials, 2)
+#     return service
+
+def get_compute_resource():
+    credentials = GoogleCredentials.from_stream(settings.GOOGLE_APPLICATION_CREDENTIALS).create_scoped(COMPUTE_SCOPES)
+    http = credentials.authorize(httplib2.Http())
+    service = build('compute', 'v1', http=http, cache_discovery=False)
+    return service

From c38584a581bd6140e8e635b0f44f5dd53db14d8d Mon Sep 17 00:00:00 2001
From: "S. Paquette"
Date: Mon, 10 Jun 2019 18:14:48 -0700
Subject: [PATCH 03/19] -> Get list of user's GCPs (for use by the API)

---
 accounts/utils.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/accounts/utils.py b/accounts/utils.py
index af5e36a0..df3ee485 100644
--- a/accounts/utils.py
+++ b/accounts/utils.py
@@ -296,3 +296,24 @@ def unreg_gcp(user, gcp_id):
             status=500
 
     return response, status
+
+
+def get_user_gcps(user, gcp_id=None):
+    gcps = []
+    gcp_list = None
+
+    try:
+        if gcp_id:
+            gcp_list = GoogleProject.objects.filter(user=user, active=1, project_id=gcp_id)
+        else:
+            gcp_list = GoogleProject.objects.filter(user=user, active=1)
+
+        for gcp in gcp_list:
+            gcps.append({'gcp_id': gcp.project_id, 'gcp_name': gcp.project_name, 'users': [x.email for x in gcp.users_set.all()]})
+
+    except Exception as e:
+        logger.error("[ERROR] While fetching the GCP project list for user {}:".format(user.id))
+        logger.exception(e)
+
+    return gcps
+

From e41b9ae18f10c8686cc0d36fa78380883f05932b Mon Sep 17 00:00:00 2001
From: "S. 
Paquette" Date: Tue, 11 Jun 2019 12:36:19 -0700 Subject: [PATCH 04/19] -> Adding in #2601 fix --- accounts/views.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/accounts/views.py b/accounts/views.py index 1ac6dc70..cf3d87b3 100755 --- a/accounts/views.py +++ b/accounts/views.py @@ -249,12 +249,13 @@ def register_gcp(request, user_id): try: if request.POST: project_id = request.POST.get('gcp_id', None) - gcp = GoogleProject.objects.get(project_id=project_id) register_users = request.POST.getlist('register_users') is_refresh = bool(request.POST.get('is_refresh', '') == 'true') register, status = register_or_refresh_gcp(User.objects.get(id=user_id), project_id, register_users, is_refresh) + gcp = GoogleProject.objects.get(project_id=project_id, active=1) + if status == 200: if 'message' in register: messages.info(request, register['message']) @@ -273,7 +274,7 @@ def register_gcp(request, user_id): except Exception as e: logger.error("[ERROR] While {} a Google Cloud Project:".format("refreshing" if is_refresh else "registering")) if type(e) is ObjectDoesNotExist: - logger.error("GCP {} was not found.".format(project_id)) + logger.error("GCP {} was not found post-registration.".format(project_id)) else: logger.exception(e) messages.error(request, "There was an error while attempting to register/refresh this Google Cloud Project - please contact feedback@isb-cgc.org.") From f715b9a05edb5d37c46d9ca4aca06f77bffc7361 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Wed, 12 Jun 2019 12:51:09 -0700 Subject: [PATCH 05/19] -> In Python 3, must decode request.body to unicode first --- cohorts/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cohorts/views.py b/cohorts/views.py index b49c15ca..3a9621ac 100755 --- a/cohorts/views.py +++ b/cohorts/views.py @@ -690,7 +690,9 @@ def validate_barcodes(request): if debug: logger.debug('Called {}'.format(sys._getframe().f_code.co_name)) try: - barcodes = json.loads(request.body)['barcodes'] + body_unicode = request.body.decode('utf-8') + body = json.loads(body_unicode) + barcodes = body['barcodes'] status = 500 From a6e596cb0e6b5a3d9b524028bcd0d383cf3f296d Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Thu, 20 Jun 2019 12:59:29 -0700 Subject: [PATCH 06/19] -> Put return inside try --- cohorts/metadata_helpers.py | 68 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 35f94c5f..0338b5d7 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1388,25 +1388,25 @@ def get_full_sample_metadata(barcodes): db = None cursor = None - barcodes_by_program = {} - - for barcode in barcodes: - dash = barcode.find("-") - if dash >= 0: - prog = barcode[0:dash] - if prog not in ['TCGA', 'TARGET']: + try: + barcodes_by_program = {} + + for barcode in barcodes: + dash = barcode.find("-") + if dash >= 0: + prog = barcode[0:dash] + if prog not in ['TCGA', 'TARGET']: + prog = 'CCLE' + else: prog = 'CCLE' - else: - prog = 'CCLE' - if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + if prog not in barcodes_by_program: + barcodes_by_program[prog] = () + barcodes_by_program[prog] += (barcode,) - programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) + programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) - items = {} + items = {} - try: db = get_sql_connection() cursor = db.cursor() @@ -1454,6 +1454,8 @@ def get_full_sample_metadata(barcodes): result['total_found'] += 1 result['samples'] = [item for item in list(items.values())] + return result + except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) @@ -1461,8 +1463,6 @@ def get_full_sample_metadata(barcodes): if cursor: cursor.close() if db and db.open: db.close() - return result - def get_full_case_metadata(barcodes): if debug: logger.debug('Called ' + sys._getframe().f_code.co_name) @@ -1472,25 +1472,25 @@ def get_full_case_metadata(barcodes): db = None cursor = None - barcodes_by_program = {} - - for barcode in barcodes: - dash = barcode.find("-") - if dash >= 0: - prog = barcode[0:dash] - if prog not in ['TCGA','TARGET']: + try: + barcodes_by_program = {} + + for barcode in barcodes: + dash = barcode.find("-") + if dash >= 0: + prog = barcode[0:dash] + if prog not in ['TCGA', 'TARGET']: + prog = 'CCLE' + else: prog = 'CCLE' - else: - prog = 'CCLE' - if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + if prog not in barcodes_by_program: + barcodes_by_program[prog] = () + barcodes_by_program[prog] += (barcode,) - programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()),active=True,is_public=True) + programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) - items = {} + items = {} - try: db = get_sql_connection() cursor = db.cursor() @@ -1545,6 +1545,8 @@ def get_full_case_metadata(barcodes): result['total_found'] += 1 result['cases'] = [item for item in list(items.values())] + return result + except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) @@ -1552,8 +1554,6 @@ def get_full_case_metadata(barcodes): if cursor: cursor.close() if db and db.open: db.close() - return result - def get_sample_metadata(barcode): if debug: logger.debug('Called ' + sys._getframe().f_code.co_name) From 2015b8f8b7234b86fccc95a46487dc6273fd0dec Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Fri, 21 Jun 2019 11:15:05 -0700 Subject: [PATCH 07/19] -> Don't try to query if nothing was found in the prior query --- cohorts/metadata_helpers.py | 38 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 0338b5d7..2a846acf 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1433,26 +1433,27 @@ def get_full_sample_metadata(barcodes): 'data_details': {} } - for build in program_data_tables: - cursor.execute(""" - SELECT md.sample_barcode as sb, md.* - FROM {} md - WHERE md.sample_barcode IN ({}) AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} - ) + if len(list(items.keys())): + for build in program_data_tables: + cursor.execute(""" + SELECT md.sample_barcode as sb, md.* + FROM {} md + WHERE md.sample_barcode IN ({}) AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL + """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), + barcodes_by_program[program.name]) + + fields = cursor.description + for row in cursor.fetchall(): + if not build.build in items[row[0]]['data_details']: + items[row[0]]['data_details'][build.build] = [] + items[row[0]]['data_details'][build.build].append( + {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} + ) - # TODO: Once we have aliquots in the database again, add those here + # TODO: Once we have aliquots in the database again, add those here - result['total_found'] += 1 - result['samples'] = [item for item in list(items.values())] + result['total_found'] += 1 + result['samples'] = [item for item in list(items.values())] return result @@ -1460,6 +1461,7 @@ def get_full_sample_metadata(barcodes): logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) finally: + logger.info("[STATUS] Closing connection in sample metadata.") if cursor: cursor.close() if db and db.open: db.close() From ca5a4dc3157efb7799493dbfaeeacc871137c46d Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Fri, 21 Jun 2019 11:15:46 -0700 Subject: [PATCH 08/19] -> Same, but for cases. 
--- cohorts/metadata_helpers.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 2a846acf..45af0f89 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1526,26 +1526,27 @@ def get_full_case_metadata(barcodes): for row in cursor.fetchall(): items[row[0]]['samples'].append(row[1]) - for build in program_data_tables: - cursor.execute(""" - SELECT md.case_barcode as cb, md.* - FROM {} md - WHERE md.case_barcode IN ({}) AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} - ) + if len(list(items.keys())): + for build in program_data_tables: + cursor.execute(""" + SELECT md.case_barcode as cb, md.* + FROM {} md + WHERE md.case_barcode IN ({}) AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) + """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), + barcodes_by_program[program.name]) + + fields = cursor.description + for row in cursor.fetchall(): + if not build.build in items[row[0]]['data_details']: + items[row[0]]['data_details'][build.build] = [] + items[row[0]]['data_details'][build.build].append( + {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} + ) - # TODO: Once we have aliquots in the database again, add those here + # TODO: Once we have aliquots in the database again, add those here - result['total_found'] += 1 - result['cases'] = [item for item in list(items.values())] + result['total_found'] += 1 + result['cases'] = [item for item in list(items.values())] return result From 9d7eec357672485cf67e7dce690c232965477fc8 Mon Sep 17 00:00:00 2001 From: wlongabaugh Date: Fri, 21 Jun 2019 14:46:55 -0700 Subject: [PATCH 09/19] Logging to trace DCF issue PXP-3304 --- accounts/dcf_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts/dcf_support.py b/accounts/dcf_support.py index 5fdfe1e7..abcb0a78 100755 --- a/accounts/dcf_support.py +++ b/accounts/dcf_support.py @@ -741,7 +741,7 @@ def verify_sa_at_dcf(user_id, gcp_id, service_account_id, datasets, phs_map, sa_ try: # DCF requires this to be in the header. OAuth2 library glues this onto the auth header stuff: headers = {'Content-Type': 'application/json'} - + logger.info("[INFO] DCF verification request: {} {}".format(json_dumps(sa_data, service_account_id))) resp = _dcf_call(full_url, user_id, mode=use_mode, post_body=json_dumps(sa_data), headers=headers) except (TokenFailure, InternalTokenError, RefreshTokenExpired, DCFCommFailure) as e: logger.error("[ERROR] Attempt to contact DCF for SA verification failed (user {})".format(user_id)) From 6f1d31e278aa590fbeb43599049ad1e1331a184b Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 24 Jun 2019 14:29:45 -0700 Subject: [PATCH 10/19] -> Added in some new BQ helper methods: wait and fetch, filling gap between insert and insert-and-fetch; batch insert, wait, and fetch -> Swapped full case metadata to BQ --- cohorts/metadata_helpers.py | 103 ++++++++++++++++---------- google_helpers/bigquery/bq_support.py | 78 ++++++++++++++++--- 2 files changed, 130 insertions(+), 51 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 45af0f89..89a9c962 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1486,62 +1486,84 @@ def get_full_case_metadata(barcodes): else: prog = 'CCLE' if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + barcodes_by_program[prog] = [] + barcodes_by_program[prog].append(barcode) programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) items = {} - db = get_sql_connection() - cursor = db.cursor() - for program in programs: program_tables = program.get_metadata_tables() program_data_tables = program.get_data_tables() + + bq_search = BigQuerySupport.build_bq_filter_and_params({'case_barcode': barcodes_by_program[program.name]}) - cursor.execute(""" + case_job = BigQuerySupport.insert_query_job(""" SELECT clin.case_barcode as cb, clin.* - FROM {} clin - WHERE clin.case_barcode IN ({}) AND clin.endpoint_type = 'current' - """.format(program_tables.clin_table, ",".join(["%s"]*(len(barcodes_by_program[program.name])))), barcodes_by_program[program.name]) + FROM `{}` clin + WHERE {} + """.format("{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.clin_bq_table), + bq_search['filter_str']), bq_search['parameters']) - fields = cursor.description - skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'cb'] + bq_results = BigQuerySupport.wait_for_done_and_get_results(case_job['jobReference']) + result_schema = BigQuerySupport.get_result_schema(case_job['jobReference']) - for row in cursor.fetchall(): - items[row[0]] = { - 'case_barcode': row[0], - 'clinical_data': {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip}, + skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'cb', 'summary_file_count'] + + for row in bq_results: + items[row['f'][0]['v']] = { + 'case_barcode': row['f'][0]['v'], 'samples': [], - 'data_details': {} + 'data_details': {}, + 'clinlical_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } - cursor.execute(""" - SELECT case_barcode, sample_barcode - FROM {} - WHERE case_barcode IN ({}) AND endpoint_type = 'current' - """.format(program_tables.biospec_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), barcodes_by_program[program.name]) - - for row in cursor.fetchall(): - items[row[0]]['samples'].append(row[1]) - if len(list(items.keys())): - for build in program_data_tables: - cursor.execute(""" - SELECT md.case_barcode as cb, md.* - FROM {} md - WHERE md.case_barcode IN ({}) AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - 
items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not in skip} - ) + queries = [] + + for build_table in program_data_tables: + queries.append({ + 'query': """ + SELECT md.case_barcode as cb, md.* + FROM `{}` md + WHERE {} AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) + """.format( + "{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, program_data_tables.bq_dataset, program_data_tables.data_table), + bq_search['filter_str']), + 'parameters': bq_search['parameters'], + 'query_type': 'data_details', + 'build': build_table.build + }) + + queries.append({ + 'query': """ + SELECT case_barcode, sample_barcode + FROM {} + WHERE {} + """.format("{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.biospec_bq_table, + bq_search['filter_str'])), + 'parameters': bq_search['parameters'], + 'query_type': 'samples' + }) + + results = BigQuerySupport.insert_job_batch_and_get_results(queries) + + for result in results: + bq_results = result['bq_results'] + if result['query_type'] == 'samples': + for row in bq_results: + items[row['f'][0]['v']]['samples'].append(row['f'][1]['v']) + else: + for row in bq_results: + if 'data_details' not in items[row['f'][0]['v']] or result['build'] not in items[row['f'][0]['v']]['data_details']: + items[row['f'][0]['v']]['data_details'][result['build']] = [] + items[row['f'][0]['v']]['data_details'][result['build']].append({ + result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip + }) # TODO: Once we have aliquots in the database again, add those here @@ -1554,6 +1576,7 @@ def get_full_case_metadata(barcodes): logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) finally: + logger.info("[STATUS] Closing connection in case metadata.") if cursor: cursor.close() if db and db.open: db.close() diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index 3534f125..3995775e 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -302,16 +302,7 @@ def execute_query(self, query, parameters=None, write_disposition='WRITE_EMPTY', 'total_bytes_processed': query_job['statistics']['query']['totalBytesProcessed'] } - job_is_done = self.bq_service.jobs().get(projectId=self.executing_project, - jobId=job_id).execute(num_retries=5) - - retries = 0 - - while (job_is_done and not job_is_done['status']['state'] == 'DONE') and retries < BQ_ATTEMPT_MAX: - retries += 1 - sleep(1) - job_is_done = self.bq_service.jobs().get(projectId=self.executing_project, - jobId=job_id).execute(num_retries=5) + job_is_done = self.await_job_is_done(query_job) # Parse the final disposition if job_is_done and job_is_done['status']['state'] == 'DONE': @@ -333,6 +324,20 @@ def execute_query(self, query, parameters=None, write_disposition='WRITE_EMPTY', return query_results + # Check for a job's status for the maximum number of attempts, return the final resulting response + def await_job_is_done(self, query_job): + done = self.job_is_done(query_job) + retries = 0 + + while not done and retries < BQ_ATTEMPT_MAX: + retries += 1 + sleep(1) + done = self.job_is_done(query_job) + + return self.bq_service.jobs().get( + projectId=self.executing_project, jobId=query_job['jobReference']['jobId'] + ).execute(num_retries=5) + 
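+    # Usage sketch (hypothetical caller, not part of this changeset):
+    #   job = bqs.insert_bq_query_job(query, params)         # submit without blocking
+    #   final = bqs.await_job_is_done(job)                   # poll until DONE or BQ_ATTEMPT_MAX is hit
+    #   rows = bqs.fetch_job_results(final['jobReference'])  # page through the results
+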
# Check to see if query job is done def job_is_done(self, query_job): job_is_done = self.bq_service.jobs().get(projectId=self.executing_project, @@ -404,12 +409,20 @@ def estimate_query_cost(cls, query, parameters=None): bqs = cls(None, None, None) return bqs.execute_query(query, parameters, cost_est=True) - # Given a BQ service and a job reference, fetch out the results + # Given a job reference, fetch out the results @classmethod def get_job_results(cls, job_reference): bqs = cls(None, None, None) return bqs.fetch_job_results(job_reference) + # Given a job reference for a running job, await the completion, + # then fetch and return the results + @classmethod + def wait_for_done_and_get_results(cls, job_reference): + bqs = cls(None, None, None) + check_done = bqs.await_job_is_done(job_reference) + return bqs.fetch_job_results(check_done['jobReference']) + # Given a BQ service and a job reference, fetch out the results @classmethod def get_job_resource(cls, job_id, project_id): @@ -430,6 +443,49 @@ def get_table_schema(cls, projectId, datasetId, tableId): return [{'name': x['name'], 'type': x['type']} for x in table['schema']['fields']] + @classmethod + def get_result_schema(cls, job_ref): + bqs = cls(None, None, None) + results = bqs.bq_service.jobs.getQueryResults(**job_ref).execute(num_retries=5) + + return results['schema'] + + # Method for submitting a group of jobs and awaiting the results of the whole set + @classmethod + def insert_job_batch_and_get_results(cls, query_set): + bqs = cls(None, None, None) + submitted_job_set = {} + for query in query_set: + job_obj = bqs.insert_bq_query_job(query['query'],query['parameters']) + query['job_id'] = job_obj['jobReference']['jobId'] + submitted_job_set[job_obj['jobReference']['jobId']] = job_obj + + not_done = True + still_checking = True + num_retries = 0 + + while still_checking and not_done: + not_done = False + for job in submitted_job_set: + if not BigQuerySupport.check_job_is_done(submitted_job_set[job]['jobReference']): + not_done = True + if not_done: + sleep(1) + num_retries += 1 + still_checking = (num_retries < settings.BQ_MAX_ATTEMPTS) + + if not_done: + logger.warn("[WARNING] Not all of the queries completed!") + + for query in query_set: + if bqs.job_is_done(submitted_job_set[query['job_id']]['jobReference']): + query['bq_results'] = bqs.fetch_job_results(submitted_job_set[query['job_id']]['jobReference']) + query['result_schema'] = BigQuerySupport.get_result_schema(query['job_id']['jobReference']) + else: + query['bq_results'] = None + + return query_set + # Builds a BQ API v2 QueryParameter set and WHERE clause string from a set of filters of the form: # { # 'field_name': [,...] From 3647048a5166cf122e113ca7ba890cead859b3c8 Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 24 Jun 2019 19:33:02 -0700 Subject: [PATCH 11/19] -> Added in some new BQ helper methods: wait and fetch, filling gap between insert and insert-and-fetch; batch insert, wait, and fetch -> Swapped full case metadata to BQ --- cohorts/metadata_helpers.py | 121 ++++++++++++++++---------- google_helpers/bigquery/bq_support.py | 19 ++-- projects/models.py | 3 +- 3 files changed, 90 insertions(+), 53 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 89a9c962..8e8f0055 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1400,8 +1400,8 @@ def get_full_sample_metadata(barcodes): else: prog = 'CCLE' if prog not in barcodes_by_program: - barcodes_by_program[prog] = () - barcodes_by_program[prog] += (barcode,) + barcodes_by_program[prog] = [] + barcodes_by_program[prog].append(barcode) programs = Program.objects.filter(name__in=list(barcodes_by_program.keys()), active=True, is_public=True) @@ -1414,41 +1414,67 @@ def get_full_sample_metadata(barcodes): program_tables = program.get_metadata_tables() program_data_tables = program.get_data_tables() - cursor.execute(""" + search_clause = BigQuerySupport.build_bq_filter_and_params({'sample_barcode': barcodes_by_program[program.name]}) + + sample_job = BigQuerySupport.insert_query_job(""" SELECT biospec.sample_barcode as sb, biospec.case_barcode as cb, biospec.* - FROM {} biospec - WHERE biospec.sample_barcode IN ({}) AND biospec.endpoint_type = 'current' - """.format(program_tables.biospec_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) + FROM `{}` biospec + WHERE {} + """.format( + "{}.{}.{}".format(settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.biospec_bq_table,), + search_clause['filter_string'] + ), search_clause['parameters']) - fields = cursor.description - skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'sb', 'cb'] + bq_results = BigQuerySupport.wait_for_done_and_get_results(sample_job) + result_schema = BigQuerySupport.get_result_schema(sample_job['jobReference']) - for row in cursor.fetchall(): - items[row[0]] = { - 'sample_barcode': row[0], - 'case_barcode': row[1], - 'biospecimen_data': {fields[index][0]: column for index, column in enumerate(row) if - fields[index][0] not in skip}, - 'data_details': {} + skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'sb', 'cb', 'case_barcode'] + + for row in bq_results: + items[row['f'][0]['v']] = { + 'sample_barcode': row['f'][0]['v'], + 'case_barcode': row['f'][1]['v'], + 'data_details': { + x.build: 'NONE_FOUND' for x in program_data_tables + }, + 'biospecimen_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } if len(list(items.keys())): - for build in program_data_tables: - cursor.execute(""" - SELECT md.sample_barcode as sb, md.* - FROM {} md - WHERE md.sample_barcode IN ({}) AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL - """.format(build.data_table, ",".join(["%s"] * (len(barcodes_by_program[program.name])))), - barcodes_by_program[program.name]) - - fields = cursor.description - for row in cursor.fetchall(): - if not build.build in items[row[0]]['data_details']: - items[row[0]]['data_details'][build.build] = [] - items[row[0]]['data_details'][build.build].append( - {fields[index][0]: column for index, column in enumerate(row) if fields[index][0] not 
in skip} - ) + queries = [] + + for build_table in program_data_tables: + logger.info(str(build_table)) + queries.append({ + 'query': """ + #standardSQL + SELECT md.sample_barcode as sb, md.* + FROM `{}` md + WHERE {} AND NOT(md.sample_barcode = '') AND md.sample_barcode IS NOT NULL + """.format( + "{}.{}.{}".format( + settings.BIGQUERY_DATA_PROJECT_ID, build_table.bq_dataset, + build_table.data_table.lower()), + search_clause['filter_string']), + 'parameters': search_clause['parameters'], + 'build': build_table.build + }) + + results = BigQuerySupport.insert_job_batch_and_get_results(queries) + + for bq_result in results: + result_schema = bq_result['result_schema'] + bq_results = bq_result['bq_results'] + if not bq_results or not result_schema: + logger.warn("[WARNING] Results not received for this query:") + logger.warn("{}".format(bq_result['query'])) + continue + for row in bq_results: + if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': + items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] + items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ + result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip + }) # TODO: Once we have aliquots in the database again, add those here @@ -1500,14 +1526,15 @@ def get_full_case_metadata(barcodes): bq_search = BigQuerySupport.build_bq_filter_and_params({'case_barcode': barcodes_by_program[program.name]}) case_job = BigQuerySupport.insert_query_job(""" + #standardSQL SELECT clin.case_barcode as cb, clin.* FROM `{}` clin WHERE {} """.format("{}.{}.{}".format( settings.BIGQUERY_DATA_PROJECT_ID, program_tables.bq_dataset, program_tables.clin_bq_table), - bq_search['filter_str']), bq_search['parameters']) + bq_search['filter_string']), bq_search['parameters']) - bq_results = BigQuerySupport.wait_for_done_and_get_results(case_job['jobReference']) + bq_results = BigQuerySupport.wait_for_done_and_get_results(case_job) result_schema = BigQuerySupport.get_result_schema(case_job['jobReference']) skip = ['endpoint_type', 'metadata_clinical_id', 'metadata_biospecimen_id', 'cb', 'summary_file_count'] @@ -1516,7 +1543,9 @@ def get_full_case_metadata(barcodes): items[row['f'][0]['v']] = { 'case_barcode': row['f'][0]['v'], 'samples': [], - 'data_details': {}, + 'data_details': { + x.build: 'NONE_FOUND' for x in program_data_tables + }, 'clinlical_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } @@ -1524,15 +1553,17 @@ def get_full_case_metadata(barcodes): queries = [] for build_table in program_data_tables: + logger.info(str(build_table)) queries.append({ 'query': """ + #standardSQL SELECT md.case_barcode as cb, md.* FROM `{}` md WHERE {} AND (md.sample_barcode = '' OR md.sample_barcode IS NULL) """.format( "{}.{}.{}".format( - settings.BIGQUERY_DATA_PROJECT_ID, program_data_tables.bq_dataset, program_data_tables.data_table), - bq_search['filter_str']), + settings.BIGQUERY_DATA_PROJECT_ID, build_table.bq_dataset, build_table.data_table.lower()), + bq_search['filter_string']), 'parameters': bq_search['parameters'], 'query_type': 'data_details', 'build': build_table.build @@ -1540,28 +1571,30 @@ def get_full_case_metadata(barcodes): queries.append({ 'query': """ + #standardSQL SELECT case_barcode, sample_barcode - FROM {} + FROM `{}` WHERE {} """.format("{}.{}.{}".format( settings.BIGQUERY_DATA_PROJECT_ID, 
program_tables.bq_dataset, program_tables.biospec_bq_table, - bq_search['filter_str'])), + ), bq_search['filter_string']), 'parameters': bq_search['parameters'], 'query_type': 'samples' }) results = BigQuerySupport.insert_job_batch_and_get_results(queries) - for result in results: - bq_results = result['bq_results'] - if result['query_type'] == 'samples': + for bq_result in results: + result_schema = bq_result['result_schema'] + bq_results = bq_result['bq_results'] + if bq_result['query_type'] == 'samples': for row in bq_results: items[row['f'][0]['v']]['samples'].append(row['f'][1]['v']) else: for row in bq_results: - if 'data_details' not in items[row['f'][0]['v']] or result['build'] not in items[row['f'][0]['v']]['data_details']: - items[row['f'][0]['v']]['data_details'][result['build']] = [] - items[row['f'][0]['v']]['data_details'][result['build']].append({ + if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': + items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] + items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip }) diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index 3995775e..51826263 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -347,6 +347,7 @@ def job_is_done(self, query_job): # Fetch the results of a job based on the reference provided def fetch_job_results(self, job_ref): + logger.info(str(job_ref)) result = [] page_token = None @@ -399,9 +400,9 @@ def insert_query_job(cls, query, parameters=None): # Check the status of a BQ job @classmethod - def check_job_is_done(cls, job_ref): + def check_job_is_done(cls, query_job): bqs = cls(None, None, None) - return bqs.job_is_done(job_ref) + return bqs.job_is_done(query_job) # Do a 'dry run' query, which estimates the cost @classmethod @@ -418,9 +419,9 @@ def get_job_results(cls, job_reference): # Given a job reference for a running job, await the completion, # then fetch and return the results @classmethod - def wait_for_done_and_get_results(cls, job_reference): + def wait_for_done_and_get_results(cls, query_job): bqs = cls(None, None, None) - check_done = bqs.await_job_is_done(job_reference) + check_done = bqs.await_job_is_done(query_job) return bqs.fetch_job_results(check_done['jobReference']) # Given a BQ service and a job reference, fetch out the results @@ -446,13 +447,14 @@ def get_table_schema(cls, projectId, datasetId, tableId): @classmethod def get_result_schema(cls, job_ref): bqs = cls(None, None, None) - results = bqs.bq_service.jobs.getQueryResults(**job_ref).execute(num_retries=5) + results = bqs.bq_service.jobs().getQueryResults(**job_ref).execute(num_retries=5) return results['schema'] # Method for submitting a group of jobs and awaiting the results of the whole set @classmethod def insert_job_batch_and_get_results(cls, query_set): + logger.info(str(query_set)) bqs = cls(None, None, None) submitted_job_set = {} for query in query_set: @@ -467,7 +469,7 @@ def insert_job_batch_and_get_results(cls, query_set): while still_checking and not_done: not_done = False for job in submitted_job_set: - if not BigQuerySupport.check_job_is_done(submitted_job_set[job]['jobReference']): + if not BigQuerySupport.check_job_is_done(submitted_job_set[job]): not_done = True if not_done: sleep(1) @@ -478,11 +480,12 @@ def 
insert_job_batch_and_get_results(cls, query_set): logger.warn("[WARNING] Not all of the queries completed!") for query in query_set: - if bqs.job_is_done(submitted_job_set[query['job_id']]['jobReference']): + if bqs.job_is_done(submitted_job_set[query['job_id']]): query['bq_results'] = bqs.fetch_job_results(submitted_job_set[query['job_id']]['jobReference']) - query['result_schema'] = BigQuerySupport.get_result_schema(query['job_id']['jobReference']) + query['result_schema'] = BigQuerySupport.get_result_schema(submitted_job_set[query['job_id']]['jobReference']) else: query['bq_results'] = None + query['result_schema'] = None return query_set diff --git a/projects/models.py b/projects/models.py index cfac5081..3e7e496f 100644 --- a/projects/models.py +++ b/projects/models.py @@ -231,7 +231,8 @@ class Meta(object): verbose_name_plural = "Public Data Tables" def __str__(self): - return self.program__name + " " + self.build + " Data Tables" + return "{} [{}] Data Tables".format(self.program.name,self.build) + class Public_Annotation_Tables(models.Model): program = models.ForeignKey(Program, null=False) From 36633b14d1b6ca739ba06b9cfdb03d526d66e0b6 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 24 Jun 2019 19:33:59 -0700 Subject: [PATCH 12/19] -> Default to empty data details --- cohorts/metadata_helpers.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 8e8f0055..d99eda22 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1435,7 +1435,7 @@ def get_full_sample_metadata(barcodes): 'sample_barcode': row['f'][0]['v'], 'case_barcode': row['f'][1]['v'], 'data_details': { - x.build: 'NONE_FOUND' for x in program_data_tables + x.build: [] for x in program_data_tables }, 'biospecimen_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } @@ -1470,8 +1470,6 @@ def get_full_sample_metadata(barcodes): logger.warn("{}".format(bq_result['query'])) continue for row in bq_results: - if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': - items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip }) @@ -1544,7 +1542,7 @@ def get_full_case_metadata(barcodes): 'case_barcode': row['f'][0]['v'], 'samples': [], 'data_details': { - x.build: 'NONE_FOUND' for x in program_data_tables + x.build: [] for x in program_data_tables }, 'clinlical_data': {result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip} } @@ -1592,8 +1590,6 @@ def get_full_case_metadata(barcodes): items[row['f'][0]['v']]['samples'].append(row['f'][1]['v']) else: for row in bq_results: - if items[row['f'][0]['v']]['data_details'][bq_result['build']] == 'NONE_FOUND': - items[row['f'][0]['v']]['data_details'][bq_result['build']] = [] items[row['f'][0]['v']]['data_details'][bq_result['build']].append({ result_schema['fields'][index]['name']: x['v'] for index, x in enumerate(row['f'], start=0) if result_schema['fields'][index]['name'] not in skip }) From 4b97307bbeb5dea1ebaedfa070e9415273ab0e4c Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 24 Jun 2019 20:02:00 -0700 Subject: [PATCH 13/19] -> No more db calls in those methods --- cohorts/metadata_helpers.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index d99eda22..66b87ff8 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1385,8 +1385,6 @@ def get_full_sample_metadata(barcodes): result = { 'total_found': 0 } - db = None - cursor = None try: barcodes_by_program = {} @@ -1484,10 +1482,6 @@ def get_full_sample_metadata(barcodes): except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) - finally: - logger.info("[STATUS] Closing connection in sample metadata.") - if cursor: cursor.close() - if db and db.open: db.close() def get_full_case_metadata(barcodes): @@ -1495,8 +1489,6 @@ def get_full_case_metadata(barcodes): result = { 'total_found': 0 } - db = None - cursor = None try: barcodes_by_program = {} @@ -1604,10 +1596,6 @@ def get_full_case_metadata(barcodes): except Exception as e: logger.error("[ERROR] While fetching sample metadata for {}:".format(barcode)) logger.exception(e) - finally: - logger.info("[STATUS] Closing connection in case metadata.") - if cursor: cursor.close() - if db and db.open: db.close() def get_sample_metadata(barcode): From ba71f964588872e73699b0a66fa393bec87c69c9 Mon Sep 17 00:00:00 2001 From: wlongabaugh Date: Mon, 8 Jul 2019 12:17:57 -0700 Subject: [PATCH 14/19] Logging had bad parentheses --- accounts/dcf_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts/dcf_support.py b/accounts/dcf_support.py index abcb0a78..63bbc6e0 100755 --- a/accounts/dcf_support.py +++ b/accounts/dcf_support.py @@ -741,7 +741,7 @@ def verify_sa_at_dcf(user_id, gcp_id, service_account_id, datasets, phs_map, sa_ try: # DCF requires this to be in the header. OAuth2 library glues this onto the auth header stuff: headers = {'Content-Type': 'application/json'} - logger.info("[INFO] DCF verification request: {} {}".format(json_dumps(sa_data, service_account_id))) + logger.info("[INFO] DCF verification request: {} {}".format(json_dumps(sa_data), service_account_id)) resp = _dcf_call(full_url, user_id, mode=use_mode, post_body=json_dumps(sa_data), headers=headers) except (TokenFailure, InternalTokenError, RefreshTokenExpired, DCFCommFailure) as e: logger.error("[ERROR] Attempt to contact DCF for SA verification failed (user {})".format(user_id)) From bacd3434cb9e317e35913ab0f4f31526299ee2bd Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 13:17:41 -0700 Subject: [PATCH 15/19] -> Rename 'desc' to description which is more consistent --- cohorts/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 905ad1c7..9dd90e29 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -31,9 +31,9 @@ from django.conf import settings -def create_cohort(user, filters=None, name=None, desc=None, source_id=None): +def create_cohort(user, filters=None, name=None, description=None, source_id=None): - if not filters and not name and not desc: + if not filters and not name and not description: # Can't save/edit a cohort when nothing is being changed! 
return None @@ -46,7 +46,7 @@ def create_cohort(user, filters=None, name=None, desc=None, source_id=None): if source and not filters or (len(filters) <= 0): # If we're only changing the name and/or desc, just edit the cohort and update it - source.update(name=name, description=desc) + source.update(name=name, description=description) return { 'cohort_id': source.id } # Make and save cohort @@ -75,7 +75,7 @@ def create_cohort(user, filters=None, name=None, desc=None, source_id=None): } # Create new cohort - cohort = Cohort.objects.create(name=name, description=desc) + cohort = Cohort.objects.create(name=name, description=description) cohort.save() # Set permission for user to be owner From 1512ab5250fe9be17c117c2c40d01f060aec08c7 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 13:45:42 -0700 Subject: [PATCH 16/19] -> Update is for a QuerySet, not a single object... --- cohorts/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 9dd90e29..1e04e591 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -41,7 +41,7 @@ def create_cohort(user, filters=None, name=None, description=None, source_id=Non source_progs = None if source_id: - source = Cohort.objects.filter(id=source_id).first() + source = Cohort.objects.filter(id=source_id) source_progs = source.get_programs() if source and not filters or (len(filters) <= 0): From ab36588825fa51dd9f121895401697dc121c8163 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 13:49:18 -0700 Subject: [PATCH 17/19] -> ...but we need a single object for OTHER things. --- cohorts/utils.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 1e04e591..58f5fa67 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -41,13 +41,14 @@ def create_cohort(user, filters=None, name=None, description=None, source_id=Non source_progs = None if source_id: - source = Cohort.objects.filter(id=source_id) - source_progs = source.get_programs() - - if source and not filters or (len(filters) <= 0): - # If we're only changing the name and/or desc, just edit the cohort and update it - source.update(name=name, description=description) - return { 'cohort_id': source.id } + if not filters or (len(filters) <= 0): + source = Cohort.objects.filter(id=source_id).first() + # If we're only changing the name and/or desc, just edit the cohort and update it + source.update(name=name, description=description) + return { 'cohort_id': source.id } + else: + source = Cohort.objects.filter(id=source_id).first() + source_progs = source.get_programs() # Make and save cohort From 1f30ea6737ae60de155ddb6467462df9959c765e Mon Sep 17 00:00:00 2001 From: "S. 
Paquette" Date: Mon, 15 Jul 2019 13:52:37 -0700 Subject: [PATCH 18/19] -> Apply Occam's razor --- cohorts/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/cohorts/utils.py b/cohorts/utils.py index 58f5fa67..1b593948 100644 --- a/cohorts/utils.py +++ b/cohorts/utils.py @@ -41,14 +41,17 @@ def create_cohort(user, filters=None, name=None, description=None, source_id=Non source_progs = None if source_id: - if not filters or (len(filters) <= 0): - source = Cohort.objects.filter(id=source_id).first() - # If we're only changing the name and/or desc, just edit the cohort and update it - source.update(name=name, description=description) - return { 'cohort_id': source.id } - else: - source = Cohort.objects.filter(id=source_id).first() - source_progs = source.get_programs() + source = Cohort.objects.filter(id=source_id).first() + source_progs = source.get_programs() + + if source and not filters or (len(filters) <= 0): + # If we're only changing the name and/or desc, just edit the cohort and return + if name: + source.name = name + if description: + source.description = description + source.save() + return { 'cohort_id': source.id } # Make and save cohort From 6222240eaf633a5d2ca9c5ad51c987f01bccb078 Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Mon, 15 Jul 2019 14:50:01 -0700 Subject: [PATCH 19/19] -> Require any string filter to have a value --- cohorts/metadata_helpers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 66b87ff8..62443f5e 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -1739,7 +1739,15 @@ def get_sample_case_list_bq(cohort_id=None, inc_filters=None, comb_mut_filters=' if key_field_type not in field_types: invalid_keys.append(key_split) else: - filters[field_types[key_field_type]['type']][key_field] = inc_filters[prog][key_split] + # Check to make sure any string values aren't empty strings - if they are, it's invalid. + vals = inc_filters[prog][key_split] + if not isinstance(vals, list): + vals = [inc_filters[prog][key_split]] + for val in vals: + if isinstance(val, str) and not len(val): + invalid_keys.append(key_split) + else: + filters[field_types[key_field_type]['type']][key_field] = inc_filters[prog][key_split] if len(invalid_keys) > 0: raise Exception("Improper filter(s) supplied for program {}: '{}'".format(prog, ("', '".join(invalid_keys))))