-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreport_summary_requests.py
125 lines (102 loc) · 4.58 KB
/
report_summary_requests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Request:
# - request ID
# - project ID
# - isCmoRequest value
# Request samples:
# - How many in original request?
# - How many that are missing fields required for label generation?
# - How many that are missing other key fields that Voyager needs?
import sys
import os
import csv
import json
# Module-level report container; not referenced by the functions visible in this file.
REQUEST_SUMMARY_REPORT = dict()

# Column order of the tab-separated report rows printed to stdout.
REQUEST_SUMMARY_REPORT_KEYS = [
    "REQUEST_ID",
    "LOGGED_REQUEST_STATUS",
    "PROJECT_ID",
    "IS_CMO_REQUEST",
    "TOTAL_NUM_SAMPLES",
    "FAILED_NUM_SAMPLES",
    "DETAILED_SAMPLE_ERRORS",
]

# Patient ID fields relevant to label generation.
LABEL_GEN_FIELD_PTID = [
    "cmoPatientId",
    "normalizedPatientId",
]

# Sample type fields; a sample needs at least one of these populated.
LABEL_GEN_FIELD_SAMPLETYPE = [
    "specimenType",
    "sampleOrigin",
    "cmoSampleClass",
]

# Nucleic acid field relevant to label generation.
LABEL_GEN_FIELD_NUCACID = [
    "sampleType",
]

# Keys every sample is expected to carry besides the label generation fields.
OTHER_ESSENTIAL_SAMPLE_FIELDS = [
    "investigatorSampleId",
    "baitSet",
]
# Detailed sample report format, one entry per flagged sample (entries are joined with " | "):
#   sample_id: LABEL_GEN_MISSING_FIELDS: <fields missing for label generation>; OTHER_ESSENTIAL_MISSING_FIELDS: <other essential fields missing>
# A sample can have all the fields required for label generation and still be missing other fields that were identified as essential for downstream operations (e.g., Voyager needs baitSet and/or recipe).
def extract_request_details(json_data):
    """Summarize a request and list the samples that are missing key fields.

    A sample is flagged when it lacks fields needed for label generation
    (patient ID, sample type, nucleic acid) or any key listed in
    OTHER_ESSENTIAL_SAMPLE_FIELDS.

    Args:
        json_data: Parsed request JSON (dict) with the keys "requestId",
            "projectId", "isCmoRequest" and "samples" (a list of sample
            dicts).

    Returns:
        A dict with the keys "REQUEST_ID", "PROJECT_ID", "IS_CMO_REQUEST",
        "TOTAL_NUM_SAMPLES", "FAILED_NUM_SAMPLES" and
        "DETAILED_SAMPLE_ERRORS". The last one is a list holding one
        single-entry dict per flagged sample:
        {igoId: {"LABEL_GEN_MISSING_FIELDS": [...],
                 "OTHER_ESSENTIAL_MISSING_FIELDS": [...]}}.

    Raises:
        KeyError: If a request-level key is absent, or a flagged sample has
            no "igoId".
    """
    request_id = json_data['requestId']
    project_id = json_data['projectId']
    is_cmo_request = json_data['isCmoRequest']
    samples = json_data['samples']

    sample_details = []
    for s in samples:
        label_gen_fields_missing = []
        # A missing or null "cmoSampleIdFields" is treated as an empty mapping
        # so the checks below report missing fields instead of crashing.
        sample_id_fields = s.get("cmoSampleIdFields") or {}

        # Patient ID: a non-empty cmoPatientId, or a normalizedPatientId key
        # inside cmoSampleIdFields.
        has_patient_id = (
            s.get("cmoPatientId", "") != ""
            or "normalizedPatientId" in sample_id_fields
        )
        if not has_patient_id:
            label_gen_fields_missing.extend(["cmoPatientId", "normalizedPatientId"])

        # Sample type: at least one of the abbreviation source fields must be
        # populated ("null" as a string counts as unpopulated).
        has_sample_type = any(
            s.get(f, "") not in ("", "null") for f in LABEL_GEN_FIELD_SAMPLETYPE
        )
        if not has_sample_type:
            label_gen_fields_missing.extend(LABEL_GEN_FIELD_SAMPLETYPE[:])

        # Nucleic acid: only evaluated when cmoSampleIdFields has a
        # "sampleType" key. .get() is used for the fallbacks so that an absent
        # "naToExtract" or "baitSet" is reported as missing rather than
        # raising KeyError.
        has_nuc_acid = "sampleType" in sample_id_fields and (
            sample_id_fields["sampleType"] != ""
            or sample_id_fields.get("naToExtract", "") != ""
            or s.get("baitSet", "") != ""
        )
        if not has_nuc_acid:
            label_gen_fields_missing.extend(["sampleType", "naToExtract", "baitSet"])

        # Other essential fields are checked for key presence only.
        other_missing = [f for f in OTHER_ESSENTIAL_SAMPLE_FIELDS if f not in s]

        if label_gen_fields_missing or other_missing:
            sample_map = {
                'LABEL_GEN_MISSING_FIELDS': label_gen_fields_missing,
                'OTHER_ESSENTIAL_MISSING_FIELDS': other_missing,
            }
            sample_details.append({s["igoId"]: sample_map})

    return {
        "REQUEST_ID": request_id,
        "PROJECT_ID": project_id,
        "IS_CMO_REQUEST": is_cmo_request,
        "TOTAL_NUM_SAMPLES": len(samples),
        "FAILED_NUM_SAMPLES": len(sample_details),
        "DETAILED_SAMPLE_ERRORS": sample_details,
    }
def format_request_details_string(request_details_map):
    """Print one tab-separated report row for a request to stdout.

    Columns follow REQUEST_SUMMARY_REPORT_KEYS order. Every value is
    converted with str(), except "DETAILED_SAMPLE_ERRORS", which is rendered
    as " | "-separated entries of the form
    "<sample_id>: <label generation part>; <other fields part>". A part is an
    empty string when the sample has no missing fields of that kind.

    Args:
        request_details_map: Dict holding a value for every key in
            REQUEST_SUMMARY_REPORT_KEYS. "DETAILED_SAMPLE_ERRORS" is a list of
            {sample_id: {"LABEL_GEN_MISSING_FIELDS": [...],
                         "OTHER_ESSENTIAL_MISSING_FIELDS": [...]}} dicts.

    Raises:
        KeyError: If a report key is absent from request_details_map.
    """
    record = []
    for key in REQUEST_SUMMARY_REPORT_KEYS:
        if key != "DETAILED_SAMPLE_ERRORS":
            record.append(str(request_details_map[key]))
            continue

        sample_details_string = []
        for sample_entry in request_details_map[key]:
            # .items() instead of .iteritems(): the latter does not exist on
            # Python 3 dicts, while .items() works on both Python 2 and 3.
            for sample_id, missing in sample_entry.items():
                label_gen_missing = missing["LABEL_GEN_MISSING_FIELDS"]
                other_missing = missing["OTHER_ESSENTIAL_MISSING_FIELDS"]

                label_gen_fields_string = ""
                if label_gen_missing:
                    label_gen_fields_string = (
                        "LABEL_GEN_MISSING_FIELDS: " + ",".join(label_gen_missing)
                    )

                other_fields_string = ""
                if other_missing:
                    # The explicit "+" matters: without it the label literal
                    # and "," fuse into the join separator and the label is
                    # lost or misplaced.
                    other_fields_string = (
                        "OTHER_ESSENTIAL_MISSING_FIELDS: " + ",".join(other_missing)
                    )

                sample_details_string.append(
                    "%s: %s; %s" % (sample_id, label_gen_fields_string, other_fields_string)
                )
        record.append(" | ".join(sample_details_string))
    print("\t".join(record))
def load_request_details_from_log(filename):
    """Read a tab-separated request log and print a summary report to stdout.

    The report header (REQUEST_SUMMARY_REPORT_KEYS joined by tabs) is printed
    first. The first line of the log is treated as a column header and
    skipped. For every other non-blank line, the second column is taken as
    the logged request status and the third column as the request JSON; one
    report row is printed per line.

    Args:
        filename: Path of the log file to read.

    Raises:
        IndexError: If a data line has fewer than three tab-separated columns.
        ValueError: If the third column is not valid JSON.
    """
    print("\t".join(REQUEST_SUMMARY_REPORT_KEYS))
    # Plain 'r' mode: the 'U' flag was removed in Python 3.11 (open() raises
    # ValueError), and universal newlines are the default in Python 3 text mode.
    with open(filename, 'r') as dfile:
        # Skip the header line; None default tolerates an empty file.
        next(dfile, None)
        for line in dfile:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record and would otherwise fail on the column lookups below.
            if not line.strip():
                continue
            data = line.rstrip('\r\n').split('\t')
            logged_status = data[1]
            request_json = json.loads(data[2])
            request_details_map = extract_request_details(request_json)
            request_details_map["LOGGED_REQUEST_STATUS"] = logged_status
            format_request_details_string(request_details_map)
def main():
    """Command-line entry point.

    Expects the path of the request log file as the first command-line
    argument and prints the summary report for it. Exits with a usage
    message (non-zero status) when the argument is missing, instead of
    failing with an IndexError traceback.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: report_summary_requests.py <request_log_file>")
    load_request_details_from_log(sys.argv[1])


if __name__ == '__main__':
    main()