From f90d823946448cab7da79d1c2f4740ff01d8a89f Mon Sep 17 00:00:00 2001 From: LucaCilibrasi <37807618+LucaCilibrasi@users.noreply.github.com> Date: Fri, 10 Sep 2021 13:44:37 +0200 Subject: [PATCH] - Change everything to MongoDB - Minor changes --- backend/apis/analyze.py | 4501 +++++++++++++---- .../src/components/FreeTargetVsBackground.vue | 78 +- .../src/components/SelectorsQueryFree.vue | 32 +- .../TimeSelectorDistributionLineageInGeo.vue | 7 + .../src/components/TimeSelectorQueryFree.vue | 80 +- .../src/components/TimeSelectorQueryGeo.vue | 14 +- frontend/src/store.js | 33 +- 7 files changed, 3633 insertions(+), 1112 deletions(-) diff --git a/backend/apis/analyze.py b/backend/apis/analyze.py index e78e826..cf2092d 100644 --- a/backend/apis/analyze.py +++ b/backend/apis/analyze.py @@ -19,10 +19,11 @@ api = Namespace('analyze', description='analyze') uri = "mongodb://localhost:23456/gcm_gisaid" +# uri = "mongodb://localhost:23457/gcm_gisaid" client = MongoClient(uri) db = client.gcm_gisaid -collection_db = db.seq_2021_08_26 +collection_db = db.seq_2021_08_26_2 ######################################################################################################## @@ -30,7 +31,7 @@ sars_cov_2_products = { "A": [ { - "name": "E (envelope protein)", + "name": "E", "start": 26245, "end": 26472, "row": 0, @@ -38,7 +39,7 @@ "sequence": "MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV" }, { - "name": "M (membrane glycoprotein)", + "name": "M", "start": 26523, "end": 27191, "row": 0, @@ -46,7 +47,7 @@ "sequence": "MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVLAAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELVIGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAAYSRYRIGNYKLNTDHSSSSDNIALLVQ" }, { - "name": "N (nucleocapsid phosphoprotein)", + "name": "N", "start": 28274, "end": 29533, "row": 0, @@ -54,7 +55,7 @@ "sequence": 
"MSDNGPQNQRNAPRITFGGPSDSTGSNQNGERSGARSKQRRPQGLPNNTASWFTALTQHGKEDLKFPRGQGVPINTNSSPDDQIGYYRRATRRIRGGDGKMKDLSPRWYFYYLGTGPEAGLPYGANKDGIIWVATEGALNTPKDHIGTRNPANNAAIVLQLPQGTTLPKGFYAEGSRGGSQASSRSSSRSRNSSRNSTPGSSRGTSPARMAGNGGDAALALLLLDRLNQLESKMSGKGQQQQGQTVTKKSAAEASKKPRQKRTATKAYNVTQAFGRRGPEQTQGNFGDQELIRQGTDYKHWPQIAQFAPSASAFFGMSRIGMEVTPSGTWLTYTGAIKLDDKDPNFKDQVILLNKHIDAYKTFPPTEPKKDKKKKADETQALPQRQKKQQTVTLLPAADLDDFSKQLQQSMSSADSTQA" }, { - "name": "ORF10 protein", + "name": "ORF10", "start": 29558, "end": 29674, "row": 0, @@ -62,7 +63,7 @@ "sequence": "MGYINVFAFPFTIYSLLLCRMNSRNYIAQVDVVNFNLT" }, { - "name": "NSP16 (2'-O-ribose methyltransferase)", + "name": "NSP16", "start": 20659, "end": 21552, "row": 0, @@ -86,7 +87,7 @@ "sequence": "KIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQ" }, { - "name": "NSP15 (endoRNAse)", + "name": "NSP15", "start": 19621, "end": 20658, "row": 0, @@ -94,7 +95,7 @@ "sequence": "SLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQ" }, { - "name": "NSP5 (3C-like proteinase)", + "name": "NSP5", "start": 10055, "end": 10972, "row": 0, @@ -102,7 +103,7 @@ "sequence": 
"SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ" }, { - "name": "NSP14 (3'-to-5' exonuclease)", + "name": "NSP14", "start": 18040, "end": 19620, "row": 0, @@ -118,7 +119,7 @@ "sequence": "SADAQSFLNGFAV" }, { - "name": "NSP13 (helicase)", + "name": "NSP13", "start": 16237, "end": 18039, "row": 0, @@ -158,7 +159,7 @@ "sequence": "NNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQ" }, { - "name": "NSP12 (RNA-dependent RNA polymerase)", + "name": "NSP12", "start": 13442, "end": 16236, "row": 0, @@ -166,7 +167,7 @@ "sequence": "SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQ" }, { - "name": "ORF1ab polyprotein", + "name": "ORF1ab", "start": 266, "end": 21555, "row": 0, @@ -182,7 +183,7 @@ "sequence": 
"AGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQ" }, { - "name": "NSP1 (leader protein)", + "name": "NSP1", "start": 266, "end": 805, "row": 0, @@ -190,7 +191,7 @@ "sequence": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG" }, { - "name": "ORF1a polyprotein", + "name": "ORF1a", "start": 266, "end": 13483, "row": 0, @@ -205,7 +206,7 @@ "sequence": "AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG" }, { - "name": "NS3 (ORF3a protein)", + "name": "NS3", "start": 25393, "end": 26220, "row": 0, @@ -213,7 +214,7 @@ "sequence": "MDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWESGVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVMEPIYDEPTTTTSVPL" }, { - "name": "NS6 (ORF6 protein)", + "name": "NS6", "start": 27202, "end": 27387, "row": 0, @@ -221,7 +222,7 @@ "sequence": "MFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID" }, { - "name": "NS7a (ORF7a protein)", + "name": "NS7a", "start": 27394, "end": 27759, "row": 0, @@ -229,7 +230,7 @@ "sequence": 
"MKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFSTQFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE" }, { - "name": "NS7b (ORF7b)", + "name": "NS7b", "start": 27756, "end": 27887, "row": 0, @@ -237,7 +238,7 @@ "sequence": "MIELSLIDFYLCFLAFLLFLVLIMLIIFWFSLELQDHNETCHA" }, { - "name": "NS8 (ORF8 protein)", + "name": "NS8", "start": 27894, "end": 28259, "row": 0, @@ -245,7 +246,7 @@ "sequence": "MKFLVFLGIITTVAAFHQECSLQSCTQHQPYVVDDPCPIHFYSKWYIRVGARKSAPLIELCVDEAGSKSPIQYIDIGNYTVSCLPFTINCQEPKLGSLVVRCSFYEDFLEYHDVRVVLDFI" }, { - "name": "Spike (surface glycoprotein)", + "name": "Spike", "start": 21563, "end": 25384, "row": 0, @@ -319,410 +320,3106 @@ def get(self): return all_protein -@api.route('/tableLineageCountry') +@api.route('/getProteinPosition') class FieldList(Resource): - @api.doc('table_lineage_country') + @api.doc('get_protein_position') def post(self): - to_send = api.payload - - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/tableLineageCountry', json_data, headers) + payload = api.payload + name_protein = payload['protein'] - response = conn.getresponse() - all_geo = response.read().decode() - all_geo = json.loads(all_geo) + all_protein = sars_cov_2_products['A'] + min_pos = 0 + max_pos = 0 + for item in all_protein: + name = str(item.get('name')) + if name.lower() == name_protein.lower(): + min_pos = 1 + max_pos = (item.get('end') - item.get('start')) // 3 + if "nsp" in name.lower(): + max_pos = max_pos + 1 - table = [] - for item in all_geo: - single_line = {'lineage': item['lineage']} - country_count = item['country_count'] - country_count = country_count.replace('"', "") - country_count = country_count.replace(")\\", "") - country_count = country_count.replace("\\", "") - country_count = country_count.replace("{", "") - country_count = country_count.replace("}", "") - 
country_count = country_count.replace("(", "") - array_country_count = country_count.split("),") - for single_country in array_country_count: - single_country = single_country.replace(")", "") - array_single_country = single_country.split(',') - single_line[array_single_country[0]] = array_single_country[1] - table.append(single_line) + res = {'start': min_pos, 'stop': max_pos} - return table + return res -@api.route('/possibleCountryLineage') +@api.route('/getDomains') class FieldList(Resource): - @api.doc('possible_country_lineage') + @api.doc('get_domains') def post(self): - to_send = api.payload - - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/arrayCountryForLineage', json_data, headers) - - response = conn.getresponse() - all_country = response.read().decode() - all_country = all_country.replace(']', '').replace('[', '') - all_country = all_country.replace('"', '').split(",") - - return all_country - + payload = api.payload + name_protein = payload['protein'] -@api.route('/denominatorLineageCountry') -class FieldList(Resource): - @api.doc('possible_country_lineage') - def post(self): + annotations = pd.read_csv("apis/protein_annotations.csv", + delimiter=',') - to_send = api.payload + annotations1 = copy.deepcopy(annotations) + annotations2 = copy.deepcopy(annotations) + annotations3 = copy.deepcopy(annotations) - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/denominatorLineageCountry', json_data, headers) + ann_mutagenesis = annotations1[(annotations.Description.str.lower() != 'n/d') + & (annotations.Protein.str.lower() == name_protein.lower()) + & (annotations.Category.str.lower() == 'mutagenesis') + ] + ann_mutagenesis2 = 
ann_mutagenesis[['Description', 'Begin', 'End']] + ann_mutagenesis3 = json.loads(ann_mutagenesis2.to_json(orient="records")) - response = conn.getresponse() - resp = response.read().decode() - resp = json.loads(resp) + ann_aa_modifications = annotations2[(annotations.Description.str.lower() != 'n/d') + & (annotations.Protein.str.lower() == name_protein.lower()) + & (annotations.Category.str.lower() == 'ptm') + & (annotations.Type.str.lower() == 'carbohyd') + ] + ann_aa_modifications2 = ann_aa_modifications[['Description', 'Begin', 'End']] + ann_aa_modifications3 = json.loads(ann_aa_modifications2.to_json(orient="records")) - denominators = {} + ann_sites_family_dom = annotations3[(annotations.Description.str.lower() != 'n/d') + & (annotations.Protein.str.lower() == name_protein.lower()) + & ((annotations.Category.str.lower() == 'domains_and_sites') | + (annotations.Type.str.lower() == 'n/d')) + ] + ann_sites_family_dom2 = ann_sites_family_dom[['Description', 'Begin', 'End']] + ann_sites_family_dom3 = json.loads(ann_sites_family_dom2.to_json(orient="records")) - for item in resp: - if item['geo'] is None: - denominators['N/D'] = item['cnt'] - else: - denominators[item['geo']] = item['cnt'] + result = {'mutagenesis': ann_mutagenesis3, 'aa_modifications': ann_aa_modifications3, + 'sites_and_domains': ann_sites_family_dom3} - return denominators + return result -@api.route('/analyzeMutationCountryLineage') +@api.route('/getImportantMutation') class FieldList(Resource): - @api.doc('analyze_mutation_country_lineage') + @api.doc('get_important_mutation') def post(self): - to_send = api.payload - - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineage', json_data, headers) - - response = conn.getresponse() - all_result = response.read().decode() - all_result = json.loads(all_result) - - 
mutation_table2 = [] - arr_p_values = [] - for item in all_result: - single_item = {} - if item['product'] == 'Spike (surface glycoprotein)': - protein = item['product'].split(" ", 1)[0] - mutation = protein + '_' - # mutation = 'S_' - else: - protein = item['product'].split(" ", 1)[0] - mutation = protein + '_' - mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] - single_item['mutation'] = mutation - single_item['start_aa_original'] = item['start_aa_original'] - single_item['sequence_aa_original'] = item['sequence_aa_original'] - single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] - single_item['product'] = item['product'] - single_item['mutation_position'] = item['start_aa_original'] - single_item['target'] = item['country'] - single_item['background'] = item['lineage'] - single_item['count_target'] = item['count_seq'] - single_item['percentage_background'] = item['fraction'] - single_item['numerator_background'] = item['numerator'] - single_item['denominator_background'] = item['denominator'] - single_item['percentage_target'] = item['fraction_country'] - single_item['numerator_target'] = item['count_seq'] - single_item['denominator_target'] = item['denominator_country'] - - epsilon = 0.00000001 - single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ - (single_item['percentage_background'] + epsilon) - - if single_item['odd_ratio'] >= 1: - # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_country'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item['numerator_background']], - 
[single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p - else: - # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_country'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item['numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p - - arr_p_values.append(single_item['p_value']) - mutation_table2.append(single_item) + payload = api.payload + name_lineage = payload['lineage'] - a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') + result = {'mutation': [], 'additional_mutation': []} - i = 0 - for item in mutation_table2: - item['pvalue'] = new_p_values[i] - i = i + 1 + if name_lineage in dict_lineage_mutation: + lineage_json = dict_lineage_mutation[name_lineage] + result['mutation'] = lineage_json['mutation'] + result['additional_mutation'] = lineage_json['additional_mutation'] + else: + all_mutation = [] + all_additional_mutation = [] + for lineage in dict_lineage_mutation: + row = dict_lineage_mutation[lineage] + for mutation in row['mutation']: + if mutation not in all_mutation: + all_mutation.append(mutation) + if mutation in all_additional_mutation: + all_additional_mutation.remove(mutation) + for additional_mutation in row['additional_mutation']: + if additional_mutation not in all_additional_mutation and additional_mutation not in all_mutation: + all_additional_mutation.append(additional_mutation) + result['mutation'] = all_mutation + result['additional_mutation'] = all_additional_mutation - 
return mutation_table2 + return result -@api.route('/analyzeMutationCountryLineageInTime') +@api.route('/getLineageTree') class FieldList(Resource): - @api.doc('analyze_mutation_country_lineage_in_time') + @api.doc('get_lineage_tree') def post(self): - to_send = api.payload + payload = api.payload + possible_lineages = payload['possibleLineages'] - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineageInTime', json_data, headers) + dict_copy = dict_lineage_mutation - response = conn.getresponse() - all_result = response.read().decode() - all_result = json.loads(all_result) + arr_lineages = [] + dict_lineages = {} + for item in possible_lineages: + single_line = item + dict_lineages[item['value']] = single_line + arr_lineages.append(item['value']) - mutation_table2 = [] - arr_p_values = [] - for item in all_result: - single_item = {} - if item['product'] == 'Spike (surface glycoprotein)': - protein = item['product'].split(" ", 1)[0] - mutation = protein + '_' - # mutation = 'S_' - else: - protein = item['product'].split(" ", 1)[0] - mutation = protein + '_' - mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] - single_item['mutation'] = mutation - single_item['start_aa_original'] = item['start_aa_original'] - single_item['sequence_aa_original'] = item['sequence_aa_original'] - single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] - single_item['product'] = item['product'] - single_item['mutation_position'] = item['start_aa_original'] - single_item['target'] = item['target_time'] - single_item['background'] = item['background_time'] - single_item['country'] = item['country'] - single_item['lineage'] = item['lineage'] - single_item['count_target'] = item['count_seq'] - single_item['percentage_background'] = 
item['fraction'] - single_item['numerator_background'] = item['numerator'] - single_item['denominator_background'] = item['denominator'] - single_item['percentage_target'] = item['fraction_target'] - single_item['numerator_target'] = item['count_seq'] - single_item['denominator_target'] = item['denominator_target'] - - epsilon = 0.00000001 - single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ - (single_item['percentage_background'] + epsilon) + dict_copy2 = dict(sorted(dict_copy.items(), key=lambda k_v: k_v[1]['alias'])) - if single_item['odd_ratio'] >= 1: - # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item['numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p - else: - # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item['numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p + items = [] + idx = 1 - arr_p_values.append(single_item['p_value']) - mutation_table2.append(single_item) + for 
lineage in dict_copy2: + already_done = False + children = False + children_lineage = False + important_lineage = False + alias = dict_copy2[lineage]['alias'] + if lineage in arr_lineages: + if dict_copy2[lineage]['WHO label'] != '': + important_lineage = True + for itm in items: + possible_parent_alias = str(itm['alias']) + '.' + possible_children_alias = str(alias) + possible_parent_lineage = str(itm['real_name']) + '.' + possible_children_lineage = str(lineage) + if possible_parent_alias in possible_children_alias: + children = True + recursive_children_lineage(itm, lineage, alias, dict_copy2, dict_lineages) + if possible_parent_lineage in possible_children_lineage: + children_lineage = True + if possible_children_lineage != possible_children_alias: + recursive_children_lineage(itm, lineage, lineage, dict_copy2, dict_lineages) + if not children: + already_done = True + name_complete = lineage + if dict_copy2[lineage]['WHO label'] != '': + name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') ' + single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage, + 'who': dict_copy2[lineage]['WHO label'], 'children': [], + 'count': dict_lineages[lineage]['count']} + items.append(single_lineage) + idx = idx + 1 - a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') + if not children_lineage and not already_done: + name_complete = lineage.split('.')[0] + single_lineage = {'id': idx, 'alias': name_complete, 'name': name_complete, + 'real_name': name_complete, + 'who': '', 'children': [], + 'count': 0} + items.append(single_lineage) + idx = idx + 1 + recursive_children_lineage(single_lineage, lineage, lineage, dict_copy2, dict_lineages) - i = 0 - for item in mutation_table2: - item['pvalue'] = new_p_values[i] - i = i + 1 + # if important_lineage and not already_done: + # name_complete = lineage + # if dict_copy2[lineage]['WHO label'] != '': + # name_complete = lineage + ' (' + dict_copy2[lineage]['WHO 
label'] + ') ' + # single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage, + # 'who': dict_copy2[lineage]['WHO label'], 'children': [], + # 'count': dict_lineages[lineage]['count']} + # items.append(single_lineage) + # idx = idx + 1 - return mutation_table2 + return items -@api.route('/analyzeTimeDistributionCountryLineage') +@api.route('/getAllImportantMutationPerLineage') class FieldList(Resource): - @api.doc('analyze_time_distribution_country_lineage') + @api.doc('get_important_mutation') def post(self): - to_send = api.payload - - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionCountryLineage', json_data, headers) - response = conn.getresponse() - all_result = response.read().decode() - all_result = json.loads(all_result) + payload = api.payload + lineage = payload['lineage'] + proteins = payload['proteins'] - return all_result + array_proteins = [] + for protein in proteins: + protein_rewritten = protein.split(" ")[0] + array_proteins.append(protein_rewritten) -@api.route('/analyzeTimeDistributionBackgroundQueryGeo') -class FieldList(Resource): - @api.doc('analyze_time_distribution_country_lineage') - def post(self): - to_send = api.payload + dict_copy = all_important_mutation_dict - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionBackgroundQueryGeo', json_data, - headers) + array_important_mutation = [] - response = conn.getresponse() - all_result = response.read().decode() - all_result = json.loads(all_result) + if lineage is None: + for lineage_mutations in dict_copy: + single_lineage_mutation = dict_copy[lineage_mutations] + for mutation in 
single_lineage_mutation[1]: + if mutation not in array_important_mutation: + protein = mutation.split("_")[0] + if protein in array_proteins: + array_important_mutation.append(mutation) + array_important_mutation.sort() + else: + if lineage in dict_copy: + single_lineage_mutation = dict_copy[lineage] + for mutation in single_lineage_mutation[1]: + if mutation not in array_important_mutation: + protein = mutation.split("_")[0] + if protein in array_proteins: + array_important_mutation.append(mutation) + array_important_mutation.sort() - return all_result + return array_important_mutation -@api.route('/analyzeMutationProvinceRegion') +@api.route('/checkAccessionId') class FieldList(Resource): - @api.doc('analyze_mutation_province_region') + @api.doc('check_accession_id') def post(self): - to_send = api.payload + payload = api.payload + accession_id = payload['accession_id'] + acc_id_arr = all_accession_id_dict['all_acc_id'] + result = False + if accession_id in acc_id_arr: + result = True + return result - conn = http.client.HTTPConnection('geco.deib.polimi.it') - headers = {'Content-type': 'application/json'} - send = to_send - json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationProvinceRegion', json_data, headers) - response = conn.getresponse() - all_result = response.read().decode() - all_result = json.loads(all_result) +def recursive_children_lineage(parent, lineage, alias, dict_copy2, dict_lineages): + children = False + idx = str(parent['id']) + '_' + str(len(parent['children'])) + for itm in parent['children']: + possible_parent_alias = str(itm['alias']) + '.' 
+ possible_children_alias = str(alias) + if possible_parent_alias in possible_children_alias: + children = True + recursive_children_lineage(itm, lineage, alias, dict_copy2, dict_lineages) + break + else: + children = False + if not children: + name_complete = lineage + if dict_copy2[lineage]['WHO label'] != '': + name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') ' + single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage, + 'who': dict_copy2[lineage]['WHO label'], + 'children': [], 'count': dict_lineages[lineage]['count']} + parent['children'].append(single_lineage) - mutation_table2 = [] - arr_p_values = [] - for item in all_result: - single_item = {} - if item['product'] == 'Spike (surface glycoprotein)': - protein = item['product'].split(" ", 1)[0] - mutation = protein + '_' - # mutation = 'S_' - else: - protein = item['product'].split(" ", 1)[0] - mutation = protein + '_' - mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] - single_item['start_aa_original'] = item['start_aa_original'] - single_item['sequence_aa_original'] = item['sequence_aa_original'] - single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] - single_item['mutation'] = mutation - single_item['product'] = item['product'] - single_item['mutation_position'] = item['start_aa_original'] - # if 'country' in item: - # single_item['target'] = item['region'] - # single_item['background'] = item['country'] - # else: - # single_item['target'] = item['province'] - # single_item['background'] = item['region'] - single_item['target'] = item['target'] - single_item['background'] = item['background'] - single_item['lineage'] = item['lineage'] - single_item['count_target'] = item['count_seq'] - single_item['percentage_background'] = item['fraction'] - single_item['numerator_background'] = item['numerator'] - single_item['denominator_background'] = item['denominator'] - 
single_item['percentage_target'] = item['fraction_target'] - single_item['numerator_target'] = item['count_seq'] - single_item['denominator_target'] = item['denominator_target'] +# ----------------------------------------- MONGO DB ----------------------------------------------- # - epsilon = 0.00000001 - single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ - (single_item['percentage_background'] + epsilon) - if single_item['odd_ratio'] >= 1: - # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item['numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p - else: - # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 +translate_dictionary = { + 'accession_id': '_id', + 'lineage': 'covv_lineage', + 'collection_date': 'covv_collection_date', + 'location': 'covv_location', + 'product': 'muts.pro', + 'start_aa_original': 'muts.loc', + 'sequence_aa_original': 'muts.org', + 'sequence_aa_alternative': 'muts.alt', +} + + +@api.route('/selectorQuery') +class FieldList(Resource): + @api.doc('selector_query') + def post(self): + + to_use = api.payload + field_name = to_use['field'] + query_fields = to_use['query'] + + # field_name = 'country' + # query_fields = 
{'lineage': 'B.1', 'geo_group': ['Europe', 'Asia'], 'minDate': '2020-01-01', 'maxDate': "2021-01-01", + # 'toExclude': {}} + # 'toExclude': {'geo_group': ['Asia'], 'country': ['Italy', 'France'] + + if field_name in query_fields: + del query_fields[field_name] + + where_part = {} + start_date = datetime.strptime("2019-01-01", '%Y-%m-%d') + where_part['c_coll_date_prec'] = {} + where_part['c_coll_date_prec']['$eq'] = 2 + where_part['collection_date'] = {} + where_part['collection_date']['$gte'] = start_date + + field_not_null = field_name + if field_not_null in translate_dictionary: + field_not_null = translate_dictionary[field_name] + if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province': + field_not_null = 'location.' + field_name + where_part[field_not_null] = {'$ne': None} + + if query_fields is not None: + for key in query_fields: + if key == 'minDate': + start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_fields[key]: + if '$and' not in where_part: + where_part['$and'] = [] + + single_where_part = {'$and': []} + for geoToExclude in query_fields[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' 
+ fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part: + where_part['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if isinstance(query_fields[key], list): + single_where_part_or = {'$or': []} + for itm in query_fields[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_fields[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item['numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + replace_fields_value = query_fields[key] + if key != 'start_aa_original': + replace_fields_value = query_fields[key] # .replace("'", "''") + if real_key not in where_part: + where_part[real_key] = {} + where_part[real_key]['$eq'] = replace_fields_value + + query = [] + + query_where = {"$match": where_part} + query.append(query_where) + + group_part = {} + real_field = field_name + if field_name in translate_dictionary: + real_field = 
translate_dictionary[field_name] + if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province': + real_field = 'location.' + field_name + # group_part["_id"] = {"value": + # {"$cond": + # [{"$eq": [f"${real_field}", ""]}, + # None, + # {"$cond": + # [{"$eq": [f"${real_field}", None]}, + # f"${real_field}", + # {"$concat": [ + # {"$toUpper": + # {"$substrCP": [f"${real_field}", 0, 1]} + # }, + # { + # "$substrCP": [ + # f"${real_field}", 1, + # {"$subtract": [{"$strLenCP": f"${real_field}"}, 1]} + # ] + # } + # ]} + # ] + # } + # ] + # }, + # } + group_part["_id"] = {"value": f"${real_field}"} + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query.append(query_group) + + sort_part = {"count": -1} + query_sort = {"$sort": sort_part} + query.append(query_sort) + + list_dict = [] + if field_name == 'lineage' and (len(query_fields['toExclude']) == 0 and ((len(query_fields) == 2 and "lineage" in query_fields) or len(query_fields) < 2)): + for lineage in all_important_mutation_dict: + single_item_remodel = {'value': lineage, 'count': all_important_mutation_dict[lineage][0]} + list_dict.append(single_item_remodel) + elif field_name == 'geo_group' and (len(query_fields['toExclude']) == 0 and ((len(query_fields) == 2 and "geo_group" in query_fields) or len(query_fields) < 2)): + dict_geo = {} + for geo in all_geo_dict['all_geo']: + geo_group = geo['geo_group'] + if geo_group not in dict_geo: + single_item_remodel = {'value': geo_group, 'count': geo['count']} + dict_geo[geo_group] = single_item_remodel + else: + dict_geo[geo_group]['count'] = dict_geo[geo_group]['count'] + geo['count'] + for location in dict_geo: + list_dict.append(dict_geo[location]) + else: + # print("query", query) + results = collection_db.aggregate(query, allowDiskUse=True) + + for single_item in list(results): + single_item_remodel = {} + for key in single_item: + if key == '_id': + single_item_remodel['value'] = 
single_item['_id']['value'] + else: + single_item_remodel[key] = single_item[key] + list_dict.append(single_item_remodel) + + # print("field:", field_name, " result:", list_dict) + return list_dict + + +@api.route('/tableLineageCountry') +class FieldList(Resource): + @api.doc('table_lineage_country') + def post(self): + filter_geo = api.payload + # filter_geo = {'type': 'country', 'value': 'Italy', 'minCountSeq': 500} + geo_selection = 'country' + geo_min_count = filter_geo['minCountSeq'] + geo_where = filter_geo['type'] + min_date = filter_geo['minDate'] + max_date = filter_geo['maxDate'] + geo_where_value = filter_geo['value'] + # if geo_where_value is not None: + # geo_where_value = geo_where_value.replace("'", "''") + + geo_where_part = None + if geo_where == 'geo_group': + geo_selection = 'country' + geo_where_part = {"$eq": geo_where_value} + elif geo_where == 'country': + geo_selection = 'region' + geo_where_part = {"$eq": geo_where_value} + elif geo_where == 'region': + geo_selection = 'province' + geo_where_part = {"$eq": geo_where_value} + elif geo_where == 'world': + geo_selection = 'geo_group' + geo_where_part = None + + where_part = {} + start_date = datetime.strptime(min_date, '%Y-%m-%d') + stop_date = datetime.strptime(max_date, '%Y-%m-%d') + where_part['c_coll_date_prec'] = {} + where_part['c_coll_date_prec']['$eq'] = 2 + where_part['collection_date'] = {} + where_part['collection_date']['$gte'] = start_date + where_part['collection_date']['$lte'] = stop_date + if geo_where_part is not None: + mongo_field = "location." + geo_where + where_part[mongo_field] = geo_where_part + + query = [] + + query_where = {"$match": where_part} + query.append(query_where) + + query_denominator = [query_where] + group_part_denominator = {} + + modified_field_denominator = "location." 
+ geo_where + group_part_denominator["_id"] = {f"{geo_where}": f"${modified_field_denominator}"} + group_part_denominator["count"] = {"$sum": 1} + query_group_denominator = {"$group": group_part_denominator} + query_denominator.append(query_group_denominator) + + # print("query denominator", query_denominator) + results = collection_db.aggregate(query_denominator, allowDiskUse=True) + + denominator = 0 + for single_item in list(results): + for key in single_item: + if key == 'count': + denominator = single_item[key] + + group_part = {} + + modified_field_location = "location." + geo_selection + modified_field_lineage = translate_dictionary["lineage"] + group_part["_id"] = {"lineage": f"${modified_field_lineage}", f"{geo_selection}": f"${modified_field_location}"} + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query.append(query_group) + + # print("query", query) + results = collection_db.aggregate(query, allowDiskUse=True) + + dict_lineage_copy = dict_lineage_mutation + + list_dict = [] + list_dict_dict = {} + for single_item in list(results): + for key in single_item: + if key == '_id': + for k in single_item[key]: + if single_item[key]['lineage'] is None or single_item[key]['lineage'] == 'None': + lineage_to_use = 'N/D' + else: + if single_item[key]['lineage'] in dict_lineage_copy and 'WHO label' in dict_lineage_copy[single_item[key]['lineage']] and dict_lineage_copy[single_item[key]['lineage']]['WHO label'] != '': + lineage_to_use = single_item[key]['lineage'] + ' (' + dict_lineage_copy[single_item[key]['lineage']]['WHO label'] + ') ' + else: + lineage_to_use = single_item[key]['lineage'] + if k == 'lineage': + if lineage_to_use not in list_dict_dict: + list_dict_dict[lineage_to_use] = {k: lineage_to_use} + else: + if single_item[key][k] is None: + list_dict_dict[lineage_to_use]['N/D'] = single_item['count'] + else: + list_dict_dict[lineage_to_use][single_item[key][k]] = single_item['count'] + + for item in list_dict_dict: + count = 
0 + real_item = list_dict_dict[item] + for key in real_item: + if key != 'lineage': + count = count + real_item[key] + if (count / denominator) * 100 >= geo_min_count: + list_dict.append(real_item) + + # print("dict", list_dict) + return list_dict + + +@api.route('/denominatorLineageCountry') +class FieldList(Resource): + @api.doc('denominator_lineage_country') + def post(self): + filter_geo = api.payload + + geo_selection = 'country' + geo_where = filter_geo['type'] + min_date = filter_geo['minDate'] + max_date = filter_geo['maxDate'] + geo_where_value = filter_geo['value'] + + geo_where_part = None + if geo_where == 'geo_group': + geo_selection = 'country' + geo_where_part = {"$eq": geo_where_value} + elif geo_where == 'country': + geo_selection = 'region' + geo_where_part = {"$eq": geo_where_value} + elif geo_where == 'region': + geo_selection = 'province' + geo_where_part = {"$eq": geo_where_value} + elif geo_where == 'world': + geo_selection = 'geo_group' + geo_where_part = None + + where_part = {} + start_date = datetime.strptime(min_date, '%Y-%m-%d') + stop_date = datetime.strptime(max_date, '%Y-%m-%d') + where_part['c_coll_date_prec'] = {} + where_part['c_coll_date_prec']['$eq'] = 2 + where_part['collection_date'] = {} + where_part['collection_date']['$gte'] = start_date + where_part['collection_date']['$lte'] = stop_date + if geo_where_part is not None: + mongo_field = "location." + geo_where + where_part[mongo_field] = geo_where_part + + query = [] + + query_where = {"$match": where_part} + query.append(query_where) + + group_part = {} + + modified_field = "location." 
+ geo_selection + group_part["_id"] = {f"{geo_selection}": f"${modified_field}"} + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query.append(query_group) + + # print("query", query) + results = collection_db.aggregate(query, allowDiskUse=True) + + list_dict = {} + for single_item in list(results): + for key in single_item: + if key == '_id': + for k in single_item[key]: + list_dict[single_item[key][k]] = single_item['count'] + + result_dict = {} + for item in list_dict: + if item is None: + result_dict['N/D'] = list_dict[item] + else: + result_dict[item] = list_dict[item] + + # print("denominators", result_dict) + return result_dict + + +@api.route('/analyzeTimeDistributionCountryLineage') +class FieldList(Resource): + @api.doc('analyze_time_distribution_country_lineage') + def post(self): + to_use = api.payload + query_fields = to_use['query'] + + where_part = {} + start_date = datetime.strptime("2019-01-01", '%Y-%m-%d') + where_part['c_coll_date_prec'] = {} + where_part['c_coll_date_prec']['$eq'] = 2 + where_part['collection_date'] = {} + where_part['collection_date']['$gte'] = start_date + + if query_fields is not None: + for key in query_fields: + if key == 'minDate': + start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_fields[key]: + if '$and' not in where_part: + where_part['$and'] = [] + + single_where_part = {'$and': []} + for geoToExclude in query_fields[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' 
+ fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part: + where_part['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if isinstance(query_fields[key], list): + single_where_part_or = {'$or': []} + for itm in query_fields[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_fields[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + replace_fields_value = query_fields[key] + if key != 'start_aa_original': + replace_fields_value = query_fields[key] # .replace("'", "''") + if real_key not in where_part: + where_part[real_key] = {} + where_part[real_key]['$eq'] = replace_fields_value + + query = [] + + query_where = {"$match": where_part} + query.append(query_where) + + group_part = {} + real_field = translate_dictionary['collection_date'] + # group_part["_id"] = {"name": f"${real_field}"} + group_part["_id"] = {"name": {"$toString": '$collection_date'}} + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query.append(query_group) + + sort_part = {"_id": 1} + query_sort = {"$sort": sort_part} + query.append(query_sort) + + # print("query", query) + results = 
collection_db.aggregate(query, allowDiskUse=True) + + list_dict = [] + for single_item in list(results): + single_item_remodel = {} + for key in single_item: + if key == '_id': + single_item_remodel['name'] = single_item['_id']['name'].split("T")[0] + else: + single_item_remodel['value'] = single_item['count'] + list_dict.append(single_item_remodel) + + return list_dict + + +@api.route('/analyzeTimeDistributionBackgroundQueryGeo') +class FieldList(Resource): + @api.doc('analyze_time_distribution_country_lineage') + def post(self): + to_use = api.payload + query_fields = to_use['query'] + query_false = to_use['query_false'] + + where_part = {} + start_date = datetime.strptime("2019-01-01", '%Y-%m-%d') + where_part['c_coll_date_prec'] = {} + where_part['c_coll_date_prec']['$eq'] = 2 + where_part['collection_date'] = {} + where_part['collection_date']['$gte'] = start_date + + if query_fields is not None: + for key in query_fields: + if key == 'minDate': + start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_fields[key]: + if '$and' not in where_part: + where_part['$and'] = [] + + single_where_part = {'$and': []} + for geoToExclude in query_fields[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' 
+ fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part: + where_part['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if key == query_false: + single_where_part_or = {'$or': []} + specific_or = {f'{real_key}': {'$eq': None}} + single_where_part_or['$or'].append(specific_or) + specific_or = {f'{real_key}': {'$ne': query_fields[key]}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + if isinstance(query_fields[key], list): + single_where_part_or = {'$or': []} + for itm in query_fields[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_fields[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + + else: + real_key = key + if key == query_false: + single_where_part_or = {'$or': []} + specific_or = {f'{real_key}': {'$eq': None}} + single_where_part_or['$or'].append(specific_or) + specific_or = {f'{real_key}': {'$ne': query_fields[key]}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + if key in translate_dictionary: + real_key = translate_dictionary[key] + replace_fields_value = query_fields[key] + if key != 'start_aa_original': + replace_fields_value = query_fields[key] # 
.replace("'", "''") + if real_key not in where_part: + where_part[real_key] = {} + where_part[real_key]['$eq'] = replace_fields_value + + query = [] + + query_where = {"$match": where_part} + query.append(query_where) + + group_part = {} + real_field = translate_dictionary['collection_date'] + # group_part["_id"] = {"name": f"${real_field}"} + group_part["_id"] = {"name": {"$toString": '$collection_date'}} + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query.append(query_group) + + sort_part = {"_id": 1} + query_sort = {"$sort": sort_part} + query.append(query_sort) + + # print("query", query) + results = collection_db.aggregate(query, allowDiskUse=True) + + list_dict = [] + for single_item in list(results): + single_item_remodel = {} + for key in single_item: + if key == '_id': + single_item_remodel['name'] = single_item['_id']['name'].split("T")[0] + else: + single_item_remodel['value'] = single_item['count'] + list_dict.append(single_item_remodel) + + return list_dict + + +@api.route('/analyzeMutationCountryLineageInTime') +class FieldList(Resource): + @api.doc('analyze_mutation_country_lineage_in_time') + def post(self): + payload = api.payload + start_target_time = payload['start_target'] # '2021-03-31' + end_target_time = payload['end_target'] # '2021-06-31' + start_background_time = payload['start_background'] # '2019-01-31' + end_background_time = payload['end_background'] # '2021-03-31' + array_protein = payload['protein'] # ['Spike (surface glycoprotein)'] + + query_fields = payload['query'] + + if 'lineage' in query_fields: + lineage = query_fields['lineage'] + else: + lineage = 'empty' + if 'province' in query_fields: + geo1 = query_fields['province'] + elif 'region' in query_fields: + geo1 = query_fields['region'] + elif 'country' in query_fields: + geo1 = query_fields['country'] + elif 'geo_group' in query_fields: + geo1 = query_fields['geo_group'] + else: + geo1 = 'empty' + + array_result = [] + + where_part_target = {} 
+ where_part_background = {} + where_part_target_denominator = {} + where_part_background_denominator = {} + start_date_target = datetime.strptime(start_target_time, '%Y-%m-%d') + end_date_target = datetime.strptime(end_target_time, '%Y-%m-%d') + start_date_background = datetime.strptime(start_background_time, '%Y-%m-%d') + end_date_background = datetime.strptime(end_background_time, '%Y-%m-%d') + where_part_target['c_coll_date_prec'] = {} + where_part_target['c_coll_date_prec']['$eq'] = 2 + where_part_background['c_coll_date_prec'] = {} + where_part_background['c_coll_date_prec']['$eq'] = 2 + where_part_target_denominator['c_coll_date_prec'] = {} + where_part_target_denominator['c_coll_date_prec']['$eq'] = 2 + where_part_background_denominator['c_coll_date_prec'] = {} + where_part_background_denominator['c_coll_date_prec']['$eq'] = 2 + + where_part_target['collection_date'] = {} + where_part_target['collection_date']['$gte'] = start_date_target + where_part_target['collection_date']['$lte'] = end_date_target + where_part_background['collection_date'] = {} + where_part_background['collection_date']['$gte'] = start_date_background + where_part_background['collection_date']['$lte'] = end_date_background + where_part_target_denominator['collection_date'] = {} + where_part_target_denominator['collection_date']['$gte'] = start_date_target + where_part_target_denominator['collection_date']['$lte'] = end_date_target + where_part_background_denominator['collection_date'] = {} + where_part_background_denominator['collection_date']['$gte'] = start_date_background + where_part_background_denominator['collection_date']['$lte'] = end_date_background + + protein_length = len(array_protein) + if protein_length > 0: + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_background: + where_part_background['$and'] = [] + single_where_part_or = {'$or': []} + for protein in array_protein: + specific_or = {} + real_key = 
translate_dictionary['product'] + specific_or[f'{real_key}'] = {'$eq': protein} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_background['$and'].append(single_where_part_or) + + if query_fields is not None: + for key in query_fields: + if key == 'minDate': + start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$gte'] = start_date + where_part_background['collection_date']['$gte'] = start_date + where_part_target_denominator['collection_date']['$gte'] = start_date + where_part_background_denominator['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$lte'] = stop_date + where_part_background['collection_date']['$lte'] = stop_date + where_part_target_denominator['collection_date']['$lte'] = stop_date + where_part_background_denominator['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_fields[key]: + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_background: + where_part_background['$and'] = [] + if '$and' not in where_part_target_denominator: + where_part_target_denominator['$and'] = [] + if '$and' not in where_part_background_denominator: + where_part_background_denominator['$and'] = [] + + single_where_part = {'$and': []} + for geoToExclude in query_fields[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' 
+ fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part_target['$and'].append(single_where_part) + where_part_background['$and'].append(single_where_part) + where_part_target_denominator['$and'].append(single_where_part) + where_part_background_denominator['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_background: + where_part_background['$and'] = [] + if '$and' not in where_part_target_denominator: + where_part_target_denominator['$and'] = [] + if '$and' not in where_part_background_denominator: + where_part_background_denominator['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' 
+ key + if isinstance(query_fields[key], list): + single_where_part_or = {'$or': []} + for itm in query_fields[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_background['$and'].append(single_where_part_or) + where_part_target_denominator['$and'].append(single_where_part_or) + where_part_background_denominator['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_fields[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_background['$and'].append(single_where_part_or) + where_part_target_denominator['$and'].append(single_where_part_or) + where_part_background_denominator['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + replace_fields_value = query_fields[key] + if key != 'start_aa_original': + replace_fields_value = query_fields[key] # .replace("'", "''") + if real_key not in where_part_target: + where_part_target[real_key] = {} + if real_key not in where_part_background: + where_part_background[real_key] = {} + if real_key not in where_part_target_denominator: + where_part_target_denominator[real_key] = {} + if real_key not in where_part_background_denominator: + where_part_background_denominator[real_key] = {} + where_part_target[real_key]['$eq'] = replace_fields_value + where_part_background[real_key]['$eq'] = replace_fields_value + where_part_target_denominator[real_key]['$eq'] = replace_fields_value + where_part_background_denominator[real_key]['$eq'] = replace_fields_value + + query_target = [] + query_background = [] + query_target_denominator = [] + 
query_background_denominator = [] + + query_unwind_target = {"$unwind": "$muts"} + query_target.append(query_unwind_target) + query_unwind_background = {"$unwind": "$muts"} + query_background.append(query_unwind_background) + + query_where_target = {"$match": where_part_target} + query_target.append(query_where_target) + query_where_background = {"$match": where_part_background} + query_background.append(query_where_background) + query_where_target_denominator = {"$match": where_part_target_denominator} + query_target_denominator.append(query_where_target_denominator) + query_where_background_denominator = {"$match": where_part_background_denominator} + query_background_denominator.append(query_where_background_denominator) + + group_part = {"_id": {}} + real_field = translate_dictionary['product'] + group_part["_id"]["product"] = f"${real_field}" + real_field = translate_dictionary['start_aa_original'] + group_part["_id"]["start_aa_original"] = f"${real_field}" + real_field = translate_dictionary['sequence_aa_original'] + group_part["_id"]["sequence_aa_original"] = f"${real_field}" + real_field = translate_dictionary['sequence_aa_alternative'] + group_part["_id"]["sequence_aa_alternative"] = f"${real_field}" + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query_target.append(query_group) + query_background.append(query_group) + + group_part_denominator = {"_id": {}} + group_part_denominator["count"] = {"$sum": 1} + query_group = {"$group": group_part_denominator} + query_target_denominator.append(query_group) + query_background_denominator.append(query_group) + + sort_part = {"count": -1} + query_sort = {"$sort": sort_part} + query_target.append(query_sort) + query_background.append(query_sort) + + # print("query target", query_target) + # print("query target denominator", query_target_denominator) + # print("query background", query_background) + # print("query background denominator", query_background_denominator) + + results_target 
= collection_db.aggregate(query_target, allowDiskUse=True) + results_background = collection_db.aggregate(query_background, allowDiskUse=True) + # results_target_denominator = collection_db.aggregate(query_target_denominator, allowDiskUse=True) + # results_background_denominator = collection_db.aggregate(query_background_denominator, allowDiskUse=True) + results_target_denominator = collection_db.count_documents(where_part_target_denominator) + results_background_denominator = collection_db.count_documents(where_part_background_denominator) + + denominator_country = results_target_denominator + # for single_item in list(results_target_denominator): + # denominator_country = single_item['count'] + + denominator = results_background_denominator + # for single_item in list(results_background_denominator): + # denominator = single_item['count'] + + list_dict_target = [] + for single_item in list(results_target): + single_item_remodel = {} + for key in single_item: + if key == '_id': + for k in single_item[key]: + single_item_remodel[k] = single_item[key][k] + else: + single_item_remodel['total'] = single_item['count'] + list_dict_target.append(single_item_remodel) + + list_dict_background = [] + for single_item in list(results_background): + single_item_remodel = {} + for key in single_item: + if key == '_id': + for k in single_item[key]: + single_item_remodel[k] = single_item[key][k] + else: + single_item_remodel['total'] = single_item['count'] + list_dict_background.append(single_item_remodel) + + for item in list_dict_target: + numerator = 0 + for item2 in list_dict_background: + if item['start_aa_original'] == item2['start_aa_original'] \ + and item['sequence_aa_original'] == item2['sequence_aa_original'] \ + and item['sequence_aa_alternative'] == item2['sequence_aa_alternative'] \ + and item['product'] == item2['product']: + numerator = item2['total'] + + if denominator == 0: + fraction = 0 + else: + fraction = (numerator / denominator) + if denominator_country == 
0: + fraction_target = 0 + else: + fraction_target = (item['total'] / denominator_country) + + single_line = {'lineage': lineage, 'country': geo1, 'count_seq': item['total'], + 'target_time': start_target_time + ' / ' + end_target_time, + 'background_time': start_background_time + ' / ' + end_background_time, + 'start_aa_original': item['start_aa_original'], + 'product': item['product'], + 'sequence_aa_original': item['sequence_aa_original'], + 'sequence_aa_alternative': item['sequence_aa_alternative'], + 'numerator': numerator, + 'denominator': denominator, + 'fraction': fraction * 100, + 'denominator_target': denominator_country, + 'fraction_target': fraction_target * 100} + + array_result.append(single_line) + + all_result = array_result + + mutation_table2 = [] + arr_p_values = [] + for item in all_result: + single_item = {} + if item['product'] == 'Spike (surface glycoprotein)': + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + # mutation = 'S_' + else: + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] + single_item['mutation'] = mutation + single_item['start_aa_original'] = item['start_aa_original'] + single_item['sequence_aa_original'] = item['sequence_aa_original'] + single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] + single_item['product'] = item['product'] + single_item['mutation_position'] = item['start_aa_original'] + single_item['target'] = item['target_time'] + single_item['background'] = item['background_time'] + single_item['country'] = item['country'] + single_item['lineage'] = item['lineage'] + single_item['count_target'] = item['count_seq'] + single_item['percentage_background'] = item['fraction'] + single_item['numerator_background'] = item['numerator'] + single_item['denominator_background'] = item['denominator'] + single_item['percentage_target'] = 
item['fraction_target'] + single_item['numerator_target'] = item['count_seq'] + single_item['denominator_target'] = item['denominator_target'] + + epsilon = 0.00000001 + single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ + (single_item['percentage_background'] + epsilon) + + if single_item['odd_ratio'] >= 1: + # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + + arr_p_values.append(single_item['p_value']) + mutation_table2.append(single_item) + + a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') + + i = 0 + for item in mutation_table2: + item['pvalue'] = new_p_values[i] + i = i + 1 + + return mutation_table2 + + +@api.route('/analyzeMutationProvinceRegion') +class FieldList(Resource): + @api.doc('analyze_mutation_province_region') + def post(self): + payload = api.payload + array_protein = payload['protein'] + 
query_fields = payload['query'] + toExcludeBackground = payload['toExcludeBackground'] + + if 'province' in query_fields: + target = query_fields['province'] + target_key = 'province' + if 'region' in query_fields: + background = query_fields['region'] + elif 'country' in query_fields: + background = query_fields['country'] + elif 'geo_group' in query_fields: + background = query_fields['geo_group'] + else: + background = 'World' + elif 'region' in query_fields: + target = query_fields['region'] + target_key = 'region' + if 'country' in query_fields: + background = query_fields['country'] + elif 'geo_group' in query_fields: + background = query_fields['geo_group'] + else: + background = 'World' + elif 'country' in query_fields: + target = query_fields['country'] + target_key = 'country' + if 'geo_group' in query_fields: + background = query_fields['geo_group'] + else: + background = 'World' + elif 'geo_group' in query_fields: + target = query_fields['geo_group'] + target_key = 'geo_group' + background = 'World' + else: + target = 'empty' + target_key = 'empty' + background = 'empty' + + if 'lineage' in query_fields: + lineage = query_fields['lineage'] + else: + lineage = 'empty' + + array_result = [] + + where_part_target = {} + where_part_background = {} + where_part_target_denominator = {} + where_part_background_denominator = {} + start_date_target = datetime.strptime("2019-01-01", '%Y-%m-%d') + start_date_background = datetime.strptime("2019-01-01", '%Y-%m-%d') + where_part_target['c_coll_date_prec'] = {} + where_part_target['c_coll_date_prec']['$eq'] = 2 + where_part_background['c_coll_date_prec'] = {} + where_part_background['c_coll_date_prec']['$eq'] = 2 + where_part_target_denominator['c_coll_date_prec'] = {} + where_part_target_denominator['c_coll_date_prec']['$eq'] = 2 + where_part_background_denominator['c_coll_date_prec'] = {} + where_part_background_denominator['c_coll_date_prec']['$eq'] = 2 + + where_part_target['collection_date'] = {} + 
where_part_target['collection_date']['$gte'] = start_date_target + where_part_background['collection_date'] = {} + where_part_background['collection_date']['$gte'] = start_date_background + where_part_target_denominator['collection_date'] = {} + where_part_target_denominator['collection_date']['$gte'] = start_date_target + where_part_background_denominator['collection_date'] = {} + where_part_background_denominator['collection_date']['$gte'] = start_date_background + + protein_length = len(array_protein) + if protein_length > 0: + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_background: + where_part_background['$and'] = [] + single_where_part_or = {'$or': []} + for protein in array_protein: + specific_or = {} + real_key = translate_dictionary['product'] + specific_or[f'{real_key}'] = {'$eq': protein} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_background['$and'].append(single_where_part_or) + + if query_fields is not None: + for key in query_fields: + if key == 'minDate': + start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$gte'] = start_date + where_part_background['collection_date']['$gte'] = start_date + where_part_target_denominator['collection_date']['$gte'] = start_date + where_part_background_denominator['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$lte'] = stop_date + where_part_background['collection_date']['$lte'] = stop_date + where_part_target_denominator['collection_date']['$lte'] = stop_date + where_part_background_denominator['collection_date']['$lte'] = stop_date + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in 
where_part_background: + where_part_background['$and'] = [] + if '$and' not in where_part_target_denominator: + where_part_target_denominator['$and'] = [] + if '$and' not in where_part_background_denominator: + where_part_background_denominator['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if isinstance(query_fields[key], list): + single_where_part_or_target = {'$or': []} + single_where_part_or_background = {'$or': []} + for itm in query_fields[key]: + specific_or_target = {} + specific_or_background = {} + field_value = itm # .replace("'", "''") + if key == target_key: + specific_or_target[f'{real_key}'] = {'$eq': field_value} + specific_or_background[f'{real_key}'] = {'$ne': field_value} + else: + specific_or_target[f'{real_key}'] = {'$eq': field_value} + specific_or_background[f'{real_key}'] = {'$eq': field_value} + single_where_part_or_target['$or'].append(specific_or_target) + single_where_part_or_background['$or'].append(specific_or_background) + where_part_target['$and'].append(single_where_part_or_target) + where_part_background['$and'].append(single_where_part_or_background) + where_part_target_denominator['$and'].append(single_where_part_or_target) + where_part_background_denominator['$and'].append(single_where_part_or_background) + else: + single_where_part_or_target = {'$or': []} + single_where_part_or_background = {'$or': []} + replace_fields_value = query_fields[key] # .replace("'", "''") + if key == target_key: + specific_or_target = {f'{real_key}': {'$eq': replace_fields_value}} + specific_or_background = {f'{real_key}': {'$ne': replace_fields_value}} + else: + specific_or_target = {f'{real_key}': {'$eq': replace_fields_value}} + specific_or_background = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or_target['$or'].append(specific_or_target) + single_where_part_or_background['$or'].append(specific_or_background) + 
where_part_target['$and'].append(single_where_part_or_target) + where_part_background['$and'].append(single_where_part_or_background) + where_part_target_denominator['$and'].append(single_where_part_or_target) + where_part_background_denominator['$and'].append(single_where_part_or_background) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + replace_fields_value = query_fields[key] + if key != 'start_aa_original': + replace_fields_value = query_fields[key] # .replace("'", "''") + if real_key not in where_part_target: + where_part_target[real_key] = {} + if real_key not in where_part_background: + where_part_background[real_key] = {} + if real_key not in where_part_target_denominator: + where_part_target_denominator[real_key] = {} + if real_key not in where_part_background_denominator: + where_part_background_denominator[real_key] = {} + where_part_target[real_key]['$eq'] = replace_fields_value + where_part_target_denominator[real_key]['$eq'] = replace_fields_value + if key == target_key: + where_part_background[real_key]['$ne'] = replace_fields_value + where_part_background_denominator[real_key]['$ne'] = replace_fields_value + else: + where_part_background[real_key]['$eq'] = replace_fields_value + where_part_background_denominator[real_key]['$eq'] = replace_fields_value + + for fieldToExclude in toExcludeBackground: + single_where_part_and_background = {'$and': []} + for geoToExclude in toExcludeBackground[fieldToExclude]: + specific_and_background = {} + geo_value = geoToExclude # .replace("'", "''") + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' or fieldToExclude == 'region' or fieldToExclude == 'province': + fieldToExclude = 'location.' 
+ fieldToExclude + specific_and_background[f'{fieldToExclude}'] = {'$ne': f"{geo_value}"} + single_where_part_and_background['$and'].append(specific_and_background) + where_part_background['$and'].append(single_where_part_and_background) + where_part_background_denominator['$and'].append(single_where_part_and_background) + + query_target = [] + query_background = [] + query_target_denominator = [] + query_background_denominator = [] + + query_unwind_target = {"$unwind": "$muts"} + query_target.append(query_unwind_target) + query_unwind_background = {"$unwind": "$muts"} + query_background.append(query_unwind_background) + + query_where_target = {"$match": where_part_target} + query_target.append(query_where_target) + query_where_background = {"$match": where_part_background} + query_background.append(query_where_background) + query_where_target_denominator = {"$match": where_part_target_denominator} + query_target_denominator.append(query_where_target_denominator) + query_where_background_denominator = {"$match": where_part_background_denominator} + query_background_denominator.append(query_where_background_denominator) + + group_part = {"_id": {}} + real_field = translate_dictionary['product'] + group_part["_id"]["product"] = f"${real_field}" + real_field = translate_dictionary['start_aa_original'] + group_part["_id"]["start_aa_original"] = f"${real_field}" + real_field = translate_dictionary['sequence_aa_original'] + group_part["_id"]["sequence_aa_original"] = f"${real_field}" + real_field = translate_dictionary['sequence_aa_alternative'] + group_part["_id"]["sequence_aa_alternative"] = f"${real_field}" + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query_target.append(query_group) + query_background.append(query_group) + + group_part_denominator = {"_id": {}} + group_part_denominator["count"] = {"$sum": 1} + query_group = {"$group": group_part_denominator} + query_target_denominator.append(query_group) + 
query_background_denominator.append(query_group) + + sort_part = {"count": -1} + query_sort = {"$sort": sort_part} + query_target.append(query_sort) + query_background.append(query_sort) + + # print("query target", query_target) + # print("query target denominator", query_target_denominator) + # print("query background", query_background) + # print("query background denominator", query_background_denominator) + + results_target = collection_db.aggregate(query_target, allowDiskUse=True) + results_background = collection_db.aggregate(query_background, allowDiskUse=True) + # results_target_denominator = collection_db.aggregate(query_target_denominator, allowDiskUse=True) + # results_background_denominator = collection_db.aggregate(query_background_denominator, allowDiskUse=True) + results_target_denominator = collection_db.count_documents(where_part_target_denominator) + results_background_denominator = collection_db.count_documents(where_part_background_denominator) + + denominator_target = results_target_denominator + # for single_item in list(results_target_denominator): + # denominator_target = single_item['count'] + + denominator = results_background_denominator + # for single_item in list(results_background_denominator): + # denominator = single_item['count'] + + list_dict_target = [] + for single_item in list(results_target): + single_item_remodel = {} + for key in single_item: + if key == '_id': + for k in single_item[key]: + single_item_remodel[k] = single_item[key][k] + else: + single_item_remodel['total'] = single_item['count'] + list_dict_target.append(single_item_remodel) + + list_dict_background = [] + for single_item in list(results_background): + single_item_remodel = {} + for key in single_item: + if key == '_id': + for k in single_item[key]: + single_item_remodel[k] = single_item[key][k] + else: + single_item_remodel['total'] = single_item['count'] + list_dict_background.append(single_item_remodel) + + for item in list_dict_target: + numerator = 0 + 
for item2 in list_dict_background: + if item['start_aa_original'] == item2['start_aa_original'] \ + and item['sequence_aa_original'] == item2['sequence_aa_original'] \ + and item['sequence_aa_alternative'] == item2['sequence_aa_alternative'] \ + and item['product'] == item2['product']: + numerator = item2['total'] + + if denominator == 0: + fraction = 0 + else: + fraction = (numerator / denominator) + if denominator_target == 0: + fraction_target = 0 + else: + fraction_target = (item['total'] / denominator_target) + + single_line = {'lineage': lineage, 'target': target, 'background': background, + 'count_seq': item['total'], + 'product': item['product'], + 'start_aa_original': item['start_aa_original'], + 'sequence_aa_original': item['sequence_aa_original'], + 'sequence_aa_alternative': item['sequence_aa_alternative'], + 'numerator': numerator, + 'denominator': denominator, + 'fraction': fraction * 100, + 'denominator_target': denominator_target, + 'fraction_target': fraction_target * 100} + + array_result.append(single_line) + + all_result = array_result + + mutation_table2 = [] + arr_p_values = [] + for item in all_result: + single_item = {} + if item['product'] == 'Spike (surface glycoprotein)': + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + # mutation = 'S_' + else: + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] + single_item['start_aa_original'] = item['start_aa_original'] + single_item['sequence_aa_original'] = item['sequence_aa_original'] + single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] + single_item['mutation'] = mutation + single_item['product'] = item['product'] + single_item['mutation_position'] = item['start_aa_original'] + # if 'country' in item: + # single_item['target'] = item['region'] + # single_item['background'] = item['country'] + # else: + # 
single_item['target'] = item['province'] + # single_item['background'] = item['region'] + single_item['target'] = item['target'] + single_item['background'] = item['background'] + + single_item['lineage'] = item['lineage'] + single_item['count_target'] = item['count_seq'] + single_item['percentage_background'] = item['fraction'] + single_item['numerator_background'] = item['numerator'] + single_item['denominator_background'] = item['denominator'] + single_item['percentage_target'] = item['fraction_target'] + single_item['numerator_target'] = item['count_seq'] + single_item['denominator_target'] = item['denominator_target'] + + epsilon = 0.00000001 + single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ + (single_item['percentage_background'] + epsilon) + + if single_item['odd_ratio'] >= 1: + # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + 
[single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + + arr_p_values.append(single_item['p_value']) + mutation_table2.append(single_item) + + a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') + + i = 0 + for item in mutation_table2: + item['pvalue'] = new_p_values[i] + i = i + 1 + + return mutation_table2 + + +@api.route('/analyzeMutationTargetBackgroundFree') +class FieldList(Resource): + @api.doc('analyze_mutation_target_background_free') + def post(self): + + payload = api.payload + array_protein = payload['protein'] + query_target = payload['query_target'] + query_background = payload['query_background'] + remove_overlapping = payload['removeOverlapping'] + + target = 'empty' + background = 'empty' + + if 'lineage' in query_target: + lineage_target = query_target['lineage'] + else: + lineage_target = 'empty' + if 'lineage' in query_background: + lineage_background = query_background['lineage'] + else: + lineage_background = 'empty' + + array_result = [] + + where_part_target = {} + where_part_background = {} + where_part_target_denominator = {} + where_part_background_denominator = {} + where_part_target_overlapping = {} + where_part_background_overlapping = {} + start_date_target = datetime.strptime("2019-01-01", '%Y-%m-%d') + start_date_background = datetime.strptime("2019-01-01", '%Y-%m-%d') + + if 'accession_id' not in query_target: + where_part_target['c_coll_date_prec'] = {} + where_part_target['c_coll_date_prec']['$eq'] = 2 + where_part_target_denominator['c_coll_date_prec'] = {} + where_part_target_denominator['c_coll_date_prec']['$eq'] = 2 + where_part_target_overlapping['c_coll_date_prec'] = {} + where_part_target_overlapping['c_coll_date_prec']['$eq'] = 2 + if 'accession_id' not in query_background: + where_part_background['c_coll_date_prec'] = {} + where_part_background['c_coll_date_prec']['$eq'] = 2 + 
where_part_background_denominator['c_coll_date_prec'] = {} + where_part_background_denominator['c_coll_date_prec']['$eq'] = 2 + where_part_background_overlapping['c_coll_date_prec'] = {} + where_part_background_overlapping['c_coll_date_prec']['$eq'] = 2 + + where_part_target['collection_date'] = {} + where_part_target['collection_date']['$gte'] = start_date_target + where_part_target_denominator['collection_date'] = {} + where_part_target_denominator['collection_date']['$gte'] = start_date_target + where_part_target_overlapping['collection_date'] = {} + where_part_target_overlapping['collection_date']['$gte'] = start_date_target + where_part_background['collection_date'] = {} + where_part_background['collection_date']['$gte'] = start_date_background + where_part_background_denominator['collection_date'] = {} + where_part_background_denominator['collection_date']['$gte'] = start_date_background + where_part_background_overlapping['collection_date'] = {} + where_part_background_overlapping['collection_date']['$gte'] = start_date_background + + protein_length = len(array_protein) + if protein_length > 0: + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_background: + where_part_background['$and'] = [] + single_where_part_or = {'$or': []} + for protein in array_protein: + specific_or = {} + real_key = translate_dictionary['product'] + specific_or[f'{real_key}'] = {'$eq': protein} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_background['$and'].append(single_where_part_or) + + if query_target is not None: + for key in query_target: + if key == 'minDate': + start_date = datetime.strptime(f"{query_target[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$gte'] = start_date + where_part_target_denominator['collection_date']['$gte'] = start_date + where_part_target_overlapping['collection_date']['$gte'] = start_date + elif key == 'maxDate': 
+ stop_date = datetime.strptime(f"{query_target[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$lte'] = stop_date + where_part_target_denominator['collection_date']['$lte'] = stop_date + where_part_target_overlapping['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_target[key]: + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_target_denominator: + where_part_target_denominator['$and'] = [] + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + + if len(query_target[key][fieldToExclude]) > 0: + single_where_part = {'$and': []} + for geoToExclude in query_target[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' + fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part_target['$and'].append(single_where_part) + where_part_target_denominator['$and'].append(single_where_part) + where_part_target_overlapping['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_target_denominator: + where_part_target_denominator['$and'] = [] + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' 
+ key + if isinstance(query_target[key], list): + single_where_part_or = {'$or': []} + for itm in query_target[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_target_denominator['$and'].append(single_where_part_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_target[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_target_denominator['$and'].append(single_where_part_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + if isinstance(query_target[key], list): + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_target_denominator: + where_part_target_denominator['$and'] = [] + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + single_where_part_or = {'$or': []} + for itm in query_target[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + where_part_target_denominator['$and'].append(single_where_part_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + else: + replace_fields_value = query_target[key] + if key != 'start_aa_original': + replace_fields_value = query_target[key] # .replace("'", "''") + if real_key not in where_part_target: + where_part_target[real_key] = {} + if real_key not in where_part_target_denominator: 
+ where_part_target_denominator[real_key] = {} + if real_key not in where_part_target_overlapping: + where_part_target_overlapping[real_key] = {} + where_part_target[real_key]['$eq'] = replace_fields_value + where_part_target_denominator[real_key]['$eq'] = replace_fields_value + where_part_target_overlapping[real_key]['$eq'] = replace_fields_value + + if query_background is not None: + for key in query_background: + if key == 'minDate': + start_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d') + where_part_background['collection_date']['$gte'] = start_date + where_part_background_denominator['collection_date']['$gte'] = start_date + where_part_background_overlapping['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d') + where_part_background['collection_date']['$lte'] = stop_date + where_part_background_denominator['collection_date']['$lte'] = stop_date + where_part_background_overlapping['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_background[key]: + if '$and' not in where_part_background: + where_part_background['$and'] = [] + if '$and' not in where_part_background_denominator: + where_part_background_denominator['$and'] = [] + if '$and' not in where_part_background_overlapping: + where_part_background_overlapping['$and'] = [] + + if len(query_background[key][fieldToExclude]) > 0: + single_where_part = {'$and': []} + for geoToExclude in query_background[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' 
+ fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part_background['$and'].append(single_where_part) + where_part_background_denominator['$and'].append(single_where_part) + where_part_background_overlapping['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part_background: + where_part_background['$and'] = [] + if '$and' not in where_part_background_denominator: + where_part_background_denominator['$and'] = [] + if '$and' not in where_part_background_overlapping: + where_part_background_overlapping['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if isinstance(query_background[key], list): + single_where_part_or = {'$or': []} + for itm in query_background[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_background['$and'].append(single_where_part_or) + where_part_background_denominator['$and'].append(single_where_part_or) + where_part_background_overlapping['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_background[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part_background['$and'].append(single_where_part_or) + where_part_background_denominator['$and'].append(single_where_part_or) + where_part_background_overlapping['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + if isinstance(query_background[key], list): + if '$and' not in 
where_part_background: + where_part_background['$and'] = [] + if '$and' not in where_part_background_denominator: + where_part_background_denominator['$and'] = [] + if '$and' not in where_part_background_overlapping: + where_part_background_overlapping['$and'] = [] + single_where_part_or = {'$or': []} + for itm in query_background[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_background['$and'].append(single_where_part_or) + where_part_background_denominator['$and'].append(single_where_part_or) + where_part_background_overlapping['$and'].append(single_where_part_or) + else: + replace_fields_value = query_background[key] + if key != 'start_aa_original': + replace_fields_value = query_background[key] # .replace("'", "''") + if real_key not in where_part_background: + where_part_background[real_key] = {} + if real_key not in where_part_background_denominator: + where_part_background_denominator[real_key] = {} + if real_key not in where_part_background_overlapping: + where_part_background_overlapping[real_key] = {} + where_part_background[real_key]['$eq'] = replace_fields_value + where_part_background_denominator[real_key]['$eq'] = replace_fields_value + where_part_background_overlapping[real_key]['$eq'] = replace_fields_value + + if remove_overlapping.lower() == 'target' or remove_overlapping.lower() == 'both': + query_background_overlapping = [] + query_where_background_overlapping = {"$match": where_part_background_overlapping} + query_background_overlapping.append(query_where_background_overlapping) + group_part_background_overlapping = {"_id": "$_id"} + query_group_background_overlapping = {"$group": group_part_background_overlapping} + query_background_overlapping.append(query_group_background_overlapping) + # print("query target overlapping", query_background_overlapping) + # results_background_overlapping = 
collection_db.aggregate(query_background_overlapping, allowDiskUse=True) + results_background_overlapping = collection_db.find(where_part_background_overlapping, {}) + array_background_overlapping = list(results_background_overlapping) + if '$and' not in where_part_target: + where_part_target['$and'] = [] + if '$and' not in where_part_target_denominator: + where_part_target_denominator['$and'] = [] + single_where_part_target_overlapping_and = {'$and': []} + for single_accession_id in array_background_overlapping: + specific_and = {} + field = translate_dictionary['accession_id'] + real_field = field # "$" + field + field_value = single_accession_id[f"{field}"] + specific_and[f'{real_field}'] = {'$ne': field_value} + single_where_part_target_overlapping_and['$and'].append(specific_and) + where_part_target['$and'].append(single_where_part_target_overlapping_and) + where_part_target_denominator['$and'].append(single_where_part_target_overlapping_and) + + if remove_overlapping.lower() == 'background' or remove_overlapping.lower() == 'both': + query_target_overlapping = [] + query_where_target_overlapping = {"$match": where_part_target_overlapping} + query_target_overlapping.append(query_where_target_overlapping) + group_part_target_overlapping = {"_id": "$_id"} + query_group_target_overlapping = {"$group": group_part_target_overlapping} + query_target_overlapping.append(query_group_target_overlapping) + # print("query background overlapping", query_target_overlapping) + # results_target_overlapping = collection_db.aggregate(query_target_overlapping, allowDiskUse=True) + results_target_overlapping = collection_db.find(where_part_target_overlapping, {}) + array_target_overlapping = list(results_target_overlapping) + if '$and' not in where_part_background: + where_part_background['$and'] = [] + if '$and' not in where_part_background_denominator: + where_part_background_denominator['$and'] = [] + single_where_part_background_overlapping_and = {'$and': []} + for 
single_accession_id in array_target_overlapping: + specific_and = {} + field = translate_dictionary['accession_id'] + real_field = field # "$" + field + field_value = single_accession_id[f"{field}"] + specific_and[f'{real_field}'] = {'$ne': field_value} + single_where_part_background_overlapping_and['$and'].append(specific_and) + where_part_background['$and'].append(single_where_part_background_overlapping_and) + where_part_background_denominator['$and'].append(single_where_part_background_overlapping_and) + + query_target = [] + query_background = [] + query_target_denominator = [] + query_background_denominator = [] + + query_unwind_target = {"$unwind": "$muts"} + query_target.append(query_unwind_target) + query_unwind_background = {"$unwind": "$muts"} + query_background.append(query_unwind_background) + + query_where_target = {"$match": where_part_target} + query_target.append(query_where_target) + query_where_background = {"$match": where_part_background} + query_background.append(query_where_background) + query_where_target_denominator = {"$match": where_part_target_denominator} + query_target_denominator.append(query_where_target_denominator) + query_where_background_denominator = {"$match": where_part_background_denominator} + query_background_denominator.append(query_where_background_denominator) + + group_part = {"_id": {}} + real_field = translate_dictionary['product'] + group_part["_id"]["product"] = f"${real_field}" + real_field = translate_dictionary['start_aa_original'] + group_part["_id"]["start_aa_original"] = f"${real_field}" + real_field = translate_dictionary['sequence_aa_original'] + group_part["_id"]["sequence_aa_original"] = f"${real_field}" + real_field = translate_dictionary['sequence_aa_alternative'] + group_part["_id"]["sequence_aa_alternative"] = f"${real_field}" + group_part["count"] = {"$sum": 1} + query_group = {"$group": group_part} + query_target.append(query_group) + query_background.append(query_group) + + group_part_denominator = 
{"_id": {}} + group_part_denominator["count"] = {"$sum": 1} + query_group = {"$group": group_part_denominator} + query_target_denominator.append(query_group) + query_background_denominator.append(query_group) + + sort_part = {"count": -1} + query_sort = {"$sort": sort_part} + query_target.append(query_sort) + query_background.append(query_sort) + + # print("query target", query_target) + # print("query target denominator", query_target_denominator) + # print("query background", query_background) + # print("query background denominator", query_background_denominator) + + results_target = collection_db.aggregate(query_target, allowDiskUse=True) + results_background = collection_db.aggregate(query_background, allowDiskUse=True) + # results_target_denominator = collection_db.aggregate(query_target_denominator, allowDiskUse=True) + # results_background_denominator = collection_db.aggregate(query_background_denominator, allowDiskUse=True) + results_target_denominator = collection_db.count_documents(where_part_target_denominator) + results_background_denominator = collection_db.count_documents(where_part_background_denominator) + + denominator_target = results_target_denominator + # for single_item in list(results_target_denominator): + # denominator_target = single_item['count'] + + denominator = results_background_denominator + # for single_item in list(results_background_denominator): + # denominator = single_item['count'] + + list_dict_target = [] + for single_item in list(results_target): + single_item_remodel = {} + for key in single_item: + if key == '_id': + for k in single_item[key]: + single_item_remodel[k] = single_item[key][k] + else: + single_item_remodel['total'] = single_item['count'] + list_dict_target.append(single_item_remodel) + + list_dict_background = [] + for single_item in list(results_background): + single_item_remodel = {} + for key in single_item: + if key == '_id': + for k in single_item[key]: + single_item_remodel[k] = single_item[key][k] + 
else: + single_item_remodel['total'] = single_item['count'] + list_dict_background.append(single_item_remodel) + + for item in list_dict_target: + numerator = 0 + for item2 in list_dict_background: + if item['start_aa_original'] == item2['start_aa_original'] \ + and item['sequence_aa_original'] == item2['sequence_aa_original'] \ + and item['sequence_aa_alternative'] == item2['sequence_aa_alternative'] \ + and item['product'] == item2['product']: + numerator = item2['total'] + + if denominator == 0: + fraction = 0 + else: + fraction = (numerator / denominator) + if denominator_target == 0: + fraction_target = 0 + else: + fraction_target = (item['total'] / denominator_target) + + single_line = {'lineage': 'empty', 'lineage_target': lineage_target, + 'lineage_background': lineage_background, + 'target': target, 'background': background, + 'count_seq': item['total'], + 'product': item['product'], + 'start_aa_original': item['start_aa_original'], + 'sequence_aa_original': item['sequence_aa_original'], + 'sequence_aa_alternative': item['sequence_aa_alternative'], + 'numerator': numerator, + 'denominator': denominator, + 'fraction': fraction * 100, + 'denominator_target': denominator_target, + 'fraction_target': fraction_target * 100} + + array_result.append(single_line) + + all_result = array_result + + mutation_table2 = [] + arr_p_values = [] + for item in all_result: + single_item = {} + if item['product'] == 'Spike (surface glycoprotein)': + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + # mutation = 'S_' + else: + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] + single_item['start_aa_original'] = item['start_aa_original'] + single_item['sequence_aa_original'] = item['sequence_aa_original'] + single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] + single_item['mutation'] = mutation + 
single_item['product'] = item['product'] + single_item['mutation_position'] = item['start_aa_original'] + single_item['target'] = item['target'] + single_item['background'] = item['background'] + + single_item['lineage'] = item['lineage'] + single_item['lineage_target'] = item['lineage_target'] + single_item['lineage_background'] = item['lineage_background'] + single_item['count_target'] = item['count_seq'] + single_item['percentage_background'] = item['fraction'] + single_item['numerator_background'] = item['numerator'] + single_item['denominator_background'] = item['denominator'] + single_item['percentage_target'] = item['fraction_target'] + single_item['numerator_target'] = item['count_seq'] + single_item['denominator_target'] = item['denominator_target'] + + epsilon = 0.00000001 + single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ + (single_item['percentage_background'] + epsilon) + + if single_item['odd_ratio'] >= 1: + if item['denominator'] != 0: + # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + # single_item['p_value'] = 0 + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + 
single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + if item['denominator'] != 0: + # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + # single_item['p_value'] = 0 + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + + arr_p_values.append(single_item['p_value']) + mutation_table2.append(single_item) + + a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') + + i = 0 + for item in mutation_table2: + item['pvalue'] = new_p_values[i] + i = i + 1 + + return mutation_table2 + + +@api.route('/countOverlappingSequenceTargetBackground') +class FieldList(Resource): + @api.doc('count_overlapping_sequence_target_background') + def post(self): + payload = api.payload + query_target = payload['query_target'] + query_background = 
payload['query_background'] + + array_result = [] + + where_part_target_overlapping = {} + where_part_background_overlapping = {} + start_date_target = datetime.strptime("2019-01-01", '%Y-%m-%d') + start_date_background = datetime.strptime("2019-01-01", '%Y-%m-%d') + + if 'accession_id' not in query_target: + where_part_target_overlapping['c_coll_date_prec'] = {} + where_part_target_overlapping['c_coll_date_prec']['$eq'] = 2 + if 'accession_id' not in query_background: + where_part_background_overlapping['c_coll_date_prec'] = {} + where_part_background_overlapping['c_coll_date_prec']['$eq'] = 2 + + where_part_target_overlapping['collection_date'] = {} + where_part_target_overlapping['collection_date']['$gte'] = start_date_target + where_part_background_overlapping['collection_date'] = {} + where_part_background_overlapping['collection_date']['$gte'] = start_date_background + + if query_target is not None: + for key in query_target: + if key == 'minDate': + start_date = datetime.strptime(f"{query_target[key]}", '%Y-%m-%d') + where_part_target_overlapping['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_target[key]}", '%Y-%m-%d') + where_part_target_overlapping['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_target[key]: + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + + single_where_part = {'$and': []} + for geoToExclude in query_target[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' 
+ fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part_target_overlapping['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if isinstance(query_target[key], list): + single_where_part_or = {'$or': []} + for itm in query_target[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_target[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + if isinstance(query_target[key], list): + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + single_where_part_or = {'$or': []} + for itm in query_target[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + else: + replace_fields_value = query_target[key] + if key != 'start_aa_original': + replace_fields_value = query_target[key] # .replace("'", "''") + if real_key not in where_part_target_overlapping: + where_part_target_overlapping[real_key] 
= {} + where_part_target_overlapping[real_key]['$eq'] = replace_fields_value + + if query_background is not None: + for key in query_background: + if key == 'minDate': + start_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d') + where_part_background_overlapping['collection_date']['$gte'] = start_date + if where_part_target_overlapping['collection_date']['$gte'] < start_date\ + < where_part_target_overlapping['collection_date']['$lte']: + where_part_target_overlapping['collection_date']['$gte'] = start_date + elif key == 'maxDate': + stop_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d') + where_part_background_overlapping['collection_date']['$lte'] = stop_date + if where_part_target_overlapping['collection_date']['$gte'] < stop_date \ + < where_part_target_overlapping['collection_date']['$lte']: + where_part_target_overlapping['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_background[key]: + if '$and' not in where_part_background_overlapping: + where_part_background_overlapping['$and'] = [] + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + + single_where_part = {'$and': []} + for geoToExclude in query_background[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' 
+ fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part_background_overlapping['$and'].append(single_where_part) + where_part_target_overlapping['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part_background_overlapping: + where_part_background_overlapping['$and'] = [] + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if isinstance(query_background[key], list): + single_where_part_or = {'$or': []} + for itm in query_background[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_background_overlapping['$and'].append(single_where_part_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_background[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part_background_overlapping['$and'].append(single_where_part_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + if isinstance(query_background[key], list): + if '$and' not in where_part_background_overlapping: + where_part_background_overlapping['$and'] = [] + if '$and' not in where_part_target_overlapping: + where_part_target_overlapping['$and'] = [] + single_where_part_or = {'$or': []} + for itm in query_background[key]: + specific_or = {} + field_value 
= itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_background_overlapping['$and'].append(single_where_part_or) + where_part_target_overlapping['$and'].append(single_where_part_or) + else: + replace_fields_value = query_background[key] + if key != 'start_aa_original': + replace_fields_value = query_background[key] # .replace("'", "''") + if real_key not in where_part_background_overlapping: + where_part_background_overlapping[real_key] = {} + if real_key not in where_part_target_overlapping: + where_part_target_overlapping[real_key] = {} + where_part_background_overlapping[real_key]['$eq'] = replace_fields_value + where_part_target_overlapping[real_key]['$eq'] = replace_fields_value + + # query_target_overlapping = [] + # query_where_target_overlapping = {"$match": where_part_target_overlapping} + # query_target_overlapping.append(query_where_target_overlapping) + # group_part_target_overlapping = {"_id": "$_id"} + # query_group_target_overlapping = {"$group": group_part_target_overlapping} + # query_target_overlapping.append(query_group_target_overlapping) + # print("query target overlapping", query_target_overlapping) + # # results_target_overlapping = collection_db.aggregate(query_target_overlapping, allowDiskUse=True) + # results_target_overlapping = collection_db.find(where_part_target_overlapping, {}) + # array_target_overlapping = list(results_target_overlapping) + # if '$and' not in where_part_background_overlapping: + # where_part_background_overlapping['$and'] = [] + # single_where_part_background_overlapping_or = {'$or': []} + # for single_accession_id in array_target_overlapping: + # specific_or = {} + # field = translate_dictionary['accession_id'] + # real_field = field # "$" + field + # field_value = single_accession_id[f"{field}"] + # specific_or[f'{real_field}'] = {'$eq': field_value} + # single_where_part_background_overlapping_or['$or'].append(specific_or) + 
# where_part_background_overlapping['$and'].append(single_where_part_background_overlapping_or) + + query_background_overlapping = [] + # if '$and' not in where_part_background_overlapping: + # where_part_background_overlapping['$and'] = [] + # where_part_background_overlapping['$and'].append(where_part_target_overlapping) + query_where_background_overlapping = {"$match": where_part_background_overlapping} + query_background_overlapping.append(query_where_background_overlapping) + + group_part = {"_id": {}, "count": {"$sum": 1}} + query_group = {"$group": group_part} + query_background_overlapping.append(query_group) + + # print("query count overlapping", query_background_overlapping) + # results_count_overlapping = collection_db.aggregate(query_background_overlapping, allowDiskUse=True) + results_count_overlapping = collection_db.count_documents(where_part_target_overlapping) + + count_overlapping = [{"count": results_count_overlapping}] + # for single_item in list(results_count_overlapping): + # single_count = {"count": single_item['count']} + # count_overlapping[0] = single_count + + return count_overlapping + + +@api.route('/getAccessionIds') +class FieldList(Resource): + @api.doc('get_accession_ids') + def post(self): + payload = api.payload + # payload = {'query': {'lineage': 'B.1.1.7', 'country': 'Italy', 'geo_group': 'Europe', + # 'minDateTerget': '2021-03-31', 'maxDateTarget': '2021-06-28', + # 'start_aa_original': 614, 'sequence_aa_original': 'D', + # 'sequence_aa_alternative': 'G', 'product': 'Spike (surface glycoprotein)'}, + # 'query_false': ''} + query_false_field = payload['query_false'] + query_fields = payload['query'] + query_fields_target = payload['query_target'] + + if "lineage" in query_fields and query_fields['lineage'] == 'empty': + del query_fields['lineage'] + if "lineage" in query_fields_target and query_fields_target['lineage'] == 'empty': + del query_fields_target['lineage'] + + where_part_target = {} + where_part = {} + start_date = 
datetime.strptime("2019-01-01", '%Y-%m-%d') + where_part_target['c_coll_date_prec'] = {} + where_part_target['c_coll_date_prec']['$eq'] = 2 + where_part['c_coll_date_prec'] = {} + where_part['c_coll_date_prec']['$eq'] = 2 + + where_part_target['collection_date'] = {} + where_part_target['collection_date']['$gte'] = start_date + where_part['collection_date'] = {} + where_part['collection_date']['$gte'] = start_date + + if query_fields_target != 'empty': + if '$and' not in where_part_target: + where_part_target['$and'] = [] + + if query_fields_target is not None: + for key in query_fields_target: + if key == 'minDateTarget' or key == 'minDateBackground': + start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$gte'] = start_date + elif key == 'maxDateTarget' or key == 'maxDateBackground': + stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part_target['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_fields_target[key]: + if '$and' not in where_part_target: + where_part_target['$and'] = [] + + single_where_part = {'$and': []} + for geoToExclude in query_fields_target[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' + fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part_target['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part_target: + where_part_target['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' 
+ key + if isinstance(query_fields_target[key], list): + single_where_part_or = {'$or': []} + for itm in query_fields_target[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_fields_target[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + if isinstance(query_fields_target[key], list): + if '$and' not in where_part_target: + where_part_target['$and'] = [] + single_where_part_or = {'$or': []} + for itm in query_fields_target[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part_target['$and'].append(single_where_part_or) + else: + replace_fields_value = query_fields_target[key] + if key != 'start_aa_original': + replace_fields_value = query_fields_target[key] # .replace("'", "''") + if real_key not in where_part_target: + where_part_target[real_key] = {} + where_part_target[real_key]['$eq'] = replace_fields_value + + if query_fields is not None: + for key in query_fields: + if key == 'minDateTarget' or key == 'minDateBackground': + start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$gte'] = start_date + elif key == 'maxDateTarget' or key == 'maxDateBackground': + stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') + where_part['collection_date']['$lte'] = stop_date + + elif key == 'toExclude': + for fieldToExclude in query_fields[key]: + if '$and' not in where_part: + where_part['$and'] = 
[] + + single_where_part = {'$and': []} + for geoToExclude in query_fields[key][fieldToExclude]: + real_field_to_exclude = fieldToExclude + if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ + or fieldToExclude == 'region' or fieldToExclude == 'province': + real_field_to_exclude = 'location.' + fieldToExclude + specific_and = {} + geo_value = geoToExclude # .replace("'", "''") + specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} + single_where_part['$and'].append(specific_and) + where_part['$and'].append(single_where_part) + + elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + if '$and' not in where_part: + where_part['$and'] = [] + + real_key = key + if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': + real_key = 'location.' + key + if key == query_false_field: + single_where_part_or = {'$or': []} + specific_or = {f'{real_key}': {'$eq': None}} + single_where_part_or['$or'].append(specific_or) + specific_or = {f'{real_key}': {'$ne': query_fields[key]}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + if isinstance(query_fields[key], list): + single_where_part_or = {'$or': []} + for itm in query_fields[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + single_where_part_or = {'$or': []} + replace_fields_value = query_fields[key] # .replace("'", "''") + specific_or = {f'{real_key}': {'$eq': replace_fields_value}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + + else: + real_key = key + if key in translate_dictionary: + real_key = translate_dictionary[key] + if isinstance(query_fields[key], list): + if '$and' not in where_part: + where_part['$and'] = [] + single_where_part_or = {'$or': 
[]} + for itm in query_fields[key]: + specific_or = {} + field_value = itm # .replace("'", "''") + specific_or[f'{real_key}'] = {'$eq': field_value} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + if key == query_false_field: + single_where_part_or = {'$or': []} + specific_or = {f'{real_key}': {'$eq': None}} + single_where_part_or['$or'].append(specific_or) + specific_or = {f'{real_key}': {'$ne': query_fields[key]}} + single_where_part_or['$or'].append(specific_or) + where_part['$and'].append(single_where_part_or) + else: + replace_fields_value = query_fields[key] + if key != 'start_aa_original': + replace_fields_value = query_fields[key] # .replace("'", "''") + if real_key not in where_part: + where_part[real_key] = {} + where_part[real_key]['$eq'] = replace_fields_value + + query_target = [] + query = [] + + query_unwind_target = {"$unwind": "$muts"} + query_target.append(query_unwind_target) + query_unwind = {"$unwind": "$muts"} + query.append(query_unwind) + + query_where_target = {"$match": where_part_target} + query_target.append(query_where_target) + query_where = {"$match": where_part} + query.append(query_where) + + group_part = {"_id": {"accession_id": "$_id"}} + query_group = {"$group": group_part} + query_target.append(query_group) + query.append(query_group) + + sort_part = {"_id": 1} + query_sort = {"$sort": sort_part} + query_target.append(query_sort) + query.append(query_sort) + + list_dict_target = [] + if query_fields_target != 'empty': + # print("query target", query_target) + results_target = collection_db.aggregate(query_target, allowDiskUse=True) + for single_item in list(results_target): + for key in single_item: + if key == '_id': + for k in single_item[key]: + list_dict_target.append(single_item[key][k]) + + # print("query", query) + results = collection_db.aggregate(query, allowDiskUse=True) + list_dict = [] + for single_item in list(results): + for key in single_item: + if key 
== '_id':
                    for k in single_item[key]:
                        if single_item[key][k] not in list_dict_target:
                            list_dict.append(single_item[key][k])

        acc_ids_result = [{'acc_ids': list_dict}]

        return acc_ids_result


# Module-level cache: lineage -> [sequence count, sorted list of characterizing
# mutation strings].  Filled by get_all_important_mutation() below.
all_important_mutation_dict = {}


def get_all_important_mutation() -> None:
    """Precompute, per lineage, the mutations found in >= 75% of its sequences.

    Results are stored in the module-level ``all_important_mutation_dict`` as
    ``{lineage: [sequence_count, sorted_mutation_strings]}``.  The function
    re-schedules itself to run again the next day at 02:00 via a
    ``threading.Timer``.
    """
    print("inizio request important mutation")

    # First pass: total number of sequences per lineage.
    pipeline = [
        {"$group": {"_id": '$covv_lineage', "count": {"$sum": 1}}},
    ]

    # lineage -> (total sequence count, list of characterizing mutations)
    lin_info = {x['_id']: (x['count'], []) for x in collection_db.aggregate(pipeline, allowDiskUse=True)}

    # Second pass: count each (lineage, mutation) pair; one document per
    # mutation thanks to $unwind on the 'muts' array.
    pipeline = [
        {"$unwind": "$muts"},
        {"$group": {"_id": {'lin': '$covv_lineage',
                            'pro': "$muts.pro",
                            'org': "$muts.org",
                            'loc': "$muts.loc",
                            'alt': "$muts.alt",
                            },
                    "count": {"$sum": 1}}},
    ]

    results = collection_db.aggregate(pipeline, allowDiskUse=True)

    # Keep only mutations present in at least 75% of the lineage's sequences.
    results = (x['_id'] for x in results if x['count'] / lin_info[x['_id']['lin']][0] >= 0.75)

    for x in results:
        # Mutation string format: <protein>_<orig><position><alt>, e.g. S_D614G.
        ch = f"{x['pro']}_{x['org']}{x['loc']}{x['alt']}"
        lin_info[x['lin']][1].append(ch)

    # Normalize values to [count, sorted mutation list].
    lin_info = {x: [c, sorted(arr)] for x, (c, arr) in lin_info.items()}

    for lin in lin_info:
        all_important_mutation_dict[lin] = lin_info[lin]

    print("fine request important mutation")
    # Schedule the next refresh for tomorrow at 02:00.
    x = datetime.today()
    y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
    delta_t = y - x
    secs = delta_t.total_seconds()
    t1 = Timer(secs, get_all_important_mutation)
    t1.start()


# Module-level cache: {'all_protein': [protein names]}.  Filled below.
all_protein_dict = {}


def get_all_protein() -> None:
    """Cache the names of all SARS-CoV-2 products from ``sars_cov_2_products``.

    Stores the list under ``all_protein_dict['all_protein']`` and re-schedules
    itself daily at 02:00 (the source data is static, so the reschedule only
    keeps the cadence uniform with the other refresh tasks).
    """
    print("inizio request protein")
    all_protein_arr = []
    all_protein = sars_cov_2_products['A']
    for item in all_protein:
        name = str(item.get('name'))
        all_protein_arr.append(name)

    all_protein_dict['all_protein'] = all_protein_arr

    print("fine request protein")
    # Schedule the next refresh for tomorrow at 02:00.
    x = datetime.today()
    y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
    delta_t = y - x
    secs = delta_t.total_seconds()
    t2 = Timer(secs, get_all_protein)
    t2.start()


# Module-level cache: {'all_acc_id': [accession ids]}.  Filled by
# get_all_accession_id().
all_accession_id_dict = {}


def 
get_all_accession_id(): + print("inizio request accession id") + query = { + 'collection_date': { + '$gte': datetime.strptime("2019-01-01", '%Y-%m-%d') + }, + 'c_coll_date_prec': 2 + } + + results = collection_db.find(query, {}) + all_acc_id = [] + for single_item in results: + accession_id = single_item['_id'] + all_acc_id.append(accession_id) + all_accession_id_dict['all_acc_id'] = all_acc_id + print("fine request accession id") + x = datetime.today() + y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1) + delta_t = y - x + secs = delta_t.total_seconds() + t3 = Timer(secs, get_all_geo) + t3.start() + + +all_geo_dict = {} + + +def get_all_geo(): + print("inizio request geo") + start_date = datetime.strptime("2019-01-01", '%Y-%m-%d') + query = [ + { + "$match": { + 'collection_date': { + '$gte': start_date + }, + 'c_coll_date_prec': { + '$eq': 2 + }, + }, + }, + { + "$group": {"_id": + { + 'geo_group': '$location.geo_group', + 'country': '$location.country', + 'region': '$location.region', + 'province': '$location.province', + }, + "count": {"$sum": 1} + } + }, + ] + + results = collection_db.aggregate(query, allowDiskUse=True) + list_geo_dict = [] + for single_item in results: + single_item_remodel = {'geo_group': single_item['_id']['geo_group'], + 'country': single_item['_id']['country'], + 'region': single_item['_id']['region'], + 'province': single_item['_id']['province'], 'count': single_item['count']} + list_geo_dict.append(single_item_remodel) + all_geo_dict['all_geo'] = list_geo_dict + print("fine request geo") + x = datetime.today() + y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1) + delta_t = y - x + secs = delta_t.total_seconds() + t4 = Timer(secs, get_all_geo) + t4.start() + + +# ----------------------------------------- START FUNCTIONS ----------------------------------------------- # + + +get_all_important_mutation() +get_all_accession_id() +get_all_geo() +get_all_protein() + 
+ +# ----------------------------------------- OLD QUERIES ----------------------------------------------- # + + +sars_cov_2_products_old = { + "A": [ + { + "name": "E (envelope protein)", + "start": 26245, + "end": 26472, + "row": 0, + "color": "#7c98b3", + "sequence": "MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV" + }, + { + "name": "M (membrane glycoprotein)", + "start": 26523, + "end": 27191, + "row": 0, + "color": "#536b78", + "sequence": "MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVLAAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELVIGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAAYSRYRIGNYKLNTDHSSSSDNIALLVQ" + }, + { + "name": "N (nucleocapsid phosphoprotein)", + "start": 28274, + "end": 29533, + "row": 0, + "color": "#f68e5f", + "sequence": "MSDNGPQNQRNAPRITFGGPSDSTGSNQNGERSGARSKQRRPQGLPNNTASWFTALTQHGKEDLKFPRGQGVPINTNSSPDDQIGYYRRATRRIRGGDGKMKDLSPRWYFYYLGTGPEAGLPYGANKDGIIWVATEGALNTPKDHIGTRNPANNAAIVLQLPQGTTLPKGFYAEGSRGGSQASSRSSSRSRNSSRNSTPGSSRGTSPARMAGNGGDAALALLLLDRLNQLESKMSGKGQQQQGQTVTKKSAAEASKKPRQKRTATKAYNVTQAFGRRGPEQTQGNFGDQELIRQGTDYKHWPQIAQFAPSASAFFGMSRIGMEVTPSGTWLTYTGAIKLDDKDPNFKDQVILLNKHIDAYKTFPPTEPKKDKKKKADETQALPQRQKKQQTVTLLPAADLDDFSKQLQQSMSSADSTQA" + }, + { + "name": "ORF10 protein", + "start": 29558, + "end": 29674, + "row": 0, + "color": "#f76c5e", + "sequence": "MGYINVFAFPFTIYSLLLCRMNSRNYIAQVDVVNFNLT" + }, + { + "name": "NSP16 (2'-O-ribose methyltransferase)", + "start": 20659, + "end": 21552, + "row": 0, + "color": "#22577a", + "sequence": "SSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN" + }, + { + "name": "NSP3", + "start": 2720, + "end": 8554, + "row": 0, + "color": "#7209b7", + "sequence": 
"APTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKLLHKPIVWHVNNATNKATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGG" + }, + { + "name": "NSP4", + "start": 8555, + 
"end": 10054, + "row": 0, + "color": "#560bad", + "sequence": "KIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQ" + }, + { + "name": "NSP15 (endoRNAse)", + "start": 19621, + "end": 20658, + "row": 0, + "color": "#38a3a5", + "sequence": "SLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQ" + }, + { + "name": "NSP5 (3C-like proteinase)", + "start": 10055, + "end": 10972, + "row": 0, + "color": "#480ca8", + "sequence": "SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ" + }, + { + "name": "NSP14 (3'-to-5' exonuclease)", + "start": 18040, + "end": 19620, + "row": 0, + "color": "#57cc99", + "sequence": 
"AENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQ" + }, + { + "name": "NSP11", + "start": 13442, + "end": 13480, + "row": 0, + "color": "#65bc6e", + "sequence": "SADAQSFLNGFAV" + }, + { + "name": "NSP13 (helicase)", + "start": 16237, + "end": 18039, + "row": 0, + "color": "#80ed99", + "sequence": "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ" + }, + { + "name": "NSP6", + "start": 10973, + "end": 11842, + "row": 0, + "color": "#3a0ca3", + "sequence": "SAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQ" + }, + { + "name": "NSP7", + "start": 11843, + "end": 12091, + "row": 0, + "color": "#3f37c9", + "sequence": "SKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQ" + }, + { + "name": 
"NSP8", + "start": 12092, + "end": 12685, + "row": 0, + "color": "#4361ee", + "sequence": "AIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLNVAKSEFDRDAAMQRKLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQ" + }, + { + "name": "NSP9", + "start": 12686, + "end": 13024, + "row": 0, + "color": "#4895ef", + "sequence": "NNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQ" + }, + { + "name": "NSP12 (RNA-dependent RNA polymerase)", + "start": 13442, + "end": 16236, + "row": 0, + "color": "#c7f9cc", + "sequence": "SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQ" + }, + { + "name": "ORF1ab polyprotein", + "start": 266, + "end": 21555, + "row": 0, + "color": "#89c4be", + "sequence": 
"MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKLLHKPIVWHVNNATN
KATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGGKIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQSGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQSAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQSKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQAIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLNVAKSEFDRDAAMQR
KLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQNNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQAVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQAENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKMNYQVNGYPNMFI
TREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQSLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQSSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN" + }, + { + "name": "NSP10", + "start": 13025, + "end": 13441, + "row": 0, + "color": "#4cc9f0", + "sequence": "AGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQ" + }, + { + "name": "NSP1 (leader protein)", + "start": 266, + "end": 805, + "row": 0, + "color": "#f72585", + "sequence": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG" + }, + { + "name": "ORF1a polyprotein", + "start": 266, + "end": 13483, + "row": 0, + "sequence": 
"MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKLLHKPIVWHVNNATN
KATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGGKIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQSGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQSAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQSKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQAIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLNVAKSEFDRDAAMQR
KLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQNNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNGFAV" + }, + { + "name": "NSP2", + "start": 806, + "end": 2719, + "row": 0, + "color": "#ccb7ae", + "sequence": "AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG" + }, + { + "name": "NS3 (ORF3a protein)", + "start": 25393, + "end": 26220, + "row": 0, + "color": "#a3a3a3", + "sequence": "MDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWESGVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVMEPIYDEPTTTTSVPL" + }, + { + "name": "NS6 (ORF6 protein)", + "start": 27202, + "end": 27387, + "row": 0, + "color": "#586ba4", + "sequence": "MFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID" + }, + { + "name": "NS7a (ORF7a protein)", + "start": 27394, + "end": 27759, + "row": 0, + "color": "#324376", + "sequence": "MKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFSTQFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE" + }, + 
{ + "name": "NS7b (ORF7b)", + "start": 27756, + "end": 27887, + "row": 0, + "color": "#f5dd90", + "sequence": "MIELSLIDFYLCFLAFLLFLVLIMLIIFWFSLELQDHNETCHA" + }, + { + "name": "NS8 (ORF8 protein)", + "start": 27894, + "end": 28259, + "row": 0, + "color": "#b79738", + "sequence": "MKFLVFLGIITTVAAFHQECSLQSCTQHQPYVVDDPCPIHFYSKWYIRVGARKSAPLIELCVDEAGSKSPIQYIDIGNYTVSCLPFTINCQEPKLGSLVVRCSFYEDFLEYHDVRVVLDFI" + }, + { + "name": "Spike (surface glycoprotein)", + "start": 21563, + "end": 25384, + "row": 0, + "color": "#accbe1", + "sequence": "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT" + } + ], + "N": [ + { + "name": "ORF10", + "start": 29558, + "end": 29674, + "row": 0 + }, + { + "name": "ORF1ab", + "start": 266, + "end": 21555, + "row": 0 + }, + { + "name": "ORF3a", + 
"start": 25393, + "end": 26220, + "row": 0 + }, + { + "name": "ORF6", + "start": 27202, + "end": 27387, + "row": 0 + }, + { + "name": "ORF7a", + "start": 27394, + "end": 27759, + "row": 0 + }, + { + "name": "ORF7b", + "start": 27756, + "end": 27887, + "row": 0 + }, + { + "name": "ORF8", + "start": 27894, + "end": 28259, + "row": 0 + } + ] +} - arr_p_values.append(single_item['p_value']) - mutation_table2.append(single_item) - a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') +@api.route('/tableLineageCountry111') +class FieldList(Resource): + @api.doc('table_lineage_country') + def post(self): - i = 0 - for item in mutation_table2: - item['pvalue'] = new_p_values[i] - i = i + 1 + to_send = api.payload - return mutation_table2 + conn = http.client.HTTPConnection('geco.deib.polimi.it') + headers = {'Content-type': 'application/json'} + send = to_send + json_data = json.dumps(send) + conn.request('POST', '/virusurf_epitope/api/epitope/tableLineageCountry', json_data, headers) + response = conn.getresponse() + all_geo = response.read().decode() + all_geo = json.loads(all_geo) -@api.route('/analyzeMutationTargetBackgroundFree') + table = [] + for item in all_geo: + single_line = {'lineage': item['lineage']} + country_count = item['country_count'] + country_count = country_count.replace('"', "") + country_count = country_count.replace(")\\", "") + country_count = country_count.replace("\\", "") + country_count = country_count.replace("{", "") + country_count = country_count.replace("}", "") + country_count = country_count.replace("(", "") + array_country_count = country_count.split("),") + for single_country in array_country_count: + single_country = single_country.replace(")", "") + array_single_country = single_country.split(',') + single_line[array_single_country[0]] = array_single_country[1] + table.append(single_line) + + return table + + +@api.route('/denominatorLineageCountry111') class FieldList(Resource): - 
@api.doc('analyze_mutation_target_background_free') + @api.doc('possible_country_lineage') def post(self): to_send = api.payload @@ -731,7 +3428,124 @@ def post(self): headers = {'Content-type': 'application/json'} send = to_send json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationTargetBackgroundFree', json_data, headers) + conn.request('POST', '/virusurf_epitope/api/epitope/denominatorLineageCountry', json_data, headers) + + response = conn.getresponse() + resp = response.read().decode() + resp = json.loads(resp) + + denominators = {} + + for item in resp: + if item['geo'] is None: + denominators['N/D'] = item['cnt'] + else: + denominators[item['geo']] = item['cnt'] + + return denominators + + +# @api.route('/analyzeMutationCountryLineage') +# class FieldList(Resource): +# @api.doc('analyze_mutation_country_lineage') +# def post(self): +# +# to_send = api.payload +# +# conn = http.client.HTTPConnection('geco.deib.polimi.it') +# headers = {'Content-type': 'application/json'} +# send = to_send +# json_data = json.dumps(send) +# conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineage', json_data, headers) +# +# response = conn.getresponse() +# all_result = response.read().decode() +# all_result = json.loads(all_result) +# +# mutation_table2 = [] +# arr_p_values = [] +# for item in all_result: +# single_item = {} +# if item['product'] == 'Spike (surface glycoprotein)': +# protein = item['product'].split(" ", 1)[0] +# mutation = protein + '_' +# # mutation = 'S_' +# else: +# protein = item['product'].split(" ", 1)[0] +# mutation = protein + '_' +# mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] +# single_item['mutation'] = mutation +# single_item['start_aa_original'] = item['start_aa_original'] +# single_item['sequence_aa_original'] = item['sequence_aa_original'] +# single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] 
+# single_item['product'] = item['product'] +# single_item['mutation_position'] = item['start_aa_original'] +# single_item['target'] = item['country'] +# single_item['background'] = item['lineage'] +# single_item['count_target'] = item['count_seq'] +# single_item['percentage_background'] = item['fraction'] +# single_item['numerator_background'] = item['numerator'] +# single_item['denominator_background'] = item['denominator'] +# single_item['percentage_target'] = item['fraction_country'] +# single_item['numerator_target'] = item['count_seq'] +# single_item['denominator_target'] = item['denominator_country'] +# +# epsilon = 0.00000001 +# single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ +# (single_item['percentage_background'] + epsilon) +# +# if single_item['odd_ratio'] >= 1: +# # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_country'], +# # item['numerator'] / item['denominator']) +# if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ +# and single_item['denominator_target'] - single_item['numerator_target'] == 0: +# single_item['p_value'] = 1 +# else: +# stat, p, dof, expected = \ +# chi2_contingency([[single_item['numerator_background'], +# single_item['denominator_background'] - single_item['numerator_background']], +# [single_item['numerator_target'], +# single_item['denominator_target'] - single_item['numerator_target']]]) +# single_item['p_value'] = p +# else: +# # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_country'], +# # item['numerator'] / item['denominator']) +# if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ +# and single_item['denominator_target'] - single_item['numerator_target'] == 0: +# single_item['p_value'] = 1 +# else: +# stat, p, dof, expected = \ +# chi2_contingency([[single_item['numerator_background'], +# single_item['denominator_background'] - single_item['numerator_background']], +# 
[single_item['numerator_target'], +# single_item['denominator_target'] - single_item['numerator_target']]]) +# single_item['p_value'] = p +# +# arr_p_values.append(single_item['p_value']) +# mutation_table2.append(single_item) +# +# a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') +# +# i = 0 +# for item in mutation_table2: +# item['pvalue'] = new_p_values[i] +# i = i + 1 +# +# return mutation_table2 + + +@api.route('/analyzeMutationCountryLineageInTime111') +class FieldList(Resource): + @api.doc('analyze_mutation_country_lineage_in_time') + def post(self): + + to_send = api.payload + + conn = http.client.HTTPConnection('geco.deib.polimi.it') + headers = {'Content-type': 'application/json'} + send = to_send + json_data = json.dumps(send) + conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineageInTime', json_data, headers) response = conn.getresponse() all_result = response.read().decode() @@ -749,18 +3563,16 @@ def post(self): protein = item['product'].split(" ", 1)[0] mutation = protein + '_' mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] + single_item['mutation'] = mutation single_item['start_aa_original'] = item['start_aa_original'] single_item['sequence_aa_original'] = item['sequence_aa_original'] single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] - single_item['mutation'] = mutation single_item['product'] = item['product'] single_item['mutation_position'] = item['start_aa_original'] - single_item['target'] = item['target'] - single_item['background'] = item['background'] - + single_item['target'] = item['target_time'] + single_item['background'] = item['background_time'] + single_item['country'] = item['country'] single_item['lineage'] = item['lineage'] - single_item['lineage_target'] = item['lineage_target'] - single_item['lineage_background'] = item['lineage_background'] single_item['count_target'] = item['count_seq'] 
single_item['percentage_background'] = item['fraction'] single_item['numerator_background'] = item['numerator'] @@ -774,61 +3586,31 @@ def post(self): (single_item['percentage_background'] + epsilon) if single_item['odd_ratio'] >= 1: - if item['denominator'] != 0: - # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item[ - 'numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p + # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 else: - # single_item['p_value'] = 0 - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item[ - 'numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + 
[single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p else: - if item['denominator'] != 0: - # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'], - # item['numerator'] / item['denominator']) - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item[ - 'numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p + # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 else: - # single_item['p_value'] = 0 - if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ - and single_item['denominator_target'] - single_item['numerator_target'] == 0: - single_item['p_value'] = 1 - else: - stat, p, dof, expected = \ - chi2_contingency([[single_item['numerator_background'], - single_item['denominator_background'] - single_item[ - 'numerator_background']], - [single_item['numerator_target'], - single_item['denominator_target'] - single_item['numerator_target']]]) - single_item['p_value'] = p + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p 
arr_p_values.append(single_item['p_value']) mutation_table2.append(single_item) @@ -843,9 +3625,9 @@ def post(self): return mutation_table2 -@api.route('/countOverlappingSequenceTargetBackground') +@api.route('/analyzeTimeDistributionCountryLineage111') class FieldList(Resource): - @api.doc('count_overlapping_sequence_target_background') + @api.doc('analyze_time_distribution_country_lineage') def post(self): to_send = api.payload @@ -853,8 +3635,7 @@ def post(self): headers = {'Content-type': 'application/json'} send = to_send json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/countOverlappingSequenceTargetBackground', json_data, - headers) + conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionCountryLineage', json_data, headers) response = conn.getresponse() all_result = response.read().decode() @@ -863,18 +3644,18 @@ def post(self): return all_result -@api.route('/selectorQuery') +@api.route('/analyzeTimeDistributionBackgroundQueryGeo111') class FieldList(Resource): - @api.doc('selector_query') + @api.doc('analyze_time_distribution_country_lineage') def post(self): - to_send = api.payload conn = http.client.HTTPConnection('geco.deib.polimi.it') headers = {'Content-type': 'application/json'} send = to_send json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/selectorQuery', json_data, headers) + conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionBackgroundQueryGeo', json_data, + headers) response = conn.getresponse() all_result = response.read().decode() @@ -883,307 +3664,286 @@ def post(self): return all_result -@api.route('/getAccessionIds') +@api.route('/analyzeMutationProvinceRegion111') class FieldList(Resource): - @api.doc('selector_query') + @api.doc('analyze_mutation_province_region') def post(self): + to_send = api.payload conn = http.client.HTTPConnection('geco.deib.polimi.it') headers = {'Content-type': 'application/json'} send = to_send 
json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/getAccessionIds', json_data, headers) + conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationProvinceRegion', json_data, headers) response = conn.getresponse() all_result = response.read().decode() all_result = json.loads(all_result) - return all_result - - -@api.route('/getProteinPosition') -class FieldList(Resource): - @api.doc('get_protein_position') - def post(self): - - payload = api.payload - name_protein = payload['protein'] - - all_protein = sars_cov_2_products['A'] - min_pos = 0 - max_pos = 0 - for item in all_protein: - name = str(item.get('name')) - if name.lower() == name_protein.lower(): - min_pos = 1 - max_pos = (item.get('end') - item.get('start')) // 3 - if "nsp" in name.lower(): - max_pos = max_pos + 1 - - res = {'start': min_pos, 'stop': max_pos} - - return res - - -@api.route('/getDomains') -class FieldList(Resource): - @api.doc('get_domains') - def post(self): - payload = api.payload - name_protein = payload['protein'] - - annotations = pd.read_csv("apis/protein_annotations.csv", - delimiter=',') - - annotations1 = copy.deepcopy(annotations) - annotations2 = copy.deepcopy(annotations) - annotations3 = copy.deepcopy(annotations) - - ann_mutagenesis = annotations1[(annotations.Description.str.lower() != 'n/d') - & (annotations.Protein.str.lower() == name_protein.lower()) - & (annotations.Category.str.lower() == 'mutagenesis') - ] - ann_mutagenesis2 = ann_mutagenesis[['Description', 'Begin', 'End']] - ann_mutagenesis3 = json.loads(ann_mutagenesis2.to_json(orient="records")) - - ann_aa_modifications = annotations2[(annotations.Description.str.lower() != 'n/d') - & (annotations.Protein.str.lower() == name_protein.lower()) - & (annotations.Category.str.lower() == 'ptm') - & (annotations.Type.str.lower() == 'carbohyd') - ] - ann_aa_modifications2 = ann_aa_modifications[['Description', 'Begin', 'End']] - ann_aa_modifications3 = 
json.loads(ann_aa_modifications2.to_json(orient="records")) - - ann_sites_family_dom = annotations3[(annotations.Description.str.lower() != 'n/d') - & (annotations.Protein.str.lower() == name_protein.lower()) - & ((annotations.Category.str.lower() == 'domains_and_sites') | - (annotations.Type.str.lower() == 'n/d')) - ] - ann_sites_family_dom2 = ann_sites_family_dom[['Description', 'Begin', 'End']] - ann_sites_family_dom3 = json.loads(ann_sites_family_dom2.to_json(orient="records")) - - result = {'mutagenesis': ann_mutagenesis3, 'aa_modifications': ann_aa_modifications3, - 'sites_and_domains': ann_sites_family_dom3} - - return result - - -@api.route('/getImportantMutation') -class FieldList(Resource): - @api.doc('get_important_mutation') - def post(self): - - payload = api.payload - name_lineage = payload['lineage'] - - result = {'mutation': [], 'additional_mutation': []} - - if name_lineage in dict_lineage_mutation: - lineage_json = dict_lineage_mutation[name_lineage] - result['mutation'] = lineage_json['mutation'] - result['additional_mutation'] = lineage_json['additional_mutation'] - else: - all_mutation = [] - all_additional_mutation = [] - for lineage in dict_lineage_mutation: - row = dict_lineage_mutation[lineage] - for mutation in row['mutation']: - if mutation not in all_mutation: - all_mutation.append(mutation) - if mutation in all_additional_mutation: - all_additional_mutation.remove(mutation) - for additional_mutation in row['additional_mutation']: - if additional_mutation not in all_additional_mutation and additional_mutation not in all_mutation: - all_additional_mutation.append(additional_mutation) - result['mutation'] = all_mutation - result['additional_mutation'] = all_additional_mutation - - return result - - -@api.route('/getLineageTree') -class FieldList(Resource): - @api.doc('get_lineage_tree') - def post(self): - - payload = api.payload - possible_lineages = payload['possibleLineages'] - - dict_copy = dict_lineage_mutation - - arr_lineages = [] - 
dict_lineages = {} - for item in possible_lineages: - single_line = item - dict_lineages[item['value']] = single_line - arr_lineages.append(item['value']) + mutation_table2 = [] + arr_p_values = [] + for item in all_result: + single_item = {} + if item['product'] == 'Spike (surface glycoprotein)': + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + # mutation = 'S_' + else: + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] + single_item['start_aa_original'] = item['start_aa_original'] + single_item['sequence_aa_original'] = item['sequence_aa_original'] + single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] + single_item['mutation'] = mutation + single_item['product'] = item['product'] + single_item['mutation_position'] = item['start_aa_original'] + # if 'country' in item: + # single_item['target'] = item['region'] + # single_item['background'] = item['country'] + # else: + # single_item['target'] = item['province'] + # single_item['background'] = item['region'] + single_item['target'] = item['target'] + single_item['background'] = item['background'] - dict_copy2 = dict(sorted(dict_copy.items(), key=lambda k_v: k_v[1]['alias'])) + single_item['lineage'] = item['lineage'] + single_item['count_target'] = item['count_seq'] + single_item['percentage_background'] = item['fraction'] + single_item['numerator_background'] = item['numerator'] + single_item['denominator_background'] = item['denominator'] + single_item['percentage_target'] = item['fraction_target'] + single_item['numerator_target'] = item['count_seq'] + single_item['denominator_target'] = item['denominator_target'] - items = [] - idx = 1 + epsilon = 0.00000001 + single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ + (single_item['percentage_background'] + epsilon) - for lineage in dict_copy2: - already_done = False - 
children = False - children_lineage = False - important_lineage = False - alias = dict_copy2[lineage]['alias'] - if lineage in arr_lineages: - if dict_copy2[lineage]['WHO label'] != '': - important_lineage = True - for itm in items: - possible_parent_alias = str(itm['alias']) + '.' - possible_children_alias = str(alias) - possible_parent_lineage = str(itm['real_name']) + '.' - possible_children_lineage = str(lineage) - if possible_parent_alias in possible_children_alias: - children = True - recursive_children_lineage(itm, lineage, alias, dict_copy2, dict_lineages) - if possible_parent_lineage in possible_children_lineage: - children_lineage = True - if possible_children_lineage != possible_children_alias: - recursive_children_lineage(itm, lineage, lineage, dict_copy2, dict_lineages) - if not children: - already_done = True - name_complete = lineage - if dict_copy2[lineage]['WHO label'] != '': - name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') ' - single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage, - 'who': dict_copy2[lineage]['WHO label'], 'children': [], - 'count': dict_lineages[lineage]['count']} - items.append(single_lineage) - idx = idx + 1 + if single_item['odd_ratio'] >= 1: + # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + # single_item['p_value'] = binom.cdf(item['count_seq'], 
item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item['numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p - if not children_lineage and not already_done: - name_complete = lineage.split('.')[0] - single_lineage = {'id': idx, 'alias': name_complete, 'name': name_complete, - 'real_name': name_complete, - 'who': '', 'children': [], - 'count': 0} - items.append(single_lineage) - idx = idx + 1 - recursive_children_lineage(single_lineage, lineage, lineage, dict_copy2, dict_lineages) + arr_p_values.append(single_item['p_value']) + mutation_table2.append(single_item) - # if important_lineage and not already_done: - # name_complete = lineage - # if dict_copy2[lineage]['WHO label'] != '': - # name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') ' - # single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage, - # 'who': dict_copy2[lineage]['WHO label'], 'children': [], - # 'count': dict_lineages[lineage]['count']} - # items.append(single_lineage) - # idx = idx + 1 + a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') - return items + i = 0 + for item in mutation_table2: + item['pvalue'] = new_p_values[i] + i = i + 1 + return mutation_table2 -@api.route('/getAllImportantMutationPerLineage') + +@api.route('/analyzeMutationTargetBackgroundFree111') class FieldList(Resource): - @api.doc('get_important_mutation') + @api.doc('analyze_mutation_target_background_free') def post(self): - payload = api.payload - lineage = payload['lineage'] - 
proteins = payload['proteins'] + to_send = api.payload - array_proteins = [] + conn = http.client.HTTPConnection('geco.deib.polimi.it') + headers = {'Content-type': 'application/json'} + send = to_send + json_data = json.dumps(send) + conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationTargetBackgroundFree', json_data, headers) - for protein in proteins: - protein_rewritten = protein.split(" ")[0] - array_proteins.append(protein_rewritten) + response = conn.getresponse() + all_result = response.read().decode() + all_result = json.loads(all_result) - dict_copy = all_important_mutation_dict + mutation_table2 = [] + arr_p_values = [] + for item in all_result: + single_item = {} + if item['product'] == 'Spike (surface glycoprotein)': + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + # mutation = 'S_' + else: + protein = item['product'].split(" ", 1)[0] + mutation = protein + '_' + mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative'] + single_item['start_aa_original'] = item['start_aa_original'] + single_item['sequence_aa_original'] = item['sequence_aa_original'] + single_item['sequence_aa_alternative'] = item['sequence_aa_alternative'] + single_item['mutation'] = mutation + single_item['product'] = item['product'] + single_item['mutation_position'] = item['start_aa_original'] + single_item['target'] = item['target'] + single_item['background'] = item['background'] - array_important_mutation = [] + single_item['lineage'] = item['lineage'] + single_item['lineage_target'] = item['lineage_target'] + single_item['lineage_background'] = item['lineage_background'] + single_item['count_target'] = item['count_seq'] + single_item['percentage_background'] = item['fraction'] + single_item['numerator_background'] = item['numerator'] + single_item['denominator_background'] = item['denominator'] + single_item['percentage_target'] = item['fraction_target'] + single_item['numerator_target'] = 
item['count_seq'] + single_item['denominator_target'] = item['denominator_target'] - if lineage is None: - for lineage_mutations in dict_copy: - single_lineage_mutation = dict_copy[lineage_mutations] - for mutation in single_lineage_mutation['common_changes']: - if mutation not in array_important_mutation: - protein = mutation.split("_")[0] - if protein in array_proteins: - array_important_mutation.append(mutation) - array_important_mutation.sort() - else: - if lineage in dict_copy: - single_lineage_mutation = dict_copy[lineage] - for mutation in single_lineage_mutation['common_changes']: - if mutation not in array_important_mutation: - protein = mutation.split("_")[0] - if protein in array_proteins: - array_important_mutation.append(mutation) - array_important_mutation.sort() + epsilon = 0.00000001 + single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \ + (single_item['percentage_background'] + epsilon) - return array_important_mutation + if single_item['odd_ratio'] >= 1: + if item['denominator'] != 0: + # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + # single_item['p_value'] = 0 + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + 
chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + if item['denominator'] != 0: + # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'], + # item['numerator'] / item['denominator']) + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + else: + # single_item['p_value'] = 0 + if single_item['denominator_background'] - single_item['numerator_background'] == 0 \ + and single_item['denominator_target'] - single_item['numerator_target'] == 0: + single_item['p_value'] = 1 + else: + stat, p, dof, expected = \ + chi2_contingency([[single_item['numerator_background'], + single_item['denominator_background'] - single_item[ + 'numerator_background']], + [single_item['numerator_target'], + single_item['denominator_target'] - single_item['numerator_target']]]) + single_item['p_value'] = p + arr_p_values.append(single_item['p_value']) + mutation_table2.append(single_item) -@api.route('/checkAccessionId') + a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni') + + i = 0 + for item in mutation_table2: + item['pvalue'] = new_p_values[i] + i = i + 1 + + return mutation_table2 + + +@api.route('/selectorQuery111') class FieldList(Resource): - @api.doc('check_accession_id') + @api.doc('selector_query') def post(self): - payload = api.payload - 
accession_id = payload['accession_id'] - acc_id_arr = all_accession_id_dict['all_acc_id'] - result = False - if accession_id in acc_id_arr: - result = True - return result + to_send = api.payload + conn = http.client.HTTPConnection('geco.deib.polimi.it') + headers = {'Content-type': 'application/json'} + send = to_send + json_data = json.dumps(send) + conn.request('POST', '/virusurf_epitope/api/epitope/selectorQuery', json_data, headers) -def recursive_children_lineage(parent, lineage, alias, dict_copy2, dict_lineages): - children = False - idx = str(parent['id']) + '_' + str(len(parent['children'])) - for itm in parent['children']: - possible_parent_alias = str(itm['alias']) + '.' - possible_children_alias = str(alias) - if possible_parent_alias in possible_children_alias: - children = True - recursive_children_lineage(itm, lineage, alias, dict_copy2, dict_lineages) - break - else: - children = False - if not children: - name_complete = lineage - if dict_copy2[lineage]['WHO label'] != '': - name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') ' - single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage, - 'who': dict_copy2[lineage]['WHO label'], - 'children': [], 'count': dict_lineages[lineage]['count']} - parent['children'].append(single_lineage) + response = conn.getresponse() + all_result = response.read().decode() + all_result = json.loads(all_result) + return all_result -all_important_mutation_dict = {} +@api.route('/countOverlappingSequenceTargetBackground111') +class FieldList(Resource): + @api.doc('count_overlapping_sequence_target_background') + def post(self): + to_send = api.payload -def get_all_important_mutation(): - print("inizio request important mutation") - conn = http.client.HTTPConnection('geco.deib.polimi.it') - conn.request('GET', '/virusurf_epitope/api/epitope/allImportantMutations') + conn = http.client.HTTPConnection('geco.deib.polimi.it') + headers = {'Content-type': 'application/json'} + 
send = to_send + json_data = json.dumps(send) + conn.request('POST', '/virusurf_epitope/api/epitope/countOverlappingSequenceTargetBackground', json_data, + headers) - response = conn.getresponse() - all_important_mutation = response.read().decode() - all_important_mutation = json.loads(all_important_mutation) + response = conn.getresponse() + all_result = response.read().decode() + all_result = json.loads(all_result) + + return all_result + + +@api.route('/getAccessionIds111') +class FieldList(Resource): + @api.doc('get_accession_ids') + def post(self): + to_send = api.payload - for mutation_per_lineage in all_important_mutation: - lineage = mutation_per_lineage['lineage'] - all_important_mutation_dict[lineage] = mutation_per_lineage - print("fine request important mutation") - x = datetime.today() - y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1) - delta_t = y - x - secs = delta_t.total_seconds() - t1 = Timer(secs, get_all_important_mutation) - t1.start() + conn = http.client.HTTPConnection('geco.deib.polimi.it') + headers = {'Content-type': 'application/json'} + send = to_send + json_data = json.dumps(send) + conn.request('POST', '/virusurf_epitope/api/epitope/getAccessionIds', json_data, headers) + response = conn.getresponse() + all_result = response.read().decode() + all_result = json.loads(all_result) -all_protein_dict = {} + return all_result -def get_all_protein(): +def get_all_protein111(): print("inizio request protein") to_send = {'gcm': {'taxon_name': ["severe acute respiratory syndrome coronavirus 2"]}} @@ -1191,12 +3951,13 @@ def get_all_protein(): headers = {'Content-type': 'application/json'} send = to_send json_data = json.dumps(send) - conn.request('POST', '/virusurf_epitope/api/epitope/allProtein', json_data, headers) + conn.request('POST', '/virusurf_gisaid/api/epitope/allProtein', json_data, headers) response = conn.getresponse() all_protein = response.read().decode() all_protein = json.loads(all_protein) 
all_protein_dict['all_protein'] = all_protein + print("fine request protein") x = datetime.today() y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1) @@ -1206,10 +3967,7 @@ def get_all_protein(): t2.start() -all_geo_dict = {} - - -def get_all_geo(): +def get_all_geo111(): print("inizio request geo") conn = http.client.HTTPConnection('geco.deib.polimi.it') conn.request('GET', '/virusurf_epitope/api/epitope/allGeo') @@ -1227,10 +3985,7 @@ def get_all_geo(): t4.start() -all_accession_id_dict = {} - - -def get_all_accession_id(): +def get_all_accession_id111(): print("inizio request accession id") conn = http.client.HTTPConnection('geco.deib.polimi.it') conn.request('GET', '/virusurf_epitope/api/epitope/allAccessionIds') @@ -1242,6 +3997,7 @@ def get_all_accession_id(): for itm in all_acc_id: all_accession_id_arr.append(itm['accession_id']) all_accession_id_dict['all_acc_id'] = all_accession_id_arr + print("fine request accession id") x = datetime.today() y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1) @@ -1250,370 +4006,23 @@ def get_all_accession_id(): t3 = Timer(secs, get_all_geo) t3.start() -# ----------------------------------------- MONGO DB ----------------------------------------------- # - - -translate_dictionary = { - 'accession_id': '_id', - 'lineage': 'covv_lineage', - 'collection_date': 'covv_collection_date', - 'location': 'covv_location', -} - - -@api.route('/selectorQueryMongoDB') -class FieldList(Resource): - @api.doc('selector_query_mongo_db') - def post(self): - - to_use = api.payload - field_name = to_use['field'] - query_fields = to_use['query'] - - # field_name = 'country' - # query_fields = {'lineage': 'B.1', 'geo_group': ['Europe', 'Asia'], 'minDate': '2020-01-01', 'maxDate': "2021-01-01", - # 'toExclude': {}} - # 'toExclude': {'geo_group': ['Asia'], 'country': ['Italy', 'France'] - - if field_name in query_fields: - del query_fields[field_name] - - i = 0 - where_part = 
{} - start_date = datetime.strptime("2019-01-01", '%Y-%m-%d') - where_part['c_coll_date_prec'] = {} - where_part['c_coll_date_prec']['$eq'] = 2 - where_part['collection_date'] = {} - where_part['collection_date']['$gte'] = start_date - - field_not_null = field_name - if field_not_null in translate_dictionary: - field_not_null = translate_dictionary[field_name] - if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province': - field_not_null = 'location.' + field_name - where_part[field_not_null] = {'$ne': None} - - if query_fields is not None: - for key in query_fields: - if key == 'minDate': - start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') - where_part['collection_date']['$gte'] = start_date - elif key == 'maxDate': - stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d') - where_part['collection_date']['$lte'] = stop_date - - elif key == 'toExclude': - for fieldToExclude in query_fields[key]: - if '$and' not in where_part: - where_part['$and'] = [] - - single_where_part = {'$and': []} - for geoToExclude in query_fields[key][fieldToExclude]: - real_field_to_exclude = fieldToExclude - if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \ - or fieldToExclude == 'region' or fieldToExclude == 'province': - real_field_to_exclude = 'location.' + fieldToExclude - specific_and = {} - geo_value = geoToExclude.replace("'", "''") - specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value} - single_where_part['$and'].append(specific_and) - where_part['$and'].append(single_where_part) - - elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': - if '$and' not in where_part: - where_part['$and'] = [] - - real_key = key - if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province': - real_key = 'location.' 
+ key - if isinstance(query_fields[key], list): - single_where_part_or = {'$or': []} - for itm in query_fields[key]: - specific_or = {} - field_value = itm.replace("'", "''") - specific_or[f'{real_key}'] = {'$eq': field_value} - single_where_part_or['$or'].append(specific_or) - where_part['$and'].append(single_where_part_or) - else: - single_where_part_or = {'$or': []} - replace_fields_value = query_fields[key].replace("'", "''") - specific_or = {f'{real_key}': {'$eq': replace_fields_value}} - single_where_part_or['$or'].append(specific_or) - where_part['$and'].append(single_where_part_or) - - else: - real_key = key - if key in translate_dictionary: - real_key = translate_dictionary[key] - replace_fields_value = query_fields[key] - if key != 'start_aa_original': - replace_fields_value = query_fields[key].replace("'", "''") - if real_key not in where_part: - where_part[real_key] = {} - where_part[real_key]['$eq'] = replace_fields_value - - i = i + 1 - - query = [] - - query_where = {"$match": where_part} - query.append(query_where) - - group_part = {} - real_field = field_name - if field_name in translate_dictionary: - real_field = translate_dictionary[field_name] - if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province': - real_field = 'location.' 
+ field_name - # group_part["_id"] = {"value": - # {"$cond": - # [{"$eq": [f"${real_field}", ""]}, - # None, - # {"$cond": - # [{"$eq": [f"${real_field}", None]}, - # f"${real_field}", - # {"$concat": [ - # {"$toUpper": - # {"$substrCP": [f"${real_field}", 0, 1]} - # }, - # { - # "$substrCP": [ - # f"${real_field}", 1, - # {"$subtract": [{"$strLenCP": f"${real_field}"}, 1]} - # ] - # } - # ]} - # ] - # } - # ] - # }, - # } - group_part["_id"] = {"value": f"${real_field}"} - group_part["count"] = {"$sum": 1} - query_group = {"$group": group_part} - query.append(query_group) - - sort_part = {"count": -1} - query_sort = {"$sort": sort_part} - query.append(query_sort) - # print("query", query) - - results = collection_db.aggregate(query) - - list_dict = [] - for single_item in list(results): - single_item_remodel = {} - for key in single_item: - if key == '_id': - single_item_remodel['value'] = single_item['_id']['value'] - else: - single_item_remodel[key] = single_item[key] - list_dict.append(single_item_remodel) - - # print("field:", field_name, " result:", list_dict) - return list_dict +def get_all_important_mutation111(): + print("inizio request important mutation") + conn = http.client.HTTPConnection('geco.deib.polimi.it') + conn.request('GET', '/virusurf_epitope/api/epitope/allImportantMutations') -def get_all_geo_mongoDB(): - print("inizio request geo") - start_date = datetime.strptime("2019-01-01", '%Y-%m-%d') - query = [ - { - "$match": { - 'collection_date': { - '$gte': start_date - }, - 'c_coll_date_prec': { - '$eq': 2 - }, - }, - }, - { - "$group": {"_id": - { - 'geo_group': '$location.geo_group', - 'country': '$location.country', - 'region': '$location.region', - 'province': '$location.province', - }, - "count": {"$sum": 1} - } - }, - ] + response = conn.getresponse() + all_important_mutation = response.read().decode() + all_important_mutation = json.loads(all_important_mutation) - # {"geo_group": - # {"$cond": - # [{"$eq": ["$location.geo_group", ""]}, - 
# None, - # {"$cond": - # [{"$eq": ["$location.geo_group", None]}, - # "$location.geo_group", - # {"$concat": [ - # {"$toUpper": - # {"$substrCP": ["$location.geo_group", 0, 1]} - # }, - # { - # "$substrCP": [ - # "$location.geo_group", 1, - # {"$subtract": [{"$strLenCP": "$location.geo_group"}, 1]} - # ] - # } - # ]} - # ] - # } - # ] - # }, - # "country": - # {"$cond": - # [{"$eq": ["$location.country", ""]}, - # None, - # {"$cond": - # [{"$eq": ["$location.country", None]}, - # "$location.country", - # {"$concat": [ - # {"$toUpper": - # {"$substrCP": ["$location.country", 0, 1]} - # }, - # { - # "$substrCP": [ - # "$location.country", 1, - # {"$subtract": [{"$strLenCP": "$location.country"}, 1]} - # ] - # } - # ]} - # ] - # } - # ] - # }, - # "region": - # {"$cond": - # [{"$eq": ["$location.region", ""]}, - # None, - # {"$cond": - # [{"$eq": ["$location.region", None]}, - # "$location.region", - # {"$concat": [ - # {"$toUpper": - # {"$substrCP": ["$location.region", 0, 1]} - # }, - # { - # "$substrCP": [ - # "$location.region", 1, - # {"$subtract": [{"$strLenCP": "$location.region"}, 1]} - # ] - # } - # ]} - # ] - # } - # ] - # }, - # "province": - # {"$cond": - # [{"$eq": ["$location.province", ""]}, - # None, - # {"$cond": - # [{"$eq": ["$location.province", None]}, - # "$location.province", - # {"$concat": [ - # {"$toUpper": - # {"$substrCP": ["$location.province", 0, 1]} - # }, - # { - # "$substrCP": [ - # "$location.province", 1, - # {"$subtract": [{"$strLenCP": "$location.province"}, 1]} - # ] - # } - # ]} - # ] - # } - # ] - # }, - - results = collection_db.aggregate(query) - list_geo_dict = [] - for single_item in results: - single_item_remodel = {'geo_group': single_item['_id']['geo_group'], - 'country': single_item['_id']['country'], - 'region': single_item['_id']['region'], - 'province': single_item['_id']['province'], 'count': single_item['count']} - list_geo_dict.append(single_item_remodel) - all_geo_dict['all_geo'] = list_geo_dict - print("fine 
request geo") + for mutation_per_lineage in all_important_mutation: + lineage = mutation_per_lineage['lineage'] + all_important_mutation_dict[lineage] = mutation_per_lineage + print("fine request important mutation") x = datetime.today() y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1) delta_t = y - x secs = delta_t.total_seconds() - t4 = Timer(secs, get_all_geo) - t4.start() - - -def prova_mongo_db(): - print("prova Mongo") - seq = collection_db - print("prova Mongo2") - - # "$match": { - # # 'covv_collection_date': { - # # '$gte': "2019-01-01", - # # '$lte': "2021-07-31", - # # '$regex': "\d\d\d\d-\d\d-\d\d" - # # }, - # # 'covv_location': { - # # '$regex': "Italy" - # # }, - - pipeline = [ - { - "$match": { - 'location.geo_group': { - '$eq': 'Oceania' - }, - 'location.country': { - '$eq': 'Australia' - }, - 'location.region': { - '$eq': 'Northern Territory' - }, - }, - }, - {"$unwind": "$muts"}, - {"$group": - #{"_id": "$_id", - {"_id": - {'pro': "$muts.pro", - 'org': "$muts.org", - 'loc': "$muts.loc", - 'alt': "$muts.alt", - }, - "count": {"$sum": 1} - } - }, - {'$sort': - {"_id.pro": -1} - } - ] - print("start") - results = seq.aggregate(pipeline, ) - print("stop", len(list(results))) - # for i, x in enumerate(results): - # print("qui", x) - # if i < 1: - # print("qui", x) - # break - print("fine prova Mongo2") - - -def prova_mongo_2(): - print("qui2") - -# ----------------------------------------- START FUNCTIONS ----------------------------------------------- # - - -get_all_important_mutation() -get_all_accession_id() -get_all_geo() -get_all_protein() - -# prova_mongo_db() -# prova_mongo_2() + t1 = Timer(secs, get_all_important_mutation) + t1.start() diff --git a/frontend/src/components/FreeTargetVsBackground.vue b/frontend/src/components/FreeTargetVsBackground.vue index 64c428e..7e641f1 100644 --- a/frontend/src/components/FreeTargetVsBackground.vue +++ b/frontend/src/components/FreeTargetVsBackground.vue @@ -1697,14 
+1697,14 @@ export default { ...mapState(['queryFreeTarget', 'queryFreeBackground', 'numSequencesQueryFreeTarget', 'numSequencesQueryFreeBackground', 'all_protein', 'startDateQueryFreeTarget', "stopDateQueryFreeTarget", 'startDateQueryFreeBackground', 'stopDateQueryFreeBackground', 'toExcludeFreeTarget', 'toExcludeFreeBackground', - 'colorPValueInfoBox', 'color_1', 'color_2', 'color_3']), + 'colorPValueInfoBox', 'color_1', 'color_2', 'color_3', 'startAndStopQueryFreeTarget', 'startAndStopQueryFreeBackground']), ...mapGetters({}), }, methods: { ...mapMutations(['setStartDateQueryFreeTarget', 'setStartDateQueryFreeBackground', 'setStopDateQueryFreeTarget', 'setStopDateQueryFreeBackground', 'setNumSequencesQueryFreeTarget', - 'setNumSequencesQueryFreeBackground']), - ...mapActions(['setQueryFreeTarget', 'setQueryFreeBackground']), + 'setNumSequencesQueryFreeBackground', 'setStartAndStopQueryFreeBackground', 'setStartAndStopQueryFreeTarget']), + ...mapActions(['setQueryFreeTarget', 'setQueryFreeBackground', 'setToExcludeFreeTarget', 'setToExcludeFreeBackground']), deleteAllAccIdsTargetInserted(){ this.listAccIdsTargetInserted = []; this.dialogAccIdsTargetInserted = false; @@ -2828,10 +2828,16 @@ export default { this.setQueryFreeTarget({field: 'country', list: null}); this.setQueryFreeTarget({field: 'region', list: null}); this.setQueryFreeTarget({field: 'province', list: null}); - this.setStartDateQueryFreeTarget(null); - this.setStopDateQueryFreeTarget(null); + let obj = {'start': null, 'stop': null} + this.setStartAndStopQueryFreeTarget(obj); + // this.setStartDateQueryFreeTarget(null); + // this.setStopDateQueryFreeTarget(null); // this.setQueryFreeBackground({field: 'accession_id', list: this.listAccIdsTarget}); this.setNumSequencesQueryFreeTarget(0); + this.setToExcludeFreeTarget({field: 'geo_group', list: null}); + this.setToExcludeFreeTarget({field: 'country', list: null}); + this.setToExcludeFreeTarget({field: 'region', list: null}); + 
this.setToExcludeFreeTarget({field: 'province', list: null}); } else{ let id1 = 'tabTargetFree2'; @@ -2865,9 +2871,15 @@ export default { this.setQueryFreeBackground({field: 'country', list: null}); this.setQueryFreeBackground({field: 'region', list: null}); this.setQueryFreeBackground({field: 'province', list: null}); - this.setStartDateQueryFreeBackground(null); - this.setStopDateQueryFreeBackground(null); + let obj = {'start': null, 'stop': null} + this.setStartAndStopQueryFreeBackground(obj); + // this.setStartDateQueryFreeBackground(null); + // this.setStopDateQueryFreeBackground(null); this.setNumSequencesQueryFreeBackground(0); + this.setToExcludeFreeBackground({field: 'geo_group', list: null}); + this.setToExcludeFreeBackground({field: 'country', list: null}); + this.setToExcludeFreeBackground({field: 'region', list: null}); + this.setToExcludeFreeBackground({field: 'province', list: null}); } else{ let id1 = 'tabBackgroundFree2'; @@ -2894,8 +2906,10 @@ export default { this.setQueryFreeTarget({field: 'country', list: null}); this.setQueryFreeTarget({field: 'region', list: null}); this.setQueryFreeTarget({field: 'province', list: null}); - this.setStartDateQueryFreeTarget(null); - this.setStopDateQueryFreeTarget(null); + let obj = {'start': null, 'stop': null} + this.setStartAndStopQueryFreeTarget(obj); + // this.setStartDateQueryFreeTarget(null); + // this.setStopDateQueryFreeTarget(null); if(this.listAccIdsTargetFile.length > 0){ arrayFull = JSON.parse(JSON.stringify(this.listAccIdsTargetFile)); for(let i = 0; i < this.listAccIdsTargetInserted.length; i = i + 1){ @@ -2926,8 +2940,10 @@ export default { this.setQueryFreeBackground({field: 'country', list: null}); this.setQueryFreeBackground({field: 'region', list: null}); this.setQueryFreeBackground({field: 'province', list: null}); - this.setStartDateQueryFreeBackground(null); - this.setStopDateQueryFreeBackground(null); + let obj = {'start': null, 'stop': null}; + 
this.setStartAndStopQueryFreeBackground(obj); + // this.setStartDateQueryFreeBackground(null); + // this.setStopDateQueryFreeBackground(null); if(this.listAccIdsBackgroundFile.length > 0){ arrayFull = JSON.parse(JSON.stringify(this.listAccIdsBackgroundFile)); for(let i = 0; i < this.listAccIdsBackgroundInserted.length; i = i + 1){ @@ -2958,8 +2974,10 @@ export default { this.setQueryFreeTarget({field: 'country', list: null}); this.setQueryFreeTarget({field: 'region', list: null}); this.setQueryFreeTarget({field: 'province', list: null}); - this.setStartDateQueryFreeTarget(null); - this.setStopDateQueryFreeTarget(null); + let obj = {'start': null, 'stop': null}; + this.setStartAndStopQueryFreeTarget(obj); + // this.setStartDateQueryFreeTarget(null); + // this.setStopDateQueryFreeTarget(null); if(this.fileAccIdsTarget !== null) { this.listAccIdsTargetFile = this.fileAccIdsTarget; for(let i = 0; i < this.listAccIdsTargetFile.length; i = i + 1){ @@ -2999,8 +3017,10 @@ export default { this.setQueryFreeBackground({field: 'country', list: null}); this.setQueryFreeBackground({field: 'region', list: null}); this.setQueryFreeBackground({field: 'province', list: null}); - this.setStartDateQueryFreeBackground(null); - this.setStopDateQueryFreeBackground(null); + let obj = {'start': null, 'stop': null}; + this.setStartAndStopQueryFreeBackground(obj); + // this.setStartDateQueryFreeBackground(null); + // this.setStopDateQueryFreeBackground(null); if(this.fileAccIdsBackground !== null){ this.listAccIdsBackgroundFile = this.fileAccIdsBackground; for(let i = 0; i < this.listAccIdsBackgroundFile.length; i = i + 1){ @@ -3194,22 +3214,30 @@ export default { all_protein(){ this.possibleProtein = this.all_protein; }, - startDateQueryFreeTarget(){ + startAndStopQueryFreeTarget(){ this.resetApplied(); this.countOverlappingSequenceTargetBackground(); }, - stopDateQueryFreeTarget(){ - this.resetApplied(); - this.countOverlappingSequenceTargetBackground(); - }, - 
startDateQueryFreeBackground(){ - this.resetApplied(); - this.countOverlappingSequenceTargetBackground(); - }, - stopDateQueryFreeBackground(){ + // startDateQueryFreeTarget(){ + // this.resetApplied(); + // this.countOverlappingSequenceTargetBackground(); + // }, + // stopDateQueryFreeTarget(){ + // this.resetApplied(); + // this.countOverlappingSequenceTargetBackground(); + // }, + startAndStopQueryFreeBackground(){ this.resetApplied(); this.countOverlappingSequenceTargetBackground(); }, + // startDateQueryFreeBackground(){ + // this.resetApplied(); + // this.countOverlappingSequenceTargetBackground(); + // }, + // stopDateQueryFreeBackground(){ + // this.resetApplied(); + // this.countOverlappingSequenceTargetBackground(); + // }, queryFreeTarget(){ this.resetApplied(); this.countOverlappingSequenceTargetBackground(); diff --git a/frontend/src/components/SelectorsQueryFree.vue b/frontend/src/components/SelectorsQueryFree.vue index 013074e..d1af12d 100644 --- a/frontend/src/components/SelectorsQueryFree.vue +++ b/frontend/src/components/SelectorsQueryFree.vue @@ -234,42 +234,58 @@ export default { }, 'queryFreeTarget.geo_group': function (){ if(this.field === 'geo_group' && (!this.queryFreeTarget['geo_group'] || this.queryFreeTarget['geo_group'].length === 0)) { - this.clearToExcludeField(); + if(this.type === 'target') { + this.clearToExcludeField(); + } } }, 'queryFreeTarget.country': function (){ if(this.field === 'country' && (!this.queryFreeTarget['country'] || this.queryFreeTarget['country'].length === 0)) { - this.clearToExcludeField(); + if(this.type === 'target') { + this.clearToExcludeField(); + } } }, 'queryFreeTarget.region': function (){ if(this.field === 'region' && (!this.queryFreeTarget['region'] || this.queryFreeTarget['region'].length === 0)) { - this.clearToExcludeField(); + if(this.type === 'target') { + this.clearToExcludeField(); + } } }, 'queryFreeTarget.province': function (){ if(this.field === 'province' && 
(!this.queryFreeTarget['province'] || this.queryFreeTarget['province'].length === 0 )) { - this.clearToExcludeField(); + if(this.type === 'target') { + this.clearToExcludeField(); + } } }, 'queryFreeBackground.geo_group': function (){ if(this.field === 'geo_group' && (!this.queryFreeBackground['geo_group'] || this.queryFreeBackground['geo_group'].length === 0)) { - this.clearToExcludeField(); + if(this.type === 'background') { + this.clearToExcludeField(); + } } }, 'queryFreeBackground.country': function (){ if(this.field === 'country' && (!this.queryFreeBackground['country'] || this.queryFreeBackground['country'].length === 0)) { - this.clearToExcludeField(); + if(this.type === 'background') { + this.clearToExcludeField(); + } } }, 'queryFreeBackground.region': function (){ if(this.field === 'region' && (!this.queryFreeBackground['region'] || this.queryFreeBackground['region'].length === 0)) { - this.clearToExcludeField(); + if(this.type === 'background') { + this.clearToExcludeField(); + } } }, 'queryFreeBackground.province': function (){ if(this.field === 'province' && (!this.queryFreeBackground['province'] || this.queryFreeBackground['province'].length === 0 )) { - this.clearToExcludeField(); + if(this.type === 'background') { + this.clearToExcludeField(); + } } }, } diff --git a/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue b/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue index a64eac2..971fa92 100644 --- a/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue +++ b/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue @@ -129,6 +129,13 @@ + + + + diff --git a/frontend/src/components/TimeSelectorQueryFree.vue b/frontend/src/components/TimeSelectorQueryFree.vue index 4a3b255..fe2b0cb 100644 --- a/frontend/src/components/TimeSelectorQueryFree.vue +++ b/frontend/src/components/TimeSelectorQueryFree.vue @@ -37,6 +37,7 @@ color="#F48C0680" track-color="grey" height="2px" + @mouseup="mouseUpSlider" > @@ 
-118,6 +119,13 @@ + + + + @@ -220,8 +228,29 @@ export default { }, methods: { ...mapMutations(['setStartDateQueryFreeTarget', 'setStopDateQueryFreeTarget', 'setStartDateQueryFreeBackground', - 'setStopDateQueryFreeBackground', 'setNumSequencesQueryFreeTarget', 'setNumSequencesQueryFreeBackground']), + 'setStopDateQueryFreeBackground', 'setNumSequencesQueryFreeTarget', 'setNumSequencesQueryFreeBackground', + 'setStartAndStopQueryFreeTarget', 'setStartAndStopQueryFreeBackground']), ...mapActions(['setQueryFreeTarget', 'setQueryFreeBackground']), + mouseUpSlider() { + let min = this.slider[0]; + let max = this.slider[1]; + this.changeMarkerAndRender(min, max); + + this.last_start_date = this.translateIndexToDate(this.slider[0]); + this.last_stop_date = this.translateIndexToDate(this.slider[1]); + if(this.type === 'target') { + let obj = {'start': this.last_start_date, 'stop': this.last_stop_date} + this.setStartAndStopQueryFreeTarget(obj); + // this.setStartDateQueryFreeTarget(this.last_start_date); + // this.setStopDateQueryFreeTarget(this.last_stop_date); + } + else if(this.type === 'background') { + let obj = {'start': this.last_start_date, 'stop': this.last_stop_date} + this.setStartAndStopQueryFreeBackground(obj); + // this.setStartDateQueryFreeBackground(this.last_start_date); + // this.setStopDateQueryFreeBackground(this.last_stop_date); + } + }, download(){ let url = this.my_chart.getConnectedDataURL({ pixelRatio: 2, @@ -501,16 +530,21 @@ export default { } this.slider = [index_start, index_stop]; + this.mouseUpSlider(); this.last_start_date = this.translateIndexToDate(this.slider[0]); this.last_stop_date = this.translateIndexToDate(this.slider[1]); - this.changeMarkerAndRender(this.slider[0], this.slider[0]); + this.changeMarkerAndRender(this.slider[0], this.slider[1]); if(this.type === 'target') { - this.setStartDateQueryFreeTarget(this.last_start_date); - this.setStopDateQueryFreeTarget(this.last_stop_date); + let obj = {'start': this.last_start_date, 
'stop': this.last_stop_date} + this.setStartAndStopQueryFreeTarget(obj); + // this.setStartDateQueryFreeTarget(this.last_start_date); + // this.setStopDateQueryFreeTarget(this.last_stop_date); } else if(this.type === 'background') { - this.setStartDateQueryFreeBackground(this.last_start_date); - this.setStopDateQueryFreeBackground(this.last_stop_date); + let obj = {'start': this.last_start_date, 'stop': this.last_stop_date} + this.setStartAndStopQueryFreeBackground(obj); + // this.setStartDateQueryFreeBackground(this.last_start_date); + // this.setStopDateQueryFreeBackground(this.last_stop_date); } this.chosenApplied = true; @@ -529,6 +563,7 @@ export default { else { let stop = this.slider[1]; this.slider = [start, stop]; + this.mouseUpSlider(); } }, last_stop_date(){ @@ -540,6 +575,7 @@ export default { else { let start = this.slider[0]; this.slider = [start, stop]; + this.mouseUpSlider(); } }, queryFreeTarget() { @@ -562,22 +598,22 @@ export default { this.loadData(); } }, - slider(){ - let min = this.slider[0]; - let max = this.slider[1]; - this.changeMarkerAndRender(min, max); - - this.last_start_date = this.translateIndexToDate(this.slider[0]); - this.last_stop_date = this.translateIndexToDate(this.slider[1]); - if(this.type === 'target') { - this.setStartDateQueryFreeTarget(this.last_start_date); - this.setStopDateQueryFreeTarget(this.last_stop_date); - } - else if(this.type === 'background') { - this.setStartDateQueryFreeBackground(this.last_start_date); - this.setStopDateQueryFreeBackground(this.last_stop_date); - } - }, + // slider(){ + // let min = this.slider[0]; + // let max = this.slider[1]; + // this.changeMarkerAndRender(min, max); + // + // this.last_start_date = this.translateIndexToDate(this.slider[0]); + // this.last_stop_date = this.translateIndexToDate(this.slider[1]); + // if(this.type === 'target') { + // this.setStartDateQueryFreeTarget(this.last_start_date); + // this.setStopDateQueryFreeTarget(this.last_stop_date); + // } + // else 
if(this.type === 'background') { + // this.setStartDateQueryFreeBackground(this.last_start_date); + // this.setStopDateQueryFreeBackground(this.last_stop_date); + // } + // }, }, mounted() { this.loadData(); diff --git a/frontend/src/components/TimeSelectorQueryGeo.vue b/frontend/src/components/TimeSelectorQueryGeo.vue index 77df131..b9d0723 100644 --- a/frontend/src/components/TimeSelectorQueryGeo.vue +++ b/frontend/src/components/TimeSelectorQueryGeo.vue @@ -253,12 +253,12 @@ - - - - - - + + + @@ -782,7 +782,7 @@ export default { this.changeMarkerAndRender(this.slider[0], this.slider[0]); this.setStartDateQueryGeo(this.last_start_date); this.setStopDateQueryGeo(this.last_stop_date); - + this.overlay = false; }); }); } diff --git a/frontend/src/store.js b/frontend/src/store.js index 441c519..0d63f52 100644 --- a/frontend/src/store.js +++ b/frontend/src/store.js @@ -41,8 +41,10 @@ const state = { queryFreeBackground: {}, startDateQueryFreeTarget: null, stopDateQueryFreeTarget: null, + startAndStopQueryFreeTarget: {'start' : '2019-01-01', 'stop': '2019-01-01'}, startDateQueryFreeBackground: null, stopDateQueryFreeBackground: null, + startAndStopQueryFreeBackground: {'start' : '2019-01-01', 'stop': '2019-01-01'}, numSequencesQueryFreeTarget: 0, numSequencesQueryFreeBackground: 0, @@ -215,16 +217,39 @@ const mutations = { state.numLevelAboveBackground = value; }, setStartDateQueryFreeTarget: (state, value) => { - state.startDateQueryFreeTarget = value; + let obj = state.startAndStopQueryFreeTarget; + obj['start'] = value; + state.startAndStopQueryFreeTarget = obj; + // state.startDateQueryFreeTarget = value; }, setStopDateQueryFreeTarget: (state, value) => { - state.stopDateQueryFreeTarget= value; + let obj = state.startAndStopQueryFreeTarget; + obj['stop'] = value; + state.startAndStopQueryFreeTarget = obj; + // state.stopDateQueryFreeTarget= value; + }, + setStartAndStopQueryFreeTarget: (state, value) => { + // obj = {'start' : xxx, 'stop': yyy} + 
state.startAndStopQueryFreeTarget = value; + state.startDateQueryFreeTarget = value['start']; + state.stopDateQueryFreeTarget = value['stop']; }, setStartDateQueryFreeBackground: (state, value) => { - state.startDateQueryFreeBackground = value; + let obj = state.startAndStopQueryFreeBackground; + obj['start'] = value; + state.startAndStopQueryFreeBackground = obj; + // state.startDateQueryFreeBackground = value; }, setStopDateQueryFreeBackground: (state, value) => { - state.stopDateQueryFreeBackground = value; + let obj = state.startAndStopQueryFreeBackground; + obj['stop'] = value; + state.startAndStopQueryFreeBackground = obj; + // state.stopDateQueryFreeBackground = value; + }, + setStartAndStopQueryFreeBackground: (state, value) => { + state.startAndStopQueryFreeBackground = value; + state.startDateQueryFreeBackground = value['start']; + state.stopDateQueryFreeBackground = value['stop']; }, setQueryFreeTargetField: (state, payload) => { state.queryFreeTarget[payload.field] = payload.fieldQuery;