From f90d823946448cab7da79d1c2f4740ff01d8a89f Mon Sep 17 00:00:00 2001
From: LucaCilibrasi <37807618+LucaCilibrasi@users.noreply.github.com>
Date: Fri, 10 Sep 2021 13:44:37 +0200
Subject: [PATCH] - Change everything to MongoDB - Minor changes
---
backend/apis/analyze.py | 4501 +++++++++++++----
.../src/components/FreeTargetVsBackground.vue | 78 +-
.../src/components/SelectorsQueryFree.vue | 32 +-
.../TimeSelectorDistributionLineageInGeo.vue | 7 +
.../src/components/TimeSelectorQueryFree.vue | 80 +-
.../src/components/TimeSelectorQueryGeo.vue | 14 +-
frontend/src/store.js | 33 +-
7 files changed, 3633 insertions(+), 1112 deletions(-)
diff --git a/backend/apis/analyze.py b/backend/apis/analyze.py
index e78e826..cf2092d 100644
--- a/backend/apis/analyze.py
+++ b/backend/apis/analyze.py
@@ -19,10 +19,11 @@
api = Namespace('analyze', description='analyze')
uri = "mongodb://localhost:23456/gcm_gisaid"
+# uri = "mongodb://localhost:23457/gcm_gisaid"
client = MongoClient(uri)
db = client.gcm_gisaid
-collection_db = db.seq_2021_08_26
+collection_db = db.seq_2021_08_26_2
########################################################################################################
@@ -30,7 +31,7 @@
sars_cov_2_products = {
"A": [
{
- "name": "E (envelope protein)",
+ "name": "E",
"start": 26245,
"end": 26472,
"row": 0,
@@ -38,7 +39,7 @@
"sequence": "MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV"
},
{
- "name": "M (membrane glycoprotein)",
+ "name": "M",
"start": 26523,
"end": 27191,
"row": 0,
@@ -46,7 +47,7 @@
"sequence": "MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVLAAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELVIGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAAYSRYRIGNYKLNTDHSSSSDNIALLVQ"
},
{
- "name": "N (nucleocapsid phosphoprotein)",
+ "name": "N",
"start": 28274,
"end": 29533,
"row": 0,
@@ -54,7 +55,7 @@
"sequence": "MSDNGPQNQRNAPRITFGGPSDSTGSNQNGERSGARSKQRRPQGLPNNTASWFTALTQHGKEDLKFPRGQGVPINTNSSPDDQIGYYRRATRRIRGGDGKMKDLSPRWYFYYLGTGPEAGLPYGANKDGIIWVATEGALNTPKDHIGTRNPANNAAIVLQLPQGTTLPKGFYAEGSRGGSQASSRSSSRSRNSSRNSTPGSSRGTSPARMAGNGGDAALALLLLDRLNQLESKMSGKGQQQQGQTVTKKSAAEASKKPRQKRTATKAYNVTQAFGRRGPEQTQGNFGDQELIRQGTDYKHWPQIAQFAPSASAFFGMSRIGMEVTPSGTWLTYTGAIKLDDKDPNFKDQVILLNKHIDAYKTFPPTEPKKDKKKKADETQALPQRQKKQQTVTLLPAADLDDFSKQLQQSMSSADSTQA"
},
{
- "name": "ORF10 protein",
+ "name": "ORF10",
"start": 29558,
"end": 29674,
"row": 0,
@@ -62,7 +63,7 @@
"sequence": "MGYINVFAFPFTIYSLLLCRMNSRNYIAQVDVVNFNLT"
},
{
- "name": "NSP16 (2'-O-ribose methyltransferase)",
+ "name": "NSP16",
"start": 20659,
"end": 21552,
"row": 0,
@@ -86,7 +87,7 @@
"sequence": "KIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQ"
},
{
- "name": "NSP15 (endoRNAse)",
+ "name": "NSP15",
"start": 19621,
"end": 20658,
"row": 0,
@@ -94,7 +95,7 @@
"sequence": "SLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQ"
},
{
- "name": "NSP5 (3C-like proteinase)",
+ "name": "NSP5",
"start": 10055,
"end": 10972,
"row": 0,
@@ -102,7 +103,7 @@
"sequence": "SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ"
},
{
- "name": "NSP14 (3'-to-5' exonuclease)",
+ "name": "NSP14",
"start": 18040,
"end": 19620,
"row": 0,
@@ -118,7 +119,7 @@
"sequence": "SADAQSFLNGFAV"
},
{
- "name": "NSP13 (helicase)",
+ "name": "NSP13",
"start": 16237,
"end": 18039,
"row": 0,
@@ -158,7 +159,7 @@
"sequence": "NNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQ"
},
{
- "name": "NSP12 (RNA-dependent RNA polymerase)",
+ "name": "NSP12",
"start": 13442,
"end": 16236,
"row": 0,
@@ -166,7 +167,7 @@
"sequence": "SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQ"
},
{
- "name": "ORF1ab polyprotein",
+ "name": "ORF1ab",
"start": 266,
"end": 21555,
"row": 0,
@@ -182,7 +183,7 @@
"sequence": "AGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQ"
},
{
- "name": "NSP1 (leader protein)",
+ "name": "NSP1",
"start": 266,
"end": 805,
"row": 0,
@@ -190,7 +191,7 @@
"sequence": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG"
},
{
- "name": "ORF1a polyprotein",
+ "name": "ORF1a",
"start": 266,
"end": 13483,
"row": 0,
@@ -205,7 +206,7 @@
"sequence": "AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG"
},
{
- "name": "NS3 (ORF3a protein)",
+ "name": "NS3",
"start": 25393,
"end": 26220,
"row": 0,
@@ -213,7 +214,7 @@
"sequence": "MDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWESGVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVMEPIYDEPTTTTSVPL"
},
{
- "name": "NS6 (ORF6 protein)",
+ "name": "NS6",
"start": 27202,
"end": 27387,
"row": 0,
@@ -221,7 +222,7 @@
"sequence": "MFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID"
},
{
- "name": "NS7a (ORF7a protein)",
+ "name": "NS7a",
"start": 27394,
"end": 27759,
"row": 0,
@@ -229,7 +230,7 @@
"sequence": "MKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFSTQFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE"
},
{
- "name": "NS7b (ORF7b)",
+ "name": "NS7b",
"start": 27756,
"end": 27887,
"row": 0,
@@ -237,7 +238,7 @@
"sequence": "MIELSLIDFYLCFLAFLLFLVLIMLIIFWFSLELQDHNETCHA"
},
{
- "name": "NS8 (ORF8 protein)",
+ "name": "NS8",
"start": 27894,
"end": 28259,
"row": 0,
@@ -245,7 +246,7 @@
"sequence": "MKFLVFLGIITTVAAFHQECSLQSCTQHQPYVVDDPCPIHFYSKWYIRVGARKSAPLIELCVDEAGSKSPIQYIDIGNYTVSCLPFTINCQEPKLGSLVVRCSFYEDFLEYHDVRVVLDFI"
},
{
- "name": "Spike (surface glycoprotein)",
+ "name": "Spike",
"start": 21563,
"end": 25384,
"row": 0,
@@ -319,410 +320,3106 @@ def get(self):
return all_protein
-@api.route('/tableLineageCountry')
+@api.route('/getProteinPosition')
class FieldList(Resource):
- @api.doc('table_lineage_country')
+ @api.doc('get_protein_position')
def post(self):
- to_send = api.payload
-
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/tableLineageCountry', json_data, headers)
+ payload = api.payload
+ name_protein = payload['protein']
- response = conn.getresponse()
- all_geo = response.read().decode()
- all_geo = json.loads(all_geo)
+ all_protein = sars_cov_2_products['A']
+ min_pos = 0
+ max_pos = 0
+ for item in all_protein:
+ name = str(item.get('name'))
+ if name.lower() == name_protein.lower():
+ min_pos = 1
+ max_pos = (item.get('end') - item.get('start')) // 3
+ if "nsp" in name.lower():
+ max_pos = max_pos + 1
- table = []
- for item in all_geo:
- single_line = {'lineage': item['lineage']}
- country_count = item['country_count']
- country_count = country_count.replace('"', "")
- country_count = country_count.replace(")\\", "")
- country_count = country_count.replace("\\", "")
- country_count = country_count.replace("{", "")
- country_count = country_count.replace("}", "")
- country_count = country_count.replace("(", "")
- array_country_count = country_count.split("),")
- for single_country in array_country_count:
- single_country = single_country.replace(")", "")
- array_single_country = single_country.split(',')
- single_line[array_single_country[0]] = array_single_country[1]
- table.append(single_line)
+ res = {'start': min_pos, 'stop': max_pos}
- return table
+ return res
-@api.route('/possibleCountryLineage')
+@api.route('/getDomains')
class FieldList(Resource):
- @api.doc('possible_country_lineage')
+ @api.doc('get_domains')
def post(self):
- to_send = api.payload
-
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/arrayCountryForLineage', json_data, headers)
-
- response = conn.getresponse()
- all_country = response.read().decode()
- all_country = all_country.replace(']', '').replace('[', '')
- all_country = all_country.replace('"', '').split(",")
-
- return all_country
-
+ payload = api.payload
+ name_protein = payload['protein']
-@api.route('/denominatorLineageCountry')
-class FieldList(Resource):
- @api.doc('possible_country_lineage')
- def post(self):
+ annotations = pd.read_csv("apis/protein_annotations.csv",
+ delimiter=',')
- to_send = api.payload
+ annotations1 = copy.deepcopy(annotations)
+ annotations2 = copy.deepcopy(annotations)
+ annotations3 = copy.deepcopy(annotations)
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/denominatorLineageCountry', json_data, headers)
+ ann_mutagenesis = annotations1[(annotations.Description.str.lower() != 'n/d')
+ & (annotations.Protein.str.lower() == name_protein.lower())
+ & (annotations.Category.str.lower() == 'mutagenesis')
+ ]
+ ann_mutagenesis2 = ann_mutagenesis[['Description', 'Begin', 'End']]
+ ann_mutagenesis3 = json.loads(ann_mutagenesis2.to_json(orient="records"))
- response = conn.getresponse()
- resp = response.read().decode()
- resp = json.loads(resp)
+ ann_aa_modifications = annotations2[(annotations.Description.str.lower() != 'n/d')
+ & (annotations.Protein.str.lower() == name_protein.lower())
+ & (annotations.Category.str.lower() == 'ptm')
+ & (annotations.Type.str.lower() == 'carbohyd')
+ ]
+ ann_aa_modifications2 = ann_aa_modifications[['Description', 'Begin', 'End']]
+ ann_aa_modifications3 = json.loads(ann_aa_modifications2.to_json(orient="records"))
- denominators = {}
+ ann_sites_family_dom = annotations3[(annotations.Description.str.lower() != 'n/d')
+ & (annotations.Protein.str.lower() == name_protein.lower())
+ & ((annotations.Category.str.lower() == 'domains_and_sites') |
+ (annotations.Type.str.lower() == 'n/d'))
+ ]
+ ann_sites_family_dom2 = ann_sites_family_dom[['Description', 'Begin', 'End']]
+ ann_sites_family_dom3 = json.loads(ann_sites_family_dom2.to_json(orient="records"))
- for item in resp:
- if item['geo'] is None:
- denominators['N/D'] = item['cnt']
- else:
- denominators[item['geo']] = item['cnt']
+ result = {'mutagenesis': ann_mutagenesis3, 'aa_modifications': ann_aa_modifications3,
+ 'sites_and_domains': ann_sites_family_dom3}
- return denominators
+ return result
-@api.route('/analyzeMutationCountryLineage')
+@api.route('/getImportantMutation')
class FieldList(Resource):
- @api.doc('analyze_mutation_country_lineage')
+ @api.doc('get_important_mutation')
def post(self):
- to_send = api.payload
-
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineage', json_data, headers)
-
- response = conn.getresponse()
- all_result = response.read().decode()
- all_result = json.loads(all_result)
-
- mutation_table2 = []
- arr_p_values = []
- for item in all_result:
- single_item = {}
- if item['product'] == 'Spike (surface glycoprotein)':
- protein = item['product'].split(" ", 1)[0]
- mutation = protein + '_'
- # mutation = 'S_'
- else:
- protein = item['product'].split(" ", 1)[0]
- mutation = protein + '_'
- mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
- single_item['mutation'] = mutation
- single_item['start_aa_original'] = item['start_aa_original']
- single_item['sequence_aa_original'] = item['sequence_aa_original']
- single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
- single_item['product'] = item['product']
- single_item['mutation_position'] = item['start_aa_original']
- single_item['target'] = item['country']
- single_item['background'] = item['lineage']
- single_item['count_target'] = item['count_seq']
- single_item['percentage_background'] = item['fraction']
- single_item['numerator_background'] = item['numerator']
- single_item['denominator_background'] = item['denominator']
- single_item['percentage_target'] = item['fraction_country']
- single_item['numerator_target'] = item['count_seq']
- single_item['denominator_target'] = item['denominator_country']
-
- epsilon = 0.00000001
- single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
- (single_item['percentage_background'] + epsilon)
-
- if single_item['odd_ratio'] >= 1:
- # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_country'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item['numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
- else:
- # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_country'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item['numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
-
- arr_p_values.append(single_item['p_value'])
- mutation_table2.append(single_item)
+ payload = api.payload
+ name_lineage = payload['lineage']
- a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
+ result = {'mutation': [], 'additional_mutation': []}
- i = 0
- for item in mutation_table2:
- item['pvalue'] = new_p_values[i]
- i = i + 1
+ if name_lineage in dict_lineage_mutation:
+ lineage_json = dict_lineage_mutation[name_lineage]
+ result['mutation'] = lineage_json['mutation']
+ result['additional_mutation'] = lineage_json['additional_mutation']
+ else:
+ all_mutation = []
+ all_additional_mutation = []
+ for lineage in dict_lineage_mutation:
+ row = dict_lineage_mutation[lineage]
+ for mutation in row['mutation']:
+ if mutation not in all_mutation:
+ all_mutation.append(mutation)
+ if mutation in all_additional_mutation:
+ all_additional_mutation.remove(mutation)
+ for additional_mutation in row['additional_mutation']:
+ if additional_mutation not in all_additional_mutation and additional_mutation not in all_mutation:
+ all_additional_mutation.append(additional_mutation)
+ result['mutation'] = all_mutation
+ result['additional_mutation'] = all_additional_mutation
- return mutation_table2
+ return result
-@api.route('/analyzeMutationCountryLineageInTime')
+@api.route('/getLineageTree')
class FieldList(Resource):
- @api.doc('analyze_mutation_country_lineage_in_time')
+ @api.doc('get_lineage_tree')
def post(self):
- to_send = api.payload
+ payload = api.payload
+ possible_lineages = payload['possibleLineages']
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineageInTime', json_data, headers)
+ dict_copy = dict_lineage_mutation
- response = conn.getresponse()
- all_result = response.read().decode()
- all_result = json.loads(all_result)
+ arr_lineages = []
+ dict_lineages = {}
+ for item in possible_lineages:
+ single_line = item
+ dict_lineages[item['value']] = single_line
+ arr_lineages.append(item['value'])
- mutation_table2 = []
- arr_p_values = []
- for item in all_result:
- single_item = {}
- if item['product'] == 'Spike (surface glycoprotein)':
- protein = item['product'].split(" ", 1)[0]
- mutation = protein + '_'
- # mutation = 'S_'
- else:
- protein = item['product'].split(" ", 1)[0]
- mutation = protein + '_'
- mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
- single_item['mutation'] = mutation
- single_item['start_aa_original'] = item['start_aa_original']
- single_item['sequence_aa_original'] = item['sequence_aa_original']
- single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
- single_item['product'] = item['product']
- single_item['mutation_position'] = item['start_aa_original']
- single_item['target'] = item['target_time']
- single_item['background'] = item['background_time']
- single_item['country'] = item['country']
- single_item['lineage'] = item['lineage']
- single_item['count_target'] = item['count_seq']
- single_item['percentage_background'] = item['fraction']
- single_item['numerator_background'] = item['numerator']
- single_item['denominator_background'] = item['denominator']
- single_item['percentage_target'] = item['fraction_target']
- single_item['numerator_target'] = item['count_seq']
- single_item['denominator_target'] = item['denominator_target']
-
- epsilon = 0.00000001
- single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
- (single_item['percentage_background'] + epsilon)
+ dict_copy2 = dict(sorted(dict_copy.items(), key=lambda k_v: k_v[1]['alias']))
- if single_item['odd_ratio'] >= 1:
- # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item['numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
- else:
- # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item['numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
+ items = []
+ idx = 1
- arr_p_values.append(single_item['p_value'])
- mutation_table2.append(single_item)
+ for lineage in dict_copy2:
+ already_done = False
+ children = False
+ children_lineage = False
+ important_lineage = False
+ alias = dict_copy2[lineage]['alias']
+ if lineage in arr_lineages:
+ if dict_copy2[lineage]['WHO label'] != '':
+ important_lineage = True
+ for itm in items:
+ possible_parent_alias = str(itm['alias']) + '.'
+ possible_children_alias = str(alias)
+ possible_parent_lineage = str(itm['real_name']) + '.'
+ possible_children_lineage = str(lineage)
+ if possible_parent_alias in possible_children_alias:
+ children = True
+ recursive_children_lineage(itm, lineage, alias, dict_copy2, dict_lineages)
+ if possible_parent_lineage in possible_children_lineage:
+ children_lineage = True
+ if possible_children_lineage != possible_children_alias:
+ recursive_children_lineage(itm, lineage, lineage, dict_copy2, dict_lineages)
+ if not children:
+ already_done = True
+ name_complete = lineage
+ if dict_copy2[lineage]['WHO label'] != '':
+ name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') '
+ single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage,
+ 'who': dict_copy2[lineage]['WHO label'], 'children': [],
+ 'count': dict_lineages[lineage]['count']}
+ items.append(single_lineage)
+ idx = idx + 1
- a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
+ if not children_lineage and not already_done:
+ name_complete = lineage.split('.')[0]
+ single_lineage = {'id': idx, 'alias': name_complete, 'name': name_complete,
+ 'real_name': name_complete,
+ 'who': '', 'children': [],
+ 'count': 0}
+ items.append(single_lineage)
+ idx = idx + 1
+ recursive_children_lineage(single_lineage, lineage, lineage, dict_copy2, dict_lineages)
- i = 0
- for item in mutation_table2:
- item['pvalue'] = new_p_values[i]
- i = i + 1
+ # if important_lineage and not already_done:
+ # name_complete = lineage
+ # if dict_copy2[lineage]['WHO label'] != '':
+ # name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') '
+ # single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage,
+ # 'who': dict_copy2[lineage]['WHO label'], 'children': [],
+ # 'count': dict_lineages[lineage]['count']}
+ # items.append(single_lineage)
+ # idx = idx + 1
- return mutation_table2
+ return items
-@api.route('/analyzeTimeDistributionCountryLineage')
+@api.route('/getAllImportantMutationPerLineage')
class FieldList(Resource):
- @api.doc('analyze_time_distribution_country_lineage')
+ @api.doc('get_important_mutation')
def post(self):
- to_send = api.payload
-
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionCountryLineage', json_data, headers)
- response = conn.getresponse()
- all_result = response.read().decode()
- all_result = json.loads(all_result)
+ payload = api.payload
+ lineage = payload['lineage']
+ proteins = payload['proteins']
- return all_result
+ array_proteins = []
+ for protein in proteins:
+ protein_rewritten = protein.split(" ")[0]
+ array_proteins.append(protein_rewritten)
-@api.route('/analyzeTimeDistributionBackgroundQueryGeo')
-class FieldList(Resource):
- @api.doc('analyze_time_distribution_country_lineage')
- def post(self):
- to_send = api.payload
+ dict_copy = all_important_mutation_dict
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionBackgroundQueryGeo', json_data,
- headers)
+ array_important_mutation = []
- response = conn.getresponse()
- all_result = response.read().decode()
- all_result = json.loads(all_result)
+ if lineage is None:
+ for lineage_mutations in dict_copy:
+ single_lineage_mutation = dict_copy[lineage_mutations]
+ for mutation in single_lineage_mutation[1]:
+ if mutation not in array_important_mutation:
+ protein = mutation.split("_")[0]
+ if protein in array_proteins:
+ array_important_mutation.append(mutation)
+ array_important_mutation.sort()
+ else:
+ if lineage in dict_copy:
+ single_lineage_mutation = dict_copy[lineage]
+ for mutation in single_lineage_mutation[1]:
+ if mutation not in array_important_mutation:
+ protein = mutation.split("_")[0]
+ if protein in array_proteins:
+ array_important_mutation.append(mutation)
+ array_important_mutation.sort()
- return all_result
+ return array_important_mutation
-@api.route('/analyzeMutationProvinceRegion')
@api.route('/checkAccessionId')
class FieldList(Resource):
    @api.doc('check_accession_id')
    def post(self):
        """Return True when the posted accession id is a known GISAID id."""
        body = api.payload
        requested_id = body['accession_id']
        known_ids = all_accession_id_dict['all_acc_id']
        # Membership test against the preloaded accession-id collection.
        return requested_id in known_ids
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- headers = {'Content-type': 'application/json'}
- send = to_send
- json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationProvinceRegion', json_data, headers)
- response = conn.getresponse()
- all_result = response.read().decode()
- all_result = json.loads(all_result)
def recursive_children_lineage(parent, lineage, alias, dict_copy2, dict_lineages):
    """Insert *lineage* into the tree rooted at *parent*, descending by alias.

    Looks for a child of *parent* whose alias followed by a dot occurs inside
    *alias*; if one exists, recurses into that child. Otherwise appends a new
    leaf node for *lineage* directly under *parent*, labelled with the WHO
    name from *dict_copy2* (when present) and the count from *dict_lineages*.
    """
    node_id = '{}_{}'.format(parent['id'], len(parent['children']))
    target_alias = str(alias)
    # Descend into the first child whose dotted alias is contained in ours.
    for child in parent['children']:
        if str(child['alias']) + '.' in target_alias:
            recursive_children_lineage(child, lineage, alias, dict_copy2, dict_lineages)
            return
    # No matching child: attach the lineage as a new leaf here.
    who_label = dict_copy2[lineage]['WHO label']
    display_name = lineage if who_label == '' else lineage + ' (' + who_label + ') '
    parent['children'].append({
        'id': node_id,
        'alias': alias,
        'name': display_name,
        'real_name': lineage,
        'who': who_label,
        'children': [],
        'count': dict_lineages[lineage]['count'],
    })
- mutation_table2 = []
- arr_p_values = []
- for item in all_result:
- single_item = {}
- if item['product'] == 'Spike (surface glycoprotein)':
- protein = item['product'].split(" ", 1)[0]
- mutation = protein + '_'
- # mutation = 'S_'
- else:
- protein = item['product'].split(" ", 1)[0]
- mutation = protein + '_'
- mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
- single_item['start_aa_original'] = item['start_aa_original']
- single_item['sequence_aa_original'] = item['sequence_aa_original']
- single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
- single_item['mutation'] = mutation
- single_item['product'] = item['product']
- single_item['mutation_position'] = item['start_aa_original']
- # if 'country' in item:
- # single_item['target'] = item['region']
- # single_item['background'] = item['country']
- # else:
- # single_item['target'] = item['province']
- # single_item['background'] = item['region']
- single_item['target'] = item['target']
- single_item['background'] = item['background']
- single_item['lineage'] = item['lineage']
- single_item['count_target'] = item['count_seq']
- single_item['percentage_background'] = item['fraction']
- single_item['numerator_background'] = item['numerator']
- single_item['denominator_background'] = item['denominator']
- single_item['percentage_target'] = item['fraction_target']
- single_item['numerator_target'] = item['count_seq']
- single_item['denominator_target'] = item['denominator_target']
+# ----------------------------------------- MONGO DB ----------------------------------------------- #
- epsilon = 0.00000001
- single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
- (single_item['percentage_background'] + epsilon)
- if single_item['odd_ratio'] >= 1:
- # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item['numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
- else:
- # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
# Maps API/frontend field names to the corresponding fields of the MongoDB
# sequence documents ('muts' is the per-sequence array of amino-acid
# mutations: protein, position, original and alternative residue).
# Geo fields (geo_group/country/region/province) are handled separately at
# the call sites by prefixing 'location.'; names not listed here are used
# unchanged.
# NOTE(review): queries in this module $match on 'collection_date' directly
# rather than on the 'covv_collection_date' mapping below — confirm which
# field name the collection actually stores.
translate_dictionary = {
    'accession_id': '_id',
    'lineage': 'covv_lineage',
    'collection_date': 'covv_collection_date',
    'location': 'covv_location',
    'product': 'muts.pro',
    'start_aa_original': 'muts.loc',
    'sequence_aa_original': 'muts.org',
    'sequence_aa_alternative': 'muts.alt',
}
+
+
@api.route('/selectorQuery')
class FieldList(Resource):
    @api.doc('selector_query')
    def post(self):
        """Enumerate the distinct values of one selector field, with counts.

        Payload: {'field': <name of the selector to populate>,
                  'query': <the other currently-selected filters>}.
        Unfiltered 'lineage' / 'geo_group' requests are answered from
        precomputed in-memory dictionaries; everything else runs a
        $match -> $group -> $sort aggregation on the sequence collection.
        Returns a list of {'value': ..., 'count': ...} sorted by count desc.
        """

        to_use = api.payload
        field_name = to_use['field']
        query_fields = to_use['query']

        # Example payload:
        # field_name = 'country'
        # query_fields = {'lineage': 'B.1', 'geo_group': ['Europe', 'Asia'], 'minDate': '2020-01-01', 'maxDate': "2021-01-01",
        # 'toExclude': {}}
        # 'toExclude': {'geo_group': ['Asia'], 'country': ['Italy', 'France']

        # The field being enumerated must not also constrain the query.
        if field_name in query_fields:
            del query_fields[field_name]

        where_part = {}
        start_date = datetime.strptime("2019-01-01", '%Y-%m-%d')
        # c_coll_date_prec == 2 keeps only day-precise collection dates.
        where_part['c_coll_date_prec'] = {}
        where_part['c_coll_date_prec']['$eq'] = 2
        where_part['collection_date'] = {}
        where_part['collection_date']['$gte'] = start_date

        # The enumerated field itself must be non-null; translate the API
        # name to the Mongo document path (geo fields live under 'location').
        field_not_null = field_name
        if field_not_null in translate_dictionary:
            field_not_null = translate_dictionary[field_name]
        if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province':
            field_not_null = 'location.' + field_name
        where_part[field_not_null] = {'$ne': None}

        if query_fields is not None:
            for key in query_fields:
                if key == 'minDate':
                    start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part['collection_date']['$gte'] = start_date
                elif key == 'maxDate':
                    stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part['collection_date']['$lte'] = stop_date

                elif key == 'toExclude':
                    # Every excluded value becomes a $ne clause; one $and per
                    # excluded field, all collected under the top-level $and.
                    for fieldToExclude in query_fields[key]:
                        if '$and' not in where_part:
                            where_part['$and'] = []

                        single_where_part = {'$and': []}
                        for geoToExclude in query_fields[key][fieldToExclude]:
                            real_field_to_exclude = fieldToExclude
                            if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
                                    or fieldToExclude == 'region' or fieldToExclude == 'province':
                                real_field_to_exclude = 'location.' + fieldToExclude
                            specific_and = {}
                            geo_value = geoToExclude  # .replace("'", "''")
                            specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
                            single_where_part['$and'].append(specific_and)
                        where_part['$and'].append(single_where_part)

                elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
                    # Geo filters: single value or list, expressed as an $or
                    # of $eq clauses on the embedded 'location' document.
                    if '$and' not in where_part:
                        where_part['$and'] = []

                    real_key = key
                    if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
                        real_key = 'location.' + key
                    if isinstance(query_fields[key], list):
                        single_where_part_or = {'$or': []}
                        for itm in query_fields[key]:
                            specific_or = {}
                            field_value = itm  # .replace("'", "''")
                            specific_or[f'{real_key}'] = {'$eq': field_value}
                            single_where_part_or['$or'].append(specific_or)
                        where_part['$and'].append(single_where_part_or)
                    else:
                        single_where_part_or = {'$or': []}
                        replace_fields_value = query_fields[key]  # .replace("'", "''")
                        specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
                        single_where_part_or['$or'].append(specific_or)
                        where_part['$and'].append(single_where_part_or)

                else:
                    # Any remaining filter is a plain equality on the
                    # (translated) document field.
                    real_key = key
                    if key in translate_dictionary:
                        real_key = translate_dictionary[key]
                    replace_fields_value = query_fields[key]
                    if key != 'start_aa_original':
                        replace_fields_value = query_fields[key]  # .replace("'", "''")
                    if real_key not in where_part:
                        where_part[real_key] = {}
                    where_part[real_key]['$eq'] = replace_fields_value

        query = []

        query_where = {"$match": where_part}
        query.append(query_where)

        # Group by the enumerated field and count sequences per value.
        group_part = {}
        real_field = field_name
        if field_name in translate_dictionary:
            real_field = translate_dictionary[field_name]
        if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province':
            real_field = 'location.' + field_name
        group_part["_id"] = {"value": f"${real_field}"}
        group_part["count"] = {"$sum": 1}
        query_group = {"$group": group_part}
        query.append(query_group)

        sort_part = {"count": -1}
        query_sort = {"$sort": sort_part}
        query.append(query_sort)

        list_dict = []
        # Fast paths: with no effective filters, 'lineage' and 'geo_group'
        # are served from precomputed dictionaries instead of the database.
        # NOTE(review): assumes 'toExclude' is always present in the payload
        # query — a missing key would raise KeyError here; confirm with the
        # frontend callers.
        if field_name == 'lineage' and (len(query_fields['toExclude']) == 0 and ((len(query_fields) == 2 and "lineage" in query_fields) or len(query_fields) < 2)):
            for lineage in all_important_mutation_dict:
                single_item_remodel = {'value': lineage, 'count': all_important_mutation_dict[lineage][0]}
                list_dict.append(single_item_remodel)
        elif field_name == 'geo_group' and (len(query_fields['toExclude']) == 0 and ((len(query_fields) == 2 and "geo_group" in query_fields) or len(query_fields) < 2)):
            # Sum the per-location counts up to the geo_group level.
            dict_geo = {}
            for geo in all_geo_dict['all_geo']:
                geo_group = geo['geo_group']
                if geo_group not in dict_geo:
                    single_item_remodel = {'value': geo_group, 'count': geo['count']}
                    dict_geo[geo_group] = single_item_remodel
                else:
                    dict_geo[geo_group]['count'] = dict_geo[geo_group]['count'] + geo['count']
            for location in dict_geo:
                list_dict.append(dict_geo[location])
        else:
            # print("query", query)
            results = collection_db.aggregate(query, allowDiskUse=True)

            # Flatten {'_id': {'value': v}, 'count': c} into {'value': v, 'count': c}.
            for single_item in list(results):
                single_item_remodel = {}
                for key in single_item:
                    if key == '_id':
                        single_item_remodel['value'] = single_item['_id']['value']
                    else:
                        single_item_remodel[key] = single_item[key]
                list_dict.append(single_item_remodel)

        # print("field:", field_name, " result:", list_dict)
        return list_dict
+
+
@api.route('/tableLineageCountry')
class FieldList(Resource):
    @api.doc('table_lineage_country')
    def post(self):
        """Lineage breakdown table for the children of the selected area.

        Payload: {'type': <'world'|'geo_group'|'country'|'region'>,
                  'value': <selected area name (ignored for 'world')>,
                  'minCountSeq': <minimum share, in percent, to keep a row>,
                  'minDate': 'YYYY-MM-DD', 'maxDate': 'YYYY-MM-DD'}.
        Returns one dict per lineage: {'lineage': <label>, <child area>: count, ...},
        keeping only lineages whose total share of the area's sequences is at
        least minCountSeq percent.
        """
        filter_geo = api.payload
        # filter_geo = {'type': 'country', 'value': 'Italy', 'minCountSeq': 500}
        geo_selection = 'country'
        geo_min_count = filter_geo['minCountSeq']
        geo_where = filter_geo['type']
        min_date = filter_geo['minDate']
        max_date = filter_geo['maxDate']
        geo_where_value = filter_geo['value']
        # if geo_where_value is not None:
        #     geo_where_value = geo_where_value.replace("'", "''")

        # Pick the next level down in the geographic hierarchy:
        # world -> geo_group, geo_group -> country, country -> region,
        # region -> province.
        geo_where_part = None
        if geo_where == 'geo_group':
            geo_selection = 'country'
            geo_where_part = {"$eq": geo_where_value}
        elif geo_where == 'country':
            geo_selection = 'region'
            geo_where_part = {"$eq": geo_where_value}
        elif geo_where == 'region':
            geo_selection = 'province'
            geo_where_part = {"$eq": geo_where_value}
        elif geo_where == 'world':
            geo_selection = 'geo_group'
            geo_where_part = None

        # Date window, restricted to day-precise collection dates.
        where_part = {}
        start_date = datetime.strptime(min_date, '%Y-%m-%d')
        stop_date = datetime.strptime(max_date, '%Y-%m-%d')
        where_part['c_coll_date_prec'] = {}
        where_part['c_coll_date_prec']['$eq'] = 2
        where_part['collection_date'] = {}
        where_part['collection_date']['$gte'] = start_date
        where_part['collection_date']['$lte'] = stop_date
        if geo_where_part is not None:
            mongo_field = "location." + geo_where
            where_part[mongo_field] = geo_where_part

        query = []

        query_where = {"$match": where_part}
        query.append(query_where)

        # Denominator pipeline: total sequences in the selected area/window.
        # NOTE(review): it groups on 'location.<type>'; for type 'world' that
        # path does not exist, so all documents fall into one None bucket —
        # confirm that is the intended behavior.
        query_denominator = [query_where]
        group_part_denominator = {}

        modified_field_denominator = "location." + geo_where
        group_part_denominator["_id"] = {f"{geo_where}": f"${modified_field_denominator}"}
        group_part_denominator["count"] = {"$sum": 1}
        query_group_denominator = {"$group": group_part_denominator}
        query_denominator.append(query_group_denominator)

        # print("query denominator", query_denominator)
        results = collection_db.aggregate(query_denominator, allowDiskUse=True)

        # Keep the count of the last group returned as the denominator.
        denominator = 0
        for single_item in list(results):
            for key in single_item:
                if key == 'count':
                    denominator = single_item[key]

        # Numerator pipeline: counts per (lineage, child area) pair.
        group_part = {}

        modified_field_location = "location." + geo_selection
        modified_field_lineage = translate_dictionary["lineage"]
        group_part["_id"] = {"lineage": f"${modified_field_lineage}", f"{geo_selection}": f"${modified_field_location}"}
        group_part["count"] = {"$sum": 1}
        query_group = {"$group": group_part}
        query.append(query_group)

        # print("query", query)
        results = collection_db.aggregate(query, allowDiskUse=True)

        dict_lineage_copy = dict_lineage_mutation

        # Pivot the (lineage, area) counts into one row per lineage label,
        # appending the WHO label to the lineage name when one is known;
        # missing lineages/areas are bucketed under 'N/D'.
        list_dict = []
        list_dict_dict = {}
        for single_item in list(results):
            for key in single_item:
                if key == '_id':
                    for k in single_item[key]:
                        if single_item[key]['lineage'] is None or single_item[key]['lineage'] == 'None':
                            lineage_to_use = 'N/D'
                        else:
                            if single_item[key]['lineage'] in dict_lineage_copy and 'WHO label' in dict_lineage_copy[single_item[key]['lineage']] and dict_lineage_copy[single_item[key]['lineage']]['WHO label'] != '':
                                lineage_to_use = single_item[key]['lineage'] + ' (' + dict_lineage_copy[single_item[key]['lineage']]['WHO label'] + ') '
                            else:
                                lineage_to_use = single_item[key]['lineage']
                        if k == 'lineage':
                            if lineage_to_use not in list_dict_dict:
                                list_dict_dict[lineage_to_use] = {k: lineage_to_use}
                        else:
                            if single_item[key][k] is None:
                                list_dict_dict[lineage_to_use]['N/D'] = single_item['count']
                            else:
                                list_dict_dict[lineage_to_use][single_item[key][k]] = single_item['count']

        # Keep only rows whose total share is at least minCountSeq percent.
        # NOTE(review): despite the name, minCountSeq is compared against a
        # percentage, not a raw count — confirm with the frontend.
        for item in list_dict_dict:
            count = 0
            real_item = list_dict_dict[item]
            for key in real_item:
                if key != 'lineage':
                    count = count + real_item[key]
            if (count / denominator) * 100 >= geo_min_count:
                list_dict.append(real_item)

        # print("dict", list_dict)
        return list_dict
+
+
@api.route('/denominatorLineageCountry')
class FieldList(Resource):
    @api.doc('denominator_lineage_country')
    def post(self):
        """Count sequences per sub-area of the selected geographic area.

        Payload: {'type': <'world'|'geo_group'|'country'|'region'>,
                  'value': <selected area name (ignored for 'world')>,
                  'minDate': 'YYYY-MM-DD', 'maxDate': 'YYYY-MM-DD'}.
        Returns a dict mapping each child-area name (or 'N/D' when the
        location is missing) to its sequence count in the date window.
        """
        filter_geo = api.payload

        geo_where = filter_geo['type']
        geo_where_value = filter_geo['value']

        # One level down the geographic hierarchy:
        # world -> geo_group -> country -> region -> province.
        child_level = {'geo_group': 'country', 'country': 'region',
                       'region': 'province', 'world': 'geo_group'}
        geo_selection = child_level.get(geo_where, 'country')
        # Only the three named levels constrain the match; 'world' means no
        # geographic restriction at all.
        if geo_where in ('geo_group', 'country', 'region'):
            geo_where_part = {"$eq": geo_where_value}
        else:
            geo_where_part = None

        # Date window, restricted to day-precise collection dates.
        window_start = datetime.strptime(filter_geo['minDate'], '%Y-%m-%d')
        window_stop = datetime.strptime(filter_geo['maxDate'], '%Y-%m-%d')
        where_part = {
            'c_coll_date_prec': {'$eq': 2},
            'collection_date': {'$gte': window_start, '$lte': window_stop},
        }
        if geo_where_part is not None:
            where_part['location.' + geo_where] = geo_where_part

        # Group by the child level and count sequences per sub-area.
        pipeline = [
            {"$match": where_part},
            {"$group": {
                "_id": {geo_selection: '$location.' + geo_selection},
                "count": {"$sum": 1},
            }},
        ]

        # print("query", pipeline)
        results = collection_db.aggregate(pipeline, allowDiskUse=True)

        counts = {}
        for row in list(results):
            for area in row['_id'].values():
                counts[area] = row['count']

        # Replace the missing-location bucket (None) with the label 'N/D'.
        result_dict = {}
        for area in counts:
            result_dict['N/D' if area is None else area] = counts[area]

        # print("denominators", result_dict)
        return result_dict
+
+
@api.route('/analyzeTimeDistributionCountryLineage')
class FieldList(Resource):
    @api.doc('analyze_time_distribution_country_lineage')
    def post(self):
        """Daily sequence counts matching the active query filters.

        Payload: {'query': <filters: minDate/maxDate, geo fields, toExclude,
        plus plain field equalities such as lineage>}.
        Returns [{'name': 'YYYY-MM-DD', 'value': count}, ...] ordered by date.
        """
        to_use = api.payload
        query_fields = to_use['query']

        # Base filter: day-precise collection dates from 2019 onwards.
        where_part = {}
        start_date = datetime.strptime("2019-01-01", '%Y-%m-%d')
        where_part['c_coll_date_prec'] = {}
        where_part['c_coll_date_prec']['$eq'] = 2
        where_part['collection_date'] = {}
        where_part['collection_date']['$gte'] = start_date

        if query_fields is not None:
            for key in query_fields:
                if key == 'minDate':
                    start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part['collection_date']['$gte'] = start_date
                elif key == 'maxDate':
                    stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part['collection_date']['$lte'] = stop_date

                elif key == 'toExclude':
                    # One $and of $ne clauses per excluded field.
                    for fieldToExclude in query_fields[key]:
                        if '$and' not in where_part:
                            where_part['$and'] = []

                        single_where_part = {'$and': []}
                        for geoToExclude in query_fields[key][fieldToExclude]:
                            real_field_to_exclude = fieldToExclude
                            if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
                                    or fieldToExclude == 'region' or fieldToExclude == 'province':
                                real_field_to_exclude = 'location.' + fieldToExclude
                            specific_and = {}
                            geo_value = geoToExclude  # .replace("'", "''")
                            specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
                            single_where_part['$and'].append(specific_and)
                        where_part['$and'].append(single_where_part)

                elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
                    # Geo filters become an $or of $eq on 'location.<field>'.
                    if '$and' not in where_part:
                        where_part['$and'] = []

                    real_key = key
                    if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
                        real_key = 'location.' + key
                    if isinstance(query_fields[key], list):
                        single_where_part_or = {'$or': []}
                        for itm in query_fields[key]:
                            specific_or = {}
                            field_value = itm  # .replace("'", "''")
                            specific_or[f'{real_key}'] = {'$eq': field_value}
                            single_where_part_or['$or'].append(specific_or)
                        where_part['$and'].append(single_where_part_or)
                    else:
                        single_where_part_or = {'$or': []}
                        replace_fields_value = query_fields[key]  # .replace("'", "''")
                        specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
                        single_where_part_or['$or'].append(specific_or)
                        where_part['$and'].append(single_where_part_or)

                else:
                    # Plain equality on the (translated) document field.
                    real_key = key
                    if key in translate_dictionary:
                        real_key = translate_dictionary[key]
                    replace_fields_value = query_fields[key]
                    if key != 'start_aa_original':
                        replace_fields_value = query_fields[key]  # .replace("'", "''")
                    if real_key not in where_part:
                        where_part[real_key] = {}
                    where_part[real_key]['$eq'] = replace_fields_value

        query = []

        query_where = {"$match": where_part}
        query.append(query_where)

        # Group by the (stringified) collection date, counting per day.
        group_part = {}
        # Kept only for the commented-out alternative grouping below.
        real_field = translate_dictionary['collection_date']
        # group_part["_id"] = {"name": f"${real_field}"}
        group_part["_id"] = {"name": {"$toString": '$collection_date'}}
        group_part["count"] = {"$sum": 1}
        query_group = {"$group": group_part}
        query.append(query_group)

        sort_part = {"_id": 1}
        query_sort = {"$sort": sort_part}
        query.append(query_sort)

        # print("query", query)
        results = collection_db.aggregate(query, allowDiskUse=True)

        # Remodel to [{'name': 'YYYY-MM-DD', 'value': count}]; the split
        # drops the 'T00:00:00' time part of the stringified date.
        list_dict = []
        for single_item in list(results):
            single_item_remodel = {}
            for key in single_item:
                if key == '_id':
                    single_item_remodel['name'] = single_item['_id']['name'].split("T")[0]
                else:
                    single_item_remodel['value'] = single_item['count']
            list_dict.append(single_item_remodel)

        return list_dict
+
+
@api.route('/analyzeTimeDistributionBackgroundQueryGeo')
class FieldList(Resource):
    # Fix: the operation id was copy-pasted from the country/lineage endpoint,
    # producing a duplicate Swagger id.
    @api.doc('analyze_time_distribution_background_query_geo')
    def post(self):
        """Daily sequence counts for the background of a geo query.

        Payload: {'query': <active filters>, 'query_false': <field name>}.
        The filter named by 'query_false' is inverted (the field is null or
        different from the requested value), so the counts describe the
        complement of the target selection; every other filter applies as-is.
        Returns [{'name': 'YYYY-MM-DD', 'value': count}, ...] ordered by date.
        """
        to_use = api.payload
        query_fields = to_use['query']
        query_false = to_use['query_false']

        # Base filter: day-precise collection dates from 2019 onwards.
        where_part = {}
        start_date = datetime.strptime("2019-01-01", '%Y-%m-%d')
        where_part['c_coll_date_prec'] = {'$eq': 2}
        where_part['collection_date'] = {'$gte': start_date}

        if query_fields is not None:
            for key in query_fields:
                if key == 'minDate':
                    start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part['collection_date']['$gte'] = start_date
                elif key == 'maxDate':
                    stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part['collection_date']['$lte'] = stop_date

                elif key == 'toExclude':
                    # One $and of $ne clauses per excluded field.
                    for fieldToExclude in query_fields[key]:
                        if '$and' not in where_part:
                            where_part['$and'] = []

                        single_where_part = {'$and': []}
                        for geoToExclude in query_fields[key][fieldToExclude]:
                            real_field_to_exclude = fieldToExclude
                            if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
                                    or fieldToExclude == 'region' or fieldToExclude == 'province':
                                real_field_to_exclude = 'location.' + fieldToExclude
                            specific_and = {}
                            geo_value = geoToExclude
                            specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
                            single_where_part['$and'].append(specific_and)
                        where_part['$and'].append(single_where_part)

                elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
                    if '$and' not in where_part:
                        where_part['$and'] = []

                    # Geo fields live under the embedded 'location' document.
                    real_key = 'location.' + key
                    if key == query_false:
                        # Background: the geo field is null or differs from
                        # the selected value.
                        single_where_part_or = {'$or': []}
                        specific_or = {f'{real_key}': {'$eq': None}}
                        single_where_part_or['$or'].append(specific_or)
                        specific_or = {f'{real_key}': {'$ne': query_fields[key]}}
                        single_where_part_or['$or'].append(specific_or)
                        where_part['$and'].append(single_where_part_or)
                    else:
                        if isinstance(query_fields[key], list):
                            single_where_part_or = {'$or': []}
                            for itm in query_fields[key]:
                                specific_or = {f'{real_key}': {'$eq': itm}}
                                single_where_part_or['$or'].append(specific_or)
                            where_part['$and'].append(single_where_part_or)
                        else:
                            single_where_part_or = {'$or': []}
                            specific_or = {f'{real_key}': {'$eq': query_fields[key]}}
                            single_where_part_or['$or'].append(specific_or)
                            where_part['$and'].append(single_where_part_or)

                else:
                    real_key = key
                    if key == query_false:
                        # Background: the field is null or differs.
                        # Fix: ensure the top-level $and exists — the original
                        # assumed a geo filter had already created it and
                        # raised KeyError otherwise.
                        if '$and' not in where_part:
                            where_part['$and'] = []
                        # NOTE(review): real_key is not run through
                        # translate_dictionary in this branch (unlike the
                        # non-inverted case below) — confirm this is intended.
                        single_where_part_or = {'$or': []}
                        specific_or = {f'{real_key}': {'$eq': None}}
                        single_where_part_or['$or'].append(specific_or)
                        specific_or = {f'{real_key}': {'$ne': query_fields[key]}}
                        single_where_part_or['$or'].append(specific_or)
                        where_part['$and'].append(single_where_part_or)
                    else:
                        # Plain equality on the (translated) document field.
                        if key in translate_dictionary:
                            real_key = translate_dictionary[key]
                        if real_key not in where_part:
                            where_part[real_key] = {}
                        where_part[real_key]['$eq'] = query_fields[key]

        query = []

        query_where = {"$match": where_part}
        query.append(query_where)

        # Group by the (stringified) collection date, counting per day.
        group_part = {}
        group_part["_id"] = {"name": {"$toString": '$collection_date'}}
        group_part["count"] = {"$sum": 1}
        query_group = {"$group": group_part}
        query.append(query_group)

        sort_part = {"_id": 1}
        query_sort = {"$sort": sort_part}
        query.append(query_sort)

        # print("query", query)
        results = collection_db.aggregate(query, allowDiskUse=True)

        # Remodel to [{'name': 'YYYY-MM-DD', 'value': count}]; the split
        # drops the 'T00:00:00' time part of the stringified date.
        list_dict = []
        for single_item in list(results):
            single_item_remodel = {}
            for key in single_item:
                if key == '_id':
                    single_item_remodel['name'] = single_item['_id']['name'].split("T")[0]
                else:
                    single_item_remodel['value'] = single_item['count']
            list_dict.append(single_item_remodel)

        return list_dict
+
+
+@api.route('/analyzeMutationCountryLineageInTime')
+class FieldList(Resource):
+ @api.doc('analyze_mutation_country_lineage_in_time')
+ def post(self):
+ payload = api.payload
+ start_target_time = payload['start_target'] # '2021-03-31'
+ end_target_time = payload['end_target'] # '2021-06-31'
+ start_background_time = payload['start_background'] # '2019-01-31'
+ end_background_time = payload['end_background'] # '2021-03-31'
+ array_protein = payload['protein'] # ['Spike (surface glycoprotein)']
+
+ query_fields = payload['query']
+
+ if 'lineage' in query_fields:
+ lineage = query_fields['lineage']
+ else:
+ lineage = 'empty'
+ if 'province' in query_fields:
+ geo1 = query_fields['province']
+ elif 'region' in query_fields:
+ geo1 = query_fields['region']
+ elif 'country' in query_fields:
+ geo1 = query_fields['country']
+ elif 'geo_group' in query_fields:
+ geo1 = query_fields['geo_group']
+ else:
+ geo1 = 'empty'
+
+ array_result = []
+
+ where_part_target = {}
+ where_part_background = {}
+ where_part_target_denominator = {}
+ where_part_background_denominator = {}
+ start_date_target = datetime.strptime(start_target_time, '%Y-%m-%d')
+ end_date_target = datetime.strptime(end_target_time, '%Y-%m-%d')
+ start_date_background = datetime.strptime(start_background_time, '%Y-%m-%d')
+ end_date_background = datetime.strptime(end_background_time, '%Y-%m-%d')
+ where_part_target['c_coll_date_prec'] = {}
+ where_part_target['c_coll_date_prec']['$eq'] = 2
+ where_part_background['c_coll_date_prec'] = {}
+ where_part_background['c_coll_date_prec']['$eq'] = 2
+ where_part_target_denominator['c_coll_date_prec'] = {}
+ where_part_target_denominator['c_coll_date_prec']['$eq'] = 2
+ where_part_background_denominator['c_coll_date_prec'] = {}
+ where_part_background_denominator['c_coll_date_prec']['$eq'] = 2
+
+ where_part_target['collection_date'] = {}
+ where_part_target['collection_date']['$gte'] = start_date_target
+ where_part_target['collection_date']['$lte'] = end_date_target
+ where_part_background['collection_date'] = {}
+ where_part_background['collection_date']['$gte'] = start_date_background
+ where_part_background['collection_date']['$lte'] = end_date_background
+ where_part_target_denominator['collection_date'] = {}
+ where_part_target_denominator['collection_date']['$gte'] = start_date_target
+ where_part_target_denominator['collection_date']['$lte'] = end_date_target
+ where_part_background_denominator['collection_date'] = {}
+ where_part_background_denominator['collection_date']['$gte'] = start_date_background
+ where_part_background_denominator['collection_date']['$lte'] = end_date_background
+
+ protein_length = len(array_protein)
+ if protein_length > 0:
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ single_where_part_or = {'$or': []}
+ for protein in array_protein:
+ specific_or = {}
+ real_key = translate_dictionary['product']
+ specific_or[f'{real_key}'] = {'$eq': protein}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ where_part_background['$and'].append(single_where_part_or)
+
+ if query_fields is not None:
+ for key in query_fields:
+ if key == 'minDate':
+ start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
+ where_part_target['collection_date']['$gte'] = start_date
+ where_part_background['collection_date']['$gte'] = start_date
+ where_part_target_denominator['collection_date']['$gte'] = start_date
+ where_part_background_denominator['collection_date']['$gte'] = start_date
+ elif key == 'maxDate':
+ stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
+ where_part_target['collection_date']['$lte'] = stop_date
+ where_part_background['collection_date']['$lte'] = stop_date
+ where_part_target_denominator['collection_date']['$lte'] = stop_date
+ where_part_background_denominator['collection_date']['$lte'] = stop_date
+
+ elif key == 'toExclude':
+ for fieldToExclude in query_fields[key]:
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ if '$and' not in where_part_target_denominator:
+ where_part_target_denominator['$and'] = []
+ if '$and' not in where_part_background_denominator:
+ where_part_background_denominator['$and'] = []
+
+ single_where_part = {'$and': []}
+ for geoToExclude in query_fields[key][fieldToExclude]:
+ real_field_to_exclude = fieldToExclude
+ if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
+ or fieldToExclude == 'region' or fieldToExclude == 'province':
+ real_field_to_exclude = 'location.' + fieldToExclude
+ specific_and = {}
+ geo_value = geoToExclude # .replace("'", "''")
+ specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
+ single_where_part['$and'].append(specific_and)
+ where_part_target['$and'].append(single_where_part)
+ where_part_background['$and'].append(single_where_part)
+ where_part_target_denominator['$and'].append(single_where_part)
+ where_part_background_denominator['$and'].append(single_where_part)
+
+ elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ if '$and' not in where_part_target_denominator:
+ where_part_target_denominator['$and'] = []
+ if '$and' not in where_part_background_denominator:
+ where_part_background_denominator['$and'] = []
+
+ real_key = key
+ if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ real_key = 'location.' + key
+ if isinstance(query_fields[key], list):
+ single_where_part_or = {'$or': []}
+ for itm in query_fields[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ where_part_background['$and'].append(single_where_part_or)
+ where_part_target_denominator['$and'].append(single_where_part_or)
+ where_part_background_denominator['$and'].append(single_where_part_or)
+ else:
+ single_where_part_or = {'$or': []}
+ replace_fields_value = query_fields[key] # .replace("'", "''")
+ specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ where_part_background['$and'].append(single_where_part_or)
+ where_part_target_denominator['$and'].append(single_where_part_or)
+ where_part_background_denominator['$and'].append(single_where_part_or)
+
+ else:
+ real_key = key
+ if key in translate_dictionary:
+ real_key = translate_dictionary[key]
+ replace_fields_value = query_fields[key]
+ if key != 'start_aa_original':
+ replace_fields_value = query_fields[key] # .replace("'", "''")
+ if real_key not in where_part_target:
+ where_part_target[real_key] = {}
+ if real_key not in where_part_background:
+ where_part_background[real_key] = {}
+ if real_key not in where_part_target_denominator:
+ where_part_target_denominator[real_key] = {}
+ if real_key not in where_part_background_denominator:
+ where_part_background_denominator[real_key] = {}
+ where_part_target[real_key]['$eq'] = replace_fields_value
+ where_part_background[real_key]['$eq'] = replace_fields_value
+ where_part_target_denominator[real_key]['$eq'] = replace_fields_value
+ where_part_background_denominator[real_key]['$eq'] = replace_fields_value
+
+ query_target = []
+ query_background = []
+ query_target_denominator = []
+ query_background_denominator = []
+
+ query_unwind_target = {"$unwind": "$muts"}
+ query_target.append(query_unwind_target)
+ query_unwind_background = {"$unwind": "$muts"}
+ query_background.append(query_unwind_background)
+
+ query_where_target = {"$match": where_part_target}
+ query_target.append(query_where_target)
+ query_where_background = {"$match": where_part_background}
+ query_background.append(query_where_background)
+ query_where_target_denominator = {"$match": where_part_target_denominator}
+ query_target_denominator.append(query_where_target_denominator)
+ query_where_background_denominator = {"$match": where_part_background_denominator}
+ query_background_denominator.append(query_where_background_denominator)
+
+ group_part = {"_id": {}}
+ real_field = translate_dictionary['product']
+ group_part["_id"]["product"] = f"${real_field}"
+ real_field = translate_dictionary['start_aa_original']
+ group_part["_id"]["start_aa_original"] = f"${real_field}"
+ real_field = translate_dictionary['sequence_aa_original']
+ group_part["_id"]["sequence_aa_original"] = f"${real_field}"
+ real_field = translate_dictionary['sequence_aa_alternative']
+ group_part["_id"]["sequence_aa_alternative"] = f"${real_field}"
+ group_part["count"] = {"$sum": 1}
+ query_group = {"$group": group_part}
+ query_target.append(query_group)
+ query_background.append(query_group)
+
+ group_part_denominator = {"_id": {}}
+ group_part_denominator["count"] = {"$sum": 1}
+ query_group = {"$group": group_part_denominator}
+ query_target_denominator.append(query_group)
+ query_background_denominator.append(query_group)
+
+ sort_part = {"count": -1}
+ query_sort = {"$sort": sort_part}
+ query_target.append(query_sort)
+ query_background.append(query_sort)
+
+ # print("query target", query_target)
+ # print("query target denominator", query_target_denominator)
+ # print("query background", query_background)
+ # print("query background denominator", query_background_denominator)
+
+ results_target = collection_db.aggregate(query_target, allowDiskUse=True)
+ results_background = collection_db.aggregate(query_background, allowDiskUse=True)
+ # results_target_denominator = collection_db.aggregate(query_target_denominator, allowDiskUse=True)
+ # results_background_denominator = collection_db.aggregate(query_background_denominator, allowDiskUse=True)
+ results_target_denominator = collection_db.count_documents(where_part_target_denominator)
+ results_background_denominator = collection_db.count_documents(where_part_background_denominator)
+
+ denominator_country = results_target_denominator
+ # for single_item in list(results_target_denominator):
+ # denominator_country = single_item['count']
+
+ denominator = results_background_denominator
+ # for single_item in list(results_background_denominator):
+ # denominator = single_item['count']
+
+ list_dict_target = []
+ for single_item in list(results_target):
+ single_item_remodel = {}
+ for key in single_item:
+ if key == '_id':
+ for k in single_item[key]:
+ single_item_remodel[k] = single_item[key][k]
+ else:
+ single_item_remodel['total'] = single_item['count']
+ list_dict_target.append(single_item_remodel)
+
+ list_dict_background = []
+ for single_item in list(results_background):
+ single_item_remodel = {}
+ for key in single_item:
+ if key == '_id':
+ for k in single_item[key]:
+ single_item_remodel[k] = single_item[key][k]
+ else:
+ single_item_remodel['total'] = single_item['count']
+ list_dict_background.append(single_item_remodel)
+
+ for item in list_dict_target:
+ numerator = 0
+ for item2 in list_dict_background:
+ if item['start_aa_original'] == item2['start_aa_original'] \
+ and item['sequence_aa_original'] == item2['sequence_aa_original'] \
+ and item['sequence_aa_alternative'] == item2['sequence_aa_alternative'] \
+ and item['product'] == item2['product']:
+ numerator = item2['total']
+
+ if denominator == 0:
+ fraction = 0
+ else:
+ fraction = (numerator / denominator)
+ if denominator_country == 0:
+ fraction_target = 0
+ else:
+ fraction_target = (item['total'] / denominator_country)
+
+ single_line = {'lineage': lineage, 'country': geo1, 'count_seq': item['total'],
+ 'target_time': start_target_time + ' / ' + end_target_time,
+ 'background_time': start_background_time + ' / ' + end_background_time,
+ 'start_aa_original': item['start_aa_original'],
+ 'product': item['product'],
+ 'sequence_aa_original': item['sequence_aa_original'],
+ 'sequence_aa_alternative': item['sequence_aa_alternative'],
+ 'numerator': numerator,
+ 'denominator': denominator,
+ 'fraction': fraction * 100,
+ 'denominator_target': denominator_country,
+ 'fraction_target': fraction_target * 100}
+
+ array_result.append(single_line)
+
+ all_result = array_result
+
+ mutation_table2 = []
+ arr_p_values = []
+ for item in all_result:
+ single_item = {}
+ if item['product'] == 'Spike (surface glycoprotein)':
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ # mutation = 'S_'
+ else:
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
+ single_item['mutation'] = mutation
+ single_item['start_aa_original'] = item['start_aa_original']
+ single_item['sequence_aa_original'] = item['sequence_aa_original']
+ single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
+ single_item['product'] = item['product']
+ single_item['mutation_position'] = item['start_aa_original']
+ single_item['target'] = item['target_time']
+ single_item['background'] = item['background_time']
+ single_item['country'] = item['country']
+ single_item['lineage'] = item['lineage']
+ single_item['count_target'] = item['count_seq']
+ single_item['percentage_background'] = item['fraction']
+ single_item['numerator_background'] = item['numerator']
+ single_item['denominator_background'] = item['denominator']
+ single_item['percentage_target'] = item['fraction_target']
+ single_item['numerator_target'] = item['count_seq']
+ single_item['denominator_target'] = item['denominator_target']
+
+ epsilon = 0.00000001
+ single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
+ (single_item['percentage_background'] + epsilon)
+
+ if single_item['odd_ratio'] >= 1:
+ # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item['numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item['numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+
+ arr_p_values.append(single_item['p_value'])
+ mutation_table2.append(single_item)
+
+ a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
+
+ i = 0
+ for item in mutation_table2:
+ item['pvalue'] = new_p_values[i]
+ i = i + 1
+
+ return mutation_table2
+
+
@api.route('/analyzeMutationProvinceRegion')
class FieldList(Resource):
    @api.doc('analyze_mutation_province_region')
    def post(self):
        """Compare amino-acid mutation frequencies of a target geographic
        area against its enclosing background area.

        Payload keys:
            protein (list[str]): product names to restrict the numerators to
                (empty list = all products).
            query (dict): filter fields; geo fields (geo_group / country /
                region / province), lineage, minDate / maxDate, plus any
                field translated via ``translate_dictionary``.
            toExcludeBackground (dict): geo field name -> list of values to
                exclude from the background population.

        Returns:
            list[dict]: one entry per mutation observed in the target, with
            counts, percentages, odd ratio, chi-square ``p_value`` and the
            Bonferroni-corrected ``pvalue``.
        """
        payload = api.payload
        array_protein = payload['protein']
        query_fields = payload['query']
        toExcludeBackground = payload['toExcludeBackground']

        # The most specific geo field present is the target; the background
        # is the next broader enclosing area, falling back to 'World'.
        if 'province' in query_fields:
            target = query_fields['province']
            target_key = 'province'
            if 'region' in query_fields:
                background = query_fields['region']
            elif 'country' in query_fields:
                background = query_fields['country']
            elif 'geo_group' in query_fields:
                background = query_fields['geo_group']
            else:
                background = 'World'
        elif 'region' in query_fields:
            target = query_fields['region']
            target_key = 'region'
            if 'country' in query_fields:
                background = query_fields['country']
            elif 'geo_group' in query_fields:
                background = query_fields['geo_group']
            else:
                background = 'World'
        elif 'country' in query_fields:
            target = query_fields['country']
            target_key = 'country'
            if 'geo_group' in query_fields:
                background = query_fields['geo_group']
            else:
                background = 'World'
        elif 'geo_group' in query_fields:
            target = query_fields['geo_group']
            target_key = 'geo_group'
            background = 'World'
        else:
            target = 'empty'
            target_key = 'empty'
            background = 'empty'

        if 'lineage' in query_fields:
            lineage = query_fields['lineage']
        else:
            lineage = 'empty'

        array_result = []

        # Four match filters: the two "numerator" filters count mutation
        # occurrences (used after $unwind of 'muts'); the two "denominator"
        # filters count whole sequences.
        where_part_target = {}
        where_part_background = {}
        where_part_target_denominator = {}
        where_part_background_denominator = {}
        start_date_target = datetime.strptime("2019-01-01", '%Y-%m-%d')
        start_date_background = datetime.strptime("2019-01-01", '%Y-%m-%d')

        # Only sequences whose collection date is fully specified
        # (precision flag == 2).
        for wp in (where_part_target, where_part_background,
                   where_part_target_denominator,
                   where_part_background_denominator):
            wp['c_coll_date_prec'] = {'$eq': 2}

        where_part_target['collection_date'] = {'$gte': start_date_target}
        where_part_target_denominator['collection_date'] = {'$gte': start_date_target}
        where_part_background['collection_date'] = {'$gte': start_date_background}
        where_part_background_denominator['collection_date'] = {'$gte': start_date_background}

        # Optional restriction to a subset of proteins. Applied to the
        # numerators only: denominators always count whole sequences.
        if len(array_protein) > 0:
            where_part_target.setdefault('$and', [])
            where_part_background.setdefault('$and', [])
            single_where_part_or = {'$or': []}
            real_key = translate_dictionary['product']
            for protein in array_protein:
                single_where_part_or['$or'].append({f'{real_key}': {'$eq': protein}})
            where_part_target['$and'].append(single_where_part_or)
            where_part_background['$and'].append(single_where_part_or)

        if query_fields is not None:
            for key in query_fields:
                if key == 'minDate':
                    start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part_target['collection_date']['$gte'] = start_date
                    where_part_background['collection_date']['$gte'] = start_date
                    where_part_target_denominator['collection_date']['$gte'] = start_date
                    where_part_background_denominator['collection_date']['$gte'] = start_date
                elif key == 'maxDate':
                    stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
                    where_part_target['collection_date']['$lte'] = stop_date
                    where_part_background['collection_date']['$lte'] = stop_date
                    where_part_target_denominator['collection_date']['$lte'] = stop_date
                    where_part_background_denominator['collection_date']['$lte'] = stop_date

                elif key in ('geo_group', 'country', 'region', 'province'):
                    where_part_target.setdefault('$and', [])
                    where_part_background.setdefault('$and', [])
                    where_part_target_denominator.setdefault('$and', [])
                    where_part_background_denominator.setdefault('$and', [])

                    # Geo attributes are nested under 'location' in the
                    # MongoDB documents.
                    real_key = 'location.' + key
                    values = query_fields[key] if isinstance(query_fields[key], list) \
                        else [query_fields[key]]
                    single_where_part_or_target = {'$or': []}
                    single_where_part_or_background = {'$or': []}
                    for field_value in values:
                        single_where_part_or_target['$or'].append(
                            {f'{real_key}': {'$eq': field_value}})
                        if key == target_key:
                            # Background = enclosing area MINUS the target.
                            single_where_part_or_background['$or'].append(
                                {f'{real_key}': {'$ne': field_value}})
                        else:
                            single_where_part_or_background['$or'].append(
                                {f'{real_key}': {'$eq': field_value}})
                    where_part_target['$and'].append(single_where_part_or_target)
                    where_part_background['$and'].append(single_where_part_or_background)
                    where_part_target_denominator['$and'].append(single_where_part_or_target)
                    where_part_background_denominator['$and'].append(single_where_part_or_background)

                else:
                    # Generic equality filter, with field-name translation.
                    real_key = translate_dictionary.get(key, key)
                    replace_fields_value = query_fields[key]
                    for wp in (where_part_target, where_part_background,
                               where_part_target_denominator,
                               where_part_background_denominator):
                        wp.setdefault(real_key, {})
                    where_part_target[real_key]['$eq'] = replace_fields_value
                    where_part_target_denominator[real_key]['$eq'] = replace_fields_value
                    if key == target_key:
                        where_part_background[real_key]['$ne'] = replace_fields_value
                        where_part_background_denominator[real_key]['$ne'] = replace_fields_value
                    else:
                        where_part_background[real_key]['$eq'] = replace_fields_value
                        where_part_background_denominator[real_key]['$eq'] = replace_fields_value

        # Exclude the requested areas from the background population.
        # NOTE(fix): skip empty lists (MongoDB rejects an empty '$and'
        # array) and make sure the '$and' keys exist before appending —
        # previously this raised KeyError when no geo filter created them.
        for fieldToExclude in toExcludeBackground:
            if not toExcludeBackground[fieldToExclude]:
                continue
            where_part_background.setdefault('$and', [])
            where_part_background_denominator.setdefault('$and', [])
            real_field_to_exclude = fieldToExclude
            if fieldToExclude in ('geo_group', 'country', 'region', 'province'):
                real_field_to_exclude = 'location.' + fieldToExclude
            single_where_part_and_background = {'$and': []}
            for geoToExclude in toExcludeBackground[fieldToExclude]:
                single_where_part_and_background['$and'].append(
                    {f'{real_field_to_exclude}': {'$ne': f"{geoToExclude}"}})
            where_part_background['$and'].append(single_where_part_and_background)
            where_part_background_denominator['$and'].append(single_where_part_and_background)

        # Numerator pipelines: one output row per distinct
        # (product, position, reference AA, alternative AA) mutation.
        # The $unwind must precede the $match because the filters may
        # reference per-mutation ('muts.*') fields.
        query_target = [{"$unwind": "$muts"}, {"$match": where_part_target}]
        query_background = [{"$unwind": "$muts"}, {"$match": where_part_background}]

        group_part = {"_id": {}}
        for field in ('product', 'start_aa_original',
                      'sequence_aa_original', 'sequence_aa_alternative'):
            group_part["_id"][field] = f"${translate_dictionary[field]}"
        group_part["count"] = {"$sum": 1}
        query_group = {"$group": group_part}
        query_target.append(query_group)
        query_background.append(query_group)

        query_sort = {"$sort": {"count": -1}}
        query_target.append(query_sort)
        query_background.append(query_sort)

        results_target = collection_db.aggregate(query_target, allowDiskUse=True)
        results_background = collection_db.aggregate(query_background, allowDiskUse=True)

        # Denominators are plain document counts (no $unwind needed).
        denominator_target = collection_db.count_documents(where_part_target_denominator)
        denominator = collection_db.count_documents(where_part_background_denominator)

        # Flatten the $group output: promote the '_id' sub-keys and rename
        # 'count' to 'total'.
        list_dict_target = []
        for single_item in results_target:
            single_item_remodel = dict(single_item['_id'])
            single_item_remodel['total'] = single_item['count']
            list_dict_target.append(single_item_remodel)

        list_dict_background = []
        for single_item in results_background:
            single_item_remodel = dict(single_item['_id'])
            single_item_remodel['total'] = single_item['count']
            list_dict_background.append(single_item_remodel)

        # Index the background counts by mutation identity for O(1) lookup
        # (the $group stage guarantees one row per key).
        background_totals = {
            (d['product'], d['start_aa_original'],
             d['sequence_aa_original'], d['sequence_aa_alternative']): d['total']
            for d in list_dict_background}

        for item in list_dict_target:
            numerator = background_totals.get(
                (item['product'], item['start_aa_original'],
                 item['sequence_aa_original'], item['sequence_aa_alternative']), 0)

            fraction = (numerator / denominator) if denominator != 0 else 0
            fraction_target = (item['total'] / denominator_target) \
                if denominator_target != 0 else 0

            array_result.append({'lineage': lineage, 'target': target,
                                 'background': background,
                                 'count_seq': item['total'],
                                 'product': item['product'],
                                 'start_aa_original': item['start_aa_original'],
                                 'sequence_aa_original': item['sequence_aa_original'],
                                 'sequence_aa_alternative': item['sequence_aa_alternative'],
                                 'numerator': numerator,
                                 'denominator': denominator,
                                 'fraction': fraction * 100,
                                 'denominator_target': denominator_target,
                                 'fraction_target': fraction_target * 100})

        all_result = array_result

        mutation_table2 = []
        arr_p_values = []
        for item in all_result:
            single_item = {}
            # Short protein label = first whitespace-separated token of the
            # product name (e.g. "Spike (surface glycoprotein)" -> "Spike").
            protein = item['product'].split(" ", 1)[0]
            mutation = protein + '_' + item['sequence_aa_original'] \
                + str(item['start_aa_original']) + item['sequence_aa_alternative']
            single_item['start_aa_original'] = item['start_aa_original']
            single_item['sequence_aa_original'] = item['sequence_aa_original']
            single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
            single_item['mutation'] = mutation
            single_item['product'] = item['product']
            single_item['mutation_position'] = item['start_aa_original']
            single_item['target'] = item['target']
            single_item['background'] = item['background']

            single_item['lineage'] = item['lineage']
            single_item['count_target'] = item['count_seq']
            single_item['percentage_background'] = item['fraction']
            single_item['numerator_background'] = item['numerator']
            single_item['denominator_background'] = item['denominator']
            single_item['percentage_target'] = item['fraction_target']
            single_item['numerator_target'] = item['count_seq']
            single_item['denominator_target'] = item['denominator_target']

            # Epsilon keeps the ratio defined when the background
            # percentage is zero.
            epsilon = 0.00000001
            single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
                                       (single_item['percentage_background'] + epsilon)

            # Chi-square test of independence on the 2x2 contingency table
            # [[background with mutation, background without],
            #  [target with mutation,     target without]].
            # (The former odd_ratio>=1 / else branches were identical, so
            # the p-value is computed once.)
            bg_without = single_item['denominator_background'] - single_item['numerator_background']
            tg_without = single_item['numerator_target']
            tg_without = single_item['denominator_target'] - single_item['numerator_target']
            if bg_without == 0 and tg_without == 0:
                # Every sequence in both populations carries the mutation:
                # degenerate table, no evidence of a difference.
                single_item['p_value'] = 1
            else:
                stat, p, dof, expected = chi2_contingency(
                    [[single_item['numerator_background'], bg_without],
                     [single_item['numerator_target'], tg_without]])
                single_item['p_value'] = p

            arr_p_values.append(single_item['p_value'])
            mutation_table2.append(single_item)

        # Bonferroni multiple-testing correction; multipletests fails on an
        # empty input, so skip it when no mutation was found.
        if arr_p_values:
            reject, new_p_values, alpha_sidak, alpha_bonf = \
                sms.multipletests(arr_p_values, method='bonferroni')
            for i, item in enumerate(mutation_table2):
                item['pvalue'] = new_p_values[i]

        return mutation_table2
+
+
+@api.route('/analyzeMutationTargetBackgroundFree')
+class FieldList(Resource):
+ @api.doc('analyze_mutation_target_background_free')
+ def post(self):
+
+ payload = api.payload
+ array_protein = payload['protein']
+ query_target = payload['query_target']
+ query_background = payload['query_background']
+ remove_overlapping = payload['removeOverlapping']
+
+ target = 'empty'
+ background = 'empty'
+
+ if 'lineage' in query_target:
+ lineage_target = query_target['lineage']
+ else:
+ lineage_target = 'empty'
+ if 'lineage' in query_background:
+ lineage_background = query_background['lineage']
+ else:
+ lineage_background = 'empty'
+
+ array_result = []
+
+ where_part_target = {}
+ where_part_background = {}
+ where_part_target_denominator = {}
+ where_part_background_denominator = {}
+ where_part_target_overlapping = {}
+ where_part_background_overlapping = {}
+ start_date_target = datetime.strptime("2019-01-01", '%Y-%m-%d')
+ start_date_background = datetime.strptime("2019-01-01", '%Y-%m-%d')
+
+ if 'accession_id' not in query_target:
+ where_part_target['c_coll_date_prec'] = {}
+ where_part_target['c_coll_date_prec']['$eq'] = 2
+ where_part_target_denominator['c_coll_date_prec'] = {}
+ where_part_target_denominator['c_coll_date_prec']['$eq'] = 2
+ where_part_target_overlapping['c_coll_date_prec'] = {}
+ where_part_target_overlapping['c_coll_date_prec']['$eq'] = 2
+ if 'accession_id' not in query_background:
+ where_part_background['c_coll_date_prec'] = {}
+ where_part_background['c_coll_date_prec']['$eq'] = 2
+ where_part_background_denominator['c_coll_date_prec'] = {}
+ where_part_background_denominator['c_coll_date_prec']['$eq'] = 2
+ where_part_background_overlapping['c_coll_date_prec'] = {}
+ where_part_background_overlapping['c_coll_date_prec']['$eq'] = 2
+
+ where_part_target['collection_date'] = {}
+ where_part_target['collection_date']['$gte'] = start_date_target
+ where_part_target_denominator['collection_date'] = {}
+ where_part_target_denominator['collection_date']['$gte'] = start_date_target
+ where_part_target_overlapping['collection_date'] = {}
+ where_part_target_overlapping['collection_date']['$gte'] = start_date_target
+ where_part_background['collection_date'] = {}
+ where_part_background['collection_date']['$gte'] = start_date_background
+ where_part_background_denominator['collection_date'] = {}
+ where_part_background_denominator['collection_date']['$gte'] = start_date_background
+ where_part_background_overlapping['collection_date'] = {}
+ where_part_background_overlapping['collection_date']['$gte'] = start_date_background
+
+ protein_length = len(array_protein)
+ if protein_length > 0:
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ single_where_part_or = {'$or': []}
+ for protein in array_protein:
+ specific_or = {}
+ real_key = translate_dictionary['product']
+ specific_or[f'{real_key}'] = {'$eq': protein}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ where_part_background['$and'].append(single_where_part_or)
+
+ if query_target is not None:
+ for key in query_target:
+ if key == 'minDate':
+ start_date = datetime.strptime(f"{query_target[key]}", '%Y-%m-%d')
+ where_part_target['collection_date']['$gte'] = start_date
+ where_part_target_denominator['collection_date']['$gte'] = start_date
+ where_part_target_overlapping['collection_date']['$gte'] = start_date
+ elif key == 'maxDate':
+ stop_date = datetime.strptime(f"{query_target[key]}", '%Y-%m-%d')
+ where_part_target['collection_date']['$lte'] = stop_date
+ where_part_target_denominator['collection_date']['$lte'] = stop_date
+ where_part_target_overlapping['collection_date']['$lte'] = stop_date
+
+ elif key == 'toExclude':
+ for fieldToExclude in query_target[key]:
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_target_denominator:
+ where_part_target_denominator['$and'] = []
+ if '$and' not in where_part_target_overlapping:
+ where_part_target_overlapping['$and'] = []
+
+ if len(query_target[key][fieldToExclude]) > 0:
+ single_where_part = {'$and': []}
+ for geoToExclude in query_target[key][fieldToExclude]:
+ real_field_to_exclude = fieldToExclude
+ if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
+ or fieldToExclude == 'region' or fieldToExclude == 'province':
+ real_field_to_exclude = 'location.' + fieldToExclude
+ specific_and = {}
+ geo_value = geoToExclude # .replace("'", "''")
+ specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
+ single_where_part['$and'].append(specific_and)
+ where_part_target['$and'].append(single_where_part)
+ where_part_target_denominator['$and'].append(single_where_part)
+ where_part_target_overlapping['$and'].append(single_where_part)
+
+ elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_target_denominator:
+ where_part_target_denominator['$and'] = []
+ if '$and' not in where_part_target_overlapping:
+ where_part_target_overlapping['$and'] = []
+
+ real_key = key
+ if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ real_key = 'location.' + key
+ if isinstance(query_target[key], list):
+ single_where_part_or = {'$or': []}
+ for itm in query_target[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ where_part_target_denominator['$and'].append(single_where_part_or)
+ where_part_target_overlapping['$and'].append(single_where_part_or)
+ else:
+ single_where_part_or = {'$or': []}
+ replace_fields_value = query_target[key] # .replace("'", "''")
+ specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ where_part_target_denominator['$and'].append(single_where_part_or)
+ where_part_target_overlapping['$and'].append(single_where_part_or)
+
+ else:
+ real_key = key
+ if key in translate_dictionary:
+ real_key = translate_dictionary[key]
+ if isinstance(query_target[key], list):
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_target_denominator:
+ where_part_target_denominator['$and'] = []
+ if '$and' not in where_part_target_overlapping:
+ where_part_target_overlapping['$and'] = []
+ single_where_part_or = {'$or': []}
+ for itm in query_target[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ where_part_target_denominator['$and'].append(single_where_part_or)
+ where_part_target_overlapping['$and'].append(single_where_part_or)
+ else:
+ replace_fields_value = query_target[key]
+ if key != 'start_aa_original':
+ replace_fields_value = query_target[key] # .replace("'", "''")
+ if real_key not in where_part_target:
+ where_part_target[real_key] = {}
+ if real_key not in where_part_target_denominator:
+ where_part_target_denominator[real_key] = {}
+ if real_key not in where_part_target_overlapping:
+ where_part_target_overlapping[real_key] = {}
+ where_part_target[real_key]['$eq'] = replace_fields_value
+ where_part_target_denominator[real_key]['$eq'] = replace_fields_value
+ where_part_target_overlapping[real_key]['$eq'] = replace_fields_value
+
+ if query_background is not None:
+ for key in query_background:
+ if key == 'minDate':
+ start_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d')
+ where_part_background['collection_date']['$gte'] = start_date
+ where_part_background_denominator['collection_date']['$gte'] = start_date
+ where_part_background_overlapping['collection_date']['$gte'] = start_date
+ elif key == 'maxDate':
+ stop_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d')
+ where_part_background['collection_date']['$lte'] = stop_date
+ where_part_background_denominator['collection_date']['$lte'] = stop_date
+ where_part_background_overlapping['collection_date']['$lte'] = stop_date
+
+ elif key == 'toExclude':
+ for fieldToExclude in query_background[key]:
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ if '$and' not in where_part_background_denominator:
+ where_part_background_denominator['$and'] = []
+ if '$and' not in where_part_background_overlapping:
+ where_part_background_overlapping['$and'] = []
+
+ if len(query_background[key][fieldToExclude]) > 0:
+ single_where_part = {'$and': []}
+ for geoToExclude in query_background[key][fieldToExclude]:
+ real_field_to_exclude = fieldToExclude
+ if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
+ or fieldToExclude == 'region' or fieldToExclude == 'province':
+ real_field_to_exclude = 'location.' + fieldToExclude
+ specific_and = {}
+ geo_value = geoToExclude # .replace("'", "''")
+ specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
+ single_where_part['$and'].append(specific_and)
+ where_part_background['$and'].append(single_where_part)
+ where_part_background_denominator['$and'].append(single_where_part)
+ where_part_background_overlapping['$and'].append(single_where_part)
+
+ elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ if '$and' not in where_part_background_denominator:
+ where_part_background_denominator['$and'] = []
+ if '$and' not in where_part_background_overlapping:
+ where_part_background_overlapping['$and'] = []
+
+ real_key = key
+ if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ real_key = 'location.' + key
+ if isinstance(query_background[key], list):
+ single_where_part_or = {'$or': []}
+ for itm in query_background[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_background['$and'].append(single_where_part_or)
+ where_part_background_denominator['$and'].append(single_where_part_or)
+ where_part_background_overlapping['$and'].append(single_where_part_or)
+ else:
+ single_where_part_or = {'$or': []}
+ replace_fields_value = query_background[key] # .replace("'", "''")
+ specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_background['$and'].append(single_where_part_or)
+ where_part_background_denominator['$and'].append(single_where_part_or)
+ where_part_background_overlapping['$and'].append(single_where_part_or)
+
+ else:
+ real_key = key
+ if key in translate_dictionary:
+ real_key = translate_dictionary[key]
+ if isinstance(query_background[key], list):
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ if '$and' not in where_part_background_denominator:
+ where_part_background_denominator['$and'] = []
+ if '$and' not in where_part_background_overlapping:
+ where_part_background_overlapping['$and'] = []
+ single_where_part_or = {'$or': []}
+ for itm in query_background[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_background['$and'].append(single_where_part_or)
+ where_part_background_denominator['$and'].append(single_where_part_or)
+ where_part_background_overlapping['$and'].append(single_where_part_or)
+ else:
+ replace_fields_value = query_background[key]
+ if key != 'start_aa_original':
+ replace_fields_value = query_background[key] # .replace("'", "''")
+ if real_key not in where_part_background:
+ where_part_background[real_key] = {}
+ if real_key not in where_part_background_denominator:
+ where_part_background_denominator[real_key] = {}
+ if real_key not in where_part_background_overlapping:
+ where_part_background_overlapping[real_key] = {}
+ where_part_background[real_key]['$eq'] = replace_fields_value
+ where_part_background_denominator[real_key]['$eq'] = replace_fields_value
+ where_part_background_overlapping[real_key]['$eq'] = replace_fields_value
+
+ if remove_overlapping.lower() == 'target' or remove_overlapping.lower() == 'both':
+ query_background_overlapping = []
+ query_where_background_overlapping = {"$match": where_part_background_overlapping}
+ query_background_overlapping.append(query_where_background_overlapping)
+ group_part_background_overlapping = {"_id": "$_id"}
+ query_group_background_overlapping = {"$group": group_part_background_overlapping}
+ query_background_overlapping.append(query_group_background_overlapping)
+ # ("query target overlapping", query_background_overlapping)
+ # results_background_overlapping = collection_db.aggregate(query_background_overlapping, allowDiskUse=True)
+ results_background_overlapping = collection_db.find(where_part_background_overlapping, {})
+ array_background_overlapping = list(results_background_overlapping)
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ if '$and' not in where_part_target_denominator:
+ where_part_target_denominator['$and'] = []
+ single_where_part_target_overlapping_and = {'$and': []}
+ for single_accession_id in array_background_overlapping:
+ specific_and = {}
+ field = translate_dictionary['accession_id']
+ real_field = field # "$" + field
+ field_value = single_accession_id[f"{field}"]
+ specific_and[f'{real_field}'] = {'$ne': field_value}
+ single_where_part_target_overlapping_and['$and'].append(specific_and)
+ where_part_target['$and'].append(single_where_part_target_overlapping_and)
+ where_part_target_denominator['$and'].append(single_where_part_target_overlapping_and)
+
+ if remove_overlapping.lower() == 'background' or remove_overlapping.lower() == 'both':
+ query_target_overlapping = []
+ query_where_target_overlapping = {"$match": where_part_target_overlapping}
+ query_target_overlapping.append(query_where_target_overlapping)
+ group_part_target_overlapping = {"_id": "$_id"}
+ query_group_target_overlapping = {"$group": group_part_target_overlapping}
+ query_target_overlapping.append(query_group_target_overlapping)
+ # print("query background overlapping", query_target_overlapping)
+ # results_target_overlapping = collection_db.aggregate(query_target_overlapping, allowDiskUse=True)
+ results_target_overlapping = collection_db.find(where_part_target_overlapping, {})
+ array_target_overlapping = list(results_target_overlapping)
+ if '$and' not in where_part_background:
+ where_part_background['$and'] = []
+ if '$and' not in where_part_background_denominator:
+ where_part_background_denominator['$and'] = []
+ single_where_part_background_overlapping_and = {'$and': []}
+ for single_accession_id in array_target_overlapping:
+ specific_and = {}
+ field = translate_dictionary['accession_id']
+ real_field = field # "$" + field
+ field_value = single_accession_id[f"{field}"]
+ specific_and[f'{real_field}'] = {'$ne': field_value}
+ single_where_part_background_overlapping_and['$and'].append(specific_and)
+ where_part_background['$and'].append(single_where_part_background_overlapping_and)
+ where_part_background_denominator['$and'].append(single_where_part_background_overlapping_and)
+
+ query_target = []
+ query_background = []
+ query_target_denominator = []
+ query_background_denominator = []
+
+ query_unwind_target = {"$unwind": "$muts"}
+ query_target.append(query_unwind_target)
+ query_unwind_background = {"$unwind": "$muts"}
+ query_background.append(query_unwind_background)
+
+ query_where_target = {"$match": where_part_target}
+ query_target.append(query_where_target)
+ query_where_background = {"$match": where_part_background}
+ query_background.append(query_where_background)
+ query_where_target_denominator = {"$match": where_part_target_denominator}
+ query_target_denominator.append(query_where_target_denominator)
+ query_where_background_denominator = {"$match": where_part_background_denominator}
+ query_background_denominator.append(query_where_background_denominator)
+
+ group_part = {"_id": {}}
+ real_field = translate_dictionary['product']
+ group_part["_id"]["product"] = f"${real_field}"
+ real_field = translate_dictionary['start_aa_original']
+ group_part["_id"]["start_aa_original"] = f"${real_field}"
+ real_field = translate_dictionary['sequence_aa_original']
+ group_part["_id"]["sequence_aa_original"] = f"${real_field}"
+ real_field = translate_dictionary['sequence_aa_alternative']
+ group_part["_id"]["sequence_aa_alternative"] = f"${real_field}"
+ group_part["count"] = {"$sum": 1}
+ query_group = {"$group": group_part}
+ query_target.append(query_group)
+ query_background.append(query_group)
+
+ group_part_denominator = {"_id": {}}
+ group_part_denominator["count"] = {"$sum": 1}
+ query_group = {"$group": group_part_denominator}
+ query_target_denominator.append(query_group)
+ query_background_denominator.append(query_group)
+
+ sort_part = {"count": -1}
+ query_sort = {"$sort": sort_part}
+ query_target.append(query_sort)
+ query_background.append(query_sort)
+
+ # print("query target", query_target)
+ # print("query target denominator", query_target_denominator)
+ # print("query background", query_background)
+ # print("query background denominator", query_background_denominator)
+
+ results_target = collection_db.aggregate(query_target, allowDiskUse=True)
+ results_background = collection_db.aggregate(query_background, allowDiskUse=True)
+ # results_target_denominator = collection_db.aggregate(query_target_denominator, allowDiskUse=True)
+ # results_background_denominator = collection_db.aggregate(query_background_denominator, allowDiskUse=True)
+ results_target_denominator = collection_db.count_documents(where_part_target_denominator)
+ results_background_denominator = collection_db.count_documents(where_part_background_denominator)
+
+ denominator_target = results_target_denominator
+ # for single_item in list(results_target_denominator):
+ # denominator_target = single_item['count']
+
+ denominator = results_background_denominator
+ # for single_item in list(results_background_denominator):
+ # denominator = single_item['count']
+
+ list_dict_target = []
+ for single_item in list(results_target):
+ single_item_remodel = {}
+ for key in single_item:
+ if key == '_id':
+ for k in single_item[key]:
+ single_item_remodel[k] = single_item[key][k]
+ else:
+ single_item_remodel['total'] = single_item['count']
+ list_dict_target.append(single_item_remodel)
+
+ list_dict_background = []
+ for single_item in list(results_background):
+ single_item_remodel = {}
+ for key in single_item:
+ if key == '_id':
+ for k in single_item[key]:
+ single_item_remodel[k] = single_item[key][k]
+ else:
+ single_item_remodel['total'] = single_item['count']
+ list_dict_background.append(single_item_remodel)
+
+ for item in list_dict_target:
+ numerator = 0
+ for item2 in list_dict_background:
+ if item['start_aa_original'] == item2['start_aa_original'] \
+ and item['sequence_aa_original'] == item2['sequence_aa_original'] \
+ and item['sequence_aa_alternative'] == item2['sequence_aa_alternative'] \
+ and item['product'] == item2['product']:
+ numerator = item2['total']
+
+ if denominator == 0:
+ fraction = 0
+ else:
+ fraction = (numerator / denominator)
+ if denominator_target == 0:
+ fraction_target = 0
+ else:
+ fraction_target = (item['total'] / denominator_target)
+
+ single_line = {'lineage': 'empty', 'lineage_target': lineage_target,
+ 'lineage_background': lineage_background,
+ 'target': target, 'background': background,
+ 'count_seq': item['total'],
+ 'product': item['product'],
+ 'start_aa_original': item['start_aa_original'],
+ 'sequence_aa_original': item['sequence_aa_original'],
+ 'sequence_aa_alternative': item['sequence_aa_alternative'],
+ 'numerator': numerator,
+ 'denominator': denominator,
+ 'fraction': fraction * 100,
+ 'denominator_target': denominator_target,
+ 'fraction_target': fraction_target * 100}
+
+ array_result.append(single_line)
+
+ all_result = array_result
+
+ mutation_table2 = []
+ arr_p_values = []
+ for item in all_result:
+ single_item = {}
+ if item['product'] == 'Spike (surface glycoprotein)':
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ # mutation = 'S_'
+ else:
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
+ single_item['start_aa_original'] = item['start_aa_original']
+ single_item['sequence_aa_original'] = item['sequence_aa_original']
+ single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
+ single_item['mutation'] = mutation
+ single_item['product'] = item['product']
+ single_item['mutation_position'] = item['start_aa_original']
+ single_item['target'] = item['target']
+ single_item['background'] = item['background']
+
+ single_item['lineage'] = item['lineage']
+ single_item['lineage_target'] = item['lineage_target']
+ single_item['lineage_background'] = item['lineage_background']
+ single_item['count_target'] = item['count_seq']
+ single_item['percentage_background'] = item['fraction']
+ single_item['numerator_background'] = item['numerator']
+ single_item['denominator_background'] = item['denominator']
+ single_item['percentage_target'] = item['fraction_target']
+ single_item['numerator_target'] = item['count_seq']
+ single_item['denominator_target'] = item['denominator_target']
+
+ epsilon = 0.00000001
+ single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
+ (single_item['percentage_background'] + epsilon)
+
+ if single_item['odd_ratio'] >= 1:
+ if item['denominator'] != 0:
+ # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ # single_item['p_value'] = 0
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ if item['denominator'] != 0:
+ # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ # single_item['p_value'] = 0
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+
+ arr_p_values.append(single_item['p_value'])
+ mutation_table2.append(single_item)
+
+ a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
+
+ i = 0
+ for item in mutation_table2:
+ item['pvalue'] = new_p_values[i]
+ i = i + 1
+
+ return mutation_table2
+
+
@api.route('/countOverlappingSequenceTargetBackground')
class FieldList(Resource):
    # Geo attributes are stored inside the embedded 'location' sub-document.
    # NOTE: helpers below are reached via self (never via the class name)
    # because this module re-uses the name FieldList for several Resources.
    _GEO_FIELDS = ('geo_group', 'country', 'region', 'province')

    @api.doc('count_overlapping_sequence_target_background')
    def post(self):
        """Count the sequences that satisfy BOTH the target and background filters.

        Payload: ``{'query_target': {...}, 'query_background': {...}}`` where
        each dict maps field names -- or the special keys 'minDate', 'maxDate'
        and 'toExclude' -- to the requested values.

        Every background constraint is also merged into the target filter, so
        the target filter ends up describing the intersection of the two
        populations; counting its matches yields the overlap size.

        Returns a one-element list: ``[{'count': <overlap size>}]``.
        """
        payload = api.payload
        query_target = payload['query_target']
        query_background = payload['query_background']

        where_target = {}
        where_background = {}

        # c_coll_date_prec == 2 presumably marks day-precise collection dates
        # -- TODO confirm; the filter is skipped when an explicit accession_id
        # is requested.
        if 'accession_id' not in query_target:
            where_target['c_coll_date_prec'] = {'$eq': 2}
        if 'accession_id' not in query_background:
            where_background['c_coll_date_prec'] = {'$eq': 2}

        # Default date window: everything collected from 2019-01-01 onwards.
        default_start = datetime.strptime("2019-01-01", '%Y-%m-%d')
        where_target['collection_date'] = {'$gte': default_start}
        where_background['collection_date'] = {'$gte': default_start}

        if query_target is not None:
            for key in query_target:
                if key == 'minDate':
                    where_target['collection_date']['$gte'] = \
                        datetime.strptime(f"{query_target[key]}", '%Y-%m-%d')
                elif key == 'maxDate':
                    where_target['collection_date']['$lte'] = \
                        datetime.strptime(f"{query_target[key]}", '%Y-%m-%d')
                else:
                    self._apply_condition(key, query_target[key], [where_target])

        if query_background is not None:
            for key in query_background:
                if key == 'minDate':
                    start_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d')
                    where_background['collection_date']['$gte'] = start_date
                    self._tighten_window(where_target['collection_date'], start_date, '$gte')
                elif key == 'maxDate':
                    stop_date = datetime.strptime(f"{query_background[key]}", '%Y-%m-%d')
                    where_background['collection_date']['$lte'] = stop_date
                    self._tighten_window(where_target['collection_date'], stop_date, '$lte')
                else:
                    # Background constraints go into BOTH filters so that
                    # where_target describes the target/background overlap.
                    self._apply_condition(key, query_background[key],
                                          [where_background, where_target])

        # where_target now carries the target AND the background constraints;
        # a plain document count of its matches is the overlap size. (The old
        # code also built an aggregation pipeline over the background filter
        # that was never executed -- removed as dead code.)
        overlap_count = collection_db.count_documents(where_target)
        return [{"count": overlap_count}]

    @staticmethod
    def _tighten_window(window, bound, op):
        """Narrow the target date *window* in place with a background *bound*.

        *op* is '$gte' (minDate) or '$lte' (maxDate); the bound is applied
        only when it falls strictly inside the current window. A missing bound
        in the window is treated as unbounded -- the previous code raised
        KeyError on window['$lte'] whenever the target query had no 'maxDate'.
        """
        lower = window.get('$gte')
        upper = window.get('$lte')
        if (lower is None or lower < bound) and (upper is None or bound < upper):
            window[op] = bound

    def _apply_condition(self, key, value, where_parts):
        """Translate one (non-date) filter entry into MongoDB operators and
        add it to every filter document in *where_parts*.

        Handles the 'toExclude' mapping (per-field lists of values to reject),
        geo fields (rewritten to their 'location.' path) and free fields
        (renamed through translate_dictionary when listed there).
        """
        if key == 'toExclude':
            for excluded_field, excluded_values in value.items():
                if not excluded_values:
                    # An empty exclusion list would yield an empty '$and'
                    # clause, which MongoDB rejects; nothing to exclude.
                    continue
                real_field = excluded_field
                if excluded_field in self._GEO_FIELDS:
                    real_field = 'location.' + excluded_field
                clause = {'$and': [{real_field: {'$ne': geo_value}}
                                   for geo_value in excluded_values]}
                for part in where_parts:
                    part.setdefault('$and', []).append(clause)
        elif key in self._GEO_FIELDS:
            real_key = 'location.' + key
            values = value if isinstance(value, list) else [value]
            clause = {'$or': [{real_key: {'$eq': item}} for item in values]}
            for part in where_parts:
                part.setdefault('$and', []).append(clause)
        else:
            real_key = translate_dictionary.get(key, key)
            if isinstance(value, list):
                clause = {'$or': [{real_key: {'$eq': item}} for item in value]}
                for part in where_parts:
                    part.setdefault('$and', []).append(clause)
            else:
                # Scalar equality merges into any existing operators on the field.
                for part in where_parts:
                    part.setdefault(real_key, {})['$eq'] = value
+
+
+@api.route('/getAccessionIds')
+class FieldList(Resource):
+ @api.doc('get_accession_ids')
+ def post(self):
+ payload = api.payload
+ # payload = {'query': {'lineage': 'B.1.1.7', 'country': 'Italy', 'geo_group': 'Europe',
+ # 'minDateTerget': '2021-03-31', 'maxDateTarget': '2021-06-28',
+ # 'start_aa_original': 614, 'sequence_aa_original': 'D',
+ # 'sequence_aa_alternative': 'G', 'product': 'Spike (surface glycoprotein)'},
+ # 'query_false': ''}
+ query_false_field = payload['query_false']
+ query_fields = payload['query']
+ query_fields_target = payload['query_target']
+
+ if "lineage" in query_fields and query_fields['lineage'] == 'empty':
+ del query_fields['lineage']
+ if "lineage" in query_fields_target and query_fields_target['lineage'] == 'empty':
+ del query_fields_target['lineage']
+
+ where_part_target = {}
+ where_part = {}
+ start_date = datetime.strptime("2019-01-01", '%Y-%m-%d')
+ where_part_target['c_coll_date_prec'] = {}
+ where_part_target['c_coll_date_prec']['$eq'] = 2
+ where_part['c_coll_date_prec'] = {}
+ where_part['c_coll_date_prec']['$eq'] = 2
+
+ where_part_target['collection_date'] = {}
+ where_part_target['collection_date']['$gte'] = start_date
+ where_part['collection_date'] = {}
+ where_part['collection_date']['$gte'] = start_date
+
+ if query_fields_target != 'empty':
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+
+ if query_fields_target is not None:
+ for key in query_fields_target:
+ if key == 'minDateTarget' or key == 'minDateBackground':
+ start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
+ where_part_target['collection_date']['$gte'] = start_date
+ elif key == 'maxDateTarget' or key == 'maxDateBackground':
+ stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
+ where_part_target['collection_date']['$lte'] = stop_date
+
+ elif key == 'toExclude':
+ for fieldToExclude in query_fields_target[key]:
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+
+ single_where_part = {'$and': []}
+ for geoToExclude in query_fields_target[key][fieldToExclude]:
+ real_field_to_exclude = fieldToExclude
+ if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
+ or fieldToExclude == 'region' or fieldToExclude == 'province':
+ real_field_to_exclude = 'location.' + fieldToExclude
+ specific_and = {}
+ geo_value = geoToExclude # .replace("'", "''")
+ specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
+ single_where_part['$and'].append(specific_and)
+ where_part_target['$and'].append(single_where_part)
+
+ elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+
+ real_key = key
+ if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ real_key = 'location.' + key
+ if isinstance(query_fields_target[key], list):
+ single_where_part_or = {'$or': []}
+ for itm in query_fields_target[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ else:
+ single_where_part_or = {'$or': []}
+ replace_fields_value = query_fields_target[key] # .replace("'", "''")
+ specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+
+ else:
+ real_key = key
+ if key in translate_dictionary:
+ real_key = translate_dictionary[key]
+ if isinstance(query_fields_target[key], list):
+ if '$and' not in where_part_target:
+ where_part_target['$and'] = []
+ single_where_part_or = {'$or': []}
+ for itm in query_fields_target[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part_target['$and'].append(single_where_part_or)
+ else:
+ replace_fields_value = query_fields_target[key]
+ if key != 'start_aa_original':
+ replace_fields_value = query_fields_target[key] # .replace("'", "''")
+ if real_key not in where_part_target:
+ where_part_target[real_key] = {}
+ where_part_target[real_key]['$eq'] = replace_fields_value
+
+ if query_fields is not None:
+ for key in query_fields:
+ if key == 'minDateTarget' or key == 'minDateBackground':
+ start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
+ where_part['collection_date']['$gte'] = start_date
+ elif key == 'maxDateTarget' or key == 'maxDateBackground':
+ stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
+ where_part['collection_date']['$lte'] = stop_date
+
+ elif key == 'toExclude':
+ for fieldToExclude in query_fields[key]:
+ if '$and' not in where_part:
+ where_part['$and'] = []
+
+ single_where_part = {'$and': []}
+ for geoToExclude in query_fields[key][fieldToExclude]:
+ real_field_to_exclude = fieldToExclude
+ if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
+ or fieldToExclude == 'region' or fieldToExclude == 'province':
+ real_field_to_exclude = 'location.' + fieldToExclude
+ specific_and = {}
+ geo_value = geoToExclude # .replace("'", "''")
+ specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
+ single_where_part['$and'].append(specific_and)
+ where_part['$and'].append(single_where_part)
+
+ elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ if '$and' not in where_part:
+ where_part['$and'] = []
+
+ real_key = key
+ if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
+ real_key = 'location.' + key
+ if key == query_false_field:
+ single_where_part_or = {'$or': []}
+ specific_or = {f'{real_key}': {'$eq': None}}
+ single_where_part_or['$or'].append(specific_or)
+ specific_or = {f'{real_key}': {'$ne': query_fields[key]}}
+ single_where_part_or['$or'].append(specific_or)
+ where_part['$and'].append(single_where_part_or)
+ else:
+ if isinstance(query_fields[key], list):
+ single_where_part_or = {'$or': []}
+ for itm in query_fields[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part['$and'].append(single_where_part_or)
+ else:
+ single_where_part_or = {'$or': []}
+ replace_fields_value = query_fields[key] # .replace("'", "''")
+ specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
+ single_where_part_or['$or'].append(specific_or)
+ where_part['$and'].append(single_where_part_or)
+
+ else:
+ real_key = key
+ if key in translate_dictionary:
+ real_key = translate_dictionary[key]
+ if isinstance(query_fields[key], list):
+ if '$and' not in where_part:
+ where_part['$and'] = []
+ single_where_part_or = {'$or': []}
+ for itm in query_fields[key]:
+ specific_or = {}
+ field_value = itm # .replace("'", "''")
+ specific_or[f'{real_key}'] = {'$eq': field_value}
+ single_where_part_or['$or'].append(specific_or)
+ where_part['$and'].append(single_where_part_or)
+ else:
+ if key == query_false_field:
+ single_where_part_or = {'$or': []}
+ specific_or = {f'{real_key}': {'$eq': None}}
+ single_where_part_or['$or'].append(specific_or)
+ specific_or = {f'{real_key}': {'$ne': query_fields[key]}}
+ single_where_part_or['$or'].append(specific_or)
+ where_part['$and'].append(single_where_part_or)
+ else:
+ replace_fields_value = query_fields[key]
+ if key != 'start_aa_original':
+ replace_fields_value = query_fields[key] # .replace("'", "''")
+ if real_key not in where_part:
+ where_part[real_key] = {}
+ where_part[real_key]['$eq'] = replace_fields_value
+
+ query_target = []
+ query = []
+
+ query_unwind_target = {"$unwind": "$muts"}
+ query_target.append(query_unwind_target)
+ query_unwind = {"$unwind": "$muts"}
+ query.append(query_unwind)
+
+ query_where_target = {"$match": where_part_target}
+ query_target.append(query_where_target)
+ query_where = {"$match": where_part}
+ query.append(query_where)
+
+ group_part = {"_id": {"accession_id": "$_id"}}
+ query_group = {"$group": group_part}
+ query_target.append(query_group)
+ query.append(query_group)
+
+ sort_part = {"_id": 1}
+ query_sort = {"$sort": sort_part}
+ query_target.append(query_sort)
+ query.append(query_sort)
+
+ list_dict_target = []
+ if query_fields_target != 'empty':
+ # print("query target", query_target)
+ results_target = collection_db.aggregate(query_target, allowDiskUse=True)
+ for single_item in list(results_target):
+ for key in single_item:
+ if key == '_id':
+ for k in single_item[key]:
+ list_dict_target.append(single_item[key][k])
+
+ # print("query", query)
+ results = collection_db.aggregate(query, allowDiskUse=True)
+ list_dict = []
+ for single_item in list(results):
+ for key in single_item:
+ if key == '_id':
+ for k in single_item[key]:
+ if single_item[key][k] not in list_dict_target:
+ list_dict.append(single_item[key][k])
+
+ acc_ids_result = [{'acc_ids': list_dict}]
+
+ return acc_ids_result
+
+
all_important_mutation_dict = {}


def get_all_important_mutation():
    """Compute, per lineage, the mutations present in >= 75% of its sequences.

    Results are stored in the module-level ``all_important_mutation_dict`` as
    ``{lineage: [sequence_count, sorted_mutation_labels]}``, where each label
    has the shape ``"<pro>_<org><loc><alt>"``.  The function re-schedules
    itself (via ``threading.Timer``) for 02:00 of the following day so the
    cache stays fresh.
    """
    print("inizio request important mutation")

    # Total number of sequences per lineage.
    count_pipeline = [
        {"$group": {"_id": '$covv_lineage', "count": {"$sum": 1}}},
    ]
    lineage_info = {}
    for row in collection_db.aggregate(count_pipeline, allowDiskUse=True):
        lineage_info[row['_id']] = (row['count'], [])

    # Number of sequences carrying each (lineage, mutation) pair.
    mutation_pipeline = [
        {"$unwind": "$muts"},
        {"$group": {"_id": {'lin': '$covv_lineage',
                            'pro': "$muts.pro",
                            'org': "$muts.org",
                            'loc': "$muts.loc",
                            'alt': "$muts.alt",
                            },
                    "count": {"$sum": 1}}},
    ]
    rows = collection_db.aggregate(mutation_pipeline, allowDiskUse=True)

    # Keep only "characterising" mutations: present in at least 75% of the
    # lineage's sequences.
    frequent = (row['_id'] for row in rows
                if row['count'] / lineage_info[row['_id']['lin']][0] >= 0.75)
    for mut in frequent:
        label = f"{mut['pro']}_{mut['org']}{mut['loc']}{mut['alt']}"
        lineage_info[mut['lin']][1].append(label)

    lineage_info = {lin: [cnt, sorted(labels)]
                    for lin, (cnt, labels) in lineage_info.items()}

    all_important_mutation_dict.update(lineage_info)

    print("fine request important mutation")
    # Schedule the next refresh for 02:00 of the following day.
    now = datetime.today()
    next_run = now.replace(hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
    timer = Timer((next_run - now).total_seconds(), get_all_important_mutation)
    timer.start()
+
+
all_protein_dict = {}


def get_all_protein():
    """Cache the names of all annotated SARS-CoV-2 products.

    Reads the static ``sars_cov_2_products`` annotation table and stores the
    product names (stringified) in the module-level ``all_protein_dict``
    under the key ``'all_protein'``.  Re-schedules itself for 02:00 of the
    following day.
    """
    print("inizio request protein")
    all_protein_dict['all_protein'] = [str(product.get('name'))
                                       for product in sars_cov_2_products['A']]
    print("fine request protein")
    # Schedule the next refresh for 02:00 of the following day.
    now = datetime.today()
    next_run = now.replace(hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
    timer = Timer((next_run - now).total_seconds(), get_all_protein)
    timer.start()
+
+
all_accession_id_dict = {}


def get_all_accession_id():
    """Cache the accession ids of all sequences with a precise collection date.

    Queries MongoDB for every sequence collected from 2019-01-01 onward whose
    collection date is fully specified (``c_coll_date_prec == 2``) and stores
    the list of ``_id`` values in the module-level ``all_accession_id_dict``
    under the key ``'all_acc_id'``.  Re-schedules itself (via
    ``threading.Timer``) for 02:00 of the following day.
    """
    print("inizio request accession id")
    query = {
        'collection_date': {
            '$gte': datetime.strptime("2019-01-01", '%Y-%m-%d')
        },
        # 2 == day-level precision: year, month and day are all known.
        'c_coll_date_prec': 2
    }

    results = collection_db.find(query, {})
    all_acc_id = [single_item['_id'] for single_item in results]
    all_accession_id_dict['all_acc_id'] = all_acc_id
    print("fine request accession id")
    x = datetime.today()
    y = x.replace(hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
    secs = (y - x).total_seconds()
    # BUG FIX: this timer previously re-scheduled get_all_geo(), so the
    # accession-id cache was populated only once at start-up and never
    # refreshed (while get_all_geo ran twice a day).  Re-schedule this
    # function instead, matching the pattern of the other cache loaders.
    t3 = Timer(secs, get_all_accession_id)
    t3.start()
+
+
all_geo_dict = {}


def get_all_geo():
    """Cache sequence counts grouped by (geo_group, country, region, province).

    Considers only sequences collected from 2019-01-01 onward with a fully
    specified collection date (``c_coll_date_prec == 2``).  The resulting list
    of ``{geo_group, country, region, province, count}`` dicts is stored in
    the module-level ``all_geo_dict`` under the key ``'all_geo'``.
    Re-schedules itself for 02:00 of the following day.
    """
    print("inizio request geo")
    since = datetime.strptime("2019-01-01", '%Y-%m-%d')
    pipeline = [
        {
            "$match": {
                'collection_date': {'$gte': since},
                'c_coll_date_prec': {'$eq': 2},
            },
        },
        {
            "$group": {
                "_id": {
                    'geo_group': '$location.geo_group',
                    'country': '$location.country',
                    'region': '$location.region',
                    'province': '$location.province',
                },
                "count": {"$sum": 1},
            }
        },
    ]

    grouped = collection_db.aggregate(pipeline, allowDiskUse=True)
    flattened = []
    for row in grouped:
        place = row['_id']
        flattened.append({'geo_group': place['geo_group'],
                          'country': place['country'],
                          'region': place['region'],
                          'province': place['province'],
                          'count': row['count']})
    all_geo_dict['all_geo'] = flattened
    print("fine request geo")
    # Schedule the next refresh for 02:00 of the following day.
    now = datetime.today()
    next_run = now.replace(hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
    timer = Timer((next_run - now).total_seconds(), get_all_geo)
    timer.start()
+
+
# ----------------------------------------- START FUNCTIONS ----------------------------------------------- #

# Populate the in-memory caches once at start-up; each loader then keeps
# itself fresh by re-scheduling a daily Timer.  Order matches the original
# start-up sequence.
for _startup_task in (get_all_important_mutation,
                      get_all_accession_id,
                      get_all_geo,
                      get_all_protein):
    _startup_task()
+
+
+# ----------------------------------------- OLD QUERIES ----------------------------------------------- #
+
+
+sars_cov_2_products_old = {
+ "A": [
+ {
+ "name": "E (envelope protein)",
+ "start": 26245,
+ "end": 26472,
+ "row": 0,
+ "color": "#7c98b3",
+ "sequence": "MYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV"
+ },
+ {
+ "name": "M (membrane glycoprotein)",
+ "start": 26523,
+ "end": 27191,
+ "row": 0,
+ "color": "#536b78",
+ "sequence": "MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVLAAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELVIGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAAYSRYRIGNYKLNTDHSSSSDNIALLVQ"
+ },
+ {
+ "name": "N (nucleocapsid phosphoprotein)",
+ "start": 28274,
+ "end": 29533,
+ "row": 0,
+ "color": "#f68e5f",
+ "sequence": "MSDNGPQNQRNAPRITFGGPSDSTGSNQNGERSGARSKQRRPQGLPNNTASWFTALTQHGKEDLKFPRGQGVPINTNSSPDDQIGYYRRATRRIRGGDGKMKDLSPRWYFYYLGTGPEAGLPYGANKDGIIWVATEGALNTPKDHIGTRNPANNAAIVLQLPQGTTLPKGFYAEGSRGGSQASSRSSSRSRNSSRNSTPGSSRGTSPARMAGNGGDAALALLLLDRLNQLESKMSGKGQQQQGQTVTKKSAAEASKKPRQKRTATKAYNVTQAFGRRGPEQTQGNFGDQELIRQGTDYKHWPQIAQFAPSASAFFGMSRIGMEVTPSGTWLTYTGAIKLDDKDPNFKDQVILLNKHIDAYKTFPPTEPKKDKKKKADETQALPQRQKKQQTVTLLPAADLDDFSKQLQQSMSSADSTQA"
+ },
+ {
+ "name": "ORF10 protein",
+ "start": 29558,
+ "end": 29674,
+ "row": 0,
+ "color": "#f76c5e",
+ "sequence": "MGYINVFAFPFTIYSLLLCRMNSRNYIAQVDVVNFNLT"
+ },
+ {
+ "name": "NSP16 (2'-O-ribose methyltransferase)",
+ "start": 20659,
+ "end": 21552,
+ "row": 0,
+ "color": "#22577a",
+ "sequence": "SSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN"
+ },
+ {
+ "name": "NSP3",
+ "start": 2720,
+ "end": 8554,
+ "row": 0,
+ "color": "#7209b7",
+ "sequence": "APTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKLLHKPIVWHVNNATNKATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGG"
+ },
+ {
+ "name": "NSP4",
+ "start": 8555,
+ "end": 10054,
+ "row": 0,
+ "color": "#560bad",
+ "sequence": "KIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQ"
+ },
+ {
+ "name": "NSP15 (endoRNAse)",
+ "start": 19621,
+ "end": 20658,
+ "row": 0,
+ "color": "#38a3a5",
+ "sequence": "SLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQ"
+ },
+ {
+ "name": "NSP5 (3C-like proteinase)",
+ "start": 10055,
+ "end": 10972,
+ "row": 0,
+ "color": "#480ca8",
+ "sequence": "SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ"
+ },
+ {
+ "name": "NSP14 (3'-to-5' exonuclease)",
+ "start": 18040,
+ "end": 19620,
+ "row": 0,
+ "color": "#57cc99",
+ "sequence": "AENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQ"
+ },
+ {
+ "name": "NSP11",
+ "start": 13442,
+ "end": 13480,
+ "row": 0,
+ "color": "#65bc6e",
+ "sequence": "SADAQSFLNGFAV"
+ },
+ {
+ "name": "NSP13 (helicase)",
+ "start": 16237,
+ "end": 18039,
+ "row": 0,
+ "color": "#80ed99",
+ "sequence": "AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ"
+ },
+ {
+ "name": "NSP6",
+ "start": 10973,
+ "end": 11842,
+ "row": 0,
+ "color": "#3a0ca3",
+ "sequence": "SAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQ"
+ },
+ {
+ "name": "NSP7",
+ "start": 11843,
+ "end": 12091,
+ "row": 0,
+ "color": "#3f37c9",
+ "sequence": "SKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQ"
+ },
+ {
+ "name": "NSP8",
+ "start": 12092,
+ "end": 12685,
+ "row": 0,
+ "color": "#4361ee",
+ "sequence": "AIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLNVAKSEFDRDAAMQRKLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQ"
+ },
+ {
+ "name": "NSP9",
+ "start": 12686,
+ "end": 13024,
+ "row": 0,
+ "color": "#4895ef",
+ "sequence": "NNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQ"
+ },
+ {
+ "name": "NSP12 (RNA-dependent RNA polymerase)",
+ "start": 13442,
+ "end": 16236,
+ "row": 0,
+ "color": "#c7f9cc",
+ "sequence": "SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQ"
+ },
+ {
+ "name": "ORF1ab polyprotein",
+ "start": 266,
+ "end": 21555,
+ "row": 0,
+ "color": "#89c4be",
+ "sequence": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKL
LHKPIVWHVNNATNKATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGGKIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQSGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQSAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQSKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQAIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLN
VAKSEFDRDAAMQRKLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQNNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQAVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQAENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGF
KMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQSLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQSSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN"
+ },
+ {
+ "name": "NSP10",
+ "start": 13025,
+ "end": 13441,
+ "row": 0,
+ "color": "#4cc9f0",
+ "sequence": "AGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQ"
+ },
+ {
+ "name": "NSP1 (leader protein)",
+ "start": 266,
+ "end": 805,
+ "row": 0,
+ "color": "#f72585",
+ "sequence": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG"
+ },
+ {
+ "name": "ORF1a polyprotein",
+ "start": 266,
+ "end": 13483,
+ "row": 0,
+ "sequence": "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKL
LHKPIVWHVNNATNKATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGGKIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQSGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQSAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQSKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQAIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLN
VAKSEFDRDAAMQRKLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQNNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNGFAV"
+ },
+ {
+ "name": "NSP2",
+ "start": 806,
+ "end": 2719,
+ "row": 0,
+ "color": "#ccb7ae",
+ "sequence": "AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG"
+ },
+ {
+ "name": "NS3 (ORF3a protein)",
+ "start": 25393,
+ "end": 26220,
+ "row": 0,
+ "color": "#a3a3a3",
+ "sequence": "MDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWESGVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVMEPIYDEPTTTTSVPL"
+ },
+ {
+ "name": "NS6 (ORF6 protein)",
+ "start": 27202,
+ "end": 27387,
+ "row": 0,
+ "color": "#586ba4",
+ "sequence": "MFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID"
+ },
+ {
+ "name": "NS7a (ORF7a protein)",
+ "start": 27394,
+ "end": 27759,
+ "row": 0,
+ "color": "#324376",
+ "sequence": "MKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFSTQFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE"
+ },
+ {
+ "name": "NS7b (ORF7b)",
+ "start": 27756,
+ "end": 27887,
+ "row": 0,
+ "color": "#f5dd90",
+ "sequence": "MIELSLIDFYLCFLAFLLFLVLIMLIIFWFSLELQDHNETCHA"
+ },
+ {
+ "name": "NS8 (ORF8 protein)",
+ "start": 27894,
+ "end": 28259,
+ "row": 0,
+ "color": "#b79738",
+ "sequence": "MKFLVFLGIITTVAAFHQECSLQSCTQHQPYVVDDPCPIHFYSKWYIRVGARKSAPLIELCVDEAGSKSPIQYIDIGNYTVSCLPFTINCQEPKLGSLVVRCSFYEDFLEYHDVRVVLDFI"
+ },
+ {
+ "name": "Spike (surface glycoprotein)",
+ "start": 21563,
+ "end": 25384,
+ "row": 0,
+ "color": "#accbe1",
+ "sequence": "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT"
+ }
+ ],
+ "N": [
+ {
+ "name": "ORF10",
+ "start": 29558,
+ "end": 29674,
+ "row": 0
+ },
+ {
+ "name": "ORF1ab",
+ "start": 266,
+ "end": 21555,
+ "row": 0
+ },
+ {
+ "name": "ORF3a",
+ "start": 25393,
+ "end": 26220,
+ "row": 0
+ },
+ {
+ "name": "ORF6",
+ "start": 27202,
+ "end": 27387,
+ "row": 0
+ },
+ {
+ "name": "ORF7a",
+ "start": 27394,
+ "end": 27759,
+ "row": 0
+ },
+ {
+ "name": "ORF7b",
+ "start": 27756,
+ "end": 27887,
+ "row": 0
+ },
+ {
+ "name": "ORF8",
+ "start": 27894,
+ "end": 28259,
+ "row": 0
+ }
+ ]
+}
- arr_p_values.append(single_item['p_value'])
- mutation_table2.append(single_item)
- a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
@api.route('/tableLineageCountry111')
class FieldList(Resource):
    @api.doc('table_lineage_country')
    def post(self):
        """Proxy the lineage/country table request to the VirusSurf epitope API.

        Forwards the JSON payload unchanged, then reshapes each row's
        ``country_count`` string (a Postgres-style array of
        ``(country,count,...)`` tuples) into one flat dict per lineage:
        ``{'lineage': ..., '<country>': '<count>', ...}``.
        """
        payload = api.payload

        connection = http.client.HTTPConnection('geco.deib.polimi.it')
        headers = {'Content-type': 'application/json'}
        connection.request('POST', '/virusurf_epitope/api/epitope/tableLineageCountry',
                           json.dumps(payload), headers)
        raw = connection.getresponse().read().decode()
        rows = json.loads(raw)

        table = []
        for row in rows:
            line = {'lineage': row['lineage']}
            # Strip quoting, escaping and the surrounding braces/parentheses,
            # leaving "country,count,...),country,count,...)" to split on.
            # Replacement order matters: ')\' must be removed before '\'.
            cleaned = row['country_count']
            for junk in ('"', ')\\', '\\', '{', '}', '('):
                cleaned = cleaned.replace(junk, '')
            for entry in cleaned.split('),'):
                parts = entry.replace(')', '').split(',')
                # First field is the country name, second its sequence count.
                line[parts[0]] = parts[1]
            table.append(line)

        return table

+
@api.route('/denominatorLineageCountry111')
class FieldList(Resource):
    @api.doc('possible_country_lineage')
    def post(self):
        """Proxy the per-country denominator request to the VirusSurf API.

        Forwards the JSON payload unchanged and returns a mapping from
        geographic area to its total sequence count; a null area is reported
        under the key ``'N/D'``.
        """
        payload = api.payload

        connection = http.client.HTTPConnection('geco.deib.polimi.it')
        headers = {'Content-type': 'application/json'}
        connection.request('POST', '/virusurf_epitope/api/epitope/denominatorLineageCountry',
                           json.dumps(payload), headers)
        raw = connection.getresponse().read().decode()
        rows = json.loads(raw)

        denominators = {}
        for row in rows:
            key = 'N/D' if row['geo'] is None else row['geo']
            denominators[key] = row['cnt']

        return denominators
+
+
+# @api.route('/analyzeMutationCountryLineage')
+# class FieldList(Resource):
+# @api.doc('analyze_mutation_country_lineage')
+# def post(self):
+#
+# to_send = api.payload
+#
+# conn = http.client.HTTPConnection('geco.deib.polimi.it')
+# headers = {'Content-type': 'application/json'}
+# send = to_send
+# json_data = json.dumps(send)
+# conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineage', json_data, headers)
+#
+# response = conn.getresponse()
+# all_result = response.read().decode()
+# all_result = json.loads(all_result)
+#
+# mutation_table2 = []
+# arr_p_values = []
+# for item in all_result:
+# single_item = {}
+# if item['product'] == 'Spike (surface glycoprotein)':
+# protein = item['product'].split(" ", 1)[0]
+# mutation = protein + '_'
+# # mutation = 'S_'
+# else:
+# protein = item['product'].split(" ", 1)[0]
+# mutation = protein + '_'
+# mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
+# single_item['mutation'] = mutation
+# single_item['start_aa_original'] = item['start_aa_original']
+# single_item['sequence_aa_original'] = item['sequence_aa_original']
+# single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
+# single_item['product'] = item['product']
+# single_item['mutation_position'] = item['start_aa_original']
+# single_item['target'] = item['country']
+# single_item['background'] = item['lineage']
+# single_item['count_target'] = item['count_seq']
+# single_item['percentage_background'] = item['fraction']
+# single_item['numerator_background'] = item['numerator']
+# single_item['denominator_background'] = item['denominator']
+# single_item['percentage_target'] = item['fraction_country']
+# single_item['numerator_target'] = item['count_seq']
+# single_item['denominator_target'] = item['denominator_country']
+#
+# epsilon = 0.00000001
+# single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
+# (single_item['percentage_background'] + epsilon)
+#
+# if single_item['odd_ratio'] >= 1:
+# # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_country'],
+# # item['numerator'] / item['denominator'])
+# if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+# and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+# single_item['p_value'] = 1
+# else:
+# stat, p, dof, expected = \
+# chi2_contingency([[single_item['numerator_background'],
+# single_item['denominator_background'] - single_item['numerator_background']],
+# [single_item['numerator_target'],
+# single_item['denominator_target'] - single_item['numerator_target']]])
+# single_item['p_value'] = p
+# else:
+# # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_country'],
+# # item['numerator'] / item['denominator'])
+# if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+# and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+# single_item['p_value'] = 1
+# else:
+# stat, p, dof, expected = \
+# chi2_contingency([[single_item['numerator_background'],
+# single_item['denominator_background'] - single_item['numerator_background']],
+# [single_item['numerator_target'],
+# single_item['denominator_target'] - single_item['numerator_target']]])
+# single_item['p_value'] = p
+#
+# arr_p_values.append(single_item['p_value'])
+# mutation_table2.append(single_item)
+#
+# a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
+#
+# i = 0
+# for item in mutation_table2:
+# item['pvalue'] = new_p_values[i]
+# i = i + 1
+#
+# return mutation_table2
+
+
+@api.route('/analyzeMutationCountryLineageInTime111')
+class FieldList(Resource):
+ @api.doc('analyze_mutation_country_lineage_in_time')
+ def post(self):
+
+ to_send = api.payload
+
+ conn = http.client.HTTPConnection('geco.deib.polimi.it')
+ headers = {'Content-type': 'application/json'}
+ send = to_send
+ json_data = json.dumps(send)
+ conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationCountryLineageInTime', json_data, headers)
response = conn.getresponse()
all_result = response.read().decode()
@@ -749,18 +3563,16 @@ def post(self):
protein = item['product'].split(" ", 1)[0]
mutation = protein + '_'
mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
+ single_item['mutation'] = mutation
single_item['start_aa_original'] = item['start_aa_original']
single_item['sequence_aa_original'] = item['sequence_aa_original']
single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
- single_item['mutation'] = mutation
single_item['product'] = item['product']
single_item['mutation_position'] = item['start_aa_original']
- single_item['target'] = item['target']
- single_item['background'] = item['background']
-
+ single_item['target'] = item['target_time']
+ single_item['background'] = item['background_time']
+ single_item['country'] = item['country']
single_item['lineage'] = item['lineage']
- single_item['lineage_target'] = item['lineage_target']
- single_item['lineage_background'] = item['lineage_background']
single_item['count_target'] = item['count_seq']
single_item['percentage_background'] = item['fraction']
single_item['numerator_background'] = item['numerator']
@@ -774,61 +3586,31 @@ def post(self):
(single_item['percentage_background'] + epsilon)
if single_item['odd_ratio'] >= 1:
- if item['denominator'] != 0:
- # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item[
- 'numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
+ # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
else:
- # single_item['p_value'] = 0
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item[
- 'numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item['numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
else:
- if item['denominator'] != 0:
- # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'],
- # item['numerator'] / item['denominator'])
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item[
- 'numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
+ # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
else:
- # single_item['p_value'] = 0
- if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
- and single_item['denominator_target'] - single_item['numerator_target'] == 0:
- single_item['p_value'] = 1
- else:
- stat, p, dof, expected = \
- chi2_contingency([[single_item['numerator_background'],
- single_item['denominator_background'] - single_item[
- 'numerator_background']],
- [single_item['numerator_target'],
- single_item['denominator_target'] - single_item['numerator_target']]])
- single_item['p_value'] = p
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item['numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
arr_p_values.append(single_item['p_value'])
mutation_table2.append(single_item)
@@ -843,9 +3625,9 @@ def post(self):
return mutation_table2
-@api.route('/countOverlappingSequenceTargetBackground')
+@api.route('/analyzeTimeDistributionCountryLineage111')
class FieldList(Resource):
- @api.doc('count_overlapping_sequence_target_background')
+ @api.doc('analyze_time_distribution_country_lineage')
def post(self):
to_send = api.payload
@@ -853,8 +3635,7 @@ def post(self):
headers = {'Content-type': 'application/json'}
send = to_send
json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/countOverlappingSequenceTargetBackground', json_data,
- headers)
+ conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionCountryLineage', json_data, headers)
response = conn.getresponse()
all_result = response.read().decode()
@@ -863,18 +3644,18 @@ def post(self):
return all_result
-@api.route('/selectorQuery')
+@api.route('/analyzeTimeDistributionBackgroundQueryGeo111')
class FieldList(Resource):
- @api.doc('selector_query')
+ @api.doc('analyze_time_distribution_background_query_geo')
def post(self):
-
to_send = api.payload
conn = http.client.HTTPConnection('geco.deib.polimi.it')
headers = {'Content-type': 'application/json'}
send = to_send
json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/selectorQuery', json_data, headers)
+ conn.request('POST', '/virusurf_epitope/api/epitope/analyzeTimeDistributionBackgroundQueryGeo', json_data,
+ headers)
response = conn.getresponse()
all_result = response.read().decode()
@@ -883,307 +3664,286 @@ def post(self):
return all_result
-@api.route('/getAccessionIds')
+@api.route('/analyzeMutationProvinceRegion111')
class FieldList(Resource):
- @api.doc('selector_query')
+ @api.doc('analyze_mutation_province_region')
def post(self):
+
to_send = api.payload
conn = http.client.HTTPConnection('geco.deib.polimi.it')
headers = {'Content-type': 'application/json'}
send = to_send
json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/getAccessionIds', json_data, headers)
+ conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationProvinceRegion', json_data, headers)
response = conn.getresponse()
all_result = response.read().decode()
all_result = json.loads(all_result)
- return all_result
-
-
-@api.route('/getProteinPosition')
-class FieldList(Resource):
- @api.doc('get_protein_position')
- def post(self):
-
- payload = api.payload
- name_protein = payload['protein']
-
- all_protein = sars_cov_2_products['A']
- min_pos = 0
- max_pos = 0
- for item in all_protein:
- name = str(item.get('name'))
- if name.lower() == name_protein.lower():
- min_pos = 1
- max_pos = (item.get('end') - item.get('start')) // 3
- if "nsp" in name.lower():
- max_pos = max_pos + 1
-
- res = {'start': min_pos, 'stop': max_pos}
-
- return res
-
-
-@api.route('/getDomains')
-class FieldList(Resource):
- @api.doc('get_domains')
- def post(self):
- payload = api.payload
- name_protein = payload['protein']
-
- annotations = pd.read_csv("apis/protein_annotations.csv",
- delimiter=',')
-
- annotations1 = copy.deepcopy(annotations)
- annotations2 = copy.deepcopy(annotations)
- annotations3 = copy.deepcopy(annotations)
-
- ann_mutagenesis = annotations1[(annotations.Description.str.lower() != 'n/d')
- & (annotations.Protein.str.lower() == name_protein.lower())
- & (annotations.Category.str.lower() == 'mutagenesis')
- ]
- ann_mutagenesis2 = ann_mutagenesis[['Description', 'Begin', 'End']]
- ann_mutagenesis3 = json.loads(ann_mutagenesis2.to_json(orient="records"))
-
- ann_aa_modifications = annotations2[(annotations.Description.str.lower() != 'n/d')
- & (annotations.Protein.str.lower() == name_protein.lower())
- & (annotations.Category.str.lower() == 'ptm')
- & (annotations.Type.str.lower() == 'carbohyd')
- ]
- ann_aa_modifications2 = ann_aa_modifications[['Description', 'Begin', 'End']]
- ann_aa_modifications3 = json.loads(ann_aa_modifications2.to_json(orient="records"))
-
- ann_sites_family_dom = annotations3[(annotations.Description.str.lower() != 'n/d')
- & (annotations.Protein.str.lower() == name_protein.lower())
- & ((annotations.Category.str.lower() == 'domains_and_sites') |
- (annotations.Type.str.lower() == 'n/d'))
- ]
- ann_sites_family_dom2 = ann_sites_family_dom[['Description', 'Begin', 'End']]
- ann_sites_family_dom3 = json.loads(ann_sites_family_dom2.to_json(orient="records"))
-
- result = {'mutagenesis': ann_mutagenesis3, 'aa_modifications': ann_aa_modifications3,
- 'sites_and_domains': ann_sites_family_dom3}
-
- return result
-
-
-@api.route('/getImportantMutation')
-class FieldList(Resource):
- @api.doc('get_important_mutation')
- def post(self):
-
- payload = api.payload
- name_lineage = payload['lineage']
-
- result = {'mutation': [], 'additional_mutation': []}
-
- if name_lineage in dict_lineage_mutation:
- lineage_json = dict_lineage_mutation[name_lineage]
- result['mutation'] = lineage_json['mutation']
- result['additional_mutation'] = lineage_json['additional_mutation']
- else:
- all_mutation = []
- all_additional_mutation = []
- for lineage in dict_lineage_mutation:
- row = dict_lineage_mutation[lineage]
- for mutation in row['mutation']:
- if mutation not in all_mutation:
- all_mutation.append(mutation)
- if mutation in all_additional_mutation:
- all_additional_mutation.remove(mutation)
- for additional_mutation in row['additional_mutation']:
- if additional_mutation not in all_additional_mutation and additional_mutation not in all_mutation:
- all_additional_mutation.append(additional_mutation)
- result['mutation'] = all_mutation
- result['additional_mutation'] = all_additional_mutation
-
- return result
-
-
-@api.route('/getLineageTree')
-class FieldList(Resource):
- @api.doc('get_lineage_tree')
- def post(self):
-
- payload = api.payload
- possible_lineages = payload['possibleLineages']
-
- dict_copy = dict_lineage_mutation
-
- arr_lineages = []
- dict_lineages = {}
- for item in possible_lineages:
- single_line = item
- dict_lineages[item['value']] = single_line
- arr_lineages.append(item['value'])
+ mutation_table2 = []
+ arr_p_values = []
+ for item in all_result:
+ single_item = {}
+ if item['product'] == 'Spike (surface glycoprotein)':
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ # mutation = 'S_'
+ else:
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
+ single_item['start_aa_original'] = item['start_aa_original']
+ single_item['sequence_aa_original'] = item['sequence_aa_original']
+ single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
+ single_item['mutation'] = mutation
+ single_item['product'] = item['product']
+ single_item['mutation_position'] = item['start_aa_original']
+ # if 'country' in item:
+ # single_item['target'] = item['region']
+ # single_item['background'] = item['country']
+ # else:
+ # single_item['target'] = item['province']
+ # single_item['background'] = item['region']
+ single_item['target'] = item['target']
+ single_item['background'] = item['background']
- dict_copy2 = dict(sorted(dict_copy.items(), key=lambda k_v: k_v[1]['alias']))
+ single_item['lineage'] = item['lineage']
+ single_item['count_target'] = item['count_seq']
+ single_item['percentage_background'] = item['fraction']
+ single_item['numerator_background'] = item['numerator']
+ single_item['denominator_background'] = item['denominator']
+ single_item['percentage_target'] = item['fraction_target']
+ single_item['numerator_target'] = item['count_seq']
+ single_item['denominator_target'] = item['denominator_target']
- items = []
- idx = 1
+ epsilon = 0.00000001
+ single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
+ (single_item['percentage_background'] + epsilon)
- for lineage in dict_copy2:
- already_done = False
- children = False
- children_lineage = False
- important_lineage = False
- alias = dict_copy2[lineage]['alias']
- if lineage in arr_lineages:
- if dict_copy2[lineage]['WHO label'] != '':
- important_lineage = True
- for itm in items:
- possible_parent_alias = str(itm['alias']) + '.'
- possible_children_alias = str(alias)
- possible_parent_lineage = str(itm['real_name']) + '.'
- possible_children_lineage = str(lineage)
- if possible_parent_alias in possible_children_alias:
- children = True
- recursive_children_lineage(itm, lineage, alias, dict_copy2, dict_lineages)
- if possible_parent_lineage in possible_children_lineage:
- children_lineage = True
- if possible_children_lineage != possible_children_alias:
- recursive_children_lineage(itm, lineage, lineage, dict_copy2, dict_lineages)
- if not children:
- already_done = True
- name_complete = lineage
- if dict_copy2[lineage]['WHO label'] != '':
- name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') '
- single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage,
- 'who': dict_copy2[lineage]['WHO label'], 'children': [],
- 'count': dict_lineages[lineage]['count']}
- items.append(single_lineage)
- idx = idx + 1
+ if single_item['odd_ratio'] >= 1:
+ # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item['numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item['numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
- if not children_lineage and not already_done:
- name_complete = lineage.split('.')[0]
- single_lineage = {'id': idx, 'alias': name_complete, 'name': name_complete,
- 'real_name': name_complete,
- 'who': '', 'children': [],
- 'count': 0}
- items.append(single_lineage)
- idx = idx + 1
- recursive_children_lineage(single_lineage, lineage, lineage, dict_copy2, dict_lineages)
+ arr_p_values.append(single_item['p_value'])
+ mutation_table2.append(single_item)
- # if important_lineage and not already_done:
- # name_complete = lineage
- # if dict_copy2[lineage]['WHO label'] != '':
- # name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') '
- # single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage,
- # 'who': dict_copy2[lineage]['WHO label'], 'children': [],
- # 'count': dict_lineages[lineage]['count']}
- # items.append(single_lineage)
- # idx = idx + 1
+ a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
- return items
+ i = 0
+ for item in mutation_table2:
+ item['pvalue'] = new_p_values[i]
+ i = i + 1
+ return mutation_table2
-@api.route('/getAllImportantMutationPerLineage')
+
+@api.route('/analyzeMutationTargetBackgroundFree111')
class FieldList(Resource):
- @api.doc('get_important_mutation')
+ @api.doc('analyze_mutation_target_background_free')
def post(self):
- payload = api.payload
- lineage = payload['lineage']
- proteins = payload['proteins']
+ to_send = api.payload
- array_proteins = []
+ conn = http.client.HTTPConnection('geco.deib.polimi.it')
+ headers = {'Content-type': 'application/json'}
+ send = to_send
+ json_data = json.dumps(send)
+ conn.request('POST', '/virusurf_epitope/api/epitope/analyzeMutationTargetBackgroundFree', json_data, headers)
- for protein in proteins:
- protein_rewritten = protein.split(" ")[0]
- array_proteins.append(protein_rewritten)
+ response = conn.getresponse()
+ all_result = response.read().decode()
+ all_result = json.loads(all_result)
- dict_copy = all_important_mutation_dict
+ mutation_table2 = []
+ arr_p_values = []
+ for item in all_result:
+ single_item = {}
+ if item['product'] == 'Spike (surface glycoprotein)':
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ # mutation = 'S_'
+ else:
+ protein = item['product'].split(" ", 1)[0]
+ mutation = protein + '_'
+ mutation += item['sequence_aa_original'] + str(item['start_aa_original']) + item['sequence_aa_alternative']
+ single_item['start_aa_original'] = item['start_aa_original']
+ single_item['sequence_aa_original'] = item['sequence_aa_original']
+ single_item['sequence_aa_alternative'] = item['sequence_aa_alternative']
+ single_item['mutation'] = mutation
+ single_item['product'] = item['product']
+ single_item['mutation_position'] = item['start_aa_original']
+ single_item['target'] = item['target']
+ single_item['background'] = item['background']
- array_important_mutation = []
+ single_item['lineage'] = item['lineage']
+ single_item['lineage_target'] = item['lineage_target']
+ single_item['lineage_background'] = item['lineage_background']
+ single_item['count_target'] = item['count_seq']
+ single_item['percentage_background'] = item['fraction']
+ single_item['numerator_background'] = item['numerator']
+ single_item['denominator_background'] = item['denominator']
+ single_item['percentage_target'] = item['fraction_target']
+ single_item['numerator_target'] = item['count_seq']
+ single_item['denominator_target'] = item['denominator_target']
- if lineage is None:
- for lineage_mutations in dict_copy:
- single_lineage_mutation = dict_copy[lineage_mutations]
- for mutation in single_lineage_mutation['common_changes']:
- if mutation not in array_important_mutation:
- protein = mutation.split("_")[0]
- if protein in array_proteins:
- array_important_mutation.append(mutation)
- array_important_mutation.sort()
- else:
- if lineage in dict_copy:
- single_lineage_mutation = dict_copy[lineage]
- for mutation in single_lineage_mutation['common_changes']:
- if mutation not in array_important_mutation:
- protein = mutation.split("_")[0]
- if protein in array_proteins:
- array_important_mutation.append(mutation)
- array_important_mutation.sort()
+ epsilon = 0.00000001
+ single_item['odd_ratio'] = (single_item['percentage_target'] + epsilon) / \
+ (single_item['percentage_background'] + epsilon)
- return array_important_mutation
+ if single_item['odd_ratio'] >= 1:
+ if item['denominator'] != 0:
+ # single_item['p_value'] = 1 - binom.cdf(item['count_seq'] - 1, item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ # single_item['p_value'] = 0
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ if item['denominator'] != 0:
+ # single_item['p_value'] = binom.cdf(item['count_seq'], item['denominator_target'],
+ # item['numerator'] / item['denominator'])
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ else:
+ # single_item['p_value'] = 0
+ if single_item['denominator_background'] - single_item['numerator_background'] == 0 \
+ and single_item['denominator_target'] - single_item['numerator_target'] == 0:
+ single_item['p_value'] = 1
+ else:
+ stat, p, dof, expected = \
+ chi2_contingency([[single_item['numerator_background'],
+ single_item['denominator_background'] - single_item[
+ 'numerator_background']],
+ [single_item['numerator_target'],
+ single_item['denominator_target'] - single_item['numerator_target']]])
+ single_item['p_value'] = p
+ arr_p_values.append(single_item['p_value'])
+ mutation_table2.append(single_item)
-@api.route('/checkAccessionId')
+ a, new_p_values, c, d = sms.multipletests(arr_p_values, method='bonferroni')
+
+ i = 0
+ for item in mutation_table2:
+ item['pvalue'] = new_p_values[i]
+ i = i + 1
+
+ return mutation_table2
+
+
+@api.route('/selectorQuery111')
class FieldList(Resource):
- @api.doc('check_accession_id')
+ @api.doc('selector_query')
def post(self):
- payload = api.payload
- accession_id = payload['accession_id']
- acc_id_arr = all_accession_id_dict['all_acc_id']
- result = False
- if accession_id in acc_id_arr:
- result = True
- return result
+ to_send = api.payload
+ conn = http.client.HTTPConnection('geco.deib.polimi.it')
+ headers = {'Content-type': 'application/json'}
+ send = to_send
+ json_data = json.dumps(send)
+ conn.request('POST', '/virusurf_epitope/api/epitope/selectorQuery', json_data, headers)
-def recursive_children_lineage(parent, lineage, alias, dict_copy2, dict_lineages):
- children = False
- idx = str(parent['id']) + '_' + str(len(parent['children']))
- for itm in parent['children']:
- possible_parent_alias = str(itm['alias']) + '.'
- possible_children_alias = str(alias)
- if possible_parent_alias in possible_children_alias:
- children = True
- recursive_children_lineage(itm, lineage, alias, dict_copy2, dict_lineages)
- break
- else:
- children = False
- if not children:
- name_complete = lineage
- if dict_copy2[lineage]['WHO label'] != '':
- name_complete = lineage + ' (' + dict_copy2[lineage]['WHO label'] + ') '
- single_lineage = {'id': idx, 'alias': alias, 'name': name_complete, 'real_name': lineage,
- 'who': dict_copy2[lineage]['WHO label'],
- 'children': [], 'count': dict_lineages[lineage]['count']}
- parent['children'].append(single_lineage)
+ response = conn.getresponse()
+ all_result = response.read().decode()
+ all_result = json.loads(all_result)
+ return all_result
-all_important_mutation_dict = {}
+@api.route('/countOverlappingSequenceTargetBackground111')
+class FieldList(Resource):
+ @api.doc('count_overlapping_sequence_target_background')
+ def post(self):
+ to_send = api.payload
-def get_all_important_mutation():
- print("inizio request important mutation")
- conn = http.client.HTTPConnection('geco.deib.polimi.it')
- conn.request('GET', '/virusurf_epitope/api/epitope/allImportantMutations')
+ conn = http.client.HTTPConnection('geco.deib.polimi.it')
+ headers = {'Content-type': 'application/json'}
+ send = to_send
+ json_data = json.dumps(send)
+ conn.request('POST', '/virusurf_epitope/api/epitope/countOverlappingSequenceTargetBackground', json_data,
+ headers)
- response = conn.getresponse()
- all_important_mutation = response.read().decode()
- all_important_mutation = json.loads(all_important_mutation)
+ response = conn.getresponse()
+ all_result = response.read().decode()
+ all_result = json.loads(all_result)
+
+ return all_result
+
+
+@api.route('/getAccessionIds111')
+class FieldList(Resource):
+ @api.doc('get_accession_ids')
+ def post(self):
+ to_send = api.payload
- for mutation_per_lineage in all_important_mutation:
- lineage = mutation_per_lineage['lineage']
- all_important_mutation_dict[lineage] = mutation_per_lineage
- print("fine request important mutation")
- x = datetime.today()
- y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
- delta_t = y - x
- secs = delta_t.total_seconds()
- t1 = Timer(secs, get_all_important_mutation)
- t1.start()
+ conn = http.client.HTTPConnection('geco.deib.polimi.it')
+ headers = {'Content-type': 'application/json'}
+ send = to_send
+ json_data = json.dumps(send)
+ conn.request('POST', '/virusurf_epitope/api/epitope/getAccessionIds', json_data, headers)
+ response = conn.getresponse()
+ all_result = response.read().decode()
+ all_result = json.loads(all_result)
-all_protein_dict = {}
+ return all_result
-def get_all_protein():
+def get_all_protein111():
print("inizio request protein")
to_send = {'gcm': {'taxon_name': ["severe acute respiratory syndrome coronavirus 2"]}}
@@ -1191,12 +3951,13 @@ def get_all_protein():
headers = {'Content-type': 'application/json'}
send = to_send
json_data = json.dumps(send)
- conn.request('POST', '/virusurf_epitope/api/epitope/allProtein', json_data, headers)
+ conn.request('POST', '/virusurf_gisaid/api/epitope/allProtein', json_data, headers)
response = conn.getresponse()
all_protein = response.read().decode()
all_protein = json.loads(all_protein)
all_protein_dict['all_protein'] = all_protein
+
print("fine request protein")
x = datetime.today()
y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
@@ -1206,10 +3967,7 @@ def get_all_protein():
t2.start()
-all_geo_dict = {}
-
-
-def get_all_geo():
+def get_all_geo111():
print("inizio request geo")
conn = http.client.HTTPConnection('geco.deib.polimi.it')
conn.request('GET', '/virusurf_epitope/api/epitope/allGeo')
@@ -1227,10 +3985,7 @@ def get_all_geo():
t4.start()
-all_accession_id_dict = {}
-
-
-def get_all_accession_id():
+def get_all_accession_id111():
print("inizio request accession id")
conn = http.client.HTTPConnection('geco.deib.polimi.it')
conn.request('GET', '/virusurf_epitope/api/epitope/allAccessionIds')
@@ -1242,6 +3997,7 @@ def get_all_accession_id():
for itm in all_acc_id:
all_accession_id_arr.append(itm['accession_id'])
all_accession_id_dict['all_acc_id'] = all_accession_id_arr
+
print("fine request accession id")
x = datetime.today()
y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
@@ -1250,370 +4006,23 @@ def get_all_accession_id():
t3 = Timer(secs, get_all_geo)
t3.start()
-# ----------------------------------------- MONGO DB ----------------------------------------------- #
-
-
-translate_dictionary = {
- 'accession_id': '_id',
- 'lineage': 'covv_lineage',
- 'collection_date': 'covv_collection_date',
- 'location': 'covv_location',
-}
-
-
-@api.route('/selectorQueryMongoDB')
-class FieldList(Resource):
- @api.doc('selector_query_mongo_db')
- def post(self):
-
- to_use = api.payload
- field_name = to_use['field']
- query_fields = to_use['query']
-
- # field_name = 'country'
- # query_fields = {'lineage': 'B.1', 'geo_group': ['Europe', 'Asia'], 'minDate': '2020-01-01', 'maxDate': "2021-01-01",
- # 'toExclude': {}}
- # 'toExclude': {'geo_group': ['Asia'], 'country': ['Italy', 'France']
-
- if field_name in query_fields:
- del query_fields[field_name]
-
- i = 0
- where_part = {}
- start_date = datetime.strptime("2019-01-01", '%Y-%m-%d')
- where_part['c_coll_date_prec'] = {}
- where_part['c_coll_date_prec']['$eq'] = 2
- where_part['collection_date'] = {}
- where_part['collection_date']['$gte'] = start_date
-
- field_not_null = field_name
- if field_not_null in translate_dictionary:
- field_not_null = translate_dictionary[field_name]
- if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province':
- field_not_null = 'location.' + field_name
- where_part[field_not_null] = {'$ne': None}
-
- if query_fields is not None:
- for key in query_fields:
- if key == 'minDate':
- start_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
- where_part['collection_date']['$gte'] = start_date
- elif key == 'maxDate':
- stop_date = datetime.strptime(f"{query_fields[key]}", '%Y-%m-%d')
- where_part['collection_date']['$lte'] = stop_date
-
- elif key == 'toExclude':
- for fieldToExclude in query_fields[key]:
- if '$and' not in where_part:
- where_part['$and'] = []
-
- single_where_part = {'$and': []}
- for geoToExclude in query_fields[key][fieldToExclude]:
- real_field_to_exclude = fieldToExclude
- if fieldToExclude == 'geo_group' or fieldToExclude == 'country' \
- or fieldToExclude == 'region' or fieldToExclude == 'province':
- real_field_to_exclude = 'location.' + fieldToExclude
- specific_and = {}
- geo_value = geoToExclude.replace("'", "''")
- specific_and[f'{real_field_to_exclude}'] = {'$ne': geo_value}
- single_where_part['$and'].append(specific_and)
- where_part['$and'].append(single_where_part)
-
- elif key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
- if '$and' not in where_part:
- where_part['$and'] = []
-
- real_key = key
- if key == 'geo_group' or key == 'country' or key == 'region' or key == 'province':
- real_key = 'location.' + key
- if isinstance(query_fields[key], list):
- single_where_part_or = {'$or': []}
- for itm in query_fields[key]:
- specific_or = {}
- field_value = itm.replace("'", "''")
- specific_or[f'{real_key}'] = {'$eq': field_value}
- single_where_part_or['$or'].append(specific_or)
- where_part['$and'].append(single_where_part_or)
- else:
- single_where_part_or = {'$or': []}
- replace_fields_value = query_fields[key].replace("'", "''")
- specific_or = {f'{real_key}': {'$eq': replace_fields_value}}
- single_where_part_or['$or'].append(specific_or)
- where_part['$and'].append(single_where_part_or)
-
- else:
- real_key = key
- if key in translate_dictionary:
- real_key = translate_dictionary[key]
- replace_fields_value = query_fields[key]
- if key != 'start_aa_original':
- replace_fields_value = query_fields[key].replace("'", "''")
- if real_key not in where_part:
- where_part[real_key] = {}
- where_part[real_key]['$eq'] = replace_fields_value
-
- i = i + 1
-
- query = []
-
- query_where = {"$match": where_part}
- query.append(query_where)
-
- group_part = {}
- real_field = field_name
- if field_name in translate_dictionary:
- real_field = translate_dictionary[field_name]
- if field_name == 'geo_group' or field_name == 'country' or field_name == 'region' or field_name == 'province':
- real_field = 'location.' + field_name
- # group_part["_id"] = {"value":
- # {"$cond":
- # [{"$eq": [f"${real_field}", ""]},
- # None,
- # {"$cond":
- # [{"$eq": [f"${real_field}", None]},
- # f"${real_field}",
- # {"$concat": [
- # {"$toUpper":
- # {"$substrCP": [f"${real_field}", 0, 1]}
- # },
- # {
- # "$substrCP": [
- # f"${real_field}", 1,
- # {"$subtract": [{"$strLenCP": f"${real_field}"}, 1]}
- # ]
- # }
- # ]}
- # ]
- # }
- # ]
- # },
- # }
- group_part["_id"] = {"value": f"${real_field}"}
- group_part["count"] = {"$sum": 1}
- query_group = {"$group": group_part}
- query.append(query_group)
-
- sort_part = {"count": -1}
- query_sort = {"$sort": sort_part}
- query.append(query_sort)
- # print("query", query)
-
- results = collection_db.aggregate(query)
-
- list_dict = []
- for single_item in list(results):
- single_item_remodel = {}
- for key in single_item:
- if key == '_id':
- single_item_remodel['value'] = single_item['_id']['value']
- else:
- single_item_remodel[key] = single_item[key]
- list_dict.append(single_item_remodel)
-
- # print("field:", field_name, " result:", list_dict)
- return list_dict
+def get_all_important_mutation111():
+ print("inizio request important mutation")
+ conn = http.client.HTTPConnection('geco.deib.polimi.it')
+ conn.request('GET', '/virusurf_epitope/api/epitope/allImportantMutations')
-def get_all_geo_mongoDB():
- print("inizio request geo")
- start_date = datetime.strptime("2019-01-01", '%Y-%m-%d')
- query = [
- {
- "$match": {
- 'collection_date': {
- '$gte': start_date
- },
- 'c_coll_date_prec': {
- '$eq': 2
- },
- },
- },
- {
- "$group": {"_id":
- {
- 'geo_group': '$location.geo_group',
- 'country': '$location.country',
- 'region': '$location.region',
- 'province': '$location.province',
- },
- "count": {"$sum": 1}
- }
- },
- ]
+ response = conn.getresponse()
+ all_important_mutation = response.read().decode()
+ all_important_mutation = json.loads(all_important_mutation)
- # {"geo_group":
- # {"$cond":
- # [{"$eq": ["$location.geo_group", ""]},
- # None,
- # {"$cond":
- # [{"$eq": ["$location.geo_group", None]},
- # "$location.geo_group",
- # {"$concat": [
- # {"$toUpper":
- # {"$substrCP": ["$location.geo_group", 0, 1]}
- # },
- # {
- # "$substrCP": [
- # "$location.geo_group", 1,
- # {"$subtract": [{"$strLenCP": "$location.geo_group"}, 1]}
- # ]
- # }
- # ]}
- # ]
- # }
- # ]
- # },
- # "country":
- # {"$cond":
- # [{"$eq": ["$location.country", ""]},
- # None,
- # {"$cond":
- # [{"$eq": ["$location.country", None]},
- # "$location.country",
- # {"$concat": [
- # {"$toUpper":
- # {"$substrCP": ["$location.country", 0, 1]}
- # },
- # {
- # "$substrCP": [
- # "$location.country", 1,
- # {"$subtract": [{"$strLenCP": "$location.country"}, 1]}
- # ]
- # }
- # ]}
- # ]
- # }
- # ]
- # },
- # "region":
- # {"$cond":
- # [{"$eq": ["$location.region", ""]},
- # None,
- # {"$cond":
- # [{"$eq": ["$location.region", None]},
- # "$location.region",
- # {"$concat": [
- # {"$toUpper":
- # {"$substrCP": ["$location.region", 0, 1]}
- # },
- # {
- # "$substrCP": [
- # "$location.region", 1,
- # {"$subtract": [{"$strLenCP": "$location.region"}, 1]}
- # ]
- # }
- # ]}
- # ]
- # }
- # ]
- # },
- # "province":
- # {"$cond":
- # [{"$eq": ["$location.province", ""]},
- # None,
- # {"$cond":
- # [{"$eq": ["$location.province", None]},
- # "$location.province",
- # {"$concat": [
- # {"$toUpper":
- # {"$substrCP": ["$location.province", 0, 1]}
- # },
- # {
- # "$substrCP": [
- # "$location.province", 1,
- # {"$subtract": [{"$strLenCP": "$location.province"}, 1]}
- # ]
- # }
- # ]}
- # ]
- # }
- # ]
- # },
-
- results = collection_db.aggregate(query)
- list_geo_dict = []
- for single_item in results:
- single_item_remodel = {'geo_group': single_item['_id']['geo_group'],
- 'country': single_item['_id']['country'],
- 'region': single_item['_id']['region'],
- 'province': single_item['_id']['province'], 'count': single_item['count']}
- list_geo_dict.append(single_item_remodel)
- all_geo_dict['all_geo'] = list_geo_dict
- print("fine request geo")
+ for mutation_per_lineage in all_important_mutation:
+ lineage = mutation_per_lineage['lineage']
+ all_important_mutation_dict[lineage] = mutation_per_lineage
+ print("fine request important mutation")
x = datetime.today()
y = x.replace(day=x.day, hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
delta_t = y - x
secs = delta_t.total_seconds()
- t4 = Timer(secs, get_all_geo)
- t4.start()
-
-
-def prova_mongo_db():
- print("prova Mongo")
- seq = collection_db
- print("prova Mongo2")
-
- # "$match": {
- # # 'covv_collection_date': {
- # # '$gte': "2019-01-01",
- # # '$lte': "2021-07-31",
- # # '$regex': "\d\d\d\d-\d\d-\d\d"
- # # },
- # # 'covv_location': {
- # # '$regex': "Italy"
- # # },
-
- pipeline = [
- {
- "$match": {
- 'location.geo_group': {
- '$eq': 'Oceania'
- },
- 'location.country': {
- '$eq': 'Australia'
- },
- 'location.region': {
- '$eq': 'Northern Territory'
- },
- },
- },
- {"$unwind": "$muts"},
- {"$group":
- #{"_id": "$_id",
- {"_id":
- {'pro': "$muts.pro",
- 'org': "$muts.org",
- 'loc': "$muts.loc",
- 'alt': "$muts.alt",
- },
- "count": {"$sum": 1}
- }
- },
- {'$sort':
- {"_id.pro": -1}
- }
- ]
- print("start")
- results = seq.aggregate(pipeline, )
- print("stop", len(list(results)))
- # for i, x in enumerate(results):
- # print("qui", x)
- # if i < 1:
- # print("qui", x)
- # break
- print("fine prova Mongo2")
-
-
-def prova_mongo_2():
- print("qui2")
-
-# ----------------------------------------- START FUNCTIONS ----------------------------------------------- #
-
-
-get_all_important_mutation()
-get_all_accession_id()
-get_all_geo()
-get_all_protein()
-
-# prova_mongo_db()
-# prova_mongo_2()
+ t1 = Timer(secs, get_all_important_mutation)
+ t1.start()
diff --git a/frontend/src/components/FreeTargetVsBackground.vue b/frontend/src/components/FreeTargetVsBackground.vue
index 64c428e..7e641f1 100644
--- a/frontend/src/components/FreeTargetVsBackground.vue
+++ b/frontend/src/components/FreeTargetVsBackground.vue
@@ -1697,14 +1697,14 @@ export default {
...mapState(['queryFreeTarget', 'queryFreeBackground', 'numSequencesQueryFreeTarget',
'numSequencesQueryFreeBackground', 'all_protein', 'startDateQueryFreeTarget', "stopDateQueryFreeTarget",
'startDateQueryFreeBackground', 'stopDateQueryFreeBackground', 'toExcludeFreeTarget', 'toExcludeFreeBackground',
- 'colorPValueInfoBox', 'color_1', 'color_2', 'color_3']),
+ 'colorPValueInfoBox', 'color_1', 'color_2', 'color_3', 'startAndStopQueryFreeTarget', 'startAndStopQueryFreeBackground']),
...mapGetters({}),
},
methods: {
...mapMutations(['setStartDateQueryFreeTarget', 'setStartDateQueryFreeBackground',
'setStopDateQueryFreeTarget', 'setStopDateQueryFreeBackground', 'setNumSequencesQueryFreeTarget',
- 'setNumSequencesQueryFreeBackground']),
- ...mapActions(['setQueryFreeTarget', 'setQueryFreeBackground']),
+ 'setNumSequencesQueryFreeBackground', 'setStartAndStopQueryFreeBackground', 'setStartAndStopQueryFreeTarget']),
+ ...mapActions(['setQueryFreeTarget', 'setQueryFreeBackground', 'setToExcludeFreeTarget', 'setToExcludeFreeBackground']),
deleteAllAccIdsTargetInserted(){
this.listAccIdsTargetInserted = [];
this.dialogAccIdsTargetInserted = false;
@@ -2828,10 +2828,16 @@ export default {
this.setQueryFreeTarget({field: 'country', list: null});
this.setQueryFreeTarget({field: 'region', list: null});
this.setQueryFreeTarget({field: 'province', list: null});
- this.setStartDateQueryFreeTarget(null);
- this.setStopDateQueryFreeTarget(null);
+ let obj = {'start': null, 'stop': null}
+ this.setStartAndStopQueryFreeTarget(obj);
+ // this.setStartDateQueryFreeTarget(null);
+ // this.setStopDateQueryFreeTarget(null);
// this.setQueryFreeBackground({field: 'accession_id', list: this.listAccIdsTarget});
this.setNumSequencesQueryFreeTarget(0);
+ this.setToExcludeFreeTarget({field: 'geo_group', list: null});
+ this.setToExcludeFreeTarget({field: 'country', list: null});
+ this.setToExcludeFreeTarget({field: 'region', list: null});
+ this.setToExcludeFreeTarget({field: 'province', list: null});
}
else{
let id1 = 'tabTargetFree2';
@@ -2865,9 +2871,15 @@ export default {
this.setQueryFreeBackground({field: 'country', list: null});
this.setQueryFreeBackground({field: 'region', list: null});
this.setQueryFreeBackground({field: 'province', list: null});
- this.setStartDateQueryFreeBackground(null);
- this.setStopDateQueryFreeBackground(null);
+ let obj = {'start': null, 'stop': null}
+ this.setStartAndStopQueryFreeBackground(obj);
+ // this.setStartDateQueryFreeBackground(null);
+ // this.setStopDateQueryFreeBackground(null);
this.setNumSequencesQueryFreeBackground(0);
+ this.setToExcludeFreeBackground({field: 'geo_group', list: null});
+ this.setToExcludeFreeBackground({field: 'country', list: null});
+ this.setToExcludeFreeBackground({field: 'region', list: null});
+ this.setToExcludeFreeBackground({field: 'province', list: null});
}
else{
let id1 = 'tabBackgroundFree2';
@@ -2894,8 +2906,10 @@ export default {
this.setQueryFreeTarget({field: 'country', list: null});
this.setQueryFreeTarget({field: 'region', list: null});
this.setQueryFreeTarget({field: 'province', list: null});
- this.setStartDateQueryFreeTarget(null);
- this.setStopDateQueryFreeTarget(null);
+ let obj = {'start': null, 'stop': null}
+ this.setStartAndStopQueryFreeTarget(obj);
+ // this.setStartDateQueryFreeTarget(null);
+ // this.setStopDateQueryFreeTarget(null);
if(this.listAccIdsTargetFile.length > 0){
arrayFull = JSON.parse(JSON.stringify(this.listAccIdsTargetFile));
for(let i = 0; i < this.listAccIdsTargetInserted.length; i = i + 1){
@@ -2926,8 +2940,10 @@ export default {
this.setQueryFreeBackground({field: 'country', list: null});
this.setQueryFreeBackground({field: 'region', list: null});
this.setQueryFreeBackground({field: 'province', list: null});
- this.setStartDateQueryFreeBackground(null);
- this.setStopDateQueryFreeBackground(null);
+ let obj = {'start': null, 'stop': null};
+ this.setStartAndStopQueryFreeBackground(obj);
+ // this.setStartDateQueryFreeBackground(null);
+ // this.setStopDateQueryFreeBackground(null);
if(this.listAccIdsBackgroundFile.length > 0){
arrayFull = JSON.parse(JSON.stringify(this.listAccIdsBackgroundFile));
for(let i = 0; i < this.listAccIdsBackgroundInserted.length; i = i + 1){
@@ -2958,8 +2974,10 @@ export default {
this.setQueryFreeTarget({field: 'country', list: null});
this.setQueryFreeTarget({field: 'region', list: null});
this.setQueryFreeTarget({field: 'province', list: null});
- this.setStartDateQueryFreeTarget(null);
- this.setStopDateQueryFreeTarget(null);
+ let obj = {'start': null, 'stop': null};
+ this.setStartAndStopQueryFreeTarget(obj);
+ // this.setStartDateQueryFreeTarget(null);
+ // this.setStopDateQueryFreeTarget(null);
if(this.fileAccIdsTarget !== null) {
this.listAccIdsTargetFile = this.fileAccIdsTarget;
for(let i = 0; i < this.listAccIdsTargetFile.length; i = i + 1){
@@ -2999,8 +3017,10 @@ export default {
this.setQueryFreeBackground({field: 'country', list: null});
this.setQueryFreeBackground({field: 'region', list: null});
this.setQueryFreeBackground({field: 'province', list: null});
- this.setStartDateQueryFreeBackground(null);
- this.setStopDateQueryFreeBackground(null);
+ let obj = {'start': null, 'stop': null};
+ this.setStartAndStopQueryFreeBackground(obj);
+ // this.setStartDateQueryFreeBackground(null);
+ // this.setStopDateQueryFreeBackground(null);
if(this.fileAccIdsBackground !== null){
this.listAccIdsBackgroundFile = this.fileAccIdsBackground;
for(let i = 0; i < this.listAccIdsBackgroundFile.length; i = i + 1){
@@ -3194,22 +3214,30 @@ export default {
all_protein(){
this.possibleProtein = this.all_protein;
},
- startDateQueryFreeTarget(){
+ startAndStopQueryFreeTarget(){
this.resetApplied();
this.countOverlappingSequenceTargetBackground();
},
- stopDateQueryFreeTarget(){
- this.resetApplied();
- this.countOverlappingSequenceTargetBackground();
- },
- startDateQueryFreeBackground(){
- this.resetApplied();
- this.countOverlappingSequenceTargetBackground();
- },
- stopDateQueryFreeBackground(){
+ // startDateQueryFreeTarget(){
+ // this.resetApplied();
+ // this.countOverlappingSequenceTargetBackground();
+ // },
+ // stopDateQueryFreeTarget(){
+ // this.resetApplied();
+ // this.countOverlappingSequenceTargetBackground();
+ // },
+ startAndStopQueryFreeBackground(){
this.resetApplied();
this.countOverlappingSequenceTargetBackground();
},
+ // startDateQueryFreeBackground(){
+ // this.resetApplied();
+ // this.countOverlappingSequenceTargetBackground();
+ // },
+ // stopDateQueryFreeBackground(){
+ // this.resetApplied();
+ // this.countOverlappingSequenceTargetBackground();
+ // },
queryFreeTarget(){
this.resetApplied();
this.countOverlappingSequenceTargetBackground();
diff --git a/frontend/src/components/SelectorsQueryFree.vue b/frontend/src/components/SelectorsQueryFree.vue
index 013074e..d1af12d 100644
--- a/frontend/src/components/SelectorsQueryFree.vue
+++ b/frontend/src/components/SelectorsQueryFree.vue
@@ -234,42 +234,58 @@ export default {
},
'queryFreeTarget.geo_group': function (){
if(this.field === 'geo_group' && (!this.queryFreeTarget['geo_group'] || this.queryFreeTarget['geo_group'].length === 0)) {
- this.clearToExcludeField();
+ if(this.type === 'target') {
+ this.clearToExcludeField();
+ }
}
},
'queryFreeTarget.country': function (){
if(this.field === 'country' && (!this.queryFreeTarget['country'] || this.queryFreeTarget['country'].length === 0)) {
- this.clearToExcludeField();
+ if(this.type === 'target') {
+ this.clearToExcludeField();
+ }
}
},
'queryFreeTarget.region': function (){
if(this.field === 'region' && (!this.queryFreeTarget['region'] || this.queryFreeTarget['region'].length === 0)) {
- this.clearToExcludeField();
+ if(this.type === 'target') {
+ this.clearToExcludeField();
+ }
}
},
'queryFreeTarget.province': function (){
if(this.field === 'province' && (!this.queryFreeTarget['province'] || this.queryFreeTarget['province'].length === 0 )) {
- this.clearToExcludeField();
+ if(this.type === 'target') {
+ this.clearToExcludeField();
+ }
}
},
'queryFreeBackground.geo_group': function (){
if(this.field === 'geo_group' && (!this.queryFreeBackground['geo_group'] || this.queryFreeBackground['geo_group'].length === 0)) {
- this.clearToExcludeField();
+ if(this.type === 'background') {
+ this.clearToExcludeField();
+ }
}
},
'queryFreeBackground.country': function (){
if(this.field === 'country' && (!this.queryFreeBackground['country'] || this.queryFreeBackground['country'].length === 0)) {
- this.clearToExcludeField();
+ if(this.type === 'background') {
+ this.clearToExcludeField();
+ }
}
},
'queryFreeBackground.region': function (){
if(this.field === 'region' && (!this.queryFreeBackground['region'] || this.queryFreeBackground['region'].length === 0)) {
- this.clearToExcludeField();
+ if(this.type === 'background') {
+ this.clearToExcludeField();
+ }
}
},
'queryFreeBackground.province': function (){
if(this.field === 'province' && (!this.queryFreeBackground['province'] || this.queryFreeBackground['province'].length === 0 )) {
- this.clearToExcludeField();
+ if(this.type === 'background') {
+ this.clearToExcludeField();
+ }
}
},
}
diff --git a/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue b/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue
index a64eac2..971fa92 100644
--- a/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue
+++ b/frontend/src/components/TimeSelectorDistributionLineageInGeo.vue
@@ -129,6 +129,13 @@
+
+
+
+
diff --git a/frontend/src/components/TimeSelectorQueryFree.vue b/frontend/src/components/TimeSelectorQueryFree.vue
index 4a3b255..fe2b0cb 100644
--- a/frontend/src/components/TimeSelectorQueryFree.vue
+++ b/frontend/src/components/TimeSelectorQueryFree.vue
@@ -37,6 +37,7 @@
color="#F48C0680"
track-color="grey"
height="2px"
+ @mouseup="mouseUpSlider"
>
@@ -118,6 +119,13 @@
+
+
+
+
@@ -220,8 +228,29 @@ export default {
},
methods: {
...mapMutations(['setStartDateQueryFreeTarget', 'setStopDateQueryFreeTarget', 'setStartDateQueryFreeBackground',
- 'setStopDateQueryFreeBackground', 'setNumSequencesQueryFreeTarget', 'setNumSequencesQueryFreeBackground']),
+ 'setStopDateQueryFreeBackground', 'setNumSequencesQueryFreeTarget', 'setNumSequencesQueryFreeBackground',
+ 'setStartAndStopQueryFreeTarget', 'setStartAndStopQueryFreeBackground']),
...mapActions(['setQueryFreeTarget', 'setQueryFreeBackground']),
+ mouseUpSlider() {
+ let min = this.slider[0];
+ let max = this.slider[1];
+ this.changeMarkerAndRender(min, max);
+
+ this.last_start_date = this.translateIndexToDate(this.slider[0]);
+ this.last_stop_date = this.translateIndexToDate(this.slider[1]);
+ if(this.type === 'target') {
+ let obj = {'start': this.last_start_date, 'stop': this.last_stop_date}
+ this.setStartAndStopQueryFreeTarget(obj);
+ // this.setStartDateQueryFreeTarget(this.last_start_date);
+ // this.setStopDateQueryFreeTarget(this.last_stop_date);
+ }
+ else if(this.type === 'background') {
+ let obj = {'start': this.last_start_date, 'stop': this.last_stop_date}
+ this.setStartAndStopQueryFreeBackground(obj);
+ // this.setStartDateQueryFreeBackground(this.last_start_date);
+ // this.setStopDateQueryFreeBackground(this.last_stop_date);
+ }
+ },
download(){
let url = this.my_chart.getConnectedDataURL({
pixelRatio: 2,
@@ -501,16 +530,21 @@ export default {
}
this.slider = [index_start, index_stop];
+ this.mouseUpSlider();
this.last_start_date = this.translateIndexToDate(this.slider[0]);
this.last_stop_date = this.translateIndexToDate(this.slider[1]);
- this.changeMarkerAndRender(this.slider[0], this.slider[0]);
+ this.changeMarkerAndRender(this.slider[0], this.slider[1]);
if(this.type === 'target') {
- this.setStartDateQueryFreeTarget(this.last_start_date);
- this.setStopDateQueryFreeTarget(this.last_stop_date);
+ let obj = {'start': this.last_start_date, 'stop': this.last_stop_date}
+ this.setStartAndStopQueryFreeTarget(obj);
+ // this.setStartDateQueryFreeTarget(this.last_start_date);
+ // this.setStopDateQueryFreeTarget(this.last_stop_date);
}
else if(this.type === 'background') {
- this.setStartDateQueryFreeBackground(this.last_start_date);
- this.setStopDateQueryFreeBackground(this.last_stop_date);
+ let obj = {'start': this.last_start_date, 'stop': this.last_stop_date}
+ this.setStartAndStopQueryFreeBackground(obj);
+ // this.setStartDateQueryFreeBackground(this.last_start_date);
+ // this.setStopDateQueryFreeBackground(this.last_stop_date);
}
this.chosenApplied = true;
@@ -529,6 +563,7 @@ export default {
else {
let stop = this.slider[1];
this.slider = [start, stop];
+ this.mouseUpSlider();
}
},
last_stop_date(){
@@ -540,6 +575,7 @@ export default {
else {
let start = this.slider[0];
this.slider = [start, stop];
+ this.mouseUpSlider();
}
},
queryFreeTarget() {
@@ -562,22 +598,22 @@ export default {
this.loadData();
}
},
- slider(){
- let min = this.slider[0];
- let max = this.slider[1];
- this.changeMarkerAndRender(min, max);
-
- this.last_start_date = this.translateIndexToDate(this.slider[0]);
- this.last_stop_date = this.translateIndexToDate(this.slider[1]);
- if(this.type === 'target') {
- this.setStartDateQueryFreeTarget(this.last_start_date);
- this.setStopDateQueryFreeTarget(this.last_stop_date);
- }
- else if(this.type === 'background') {
- this.setStartDateQueryFreeBackground(this.last_start_date);
- this.setStopDateQueryFreeBackground(this.last_stop_date);
- }
- },
+ // slider(){
+ // let min = this.slider[0];
+ // let max = this.slider[1];
+ // this.changeMarkerAndRender(min, max);
+ //
+ // this.last_start_date = this.translateIndexToDate(this.slider[0]);
+ // this.last_stop_date = this.translateIndexToDate(this.slider[1]);
+ // if(this.type === 'target') {
+ // this.setStartDateQueryFreeTarget(this.last_start_date);
+ // this.setStopDateQueryFreeTarget(this.last_stop_date);
+ // }
+ // else if(this.type === 'background') {
+ // this.setStartDateQueryFreeBackground(this.last_start_date);
+ // this.setStopDateQueryFreeBackground(this.last_stop_date);
+ // }
+ // },
},
mounted() {
this.loadData();
diff --git a/frontend/src/components/TimeSelectorQueryGeo.vue b/frontend/src/components/TimeSelectorQueryGeo.vue
index 77df131..b9d0723 100644
--- a/frontend/src/components/TimeSelectorQueryGeo.vue
+++ b/frontend/src/components/TimeSelectorQueryGeo.vue
@@ -253,12 +253,12 @@
-
-
-
-
-
-
+
+
+
@@ -782,7 +782,7 @@ export default {
this.changeMarkerAndRender(this.slider[0], this.slider[0]);
this.setStartDateQueryGeo(this.last_start_date);
this.setStopDateQueryGeo(this.last_stop_date);
-
+ this.overlay = false;
});
});
}
diff --git a/frontend/src/store.js b/frontend/src/store.js
index 441c519..0d63f52 100644
--- a/frontend/src/store.js
+++ b/frontend/src/store.js
@@ -41,8 +41,10 @@ const state = {
queryFreeBackground: {},
startDateQueryFreeTarget: null,
stopDateQueryFreeTarget: null,
+ startAndStopQueryFreeTarget: {'start' : '2019-01-01', 'stop': '2019-01-01'},
startDateQueryFreeBackground: null,
stopDateQueryFreeBackground: null,
+ startAndStopQueryFreeBackground: {'start' : '2019-01-01', 'stop': '2019-01-01'},
numSequencesQueryFreeTarget: 0,
numSequencesQueryFreeBackground: 0,
@@ -215,16 +217,39 @@ const mutations = {
state.numLevelAboveBackground = value;
},
setStartDateQueryFreeTarget: (state, value) => {
- state.startDateQueryFreeTarget = value;
+ let obj = state.startAndStopQueryFreeTarget;
+ obj['start'] = value;
+ state.startAndStopQueryFreeTarget = obj;
+ // state.startDateQueryFreeTarget = value;
},
setStopDateQueryFreeTarget: (state, value) => {
- state.stopDateQueryFreeTarget= value;
+ let obj = state.startAndStopQueryFreeTarget;
+ obj['stop'] = value;
+ state.startAndStopQueryFreeTarget = obj;
+ // state.stopDateQueryFreeTarget= value;
+ },
+ setStartAndStopQueryFreeTarget: (state, value) => {
+ // obj = {'start' : xxx, 'stop': yyy}
+ state.startAndStopQueryFreeTarget = value;
+ state.startDateQueryFreeTarget = value['start'];
+ state.stopDateQueryFreeTarget = value['stop'];
},
setStartDateQueryFreeBackground: (state, value) => {
- state.startDateQueryFreeBackground = value;
+ let obj = state.startAndStopQueryFreeBackground;
+ obj['start'] = value;
+ state.startAndStopQueryFreeBackground = obj;
+ // state.startDateQueryFreeBackground = value;
},
setStopDateQueryFreeBackground: (state, value) => {
- state.stopDateQueryFreeBackground = value;
+ let obj = state.startAndStopQueryFreeBackground;
+ obj['stop'] = value;
+ state.startAndStopQueryFreeBackground = obj;
+ // state.stopDateQueryFreeBackground = value;
+ },
+ setStartAndStopQueryFreeBackground: (state, value) => {
+ state.startAndStopQueryFreeBackground = value;
+ state.startDateQueryFreeBackground = value['start'];
+ state.stopDateQueryFreeBackground = value['stop'];
},
setQueryFreeTargetField: (state, payload) => {
state.queryFreeTarget[payload.field] = payload.fieldQuery;