From 786ef1054f505d7a743a01553b6e53351c63f64f Mon Sep 17 00:00:00 2001 From: aineniamh Date: Mon, 29 Mar 2021 11:51:41 +0100 Subject: [PATCH] updating for post-hoc p.3 --- pangolin/__init__.py | 2 +- pangolin/command.py | 3 +++ pangolin/data/config_p.3.csv | 12 ++++++++++ pangolin/scripts/pangolearn.smk | 42 +++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 pangolin/data/config_p.3.csv diff --git a/pangolin/__init__.py b/pangolin/__init__.py index d8bd3b0..619eb5b 100644 --- a/pangolin/__init__.py +++ b/pangolin/__init__.py @@ -1,2 +1,2 @@ _program = "pangolin" -__version__ = "2.3.5" +__version__ = "2.3.6" diff --git a/pangolin/command.py b/pangolin/command.py index 32b5505..036928b 100644 --- a/pangolin/command.py +++ b/pangolin/command.py @@ -269,6 +269,9 @@ def main(sysargs = sys.argv[1:]): variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.2.csv') config["p2_variants"] = variants_file + variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.3.csv') + config["p3_variants"] = variants_file + variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.214.2.csv') config["b12142_variants"] = variants_file diff --git a/pangolin/data/config_p.3.csv b/pangolin/data/config_p.3.csv new file mode 100644 index 0000000..48a29d8 --- /dev/null +++ b/pangolin/data/config_p.3.csv @@ -0,0 +1,12 @@ +aa:orf1a:D1554G +aa:orf1a:L3201P +aa:orf1a:D3681E +aa:orf1b:L1203F +aa:S:H1101Y +aa:S:K417T +aa:S:E484K +aa:S:N501Y +aa:S:P681H +aa:S:E1092K +aa:orf8:K2Q +aa:N:P80R diff --git a/pangolin/scripts/pangolearn.smk b/pangolin/scripts/pangolearn.smk index 91c9710..42543f8 100644 --- a/pangolin/scripts/pangolearn.smk +++ b/pangolin/scripts/pangolearn.smk @@ -207,6 +207,24 @@ rule type_variants_p1: --append-genotypes """ +rule type_variants_p3: + input: + fasta = rules.align_to_reference.output.fasta, + variants = config["p3_variants"], + reference = config["reference_fasta"] + output: + variants = os.path.join(config["tempdir"],"variants_p3.csv") + shell: + """ + type_variants.py \ + --fasta-in {input.fasta:q} \ + --variants-config {input.variants:q} \ + --reference {input.reference:q} \ + --variants-out {output.variants:q} \ + --append-genotypes + """ + + rule type_variants_b12142: input: fasta = rules.align_to_reference.output.fasta, @@ -230,6 +248,7 @@ rule overwrite: csv = os.path.join(config["tempdir"],"pangolearn_assignments.csv"), b117_variants = rules.type_variants_b117.output.variants, b1351_variants = rules.type_variants_b1351.output.variants, + p3_variants = rules.type_variants_p3.output.variants, p2_variants = rules.type_variants_p2.output.variants, p1_variants = rules.type_variants_p1.output.variants, b12142_variants = rules.type_variants_b12142.output.variants @@ -260,6 +279,12 @@ rule overwrite: for row in reader: if int(row["alt_count"]) > 4 and int(row["ref_count"])<4: p2[row["query"]] = row["alt_count"] + p3 = {} + with open(input.p3_variants, "r") as f: + reader = csv.DictReader(f) + for row in reader: + if int(row["alt_count"]) > 8 and int(row["ref_count"])<4: + p3[row["query"]] = row["alt_count"] b12142 = {} with open(input.b12142_variants, "r") as f: reader = csv.DictReader(f) @@ -349,6 +374,23 @@ rule overwrite: new_row["probability"] = "1.0" new_row["lineage"] = "B.1.1.28" + writer.writerow(new_row) + elif row["taxon"] in p3: + new_row = row + snps = p3[row["taxon"]] + note = f"{snps}/12 P.3 (B.1.1.28.3) SNPs" + + new_row["note"] = note + new_row["probability"] = "1.0" + new_row["lineage"] = "P.3" + + writer.writerow(new_row) + elif row["lineage"] =="P.3" and row["taxon"] not in p3: + new_row = row + + new_row["probability"] = "1.0" + new_row["lineage"] = "B.1.1.28" + writer.writerow(new_row) elif row["lineage"] == "B.1.214": new_row = row