Skip to content

Commit 3008881

Browse files
committed
Added handling of low-AF (allele frequency) insertions at the end of the sequence
1 parent 23314b8 commit 3008881

File tree

1 file changed

+43
-2
lines changed

1 file changed

+43
-2
lines changed

bu_isciii/templates/IRMA/ANALYSIS/ANALYSIS01_FLU_IRMA/04-irma/create_irma_vcf.py

+43-2
Original file line number | Diff line number | Diff line change
@@ -318,7 +318,7 @@ def align2dict(alignment_file):
318318
return vcf_dict
319319

320320

321-
def stats_vcf(vcf_dictionary, alleles_dictionary):
321+
def stats_vcf(vcf_dictionary, alleles_dictionary, last_pos, last_allele):
322322
"""Add stats to VCF dictionary.
323323
324324
Parameters
@@ -521,6 +521,41 @@ def stats_vcf(vcf_dictionary, alleles_dictionary):
521521
af_vcf_dict = {}
522522
for _, value in alleles_dictionary.items():
523523
pos = value["Position"]
524+
chrom = next(iter(vcf_dictionary.values()))["CHROM"]
525+
526+
if int(pos) > last_pos and value["Allele_Type"] == "Minority":
527+
content_dict = {
528+
"CHROM": chrom,
529+
"REF_POS": last_pos,
530+
"SAMPLE_POS": [pos],
531+
"REF": last_allele,
532+
"ALT": last_allele + value["Allele"],
533+
"TYPE": "INS",
534+
"DP": [value["Count"]],
535+
"TOTAL_DP": [value["Total"]],
536+
"AF": [value["Frequency"]],
537+
"QUAL": [value["Frequency"]],
538+
}
539+
540+
variant = (
541+
content_dict["CHROM"]
542+
+ "_"
543+
+ str(content_dict["REF_POS"])
544+
+ "_"
545+
+ "final_ins"
546+
)
547+
548+
if variant in af_vcf_dict:
549+
af_vcf_dict[variant]["DP"] += content_dict["DP"]
550+
af_vcf_dict[variant]["TOTAL_DP"] += content_dict["TOTAL_DP"]
551+
af_vcf_dict[variant]["AF"] += content_dict["AF"]
552+
af_vcf_dict[variant]["QUAL"] += content_dict["QUAL"]
553+
af_vcf_dict[variant]["SAMPLE_POS"] += content_dict["SAMPLE_POS"]
554+
af_vcf_dict[variant]["ALT"] += value["Allele"]
555+
else:
556+
af_vcf_dict[variant] = content_dict
557+
pass
558+
524559
for align_pos, subdict in vcf_dictionary.items():
525560
if (value["Allele_Type"] == "Consensus" and subdict["TYPE"] == "REF") or (
526561
value["Allele"] == subdict["REF"]
@@ -970,7 +1005,13 @@ def main(args=None):
9701005
# Start analysis
9711006
alleles_dict = alleles_to_dict(all_alleles, freq, dp)
9721007
alignment_dict = align2dict(alignment)
973-
af_vcf_dict = stats_vcf(alignment_dict, alleles_dict)
1008+
last_ref_pos = max(position["REF_POS"] for position in alignment_dict.values())
1009+
last_ref_allele = None
1010+
for _, value in alignment_dict.items():
1011+
if value["REF_POS"] == last_ref_pos:
1012+
last_ref_allele = value["REF"]
1013+
break
1014+
af_vcf_dict = stats_vcf(alignment_dict, alleles_dict, last_ref_pos, last_ref_allele)
9741015
combined_vcf_dict = combine_indels(af_vcf_dict)
9751016
create_vcf(combined_vcf_dict, output_vcf, alignment)
9761017

0 commit comments

Comments
 (0)