-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_mate_breakends.py
78 lines (71 loc) · 4.4 KB
/
add_mate_breakends.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Single-base anchors recognised next to the bracket in a BND ALT string.
_BND_BASES = frozenset("ATGCN")


def _mate_breakend(v_chr: str, v_pos: str, v_alt: str):
    """Derive the mate locus and mate ALT string for one breakend ALT.

    Only ALT strings whose anchor is a single base from ``_BND_BASES`` are
    recognised. The orientation mapping is:

        t]p]  ->  N]chr:pos]
        [p[t  ->  [chr:pos[N
        t[p[  ->  ]chr:pos]N
        ]p]t  ->  N[chr:pos[

    Args:
        v_chr: CHROM of the record being mirrored.
        v_pos: POS of the record being mirrored (kept as text).
        v_alt: ALT column of the record being mirrored.

    Returns:
        ``(mate_chrom, mate_pos, mate_alt)``, or ``None`` when ``v_alt`` does
        not match any of the four breakend forms.

    Raises:
        ValueError: if the bracketed locus has no ``:`` separator.
    """
    head, tail = v_alt[:2], v_alt[-2:]
    if head[:1] in _BND_BASES and head[1:] == "]":
        bracket, template = "]", "N]{}:{}]"
    elif tail[:1] == "[" and tail[1:] in _BND_BASES:
        bracket, template = "[", "[{}:{}[N"
    elif head[:1] in _BND_BASES and head[1:] == "[":
        bracket, template = "[", "]{}:{}]N"
    elif tail[:1] == "]" and tail[1:] in _BND_BASES:
        bracket, template = "]", "N[{}:{}["
    else:
        return None

    # The locus sits between the first pair of brackets. rpartition keeps
    # contig names that themselves contain ':' intact.
    locus = v_alt.split(bracket)[1]
    mate_chrom, sep, mate_pos = locus.rpartition(":")
    if not sep:
        raise ValueError(f"breakend ALT has no 'chrom:pos' locus: {v_alt!r}")
    return mate_chrom, mate_pos, template.format(v_chr, v_pos)


def _vcf_sort_key(record: str):
    """Order records by CHROM (as text), then POS numerically, then full text."""
    chrom, _, rest = record.partition("\t")
    try:
        pos = int(rest.partition("\t")[0])
    except ValueError:
        # Lines without a usable POS sort ahead of real records of that CHROM.
        pos = -1
    return chrom, pos, record


def breakpoints(in_vcf: str, out_vcf: str) -> None:
    """Copy a VCF and add a mate record for every recognised BND record.

    Header lines (starting with ``#``) are written to ``out_vcf`` first, in
    input order. Every other line is kept, and for each line containing
    ``SVTYPE=BND`` whose ALT matches one of the four single-base breakend
    forms a mate record is added. The mate is placed at the locus named in
    the ALT, uses ``N`` as REF, points its ALT back at the original
    CHROM/POS, has ``CHR2=<mate chrom>`` in INFO rewritten to
    ``CHR2=<original chrom>``, and copies ID, QUAL, FILTER and any
    FORMAT/sample columns unchanged. Records are then written sorted by
    CHROM (string order) and numeric POS.

    Comma-separated multi-mate ALT values get no special handling.

    Args:
        in_vcf: Path of the VCF to read.
        out_vcf: Path of the VCF to write (overwritten).

    Raises:
        ValueError: if a BND record has fewer than 8 tab-separated columns,
            or its ALT locus lacks a ``:`` separator.
    """
    with open(in_vcf, "r") as bnd, open(out_vcf, "w") as out:
        records = []
        for line in bnd:
            if line.startswith("#"):
                out.write(line)
                continue

            # Store records without the newline so a final unterminated line
            # cannot be glued to its neighbour after sorting.
            record = line.rstrip("\n")
            records.append(record)
            if "SVTYPE=BND" not in record:
                continue

            fields = record.split("\t")
            if len(fields) < 8:
                raise ValueError(
                    f"BND record has fewer than 8 tab-separated columns: {record!r}"
                )
            v_chr, v_pos, v_id, _v_ref, v_alt, v_qual, v_filter, v_info = fields[:8]
            trailing = fields[8:]  # FORMAT + any number of sample columns

            mate = _mate_breakend(v_chr, v_pos, v_alt)
            if mate is None:
                continue
            mate_chrom, mate_pos, mate_alt = mate

            # The mate must name the original contig as its partner.
            mate_info = v_info.replace("CHR2=" + mate_chrom, "CHR2=" + v_chr)
            records.append(
                "\t".join(
                    [mate_chrom, mate_pos, v_id, "N", mate_alt,
                     v_qual, v_filter, mate_info, *trailing]
                )
            )

        # Positions must be compared as integers; plain string sorting would
        # put POS 100 ahead of POS 20.
        records.sort(key=_vcf_sort_key)
        out.writelines(f"{record}\n" for record in records)
if __name__ == "__main__":
    # Run the conversion only when this file is executed as a script, so that
    # importing the module does not read or write these hard-coded paths.
    breakpoints(
        "/home/cagatay/PycharmProjects/sniffles_reformat/sniffles_bnd_reformat.vcf",
        "/home/cagatay/PycharmProjects/sniffles_reformat/reformatted.vcf",
    )