Skip to content
This repository has been archived by the owner on Nov 6, 2020. It is now read-only.

Commit

Permalink
V0.1.10 update (#66)
Browse files Browse the repository at this point in the history
* Updating to v0.1.10

* Commenting resources files

* Updating README to reflect bug fix

* Bug fix and updating to be ready for new docker image
  • Loading branch information
Samantha Zarate authored Jun 10, 2019
1 parent 4ea10cd commit 5c1fb27
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 11 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ RUN conda update -y pyopenssl

WORKDIR /
ADD resources.tar.gz /
RUN cp -a /resources/* / && rm -rf /resources/

RUN conda install -c defaults -y numpy
RUN pip install https://github.com/bioinform/breakseq2/archive/2.2.tar.gz
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ breakseq2 -2.2- has requirement pysam==0.7.7, but you'll have pysam 0.15.1 which
```
> What's going on?
This is a known error message caused by how we currently manage the conflicting pysam versions required for BreakSeq and SVTyper. We are currently working on a more stable solution.
This is a known error message caused by how we currently manage the conflicting pysam versions required for BreakSeq and SVTyper. This issue should be resolved in v0.1.10.

### Tool versions

Expand Down
2 changes: 1 addition & 1 deletion dx_app_code/parliament2/dxapp.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"WGS"
],
"dxapi": "1.0.0",
"version": "0.1.9",
"version": "0.1.10",
"inputSpec": [
{
"name": "illumina_bam",
Expand Down
6 changes: 2 additions & 4 deletions dx_app_code/parliament2/parliament2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ def main(**job_inputs):
else:
prefix = job_inputs['prefix']

# Running Docker image
subprocess.check_call(['mkdir', '-p', '/home/dnanexus/in', '/home/dnanexus/out'])
docker_pull = ['docker', 'pull', 'dnanexus/parliament2:v0.1.9-13-g37d63065']
docker_pull = ['docker', 'pull', 'dnanexus/parliament2:0.1.10']
subprocess.check_call(docker_pull)

print "Downloading input files"
Expand All @@ -33,8 +32,7 @@ def main(**job_inputs):
ref_name = "/home/dnanexus/in/{0}".format(ref_genome.name)
dxpy.download_dxfile(ref_genome.id, ref_name)

docker_call = ['docker', 'run', '-v', '/home/dnanexus/in/:/home/dnanexus/in/', '-v', '/home/dnanexus/out/:/home/dnanexus/out/', 'dnanexus/parliament2:v0.1.9-13-g37d63065', '--bam', bam_name, '-r', ref_name, '--prefix', str(prefix)]
# docker_call = ['dx-docker', 'run', '-v', '/home/dnanexus/in/:/home/dnanexus/in/', '-v', '/home/dnanexus/out/:/home/dnanexus/out/', 'parliament2:0.1.9', '--bam', bam_name, '-r', ref_name, '--prefix', str(prefix)]
docker_call = ['docker', 'run', '-v', '/home/dnanexus/in/:/home/dnanexus/in/', '-v', '/home/dnanexus/out/:/home/dnanexus/out/', 'dnanexus/parliament2:0.1.10', '--bam', bam_name, '-r', ref_name, '--prefix', str(prefix)]

if 'illumina_bai' in job_inputs:
input_bai = dxpy.DXFile(job_inputs['illumina_bai'])
Expand Down
30 changes: 25 additions & 5 deletions resources/combine_combined.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import sys

# arg 1: survivor_sorted.vcf (sorted SURVIVOR output file)
# arg 2: "${prefix}"
# arg 3: survivor_inputs (all files generated by SURVIVOR)
# arg 4: /all.phred.txt (phred thresholds of calls by various callers)

def main():
headers = []

written_additional_header = False

sample = sys.argv[2]
# get all SV callers used to generate this file
with open(sys.argv[3]) as survivor_input_list:
for line in survivor_input_list:
if "cnvnator" in line:
Expand All @@ -25,6 +31,7 @@ def main():

quality_mappings = { "lt300": {}, "300to1000": {}, "1kbplus": {}, "all": {}, "ins": {} }

# parse the all.phred.txt file
with open(sys.argv[4]) as all_phred_values:
for line in all_phred_values:
size_split = line.split("_")
Expand All @@ -39,6 +46,7 @@ def main():

with open(sys.argv[1]) as survivor_output:
for line in survivor_output:
# modify header
if line.startswith("##"):
if "FORMAT" in line and not written_additional_header:
print "##INFO=<ID=SUPP,Number=.,Type=String,Description=\"Number of callers that support an ALT call. This count is based on the presence of a call, whether it could be confirmed by SVTyper. Due to differences in the breakpoints, this number may differ from the sum of all callers in the CALLERS field\">"
Expand All @@ -51,27 +59,32 @@ def main():
written_additional_header = True
else:
sys.stdout.write(line)
# add sample to line describing VCF fields
elif line[0] == "#" and line[1] != "#":
tab_split = line.strip().split("\t")
print "\t".join(tab_split[:9]) + "\t%s" % sample
# VCF entries
else:
tab_split = line.strip().split("\t")
position = int(tab_split[1])
end = tab_split[7].replace("CIEND","XXXXX").split("END=")[-1].split(";")[0].split("\t")[0]
end_position = int(end)
# possibly same as correct_max_position?
if end_position < position:
new_end = str(position)
new_start = end
tab_split[1] = new_start
tab_split[7].replace("END=%s" % end, "END=%s" % new_end)

# adds "chr"
if "chr" not in tab_split[0]:
tab_split[0] = "chr" + tab_split[0]

support = ""
het = 0
hom = 0
ref = 0
if "chr" not in tab_split[0]:
tab_split[0] = "chr" + tab_split[0]

# counts support for het/hom/ref
for i in range(len(tab_split[9:])):
if "0/1" in tab_split[9+i] or "1/1" in tab_split[9+i] or "./1" in tab_split[9+i]:
if "0/1" in tab_split[9+i] or "./1" in tab_split[9+i]:
Expand All @@ -80,13 +93,16 @@ def main():
hom += 1
if "0/0" in tab_split[9+i]:
ref += 1
# adds SV caller to "support" string if not there already
if headers[i] not in support:
support += ",%s" % headers[i]
# if caller(s) supports variant, adds this to string at end
if len(support) > 0:
tab_split[7] += ";CALLERS=%s" % support.lstrip(",")
else:
support = "."

# parses hom/het/ref into short genotype strings
tab_split[8] = "GT:SP"
if het == 0 and hom == 0:
if ref > 0:
Expand All @@ -104,6 +120,8 @@ def main():

tab_split[9] += support.lstrip(",")

# adding size range for SVs
# deletions:
if "SVTYPE=DEL" in line:
#try:
size = end_position - position
Expand All @@ -115,11 +133,12 @@ def main():
size_range = "1kbplus"
#except:
# size_range = "all"
# insertions:
if "SVTYPE=INS" in line:
size_range="ins"


if "SVTYPE=DEL" in line or "SVTYPE=DEL" in line:
# adds quality mappings if deletion
if "SVTYPE=DEL" in line:
callers = support.lstrip(",").split(",")
callers.sort()
while len(callers) > 0:
Expand All @@ -133,6 +152,7 @@ def main():
if "SVTYPE=DUP" in line and (tab_split[9].split(":")[0] == "0/1" or tab_split[9].split(":")[0] == "1/1"):
tab_split[6] = "Unknown"

# prints final line
print "\t".join(tab_split[:10])

main()
8 changes: 8 additions & 0 deletions resources/correct_max_position.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
import sys

for line in sys.stdin:
# line is header; continue
if line.startswith('#'):
sys.stdout.write(line)
continue
# line contains variant
else:
tab_split = line.strip().split("\t")
# only one item in line
if len(tab_split) == 1:
continue
# full VCF entry
else:
# get position, end, chr1, and chr2
position = int(tab_split[1])
end = int(line.replace("CIEND","XXXXX").split("END=")[-1].split(";")[0].split("\t")[0].split(",")[0])
chr2 = line.split("CHR2=")[-1].split(";")[0].split("\t")[0]
chr1 = line.split("\t")[0].split("chr")[-1]

# if chr1 and chr2 are the same, and the start position is greater than the end
if end < position and chr1 == chr2:
# move the smaller coordinate (end) into the start position column
tab_split[1] = str(end)
# correct the info field to reflect position
info_fields = tab_split[7].split(";")
for i in range(len(info_fields)):
if "END=" in info_fields[i]:
Expand Down

0 comments on commit 5c1fb27

Please sign in to comment.