From 37d6306530cccbb61d503e070fb50b8e85405f56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sameer=20D=E2=80=99Costa?= Date: Fri, 15 Feb 2019 11:49:40 -0600 Subject: [PATCH] Changes to get parliament2 working with Singularity on HPC systems (#46) * Adding Singularity file spython recipe DockerFile > Singularity * Trying to create resources.tar.gz file beforehand * Changing tar options * tar xtract * try different version of chmod * Setting entrypoint * Moving back to runscript * Fixing paths for singularity * Minor change to maintainer flag * Fix manta path * need to learn how to trigger a singularity build. * Changes to paths for survivor and svtyper * trying to run svviz * typo in svviz * trigger * Removing memfree option from parallel * Conda environment for svtyper * removing double sourcing of conda.sh * Creating environment for svtyper * changing dependency to python 2.7 * making Singularity changes to Dockerfile * build hook to create resources.tar.gz on dockerhub * Removing Singularity build file as now we are able to build with docker and run with singularity * Smaller travis test at the end * Changing test to just do Breakdancer and SVTyper --- .travis.yml | 5 +- Dockerfile | 6 + hooks/pre_build | 5 + parliament2.sh | 106 ++++++++++-------- .../home/dnanexus/parallelize_svtyper.sh | 6 +- resources/usr/bin/runManta | 2 +- svtyper_env.yml | 6 + 7 files changed, 84 insertions(+), 52 deletions(-) create mode 100644 hooks/pre_build create mode 100644 svtyper_env.yml diff --git a/.travis.yml b/.travis.yml index 7e3e10d5..6e6db4b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,8 @@ jobs: - script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix lumpy --lumpy && ls -sh /home/dnanexus/out - script: docker run -v 
/home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix manta --manta && ls -sh /home/dnanexus/out - script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix svviz --breakdancer --svviz && ls -sh /home/dnanexus/out - - script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix full --breakdancer --breakseq --cnvnator --delly_deletion --delly_duplication --delly_insertion --delly_inversion --lumpy --manta && ls -sh /home/dnanexus/out + - script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix full --breakdancer --genotype && ls -sh /home/dnanexus/out + deploy: provider: script @@ -42,4 +43,4 @@ branches: language: python python: - - "2.7.13" \ No newline at end of file + - "2.7.13" diff --git a/Dockerfile b/Dockerfile index 94f7e771..6daa7346 100644 --- a/Dockerfile +++ b/Dockerfile @@ -96,6 +96,12 @@ RUN mkdir -p /home/dnanexus/in /home/dnanexus/out WORKDIR /home/dnanexus COPY parliament2.py . COPY parliament2.sh . +COPY svtyper_env.yml . 
+ +RUN conda create -y --name svviz_env svviz +# We have to use a slightly different method for +# svtyper as it installs software directly from git +RUN conda env create --name svtyper_env --file svtyper_env.yml RUN /bin/bash -c "source /etc/profile.d/dnanexus.environment.sh" diff --git a/hooks/pre_build b/hooks/pre_build new file mode 100644 index 00000000..1644d08f --- /dev/null +++ b/hooks/pre_build @@ -0,0 +1,5 @@ +#!/bin/bash + +echo "Compressing the resources directory" + +tar -czf resources.tar.gz resources/ diff --git a/parliament2.sh b/parliament2.sh index 5e8952ef..5084d7f3 100644 --- a/parliament2.sh +++ b/parliament2.sh @@ -75,11 +75,11 @@ fi ref_genome=$(python /home/dnanexus/get_reference.py) lumpy_exclude_string="" if [[ "${ref_genome}" == "b37" ]]; then - lumpy_exclude_string="-x b37.bed" + lumpy_exclude_string="-x /home/dnanexus/b37.bed" elif [[ "$ref_genome" == "hg19" ]]; then - lumpy_exclude_string="-x hg19.bed" + lumpy_exclude_string="-x /home/dnanexus/hg19.bed" else - lumpy_exclude_string="-x hg38.bed" + lumpy_exclude_string="-x /home/dnanexus/hg38.bed" fi export lumpy_scripts="/home/dnanexus/lumpy-sv/scripts" @@ -129,8 +129,8 @@ else touch /home/dnanexus/in/done.txt fi -ln -s /home/dnanexus/in/input.bam /home/dnanexus/input.bam -ln -s /home/dnanexus/in/input.bam.bai /home/dnanexus/input.bam.bai +ln -s /home/dnanexus/in/input.bam +ln -s /home/dnanexus/in/input.bam.bai wait @@ -151,7 +151,7 @@ if [[ "${run_breakseq}" == "True" ]]; then mkdir -p /home/dnanexus/out/log_files/breakseq_logs/ bplib="/breakseq2_bplib_20150129/breakseq2_bplib_20150129.gff" work="breakseq2" - timeout 6h ./breakseq2-2.2/scripts/run_breakseq2.py --reference ref.fa \ + timeout 6h /home/dnanexus/breakseq2-2.2/scripts/run_breakseq2.py --reference ref.fa \ --bams input.bam --work "${work}" \ --bwa /usr/local/bin/bwa --samtools /usr/local/bin/samtools \ --bplib_gff "${bplib}" \ @@ -261,7 +261,7 @@ if [[ "${run_cnvnator}" == "True" ]] || [[ "${run_delly}" == "True" ]] || [[ 
"${ if [[ "${run_lumpy}" == "True" ]]; then echo "Running Lumpy for contig ${contig}" - timeout 6h ./lumpy-sv/bin/lumpyexpress -B chr."${count}".bam -o lumpy."${count}".vcf ${lumpy_exclude_string} -k 1> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stdout.log 2> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stderr.log & + timeout 6h /home/dnanexus/lumpy-sv/bin/lumpyexpress -B chr."${count}".bam -o lumpy."${count}".vcf ${lumpy_exclude_string} -k 1> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stdout.log 2> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stderr.log & lumpy_merge_command="$lumpy_merge_command lumpy.$count.vcf" fi fi @@ -274,11 +274,6 @@ fi wait -# Only install SVTyper if necessary -if [[ "${run_genotype_candidates}" == "True" ]]; then - pip install git+https://github.com/hall-lab/svtyper.git -q & -fi - echo "Converting results to VCF format" mkdir -p /home/dnanexus/out/sv_caller_results/ @@ -346,6 +341,19 @@ fi) & (if [[ "${run_breakseq}" == "True" ]]; then echo "Convert Breakseq results to VCF format" + if [[ ! -f breakseq2/breakseq_genotyped.gff && ! -f breakseq2/breakseq.vcf.gz && ! -f breakseq2/final.bam ]]; then + echo "No outputs of Breakseq found. Continuing." + else + mv breakseq2/breakseq.vcf.gz . + gunzip breakseq.vcf.gz + + cp breakseq2/breakseq_genotyped.gff /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.gff + cp breakseq.vcf /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.vcf + cp breakseq2/final.bam /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.bam + fi + + # Do the log files after we copy the output so that the + # cd /home/dnanexus command doesn't spoil singularity if [[ -z $(find "${work}" -name "*.log") ]]; then echo "No Breakseq log files found." else @@ -356,16 +364,7 @@ fi) & cd /home/dnanexus || return fi - if [[ ! -f breakseq2/breakseq_genotyped.gff && ! -f breakseq2/breakseq.vcf.gz && ! 
-f breakseq2/final.bam ]]; then - echo "No outputs of Breakseq found. Continuing." - else - mv breakseq2/breakseq.vcf.gz . - gunzip breakseq.vcf.gz - cp breakseq2/breakseq_genotyped.gff /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.gff - cp breakseq.vcf /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.vcf - cp breakseq2/final.bam /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.bam - fi fi) & (if [[ "${run_delly_deletion}" == "True" ]]; then @@ -428,24 +427,25 @@ set +e # Run SVtyper and SVviz if [[ "${run_genotype_candidates}" == "True" ]]; then - echo "Running SVTyper" - # SVviz and BreakSeq have mutually exclusive versions of pysam required, so - # SVviz is only installed later and if necessary - if [[ "${run_svviz}" == "True" ]]; then - pip install svviz -q & - fi + # Only install SVTyper if necessary + #pip install git+https://github.com/hall-lab/svtyper.git -q & + source /miniconda/etc/profile.d/conda.sh + conda activate svtyper_env + + + echo "Running SVTyper" mkdir -p /home/dnanexus/out/svtyped_vcfs/ i=0 # Breakdancer if [[ "${run_breakdancer}" == "True" ]]; then echo "Running SVTyper on Breakdancer outputs" - mkdir /home/dnanexus/svtype_breakdancer - if [[ -f /home/dnanexus/breakdancer.vcf ]]; then - bash ./parallelize_svtyper.sh /home/dnanexus/breakdancer.vcf svtype_breakdancer /home/dnanexus/"${prefix}".breakdancer.svtyped.vcf input.bam + mkdir svtype_breakdancer + if [[ -f breakdancer.vcf ]]; then + bash /home/dnanexus/parallelize_svtyper.sh breakdancer.vcf svtype_breakdancer "${prefix}".breakdancer.svtyped.vcf input.bam - sed -i 's/SAMPLE/breakdancer/g' /home/dnanexus/"${prefix}".breakdancer.svtyped.vcf + sed -i 's/SAMPLE/breakdancer/g' "${prefix}".breakdancer.svtyped.vcf else "No Breakdancer VCF file found. Continuing." 
fi @@ -454,9 +454,9 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then # Breakseq if [[ "${run_breakseq}" == "True" ]]; then echo "Running SVTyper on BreakSeq outputs" - mkdir /home/dnanexus/svtype_breakseq - if [[ -f /home/dnanexus/breakseq.vcf ]]; then - bash ./parallelize_svtyper.sh /home/dnanexus/breakseq.vcf svtype_breakseq /home/dnanexus/"${prefix}".breakseq.svtyped.vcf input.bam + mkdir svtype_breakseq + if [[ -f breakseq.vcf ]]; then + bash /home/dnanexus/parallelize_svtyper.sh breakseq.vcf svtype_breakseq "${prefix}".breakseq.svtyped.vcf input.bam else echo "No BreakSeq VCF file found. Continuing." fi @@ -465,10 +465,10 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then # CNVnator if [[ "${run_cnvnator}" == "True" ]]; then echo "Running SVTyper on CNVnator outputs" - mkdir /home/dnanexus/svtype_cnvnator - if [[ -f /home/dnanexus/cnvnator.vcf ]]; then - python /get_uncalled_cnvnator.py | python /add_ciend.py 1000 > /home/dnanexus/cnvnator.ci.vcf < cnvnator.vcf - bash ./parallelize_svtyper.sh /home/dnanexus/cnvnator.vcf svtype_cnvnator "${prefix}".cnvnator.svtyped.vcf input.bam + mkdir svtype_cnvnator + if [[ -f cnvnator.vcf ]]; then + python /get_uncalled_cnvnator.py | python /add_ciend.py 1000 > cnvnator.ci.vcf < cnvnator.vcf + bash /home/dnanexus/parallelize_svtyper.sh cnvnator.vcf svtype_cnvnator "${prefix}".cnvnator.svtyped.vcf input.bam else echo "No CNVnator VCF file found. Continuing." fi @@ -481,8 +481,8 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then echo "No Delly VCF file found. Continuing." 
else for item in delly*vcf; do - mkdir /home/dnanexus/svtype_delly_"${i}" - bash ./parallelize_svtyper.sh /home/dnanexus/"${item}" svtype_delly_"${i}" /home/dnanexus/delly.svtyper."${i}".vcf input.bam + mkdir svtype_delly_"${i}" + bash /home/dnanexus/parallelize_svtyper.sh "${item}" svtype_delly_"${i}" delly.svtyper."${i}".vcf input.bam i=$((i + 1)) done @@ -497,9 +497,9 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then # Lumpy if [[ "${run_lumpy}" == "True" ]]; then echo "Running SVTyper on Lumpy outputs" - mkdir /home/dnanexus/svtype_lumpy - if [[ -f /home/dnanexus/lumpy.vcf ]]; then - bash ./parallelize_svtyper.sh /home/dnanexus/lumpy.vcf svtype_lumpy /home/dnanexus/"${prefix}".lumpy.svtyped.vcf input.bam + mkdir svtype_lumpy + if [[ -f lumpy.vcf ]]; then + bash /home/dnanexus/parallelize_svtyper.sh lumpy.vcf svtype_lumpy "${prefix}".lumpy.svtyped.vcf input.bam else echo "No Lumpy VCF file found. Continuing." fi @@ -509,7 +509,7 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then if [[ "${run_manta}" == "True" ]]; then echo "Running SVTyper on Manta outputs" if [[ -f diploidSV.vcf ]]; then - mv diploidSV.vcf /home/dnanexus/"${prefix}".manta.svtyped.vcf + mv diploidSV.vcf "${prefix}".manta.svtyped.vcf else echo "No Manta VCF file found. Continuing." 
fi @@ -517,6 +517,9 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then wait + # deactivate svtyper + source deactivate + # Prepare inputs for SURVIVOR echo "Preparing inputs for SURVIVOR" for item in *svtyped.vcf; do @@ -543,9 +546,16 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then # Run svviz if [[ "${run_svviz}" == "True" ]]; then + + # SVviz and BreakSeq require mutually exclusive versions of pysam, so + # SVviz lives in its own conda environment, activated only if necessary + conda activate svviz_env + #pip install svviz -q & + + echo "Running svviz" mkdir -p /home/dnanexus/out/log_files/svviz_logs/ - mkdir /home/dnanexus/svviz_outputs + mkdir svviz_outputs grep \# survivor_sorted.vcf > header.txt @@ -576,9 +586,11 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then threads="$(nproc)" threads=$((threads / 2)) - parallel --memfree 5G --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log + # removing the memfree option as it doesn't seem to exist in Ubuntu 14.04 + #parallel --memfree 5G --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log + parallel --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log - cd /home/dnanexus/svviz_outputs && tar -czf /home/dnanexus/out/"${prefix}".svviz_outputs.tar.gz . + cd svviz_outputs && tar -czf /home/dnanexus/out/"${prefix}".svviz_outputs.tar.gz . 
fi fi fi diff --git a/resources/home/dnanexus/parallelize_svtyper.sh b/resources/home/dnanexus/parallelize_svtyper.sh index 8bbd7296..cd148584 100755 --- a/resources/home/dnanexus/parallelize_svtyper.sh +++ b/resources/home/dnanexus/parallelize_svtyper.sh @@ -21,9 +21,11 @@ for item in $directory*; do echo "svtyper -B $input_bam -i $directory/$i >> $directory/$i" >> $output.cmds done -parallel --memfree 5G --retries 2 --verbose -a $output.cmds eval 2> /dev/null +# We don't have the memfree option in the Ubuntu 14.04 version of parallel +#parallel --memfree 5G --retries 2 --verbose -a $output.cmds eval 2> /dev/null +parallel --retries 2 --verbose -a $output.cmds eval 2> /dev/null grep \# $input > $output for item in $directory/*; do grep -v \# $item >> $output -done \ No newline at end of file +done diff --git a/resources/usr/bin/runManta b/resources/usr/bin/runManta index ad0e2cae..8f4aeae0 100755 --- a/resources/usr/bin/runManta +++ b/resources/usr/bin/runManta @@ -7,4 +7,4 @@ done < contigs python /miniconda/bin/configManta.py --referenceFasta ref.fa --normalBam input.bam --runDir manta $region_string -python /home/dnanexus/manta/runWorkflow.py -m local -j 16 \ No newline at end of file +python ./manta/runWorkflow.py -m local -j 16 diff --git a/svtyper_env.yml b/svtyper_env.yml new file mode 100644 index 00000000..35b2bcc2 --- /dev/null +++ b/svtyper_env.yml @@ -0,0 +1,6 @@ +name: svtyper_env +channels: +dependencies: + - python=2.7 + - pip: + - "--editable=git+https://github.com/hall-lab/svtyper.git#egg=svtyper_git"