Skip to content
This repository has been archived by the owner on Nov 6, 2020. It is now read-only.

Commit

Permalink
Changes to get parliament2 working with Singularity on HPC systems (#46)
Browse files Browse the repository at this point in the history
* Adding Singularity file
spython recipe DockerFile > Singularity

* Trying to create resources.tar.gz file beforehand

* Changing tar options

* tar extract

* try different version of chmod

* Setting entrypoint

* Moving back to runscript

* Fixing paths for singularity

* Minor change to maintainer flag

* Fix manta path

* need to learn how to trigger a singularity build.

* Changes to paths for survivor and svtyper

* trying to run svviz

* typo in svviz

* trigger

* Removing memfree option from parallel

* Conda environment for svtyper

* removing double sourcing of conda.sh

* Creating environment for svtyper

* changing dependency to python 2.7

* making Singularity changes to Dockerfile

* build hook to create resources.tar.gz on dockerhub

* Removing Singularity build file as now we are able to build with docker
and run with singularity

* Smaller travis test at the end

* Changing test to just do Breakdancer and SVTyper
  • Loading branch information
sameerd authored and Samantha Zarate committed Feb 15, 2019
1 parent e134fcd commit 37d6306
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 52 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ jobs:
- script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix lumpy --lumpy && ls -sh /home/dnanexus/out
- script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix manta --manta && ls -sh /home/dnanexus/out
- script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix svviz --breakdancer --svviz && ls -sh /home/dnanexus/out
- script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix full --breakdancer --breakseq --cnvnator --delly_deletion --delly_duplication --delly_insertion --delly_inversion --lumpy --manta && ls -sh /home/dnanexus/out
- script: docker run -v /home/dnanexus/in:/home/dnanexus/in -v /home/dnanexus/out:/home/dnanexus/out dnanexus/parliament2:$TAG --bam /home/dnanexus/in/small_input.bam --bai /home/dnanexus/in/small_input.bai --ref_genome /home/dnanexus/in/ref.fa.gz --fai /home/dnanexus/in/ref.fa.fai --prefix full --breakdancer --genotype && ls -sh /home/dnanexus/out


deploy:
provider: script
Expand All @@ -42,4 +43,4 @@ branches:

language: python
python:
- "2.7.13"
- "2.7.13"
6 changes: 6 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ RUN mkdir -p /home/dnanexus/in /home/dnanexus/out
WORKDIR /home/dnanexus
COPY parliament2.py .
COPY parliament2.sh .
COPY svtyper_env.yml .

RUN conda create -y --name svviz_env svviz
# We have to use a slightly different method for
# svtyper as it installs software directly from git
RUN conda env create --name svtyper_env --file svtyper_env.yml

RUN /bin/bash -c "source /etc/profile.d/dnanexus.environment.sh"

Expand Down
5 changes: 5 additions & 0 deletions hooks/pre_build
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# Docker Hub pre_build hook.
# Packs the resources/ directory into a gzip-compressed tar archive named
# resources.tar.gz, written to the current working directory.

printf '%s\n' "Compressing the resources directory"

# Long-option spelling of `tar -czf`: create, gzip-compress, write to file.
tar --create --gzip --file=resources.tar.gz resources/
106 changes: 59 additions & 47 deletions parliament2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ fi
ref_genome=$(python /home/dnanexus/get_reference.py)
lumpy_exclude_string=""
if [[ "${ref_genome}" == "b37" ]]; then
lumpy_exclude_string="-x b37.bed"
lumpy_exclude_string="-x /home/dnanexus/b37.bed"
elif [[ "$ref_genome" == "hg19" ]]; then
lumpy_exclude_string="-x hg19.bed"
lumpy_exclude_string="-x /home/dnanexus/hg19.bed"
else
lumpy_exclude_string="-x hg38.bed"
lumpy_exclude_string="-x /home/dnanexus/hg38.bed"
fi

export lumpy_scripts="/home/dnanexus/lumpy-sv/scripts"
Expand Down Expand Up @@ -129,8 +129,8 @@ else
touch /home/dnanexus/in/done.txt
fi

ln -s /home/dnanexus/in/input.bam /home/dnanexus/input.bam
ln -s /home/dnanexus/in/input.bam.bai /home/dnanexus/input.bam.bai
ln -s /home/dnanexus/in/input.bam
ln -s /home/dnanexus/in/input.bam.bai

wait

Expand All @@ -151,7 +151,7 @@ if [[ "${run_breakseq}" == "True" ]]; then
mkdir -p /home/dnanexus/out/log_files/breakseq_logs/
bplib="/breakseq2_bplib_20150129/breakseq2_bplib_20150129.gff"
work="breakseq2"
timeout 6h ./breakseq2-2.2/scripts/run_breakseq2.py --reference ref.fa \
timeout 6h /home/dnanexus/breakseq2-2.2/scripts/run_breakseq2.py --reference ref.fa \
--bams input.bam --work "${work}" \
--bwa /usr/local/bin/bwa --samtools /usr/local/bin/samtools \
--bplib_gff "${bplib}" \
Expand Down Expand Up @@ -261,7 +261,7 @@ if [[ "${run_cnvnator}" == "True" ]] || [[ "${run_delly}" == "True" ]] || [[ "${

if [[ "${run_lumpy}" == "True" ]]; then
echo "Running Lumpy for contig ${contig}"
timeout 6h ./lumpy-sv/bin/lumpyexpress -B chr."${count}".bam -o lumpy."${count}".vcf ${lumpy_exclude_string} -k 1> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stdout.log 2> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stderr.log &
timeout 6h /home/dnanexus/lumpy-sv/bin/lumpyexpress -B chr."${count}".bam -o lumpy."${count}".vcf ${lumpy_exclude_string} -k 1> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stdout.log 2> /home/dnanexus/out/log_files/lumpy_logs/"${prefix}".lumpy."${count}".stderr.log &
lumpy_merge_command="$lumpy_merge_command lumpy.$count.vcf"
fi
fi
Expand All @@ -274,11 +274,6 @@ fi

wait

# Only install SVTyper if necessary
if [[ "${run_genotype_candidates}" == "True" ]]; then
pip install git+https://github.com/hall-lab/svtyper.git -q &
fi

echo "Converting results to VCF format"
mkdir -p /home/dnanexus/out/sv_caller_results/

Expand Down Expand Up @@ -346,6 +341,19 @@ fi) &

(if [[ "${run_breakseq}" == "True" ]]; then
echo "Convert Breakseq results to VCF format"
if [[ ! -f breakseq2/breakseq_genotyped.gff && ! -f breakseq2/breakseq.vcf.gz && ! -f breakseq2/final.bam ]]; then
echo "No outputs of Breakseq found. Continuing."
else
mv breakseq2/breakseq.vcf.gz .
gunzip breakseq.vcf.gz

cp breakseq2/breakseq_genotyped.gff /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.gff
cp breakseq.vcf /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.vcf
cp breakseq2/final.bam /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.bam
fi

# Do the log files after we copy the output so that the
# cd /home/dnanexus command doesn't spoil singularity
if [[ -z $(find "${work}" -name "*.log") ]]; then
echo "No Breakseq log files found."
else
Expand All @@ -356,16 +364,7 @@ fi) &
cd /home/dnanexus || return
fi

if [[ ! -f breakseq2/breakseq_genotyped.gff && ! -f breakseq2/breakseq.vcf.gz && ! -f breakseq2/final.bam ]]; then
echo "No outputs of Breakseq found. Continuing."
else
mv breakseq2/breakseq.vcf.gz .
gunzip breakseq.vcf.gz

cp breakseq2/breakseq_genotyped.gff /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.gff
cp breakseq.vcf /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.vcf
cp breakseq2/final.bam /home/dnanexus/out/sv_caller_results/"${prefix}".breakseq.bam
fi
fi) &

(if [[ "${run_delly_deletion}" == "True" ]]; then
Expand Down Expand Up @@ -428,24 +427,25 @@ set +e

# Run SVtyper and SVviz
if [[ "${run_genotype_candidates}" == "True" ]]; then
echo "Running SVTyper"
# SVviz and BreakSeq have mutually exclusive versions of pysam required, so
# SVviz is only installed later and if necessary
if [[ "${run_svviz}" == "True" ]]; then
pip install svviz -q &
fi

# Only install SVTyper if necessary
#pip install git+https://github.com/hall-lab/svtyper.git -q &
source /miniconda/etc/profile.d/conda.sh
conda activate svtyper_env


echo "Running SVTyper"
mkdir -p /home/dnanexus/out/svtyped_vcfs/

i=0
# Breakdancer
if [[ "${run_breakdancer}" == "True" ]]; then
echo "Running SVTyper on Breakdancer outputs"
mkdir /home/dnanexus/svtype_breakdancer
if [[ -f /home/dnanexus/breakdancer.vcf ]]; then
bash ./parallelize_svtyper.sh /home/dnanexus/breakdancer.vcf svtype_breakdancer /home/dnanexus/"${prefix}".breakdancer.svtyped.vcf input.bam
mkdir svtype_breakdancer
if [[ -f breakdancer.vcf ]]; then
bash /home/dnanexus/parallelize_svtyper.sh breakdancer.vcf svtype_breakdancer "${prefix}".breakdancer.svtyped.vcf input.bam

sed -i 's/SAMPLE/breakdancer/g' /home/dnanexus/"${prefix}".breakdancer.svtyped.vcf
sed -i 's/SAMPLE/breakdancer/g' "${prefix}".breakdancer.svtyped.vcf
else
# Log that there is no Breakdancer VCF to genotype and carry on.
echo "No Breakdancer VCF file found. Continuing."
fi
Expand All @@ -454,9 +454,9 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then
# Breakseq
if [[ "${run_breakseq}" == "True" ]]; then
echo "Running SVTyper on BreakSeq outputs"
mkdir /home/dnanexus/svtype_breakseq
if [[ -f /home/dnanexus/breakseq.vcf ]]; then
bash ./parallelize_svtyper.sh /home/dnanexus/breakseq.vcf svtype_breakseq /home/dnanexus/"${prefix}".breakseq.svtyped.vcf input.bam
mkdir svtype_breakseq
if [[ -f breakseq.vcf ]]; then
bash /home/dnanexus/parallelize_svtyper.sh breakseq.vcf svtype_breakseq "${prefix}".breakseq.svtyped.vcf input.bam
else
echo "No BreakSeq VCF file found. Continuing."
fi
Expand All @@ -465,10 +465,10 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then
# CNVnator
if [[ "${run_cnvnator}" == "True" ]]; then
echo "Running SVTyper on CNVnator outputs"
mkdir /home/dnanexus/svtype_cnvnator
if [[ -f /home/dnanexus/cnvnator.vcf ]]; then
python /get_uncalled_cnvnator.py | python /add_ciend.py 1000 > /home/dnanexus/cnvnator.ci.vcf < cnvnator.vcf
bash ./parallelize_svtyper.sh /home/dnanexus/cnvnator.vcf svtype_cnvnator "${prefix}".cnvnator.svtyped.vcf input.bam
mkdir svtype_cnvnator
if [[ -f cnvnator.vcf ]]; then
python /get_uncalled_cnvnator.py | python /add_ciend.py 1000 > cnvnator.ci.vcf < cnvnator.vcf
bash /home/dnanexus/parallelize_svtyper.sh cnvnator.vcf svtype_cnvnator "${prefix}".cnvnator.svtyped.vcf input.bam
else
echo "No CNVnator VCF file found. Continuing."
fi
Expand All @@ -481,8 +481,8 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then
echo "No Delly VCF file found. Continuing."
else
for item in delly*vcf; do
mkdir /home/dnanexus/svtype_delly_"${i}"
bash ./parallelize_svtyper.sh /home/dnanexus/"${item}" svtype_delly_"${i}" /home/dnanexus/delly.svtyper."${i}".vcf input.bam
mkdir svtype_delly_"${i}"
bash /home/dnanexus/parallelize_svtyper.sh "${item}" svtype_delly_"${i}" delly.svtyper."${i}".vcf input.bam
i=$((i + 1))
done

Expand All @@ -497,9 +497,9 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then
# Lumpy
if [[ "${run_lumpy}" == "True" ]]; then
echo "Running SVTyper on Lumpy outputs"
mkdir /home/dnanexus/svtype_lumpy
if [[ -f /home/dnanexus/lumpy.vcf ]]; then
bash ./parallelize_svtyper.sh /home/dnanexus/lumpy.vcf svtype_lumpy /home/dnanexus/"${prefix}".lumpy.svtyped.vcf input.bam
mkdir svtype_lumpy
if [[ -f lumpy.vcf ]]; then
bash /home/dnanexus/parallelize_svtyper.sh lumpy.vcf svtype_lumpy "${prefix}".lumpy.svtyped.vcf input.bam
else
echo "No Lumpy VCF file found. Continuing."
fi
Expand All @@ -509,14 +509,17 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then
if [[ "${run_manta}" == "True" ]]; then
echo "Running SVTyper on Manta outputs"
if [[ -f diploidSV.vcf ]]; then
mv diploidSV.vcf /home/dnanexus/"${prefix}".manta.svtyped.vcf
mv diploidSV.vcf "${prefix}".manta.svtyped.vcf
else
echo "No Manta VCF file found. Continuing."
fi
fi

wait

# Leave svtyper_env through the conda shell function loaded from conda.sh,
# mirroring the `conda activate svtyper_env` call made before SVTyper ran.
conda deactivate

# Prepare inputs for SURVIVOR
echo "Preparing inputs for SURVIVOR"
for item in *svtyped.vcf; do
Expand All @@ -543,9 +546,16 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then

# Run svviz
if [[ "${run_svviz}" == "True" ]]; then

# SVviz and BreakSeq have mutually exclusive versions of pysam required, so
# SVviz is only installed later and if necessary
conda activate svviz_env
#pip install svviz -q &


echo "Running svviz"
mkdir -p /home/dnanexus/out/log_files/svviz_logs/
mkdir /home/dnanexus/svviz_outputs
mkdir svviz_outputs

grep \# survivor_sorted.vcf > header.txt

Expand Down Expand Up @@ -576,9 +586,11 @@ if [[ "${run_genotype_candidates}" == "True" ]]; then

threads="$(nproc)"
threads=$((threads / 2))
parallel --memfree 5G --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log
# removing the memfree option as it doesn't seem to exist in Ubuntu 14.04
#parallel --memfree 5G --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log
parallel --retries 2 --verbose -a commands.txt eval 1>/home/dnanexus/out/log_files/svviz_logs/svviz.stdout.log 2>/home/dnanexus/out/log_files/svviz_logs/svviz.stderr.log

cd /home/dnanexus/svviz_outputs && tar -czf /home/dnanexus/out/"${prefix}".svviz_outputs.tar.gz .
cd svviz_outputs && tar -czf /home/dnanexus/out/"${prefix}".svviz_outputs.tar.gz .
fi
fi
fi
Expand Down
6 changes: 4 additions & 2 deletions resources/home/dnanexus/parallelize_svtyper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ for item in $directory*; do
echo "svtyper -B $input_bam -i $directory/$i >> $directory/$i" >> $output.cmds
done

parallel --memfree 5G --retries 2 --verbose -a $output.cmds eval 2> /dev/null
# We don't have the memfree option in the Ubuntu 14.04 version of parallel
#parallel --memfree 5G --retries 2 --verbose -a $output.cmds eval 2> /dev/null
parallel --retries 2 --verbose -a $output.cmds eval 2> /dev/null

grep \# $input > $output
for item in $directory/*; do
grep -v \# $item >> $output
done
done
2 changes: 1 addition & 1 deletion resources/usr/bin/runManta
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ done < contigs

python /miniconda/bin/configManta.py --referenceFasta ref.fa --normalBam input.bam --runDir manta $region_string

python /home/dnanexus/manta/runWorkflow.py -m local -j 16
python ./manta/runWorkflow.py -m local -j 16
6 changes: 6 additions & 0 deletions svtyper_env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment definition for svtyper.
# The Dockerfile builds it with:
#   conda env create --name svtyper_env --file svtyper_env.yml
name: svtyper_env
# No channels are listed here.
# NOTE(review): presumably conda falls back to its configured default
# channels when this key is empty - confirm.
channels:
dependencies:
# Python interpreter version for this environment.
- python=2.7
# svtyper is installed by pip as an editable checkout of its git repository.
# NOTE(review): the entry below is expected to be nested under `pip:` -
# confirm the indentation in the real file.
- pip:
- "--editable=git+https://github.com/hall-lab/svtyper.git#egg=svtyper_git"

0 comments on commit 37d6306

Please sign in to comment.