Skip to content

Commit

Permalink
Merge pull request #5 from uclahs-cds/development
Browse files Browse the repository at this point in the history
call-gSV: Initial Development
  • Loading branch information
timothyjsanders authored Feb 27, 2021
2 parents bbc6dd1 + c9f750e commit 025b07b
Show file tree
Hide file tree
Showing 22 changed files with 656 additions and 95 deletions.
36 changes: 36 additions & 0 deletions docker/bcftools/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# bcftools 1.11 built from source on the Ubuntu 20.04 base image
FROM ubuntu:20.04

LABEL maintainer="Tim Sanders <[email protected]>"

# Build dependencies; apt lists are removed afterwards to keep the image
# small (consistent with the delly Dockerfile in this repository)
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
    curl \
    autoconf \
    build-essential \
    cmake \
    g++ \
    gfortran \
    libcurl4-gnutls-dev \
    hdf5-tools \
    libboost-date-time-dev \
    libboost-program-options-dev \
    libboost-system-dev \
    libboost-filesystem-dev \
    libboost-iostreams-dev \
    libbz2-dev \
    libhdf5-dev \
    libncurses-dev \
    liblzma-dev \
    zlib1g-dev \
    && apt clean \
    && rm -rf /var/lib/apt/lists/*

# Fetch and unpack the pinned bcftools release tarball
ADD https://github.com/samtools/bcftools/releases/download/1.11/bcftools-1.11.tar.bz2 /
RUN tar -jxf bcftools-1.11.tar.bz2 \
    && rm bcftools-1.11.tar.bz2

# Use an absolute path rather than one relative to the previous workdir
WORKDIR /bcftools-1.11

RUN ./configure --prefix=/app/bcftools-1.11 \
    && make \
    && make install

# Expose the installed bcftools binaries on PATH
ENV PATH /app/bcftools-1.11/bin:$PATH
54 changes: 54 additions & 0 deletions docker/delly/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Build delly from source on Ubuntu, then copy the static binary into a
# minimal Alpine runtime image
FROM ubuntu:20.04 AS builder

LABEL maintainer="Tim Sanders <[email protected]>"

# install required build packages
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    autoconf \
    build-essential \
    cmake \
    g++ \
    gfortran \
    git \
    libcurl4-gnutls-dev \
    hdf5-tools \
    libboost-date-time-dev \
    libboost-program-options-dev \
    libboost-system-dev \
    libboost-filesystem-dev \
    libboost-iostreams-dev \
    libbz2-dev \
    libhdf5-dev \
    libncurses-dev \
    liblzma-dev \
    zlib1g-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# delly's build locates Boost through BOOST_ROOT
ENV BOOST_ROOT /usr

# install delly; STATIC=1 so the resulting binary has no glibc dependency
# and can run on the Alpine (musl) runtime stage below
RUN cd /opt \
    && git clone --recursive https://github.com/dellytools/delly.git --branch v0.8.6 \
    && cd /opt/delly/ \
    && make STATIC=1 all \
    && make install


# Runtime stage: only the delly binary plus bash
FROM alpine:3.13

# LABELs do not carry over from the build stage, so restate the maintainer
LABEL maintainer="Tim Sanders <[email protected]>"

RUN mkdir -p /opt/delly/bin
WORKDIR /opt/delly/bin
# Reference the build stage by name instead of the positional index 0
COPY --from=builder /opt/delly/bin/delly .

# --no-cache avoids writing an apk index that would need manual removal
RUN apk add --no-cache bash

# Workdir
WORKDIR /root/

# Add Delly to PATH
ENV PATH="/opt/delly/bin:${PATH}"

CMD ["/bin/sh"]
18 changes: 18 additions & 0 deletions docker/rtgtools/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# RTG Tools 3.11 (prebuilt linux-x64 release) on Ubuntu 20.04
FROM ubuntu:20.04

LABEL maintainer="Tim Sanders <[email protected]>"

# unzip is needed only to unpack the release archive; clean apt lists to
# keep the image small
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
    unzip \
    && apt clean \
    && rm -rf /var/lib/apt/lists/*

ADD https://github.com/RealTimeGenomics/rtg-tools/releases/download/3.11/rtg-tools-3.11-linux-x64.zip /

# RTG Tools requires you answer whether or not you want to enable automatic usage reporting (via 'rtg help')
# so this line passes "n" to that prompt after extraction
RUN unzip rtg-tools-3.11-linux-x64.zip \
    && rm rtg-tools-3.11-linux-x64.zip \
    && echo "n" | /rtg-tools-3.11/rtg help

# Use an absolute path rather than one relative to the previous workdir
WORKDIR /rtg-tools-3.11

ENV PATH="/rtg-tools-3.11:${PATH}"
5 changes: 5 additions & 0 deletions docker/sha512/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Minimal Alpine image with bash, used by the pipeline's sha512 step
# NOTE(review): this is a single-stage build, so the previous unused
# "as builder" stage name was dropped
FROM alpine:3.12

LABEL maintainer="Tim Sanders <[email protected]>"

# --no-cache installs without leaving an apk index to clean up manually
RUN apk add --no-cache bash
3 changes: 0 additions & 3 deletions docker/tool_name/Dockerfile

This file was deleted.

20 changes: 20 additions & 0 deletions docker/vcftools/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Build vcftools 0.1.16 from source, then copy it into a slim runtime image
FROM alpine:3.12 AS builder

LABEL maintainer="Tim Sanders <[email protected]>"

RUN apk --no-cache add alpine-sdk perl zlib-dev curl
RUN set -ex \
    && curl -L -O https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \
    && tar zxf vcftools-0.1.16.tar.gz \
    && rm vcftools-0.1.16.tar.gz \
    && cd vcftools-0.1.16 \
    && ./configure --prefix=/app/vcftools-0.1.16 \
    && make \
    && make install

# Runtime stage: compiled tools plus their runtime dependencies only
FROM alpine:3.12

# LABELs do not carry over from the build stage, so restate the maintainer
LABEL maintainer="Tim Sanders <[email protected]>"

ENV PATH /app/vcftools-0.1.16/bin:$PATH
# Lets the bundled perl scripts locate their modules; $PERL5LIB is empty at
# build time, so the previous ":$PERL5LIB" suffix only added a dangling colon
ENV PERL5LIB /app/vcftools-0.1.16/share/perl5/site_perl

# Single --no-cache layer instead of two separate apk RUNs with manual cleanup
RUN apk add --no-cache libstdc++ perl zlib bash

COPY --from=builder /app/ /app/
100 changes: 100 additions & 0 deletions pipeline/call-gSV.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl=2


log.info """\
======================================
C A L L - G S V N F P I P E L I N E
======================================
Boutros Lab
Current Configuration:
- input:
input_csv: ${params.input_csv}
reference_fasta: ${params.reference_fasta}
reference_fasta_index: ${params.reference_fasta_index}
reference_prefix: ${params.reference_prefix}
exclusion_file: ${params.exclusion_file}
- output:
output_dir: ${params.output_dir}
output_log_dir: ${params.output_log_dir}
temp_dir: ${params.temp_dir}
- options:
save_intermediate_files: ${params.save_intermediate_files}
run_qc: ${params.run_qc}
map_qual: ${params.map_qual}
- tools:
delly: ${params.delly_version}
bcftools: ${params.bcftools_version}
vcftools: ${params.vcftools_version}
rtgtools: ${params.rtgtools_version}
validation tool: ${params.validate_version}
sha512: ${params.sha512_version}
------------------------------------
Starting workflow...
------------------------------------
"""
.stripIndent()

include { validate_file } from './modules/validation'
include { delly_call_sv } from './modules/delly'
include { bcftools_vcf } from './modules/bcftools'
include { rtgtools_vcfstats } from './modules/rtgtools'
include { vcftools_validator } from './modules/vcftools'
include { generate_sha512 } from './modules/sha512'

// Fail fast on required parameters BEFORE any channels are constructed
if (!params.reference_fasta) {
    // error out - must provide a reference FASTA file
    error "***Error: You must specify a reference FASTA file***"
}

if (!params.exclusion_file) {
    // error out - must provide exclusion file
    error "*** Error: You must provide an exclusion file***"
}

// Fall back to the conventional "<fasta>.fai" index path when no explicit
// index is supplied
if (params.reference_fasta_index) {
    reference_fasta_index = params.reference_fasta_index
}
else {
    reference_fasta_index = "${params.reference_fasta}.fai"
}

// One tuple per CSV row: (patient, sample, bam, bai)
input_bam_ch = Channel
    .fromPath(params.input_csv, checkIfExists:true)
    .splitCsv(header:true)
    .map{ row -> tuple(
        row.patient,
        row.sample,
        row.input_bam,
        // assumes the BAM index sits next to the BAM — TODO confirm
        "${row.input_bam}.bai"
        )
    }

// Create channel for validation; flattened so each BAM and the reference
// FASTA are emitted as individual file paths
// NOTE(review): the reference FASTA is emitted once per CSV row, so it is
// re-validated for every sample in a multi-sample input
validation_channel = Channel
    .fromPath(params.input_csv, checkIfExists:true)
    .splitCsv(header:true)
    .map{ row -> [
        row.input_bam,
        params.reference_fasta
        ]
    }
    .flatten()

workflow {
    // Validate all input files, call SVs with delly, convert BCF -> VCF,
    // optionally run QC, and checksum the primary outputs
    validate_file(validation_channel)
    delly_call_sv(input_bam_ch, params.reference_fasta, reference_fasta_index, params.exclusion_file)
    bcftools_vcf(delly_call_sv.out.bcf_sv_file, delly_call_sv.out.bam_sample_name)
    if (params.run_qc) {
        rtgtools_vcfstats(bcftools_vcf.out.vcf_sv_file, delly_call_sv.out.bam_sample_name)
        vcftools_validator(bcftools_vcf.out.vcf_sv_file, delly_call_sv.out.bam_sample_name)
    }
    generate_sha512(delly_call_sv.out.bcf_sv_file.mix(bcftools_vcf.out.vcf_sv_file))
}
10 changes: 10 additions & 0 deletions pipeline/config/execute.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Per-process resource allocation for the full-node ("execute") profile
process {
    // Input-file validation is lightweight
    withName: validate_file {
        cpus = 1
        memory = 1.GB
    }
    // delly SV calling gets nearly the whole high-memory node
    withName: delly_call_sv {
        cpus = 63
        memory = 940.GB
    }
}
10 changes: 10 additions & 0 deletions pipeline/config/lowmem.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Per-process resource allocation for the low-memory profile
process {
    // Input-file validation is lightweight
    withName: validate_file {
        cpus = 1
        memory = 1.GB
    }
    // Minimal allocation for delly on small nodes
    withName: delly_call_sv {
        cpus = 1
        memory = 3.GB
    }
}
111 changes: 111 additions & 0 deletions pipeline/config/methods.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
class log_output_dir {
    /**
     * Ensure the log directory at `path` is usable: if it already exists it
     * must be writable, otherwise the full directory tree is created.
     * Throws an Exception when the path is read-only or cannot be created.
     */
    static def check_permissions(path) {
        def dir = new File(path)

        if (dir.exists()) {
            if (!dir.canWrite()) {
                throw new Exception("${path} is not writable")
            }
            return
        }

        // Path is missing: attempt to create the whole directory tree
        if (!dir.mkdirs()) {
            throw new Exception("${path} does not exist and could not create")
        }
    }
}

methods {
    // Derive the timestamped log output directory from the input CSV and the
    // dataset-registration parameters
    set_log_output_dir = {

        def patient
        def sample

        // assumes patient and sample names are the first two columns of the
        // input CSV
        // NOTE(review): splitEachLine visits every line including the header,
        // so patient/sample hold the values from the LAST line — confirm the
        // CSV contains a single sample row
        def reader = new FileReader(params.input_csv)
        reader.splitEachLine(',') { parts -> [patient = parts[0], sample = parts[1]] }

        def date = new Date().format('yyyyMMdd-HHmmss')
        // The data root differs between the SGE cluster and the default mount
        if (params.sge_scheduler) {
            params.avere_prefix = '/data/data'
        } else {
            params.avere_prefix = '/hot/data'
        }

        if (params.blcds_registered_dataset == true) {
            // BUG FIX: compare the length as an integer; the previous
            // GString-vs-Integer comparison ("${...length()}" != 11) was
            // always true, so this exception fired for every registered
            // dataset regardless of id length
            if (params.dataset_id.length() != 11) {
                throw new Exception("Dataset id must be eleven characters long")
            }
            // First four characters of the dataset id encode the disease
            def disease = "${params.dataset_id.substring(0,4)}"
            params.output_log_dir = "${params.avere_prefix}/$disease/${params.dataset_id}/${patient}/${sample}/DNA/WGS/aligned/${params.reference_prefix}/log/call-gSV/$date"
            params.disease = "${disease}"
        } else {
            params.output_log_dir = "${params.output_dir}/$date/log/"
            params.disease = null
        }
        params.patient = "${patient}"
        params.sample = "${sample}"
        params.date = "${date}"
    }

    // Process specific scope
    set_process = {
        // Monitor process jobs with local (not slurm) executor
        process.executor = "local"
    }

    // Location of Nextflow temp directories
    set_env = {
        workDir = params.temp_dir
        NXF_WORK = params.temp_dir
        NXF_TEMP = params.temp_dir
        NXF_HOME = params.temp_dir
    }

    // Pipeline monitoring and metric files
    set_timeline = {
        timeline.enabled = true
        timeline.file = "${params.output_log_dir}/timeline.html"
    }
    set_trace = {
        trace.enabled = true
        trace.file = "${params.output_log_dir}/trace.txt"
    }
    set_report = {
        report.enabled = true
        report.file = "${params.output_log_dir}/report.html"
    }

    // Set up env, timeline, trace, and report above.
    setup = {
        methods.set_log_output_dir()
        log_output_dir.check_permissions(params.output_log_dir)
        methods.set_process()
        methods.set_env()
        methods.set_timeline()
        methods.set_trace()
        methods.set_report()
    }
}


// Run the full configuration setup defined above
methods.setup()

params {
    // Pipeline tool versions
    delly_version = '0.8.6'
    bcftools_version = '1.11'
    vcftools_version = '0.1.16'
    rtgtools_version = '3.11'
    validate_version = '1.0.0'
    sha512_version = '0.1'
}

// Enable docker
docker {
    enabled = true
    sudo = params.sge_scheduler // Set to true if run on SGE
    // BUG FIX: runOptions was assigned twice, so the second assignment
    // silently discarded the "-u $(id -u):$(id -g)" user mapping; both the
    // user mapping and the supplementary group-adds now go in one string
    runOptions = "-u \$(id -u):\$(id -g) \$(for i in `id --real --groups`; do echo -n \"--group-add=\$i \"; done)"
}
10 changes: 10 additions & 0 deletions pipeline/config/midmem.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Per-process resource allocation for the mid-memory profile
process {
    // Input-file validation is lightweight
    withName: validate_file {
        cpus = 1
        memory = 1.GB
    }
    // delly gets most of a mid-memory node
    withName: delly_call_sv {
        cpus = 71
        memory = 130.GB
    }
}
Loading

0 comments on commit 025b07b

Please sign in to comment.