diff --git a/.circ b/.circ new file mode 100644 index 0000000..e201850 --- /dev/null +++ b/.circ @@ -0,0 +1,42 @@ +# add project folder to PATH +projectDir=$(dirname "$(realpath "$_")") +export PATH=${projectDir}:$PATH + +NXF_OPTIONS=$(nextflow help run | grep -E -o '\-\w+\.?' | sort -u) +PIPE_OPTIONS=$(grep -E log chipseq-pipeline.nf | grep -E -o '\-{2}[^ ]+ ') + +_ci() { # bash completion function for the 'ci' helper script + local cur prev + + cur=${COMP_WORDS[COMP_CWORD]} + prev=${COMP_WORDS[COMP_CWORD-1]} + + case ${COMP_CWORD} in + 1) + COMPREPLY=($(compgen -W "run validate cleanup" -- "${cur}")) + ;; + [2-9]|[1-9][0-9]) + case ${COMP_WORDS[1]} in + run) + case "${cur}" in + -*) + COMPREPLY=($(compgen -o default -W "$NXF_OPTIONS $PIPE_OPTIONS" -- "${cur}")) + ;; + *) + COMPREPLY=($(compgen -o default -- "${cur}")) # pass the current word so only matching files are offered + ;; + esac + ;; + validate) + COMPREPLY=($(compgen -f -- "${cur}")) + ;; + esac + ;; + *) + COMPREPLY=() + ;; + esac +} + +# add command autocompletion +complete -o nospace -F _ci ci diff --git a/CHANGELOG.md b/CHANGELOG.md index 769a7fb..7eafbc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # ChIP-nf Changelog +## Version 0.2.0 + +- Update groupTuple for merging and fix input sorting for Zerone - resolves #3 +- Add parameter to set MACS2 temp dir and set default to process folder +- Use global fragment length from cli option when not shifting - close #2 +- Fix issue with samples using the same control +- Add more test data +- Update pipeline with Zerone process and change config and readme + ## Version 0.1.0 First version diff --git a/README.adoc b/README.adoc index fb8e37e..6572fa0 100644 --- a/README.adoc +++ b/README.adoc @@ -4,7 +4,7 @@ :macs2-outfiles: https://github.com/taoliu/MACS#output-files :pvalue: pass:q[[red]#___-log_10(P)___#] :circle-shield: https://circleci.com/gh/guigolab/chip-nf.svg?style=shield -:nf-shield: https://img.shields.io/badge/nextflow-%E2%89%A50.17.0-blue.svg +:nf-shield: https://img.shields.io/badge/nextflow-%E2%89%A50.23.1-blue.svg image:{nf-shield}["Nextflow", link="https://nextflow.io", 
window="_blank"] image:{circle-shield}["CircleCI status", link="https://circleci.com/gh/guigolab/chip-nf", window="_blank"] diff --git a/chipseq-pipeline.nf b/chipseq-pipeline.nf index 163be24..427224b 100755 --- a/chipseq-pipeline.nf +++ b/chipseq-pipeline.nf @@ -1,17 +1,40 @@ -params.dbFile = 'chipseq-pipeline.db' -params.genome = 'data/genome.fa' -params.genomeIndex = '' -params.genomeSize = 'hs' -params.fragmentLength = 200 +// Define defaults +def defaults = [ + shift: false, + mismatches: 2, + multimaps: 10, + rescale: false, + genomeSize: 'hs', + fragmentLength: 200, + minMatchedBases: 0.8, + qualityThreshold: 26, + zeroneMinConfidence: 0, + removeDuplicates: false, + replicatePattern: '.[12]', + dbFile: 'chipseq-pipeline.db', + tmpDirMACS2: '.', +] + +// params for test run +params.index = "${baseDir}/data/index.tsv" +params.genome = "${baseDir}/data/genome.fa" + +// params defaults params.help = false -params.index = 'data/index.tsv' -params.minMatchedBases = 0.8 -params.mismatches = 2 -params.multimaps = 10 -params.qualityThreshold = 26 -params.rescale = false -params.removeDuplicates = false -params.shift = false +params.genomeIndex = '' +params.shift = defaults.shift +params.dbFile = defaults.dbFile +params.rescale = defaults.rescale +params.multimaps = defaults.multimaps +params.genomeSize = defaults.genomeSize +params.mismatches = defaults.mismatches +params.fragmentLength = 0 +params.minMatchedBases = defaults.minMatchedBases +params.qualityThreshold = defaults.qualityThreshold +params.removeDuplicates = defaults.removeDuplicates +params.replicatePattern = defaults.replicatePattern +params.zeroneMinConfidence = defaults.zeroneMinConfidence +params.tmpDirMACS2 = defaults.tmpDirMACS2 //print usage if (params.help) { @@ -27,18 +50,23 @@ if (params.help) { log.info ' --help Show this message and exit.' log.info ' --index TSV_FILE Tab separted file containing information about the data.' log.info ' --genome GENOME_FILE Reference genome file.' 
- log.info ' --genome-index GENOME_INDEX_ FILE Reference genome index file.' + log.info ' --genome-index GENOME_INDEX_FILE Reference genome index file.' log.info ' --genome-size GENOME_SIZE Reference genome size for MACS2 callpeaks. Must be one of' - log.info ' MACS2 precomputed sizes: hs, mm, dm, ce. (Default: hs)' - log.info ' --mismatches MISMATCHES Sets the maximum number/percentage of mismatches allowed for a read (Default: 2).' - log.info ' --multimaps MULTIMAPS Sets the maximum number of mappings allowed for a read (Default: 10).' - log.info ' --min-matched-bases BASES Sets the minimum number/percentage of bases that have to match with the reference (Default: 0.80).' - log.info ' --quality-threshold THRESHOLD Sets the sequence quality threshold for a base to be considered as low-quality (Default: 26).' - log.info ' --fragment-length LENGTH Sets the fragment length globally for all samples (Default: 200).' - log.info ' --remove-duplicates Remove duplicate alignments instead of just flagging them (Default: false).' + log.info " MACS2 precomputed sizes: hs, mm, dm, ce. (Default: '${defaults.genomeSize}')" + log.info " --db-file FILE Output file where to store results information (Default: '${defaults.dbFile}')" + log.info " --replicate-pattern PATTERN Glob pattern used to match replicates (Default: '${defaults.replicatePattern}')." + log.info " --mismatches MISMATCHES Sets the maximum number/percentage of mismatches allowed for a read (Default: '${defaults.mismatches}'). " + log.info " --multimaps MULTIMAPS Sets the maximum number of mappings allowed for a read (Default: '${defaults.multimaps}')." + log.info " --min-matched-bases BASES Sets the minimum number/percentage of bases that have to match with the reference (Default: '${defaults.minMatchedBases}')." + log.info " --quality-threshold THRESHOLD Sets the sequence quality threshold for a base to be considered as low-quality (Default: '${defaults.qualityThreshold}')." 
+ log.info " --fragment-length LENGTH Sets the fragment length globally for all samples (Default: '${defaults.fragmentLength}')." + log.info " --zerone-min-confidence CONFIDENCE Make Zerone print targets with confidence higher than CONFIDENCE (Default: ${defaults.zeroneMinConfidence})." + log.info " --remove-duplicates Remove duplicate alignments instead of just flagging them (Default: '${defaults.removeDuplicates}')." log.info ' --rescale Rescale peak scores to conform to the format supported by the' - log.info ' UCSC genome browser (score must be <1000) (Default: false).' - log.info ' --shift Move fragments ends and apply global extsize in peak calling. (Default: false).' + log.info " UCSC genome browser (score must be <1000) (Default: '${defaults.rescale}')." + log.info " --shift Move fragments ends and apply global extsize in peak calling (Default: '${defaults.shift}')." + log.info " If '--shift' is set and '--fragment-length' is not specified the global fragment length" + log.info " is forced to '${defaults.fragmentLength}'." log.info '' exit 1 } @@ -54,6 +82,7 @@ if (!params.genome) { if (!params.index) { exit 1, "Please specify the input table file" } + ////// End of input parameters check //////// ////// Print parameters /////// @@ -82,6 +111,11 @@ log.info "Max Multimaps : ${params.multimaps}" log.info "Minimum Matched Bases : ${params.minMatchedBases}" log.info "Low Quality Threshold : ${params.qualityThreshold}" log.info '' +log.info "Zerone parameters" +log.info '-----------------' +log.info '' +log.info "Confidence Threshold : ${params.zeroneMinConfidence}" +log.info '' genome = file(params.genome) index = file(params.index) @@ -89,20 +123,23 @@ index = file(params.index) fastqs = Channel .from(index.readLines()) .map { line -> - def list = line.split() + def list = line.tokenize() def mergeId = list[0] def id = list[1] - def path = file(list[2]) + def path = resolveFile(list[2], index) def controlId = list[3] def mark = list[4] - def fragLen = list.size() == 6 ? 
list[5] as Integer : -1 + def fragLen = params.fragmentLength + if ( params.shift || !fragLen ) { + fragLen = list[5] as Integer + } def message = '[INFO] ' - if (params.shift) { - message += "Using global fragment length `${params.fragmentLength}` and compute shift size by " + if ( params.shift ) { + message += "Using global fragment length `${params.fragmentLength ?: defaults.fragmentLength}` and compute shift size by " } else { message += "Using " } - if ( fragLen != -1) { + if ( fragLen ) { message += "fragment length `${fragLen}` for ${mergeId}" } else { message += "estimated fragment length for ${mergeId}" @@ -194,7 +231,7 @@ process mapping { singleBams = Channel.create() groupedBams = Channel.create() -bams.groupTuple(by: [0,3,4]) +bams.groupTuple(by: [0,3,4,5,6]) .choice(singleBams, groupedBams) { it[2].size() > 1 ? 1 : 0 } @@ -209,7 +246,7 @@ process mergeBam { script: def cpus = task.cpus - def prefix = prefix.sort().join(':') + prefix = prefix.sort().join(':') """ ( samtools view -H ${bam} | grep -v '@RG'; @@ -224,13 +261,13 @@ process mergeBam { """ } singleBams .mix(mergedBams) .map { mergeId, prefix, bam, controlId, mark, fragLen, view -> [ mergeId, bam, controlId, mark, fragLen, view].flatten() } -.into { bamsMarkDup } +.set { bamsMarkDup } process markDup { @@ -276,7 +315,7 @@ process readCount { process model { when: - fragLen == -1 + !fragLen input: set prefix, file(bam), controlId, mark, fragLen, view from modelBams @@ -349,7 +388,15 @@ controlBams .map { c, t -> [t[0], t[1], c[1], t[3], t[4], t[5]] } -.into { bamsNarrowPeakCall; bamsBroadPeakCall } +.tap { bamsNarrowPeakCall; bamsBroadPeakCall } +.map { replicateId, bam, control, mark, fragLen, view -> + def sampleId = replicateId.replaceAll(/${params.replicatePattern}$/,'') // strip the trailing replicate pattern; 'def' keeps the variable local to the closure + [sampleId, bam, control, mark, view] +} +.groupTuple(by:[0,3,4], sort: {it.baseName}) +.set {bamsZerone} + +def globalFragmentLength = params.fragmentLength ?: defaults.fragmentLength process 
narrowPeakCall { @@ -362,14 +409,14 @@ process narrowPeakCall { set prefix, file("peakOut/${prefix}*.bdg"), mark, fragLen, val("pileupBedGraphs") into pileupBedGraphFiles, pileupBedGraphFilesPileupSignalTracks, pileupBedGraphFilesFeSignalTracks script: - def extSize = params.shift ? params.fragmentLength : fragLen - def shiftSize = params.shift ? Math.round((fragLen - params.fragmentLength) / 2) : 0 + def extSize = params.shift ? globalFragmentLength : fragLen + def shiftSize = params.shift ? Math.round((fragLen - globalFragmentLength) / 2) : 0 """ # narrow peaks and preliminary signal tracks macs2 callpeak -t ${bam} -c ${control} -n ${prefix} --outdir peakOut \ -f BAM -g ${params.genomeSize} -p 1e-2 \ --nomodel --shift=${shiftSize} --extsize=${extSize} \ - --keep-dup all -B --SPMR + --keep-dup all -B --SPMR --tempdir ${params.tmpDirMACS2} """ } @@ -384,14 +431,14 @@ process narrowPeakCallNoInput { set prefix, file("peakOut/${prefix}*.bdg"), mark, fragLen, val("pileupBedGraphs") into pileupBedGraphFilesNoInput, pileupBedGraphFilesPileupSignalTracksNoInput, pileupBedGraphFilesFeSignalTracksNoInput script: - def extSize = params.shift ? params.fragmentLength : fragLen - def shiftSize = params.shift ? Math.round((fragLen - params.fragmentLength) / 2) : 0 + def extSize = params.shift ? globalFragmentLength : fragLen + def shiftSize = params.shift ? Math.round((fragLen - globalFragmentLength) / 2) : 0 """ # narrow peaks and preliminary signal tracks macs2 callpeak -t ${bam} -n ${prefix} --outdir peakOut \ -f BAM -g ${params.genomeSize} -p 1e-2 \ --nomodel --shift=${shiftSize} --extsize=${extSize} \ - --keep-dup all -B --SPMR + --keep-dup all -B --SPMR --tempdir ${params.tmpDirMACS2} """ } @@ -403,7 +450,7 @@ crossedBams.map{ c, t -> def count = treat < control ? 
treat : control [s[0], s[1], s[2], s[3], count/1000000, s[4]] } -.into{ pileupBedGraphFilesPvalSignalTracks } +.set{ pileupBedGraphFilesPvalSignalTracks } process broadPeakCall { @@ -416,14 +463,14 @@ process broadPeakCall { set prefix, file("peakOut/${prefix}_peaks.gappedPeak"), mark, fragLen, val("gappedPeak") into gappedPeakFiles script: - def extSize = params.shift ? params.fragmentLength : fragLen - def shiftSize = params.shift ? Math.round((fragLen - params.fragmentLength) / 2) : 0 + def extSize = params.shift ? globalFragmentLength : fragLen + def shiftSize = params.shift ? Math.round((fragLen - globalFragmentLength) / 2) : 0 """ # Broad and Gapped Peaks macs2 callpeak -t ${bam} -c ${control} -n ${prefix} --outdir peakOut \ -f BAM -g ${params.genomeSize} -p 1e-2 --broad \ --nomodel --shift=${shiftSize} --extsize=${extSize} \ - --keep-dup all + --keep-dup all --tempdir ${params.tmpDirMACS2} """ } @@ -438,14 +485,14 @@ process broadPeakCallNoInput { set prefix, file("peakOut/${prefix}_peaks.gappedPeak"), mark, fragLen, val("gappedPeak") into gappedPeakFilesNoInput script: - def extSize = params.shift ? params.fragmentLength : fragLen - def shiftSize = params.shift ? Math.round((fragLen - params.fragmentLength) / 2) : 0 + def extSize = params.shift ? globalFragmentLength : fragLen + def shiftSize = params.shift ? 
Math.round((fragLen - globalFragmentLength) / 2) : 0 """ # Broad and Gapped Peaks macs2 callpeak -t ${bam} -n ${prefix} --outdir peakOut \ -f BAM -g ${params.genomeSize} -p 1e-2 --broad \ --nomodel --shift=${shiftSize} --extsize=${extSize} \ - --keep-dup all + --keep-dup all --tempdir ${params.tmpDirMACS2} """ } @@ -551,9 +598,29 @@ process pvalSignalTracks { """ } +process zerone { + + input: + set prefix, file(bam), file(control), mark, view from bamsZerone + + output: + set prefix, file("${prefix}_zerone.01"), mark, val("zeroneMatrix") into zeroneMatrixFiles + set prefix, file("${prefix}_zerone.bed"), mark, val("zeroneBed") into zeroneBedFiles + set prefix, file("${prefix}_zerone_merged.bed"), mark, val("zeroneMergedBed") into zeroneMergedBedFiles + + script: + def awkScaleMergedBed = '$0~/^#/ || $NF=$NF*1000' + def awkMatrix2Bed = '$0~/^#/ || $0=$1 OFS $2 OFS $3 OFS $NF*1000' + """ + zerone -c ${params.zeroneMinConfidence} -0 ${control.join(",")} -1 ${bam.join(",")} > ${prefix}_zerone.01 + awk -F"\\t" '${awkMatrix2Bed}' OFS="\\t" ${prefix}_zerone.01 > ${prefix}_zerone.bed + zerone -c ${params.zeroneMinConfidence} -l -0 ${control.join(",")} -1 ${bam.join(",")} | awk -F"\\t" '${awkScaleMergedBed}' OFS="\\t" > ${prefix}_zerone_merged.bed + """ +} + process NRF { input: - set prefix, file(bam), controlId, mark, view from bams4NRF + set prefix, file(bam), controlId, mark, fragLen, view from bams4NRF output: set prefix, stdout into NRFBams @@ -614,13 +681,38 @@ metrics.cross( .map { it + [ '-', '-' ] } +).mix( + zeroneMatrixFiles.mix(zeroneBedFiles).mix(zeroneMergedBedFiles) + .map { prefix, path, mark, view -> + [prefix, path, mark, '-', view, '-', '-'] + } ) .collectFile(name: pdb.name, storeDir: pdb.parent, newLine: true) { prefix, path, mark, fragLen, view, nrf, frip -> [ prefix, path, mark, fragLen, view, nrf, frip ].join("\t") } -.subscribe { + + +workflow.onComplete { log.info "" log.info "-----------------------" log.info "Pipeline run completed." 
log.info "-----------------------" } + +/* + * Given a string path resolve it against the index file location. + * Params: + * - str: a string value representing the file path to be resolved + * - index: path location against which relative paths need to be resolved + */ +def resolveFile( str, index ) { + if( str.startsWith('/') || str =~ /^[\w\d]*:\// ) { + return file(str) + } + else if( index instanceof Path ) { + return index.parent.resolve(str) + } + else { + return file(str) + } +} diff --git a/ci b/ci new file mode 100755 index 0000000..0e623d9 --- /dev/null +++ b/ci @@ -0,0 +1,39 @@ +#!/bin/bash +set -e +set -o pipefail + +DB_FILE=chipseq-pipeline.db +VALIDATE_DIR=validate-ci + +getPath() { # print the canonical path of each argument + if command -v realpath >/dev/null 2>&1; then + realpath "$@" + else + readlink -f "$@" + fi +} + +case "$1" in + run) + shift + echo "Running test pipeline..." >&2 + nextflow run . -resume "$@" + ;; + validate) + shift + f=$(getPath "${1-data/md5s}") + echo "Validating test results..." >&2 + [[ -s "${DB_FILE}" ]] || { echo "Results file '${DB_FILE}' is missing or empty" >&2; exit 1; } + mkdir -p "${VALIDATE_DIR}" && cd "${VALIDATE_DIR}" + cut -f 2 "../${DB_FILE}" | xargs -I{} ln -fs {} + md5sum -c "${f}" + ;; + cleanup) + echo "Cleaning up test results..." >&2 + find "${VALIDATE_DIR}" -type l -exec rm {} \+ + find "${VALIDATE_DIR}" -type d -empty -exec rmdir {} \+ + ;; + *) + echo "Usage: ci {run|validate|cleanup}" >&2 + exit 1 +esac diff --git a/circle.yml b/circle.yml index e70ef20..fe6f598 100644 --- a/circle.yml +++ b/circle.yml @@ -8,8 +8,8 @@ machine: test: pre: - - docker pull guigolab/chip-nf@sha256:c08d9c4653e4e8ea95d03ef1668b1c222dea0d5b033f2afd4afbc0e9bb558434 - - curl -fsSL get.nextflow.io | bash + - docker pull guigolab/chip-nf@sha256:f912436e8791a9d1f9cadf76099f760151435a43d7873712ee692cb2b0f8947e + - curl -fsSL get.nextflow.io | bash && mv nextflow $HOME/bin override: - - ./nextflow run . 
- - bash -x validate-ci.sh \ No newline at end of file + - ./ci run -profile circleci && ./ci validate + \ No newline at end of file diff --git a/data/AWP.K4m3.1.fastq.gz b/data/AWP.K4m3.1.fastq.gz index ceadd48..ff929a0 100644 Binary files a/data/AWP.K4m3.1.fastq.gz and b/data/AWP.K4m3.1.fastq.gz differ diff --git a/data/AWP.K4m3.2.fastq.gz b/data/AWP.K4m3.2.fastq.gz new file mode 100644 index 0000000..8f93adf Binary files /dev/null and b/data/AWP.K4m3.2.fastq.gz differ diff --git a/data/EL3.K4m3.1.fastq.gz b/data/EL3.K4m3.1.fastq.gz new file mode 100644 index 0000000..6588add Binary files /dev/null and b/data/EL3.K4m3.1.fastq.gz differ diff --git a/data/EL3.K4m3.2.fastq.gz b/data/EL3.K4m3.2.fastq.gz new file mode 100644 index 0000000..e3e87c6 Binary files /dev/null and b/data/EL3.K4m3.2.fastq.gz differ diff --git a/data/I.AWP.K4m3.1.fastq.gz b/data/I.AWP.K4m3.1.fastq.gz index c86b7ae..b0aae80 100644 Binary files a/data/I.AWP.K4m3.1.fastq.gz and b/data/I.AWP.K4m3.1.fastq.gz differ diff --git a/data/I.AWP.K4m3.2.fastq.gz b/data/I.AWP.K4m3.2.fastq.gz new file mode 100644 index 0000000..5e37cb1 Binary files /dev/null and b/data/I.AWP.K4m3.2.fastq.gz differ diff --git a/data/I.EL3.K4m3.1.fastq.gz b/data/I.EL3.K4m3.1.fastq.gz new file mode 100644 index 0000000..068c689 Binary files /dev/null and b/data/I.EL3.K4m3.1.fastq.gz differ diff --git a/data/WLP.K36m3.1.fastq.gz b/data/WLP.K36m3.1.fastq.gz index f661b57..ac000ba 100644 Binary files a/data/WLP.K36m3.1.fastq.gz and b/data/WLP.K36m3.1.fastq.gz differ diff --git a/data/WLP.K36m3.2.fastq.gz b/data/WLP.K36m3.2.fastq.gz new file mode 100644 index 0000000..a85d890 Binary files /dev/null and b/data/WLP.K36m3.2.fastq.gz differ diff --git a/data/index.tsv b/data/index.tsv index 47d5e79..7832b52 100644 --- a/data/index.tsv +++ b/data/index.tsv @@ -1,3 +1,9 @@ -WLP.K36m3.1 WLP.K36m3.1 data/WLP.K36m3.1.fastq.gz - H3K36m3 80 -AWP.K4m3.1 AWP.K4m3.1 data/AWP.K4m3.1.fastq.gz I.AWP.K4m3.1 H3K4m3 -I.AWP.K4m3.1 I.AWP.K4m3.1 
data/I.AWP.K4m3.1.fastq.gz I.AWP.K4m3.1 input +WLP.K36m3.1 WLP.K36m3.1 WLP.K36m3.1.fastq.gz - H3K36m3 80 +WLP.K36m3.2 WLP.K36m3.2 WLP.K36m3.2.fastq.gz - H3K36m3 +AWP.K4m3.1 AWP.K4m3.1 AWP.K4m3.1.fastq.gz I.AWP.K4m3.1 H3K4m3 +I.AWP.K4m3.1 I.AWP.K4m3.1 I.AWP.K4m3.1.fastq.gz I.AWP.K4m3.1 input +AWP.K4m3.2 AWP.K4m3.2 AWP.K4m3.2.fastq.gz I.AWP.K4m3.2 H3K4m3 +I.AWP.K4m3.2 I.AWP.K4m3.2 I.AWP.K4m3.2.fastq.gz I.AWP.K4m3.2 input +EL3.K4m3.1 EL3.K4m3.1 EL3.K4m3.1.fastq.gz I.EL3.K4m3.1 H3K4m3 +EL3.K4m3.2 EL3.K4m3.2 EL3.K4m3.2.fastq.gz I.EL3.K4m3.1 H3K4m3 +I.EL3.K4m3.1 I.EL3.K4m3.1 I.EL3.K4m3.1.fastq.gz I.EL3.K4m3.1 input 255 diff --git a/data/md5s b/data/md5s index 419e5a3..9eff1fc 100644 --- a/data/md5s +++ b/data/md5s @@ -5,11 +5,48 @@ db2ef6de651dbd6d0a2ed803c9ec93bd *AWP.K4m3.1.fc_signal.bw 9c1df58926ee19d638c76bfd7999d1ac *AWP.K4m3.1_peaks.gappedPeak 4d5981b48b3a206e7ab473957789cff5 *AWP.K4m3.1_peaks.narrowPeak 5b8f66bc4217d467a69ab964c7079a38 *AWP.K4m3.1_primary_picard.bam +8d61338247c1807a564a9e498ddfb987 *AWP.K4m3.2.fc_signal.bw +dbef979a8ebd2db846e0beadd0618dcf *AWP.K4m3.2.pileup_signal.bw +d7214a6deefb34a1e90ed8d0d7d71585 *AWP.K4m3.2.pval_signal.bw +67b5d943ff7033dd1c4dfd7fd594d87c *AWP.K4m3.2_peaks.broadPeak +7609f418bbc2c7f6c364c09eac8d3680 *AWP.K4m3.2_peaks.gappedPeak +90d96c977329133693e113ee1c7fa2c5 *AWP.K4m3.2_peaks.narrowPeak +9030a6b845f20babe6ec19358ee8dd67 *AWP.K4m3.2_primary_picard.bam +0ac3e9988011c2dee7399b7a72787bfe *AWP.K4m3_zerone.01 +5ff908163d8c3b285c1005afcff7a2de *AWP.K4m3_zerone.bed +42d2e968ea8cf8a9ceb6f74470f0110b *AWP.K4m3_zerone_merged.bed +b26041395bace0725b39fab9fe362493 *EL3.K4m3.1.fc_signal.bw +69df2fee075f248bf8f46116afa2f494 *EL3.K4m3.1.pileup_signal.bw +a87616e3ddaf237838b41788aa9316d4 *EL3.K4m3.1.pval_signal.bw +5089e03b92f9b512094571316b42d79c *EL3.K4m3.1_peaks.broadPeak +5e7e2df8c358d3cd847c8b05df8d7bcd *EL3.K4m3.1_peaks.gappedPeak +c7dad83f7cbb445212de0dd46e784bc9 *EL3.K4m3.1_peaks.narrowPeak +a2e19949c33f4641f21f14a5a73dbfc2 
*EL3.K4m3.1_primary_picard.bam +d00c97f685453e6c34b0b2f85316d444 *EL3.K4m3.2.fc_signal.bw +13fb4b33319853efa1d09a1156e07df3 *EL3.K4m3.2.pileup_signal.bw +67ac7d3dab203338302deb873b332b98 *EL3.K4m3.2.pval_signal.bw +7a841d2f78c7c28eb85cd79b74ce8d6f *EL3.K4m3.2_peaks.broadPeak +c66362708918c5175791ef9bcb6650ca *EL3.K4m3.2_peaks.gappedPeak +52240055f06797ec8d31665a13356317 *EL3.K4m3.2_peaks.narrowPeak +e29638e254934bf106a5896d355da91d *EL3.K4m3.2_primary_picard.bam +bed16a748cbad9c77401ea46c3ce8021 *EL3.K4m3_zerone.01 +86b7205c919899c486001f7a578dab29 *EL3.K4m3_zerone.bed +6fb0cfb72bec72e98fa0659d12233264 *EL3.K4m3_zerone_merged.bed 7f35b43c906ca1117d2a811355c047d8 *I.AWP.K4m3.1.pileup_signal.bw e1b0b2657262afec92d8aeba415f62a5 *I.AWP.K4m3.1_primary_picard.bam +d834205d2a469e915856566ab29044fa *I.AWP.K4m3.2.pileup_signal.bw +0f72000903d5b9f1de0a17209b89a919 *I.AWP.K4m3.2_primary_picard.bam +97196c005895c7f84549e3088bc1fdfa *I.EL3.K4m3.1.pileup_signal.bw +9860fa935e89e5283f32661f692a6a57 *I.EL3.K4m3.1_primary_picard.bam e154e70494309d49e6cae46d9181150f *WLP.K36m3.1.fc_signal.bw d3c99b50ba68dd30374d7196b9d5b345 *WLP.K36m3.1.pileup_signal.bw a9273e1a62abb091364e6085601e703d *WLP.K36m3.1_peaks.broadPeak 767defca60ec95b8cd30934a4d0ef010 *WLP.K36m3.1_peaks.gappedPeak 7cee456ff38e172a7bf1410b2acc5084 *WLP.K36m3.1_peaks.narrowPeak 77c6a67f18af9a796651a29e5bd53939 *WLP.K36m3.1_primary_picard.bam +c394ed6b1ecbb484cef26802bd066fd3 *WLP.K36m3.2.fc_signal.bw +c412ee2767cc3f3f17cc1f52972fb8fb *WLP.K36m3.2.pileup_signal.bw +fe67e6d0663266473b39baaf6539770a *WLP.K36m3.2_peaks.broadPeak +b41cba4a16137f5b4932fef7105260ac *WLP.K36m3.2_peaks.gappedPeak +77597a1e62a818576b662cd98bb1c642 *WLP.K36m3.2_peaks.narrowPeak +d35e62ecfec2ef1ac29e07bcf7724362 *WLP.K36m3.2_primary_picard.bam diff --git a/data/md5s-100 b/data/md5s-100 new file mode 100644 index 0000000..0cf2547 --- /dev/null +++ b/data/md5s-100 @@ -0,0 +1,52 @@ +4efed0323330dbbd197459f9085169b3 *AWP.K4m3.1.fc_signal.bw 
+c1c157943ba03f05eaf9c6a2f1d4eb96 *AWP.K4m3.1.pileup_signal.bw +9143f0ff37d3daed04599264147656e6 *AWP.K4m3.1.pval_signal.bw +937fe024ca4cf02b0a701555f2031615 *AWP.K4m3.1_peaks.broadPeak +347d68ae52ee0f224eb64e3b057223e1 *AWP.K4m3.1_peaks.gappedPeak +57a179be6ec9aeb46f45b3bff7b71857 *AWP.K4m3.1_peaks.narrowPeak +5b8f66bc4217d467a69ab964c7079a38 *AWP.K4m3.1_primary_picard.bam +cc13313592bcfaf33cf6a5eb76e4dcc9 *AWP.K4m3.2.fc_signal.bw +42a7a2bf9c2d63cb6604f023f03e33f6 *AWP.K4m3.2.pileup_signal.bw +e05be532c26ecddd2be90438245affc3 *AWP.K4m3.2.pval_signal.bw +d76c6903af6372d11dd5528a7c06470c *AWP.K4m3.2_peaks.broadPeak +9709bff0697a89025b74ecef39033196 *AWP.K4m3.2_peaks.gappedPeak +a6ee3a75d32b83907025278244190f7f *AWP.K4m3.2_peaks.narrowPeak +9030a6b845f20babe6ec19358ee8dd67 *AWP.K4m3.2_primary_picard.bam +0ac3e9988011c2dee7399b7a72787bfe *AWP.K4m3_zerone.01 +5ff908163d8c3b285c1005afcff7a2de *AWP.K4m3_zerone.bed +42d2e968ea8cf8a9ceb6f74470f0110b *AWP.K4m3_zerone_merged.bed +38fc54f1e9bedc5c4d84ebb66f65375f *EL3.K4m3.1.fc_signal.bw +dcff002dd2353639bdda54a1b5a70ac9 *EL3.K4m3.1.pileup_signal.bw +7448afe0e5fb4f9b239d8fc0f2f2407b *EL3.K4m3.1.pval_signal.bw +2b7a0e418c41abd8f887f1783f59e047 *EL3.K4m3.1_peaks.broadPeak +50b9699c895d8befd8eb0849fb47c3f6 *EL3.K4m3.1_peaks.gappedPeak +912821fc6145e8a4c9806c6ffbdf5e27 *EL3.K4m3.1_peaks.narrowPeak +a2e19949c33f4641f21f14a5a73dbfc2 *EL3.K4m3.1_primary_picard.bam +9da275255c0be81524502eb58b0f83cf *EL3.K4m3.2.fc_signal.bw +99d82cce9d3393a002de5d2be44a4666 *EL3.K4m3.2.pileup_signal.bw +bb27663f8b9b12aaf8f6acc94744229f *EL3.K4m3.2.pval_signal.bw +e0765246bf8e0152b1f3a8784e412ce3 *EL3.K4m3.2_peaks.broadPeak +50965e9bc45c03d044ff839d327d575e *EL3.K4m3.2_peaks.gappedPeak +2282ff1965ec0076d6d29e527ed1bf87 *EL3.K4m3.2_peaks.narrowPeak +e29638e254934bf106a5896d355da91d *EL3.K4m3.2_primary_picard.bam +bed16a748cbad9c77401ea46c3ce8021 *EL3.K4m3_zerone.01 +86b7205c919899c486001f7a578dab29 *EL3.K4m3_zerone.bed +6fb0cfb72bec72e98fa0659d12233264 
*EL3.K4m3_zerone_merged.bed +82e2570c6c4d3518406e02ef417a655d *I.AWP.K4m3.1.pileup_signal.bw +e1b0b2657262afec92d8aeba415f62a5 *I.AWP.K4m3.1_primary_picard.bam +b3c0cc9e85a1610281495abbd5f737ea *I.AWP.K4m3.2.pileup_signal.bw +0f72000903d5b9f1de0a17209b89a919 *I.AWP.K4m3.2_primary_picard.bam +bba84a5e3a1e87afba6208e90cf5391c *I.EL3.K4m3.1.pileup_signal.bw +9860fa935e89e5283f32661f692a6a57 *I.EL3.K4m3.1_primary_picard.bam +62cc826e910a402a192bd5ad3c821d92 *WLP.K36m3.1.fc_signal.bw +078373fab1fe926f8cdcf535b24fdbb6 *WLP.K36m3.1.pileup_signal.bw +bac546387fd1d18abc82dd2394e531c1 *WLP.K36m3.1_peaks.broadPeak +c0e4bdbfba741c26c49f42c379fbbad5 *WLP.K36m3.1_peaks.gappedPeak +a7c208dba69b32ce14cc31c696b2712e *WLP.K36m3.1_peaks.narrowPeak +77c6a67f18af9a796651a29e5bd53939 *WLP.K36m3.1_primary_picard.bam +e5857681158382210fd58fa174415b5a *WLP.K36m3.2.fc_signal.bw +394203be641a6759df62cd36edad8d86 *WLP.K36m3.2.pileup_signal.bw +00065eacd29132dae12e7358d5aeeec8 *WLP.K36m3.2_peaks.broadPeak +3f8a8df7f958f18f761a5482cb146b19 *WLP.K36m3.2_peaks.gappedPeak +34448fd522a6e313ce3d400220e6d5a8 *WLP.K36m3.2_peaks.narrowPeak +d35e62ecfec2ef1ac29e07bcf7724362 *WLP.K36m3.2_primary_picard.bam diff --git a/data/md5s-shift b/data/md5s-shift new file mode 100644 index 0000000..e7d488a --- /dev/null +++ b/data/md5s-shift @@ -0,0 +1,52 @@ +6e464f8c23e56ae8953baed88f23dbdb *AWP.K4m3.1.fc_signal.bw +0c4ff11c7b71def5222c810371773958 *AWP.K4m3.1.pileup_signal.bw +d1820efe38a37f7c82be7ebeb4bc9d10 *AWP.K4m3.1.pval_signal.bw +a172946ad22bd0bb7c9e3dc0154d5eb5 *AWP.K4m3.1_peaks.broadPeak +114b0edd1fcc4553a969333a3ca05e13 *AWP.K4m3.1_peaks.gappedPeak +46052b35a232b68f6b95493218826241 *AWP.K4m3.1_peaks.narrowPeak +5b8f66bc4217d467a69ab964c7079a38 *AWP.K4m3.1_primary_picard.bam +52e81d72c77eb8b529a186afa36cef78 *AWP.K4m3.2.fc_signal.bw +4434cd0a2db668c4d2061d58b1762938 *AWP.K4m3.2.pileup_signal.bw +a99b15ef668f9427397b10148ccdbfe5 *AWP.K4m3.2.pval_signal.bw +aede649bb1f7a2b47aaac0092e2dca6f 
*AWP.K4m3.2_peaks.broadPeak +2c0786548195b132011d5862f268c8d4 *AWP.K4m3.2_peaks.gappedPeak +f4bc181751c23132abfa5bb67651eca6 *AWP.K4m3.2_peaks.narrowPeak +9030a6b845f20babe6ec19358ee8dd67 *AWP.K4m3.2_primary_picard.bam +0ac3e9988011c2dee7399b7a72787bfe *AWP.K4m3_zerone.01 +5ff908163d8c3b285c1005afcff7a2de *AWP.K4m3_zerone.bed +42d2e968ea8cf8a9ceb6f74470f0110b *AWP.K4m3_zerone_merged.bed +a45984f21ca5fe82257f7d799d9ecfc3 *EL3.K4m3.1.fc_signal.bw +e14a3dc3cce2e812a655b361081ed3d5 *EL3.K4m3.1.pileup_signal.bw +1a96c0f22ef77014f51c3b08a831a877 *EL3.K4m3.1.pval_signal.bw +d6a770775dfab791defc765b985fca6f *EL3.K4m3.1_peaks.broadPeak +c036e8507e181faaa2d420d4d28410bd *EL3.K4m3.1_peaks.gappedPeak +d903a74648a6d349173a851edb432f26 *EL3.K4m3.1_peaks.narrowPeak +a2e19949c33f4641f21f14a5a73dbfc2 *EL3.K4m3.1_primary_picard.bam +232ce97c4e54bdd1bf86fa023831f0d2 *EL3.K4m3.2.fc_signal.bw +450b354d016139e074403e6696451ef8 *EL3.K4m3.2.pileup_signal.bw +96cf3f2786d6c0365d9e5cb6f43f6297 *EL3.K4m3.2.pval_signal.bw +ac5b64d83c14a099f1a4cd87ef082741 *EL3.K4m3.2_peaks.broadPeak +404b708ca2b8ede8ccb270338af77e41 *EL3.K4m3.2_peaks.gappedPeak +55c12ab11dbfc18ac73da2e5dd35b99c *EL3.K4m3.2_peaks.narrowPeak +e29638e254934bf106a5896d355da91d *EL3.K4m3.2_primary_picard.bam +bed16a748cbad9c77401ea46c3ce8021 *EL3.K4m3_zerone.01 +86b7205c919899c486001f7a578dab29 *EL3.K4m3_zerone.bed +6fb0cfb72bec72e98fa0659d12233264 *EL3.K4m3_zerone_merged.bed +644fdbfbe75edb717d64080ceeaf2973 *I.AWP.K4m3.1.pileup_signal.bw +e1b0b2657262afec92d8aeba415f62a5 *I.AWP.K4m3.1_primary_picard.bam +db36be5c19d68cc28027c4e341c9f3ee *I.AWP.K4m3.2.pileup_signal.bw +0f72000903d5b9f1de0a17209b89a919 *I.AWP.K4m3.2_primary_picard.bam +e44b03c92d0e7965e0d82c63246ea4d7 *I.EL3.K4m3.1.pileup_signal.bw +9860fa935e89e5283f32661f692a6a57 *I.EL3.K4m3.1_primary_picard.bam +b20a457ccd0de19a3b3f8115898c8b6f *WLP.K36m3.1.fc_signal.bw +f6d3c12f3021202b75ad79c2f700834e *WLP.K36m3.1.pileup_signal.bw +da0f5b27d3329a05f286da64de55a39b 
*WLP.K36m3.1_peaks.broadPeak +ef3102417af23a36fc1ee8ae8e38b373 *WLP.K36m3.1_peaks.gappedPeak +eab68c6a0c752313ecd4614004866a68 *WLP.K36m3.1_peaks.narrowPeak +77c6a67f18af9a796651a29e5bd53939 *WLP.K36m3.1_primary_picard.bam +32744f9f0ad6b4e9f6af2962bd0d8340 *WLP.K36m3.2.fc_signal.bw +9bfa109869c7fc14aa87e54c15a5d927 *WLP.K36m3.2.pileup_signal.bw +e94ea105908c36476811b06963678f82 *WLP.K36m3.2_peaks.broadPeak +33487b562ff847b31d00c0dda416c510 *WLP.K36m3.2_peaks.gappedPeak +325565ea542735f93fdb34d40385c6eb *WLP.K36m3.2_peaks.narrowPeak +d35e62ecfec2ef1ac29e07bcf7724362 *WLP.K36m3.2_primary_picard.bam diff --git a/docker/Dockerfile b/docker/Dockerfile index d0fd85a..f599115 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -4,6 +4,8 @@ LABEL maintainer "Emilio Palumbo " \ version "1.0" \ description "ChIP-seq analysis pipeline image" +ENV _ZERONE_VERSION 5af03a1 + ENV PATH=/phantompeakqualtools/:$PATH # install needed tools @@ -47,4 +49,12 @@ RUN curl --header "Cookie: oraclelicense=accept-securebackup-cookie" -L http://d # install picard RUN curl -L -o /usr/local/bin/picard.jar https://github.com/broadinstitute/picard/releases/download/2.8.3/picard.jar \ - && chmod +x /usr/local/bin/picard.jar \ No newline at end of file + && chmod +x /usr/local/bin/picard.jar + +# install zerone +RUN mkdir -p zerone && \ + curl -fsSL https://github.com/nanakiksc/zerone/archive/${_ZERONE_VERSION}.tar.gz | tar xz --strip-components 1 -C zerone && \ + make -C zerone && \ + mv zerone/zerone /usr/local/bin && \ + make clean -C zerone + diff --git a/nextflow.config b/nextflow.config index 6b7eff7..330a313 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,6 +6,12 @@ trace.enabled = true docker.enabled = true process { - container = 'guigolab/chip-nf@sha256:c08d9c4653e4e8ea95d03ef1668b1c222dea0d5b033f2afd4afbc0e9bb558434' + container = 'guigolab/chip-nf@sha256:f912436e8791a9d1f9cadf76099f760151435a43d7873712ee692cb2b0f8947e' tag = { prefix ?: genome.baseName } } + +profiles { + 
circleci { + process.$markDup.memory = 300.MB + } +} diff --git a/validate-ci.sh b/validate-ci.sh deleted file mode 100644 index bd66438..0000000 --- a/validate-ci.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -e -set -o pipefail - -DB_FILE=chipseq-pipeline.db - -[[ -s ${DB_FILE} ]] || false - -DIR=validate-ci -mkdir -p ${DIR} && cd ${DIR} -cut -f 2 ../${DB_FILE} | xargs -I{} ln -fs {} -md5sum -c ../data/md5s -cd - && rm -rf ${DIR} \ No newline at end of file