Merge pull request #70 from nf-core/dev

Dev > Master for 1.3 release
nf-core · Feb 1, 2019 · 1d3f5cc · 1d3f5cc
2 parents 1f0eaa6 + 5e8add9
commit 1d3f5cc
Show file tree

Hide file tree

Showing 11 changed files with 90 additions and 24 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -13,7 +13,7 @@ before_install:
   # Pull the docker image first so the test doesn't wait for this
   - docker pull nfcore/methylseq:dev
   # Fake the tag locally so that the pipeline runs properly
-  - docker tag nfcore/methylseq:dev nfcore/methylseq:1.2
+  - docker tag nfcore/methylseq:dev nfcore/methylseq:1.3
 
 install:
   # Install Nextflow

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,17 @@
 # nf-core/methylseq
 
-## [v1.2](https://github.com/nf-core/methylseq/releases/tag/1.2) - 2018-12-26
+## [v1.3](https://github.com/nf-core/methylseq/releases/tag/1.3) - 2019-02-01
+
+#### New features
+* Added [preseq](http://smithlabresearch.org/software/preseq/) analysis to calculate sample complexity.
+  * This new step can help decide whether sufficient sequencing depth has been reached.
+
+#### Bug fixes
+* Fixed a bug that meant the pipeline only worked with one sample at a time [#66](https://github.com/nf-core/methylseq/issues/66)
+  * Introduced in the previous release: from the TrimGalore step onwards, only one sample would be processed.
+
+
+## [v1.2](https://github.com/nf-core/methylseq/releases/tag/1.2) - 2019-01-02
 
 #### New features
 * Trim 9bp from both ends of both reads for PBAT mode.

diff --git a/Dockerfile b/Dockerfile
@@ -5,4 +5,4 @@ LABEL authors="[email protected]" \
 
 COPY environment.yml /
 RUN conda env create -f /environment.yml && conda clean -a
-ENV PATH /opt/conda/envs/nf-core-methylseq-1.2/bin:$PATH
+ENV PATH /opt/conda/envs/nf-core-methylseq-1.3/bin:$PATH
diff --git a/README.md b/README.md
@@ -31,6 +31,7 @@ Choose between workflows by using `--aligner bismark` (default) or `--aligner bw
 | Sample report                                | Bismark          | -                     |
 | Summary Report                               | Bismark          | -                     |
 | Alignment QC                                 | Qualimap         | Qualimap              |
+| Sample complexity                            | Preseq           | Preseq                |
 | Project Report                               | MultiQC          | MultiQC               |
 
 

diff --git a/Singularity b/Singularity
@@ -4,10 +4,10 @@ Bootstrap:docker
 %labels
     MAINTAINER Phil Ewels <[email protected]>
     DESCRIPTION Container image containing all requirements for the nf-core/methylseq pipeline
-    VERSION 1.2
+    VERSION 1.3
 
 %environment
-    PATH=/opt/conda/envs/nf-core-methylseq-1.2/bin:$PATH
+    PATH=/opt/conda/envs/nf-core-methylseq-1.3/bin:$PATH
     export PATH
 
 %files

diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
@@ -21,6 +21,7 @@
     'Picard MarkDuplicates': ['v_picard_markdups.txt', r"([\d\.]+)"],
     'MethylDackel': ['v_methyldackel.txt', r"(.+)"],
     'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"],
+    'Preseq': ['v_preseq.txt', r"Version: (\S+)"],
     'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"],
 }
 results = OrderedDict()
@@ -41,6 +42,7 @@
 results['Picard MarkDuplicates'] = '<span style="color:#999999;\">N/A</span>'
 results['MethylDackel'] = '<span style="color:#999999;\">N/A</span>'
 results['Qualimap'] = '<span style="color:#999999;\">N/A</span>'
+results['Preseq'] = '<span style="color:#999999;\">N/A</span>'
 results['MultiQC'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex

diff --git a/conf/base.config b/conf/base.config
@@ -17,7 +17,7 @@ process {
   memory = { check_max( 8.GB * task.attempt, 'memory') }
   time = { check_max( 2.h * task.attempt, 'time') }
 
-  errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'terminate' }
+  errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'terminate' }
   maxRetries = 3
   maxErrors = '-1'
 
@@ -52,9 +52,12 @@ process {
     memory = { check_max( 32.GB * task.attempt, 'memory') }
     time = { check_max( 6.h * task.attempt, 'time') }
   }
+  withName:preseq {
+    errorStrategy = 'ignore'
+  }
   withName:get_software_versions {
     validExitStatus = [0,1]
-    errorStrategy = 'ignore'
+    cache = false
   }
 
   withName:bwamem_align {

diff --git a/docs/output.md b/docs/output.md
@@ -15,6 +15,7 @@ and processes data using the following steps:
 * [Methylation Extraction](#methylation-extraction) - calling cytosine methylation steps
 * [Bismark Reports](#bismark-reports) - single-sample and summary analysis reports
 * [Qualimap](#qualimap) - tool for genome alignments QC
+* [Preseq](#preseq) - tool for estimating sample complexity
 * [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline
 * [Pipeline Info](#pipeline-info) - reports from nextflow about the pipeline run
 
@@ -155,6 +156,19 @@ Bismark generates a HTML reports describing results for each sample, as well as
   * Text-based statistics that can be loaded into downstream programs
 
 
+
+## Preseq
+
+[Preseq](http://smithlabresearch.org/software/preseq/) estimates the complexity of a library, showing how many additional unique reads are obtained as the total read count increases. A shallow curve indicates that the library has reached complexity saturation and further sequencing would likely not add further unique reads. The dashed line shows a perfectly complex library where total reads = unique reads.
+
+Note that these are predictive numbers only, not absolute. The MultiQC plot can sometimes show extreme sequencing depths on the X axis - click and drag from the left side of the plot to zoom in on more realistic numbers.
+
+**Output directory: `results/preseq`**
+
+* `sample.ccurve.txt`
+  * This file contains plot values for the complexity curve, plotted in the MultiQC report.
+
+
 ## MultiQC
 [MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available within the report data directory.
 

diff --git a/environment.yml b/environment.yml
@@ -1,14 +1,14 @@
 # You can use this file to create a conda environment for this pipeline:
 #   conda env create -f environment.yml
-name: nf-core-methylseq-1.2
+name: nf-core-methylseq-1.3
 channels:
   - bioconda
   - conda-forge
   - defaults
 dependencies:
   # Dependencies for FastQC
   - conda-forge::openjdk=8.0.144
-  - font-ttf-dejavu-sans-mono=2.37
+  - anaconda::font-ttf-dejavu-sans-mono=2.37
   - fontconfig=2.12.6
   - fastqc=0.11.8
   # Default bismark pipeline
@@ -17,6 +17,7 @@ dependencies:
   - bowtie2=2.3.4.3
   - bismark=0.20.0
   - qualimap=2.2.2b
+  - preseq=2.0.3
   - multiqc=1.7
   # bwa-meth pipeline
   - picard=2.18.21

diff --git a/main.nf b/main.nf
@@ -344,7 +344,7 @@ if( params.notrim ){
 
         input:
         set val(name), file(reads) from ch_read_files_for_trim_galore
-        file wherearemyfiles from ch_wherearemyfiles_for_trimgalore
+        file wherearemyfiles from ch_wherearemyfiles_for_trimgalore.collect()
 
         output:
         set val(name), file('*fq.gz') into ch_trimmed_reads_for_alignment
@@ -388,10 +388,10 @@ if( params.aligner == 'bismark' ){
         input:
         set val(name), file(reads) from ch_trimmed_reads_for_alignment
         file index from ch_bismark_index_for_bismark_align.collect()
-        file wherearemyfiles from ch_wherearemyfiles_for_bismark_align
+        file wherearemyfiles from ch_wherearemyfiles_for_bismark_align.collect()
 
         output:
-        set val(name), file("*.bam") into ch_bam_for_bismark_deduplicate, ch_bam_for_bismark_summary
+        set val(name), file("*.bam") into ch_bam_for_bismark_deduplicate, ch_bam_for_bismark_summary, ch_bam_for_preseq
         set val(name), file("*report.txt") into ch_bismark_align_log_for_bismark_report, ch_bismark_align_log_for_bismark_summary, ch_bismark_align_log_for_multiqc
         file "*.fq.gz" optional true
         file "where_are_my_files.txt"
@@ -623,10 +623,10 @@ if( params.aligner == 'bwameth' ){
         input:
         set val(name), file(reads) from ch_trimmed_reads_for_alignment
         file bwa_meth_indices from ch_bwa_meth_indices_for_bwamem_align.collect()
-        file wherearemyfiles from ch_wherearemyfiles_for_bwamem_align
+        file wherearemyfiles from ch_wherearemyfiles_for_bwamem_align.collect()
 
         output:
-        set val(name), file('*.bam') into ch_bam_for_samtools_sort_index_flagstat
+        set val(name), file('*.bam') into ch_bam_for_samtools_sort_index_flagstat, ch_bam_for_preseq
         file "where_are_my_files.txt"
 
         script:
@@ -656,7 +656,7 @@ if( params.aligner == 'bwameth' ){
 
         input:
         set val(name), file(bam) from ch_bam_for_samtools_sort_index_flagstat
-        file wherearemyfiles from ch_wherearemyfiles_for_samtools_sort_index_flagstat
+        file wherearemyfiles from ch_wherearemyfiles_for_samtools_sort_index_flagstat.collect()
 
         output:
         set val(name), file("${bam.baseName}.sorted.bam") into ch_bam_sorted_for_markDuplicates
@@ -667,11 +667,10 @@ if( params.aligner == 'bwameth' ){
 
         script:
         """
-        samtools sort \\
-            $bam \\
+        samtools sort $bam \\
             -m ${task.memory.toBytes() / task.cpus} \\
             -@ ${task.cpus} \\
-            > ${bam.baseName}.sorted.bam
+            -o ${bam.baseName}.sorted.bam
         samtools index ${bam.baseName}.sorted.bam
         samtools flagstat ${bam.baseName}.sorted.bam > ${bam.baseName}_flagstat_report.txt
         samtools stats ${bam.baseName}.sorted.bam > ${bam.baseName}_stats_report.txt
@@ -772,7 +771,10 @@ process qualimap {
     gcref = params.genome == 'GRCh37' ? '-gd HUMAN' : ''
     gcref = params.genome == 'GRCm38' ? '-gd MOUSE' : ''
     """
-    samtools sort $bam -o ${bam.baseName}.sorted.bam
+    samtools sort $bam \\
+        -m ${task.memory.toBytes() / task.cpus} \\
+        -@ ${task.cpus} \\
+        -o ${bam.baseName}.sorted.bam
     qualimap bamqc $gcref \\
         -bam ${bam.baseName}.sorted.bam \\
         -outdir ${bam.baseName}_qualimap \\
@@ -782,6 +784,29 @@ process qualimap {
     """
 }
 
+/*
+ * STEP 9 - preseq: sort each aligned BAM, then run `preseq lc_extrap` on it to write one complexity-curve text file per sample
+ */
+process preseq {
+    tag "$name"
+    publishDir "${params.outdir}/preseq", mode: 'copy' // curve files are copied into <outdir>/preseq
+
+    input:
+    set val(name), file(bam) from ch_bam_for_preseq // (sample name, BAM) pairs; channel is filled by the aligner process outputs
+
+    output:
+    file "${bam.baseName}.ccurve.txt" into preseq_results // later collected as the 'preseq/*' input of the multiqc process
+
+    script: // samtools sort receives (task memory in bytes / task CPUs) as -m and the CPU count as -@; preseq reads the sorted BAM
+    """
+    samtools sort $bam \\
+        -m ${task.memory.toBytes() / task.cpus} \\
+        -@ ${task.cpus} \\
+        -o ${bam.baseName}.sorted.bam
+    preseq lc_extrap -v -B ${bam.baseName}.sorted.bam -o ${bam.baseName}.ccurve.txt
+    """
+}
+
 /*
  * Parse software version numbers
  */
@@ -808,7 +833,8 @@ process get_software_versions {
     bwameth.py --version &> v_bwameth.txt
     picard MarkDuplicates --version &> v_picard_markdups.txt 2>&1 || true
     MethylDackel --version &> v_methyldackel.txt
-    qualimap --version &> v_qualimap.txt
+    qualimap --version &> v_qualimap.txt || true
+    preseq &> v_preseq.txt
     multiqc --version &> v_multiqc.txt
     scrape_software_versions.py &> software_versions_mqc.yaml
     """
@@ -817,9 +843,10 @@ process get_software_versions {
 
 
 /*
- * STEP 9 - MultiQC
+ * STEP 10 - MultiQC
  */
 process multiqc {
+    tag "${params.outdir}/MultiQC/$ofilename"
     publishDir "${params.outdir}/MultiQC", mode: 'copy'
 
     input:
@@ -837,6 +864,7 @@ process multiqc {
     file ('picard/*') from ch_markDups_results_for_multiqc.flatten().collect().ifEmpty([])
     file ('methyldackel/*') from ch_methyldackel_results_for_multiqc.flatten().collect().ifEmpty([])
     file ('qualimap/*') from ch_qualimap_results_for_multiqc.collect().ifEmpty([])
+    file ('preseq/*') from preseq_results.collect().ifEmpty([])
     file ('software_versions/*') from ch_software_versions_yaml_for_multiqc.collect().ifEmpty([])
 
     output:
@@ -846,7 +874,13 @@ process multiqc {
 
     script:
     rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
-    rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
+    if(custom_runName){
+      rfilename = "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report"
+      ofilename = rfilename+'.html'
+    } else {
+      rfilename = ''
+      ofilename = 'multiqc_report.html'
+    }
     """
     multiqc -f $rtitle $rfilename --config $multiqc_config .
     """

diff --git a/nextflow.config b/nextflow.config
@@ -12,7 +12,7 @@
 // Configurable variables
 params {
 
-  container = 'nfcore/methylseq:1.2' // Container slug. Stable releases should specify release tag!!
+  container = 'nfcore/methylseq:1.3' // Container slug. Stable releases should specify release tag!!
 
   // Pipeline options
   aligner = 'bismark'
@@ -98,7 +98,7 @@ manifest {
   name = 'nf-core/methylseq'
   author = 'Phil Ewels'
   description = 'Methylation (Bisulfite-Sequencing) Best Practice analysis pipeline, part of the nf-core community.'
-  version = '1.2'
+  version = '1.3'
   nextflowVersion = '>=0.32.0'
   homePage = 'https://github.com/nf-core/methylseq'
   mainScript = 'main.nf'