diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 26f6c8cf7f..71d634521f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -131,7 +131,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - tool: [Haplotypecaller, Freebayes, Manta, mpileup, Strelka, TIDDIT] + tool: [Haplotypecaller, Freebayes, Manta, mpileup, Strelka, TIDDIT, msisensor] steps: - uses: actions/checkout@v2 - name: Install Nextflow diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e74d9ec10..50cb7de2fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#141](https://github.com/nf-core/sarek/pull/141) - Add containers for `WBcel235` - [#150](https://github.com/nf-core/sarek/pull/150), [#151](https://github.com/nf-core/sarek/pull/151), [#154](https://github.com/nf-core/sarek/pull/154) - Add AWS mega test GitHub Actions - [#158](https://github.com/nf-core/sarek/pull/158) - Added `ggplot2` v `3.3.0` +- [#163](https://github.com/nf-core/sarek/pull/163) - Add [msisensor](https://github.com/ding-lab/msisensor) in tools and container ### `Changed` diff --git a/README.md b/README.md index 4fa84f9aea..5b561495d5 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,7 @@ Helpful contributors: * [gulfshores](https://github.com/gulfshores) * [pallolason](https://github.com/pallolason) * [silviamorins](https://github.com/silviamorins) +* [David Mas-Ponte](https://github.com/davidmasp) ## Contributions & Support diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index ba53181640..44580887f0 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -13,6 +13,7 @@ 'GATK': ['v_gatk.txt', r"Version:(\S+)"], 'htslib': ['v_samtools.txt', r"htslib (\S+)"], 'Manta': ['v_manta.txt', r"([0-9.]+)"], + 'msisensor': ["v_msisensor.txt", r"Version: v(\S+)"], 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], 'Nextflow': 
['v_nextflow.txt', r"(\S+)"], 'nf-core/sarek': ['v_pipeline.txt', r"(\S+)"], @@ -38,6 +39,7 @@ results['GATK'] = 'N/A' results['htslib'] = 'N/A' results['Manta'] = 'N/A' +results['msisensor'] = 'N/A' results['MultiQC'] = 'N/A' results['Qualimap'] = 'N/A' results['R'] = 'N/A' diff --git a/docs/containers.md b/docs/containers.md index 2c55b4015c..0d2aa08539 100644 --- a/docs/containers.md +++ b/docs/containers.md @@ -26,6 +26,7 @@ For annotation, the main container can be used, but the cache has to be download - Contain **[ggplot2](https://github.com/tidyverse/ggplot2)** 3.3.0 - Contain **[HTSlib](https://github.com/samtools/htslib)** 1.9 - Contain **[Manta](https://github.com/Illumina/manta)** 1.6.0 +- Contain **[msisensor](https://github.com/ding-lab/msisensor)** 0.5 - Contain **[MultiQC](https://github.com/ewels/MultiQC/)** 1.8 - Contain **[Qualimap](http://qualimap.bioinfo.cipf.es)** 2.2.2d - Contain **[samtools](https://github.com/samtools/samtools)** 1.9 diff --git a/docs/output.md b/docs/output.md index 493957cf9f..bc6e6beee4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -34,6 +34,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [ConvertAlleleCounts](#convertallelecounts) - [ASCAT](#ascat) - [Control-FREEC](#control-freec) + - [MSI status](#msi-status) + - [MSIsensor](#msisensor) - [Variant annotation](#variant-annotation) - [snpEff](#snpeff) - [VEP](#vep) @@ -424,6 +426,36 @@ For a Tumor/Normal pair only: - `[TUMORSAMPLE].pileup.gz_BAF.txt` and `[NORMALSAMPLE].pileup.gz_BAF.txt` - file with beta allele frequencies for each possibly heterozygous SNP position +### MSI status + +[Microsatellite instability](https://en.wikipedia.org/wiki/Microsatellite_instability) +is a genetic condition associated with deficiencies in the +mismatch repair (MMR) system, which causes a tendency to accumulate a high +number of mutations (SNVs and indels).
+ +#### MSIsensor + +[MSIsensor](https://github.com/ding-lab/msisensor) is a tool to detect the MSI +status of a tumor by scanning the length of the microsatellite regions. An altered +distribution of microsatellite length is associated with a missed replication +slippage, which would be corrected under normal mismatch repair (MMR) conditions. It requires +a normal sample for each tumor to differentiate the somatic and germline +cases. + +For further reading see the [MSIsensor paper](https://www.ncbi.nlm.nih.gov/pubmed/24371154). + +For a Tumor/Normal pair only: +**Output directory: `results/VariantCalling/[TUMORSAMPLE]_vs_[NORMALSAMPLE]/MSIsensor`** + +- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor` + - MSI score output, contains information about the number of somatic sites. +- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_dis` + - The normal and tumor length distribution for each microsatellite position. +- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_germline` + - Germline sites detected. +- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_somatic` + - Somatic sites detected. + ## Variant annotation This directory contains results from the final annotation steps: two software are used for annotation, [snpEff](http://snpeff.sourceforge.net/) and [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html).
diff --git a/environment.yml b/environment.yml index 84fd4dc8cb..c1889a8476 100644 --- a/environment.yml +++ b/environment.yml @@ -21,6 +21,7 @@ dependencies: - bioconda::genesplicer=1.0 - bioconda::htslib=1.9 - bioconda::manta=1.6.0 + - bioconda::msisensor=0.5 - bioconda::multiqc=1.8 - bioconda::qualimap=2.2.2d - bioconda::samtools=1.9 diff --git a/main.nf b/main.nf index 788301b250..3e542da0b1 100644 --- a/main.nf +++ b/main.nf @@ -618,6 +618,7 @@ process Get_software_versions { trim_galore -v &> v_trim_galore.txt 2>&1 || true vcftools --version &> v_vcftools.txt 2>&1 || true vep --help &> v_vep.txt 2>&1 || true + msisensor &> v_msisensor.txt 2>&1 || true scrape_software_versions.py &> software_versions_mqc.yaml """ @@ -2069,8 +2070,8 @@ pairBam = bamNormal.cross(bamTumor).map { pairBam = pairBam.dump(tag:'BAM Somatic Pair') -// Manta, Strelka, Mutect2 -(pairBamManta, pairBamStrelka, pairBamStrelkaBP, pairBamCalculateContamination, pairBamFilterMutect2, pairBamTNscope, pairBam) = pairBam.into(7) +// Manta, Strelka, Mutect2, MSIsensor +(pairBamManta, pairBamStrelka, pairBamStrelkaBP, pairBamCalculateContamination, pairBamFilterMutect2, pairBamTNscope, pairBamMsisensor, pairBam) = pairBam.into(8) intervalPairBam = pairBam.spread(bedIntervals) @@ -2605,6 +2606,64 @@ process StrelkaBP { vcfStrelkaBP = vcfStrelkaBP.dump(tag:'Strelka BP') +// STEP MSISENSOR.1 - SCAN + +// Scan the reference genome for microsatellite sites; the resulting list is the -d input of `msisensor msi` +process msisensorScan { + label 'cpus_1' + label 'memory_max' + // memory '20 GB' + + tag {fasta} + + input: + file(fasta) from ch_fasta + file(fastaFai) from ch_fai + + output: + file "microsatellites.list" into msi_scan_ch + + when: 'msisensor' in tools + + script: + """ + msisensor scan -d ${fasta} -o microsatellites.list + """ +} + +// STEP MSISENSOR.2 - SCORE + +// Score each tumor/normal BAM pair against the scanned microsatellite sites; threads (-b) follow the CPUs granted to the task + +process msisensor { + label 'cpus_4' + label 'memory_max' + // memory '10 GB' + + tag {idSampleTumor + "_vs_" + idSampleNormal} + + publishDir 
"${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/MSIsensor", mode: params.publishDirMode + + input: + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamMsisensor + file msiSites from msi_scan_ch + + output: + set val("Msisensor"), idPatient, file("${idSampleTumor}_vs_${idSampleNormal}_msisensor"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_dis"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_germline"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_somatic") into msisensor_out_ch + + when: + 'msisensor' in tools + + script: + """ + msisensor msi -d ${msiSites} \ + -b ${task.cpus} \ + -n ${bamNormal} \ + -t ${bamTumor} \ + -o ${idSampleTumor}_vs_${idSampleNormal}_msisensor + """ +} + // STEP ASCAT.1 - ALLELECOUNTER // Run commands and code from Malin Larsson @@ -3595,7 +3654,8 @@ def defineToolList() { 'strelka', 'tiddit', 'tnscope', - 'vep' + 'vep', + 'msisensor' ] }