diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 26f6c8cf7f..71d634521f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -131,7 +131,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- tool: [Haplotypecaller, Freebayes, Manta, mpileup, Strelka, TIDDIT]
+ tool: [Haplotypecaller, Freebayes, Manta, mpileup, Strelka, TIDDIT, msisensor]
steps:
- uses: actions/checkout@v2
- name: Install Nextflow
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e74d9ec10..50cb7de2fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
- [#141](https://github.com/nf-core/sarek/pull/141) - Add containers for `WBcel235`
- [#150](https://github.com/nf-core/sarek/pull/150), [#151](https://github.com/nf-core/sarek/pull/151), [#154](https://github.com/nf-core/sarek/pull/154) - Add AWS mega test GitHub Actions
- [#158](https://github.com/nf-core/sarek/pull/158) - Added `ggplot2` v `3.3.0`
+- [#163](https://github.com/nf-core/sarek/pull/163) - Add [msisensor](https://github.com/ding-lab/msisensor) in tools and container
### `Changed`
diff --git a/README.md b/README.md
index 4fa84f9aea..5b561495d5 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,7 @@ Helpful contributors:
* [gulfshores](https://github.com/gulfshores)
* [pallolason](https://github.com/pallolason)
* [silviamorins](https://github.com/silviamorins)
+* [David Mas-Ponte](https://github.com/davidmasp)
## Contributions & Support
diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index ba53181640..44580887f0 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -13,6 +13,7 @@
'GATK': ['v_gatk.txt', r"Version:(\S+)"],
'htslib': ['v_samtools.txt', r"htslib (\S+)"],
'Manta': ['v_manta.txt', r"([0-9.]+)"],
+ 'msisensor': ["v_msisensor.txt", r"Version: v(\S+)"],
'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"],
'Nextflow': ['v_nextflow.txt', r"(\S+)"],
'nf-core/sarek': ['v_pipeline.txt', r"(\S+)"],
@@ -38,6 +39,7 @@
results['GATK'] = 'N/A'
results['htslib'] = 'N/A'
results['Manta'] = 'N/A'
+results['msisensor'] = 'N/A'
results['MultiQC'] = 'N/A'
results['Qualimap'] = 'N/A'
results['R'] = 'N/A'
diff --git a/docs/containers.md b/docs/containers.md
index 2c55b4015c..0d2aa08539 100644
--- a/docs/containers.md
+++ b/docs/containers.md
@@ -26,6 +26,7 @@ For annotation, the main container can be used, but the cache has to be download
- Contain **[ggplot2](https://github.com/tidyverse/ggplot2)** 3.3.0
- Contain **[HTSlib](https://github.com/samtools/htslib)** 1.9
- Contain **[Manta](https://github.com/Illumina/manta)** 1.6.0
+- Contain **[msisensor](https://github.com/ding-lab/msisensor)** 0.5
- Contain **[MultiQC](https://github.com/ewels/MultiQC/)** 1.8
- Contain **[Qualimap](http://qualimap.bioinfo.cipf.es)** 2.2.2d
- Contain **[samtools](https://github.com/samtools/samtools)** 1.9
diff --git a/docs/output.md b/docs/output.md
index 493957cf9f..bc6e6beee4 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -34,6 +34,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [ConvertAlleleCounts](#convertallelecounts)
- [ASCAT](#ascat)
- [Control-FREEC](#control-freec)
+ - [MSI status](#msi-status)
+ - [MSIsensor](#msisensor)
- [Variant annotation](#variant-annotation)
- [snpEff](#snpeff)
- [VEP](#vep)
@@ -424,6 +426,36 @@ For a Tumor/Normal pair only:
- `[TUMORSAMPLE].pileup.gz_BAF.txt` and `[NORMALSAMPLE].pileup.gz_BAF.txt`
- file with beta allele frequencies for each possibly heterozygous SNP position
+### MSI status
+
+[Microsatellite instability](https://en.wikipedia.org/wiki/Microsatellite_instability)
+is a genetic condition associated with deficiencies in the
+mismatch repair (MMR) system, which causes a tendency to accumulate a high
+number of mutations (SNVs and indels).
+
+#### MSIsensor
+
+[MSIsensor](https://github.com/ding-lab/msisensor) is a tool to detect the MSI
+status of a tumor by scanning the length of the microsatellite regions. An altered
+distribution of microsatellite length is associated with a missed replication
+slippage which would be corrected under normal mismatch repair (MMR) conditions. It requires
+a normal sample for each tumor to differentiate the somatic and germline
+cases.
+
+For further reading see the [MSIsensor paper](https://www.ncbi.nlm.nih.gov/pubmed/24371154).
+
+For a Tumor/Normal pair only:
+**Output directory: `results/VariantCalling/[TUMORSAMPLE]_vs_[NORMALSAMPLE]/MSIsensor`**
+
+- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor`
+  - MSI score output, contains information about the number of somatic sites.
+- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_dis`
+  - The normal and tumor length distribution for each microsatellite position.
+- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_germline`
+  - Germline sites detected.
+- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_somatic`
+  - Somatic sites detected.
+
## Variant annotation
This directory contains results from the final annotation steps: two software are used for annotation, [snpEff](http://snpeff.sourceforge.net/) and [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html).
diff --git a/environment.yml b/environment.yml
index 84fd4dc8cb..c1889a8476 100644
--- a/environment.yml
+++ b/environment.yml
@@ -21,6 +21,7 @@ dependencies:
- bioconda::genesplicer=1.0
- bioconda::htslib=1.9
- bioconda::manta=1.6.0
+ - bioconda::msisensor=0.5
- bioconda::multiqc=1.8
- bioconda::qualimap=2.2.2d
- bioconda::samtools=1.9
diff --git a/main.nf b/main.nf
index 788301b250..3e542da0b1 100644
--- a/main.nf
+++ b/main.nf
@@ -618,6 +618,7 @@ process Get_software_versions {
trim_galore -v &> v_trim_galore.txt 2>&1 || true
vcftools --version &> v_vcftools.txt 2>&1 || true
vep --help &> v_vep.txt 2>&1 || true
+ msisensor &> v_msisensor.txt 2>&1 || true
scrape_software_versions.py &> software_versions_mqc.yaml
"""
@@ -2069,8 +2070,8 @@ pairBam = bamNormal.cross(bamTumor).map {
pairBam = pairBam.dump(tag:'BAM Somatic Pair')
-// Manta, Strelka, Mutect2
-(pairBamManta, pairBamStrelka, pairBamStrelkaBP, pairBamCalculateContamination, pairBamFilterMutect2, pairBamTNscope, pairBam) = pairBam.into(7)
+// Manta, Strelka, Mutect2, MSIsensor
+(pairBamManta, pairBamStrelka, pairBamStrelkaBP, pairBamCalculateContamination, pairBamFilterMutect2, pairBamTNscope, pairBamMsisensor, pairBam) = pairBam.into(8)
intervalPairBam = pairBam.spread(bedIntervals)
@@ -2605,6 +2606,64 @@ process StrelkaBP {
vcfStrelkaBP = vcfStrelkaBP.dump(tag:'Strelka BP')
+// STEP MSISENSOR.1 - SCAN
+
+// Scan the reference genome for microsatellite sites (consumed by the msisensor process)
+process msisensorScan {
+    label 'cpus_1'
+    label 'memory_max'
+
+    tag { fasta }
+
+    input:
+        file(fasta) from ch_fasta
+        file(fastaFai) from ch_fai
+
+    output:
+        file("microsatellites.list") into msi_scan_ch
+
+    when:
+        'msisensor' in tools
+
+    script:
+    """
+    msisensor scan -d ${fasta} -o microsatellites.list
+    """
+}
+
+// STEP MSISENSOR.2 - SCORE
+
+// Score microsatellite instability for each tumor/normal BAM pair against the
+// site list emitted by msisensorScan; only runs when 'msisensor' is in tools
+process msisensor {
+    label 'cpus_4'
+    label 'memory_max'
+
+    tag {idSampleTumor + "_vs_" + idSampleNormal}
+
+    publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/MSIsensor", mode: params.publishDirMode
+
+    input:
+        set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamMsisensor
+        file msiSites from msi_scan_ch
+
+    // Collected files: the -o prefix itself plus its _dis, _germline and _somatic companions
+    output:
+        set val("Msisensor"), idPatient, file("${idSampleTumor}_vs_${idSampleNormal}_msisensor"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_dis"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_germline"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_somatic") into msisensor_out_ch
+
+    when: 'msisensor' in tools
+
+    script:
+    // -b is the thread count: follow the CPUs allocated to the task instead of a fixed 4
+    """
+    msisensor msi -d ${msiSites} \
+        -b ${task.cpus} \
+        -n ${bamNormal} \
+        -t ${bamTumor} \
+        -o ${idSampleTumor}_vs_${idSampleNormal}_msisensor
+    """
+}
+
// STEP ASCAT.1 - ALLELECOUNTER
// Run commands and code from Malin Larsson
@@ -3595,7 +3654,8 @@ def defineToolList() {
'strelka',
'tiddit',
'tnscope',
- 'vep'
+ 'vep',
+ 'msisensor'
]
}