Skip to content

Commit

Permalink
Merge pull request #116 from nf-core/implement-neo-subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
scwatts authored Dec 5, 2024
2 parents 45bf362 + 8b15c62 commit f029bec
Show file tree
Hide file tree
Showing 19 changed files with 708 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ The following processes and tools can be run with `oncoanalyser`:
- HRD status prediction (`CHORD`)
- Mutational signature fitting (`Sigs`)
- Tissue of origin prediction (`CUPPA`)
- Neoepitope prediction (`Neo`)
- Report generation (`ORANGE`, `linxreport`)

## Usage
Expand Down
8 changes: 8 additions & 0 deletions conf/hmf_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ params {
isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv'
// LILAC
lilac_resources = 'dna_pipeline/immune/'
// Neo
neo_resources = 'neo/binding/'
// ORANGE
cohort_mapping = 'orange/cohort_mapping.tsv'
// Hartwig cohort RNA data
cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.37.csv'
cohort_percentiles = 'orange/cohort_percentiles.tsv'
alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv'
gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv'
Expand Down Expand Up @@ -72,8 +76,12 @@ params {
isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv'
// LILAC
lilac_resources = 'dna_pipeline/immune/'
// Neo
neo_resources = 'neo/binding/'
// ORANGE
cohort_mapping = 'orange/cohort_mapping.tsv'
// Hartwig cohort RNA data
cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.38.csv'
cohort_percentiles = 'orange/cohort_percentiles.tsv'
alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv'
gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv'
Expand Down
28 changes: 28 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,34 @@ process {
]
}

// Shared setting for the processes named NEO_SCORER and NEO_FINDER: the path of the Neo JAR, exposed to them as task.ext.jarPath
withName: 'NEO_(?:SCORER|FINDER)' {
ext.jarPath = '/opt/neo/neo.jar'
}

// Publishing rules for NEO_SCORER outputs, rooted at params.outdir
withName: 'NEO_SCORER' {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
// versions.yml maps to null; every other output maps to the fixed path "<meta.key>/neo/scorer/" (the filename is not appended)
// NOTE(review): presumably the process emits a single output directory that is renamed here - confirm against the module
saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/scorer/" },
]
}

// Publishing rules for ANNOTATE_FUSIONS when it runs within a NEO_PREDICTION scope (selector matches the fully qualified process name)
withName: '.*:NEO_PREDICTION:ANNOTATE_FUSIONS' {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
// versions.yml maps to null; every other output keeps its filename under "<meta.key>/neo/annotated_fusions/"
saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/annotated_fusions/${filename}" },
]
}

// Publishing rules for NEO_FINDER outputs, rooted at params.outdir
withName: 'NEO_FINDER' {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
// versions.yml maps to null; every other output maps to the fixed path "<meta.key>/neo/finder/" (the filename is not appended)
// NOTE(review): presumably this renames the single output directory emitted by the process - confirm against the module
saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/finder/" },
]
}

withName: 'CUPPA' {
publishDir = [
path: { "${params.outdir}" },
Expand Down
19 changes: 19 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ output/
- [Sigs](#sigs) - Mutational signature fitting
- [Tissue of origin prediction](#tissue-of-origin-prediction)
- [CUPPA](#cuppa) - Tissue of origin prediction
- [Neoepitope prediction](#neoepitope-prediction)
- [Neo](#neo) - Neoepitope prediction
- [Report generation](#report-generation)
- [ORANGE](#orange) - Key results summary
- [linxreport](#linxreport) - Interactive LINX report
Expand Down Expand Up @@ -473,6 +475,23 @@ signatures to tumor sample data.
[CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa) predicts tissue of origin for a given tumor sample
using DNA and/or RNA features generated by upstream hmftools components.

### Neoepitope prediction

#### Neo

<details markdown="1">
<summary>Output files</summary>

- `<group_id>/neo/`
- `<tumor_dna_id>.neo.neo_data.tsv`: Neoepitope candidates.
- `<tumor_dna_id>.neo.neoepitope.tsv`: LINX fusion neoepitopes.
- `<tumor_dna_id>.neo.peptide_scores.tsv`: Peptide binding likelihood and scoring.

</details>

[Neo](https://github.com/hartwigmedical/hmftools/tree/master/neo) builds comprehensive neoepitope predictions from DNA
data with additional annotations made using RNA data.

### Report generation

#### ORANGE
Expand Down
1 change: 1 addition & 0 deletions lib/Constants.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class Constants {
LILAC,
LINX,
MARKDUPS,
NEO,
ORANGE,
PAVE,
PURPLE,
Expand Down
12 changes: 11 additions & 1 deletion lib/Processes.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@ import Utils
class Processes {

public static getRunStages(include, exclude, manual_select, log) {
def processes = manual_select ? [] : Constants.Process.values().toList()

// Get default processes
// NOTE(SW): currently set all except Neo to run by default; Process.NEO excluded to be more concise in code
def processes
if (manual_select) {
processes = []
} else {
processes = Constants.Process.values().toList()
processes.remove(Constants.Process.NEO)
}

def include_list = this.getProcessList(include, log)
def exclude_list = this.getProcessList(exclude, log)
this.checkIncludeExcludeList(include_list, exclude_list, log)
Expand Down
1 change: 1 addition & 0 deletions modules/local/linx/somatic/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ process LINX_SOMATIC {
-known_fusion_file ${known_fusion_data} \\
-driver_gene_panel ${driver_gene_panel} \\
-write_vis_data \\
-write_neo_epitopes \\
-output_dir linx_somatic/
cat <<-END_VERSIONS > versions.yml
Expand Down
22 changes: 22 additions & 0 deletions modules/local/neo/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Image for Neo: downloads the Neo JAR to /opt/neo/neo.jar and installs OpenJDK with micromamba
FROM mambaorg/micromamba:0.24.0

# Run the apt-get and /opt steps below as root
USER root

# Install procps and wget, then clear the apt cache and package lists
RUN \
apt-get update && \
apt-get install -y procps wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Download the Neo v1.2 beta release JAR from the hmftools GitHub releases into /opt/neo/
RUN \
mkdir -p /opt/neo/ && \
wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar'

# Switch back to the non-root mambauser for the conda install
USER mambauser

# Install OpenJDK (version 8 or newer) into the base environment and clean micromamba caches
RUN \
micromamba install -y -n base -c bioconda -c conda-forge \
'openjdk >=8' && \
micromamba clean --all --yes

# Put the conda bin directories ahead of the existing PATH entries
ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}"
7 changes: 7 additions & 0 deletions modules/local/neo/annotate_fusions/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: isofox
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::hmftools-isofox=1.7.1
56 changes: 56 additions & 0 deletions modules/local/neo/annotate_fusions/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Run Isofox with `-functions NEO_EPITOPES` against a Neo Finder output directory and an RNA BAM.
//
// Inputs:
//   meta                    sample metadata map; `meta.id` is used for the task tag and `meta.sample_id` for `-sample`
//   neo_finder_dir          directory passed to Isofox as `-neo_dir`
//   bam, bai                BAM passed as `-bam_file`; the BAI is staged but not named on the command line
//                           (presumably located by Isofox next to the BAM - confirm)
//   read_length             value passed as `-read_length`
//   genome_fasta/genome_fai reference FASTA passed as `-ref_genome`; the index is staged alongside it
//   genome_ver              value passed as `-ref_genome_version`
//   ensembl_data_resources  directory passed as `-ensembl_data_dir`
//
// Outputs:
//   annotated_fusions       files matching `*isf.neoepitope.tsv` in the task working directory
//   versions                versions.yml with the Isofox version nested under the process name
process ANNOTATE_FUSIONS {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0' :
        'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }"

    input:
    tuple val(meta), path(neo_finder_dir), path(bam), path(bai)
    val read_length
    path genome_fasta
    val genome_ver
    path genome_fai
    path ensembl_data_resources

    output:
    tuple val(meta), path('*isf.neoepitope.tsv'), emit: annotated_fusions
    path 'versions.yml'                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command line arguments supplied through the process `ext.args` setting (empty when unset)
    def args = task.ext.args ?: ''

    // The JVM heap limit is set to 95% of the task memory, expressed in bytes.
    // Isofox writes directly into the working directory (`-output_dir ./`), which is where the output glob looks,
    // so no dedicated output directory is created.
    """
    isofox \\
        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
        ${args} \\
        -sample ${meta.sample_id} \\
        -bam_file ${bam} \\
        -functions NEO_EPITOPES \\
        -neo_dir ${neo_finder_dir} \\
        -read_length ${read_length} \\
        -ref_genome ${genome_fasta} \\
        -ref_genome_version ${genome_ver} \\
        -ensembl_data_dir ${ensembl_data_resources} \\
        -threads ${task.cpus} \\
        -output_dir ./

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        isofox: \$(isofox -version | sed 's/^.* //')
    END_VERSIONS
    """

    stub:
    // Create an empty placeholder output and a versions file without invoking Isofox
    """
    touch ${meta.sample_id}.isf.neoepitope.tsv

    echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
    """
}
63 changes: 63 additions & 0 deletions modules/local/neo/annotate_fusions/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
name: annotate_fusions
description: Annotate neoepitopes with RNA fusion data
keywords:
- neoepitopes
- rna
- rnaseq
tools:
- isofox:
description: Characterises and counts gene, transcript features
homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox
documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- neo_finder_dir:
type: directory
description: Neo Finder directory
- bam:
type: file
description: BAM file
pattern: "*.{bam}"
- bai:
type: file
description: BAI file
pattern: "*.{bai}"
- read_length:
type: integer
description: Read length
- genome_fasta:
type: file
description: Reference genome assembly FASTA file
pattern: "*.{fa,fasta}"
- genome_ver:
type: string
description: Reference genome version
- genome_fai:
type: file
description: Reference genome assembly fai file
pattern: "*.{fai}"
- ensembl_data_resources:
type: directory
description: HMF ensembl data resources directory
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- annotated_fusions:
type: file
description: Annotated neoepitopes file
pattern: "*.{tsv}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@scwatts"
- "@charlesshale"
52 changes: 52 additions & 0 deletions modules/local/neo/finder/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Run the Neo JAR against PURPLE and LINX outputs, writing results into `neo_finder/`.
//
// Inputs:
//   meta                    sample metadata map; `meta.id` is used for the task tag and `meta.sample_id` for `-sample`
//   purple_dir              directory expected to contain `<sample_id>.purple.somatic.vcf.gz`, passed as `-somatic_vcf`
//   linx_annotation_dir     directory passed as `-linx_dir`
//   genome_fasta/genome_fai reference FASTA passed as `-ref_genome`; the index is staged alongside it
//   genome_ver              value passed as `-ref_genome_version`
//   ensembl_data_resources  directory passed as `-ensembl_data_dir`
//
// Outputs:
//   neo_finder_dir          the `neo_finder/` directory
//   versions                versions.yml with the Neo version nested under the process name
//
// The JAR location is read from `task.ext.jarPath`, which must be provided by the pipeline configuration.
process NEO_FINDER {
    tag "${meta.id}"
    label 'process_low'

    container 'docker.io/scwatts/neo:1.2_beta--1'

    input:
    tuple val(meta), path(purple_dir), path(linx_annotation_dir)
    path genome_fasta
    val genome_ver
    path genome_fai
    path ensembl_data_resources

    output:
    tuple val(meta), path('neo_finder/'), emit: neo_finder_dir
    path 'versions.yml'                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command line arguments supplied through the process `ext.args` setting (empty when unset)
    def args = task.ext.args ?: ''

    // The JVM heap limit is set to 95% of the task memory, expressed in bytes.
    // NOTE(review): `-log_debug` is always passed; consider moving it to `ext.args` if debug logging should be optional.
    """
    mkdir -p neo_finder/

    java \\
        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
        -jar ${task.ext.jarPath} \\
            ${args} \\
            -sample ${meta.sample_id} \\
            -linx_dir ${linx_annotation_dir} \\
            -somatic_vcf ${purple_dir}/${meta.sample_id}.purple.somatic.vcf.gz \\
            -ref_genome ${genome_fasta} \\
            -ref_genome_version ${genome_ver} \\
            -ensembl_data_dir ${ensembl_data_resources} \\
            -log_debug \\
            -output_dir neo_finder/

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //')
    END_VERSIONS
    """

    stub:
    // Create an empty output directory and a versions file without invoking Neo
    """
    mkdir -p neo_finder/

    echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml
    """
}

52 changes: 52 additions & 0 deletions modules/local/neo/finder/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: neo_finder
description: Identify candidate neoepitopes
keywords:
- neoepitopes
tools:
- neo:
description: Predict and score neoepitopes
homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo
documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- purple_dir:
type: directory
description: PURPLE output directory
- linx_annotation_dir:
type: directory
description: LINX somatic annotation output directory
- genome_fasta:
type: file
description: Reference genome assembly FASTA file
pattern: "*.{fa,fasta}"
- genome_ver:
type: string
description: Reference genome version
- genome_fai:
type: file
description: Reference genome assembly fai file
pattern: "*.{fai}"
- ensembl_data_resources:
type: directory
description: HMF ensembl data resources directory
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [id: 'sample_id']
- neo_finder_dir:
type: directory
description: Neo Finder output directory
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@scwatts"
- "@charlesshale"
Loading

0 comments on commit f029bec

Please sign in to comment.