Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplified target/bait inputs #970

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ before_install:
- chmod 777 nextflow
# To change the test data for Travis: download the tarball using the command below, extract it, make your changes, re-create the gzipped tarball, and upload it to Google Drive.
# You will also have to change the link below: share the uploaded file so that anyone with the link can access it, then extract the id from the share link and put it after "id=" in the URL.
- wget -O test-data.tar.gz --no-check-certificate 'https://docs.google.com/uc?export=download&confirm=no_antivirus&id=1xcDnXk468SLpzr01Lw3CcJgeirYZPQXO'
- wget -O test-data.tar.gz --no-check-certificate 'https://docs.google.com/uc?export=download&confirm=no_antivirus&id=1lmuRJ1YO0DwBgtzG1kuKzTvGSijgEssc'
- tar -xzvf test-data.tar.gz

script:
Expand Down
3 changes: 3 additions & 0 deletions conf/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
withName:"CreateScatteredIntervals.*" {
container = "broadinstitute/gatk:4.1.0.0"
}
withName:"CreateBaitsetFiles" {
container = "broadinstitute/gatk:4.1.9.0"
}

//------------------- Somatic pipeline

Expand Down
2 changes: 1 addition & 1 deletion conf/juno.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ params {
mem_per_core = true
reference_base = "/juno/work/taylorlab/cmopipeline"
// targets_base = "/juno/work/ccs/resources/tempo/${params.genome}"
targets_base = "${reference_base}/mskcc-igenomes/${params.genome.toLowerCase()}/tempo_targets"
targets_base = "${reference_base}/mskcc-igenomes/${params.genome.toLowerCase()}/tempo_targets_dsl2"
genome_base = params.genome == 'GRCh37' ? "${reference_base}/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'GRCh38' ? "${reference_base}/mskcc-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "${reference_base}/mskcc-igenomes/igenomes/smallGRCh37"
minWallTime = 3.h
medWallTime = 6.h
Expand Down
10 changes: 4 additions & 6 deletions conf/references.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,8 @@ params {
targets {
// If your files do not match this structure/naming, please create a folder and copy over the files or create symlinks.
// If editing we recommend only changing the basename.
baitsInterval = "${params.targets_base}/\${targets_id}/baits.interval_list"
targetsInterval = "${params.targets_base}/\${targets_id}/targets.interval_list"
targetsBed = "${params.targets_base}/\${targets_id}/targets.bed"
targetsBedGz = "${params.targets_base}/\${targets_id}/targets.bed.gz"
targetsBedGzTbi = "${params.targets_base}/\${targets_id}/targets.bed.gz.tbi"
codingBed = "${params.targets_base}/\${targets_id}/coding.bed"
targetsBed = "${params.targets_base}/${params.assayType}/\${targets_id}/targets.bed"
baitsBed = "${params.targets_base}/${params.assayType}/\${targets_id}/baits.bed"
}
genomes {
'smallGRCh37' {
Expand Down Expand Up @@ -58,6 +54,7 @@ params {
hlaDat = "${params.reference_base}/hla/hla.dat"
neoantigenCDNA = "${params.reference_base}/neoantigen/Homo_sapiens.GRCh37.75.cdna.all.fa.gz"
neoantigenCDS = "${params.reference_base}/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz"
codingRegions = "${params.reference_base}/ensGene.all_CODING_exons.reference.bed"
}
'GRCh37' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci"
Expand Down Expand Up @@ -95,6 +92,7 @@ params {
hlaDat = "${params.reference_base}/mskcc-igenomes/grch37/hla/hla.dat"
neoantigenCDNA = "${params.reference_base}/mskcc-igenomes/grch37/neoantigen/Homo_sapiens.GRCh37.75.cdna.all.fa.gz"
neoantigenCDS = "${params.reference_base}/mskcc-igenomes/grch37/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz"
codingRegions = "${params.reference_base}/mskcc-igenomes/grch37/coding_regions/ensGene.all_CODING_exons.reference.bed"
}
'GRCh38' {
acLoci = "${params.genome_base}/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci"
Expand Down
23 changes: 16 additions & 7 deletions dsl2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ include { snv_wf } from './modules/subworkflow/snv_wf'
include { sampleQC_wf } from './modules/subworkflow/sampleQC_wf' addParams(referenceMap: referenceMap, targetsMap: targetsMap, multiqcWesConfig: multiqcWesConfig, multiqcWgsConfig: multiqcWgsConfig, multiqcTempoLogo: multiqcTempoLogo)
include { samplePairingQC_wf } from './modules/subworkflow/samplePairingQC_wf' addParams(referenceMap: referenceMap, targetsMap: targetsMap)
include { somaticMultiQC_wf } from './modules/subworkflow/somaticMultiQC_wf' addParams(multiqcWesConfig: multiqcWesConfig, multiqcWgsConfig: multiqcWgsConfig, multiqcTempoLogo: multiqcTempoLogo)
include { targets_wf } from './modules/subworkflow/targets_wf' addParams(referenceMap: referenceMap, targetsMap: targetsMap)
include { scatter_wf } from './modules/subworkflow/scatter_wf' addParams(referenceMap: referenceMap, targetsMap: targetsMap)
include { germlineSNV_wf } from './modules/subworkflow/germlineSNV_wf' addParams(referenceMap: referenceMap, targetsMap: targetsMap)
include { germlineSV_wf } from './modules/subworkflow/germlineSV_wf' addParams(referenceMap: referenceMap, targetsMap: targetsMap)
Expand All @@ -59,6 +60,7 @@ WFs = (!params.mapping && !params.bamMapping && aggregateParamIsFile) ? ['snv','
workflow {
//Set flags for when each pipeline is required to run.
doWF_align = (params.mapping) ? true : false
doWF_targets = WFs.size() > 0
doWF_manta = ['snv', 'sv', 'mutsig'].any(it -> it in WFs) ? true : false
doWF_scatter = ['snv', 'sv', 'mutsig', 'germsnv'].any(it -> it in WFs) ? true : false
doWF_germSNV = 'germsnv' in WFs ? true : false
Expand Down Expand Up @@ -164,9 +166,13 @@ workflow {
manta_wf(bamFiles)
}

if(doWF_targets){
targets_wf()
}

if(doWF_scatter)
{
scatter_wf()
scatter_wf(targets_wf.out.baitsetPlus5)
}

if(doWF_germSV)
Expand All @@ -181,7 +187,7 @@ workflow {

if(doWF_germSNV)
{
germlineSNV_wf(bams, bamsTumor, scatter_wf.out.mergedIList, facets_wf.out.facetsForMafAnno)
germlineSNV_wf(bams, bamsTumor, scatter_wf.out.mergedIList, facets_wf.out.facetsForMafAnno, targets_wf.out.baitsetPlus5)
}

if(doWF_SV)
Expand All @@ -196,12 +202,12 @@ workflow {

if(doWF_SNV)
{
snv_wf(bamFiles, scatter_wf.out.mergedIList, manta_wf.out.mantaToStrelka, loh_wf.out.hlaOutput, facets_wf.out.facetsForMafAnno)
snv_wf(bamFiles, scatter_wf.out.mergedIList, manta_wf.out.mantaToStrelka, loh_wf.out.hlaOutput, facets_wf.out.facetsForMafAnno, targets_wf.out.baitsetPlus5)
}

if(doWF_QC)
{
sampleQC_wf(inputBam, fastPJson)
sampleQC_wf(inputBam, fastPJson, targets_wf.out.baitsetInterval, targets_wf.out.baitsetPlus5_unzipped, targets_wf.out.baitsetPlus5)
}

if(doWF_msiSensor)
Expand All @@ -222,9 +228,12 @@ workflow {
.combine(mutSig_wf.out.mutSig4MetaDataParser, by: [0,1,2])
.combine(loh_wf.out.hlaOutput, by: [1,2])
.unique()
.map{ idNormal, target, idTumor, purityOut, mafFile, qcOutput, msifile, mutSig, placeHolder, polysolverFile ->
[idNormal, target, idTumor, purityOut, mafFile, qcOutput, msifile, mutSig, placeHolder, polysolverFile, targetsMap."$target".codingBed]
}.set{ mergedChannelMetaDataParser }
.combine(targets_wf.out.codingBaitsetBed)
.filter{ idNormal, target, idTumor, purityOut, mafFile, qcOutput, msifile, mutSig, placeHolder, polysolverFile, target2, codingBed ->
target == target2
}.map{ idNormal, target, idTumor, purityOut, mafFile, qcOutput, msifile, mutSig, placeHolder, polysolverFile, target2, codingBed ->
[ idNormal, target, idTumor, purityOut, mafFile, qcOutput, msifile, mutSig, placeHolder, polysolverFile, codingBed ]
}.set{ mergedChannelMetaDataParser }

mdParse_wf(mergedChannelMetaDataParser)
}
Expand Down
7 changes: 3 additions & 4 deletions modules/function/define_maps.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,16 @@ def defineReferenceMap() {
result_array << ['neoantigenCDNA' : checkParamReturnFile("neoantigenCDNA")]
result_array << ['neoantigenCDS' : checkParamReturnFile("neoantigenCDS")]
// coding region BED files for calculating TMB
result_array << ['codingRegions' : checkParamReturnFile("codingRegions")]
return result_array
}

def loadTargetReferences(){
def result_array = [:]
new File(params.targets_base).eachDir{ i ->
new File("${params.targets_base}/${params.assayType}" ).eachDir{ i ->
def target_id = i.getBaseName()
if (params.assayType == "genome" && target_id != "wgs" ){ return }
if (params.assayType != "genome" && target_id == "wgs" ){ return }
result_array["${target_id}"] = [:]
for ( j in params.targets.keySet()) { // baitsInterval, targetsInterval, targetsBedGz, targetsBedGzTbi, codingBed
for ( j in params.targets.keySet()) { // baitsBed, targetsBed
result_array."${target_id}" << [ ("$j".toString()) : evalTargetPath(j,target_id)]
}
}
Expand Down
55 changes: 55 additions & 0 deletions modules/process/Targets/CreateBaitsetFiles.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
 * CreateBaitsetFiles
 *
 * Derives, for one target id, every bait-set file from a raw targets BED and a
 * raw baits BED.
 *
 * Inputs:
 *   targetId       - identifier used as the prefix of every output file name
 *   raw_targets.bed, raw_baits.bed
 *                  - the two input BED files, staged under these fixed names
 *   genomeFile     - staged into the work directory; not referenced by the script
 *   genomeIndex    - its first two columns are used as the bedtools "genome" file
 *                    (presumably a .fai, i.e. contig name and length -- TODO confirm)
 *   genomeDict     - sequence dictionary passed to gatk BedToIntervalList (-SD)
 *   codingRegions  - BED that is intersected with the merged targets
 *
 * Outputs (emit names):
 *   baitsetInterval       - [targetId, <id>.targets.ilist, <id>.baits.ilist]
 *   codingBaitsetBed      - [targetId, <id>.coding.bed]
 *   baitsetPlus5          - [targetId, <id>.plus5bp.bed.gz, <id>.plus5bp.bed.gz.tbi]
 *   baitsetPlus5_unzipped - [targetId, <id>.plus5bp.bed]
 */
process CreateBaitsetFiles {
  tag "${targetId}"

  input:
    tuple val(targetId), path("raw_targets.bed"), path("raw_baits.bed")
    path(genomeFile)
    path(genomeIndex)
    path(genomeDict)
    path(codingRegions)

  output:
    tuple val(targetId), path(targetInterval), path(baitInterval), emit: baitsetInterval
    tuple val(targetId), path(codingBaitsetBed), emit: codingBaitsetBed
    tuple val(targetId), path("${targetPlus5}.gz"), path("${targetPlus5}.gz.tbi"), emit:baitsetPlus5
    tuple val(targetId), path(targetPlus5), emit:baitsetPlus5_unzipped

  script:
  // Deliberately assigned without `def`: the output declarations above refer
  // to these names, so they must stay visible outside the script block.
  targetInterval = "${targetId}.targets.ilist"
  baitInterval = "${targetId}.baits.ilist"
  codingBaitsetBed = "${targetId}.coding.bed"
  targetBed = "${targetId}.targets.bed"
  baitBed = "${targetId}.baits.bed"
  targetPlus5 = "${targetId}.plus5bp.bed"
  """
  # Without pipefail, a failure on the left-hand side of a pipe (for example
  # bedtools sort rejecting a malformed BED) is hidden by the exit status of
  # the last command, and a truncated or empty file would be emitted.
  set -o pipefail

  # Sort and merge the raw inputs.
  bedtools sort -i raw_targets.bed | bedtools merge -i - > ${targetBed}
  bedtools sort -i raw_baits.bed | bedtools merge -i - > ${baitBed}

  # Coding regions restricted to the merged targets, then sorted and merged.
  bedtools intersect \\
    -a ${codingRegions} \\
    -b ${targetBed} > \\
    intersect.bed
  sort -k1,1 -k 2,2n -k 3,3n intersect.bed > intersect.sorted.bed
  bedtools merge -i intersect.sorted.bed > ${codingBaitsetBed}

  # Targets extended by 5 bp on both sides (-b 5); the genome file given to
  # bedtools slop is built from the first two columns of the genome index.
  cut -f 1,2 ${genomeIndex} > this.genome
  bedtools slop \\
    -i ${targetBed} \\
    -g ./this.genome \\
    -b 5 > \\
    ${targetPlus5}
  bgzip -c ${targetPlus5} > ${targetPlus5}.gz
  tabix -p bed ${targetPlus5}.gz

  # Interval lists for the merged targets and the merged baits.
  gatk BedToIntervalList \\
    -I ${targetBed} \\
    -O ${targetInterval} \\
    -SD ${genomeDict}

  gatk BedToIntervalList \\
    -I ${baitBed} \\
    -O ${baitInterval} \\
    -SD ${genomeDict}

  """
}
11 changes: 8 additions & 3 deletions modules/subworkflow/germlineSNV_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ workflow germlineSNV_wf
bamsTumor
mergedIList
facetsForMafAnno
baitsetPlus5

main:
referenceMap = params.referenceMap
Expand Down Expand Up @@ -46,9 +47,13 @@ workflow germlineSNV_wf
GermlineCombineHaplotypecallerVcf(haplotypecaller4Combine,
Channel.value([referenceMap.genomeFile, referenceMap.genomeIndex, referenceMap.genomeDict]))

bams.map{ idNormal, target, bamNormal, baiNormal ->
[idNormal, target, bamNormal, baiNormal, targetsMap."$target".targetsBedGz, targetsMap."$target".targetsBedGzTbi]
}.set{ bamsForStrelkaGermline }
bams
.combine(baitsetPlus5)
.filter{ idNormal, target, bamNormal, baiNormal, target2, bedGz, bedGzTbi ->
target == target2
}.map{ idNormal, target, bamNormal, baiNormal, target2, bedGz, bedGzTbi ->
[idNormal, target, bamNormal, baiNormal, bedGz, bedGzTbi]
}.set{bamsForStrelkaGermline}

GermlineRunStrelka2(bamsForStrelkaGermline,
Channel.value([referenceMap.genomeFile, referenceMap.genomeIndex, referenceMap.genomeDict]))
Expand Down
23 changes: 16 additions & 7 deletions modules/subworkflow/sampleQC_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,20 @@ workflow sampleQC_wf
take:
inputChannel
fastPJson
intervals
targetsBed
targetsBedGz

main:
referenceMap = params.referenceMap
targetsMap = params.targetsMap

inputChannel.map{ idSample, target, bam, bai ->
[idSample, target, bam, bai, targetsMap."$target".targetsInterval, targetsMap."$target".baitsInterval]
}.set{ bamsBQSR4HsMetrics }
inputChannel.combine(intervals)
.filter{ idSample, target, bam, bai, target2, targetsInterval, baitsInterval ->
target == target2
}.map{ idSample, target, bam, bai, target2, targetsInterval, baitsInterval ->
[idSample, target, bam, bai, targetsInterval, baitsInterval]
}.set{ bamsBQSR4HsMetrics }

QcCollectHsMetrics(bamsBQSR4HsMetrics,
Channel.value([referenceMap.genomeFile, referenceMap.genomeIndex, referenceMap.genomeDict]))
Expand All @@ -30,16 +36,19 @@ workflow sampleQC_wf
}

inputChannel
.map{ idSample, target, bam, bai -> [ idSample, target, bam, bai, file(targetsMap."$target".targetsBed) ]}
.combine(targetsBed)
.filter{ idSample, target, bam, bai, targets2, bedfile -> target == targets2 }
.map{ idSample, target, bam, bai, targets2, bedfile -> [idSample, target, bam, bai, bedfile] }
.set{ bamsBQSR4Qualimap }

QcQualimap(bamsBQSR4Qualimap)

Channel.from(true, false).set{ ignore_read_groups }
inputChannel
.map{ idSample, target, bam, bai ->
[ idSample, target, bam, bai, targetsMap."$target".targetsBedGz, targetsMap."$target".targetsBedGzTbi ]
}.set{ bamsBQSR4Alfred }
.combine(targetsBedGz)
.filter{ idSample, target, bam, bai, targets2, bedGz, bedGzTbi -> target == targets2 }
.map{ idSample, target, bam, bai, targets2, bedGz, bedGzTbi -> [idSample, target, bam, bai, bedGz, bedGzTbi] }
.set{ bamsBQSR4Alfred }

QcAlfred(ignore_read_groups,
bamsBQSR4Alfred,
Expand Down
8 changes: 3 additions & 5 deletions modules/subworkflow/scatter_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@ include { CreateScatteredIntervals } from '../process/Scatter/CreateScatteredI

workflow scatter_wf
{
take:
targets4Intervals

main:
referenceMap = params.referenceMap
targetsMap = params.targetsMap

targets4Intervals = Channel.from(targetsMap.keySet())
.map{ targetId ->
[ targetId, targetsMap."${targetId}".targetsBedGz, targetsMap."${targetId}".targetsBedGzTbi ]
}

CreateScatteredIntervals(Channel.value([referenceMap.genomeFile,
referenceMap.genomeIndex,
referenceMap.genomeDict]),
Expand Down
8 changes: 6 additions & 2 deletions modules/subworkflow/snv_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ workflow snv_wf
mantaToStrelka
hlaOutput
facetsForMafAnno
baitsetPlus5

main:
referenceMap = params.referenceMap
Expand Down Expand Up @@ -52,8 +53,11 @@ workflow snv_wf
Channel.value([referenceMap.genomeFile, referenceMap.genomeIndex, referenceMap.genomeDict]))

bamFiles.combine(mantaToStrelka, by: [0, 1, 2])
.map{ idTumor, idNormal, target, bamTumor, baiTumor, bamNormal, baiNormal, mantaCSI, mantaCSIi ->
[idTumor, idNormal, target, bamTumor, baiTumor, bamNormal, baiNormal, mantaCSI, mantaCSIi, targetsMap."$target".targetsBedGz, targetsMap."$target".targetsBedGzTbi]
.combine(baitsetPlus5)
.filter{ idTumor, idNormal, target, bamTumor, baiTumor, bamNormal, baiNormal, mantaCSI, mantaCSIi, target2, bedGz, bedGzTbi ->
target2 == target
}.map{ idTumor, idNormal, target, bamTumor, baiTumor, bamNormal, baiNormal, mantaCSI, mantaCSIi, target2, bedGz, bedGzTbi ->
[idTumor, idNormal, target, bamTumor, baiTumor, bamNormal, baiNormal, mantaCSI, mantaCSIi, bedGz, bedGzTbi ]
}.set{ input4Strelka }

SomaticRunStrelka2(input4Strelka,
Expand Down
25 changes: 25 additions & 0 deletions modules/subworkflow/targets_wf.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
include { CreateBaitsetFiles } from '../process/Targets/CreateBaitsetFiles'

// Subworkflow: runs CreateBaitsetFiles once per key of params.targetsMap and
// re-exports the four output channels of that process under the same names.
workflow targets_wf
{
  main:
    referenceMap = params.referenceMap
    targetsMap   = params.targetsMap

    // One [targetId, targetsBed, baitsBed] tuple for every entry of targetsMap.
    rawBaitsetInputs = Channel.from(targetsMap.keySet())
      .map{ id ->
        def entry = targetsMap."${id}"
        [id, entry.targetsBed, entry.baitsBed]
      }

    // The reference files are passed as plain values, in the order the
    // process declares its inputs.
    CreateBaitsetFiles(
      rawBaitsetInputs,
      referenceMap.genomeFile,
      referenceMap.genomeIndex,
      referenceMap.genomeDict,
      referenceMap.codingRegions
    )

  emit:
    baitsetInterval       = CreateBaitsetFiles.out.baitsetInterval
    codingBaitsetBed      = CreateBaitsetFiles.out.codingBaitsetBed
    baitsetPlus5          = CreateBaitsetFiles.out.baitsetPlus5
    baitsetPlus5_unzipped = CreateBaitsetFiles.out.baitsetPlus5_unzipped
}