diff --git a/conf/modules.config b/conf/modules.config index acc23c3..21cb4af 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -102,7 +102,7 @@ process { ] } - withName: '.*GENOME_STATISTICS_RAW:BUSCO' { + withName: '.*GENOME_STATISTICS_RAW:BUSCO_PRI' { publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasm}/${meta.id}.p_ctg.${meta.lineage}.busco" }, mode: params.publish_dir_mode, @@ -132,8 +132,8 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_PRI_HIC' { - ext.prefix = { "${meta.id}.asm.hic.p_ctg" } + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_HAP1_HIC' { + ext.prefix = { "${meta.id}.asm.hic.hap1" } publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, @@ -141,8 +141,8 @@ process { ] } - withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_ALT_HIC' { - ext.prefix = { "${meta.id}.asm.hic.a_ctg" } + withName: '.*RAW_ASSEMBLY:GFA_TO_FASTA_HAP2_HIC' { + ext.prefix = { "${meta.id}.asm.hic.hap2" } publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, @@ -158,7 +158,7 @@ process { ] } withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_PRI' { - ext.prefix = { "${meta.id}.asm.hic.p_ctg" } + ext.prefix = { "${meta.id}.asm.hic.hap1" } publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, @@ -167,16 +167,30 @@ process { } withName: '.*GENOME_STATISTICS_RAW_HIC:GFASTATS_HAP' { - ext.prefix = { "${meta.id}.asm.hic.a_ctg" } + ext.prefix = { "${meta.id}.asm.hic.hap2" } publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}" }, mode: params.publish_dir_mode, pattern: '*assembly_summary' ] } - withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO' { + + withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO_PRI' { publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.p_ctg.${meta.lineage}.busco" }, + 
path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap1.${meta.lineage}.busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.endsWith('busco.log') ? filename : + filename.endsWith('full_table.tsv') ? filename : + filename.endsWith('missing_busco_list.tsv') ? filename : + filename.startsWith('short_summary') ? filename : + filename.endsWith('busco.batch_summary.txt') ? filename : + null } + ] + } + + withName: '.*GENOME_STATISTICS_RAW_HIC:BUSCO_HAP' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap2.${meta.lineage}.busco" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.endsWith('busco.log') ? filename : filename.endsWith('full_table.tsv') ? filename : @@ -189,7 +203,7 @@ process { withName: '.*GENOME_STATISTICS_RAW_HIC:MERQURYFK_MERQURYFK' { publishDir = [ - path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.p_ctg.ccs.merquryk" }, + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/${meta.id}.hap1.ccs.merquryk" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -332,7 +346,7 @@ process { ] } - withName: '.*GENOME_STATISTICS_PURGED:BUSCO' { + withName: '.*GENOME_STATISTICS_PURGED:BUSCO_PRI' { publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasm}/purging/${meta.id}.purged.${meta.lineage}.busco" }, mode: params.publish_dir_mode, @@ -499,7 +513,7 @@ process { ] } - withName: '.*HIC_MAPPING:SAMTOOLS_MERGE_HIC_MAPPING' { + withName: '.*HIC_MAPPING.*:SAMTOOLS_MERGE_HIC_MAPPING' { ext.prefix = { "${meta.id}_merged" } } @@ -530,7 +544,7 @@ process { } - withName: '.*HIC_MAPPING:CONVERT_STATS:SAMTOOLS_VIEW' { + withName: '.*HIC_MAPPING.*:CONVERT_STATS:SAMTOOLS_VIEW' { ext.args = "--output-fmt cram" } @@ -559,7 +573,7 @@ process { } // Set up of the scffolding pipeline - withName: 'YAHS' { + withName: '.*SCAFFOLDING:YAHS' { ext.prefix = 'out' publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, @@ -568,7 +582,7 @@ process { ] } - withName: 'COOLER_CLOAD' { + withName: '.*SCAFFOLDING:COOLER_CLOAD' { // Positions in the input file are zero-based; // chrom1 field number (one-based) is 2; // pos1 field number (one-based) is 3; @@ -582,7 +596,7 @@ process { ] } - withName: 'PRETEXTSNAPSHOT' { + withName: '.*SCAFFOLDING:PRETEXTSNAPSHOT' { // Make one plot containing all sequences ext.args = '--sequences \"=full\"' publishDir = [ @@ -592,7 +606,7 @@ process { ] } - withName: 'JUICER_TOOLS_PRE' { + withName: '.*SCAFFOLDING:JUICER_TOOLS_PRE' { ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' ext.juicer_jvm_params = '-Xms1g -Xmx6g' publishDir = [ @@ -602,7 +616,7 @@ process { ] } - withName: 'JUICER_PRE' { + withName: '.*SCAFFOLDING:JUICER_PRE' { ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs" }, @@ -620,7 +634,7 @@ process { ] } - withName: '.*GENOME_STATISTICS_SCAFFOLDS:BUSCO' { + withName: 
'.*GENOME_STATISTICS_SCAFFOLDS:BUSCO_PRI' { publishDir = [ path: { "${params.outdir}/${meta.id}.${params.hifiasm}/scaffolding/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" }, mode: params.publish_dir_mode, @@ -640,6 +654,161 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + // Scaffolding hap1/hap2 + if (params.hifiasm_hic_on) { + + withName: '.*HIC_MAPPING_HAP.*:SAMTOOLS_MARKDUP_HIC_MAPPING' { + ext.prefix = { "${meta.id}_mkdup" } + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*HIC_MAPPING_HAP.*:BAMTOBED_SORT' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + + withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*HIC_MAPPING_HAP.*:CONVERT_STATS:SAMTOOLS_IDXSTATS' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*SCAFFOLDING_HAP.*:YAHS' { + ext.prefix = { "${meta.hap_id}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + } + + withName: '.*SCAFFOLDING_HAP.*:COOLER_CLOAD' { + // Positions in the input file are zero-based; + // chrom1 field number (one-based) is 2; + // pos1 field number (one-based) is 3; + // chrom2 field number (one-based) is 6; + // pos2 field number (one-based) is 7 + ext.args = 'pairs -0 -c1 2 -p1 3 -c2 6 -p2 7' + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*SCAFFOLDING_HAP.*:PRETEXTSNAPSHOT' { + // Make one plot containing all sequences + ext.args = '--sequences \"=full\"' + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*SCAFFOLDING_HAP.*:JUICER_TOOLS_PRE' { + ext.juicer_tools_jar = 'juicer_tools.1.9.9_jcuda.0.8.jar' + ext.juicer_jvm_params = '-Xms1g -Xmx6g' + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*SCAFFOLDING_HAP.*:JUICER_PRE' { + ext.args2 = "LC_ALL=C sort -k2,2d -k6,6d -S50G | awk '\$3>=0 && \$7>=0'" + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_${meta.hap_id}/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + // End of hap1/hap2 scaffolding + + withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_PRI' { + ext.prefix = { "${meta.id}_scaffolds_final" } + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:GFASTATS_HAP' { + ext.prefix = { "${meta.id}_scaffolds_final" } + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs" }, + mode: params.publish_dir_mode, + pattern: '*assembly_summary' + ] + } + + withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:BUSCO_PRI' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.endsWith('busco.log') ? filename : + filename.endsWith('full_table.tsv') ? filename : + filename.endsWith('missing_busco_list.tsv') ? filename : + filename.startsWith('short_summary') ? filename : + filename.endsWith('busco.batch_summary.txt') ? filename : + null } + ] + } + + withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:BUSCO_HAP' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap2/yahs/out.break.yahs/out_scaffolds_final.${meta.lineage}.busco" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.endsWith('busco.log') ? filename : + filename.endsWith('full_table.tsv') ? 
filename : + filename.endsWith('missing_busco_list.tsv') ? filename : + filename.startsWith('short_summary') ? filename : + filename.endsWith('busco.batch_summary.txt') ? filename : + null } + ] + } + + withName: '.*GENOME_STATISTICS_SCAFFOLDS_HAPS:MERQURYFK_MERQURYFK' { + publishDir = [ + path: { "${params.outdir}/${meta.id}.${params.hifiasmhic}/scaffolding_hap1/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + // End of Scaffolding hap1/hap2 // End of Set up of the scaffolding pipeline //Set up of assembly stats subworkflow diff --git a/docs/output.md b/docs/output.md index 5a92b0b..fedc59b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -4,7 +4,14 @@ This document describes the output produced by the genomeassembly pipeline. -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. +The standard assembly pipeline consists of running hifiasm on the HiFi reads, purging the primary contigs with purge_dups, and scaffolding them with YaHS. +Optionally, if Illumina 10X data is provided, the purged contigs and haplotigs can be polished. + +For a diploid genome where the HiFi and HiC data come from the same individual, an additional hifiasm run in HiC mode produces two balanced, fully phased haplotypes. These haplotypes are not purged but are scaffolded directly with YaHS. + +Optionally, organelle assembly can be triggered. The mitochondrion and (if relevant) plastid sequences are produced using MitoHiFi and OATK. + +The directories listed below will be created in the `--outdir` directory after the pipeline has finished. All paths are relative to the top-level `--outdir` directory. 
## Subworkflows @@ -43,13 +50,16 @@ This subworkflow generates a KMER database and coverage model used in [PURGE_DUP - primary assembly in GFA and FASTA format; for more details refer to [hifiasm output](https://hifiasm.readthedocs.io/en/latest/interpreting-output.html) - .\*hifiasm.\*/.*a_ctg.[g]fa - haplotigs in GFA and FASTA format; for more details refer to [hifiasm output](https://hifiasm.readthedocs.io/en/latest/interpreting-output.html) + - .\*hifiasm-hic.\*/.*hap1.p_ctg.[g]fa + - fully phased hap1 if hifiasm is run in HiC mode; for more details refer to [hifiasm output](https://hifiasm.readthedocs.io/en/latest/interpreting-output.html) + - .\*hifiasm-hic.\*/.*hap2.p_ctg.[g]fa + - fully phased hap2 if hifiasm is run in HiC mode; for more details refer to [hifiasm output](https://hifiasm.readthedocs.io/en/latest/interpreting-output.html) - .\*hifiasm.\*/.*bin - internal binary hifiasm files; for more details refer [here](https://hifiasm.readthedocs.io/en/latest/faq.html#id12) This subworkflow generates a raw assembly(-ies). First, hifiasm is run on the input HiFi reads then raw contigs are converted from GFA into FASTA format, this assembly is due to purging, polishing (optional) and scaffolding further down the pipeline. -In case hifiasm HiC mode is switched on, it is performed as an extra step with results stored in hifiasm-hic folder.

![Raw assembly subworkflow](images/v1/raw_assembly.png) @@ -68,6 +78,7 @@ In case hifiasm HiC mode is switched on, it is performed as an extra step with r Retained haplotype is identified in primary assembly. The alternate contigs are updated correspondingly. The subworkflow relies on kmer coverage model to identify coverage thresholds. For more details see [purge_dups](https://github.com/dfguan/purge_dups) +The two haplotype assemblies produced by hifiasm in HiC mode are not purged.

@@ -98,9 +109,9 @@ This subworkflow uses read mapping of the Illumina 10X short read data to fix sh
Output files - - \*.hifiasm..\*/scaffolding/.*_merged_sorted.bed + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/.*_merged_sorted.bed - bed file obtained from merged mkdup bam - - \*.hifiasm..\*/scaffolding/.*mkdup.bam + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/.*mkdup.bam - final read mapping bam with mapped reads
@@ -113,11 +124,11 @@ This subworkflow implements alignment of the Illumina HiC short reads to the pri
Output files - - \*.hifiasm..\*/scaffolding/.*.stats + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/.*.stats - output of samtools stats - - \*.hifiasm..\*/scaffolding/.*.idxstats + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/.*.idxstats - output of samtools idxstats - - \*.hifiasm..\*/scaffolding/.*.flagstat + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/.*.flagstat - output of samtools flagstat
@@ -128,17 +139,17 @@ This subworkflow produces statistcs for a bam file containing read mapping. It i
Output files - - \*.hifiasm..\*/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/yahs/out.break.yahs/out_scaffolds_final.fa - scaffolds in FASTA format - - \*.hifiasm..\*/scaffolding/yahs/out.break.yahs/out_scaffolds_final.agp + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/yahs/out.break.yahs/out_scaffolds_final.agp - coordinates of contigs relative to scaffolds - - \*.hifiasm..\*/scaffolding/yahs/out.break.yahs/alignments_sorted.txt + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/yahs/out.break.yahs/alignments_sorted.txt - Alignments for Juicer in text format - - \*.hifiasm..\*/scaffolding/yahs/out.break.yahs/yahs_scaffolds.hic + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/yahs/out.break.yahs/yahs_scaffolds.hic - Juicer HiC map - - \*.hifiasm..\*/scaffolding/yahs/out.break.yahs/*cool + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/yahs/out.break.yahs/*cool - HiC map for cooler - - \*.hifiasm..\*/scaffolding/yahs/out.break.yahs/*.FullMap.png + - \*.hifiasm.\*/scaffolding(_hap1|_hap2)?/yahs/out.break.yahs/*.FullMap.png - Pretext snapshot
diff --git a/docs/usage.md b/docs/usage.md index 83b5649..c121091 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -59,20 +59,27 @@ Example is based on [test.yaml](../assets/test.yaml). dataset: id: baUndUnlc1 illumina_10X: - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/ + reads: + - https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R1_001.fastq.gz + - https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_R2_001.fastq.gz + - https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/10x/baUndUnlc1_S12_L002_I1_001.fastq.gz pacbio: reads: - - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta + - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/pacbio/fasta/HiFi.reads.fasta HiC: reads: - - reads: /lustre/scratch123/tol/resources/nextflow/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2#7.sub.cram + - reads: https://tolit.cog.sanger.ac.uk/test-data/Undibacterium_unclassified/genomic_data/baUndUnlc1/hic-arima2/41741_2%237.sub.cram hic_motif: GATC,GANTC,CTNAG,TTAA +hic_aligner: bwamem2 busco: lineage: bacteria_odb10 mito: species: Caradrina clavipalpis min_length: 15000 code: 5 + fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/insecta_mito.fam +plastid: + fam: https://github.com/c-zhou/OatkDB/raw/main/v20230921/acrogymnospermae_pltd.fam ``` diff --git a/modules/nf-core/hifiasm/environment.yml b/modules/nf-core/hifiasm/environment.yml new file mode 100644 index 0000000..3aa0fd5 --- /dev/null +++ b/modules/nf-core/hifiasm/environment.yml @@ -0,0 +1,8 @@ +name: hifiasm +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hifiasm=0.19.8 + - 
bioconda::samtools=1.20 diff --git a/modules/nf-core/hifiasm/hifiasm.diff b/modules/nf-core/hifiasm/hifiasm.diff index c9103ce..0594f11 100644 --- a/modules/nf-core/hifiasm/hifiasm.diff +++ b/modules/nf-core/hifiasm/hifiasm.diff @@ -13,7 +13,7 @@ Changes in module 'nf-core/hifiasm' + exit 1, "This version of HIFIASM module does not support Conda. Please use Docker / Singularity / Podman instead." + } + -+ container "wave.seqera.io/wt/73ac3caec075/wave/build:hifiasm-0.19.8_samtools-1.20--1f6824530f0d0ad5" ++ container "quay.io/sanger-tol/hifiasm_samtools:0.01" input: tuple val(meta), path(reads) @@ -25,12 +25,14 @@ Changes in module 'nf-core/hifiasm' output: tuple val(meta), path("*.r_utg.gfa") , emit: raw_unitigs -@@ -23,8 +25,10 @@ +@@ -23,8 +25,12 @@ tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true - tuple val(meta), path("*.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true - tuple val(meta), path("*.hap2.p_ctg.gfa") , emit: maternal_contigs , optional: true ++ tuple val(meta), path("*.asm.hic.hap1.p_ctg.gfa") , emit: hap1_contigs , optional: true ++ tuple val(meta), path("*.asm.hic.hap2.p_ctg.gfa") , emit: hap2_contigs , optional: true + tuple val(meta), path("*.asm.hic.p_ctg.gfa") , emit: hic_primary_contigs , optional: true + tuple val(meta), path("*.asm.hic.a_ctg.gfa") , emit: hic_alternate_contigs , optional: true + tuple val(meta), path("*.asm.hic.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true @@ -38,7 +40,7 @@ Changes in module 'nf-core/hifiasm' tuple val(meta), path("*.log") , emit: log path "versions.yml" , emit: versions -@@ -34,6 +38,8 @@ +@@ -34,6 +40,8 @@ script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -47,7 +49,7 @@ Changes in module 'nf-core/hifiasm' if ((paternal_kmer_dump) && (maternal_kmer_dump) 
&& (hic_read1) && (hic_read2)) { error "Hifiasm Trio-binning and Hi-C integrated should not be used at the same time" } else if ((paternal_kmer_dump) && !(maternal_kmer_dump)) { -@@ -67,8 +73,8 @@ +@@ -67,8 +75,8 @@ $args \\ -o ${prefix}.asm \\ -t $task.cpus \\ @@ -59,4 +61,15 @@ Changes in module 'nf-core/hifiasm' 2> >( tee ${prefix}.stderr.log >&2 ) +--- modules/nf-core/hifiasm/environment.yml ++++ /dev/null +@@ -1,7 +0,0 @@ +-name: hifiasm +-channels: +- - conda-forge +- - bioconda +- - defaults +-dependencies: +- - bioconda::hifiasm=0.19.8 + ************************************************************ diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index 7854a89..c7b8741 100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -25,6 +25,8 @@ process HIFIASM { tuple val(meta), path("*.p_utg.gfa") , emit: processed_unitigs, optional: true tuple val(meta), path("*.asm.p_ctg.gfa") , emit: primary_contigs , optional: true tuple val(meta), path("*.asm.a_ctg.gfa") , emit: alternate_contigs, optional: true + tuple val(meta), path("*.asm.hic.hap1.p_ctg.gfa") , emit: hap1_contigs , optional: true + tuple val(meta), path("*.asm.hic.hap2.p_ctg.gfa") , emit: hap2_contigs , optional: true tuple val(meta), path("*.asm.hic.p_ctg.gfa") , emit: hic_primary_contigs , optional: true tuple val(meta), path("*.asm.hic.a_ctg.gfa") , emit: hic_alternate_contigs , optional: true tuple val(meta), path("*.asm.hic.hap1.p_ctg.gfa") , emit: paternal_contigs , optional: true diff --git a/subworkflows/local/genome_statistics.nf b/subworkflows/local/genome_statistics.nf index 37524e4..d4cfcb6 100644 --- a/subworkflows/local/genome_statistics.nf +++ b/subworkflows/local/genome_statistics.nf @@ -9,7 +9,8 @@ include { GFASTATS as GFASTATS_PRI } from '../../modules/nf-core/gfastats/main' include { GFASTATS as GFASTATS_HAP } from '../../modules/nf-core/gfastats/main' -include { BUSCO } from '../../modules/nf-core/busco/main' +include { 
BUSCO as BUSCO_PRI } from '../../modules/nf-core/busco/main' +include { BUSCO as BUSCO_HAP } from '../../modules/nf-core/busco/main' include { MERQURYFK_MERQURYFK } from '../../modules/nf-core/merquryfk/merquryfk/main' workflow GENOME_STATISTICS { @@ -18,6 +19,7 @@ workflow GENOME_STATISTICS { lineage // channel: [ meta, /path/to/buscoDB, lineage ] hist // channel: [meta, fastk_hist files] ktab // channel: [meta, fastk_ktab files] + busco_alt // val: boolean (not a channel); when true, BUSCO is also run on the haplotigs main: ch_versions = Channel.empty() @@ -48,14 +50,27 @@ workflow GENOME_STATISTICS { // // MODULE: RUN BUSCO ON PRIMARY ASSEMBLY // - BUSCO ( primary_ch.join(lineage) + BUSCO_PRI ( primary_ch.join(lineage) .map{ meta, primary, lineage_db, lineage_name -> [[id:meta.id, lineage:lineage_name], primary]}, lineage.map{ meta, lineage_db, lineage_name -> lineage_name } , lineage.map{ meta, lineage_db, ch_lineage -> lineage_db }, [] ) - ch_versions = ch_versions.mix(BUSCO.out.versions.first()) + ch_versions = ch_versions.mix(BUSCO_PRI.out.versions.first()) + // + // MODULE: run BUSCO for haplotigs + // USED FOR HAP1/HAP2 ASSEMBLIES + // + if ( busco_alt ) { + BUSCO_HAP ( haplotigs_ch.join(lineage) + .map{ meta, haps, lineage_db, lineage_name -> + [[id:meta.id, lineage:lineage_name], haps]}, + lineage.map{ meta, lineage_db, lineage_name -> lineage_name } , + lineage.map{ meta, lineage_db, ch_lineage -> lineage_db }, + [] ) + } + // // LOGIC: JOIN ASSEMBLY AND KMER DATABASE INPUT // diff --git a/subworkflows/local/hic_bwamem2.nf b/subworkflows/local/hic_bwamem2.nf index f053ef8..7d4f07e 100755 --- a/subworkflows/local/hic_bwamem2.nf +++ b/subworkflows/local/hic_bwamem2.nf @@ -35,18 +35,17 @@ workflow HIC_BWAMEM2 { id: cram_id.id ], file(cram_info[0]), - cram_info[1], - cram_info[2], - cram_info[3], - cram_info[4], - cram_info[5], - cram_info[6], - bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1], - ref_dir + cram_info[1], // crai path + cram_info[2], // chunk starting position + cram_info[3], // chunk 
end position + cram_info[4], // basename + cram_info[5], // the number of chunk + cram_info[6], // rgline + bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1] ) } .set { ch_filtering_input } - ch_filtering_input.view() + // // MODULE: map hic reads by 10,000 container per time using bwamem2 // @@ -68,7 +67,7 @@ workflow HIC_BWAMEM2 { .map { file -> tuple ( [ - id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] + id: file[0].toString().split('/')[-1].split('_')[0] ], file ) @@ -78,4 +77,4 @@ workflow HIC_BWAMEM2 { emit: mappedbams = collected_files_for_merge versions = ch_versions.ifEmpty(null) -} \ No newline at end of file +} diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index c8786ba..05104a7 100644 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -26,6 +26,7 @@ workflow HIC_MAPPING { reference_tuple // Channel [ val(meta), path(file) ] hic_reads_path // Channel [ val(meta), path(directory) ] hic_aligner_ch // Channel [ val(meta), val(hic_aligner)] + hap_id // Value hap_id main: ch_versions = Channel.empty() @@ -42,7 +43,7 @@ workflow HIC_MAPPING { reference_tuple .join( hic_reads_path ) .map { meta, ref, hic_reads_path -> - tuple([ id: meta.id, single_end: true], hic_reads_path, hic_reads_path.collect { p -> p.resolveSibling(p.name + ".crai") } ) } + tuple([ id: meta.id, hap_id: hap_id, single_end: true], hic_reads_path, hic_reads_path.collect { p -> p.resolveSibling(p.name + ".crai") } ) } .set { get_reads_input } // @@ -67,7 +68,8 @@ workflow HIC_MAPPING { bwamem2 : it[0].aligner == "bwamem2" } .set{ch_aligner} - + + // // SUBWORKFLOW: mapping hic reads using minimap2 // @@ -78,7 +80,7 @@ workflow HIC_MAPPING { ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) mappedbams = HIC_MINIMAP2.out.mappedbams - // + // // SUBWORKFLOW: mapping hic reads using bwamem2 // HIC_BWAMEM2 ( @@ -88,6 +90,9 @@ workflow HIC_MAPPING { 
ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) mappedbams = mappedbams.mix(HIC_BWAMEM2.out.mappedbams) + mappedbams.map{meta, bams -> [[id: meta.id, hap_id:hap_id], bams]} + .set { mappedbams } + // // LOGIC: GENERATE INDEX OF REFERENCE // diff --git a/subworkflows/local/hic_minimap2.nf b/subworkflows/local/hic_minimap2.nf index 3eff6e4..f9b97f7 100755 --- a/subworkflows/local/hic_minimap2.nf +++ b/subworkflows/local/hic_minimap2.nf @@ -43,12 +43,12 @@ workflow HIC_MINIMAP2 { id: cram_id.id ], file(cram_info[0]), - cram_info[1], - cram_info[2], - cram_info[3], - cram_info[4], - cram_info[5], - cram_info[6], + cram_info[1], // crai path + cram_info[2], // chunk starting position + cram_info[3], // chunk end position + cram_info[4], // basename + cram_info[5], // the number of chunk + cram_info[6], // rgline mmi_path.toString(), ref_dir ) @@ -77,7 +77,7 @@ workflow HIC_MINIMAP2 { .map { file -> tuple ( [ - id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] + id: file[0].toString().split('/')[-1].split('_')[0] ], file ) diff --git a/subworkflows/local/raw_assembly.nf b/subworkflows/local/raw_assembly.nf index 37aeb34..703efbf 100644 --- a/subworkflows/local/raw_assembly.nf +++ b/subworkflows/local/raw_assembly.nf @@ -3,8 +3,8 @@ include { HIFIASM as HIFIASM_HIC } from '../../modules/nf-core/hi include { GFA_TO_FASTA as GFA_TO_FASTA_PRI } from '../../modules/local/gfa_to_fasta' include { GFA_TO_FASTA as GFA_TO_FASTA_ALT } from '../../modules/local/gfa_to_fasta' -include { GFA_TO_FASTA as GFA_TO_FASTA_PRI_HIC } from '../../modules/local/gfa_to_fasta' -include { GFA_TO_FASTA as GFA_TO_FASTA_ALT_HIC } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_HAP1_HIC } from '../../modules/local/gfa_to_fasta' +include { GFA_TO_FASTA as GFA_TO_FASTA_HAP2_HIC } from '../../modules/local/gfa_to_fasta' workflow RAW_ASSEMBLY { take: @@ -44,19 +44,19 @@ workflow RAW_ASSEMBLY { // // MODULE: 
CONVERT HIFIASM-HIC PRIMARY CONTIGS TO FASTA // - GFA_TO_FASTA_PRI_HIC( HIFIASM_HIC.out.hic_primary_contigs ) + GFA_TO_FASTA_HAP1_HIC( HIFIASM_HIC.out.hap1_contigs ) // // MODULE: CONVERT HIFIASM-HIC ALT CONTIGS TO FASTA // - GFA_TO_FASTA_ALT_HIC( HIFIASM_HIC.out.hic_alternate_contigs ) + GFA_TO_FASTA_HAP2_HIC( HIFIASM_HIC.out.hap2_contigs ) } emit: primary_contigs = GFA_TO_FASTA_PRI.out.fasta alternate_contigs = GFA_TO_FASTA_ALT.out.fasta - primary_hic_contigs = hifiasm_hic_on ? GFA_TO_FASTA_PRI_HIC.out.fasta : null - alternate_hic_contigs = hifiasm_hic_on ? GFA_TO_FASTA_ALT_HIC.out.fasta : null + hap1_hic_contigs = hifiasm_hic_on ? GFA_TO_FASTA_HAP1_HIC.out.fasta : null + hap2_hic_contigs = hifiasm_hic_on ? GFA_TO_FASTA_HAP2_HIC.out.fasta : null versions = ch_versions } diff --git a/subworkflows/local/scaffolding.nf b/subworkflows/local/scaffolding.nf index b15b443..10f3226 100644 --- a/subworkflows/local/scaffolding.nf +++ b/subworkflows/local/scaffolding.nf @@ -16,6 +16,7 @@ workflow SCAFFOLDING { bed_in // tuple(meta, bed) fasta_in // tuple(meta, fasta) cool_bin // val: cooler cload parameter + hap_id // val: hap1/hap2/empty main: ch_versions = Channel.empty() @@ -37,9 +38,14 @@ workflow SCAFFOLDING { .set{ scaf_ref_fai } // + // LOGIC: MIX IN THE HAPLOTYPE ID TO CONTROL THE OUTPUT SUFFIX + // + bed_in.map{ meta, bed -> [[id:meta.id, hap_id:hap_id],bed] } + .set{ bed_in_hap } + // // MODULE: PERFORM SCAAFFOLDING WITH YAHS // - YAHS( bed_in, scaf_ref, scaf_ref_fai ) + YAHS( bed_in_hap , scaf_ref, scaf_ref_fai ) ch_versions = ch_versions.mix(YAHS.out.versions) // @@ -59,7 +65,7 @@ workflow SCAFFOLDING { YAHS.out.binary.join(YAHS.out.scaffolds_agp) .combine(scaf_ref) .combine(scaf_ref_fai) - .map{meta, binary, agp, fa, fai -> [meta, binary, agp, fai]} + .map{meta, binary, agp, fa, fai -> [[id:meta.id, hap_id:hap_id], binary, agp, fai]} .set{ch_merge} // @@ -71,7 +77,7 @@ workflow SCAFFOLDING { // // LOGIC: BIN CONTACT PAIRS // - JUICER_PRE.out.pairs.join(bed_in) 
+ JUICER_PRE.out.pairs.join(bed_in_hap) .combine(Channel.of(cool_bin)) .set{ch_juicer} diff --git a/workflows/genomeassembly.nf b/workflows/genomeassembly.nf index a59ec08..2e5e389 100644 --- a/workflows/genomeassembly.nf +++ b/workflows/genomeassembly.nf @@ -20,6 +20,11 @@ if (params.cool_bin) { cool_bin = params.cool_bin } else { cool_bin = 1000; } if (params.polishing_on) { polishing_on = params.polishing_on } else { polishing_on = false; } if (params.hifiasm_hic_on) { hifiasm_hic_on = params.hifiasm_hic_on } else { hifiasm_hic_on = false; } if (params.organelles_on) { organelles_on = params.organelles_on } else { organelles_on = false; } + +// Declare constants to toggle BUSCO for alts +set_busco_alts = true +unset_busco_alts = false + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -35,21 +40,26 @@ if (params.organelles_on) { organelles_on = params.organelles_on } else { organe // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { PREPARE_INPUT } from '../subworkflows/local/prepare_input' -include { RAW_ASSEMBLY } from '../subworkflows/local/raw_assembly' -include { ORGANELLES } from '../subworkflows/local/organelles' -include { GENOMESCOPE_MODEL } from '../subworkflows/local/genomescope_model' -include { PURGE_DUPS } from '../subworkflows/local/purge_dups' -include { POLISHING } from '../subworkflows/local/polishing' -include { SCAFFOLDING } from '../subworkflows/local/scaffolding' -include { KEEP_SEQNAMES as KEEP_SEQNAMES_PRIMARY } from '../modules/local/keep_seqnames' -include { KEEP_SEQNAMES as KEEP_SEQNAMES_HAPLOTIGS } from '../modules/local/keep_seqnames' -include { HIC_MAPPING } from '../subworkflows/local/hic_mapping' -include { GENOME_STATISTICS as GENOME_STATISTICS_RAW } from '../subworkflows/local/genome_statistics' -include { GENOME_STATISTICS as GENOME_STATISTICS_RAW_HIC } from '../subworkflows/local/genome_statistics' -include { GENOME_STATISTICS as 
GENOME_STATISTICS_PURGED } from '../subworkflows/local/genome_statistics' -include { GENOME_STATISTICS as GENOME_STATISTICS_POLISHED } from '../subworkflows/local/genome_statistics' -include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS } from '../subworkflows/local/genome_statistics' +include { PREPARE_INPUT } from '../subworkflows/local/prepare_input' +include { RAW_ASSEMBLY } from '../subworkflows/local/raw_assembly' +include { ORGANELLES } from '../subworkflows/local/organelles' +include { GENOMESCOPE_MODEL } from '../subworkflows/local/genomescope_model' +include { PURGE_DUPS } from '../subworkflows/local/purge_dups' +include { POLISHING } from '../subworkflows/local/polishing' +include { SCAFFOLDING } from '../subworkflows/local/scaffolding' +include { SCAFFOLDING as SCAFFOLDING_HAP1 } from '../subworkflows/local/scaffolding' +include { SCAFFOLDING as SCAFFOLDING_HAP2 } from '../subworkflows/local/scaffolding' +include { KEEP_SEQNAMES as KEEP_SEQNAMES_PRIMARY } from '../modules/local/keep_seqnames' +include { KEEP_SEQNAMES as KEEP_SEQNAMES_HAPLOTIGS } from '../modules/local/keep_seqnames' +include { HIC_MAPPING } from '../subworkflows/local/hic_mapping' +include { HIC_MAPPING as HIC_MAPPING_HAP1 } from '../subworkflows/local/hic_mapping' +include { HIC_MAPPING as HIC_MAPPING_HAP2 } from '../subworkflows/local/hic_mapping' +include { GENOME_STATISTICS as GENOME_STATISTICS_RAW } from '../subworkflows/local/genome_statistics' +include { GENOME_STATISTICS as GENOME_STATISTICS_RAW_HIC } from '../subworkflows/local/genome_statistics' +include { GENOME_STATISTICS as GENOME_STATISTICS_PURGED } from '../subworkflows/local/genome_statistics' +include { GENOME_STATISTICS as GENOME_STATISTICS_POLISHED } from '../subworkflows/local/genome_statistics' +include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS } from '../subworkflows/local/genome_statistics' +include { GENOME_STATISTICS as GENOME_STATISTICS_SCAFFOLDS_HAPS } from 
'../subworkflows/local/genome_statistics' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -122,7 +132,8 @@ workflow GENOMEASSEMBLY { GENOME_STATISTICS_RAW( primary_contigs_ch.join(haplotigs_ch), PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, - GENOMESCOPE_MODEL.out.ktab + GENOMESCOPE_MODEL.out.ktab, + unset_busco_alts ) ch_versions = ch_versions.mix(GENOME_STATISTICS_RAW.out.versions) @@ -164,11 +175,12 @@ workflow GENOMEASSEMBLY { // // SUBWORKFLOW: CALCULATE RAW ASSEMBLY STATISTICS FOR THE HIFIASN IN HIC MODE // - GENOME_STATISTICS_RAW_HIC( RAW_ASSEMBLY.out.primary_hic_contigs - .join(RAW_ASSEMBLY.out.alternate_hic_contigs), + GENOME_STATISTICS_RAW_HIC( RAW_ASSEMBLY.out.hap1_hic_contigs + .join(RAW_ASSEMBLY.out.hap2_hic_contigs), PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, - GENOMESCOPE_MODEL.out.ktab + GENOMESCOPE_MODEL.out.ktab, + set_busco_alts ) } @@ -210,7 +222,8 @@ workflow GENOMEASSEMBLY { GENOME_STATISTICS_PURGED( primary_contigs_ch.join(haplotigs_ch), PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, - GENOMESCOPE_MODEL.out.ktab + GENOMESCOPE_MODEL.out.ktab, + unset_busco_alts ) // @@ -310,7 +323,8 @@ workflow GENOMEASSEMBLY { GENOME_STATISTICS_POLISHED( polished_asm_stats_input_ch, PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, - GENOMESCOPE_MODEL.out.ktab + GENOMESCOPE_MODEL.out.ktab, + unset_busco_alts ) } @@ -326,13 +340,13 @@ workflow GENOMEASSEMBLY { // // SUBWORKFLOW: MAP HIC DATA TO THE PRIMARY ASSEMBLY // - HIC_MAPPING ( primary_contigs_ch,crams_ch,hic_aligner_ch ) + HIC_MAPPING ( primary_contigs_ch,crams_ch,hic_aligner_ch, "") ch_versions = ch_versions.mix(HIC_MAPPING.out.versions) // // SUBWORKFLOW: SCAFFOLD THE PRIMARY ASSEMBLY // - SCAFFOLDING( HIC_MAPPING.out.bed, primary_contigs_ch, cool_bin ) + SCAFFOLDING( HIC_MAPPING.out.bed, primary_contigs_ch, cool_bin, "") ch_versions = ch_versions.mix(SCAFFOLDING.out.versions) // @@ -349,9 +363,54 @@ workflow GENOMEASSEMBLY { 
GENOME_STATISTICS_SCAFFOLDS( stats_input_ch, PREPARE_INPUT.out.busco, GENOMESCOPE_MODEL.out.hist, - GENOMESCOPE_MODEL.out.ktab + GENOMESCOPE_MODEL.out.ktab, + unset_busco_alts + ) + + if ( hifiasm_hic_on ) { + // + // SUBWORKFLOW: MAP HIC DATA TO THE HAP1 CONTIGS + // + HIC_MAPPING_HAP1 ( RAW_ASSEMBLY.out.hap1_hic_contigs, crams_ch, hic_aligner_ch, 'hap1' ) + ch_versions = ch_versions.mix(HIC_MAPPING_HAP1.out.versions) + + // + // SUBWORKFLOW: SCAFFOLD HAP1 + // + SCAFFOLDING_HAP1( HIC_MAPPING_HAP1.out.bed, RAW_ASSEMBLY.out.hap1_hic_contigs, cool_bin, 'hap1' ) + ch_versions = ch_versions.mix(SCAFFOLDING_HAP1.out.versions) + + // + // SUBWORKFLOW: MAP HIC DATA TO THE HAP2 CONTIGS + // + HIC_MAPPING_HAP2 ( RAW_ASSEMBLY.out.hap2_hic_contigs, crams_ch, hic_aligner_ch, 'hap2' ) + ch_versions = ch_versions.mix(HIC_MAPPING_HAP2.out.versions) + + // + // SUBWORKFLOW: SCAFFOLD HAP2 + // + SCAFFOLDING_HAP2( HIC_MAPPING_HAP2.out.bed, RAW_ASSEMBLY.out.hap2_hic_contigs, cool_bin, 'hap2' ) + ch_versions = ch_versions.mix(SCAFFOLDING_HAP2.out.versions) + + // + // LOGIC: CREATE A CHANNEL FOR THE FULL HAP1/HAP2 ASSEMBLY + // + SCAFFOLDING_HAP1.out.fasta.combine(SCAFFOLDING_HAP2.out.fasta) + .map{meta_s, fasta_s, meta_h, fasta_h -> [ [id:meta_h.id], fasta_s, fasta_h ]} + .set{ stats_haps_input_ch } + + // + // SUBWORKFLOW: CALCULATE ASSEMBLY STATISTICS FOR HAP1/HAP2 ASSEMBLY + // + GENOME_STATISTICS_SCAFFOLDS_HAPS( stats_haps_input_ch, + PREPARE_INPUT.out.busco, + GENOMESCOPE_MODEL.out.hist, + GENOMESCOPE_MODEL.out.ktab, + set_busco_alts ) + } + // // MODULE: Collate versions.yml file //