From 2935ab115c8996c9b3f52b8bce7a7126f3a32617 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 5 Aug 2024 10:16:30 +0100
Subject: [PATCH 01/52] Last week's additions

---
 main.nf                                       |  31 +-
 modules.json                                  |  30 +
 modules/local/generate_samplesheet.nf         |  43 ++
 modules/local/nextflow/run/main.nf            |  38 ++
 modules/nf-core/busco/busco/environment.yml   |   7 +
 modules/nf-core/busco/busco/main.nf           | 107 +++
 modules/nf-core/busco/busco/meta.yml          |  98 +++
 .../nf-core/busco/busco/tests/main.nf.test    | 419 ++++++++++++
 .../busco/busco/tests/main.nf.test.snap       | 230 +++++++
 .../busco/tests/nextflow.augustus.config      |   5 +
 .../nf-core/busco/busco/tests/nextflow.config |   5 +
 .../busco/busco/tests/nextflow.metaeuk.config |   5 +
 .../nf-core/busco/busco/tests/old_test.yml    | 624 ++++++++++++++++++
 modules/nf-core/busco/busco/tests/tags.yml    |   2 +
 modules/nf-core/gfastats/environment.yml      |   7 +
 modules/nf-core/gfastats/main.nf              |  66 ++
 modules/nf-core/gfastats/meta.yml             |  72 ++
 .../merquryfk/merquryfk/environment.yml       |   5 +
 modules/nf-core/merquryfk/merquryfk/main.nf   |  58 ++
 modules/nf-core/merquryfk/merquryfk/meta.yml  | 112 ++++
 .../nf-core/minimap2/align/environment.yml    |  11 +
 modules/nf-core/minimap2/align/main.nf        |  78 +++
 modules/nf-core/minimap2/align/meta.yml       |  84 +++
 .../nf-core/minimap2/align/tests/main.nf.test | 441 +++++++++++++
 .../minimap2/align/tests/main.nf.test.snap    | 476 +++++++++++++
 modules/nf-core/minimap2/align/tests/tags.yml |   2 +
 .../nf-core/samtools/merge/environment.yml    |   8 +
 modules/nf-core/samtools/merge/main.nf        |  61 ++
 modules/nf-core/samtools/merge/meta.yml       |  83 +++
 .../nf-core/samtools/merge/tests/index.config |   3 +
 .../nf-core/samtools/merge/tests/main.nf.test | 137 ++++
 .../samtools/merge/tests/main.nf.test.snap    | 228 +++++++
 modules/nf-core/samtools/merge/tests/tags.yml |   2 +
 modules/nf-core/samtools/sort/environment.yml |   8 +
 modules/nf-core/samtools/sort/main.nf         |  73 ++
 modules/nf-core/samtools/sort/meta.yml        |  71 ++
 .../nf-core/samtools/sort/tests/main.nf.test  | 128 ++++
 .../samtools/sort/tests/main.nf.test.snap     | 192 ++++++
 .../samtools/sort/tests/nextflow.config       |   8 +
 .../samtools/sort/tests/nextflow_cram.config  |   8 +
 modules/nf-core/samtools/sort/tests/tags.yml  |   3 +
 nextflow.config                               |  31 +-
 nextflow_schema.json                          |   8 +-
 workflows/ear.nf                              | 229 +++++--
 44 files changed, 4247 insertions(+), 90 deletions(-)
 create mode 100644 modules/local/generate_samplesheet.nf
 create mode 100644 modules/local/nextflow/run/main.nf
 create mode 100644 modules/nf-core/busco/busco/environment.yml
 create mode 100644 modules/nf-core/busco/busco/main.nf
 create mode 100644 modules/nf-core/busco/busco/meta.yml
 create mode 100644 modules/nf-core/busco/busco/tests/main.nf.test
 create mode 100644 modules/nf-core/busco/busco/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/busco/busco/tests/nextflow.augustus.config
 create mode 100644 modules/nf-core/busco/busco/tests/nextflow.config
 create mode 100644 modules/nf-core/busco/busco/tests/nextflow.metaeuk.config
 create mode 100644 modules/nf-core/busco/busco/tests/old_test.yml
 create mode 100644 modules/nf-core/busco/busco/tests/tags.yml
 create mode 100644 modules/nf-core/gfastats/environment.yml
 create mode 100644 modules/nf-core/gfastats/main.nf
 create mode 100644 modules/nf-core/gfastats/meta.yml
 create mode 100644 modules/nf-core/merquryfk/merquryfk/environment.yml
 create mode 100644 modules/nf-core/merquryfk/merquryfk/main.nf
 create mode 100644 modules/nf-core/merquryfk/merquryfk/meta.yml
 create mode 100644 modules/nf-core/minimap2/align/environment.yml
 create mode 100644 modules/nf-core/minimap2/align/main.nf
 create mode 100644 modules/nf-core/minimap2/align/meta.yml
 create mode 100644 modules/nf-core/minimap2/align/tests/main.nf.test
 create mode 100644 modules/nf-core/minimap2/align/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/minimap2/align/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/merge/environment.yml
 create mode 100644 modules/nf-core/samtools/merge/main.nf
 create mode 100644 modules/nf-core/samtools/merge/meta.yml
 create mode 100644 modules/nf-core/samtools/merge/tests/index.config
 create mode 100644 modules/nf-core/samtools/merge/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/merge/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/merge/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/sort/environment.yml
 create mode 100644 modules/nf-core/samtools/sort/main.nf
 create mode 100644 modules/nf-core/samtools/sort/meta.yml
 create mode 100644 modules/nf-core/samtools/sort/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/sort/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/sort/tests/nextflow.config
 create mode 100644 modules/nf-core/samtools/sort/tests/nextflow_cram.config
 create mode 100644 modules/nf-core/samtools/sort/tests/tags.yml

diff --git a/main.nf b/main.nf
index 96a8a81..3b7bca7 100644
--- a/main.nf
+++ b/main.nf
@@ -16,8 +16,6 @@ nextflow.enable.dsl = 2
 */
 
 include { EAR  } from './workflows/ear'
-include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_ear_pipeline'
-include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_ear_pipeline'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -42,8 +40,6 @@ workflow SANGERTOL_EAR {
         samplesheet
     )
 
-    emit:
-    multiqc_report = EAR.out.multiqc_report // channel: /path/to/multiqc_report.html
 
 }
 /*
@@ -56,38 +52,13 @@ workflow {
 
     main:
 
-    //
-    // SUBWORKFLOW: Run initialisation tasks
-    //
-    PIPELINE_INITIALISATION (
-        params.version,
-        params.help,
-        params.validate_params,
-        params.monochrome_logs,
-        args,
-        params.outdir,
-        params.input
-    )
-
     //
     // WORKFLOW: Run main workflow
     //
     SANGERTOL_EAR (
-        PIPELINE_INITIALISATION.out.samplesheet
+        params.input
     )
 
-    //
-    // SUBWORKFLOW: Run completion tasks
-    //
-    PIPELINE_COMPLETION (
-        params.email,
-        params.email_on_fail,
-        params.plaintext_email,
-        params.outdir,
-        params.monochrome_logs,
-        params.hook_url,
-        SANGERTOL_EAR.out.multiqc_report
-    )
 }
 
 /*
diff --git a/modules.json b/modules.json
index 99a74d8..3b0db89 100644
--- a/modules.json
+++ b/modules.json
@@ -5,15 +5,45 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "busco/busco": {
+                        "branch": "master",
+                        "git_sha": "17486961b8b1ab1aae258c83a7e947b40d8ab670",
+                        "installed_by": ["modules"]
+                    },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
                         "installed_by": ["modules"]
                     },
+                    "gfastats": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
+                    },
+                    "merquryfk/merquryfk": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": ["modules"]
+                    },
+                    "minimap2/align": {
+                        "branch": "master",
+                        "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
                         "installed_by": ["modules"]
+                    },
+                    "samtools/merge": {
+                        "branch": "master",
+                        "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
+                        "installed_by": ["modules"]
+                    },
+                    "samtools/sort": {
+                        "branch": "master",
+                        "git_sha": "46eca555142d6e597729fcb682adcc791796f514",
+                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/local/generate_samplesheet.nf b/modules/local/generate_samplesheet.nf
new file mode 100644
index 0000000..018f7ec
--- /dev/null
+++ b/modules/local/generate_samplesheet.nf
@@ -0,0 +1,52 @@
+// Runs generate_samplesheet.py for one sample and collects the CSV it leaves behind.
+//
+// Input : tuple(meta, pacbio_path); meta.id is the default value of the prefix argument.
+// Output: csv      - tuple(meta, files matching "*csv" in the task directory)
+//         versions - versions.yml recording the python and generate_samplesheet.py versions
+//
+// NOTE(review): the script is presumed to write <prefix>.csv (the stub mirrors that name);
+// confirm against generate_samplesheet.py, which is not part of this file.
+process GENERATE_SAMPLESHEET {
+    tag "$meta.id"
+    label "process_low"
+
+    conda "conda-forge::python=3.9"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.9' :
+        'biocontainers/python:3.9' }"
+
+    input:
+    tuple val(meta),    path(pacbio_path)
+
+    output:
+    tuple val(meta),    path("*csv"),   emit: csv
+    path "versions.yml",                emit: versions
+
+    script:
+    // task.ext.prefix, when configured, replaces meta.id as the first positional argument
+    def prefix  = task.ext.prefix   ?: "${meta.id}"
+    """
+    generate_samplesheet.py \\
+        $prefix \\
+        $pacbio_path
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        generate_samplesheet: \$(generate_samplesheet.py -v)
+    END_VERSIONS
+    """
+
+    stub:
+    // Creates an empty placeholder CSV; versions.yml keeps the same keys as the real run
+    def prefix  = task.ext.prefix   ?: "${meta.id}"
+    """
+    touch ${prefix}.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        generate_samplesheet: \$(generate_samplesheet.py -v)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/nextflow/run/main.nf b/modules/local/nextflow/run/main.nf
new file mode 100644
index 0000000..cc522bc
--- /dev/null
+++ b/modules/local/nextflow/run/main.nf
@@ -0,0 +1,52 @@
+import java.nio.file.Paths
+import java.nio.file.Files
+
+// Launches `nextflow run <pipeline_name>` as a child process from inside a task.
+//
+// Inputs (all `val`): pipeline_name, nextflow_opts (extra CLI options), params_file,
+// samplesheet and additional_config; the last three are only added to the command when set.
+// Outputs: `output` - the child run's --outdir (<task workDir>/results)
+//          `log`    - text printed by the child process (stdout and stderr combined)
+process NEXTFLOW_RUN {
+    tag "$pipeline_name"
+
+    input:
+    val pipeline_name     // String
+    val nextflow_opts     // String
+    val params_file       // pipeline params-file
+    val samplesheet       // pipeline samplesheet
+    val additional_config // custom configs
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    exec:
+    // def args = task.ext.args ?: ''
+    // The child run is started from <workflow workDir>/<pipeline_name>
+    def cache_dir = Paths.get(workflow.workDir.resolve(pipeline_name).toUri())
+    Files.createDirectories(cache_dir)
+    def nxf_cmd = [
+        'nextflow run',
+            pipeline_name,
+            nextflow_opts,
+            params_file ? "-params-file $params_file" : '',
+            additional_config ? "-c $additional_config" : '',
+            samplesheet ? "--input $samplesheet" : '',
+            "--outdir $task.workDir/results",
+    ]
+    // join + tokenize splits multi-word entries into argv tokens and drops the empty ones.
+    // NOTE(review): values containing spaces (e.g. paths) are split as well.
+    def builder = new ProcessBuilder(nxf_cmd.join(" ").tokenize(" "))
+    builder.directory(cache_dir.toFile())
+    // Merge stderr into stdout so a single read drains all child output
+    builder.redirectErrorStream(true)
+    def nxf_proc = builder.start()
+    // Read the output BEFORE waitFor(): an undrained pipe can fill up and block the child forever.
+    // Assigned without `def` so the value is still reachable from the output block.
+    nxf_log = nxf_proc.text
+    assert nxf_proc.waitFor() == 0: nxf_log
+
+    output:
+    path "results"  , emit: output
+    val nxf_log     , emit: log
+}
\ No newline at end of file
diff --git a/modules/nf-core/busco/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml
new file mode 100644
index 0000000..06a5d93
--- /dev/null
+++ b/modules/nf-core/busco/busco/environment.yml
@@ -0,0 +1,7 @@
+name: busco_busco
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::busco=5.7.1
diff --git a/modules/nf-core/busco/busco/main.nf b/modules/nf-core/busco/busco/main.nf
new file mode 100644
index 0000000..f7c1a66
--- /dev/null
+++ b/modules/nf-core/busco/busco/main.nf
@@ -0,0 +1,107 @@
+process BUSCO_BUSCO {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0':
+        'biocontainers/busco:5.7.1--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(fasta, stageAs:'tmp_input/*')
+    val mode                              // Required:    One of genome, proteins, or transcriptome
+    val lineage                           // Required:    lineage to check against, "auto" enables --auto-lineage instead
+    path busco_lineages_path              // Recommended: path to busco lineages - downloads if not set
+    path config_file                      // Optional:    busco configuration file
+
+    output:
+    tuple val(meta), path("*-busco.batch_summary.txt")                , emit: batch_summary
+    tuple val(meta), path("short_summary.*.txt")                      , emit: short_summaries_txt   , optional: true
+    tuple val(meta), path("short_summary.*.json")                     , emit: short_summaries_json  , optional: true
+    tuple val(meta), path("*-busco/*/run_*/full_table.tsv")           , emit: full_table            , optional: true
+    tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv")   , emit: missing_busco_list    , optional: true
+    tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins  , optional: true
+    tuple val(meta), path("*-busco/*/run_*/busco_sequences")          , emit: seq_dir
+    tuple val(meta), path("*-busco/*/translated_proteins")            , emit: translated_dir        , optional: true
+    tuple val(meta), path("*-busco")                                  , emit: busco_dir
+    path "versions.yml"                                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    if ( mode !in [ 'genome', 'proteins', 'transcriptome' ] ) {
+        error "Mode must be one of 'genome', 'proteins', or 'transcriptome'."
+    }
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}-${lineage}"
+    def busco_config = config_file ? "--config $config_file" : ''
+    def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}"
+    def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : ''
+    """
+    # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
+    # Check for container variable initialisation script and source it.
+    if [ -f "/usr/local/env-activate.sh" ]; then
+        set +u  # Otherwise, errors out because of various unbound variables
+        . "/usr/local/env-activate.sh"
+        set -u
+    fi
+
+    # If the augustus config directory is not writable, then copy to writeable area
+    if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then
+        # Create writable tmp directory for augustus
+        AUG_CONF_DIR=\$( mktemp -d -p \$PWD )
+        cp -r \$AUGUSTUS_CONFIG_PATH/* \$AUG_CONF_DIR
+        export AUGUSTUS_CONFIG_PATH=\$AUG_CONF_DIR
+        echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}"
+    fi
+
+    # Ensure the input is uncompressed
+    INPUT_SEQS=input_seqs
+    mkdir "\$INPUT_SEQS"
+    cd "\$INPUT_SEQS"
+    for FASTA in ../tmp_input/*; do
+        if [ "\${FASTA##*.}" == 'gz' ]; then
+            gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .gz )
+        else
+            ln -s "\$FASTA" .
+        fi
+    done
+    cd ..
+
+    busco \\
+        --cpu $task.cpus \\
+        --in "\$INPUT_SEQS" \\
+        --out ${prefix}-busco \\
+        --mode $mode \\
+        $busco_lineage \\
+        $busco_lineage_dir \\
+        $busco_config \\
+        $args
+
+    # clean up
+    rm -rf "\$INPUT_SEQS"
+
+    # Move files to avoid staging/publishing issues
+    mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt
+    mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found."
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix      = task.ext.prefix ?: "${meta.id}-${lineage}"
+    def fasta_name  = files(fasta).first().name - '.gz'
+    """
+    touch ${prefix}-busco.batch_summary.txt
+    mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml
new file mode 100644
index 0000000..29745d2
--- /dev/null
+++ b/modules/nf-core/busco/busco/meta.yml
@@ -0,0 +1,98 @@
+name: busco_busco
+description: Benchmarking Universal Single Copy Orthologs
+keywords:
+  - quality control
+  - genome
+  - transcriptome
+  - proteome
+tools:
+  - busco:
+      description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB.
+      homepage: https://busco.ezlab.org/
+      documentation: https://busco.ezlab.org/busco_userguide.html
+      tool_dev_url: https://gitlab.com/ezlab/busco
+      doi: "10.1007/978-1-4939-9173-0_14"
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: Nucleic or amino acid sequence file in FASTA format.
+      pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}"
+  - mode:
+      type: string
+      description: The mode to run Busco in. One of genome, proteins, or transcriptome
+      pattern: "{genome,proteins,transcriptome}"
+  - lineage:
+      type: string
+      description: The BUSCO lineage to use, or "auto" to automatically select lineage
+  - busco_lineages_path:
+      type: directory
+      description: Path to local BUSCO lineages directory.
+  - config_file:
+      type: file
+      description: Path to BUSCO config file.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - batch_summary:
+      type: file
+      description: Summary of all sequence files analyzed
+      pattern: "*-busco.batch_summary.txt"
+  - short_summaries_txt:
+      type: file
+      description: Short Busco summary in plain text format
+      pattern: "short_summary.*.txt"
+  - short_summaries_json:
+      type: file
+      description: Short Busco summary in JSON format
+      pattern: "short_summary.*.json"
+  - busco_dir:
+      type: directory
+      description: BUSCO lineage specific output
+      pattern: "*-busco"
+  - full_table:
+      type: file
+      description: Full BUSCO results table
+      pattern: "full_table.tsv"
+  - missing_busco_list:
+      type: file
+      description: List of missing BUSCOs
+      pattern: "missing_busco_list.tsv"
+  - single_copy_proteins:
+      type: file
+      description: Fasta file of single copy proteins (transcriptome mode)
+      pattern: "single_copy_proteins.faa"
+  - seq_dir:
+      type: directory
+      description: BUSCO sequence directory
+      pattern: "busco_sequences"
+  - translated_dir:
+      type: directory
+      description: Six frame translations of each transcript made by the transcriptome mode
+      pattern: "translated_dir"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@priyanka-surana"
+  - "@charles-plessy"
+  - "@mahesh-panchal"
+  - "@muffato"
+  - "@jvhagey"
+  - "@gallvp"
+maintainers:
+  - "@priyanka-surana"
+  - "@charles-plessy"
+  - "@mahesh-panchal"
+  - "@muffato"
+  - "@jvhagey"
+  - "@gallvp"
diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test
new file mode 100644
index 0000000..16b708b
--- /dev/null
+++ b/modules/nf-core/busco/busco/tests/main.nf.test
@@ -0,0 +1,419 @@
+nextflow_process {
+
+    name "Test Process BUSCO_BUSCO"
+    script "../main.nf"
+    process "BUSCO_BUSCO"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "busco"
+    tag "busco/busco"
+
+    test("test_busco_genome_single_fasta") {
+
+        config './nextflow.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+                ]
+                input[1] = 'genome'
+                input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues
+                input[3] = [] // Download busco lineage
+                input[4] = [] // No config
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            with(path(process.out.short_summaries_txt[0][1]).text) {
+                assert contains('BUSCO version')
+                assert contains('The lineage dataset is')
+                assert contains('BUSCO was run in mode')
+                assert contains('Complete BUSCOs')
+                assert contains('Missing BUSCOs')
+                assert contains('Dependencies and versions')
+            }
+
+            with(path(process.out.short_summaries_json[0][1]).text) {
+                assert contains('one_line_summary')
+                assert contains('mode')
+                assert contains('dataset')
+            }
+
+            assert snapshot(
+                    process.out.batch_summary[0][1],
+                    process.out.full_table[0][1],
+                    process.out.missing_busco_list[0][1],
+                    process.out.versions[0]
+                ).match()
+
+            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+                assert contains('single_copy_busco_sequences.tar.gz')
+                assert contains('multi_copy_busco_sequences.tar.gz')
+                assert contains('fragmented_busco_sequences.tar.gz')
+            }
+
+            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+                assert contains('DEBUG:busco.run_BUSCO')
+                assert contains('Results from dataset')
+                assert contains('how to cite BUSCO')
+            }
+
+            assert process.out.single_copy_proteins == []
+            assert process.out.translated_dir == []
+        }
+    }
+
+    test("test_busco_genome_multi_fasta") {
+
+        config './nextflow.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    [
+                        file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true),
+                        file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)
+                    ]
+                ]
+                input[1] = 'genome'
+                input[2] = 'bacteria_odb10'
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            with(path(process.out.short_summaries_txt[0][1][0]).text) {
+                assert contains('BUSCO version')
+                assert contains('The lineage dataset is')
+                assert contains('BUSCO was run in mode')
+                assert contains('Complete BUSCOs')
+                assert contains('Missing BUSCOs')
+                assert contains('Dependencies and versions')
+            }
+
+            with(path(process.out.short_summaries_txt[0][1][1]).text) {
+                assert contains('BUSCO version')
+                assert contains('The lineage dataset is')
+                assert contains('BUSCO was run in mode')
+                assert contains('Complete BUSCOs')
+                assert contains('Missing BUSCOs')
+                assert contains('Dependencies and versions')
+            }
+
+            with(path(process.out.short_summaries_json[0][1][0]).text) {
+                assert contains('one_line_summary')
+                assert contains('mode')
+                assert contains('dataset')
+            }
+
+            with(path(process.out.short_summaries_json[0][1][1]).text) {
+                assert contains('one_line_summary')
+                assert contains('mode')
+                assert contains('dataset')
+            }
+
+            assert snapshot(
+                    process.out.batch_summary[0][1],
+                    process.out.full_table[0][1],
+                    process.out.missing_busco_list[0][1],
+                    process.out.versions[0]
+                ).match()
+
+            with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) {
+                assert contains('single_copy_busco_sequences.tar.gz')
+                assert contains('multi_copy_busco_sequences.tar.gz')
+                assert contains('fragmented_busco_sequences.tar.gz')
+            }
+
+            with(file(process.out.seq_dir[0][1][1]).listFiles().collect { it.name }) {
+                assert contains('single_copy_busco_sequences.tar.gz')
+                assert contains('multi_copy_busco_sequences.tar.gz')
+                assert contains('fragmented_busco_sequences.tar.gz')
+            }
+
+            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+                assert contains('DEBUG:busco.run_BUSCO')
+                assert contains('Results from dataset')
+                assert contains('how to cite BUSCO')
+            }
+
+            assert process.out.single_copy_proteins == []
+            assert process.out.translated_dir == []
+        }
+
+    }
+
+    test("test_busco_eukaryote_metaeuk") {
+
+        config './nextflow.metaeuk.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                input[1] = 'genome'
+                input[2] = 'eukaryota_odb10'
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            with(path(process.out.short_summaries_txt[0][1]).text) {
+                assert contains('BUSCO version')
+                assert contains('The lineage dataset is')
+                assert contains('BUSCO was run in mode')
+                assert contains('Complete BUSCOs')
+                assert contains('Missing BUSCOs')
+                assert contains('Dependencies and versions')
+            }
+
+            with(path(process.out.short_summaries_json[0][1]).text) {
+                assert contains('one_line_summary')
+                assert contains('mode')
+                assert contains('dataset')
+            }
+
+            assert snapshot(
+                    process.out.batch_summary[0][1],
+                    process.out.full_table[0][1],
+                    process.out.missing_busco_list[0][1],
+                    process.out.versions[0]
+                ).match()
+
+            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+                assert contains('single_copy_busco_sequences.tar.gz')
+                assert contains('multi_copy_busco_sequences.tar.gz')
+                assert contains('fragmented_busco_sequences.tar.gz')
+            }
+
+            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+                assert contains('DEBUG:busco.run_BUSCO')
+                assert contains("'use_augustus', 'False'")
+                assert contains("'use_metaeuk', 'True'") // METAEUK
+                assert contains('Results from dataset')
+                assert contains('how to cite BUSCO')
+
+            }
+
+            assert process.out.single_copy_proteins == []
+            assert process.out.translated_dir == []
+        }
+
+    }
+
+    test("test_busco_eukaryote_augustus") {
+
+        config './nextflow.augustus.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                ]
+                input[1] = 'genome'
+                input[2] = 'eukaryota_odb10'
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            assert snapshot(
+                    process.out.batch_summary[0][1],
+                    process.out.versions[0]
+                ).match()
+
+            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+                assert contains('DEBUG:busco.run_BUSCO')
+                assert contains("'use_augustus', 'True'")
+                assert contains("'use_metaeuk', 'False'") // AUGUSTUS
+                assert contains('Augustus did not recognize any genes')
+
+            }
+
+            assert process.out.short_summaries_json == []
+            assert process.out.short_summaries_txt == []
+            assert process.out.missing_busco_list == []
+            assert process.out.full_table == []
+            assert process.out.single_copy_proteins == []
+            assert process.out.translated_dir == []
+        }
+
+    }
+
+    test("test_busco_protein") {
+
+        config './nextflow.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true)
+                ]
+                input[1] = 'proteins'
+                input[2] = 'bacteria_odb10'
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            with(path(process.out.short_summaries_txt[0][1]).text) {
+                assert contains('BUSCO version')
+                assert contains('The lineage dataset is')
+                assert contains('BUSCO was run in mode')
+                assert contains('Complete BUSCOs')
+                assert contains('Missing BUSCOs')
+                assert contains('Dependencies and versions')
+            }
+
+            with(path(process.out.short_summaries_json[0][1]).text) {
+                assert contains('one_line_summary')
+                assert contains('mode')
+                assert contains('dataset')
+            }
+
+            assert snapshot(
+                    process.out.batch_summary[0][1],
+                    process.out.full_table[0][1],
+                    process.out.missing_busco_list[0][1],
+                    process.out.versions[0]
+                ).match()
+
+            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+                assert contains('single_copy_busco_sequences.tar.gz')
+                assert contains('multi_copy_busco_sequences.tar.gz')
+                assert contains('fragmented_busco_sequences.tar.gz')
+            }
+
+            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+                assert contains('DEBUG:busco.run_BUSCO')
+                assert contains('Results from dataset')
+                assert contains('how to cite BUSCO')
+            }
+
+            assert process.out.single_copy_proteins == []
+            assert process.out.translated_dir == []
+        }
+
+    }
+
+    test("test_busco_transcriptome") {
+
+        config './nextflow.config'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)
+                ]
+                input[1] = 'transcriptome'
+                input[2] = 'bacteria_odb10'
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            with(path(process.out.short_summaries_txt[0][1]).text) {
+                assert contains('BUSCO version')
+                assert contains('The lineage dataset is')
+                assert contains('BUSCO was run in mode')
+                assert contains('Complete BUSCOs')
+                assert contains('Missing BUSCOs')
+                assert contains('Dependencies and versions')
+            }
+
+            with(path(process.out.short_summaries_json[0][1]).text) {
+                assert contains('one_line_summary')
+                assert contains('mode')
+                assert contains('dataset')
+            }
+
+            assert snapshot(
+                    process.out.batch_summary[0][1],
+                    process.out.full_table[0][1],
+                    process.out.missing_busco_list[0][1],
+                    process.out.translated_dir[0][1],
+                    process.out.single_copy_proteins[0][1],
+                    process.out.versions[0]
+                ).match()
+
+            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+                assert contains('single_copy_busco_sequences.tar.gz')
+                assert contains('multi_copy_busco_sequences.tar.gz')
+                assert contains('fragmented_busco_sequences.tar.gz')
+            }
+
+            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+                assert contains('DEBUG:busco.run_BUSCO')
+                assert contains('Results from dataset')
+                assert contains('how to cite BUSCO')
+            }
+        }
+
+    }
+
+    test("minimal-stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+                ]
+                input[1] = 'genome'
+                input[2] = 'bacteria_odb10'
+                input[3] = []
+                input[4] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap
new file mode 100644
index 0000000..1b6411b
--- /dev/null
+++ b/modules/nf-core/busco/busco/tests/main.nf.test.snap
@@ -0,0 +1,230 @@
+{
+    "minimal-stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    
+                ],
+                "4": [
+                    
+                ],
+                "5": [
+                    
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            
+                        ]
+                    ]
+                ],
+                "7": [
+                    
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            [
+                                [
+                                    [
+                                        
+                                    ]
+                                ]
+                            ]
+                        ]
+                    ]
+                ],
+                "9": [
+                    "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+                ],
+                "batch_summary": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "busco_dir": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            [
+                                [
+                                    [
+                                        
+                                    ]
+                                ]
+                            ]
+                        ]
+                    ]
+                ],
+                "full_table": [
+                    
+                ],
+                "missing_busco_list": [
+                    
+                ],
+                "seq_dir": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            
+                        ]
+                    ]
+                ],
+                "short_summaries_json": [
+                    
+                ],
+                "short_summaries_txt": [
+                    
+                ],
+                "single_copy_proteins": [
+                    
+                ],
+                "translated_dir": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-03T13:28:04.451297"
+    },
+    "test_busco_eukaryote_augustus": {
+        "content": [
+            "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89",
+            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-03T13:26:36.974986"
+    },
+    "test_busco_genome_single_fasta": {
+        "content": [
+            "test-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be",
+            "full_table.tsv:md5,638fe7590f442c57361554dae330eca1",
+            "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a",
+            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-03T13:22:45.07816"
+    },
+    "test_busco_genome_multi_fasta": {
+        "content": [
+            "test-bacteria_odb10-busco.batch_summary.txt:md5,fcd3c208913e8abda3d6742c43fec5fa",
+            [
+                "full_table.tsv:md5,c657edcc7d0de0175869717551df6e83",
+                "full_table.tsv:md5,638fe7590f442c57361554dae330eca1"
+            ],
+            [
+                "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112",
+                "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a"
+            ],
+            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-03T13:23:50.255602"
+    },
+    "test_busco_eukaryote_metaeuk": {
+        "content": [
+            "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6",
+            "full_table.tsv:md5,92b1b1d5cb5ea0e2093d16f00187e8c7",
+            "missing_busco_list.tsv:md5,0352e563de290bf804c708323c35a9e3",
+            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-03T13:25:38.159041"
+    },
+    "test_busco_transcriptome": {
+        "content": [
+            "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64",
+            "full_table.tsv:md5,1b2ce808fdafa744c56b5f781551272d",
+            "missing_busco_list.tsv:md5,a6931b6470262b997b8b99ea0f1d14a4",
+            [
+                "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0",
+                "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195",
+                "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc",
+                "1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a",
+                "143460at2.faa:md5,d887431fd988a5556a523440f02d9594",
+                "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5",
+                "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67",
+                "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248",
+                "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57",
+                "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5",
+                "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0",
+                "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08",
+                "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5",
+                "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829",
+                "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc",
+                "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999",
+                "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845",
+                "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793",
+                "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d",
+                "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69",
+                "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb",
+                "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c"
+            ],
+            "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764",
+            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-03T13:27:53.992893"
+    },
+    "test_busco_protein": {
+        "content": [
+            "test-bacteria_odb10-busco.batch_summary.txt:md5,f5a782378f9f94a748aa907381fdef91",
+            "full_table.tsv:md5,812ab6a0496fccab774643cf40c4f2a8",
+            "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112",
+            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-03T13:27:12.724862"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/busco/busco/tests/nextflow.augustus.config b/modules/nf-core/busco/busco/tests/nextflow.augustus.config
new file mode 100644
index 0000000..84daa69
--- /dev/null
+++ b/modules/nf-core/busco/busco/tests/nextflow.augustus.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'BUSCO_BUSCO' {
+        ext.args = '--tar --augustus'
+    }
+}
diff --git a/modules/nf-core/busco/busco/tests/nextflow.config b/modules/nf-core/busco/busco/tests/nextflow.config
new file mode 100644
index 0000000..1ec3fec
--- /dev/null
+++ b/modules/nf-core/busco/busco/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'BUSCO_BUSCO' {
+        ext.args = '--tar'
+    }
+}
diff --git a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config
new file mode 100644
index 0000000..c141844
--- /dev/null
+++ b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'BUSCO_BUSCO' {
+        ext.args = '--tar --metaeuk'
+    }
+}
diff --git a/modules/nf-core/busco/busco/tests/old_test.yml b/modules/nf-core/busco/busco/tests/old_test.yml
new file mode 100644
index 0000000..75177f5
--- /dev/null
+++ b/modules/nf-core/busco/busco/tests/old_test.yml
@@ -0,0 +1,624 @@
+- name: busco test_busco_genome_single_fasta
+  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config
+  tags:
+    - busco
+  files:
+    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt
+      contains:
+        - "BUSCO version"
+        - "The lineage dataset is"
+        - "BUSCO was run in mode"
+        - "Complete BUSCOs"
+        - "Missing BUSCOs"
+        - "Dependencies and versions"
+    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+      md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log
+      md5sum: 9caf1a1434414c78562eb0bbb9c0e53f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log
+      contains:
+        - "# hmmsearch :: search profile(s) against a sequence database"
+        - "# target sequence database:"
+        - "Internal pipeline statistics summary:"
+        - "[ok]"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log
+      md5sum: 538510cfc7483498210f01e53fe035ad
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log
+      md5sum: 61050b0706addc9498b2088a2d6efa9a
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint
+      contains:
+        - "Tool: prodigal"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
+      md5sum: 836e9a80d33d8b89168f07ddc13ee991
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
+      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
+      md5sum: 836e9a80d33d8b89168f07ddc13ee991
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
+      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
+      md5sum: 538510cfc7483498210f01e53fe035ad
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
+      md5sum: 61050b0706addc9498b2088a2d6efa9a
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint
+      contains:
+        - "Tool: bbtools"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv
+      md5sum: c56edab1dc1522e993c25ae2b730799f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv
+      md5sum: b533ef30270f27160acce85a22d01bf5
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "lineage_dataset"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt
+      contains:
+        - "# BUSCO version is:"
+        - "Results:"
+        - "busco:"
+    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+      contains:
+        - "DEBUG:busco.run_BUSCO"
+        - "Results from dataset"
+        - "how to cite BUSCO"
+    - path: output/busco/versions.yml
+
+- name: busco test_busco_genome_multi_fasta
+  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config
+  tags:
+    - busco
+  files:
+    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt
+      contains:
+        - "BUSCO version"
+        - "The lineage dataset is"
+        - "BUSCO was run in mode"
+        - "Complete BUSCOs"
+        - "Missing BUSCOs"
+        - "Dependencies and versions"
+    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt
+      contains:
+        - "BUSCO version"
+        - "The lineage dataset is"
+        - "BUSCO was run in mode"
+        - "Complete BUSCOs"
+        - "Missing BUSCOs"
+        - "Dependencies and versions"
+    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+      md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log
+      md5sum: 8f047bdb33264d22a83920bc2c63f29a
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log
+      contains:
+        - "# hmmsearch :: search profile(s) against a sequence database"
+        - "# target sequence database:"
+        - "Internal pipeline statistics summary:"
+        - "[ok]"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log
+      md5sum: c1fdc6977332f53dfe7f632733bb4585
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log
+      md5sum: 50752acb1c5a20be886bfdfc06635bcb
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint
+      contains:
+        - "Tool: prodigal"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa
+      md5sum: 8166471fc5f08c82fd5643ab42327f9d
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna
+      md5sum: ddc508a18f60e7f3314534df50cdf8ca
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
+      md5sum: 8166471fc5f08c82fd5643ab42327f9d
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
+      md5sum: ddc508a18f60e7f3314534df50cdf8ca
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
+      md5sum: c1fdc6977332f53dfe7f632733bb4585
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
+      md5sum: 50752acb1c5a20be886bfdfc06635bcb
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa
+      md5sum: e56fd59c38248dc21ac94355dca98121
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna
+      md5sum: b365f84bf99c68357952e0b98ed7ce42
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log
+      md5sum: e5f14d7925ba14a0f9850542f3739894
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log
+      md5sum: d41971bfc1b621d4ffd2633bc47017ea
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint
+      contains:
+        - "Tool: bbtools"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv
+      md5sum: c9651b88b10871abc260ee655898e828
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv
+      md5sum: 9939309df2da5419de88c32d1435c779
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt
+      contains:
+        - "# BUSCO version is:"
+        - "Results:"
+        - "busco:"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log
+      md5sum: 9caf1a1434414c78562eb0bbb9c0e53f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log
+      contains:
+        - "# hmmsearch :: search profile(s) against a sequence database"
+        - "# target sequence database:"
+        - "Internal pipeline statistics summary:"
+        - "[ok]"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log
+      md5sum: 538510cfc7483498210f01e53fe035ad
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log
+      md5sum: 61050b0706addc9498b2088a2d6efa9a
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint
+      contains:
+        - "Tool: prodigal"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
+      md5sum: 836e9a80d33d8b89168f07ddc13ee991
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
+      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
+      md5sum: 836e9a80d33d8b89168f07ddc13ee991
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
+      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
+      md5sum: 538510cfc7483498210f01e53fe035ad
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
+      md5sum: 61050b0706addc9498b2088a2d6efa9a
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint
+      contains:
+        - "Tool: bbtools"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv
+      md5sum: c56edab1dc1522e993c25ae2b730799f
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv
+      md5sum: b533ef30270f27160acce85a22d01bf5
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt
+      contains:
+        - "# BUSCO version is:"
+        - "Results:"
+        - "busco:"
+    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+      contains:
+        - "DEBUG:busco.run_BUSCO"
+        - "Results from dataset"
+        - "how to cite BUSCO"
+    - path: output/busco/versions.yml
+
+- name: busco test_busco_eukaryote_metaeuk
+  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config
+  tags:
+    - busco
+  files:
+    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt
+      contains:
+        - "BUSCO version"
+        - "The lineage dataset is"
+        - "BUSCO was run in mode"
+        - "Complete BUSCOs"
+        - "Missing BUSCOs"
+        - "Dependencies and versions"
+    - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt
+      md5sum: ff6d8277e452a83ce9456bbee666feb6
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log
+      md5sum: e63debaa653f18f7405d936050abc093
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint
+      contains:
+        - "Tool: bbtools"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv
+      md5sum: bd880e90b9e5620a58943a3e0f9ff16b
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint
+      contains:
+        - "Tool: metaeuk"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa
+      md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv
+      md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt
+      contains:
+        - "# BUSCO version is:"
+        - "Results:"
+        - "busco:"
+    - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log
+      contains:
+        - "DEBUG:busco.run_BUSCO"
+        - "Results from dataset"
+        - "how to cite BUSCO"
+    - path: output/busco/versions.yml
+
+- name: busco test_busco_eukaryote_augustus
+  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config
+  tags:
+    - busco
+  files:
+    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt
+      contains:
+        - "BUSCO version"
+        - "The lineage dataset is"
+        - "BUSCO was run in mode"
+        - "Complete BUSCOs"
+        - "Missing BUSCOs"
+        - "Dependencies and versions"
+    - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt
+      md5sum: ff6d8277e452a83ce9456bbee666feb6
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log
+      md5sum: e63debaa653f18f7405d936050abc093
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log
+      contains:
+        - "metaeuk"
+        - "easy-predict"
+        - "Compute score and coverage"
+        - "Time for processing:"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log
+      contains:
+        - "metaeuk"
+        - "easy-predict"
+        - "Compute score and coverage"
+        - "Time for processing:"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint
+      contains:
+        - "Tool: bbtools"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv
+      md5sum: bd880e90b9e5620a58943a3e0f9ff16b
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint
+      contains:
+        - "Tool: metaeuk"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa
+      md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv
+      md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt
+      contains:
+        - "# BUSCO version is:"
+        - "Results:"
+        - "busco:"
+    - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log
+      contains:
+        - "DEBUG:busco.run_BUSCO"
+        - "Results from dataset"
+        - "how to cite BUSCO"
+    - path: output/busco/versions.yml
+
+- name: busco test_busco_protein
+  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config
+  tags:
+    - busco
+  files:
+    - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt
+      contains:
+        - "BUSCO version"
+        - "The lineage dataset is"
+        - "BUSCO was run in mode"
+        - "Complete BUSCOs"
+        - "Missing BUSCOs"
+        - "Dependencies and versions"
+    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+      md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d
+    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+      contains:
+        - "DEBUG:busco.run_BUSCO"
+        - "Results from dataset"
+        - "how to cite BUSCO"
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log
+      contains:
+        - "# hmmsearch :: search profile(s) against a sequence database"
+        - "# target sequence database:"
+        - "Internal pipeline statistics summary:"
+        - "[ok]"
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv
+      md5sum: 0e34f1011cd83ea1d5d5103ec62b8922
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv
+      md5sum: 9939309df2da5419de88c32d1435c779
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt
+      contains:
+        - "# BUSCO version is:"
+        - "Results:"
+        - "busco:"
+    - path: output/busco/versions.yml
+
+- name: busco test_busco_transcriptome
+  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config
+  tags:
+    - busco
+  files:
+    - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt
+      contains:
+        - "BUSCO version"
+        - "The lineage dataset is"
+        - "BUSCO was run in mode"
+        - "Complete BUSCOs"
+        - "Missing BUSCOs"
+        - "Dependencies and versions"
+    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+      md5sum: 46118ecf60d1b87d22b96d80f4f03632
+    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+      contains:
+        - "DEBUG:busco.run_BUSCO"
+        - "Results from dataset"
+        - "how to cite BUSCO"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint
+      contains:
+        - "Tool: makeblastdb"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb
+      md5sum: 3788c017fe5e6f0f58224e9cdd21822b
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr
+      md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not
+      md5sum: 0c340e376c7e85d19f82ec1a833e6a6e
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq
+      md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf
+      md5sum: de1250813f0c7affc6d12dac9d0fb6bb
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto
+      md5sum: ff74bd41f9cc9b011c63a32c4f7693bf
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log
+      contains:
+        - "# hmmsearch :: search profile(s) against a sequence database"
+        - "# target sequence database:"
+        - "Internal pipeline statistics summary:"
+        - "[ok]"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log
+      contains:
+        - "Building a new DB"
+        - "Adding sequences from FASTA"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint
+      contains:
+        - "Tool: tblastn"
+        - "Completed"
+        - "jobs"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv
+      md5sum: cc30eed321944af293452bdbcfc24292
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp
+      md5sum: 73e9c65fc83fedc58f57f09b08f08238
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp
+      md5sum: 7fa4cc7955ec0cc36330a221c579b975
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp
+      md5sum: 6f1601c875d019e3f6f1f98ed8e988d4
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp
+      md5sum: 3f8e034686cd240c2330650d791bcae2
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp
+      md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp
+      md5sum: 7d463e0e6cf7169bc9077d8dc776dda1
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp
+      md5sum: 2288edf7fa4f88f51b4cf4d94086f77e
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp
+      md5sum: 029906abbad6d87fc57830dd548cac24
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp
+      md5sum: 4937f3b348774a31b1160a00297c29cc
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp
+      md5sum: afcb20ba4c466479d6b91c8c62251e1f
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp
+      md5sum: 2e1e823ce017345bd998191a39fa9924
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp
+      md5sum: 08c2d82c34ecffbe1c638b410349412e
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp
+      md5sum: cd9b63cf93524284781535c888313764
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp
+      md5sum: d1929b742b24ebe379bf4801ca882dca
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp
+      md5sum: 69215765b010c05336538cb322c900b3
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp
+      md5sum: 6feaa1cc3b0899a147ea9d466878f3e3
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp
+      md5sum: 13625eae14e860a96ce17cd4e37e9d01
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp
+      md5sum: e14b2484649b0dbc8926815c207b806d
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp
+      md5sum: 6902c93691df00e690faea914c71839e
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp
+      md5sum: 0a0d9d38a83acbd5ad43c29cdf429988
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv
+      contains:
+        - "TBLASTN"
+        - "BLAST processed"
+        - "queries"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv
+      md5sum: 24df25199e13c88bd892fc3e7b541ca0
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv
+      md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json
+      contains:
+        - "one_line_summary"
+        - "mode"
+        - "dataset"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt
+      contains:
+        - "# BUSCO version is:"
+        - "Results:"
+        - "busco:"
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa
+      md5sum: e04b9465733577ae6e4bccb7aa01e720
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa
+      md5sum: 7333c39a20258f20c7019ea0cd83157c
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa
+      md5sum: ebb481e77a824685fbe04d8a2f3a0d7d
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa
+      md5sum: 34621c7d499034e8f8e6b92fd4020a93
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa
+      md5sum: aa89ca381c1c70c9c4e1380351ca7c2a
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa
+      md5sum: f2e91d78b8dd3722840378789f29e8c8
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa
+      md5sum: 73c25aef5c9cba7f4151804941b146ea
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa
+      md5sum: cda556018d1f84ebe517e89f6fc107d0
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa
+      md5sum: a9096c9fb8b25c78a72871ab0463acdc
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa
+      md5sum: e463d25ce186c0cebfd749474f3a4c64
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa
+      md5sum: f2cfd241590c6d8377286d6135480937
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa
+      md5sum: 586569546fb9861502468e3d9ba2775c
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa
+      md5sum: 24c658bee14ad84b062d81ad96642eb8
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa
+      md5sum: 0b8e26ddf5149bbd8805be7af125208d
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa
+      md5sum: 159320712ee01fb2ccb31a25df44eead
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa
+      md5sum: 812629c0b06ac3d18661c2ca78de0c08
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa
+      md5sum: f7ff4e1591342d30b77392a2e84b57d9
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa
+      md5sum: 7b34a24fc49c540d46fcf96ff5129564
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa
+      md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa
+      md5sum: 613af7a3fea30ea2bece66f603b9284a
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa
+      md5sum: a7cd1b13c9ef91c7ef4e31614166f197
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa
+      md5sum: fe313ffd5efdb0fed887a04fba352552
+    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa
+      md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0
+    - path: output/busco/versions.yml
diff --git a/modules/nf-core/busco/busco/tests/tags.yml b/modules/nf-core/busco/busco/tests/tags.yml
new file mode 100644
index 0000000..7c4d283
--- /dev/null
+++ b/modules/nf-core/busco/busco/tests/tags.yml
@@ -0,0 +1,2 @@
+busco/busco:
+  - "modules/nf-core/busco/busco/**"
diff --git a/modules/nf-core/gfastats/environment.yml b/modules/nf-core/gfastats/environment.yml
new file mode 100644
index 0000000..1c875ce
--- /dev/null
+++ b/modules/nf-core/gfastats/environment.yml
@@ -0,0 +1,7 @@
+name: gfastats
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::gfastats=1.3.6
diff --git a/modules/nf-core/gfastats/main.nf b/modules/nf-core/gfastats/main.nf
new file mode 100644
index 0000000..8db239a
--- /dev/null
+++ b/modules/nf-core/gfastats/main.nf
@@ -0,0 +1,66 @@
+process GFASTATS { // Runs gfastats on one assembly: summary statistics go to <prefix>.assembly_summary, the re-emitted assembly to <prefix>.<out_fmt>.gz
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gfastats:1.3.6--hdcf5f25_3':
+        'biocontainers/gfastats:1.3.6--hdcf5f25_3' }"
+
+    input:
+    tuple val(meta), path(assembly)   // input.[fasta|fastq|gfa][.gz]
+    val out_fmt                       // output format (fasta/fastq/gfa)
+    val genome_size                   // estimated genome size for NG* statistics (optional).
+    val target                        // target specific sequence by header, optionally with coordinates (optional).
+    path agpfile                      // -a --agp-to-path <file> converts input agp to path and replaces existing paths.
+    path include_bed                  // -i --include-bed <file> generates output on a subset list of headers or coordinates in 0-based bed format.
+    path exclude_bed                  // -e --exclude-bed <file> opposite of --include-bed. They can be combined (no coordinates).
+    path instructions                 // -k --swiss-army-knife <file> set of instructions provided as an ordered list.
+
+    output:
+    tuple val(meta), path("*.assembly_summary"), emit: assembly_summary
+    tuple val(meta), path("*.${out_fmt}.gz")   , emit: assembly
+    path "versions.yml"                        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''                              // extra gfastats options taken from task.ext.args (empty if unset)
+    def prefix = task.ext.prefix ?: "${meta.id}"                // basename of both output files; defaults to meta.id
+    def agp  = agpfile ? "--agp-to-path $agpfile" : ""          // fix: interpolate the agpfile input (was "$agp", the variable being defined on this line)
+    def ibed = include_bed ? "--include-bed $include_bed" : ""  // this and the flags below are emitted only when the matching input is truthy
+    def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : ""
+    def sak  = instructions ? "--swiss-army-knife $instructions" : ""
+    """
+    gfastats \\
+        $args \\
+        --threads $task.cpus \\
+        $agp \\
+        $ibed \\
+        $ebed \\
+        $sak \\
+        --out-format ${prefix}.${out_fmt}.gz \\
+        $assembly \\
+        $genome_size \\
+        $target \\
+        > ${prefix}.assembly_summary
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gfastats: \$( gfastats -v | sed '1!d;s/.*v//' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"                // same basename rule as the script block
+    """
+    touch ${prefix}.${out_fmt}.gz
+    touch ${prefix}.assembly_summary
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gfastats: \$( gfastats -v | sed '1!d;s/.*v//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/gfastats/meta.yml b/modules/nf-core/gfastats/meta.yml
new file mode 100644
index 0000000..d0e97a8
--- /dev/null
+++ b/modules/nf-core/gfastats/meta.yml
@@ -0,0 +1,72 @@
+name: "gfastats"
+description: |
+  A single fast and exhaustive tool for summary statistics and simultaneous *fa*
+  (fasta, fastq, gfa [.gz]) genome assembly file manipulation.
+keywords:
+  - gfastats
+  - fasta
+  - genome assembly
+  - genome summary
+  - genome manipulation
+  - genome statistics
+tools:
+  - "gfastats":
+      description: "The swiss army knife for genome assembly."
+      homepage: "https://github.com/vgl-hub/gfastats"
+      documentation: "https://github.com/vgl-hub/gfastats/tree/main/instructions"
+      tool_dev_url: "https://github.com/vgl-hub/gfastats"
+      doi: "10.1093/bioinformatics/btac460"
+      licence: "['MIT']"
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - assembly:
+      type: file
+      description: Draft assembly file
+      pattern: "*.{fasta,fastq,gfa}(.gz)?"
+  - out_fmt:
+      type: string
+      description: Output format (fasta, fastq, gfa)
+  - genome_size:
+      type: integer
+      description: estimated genome size (bp) for NG* statistics (optional).
+  - target:
+      type: string
+      description: target specific sequence by header, optionally with coordinates (optional).
+  - agpfile:
+      type: file
+      description: converts input agp to path and replaces existing paths.
+  - include_bed:
+      type: file
+      description: generates output on a subset list of headers or coordinates in 0-based bed format.
+  - exclude_bed:
+      type: file
+      description: opposite of --include-bed. They can be combined (no coordinates).
+  - instructions:
+      type: file
+      description: set of instructions provided as an ordered list.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - assembly_summary:
+      type: file
+      description: Assembly summary statistics file
+      pattern: "*.assembly_summary"
+  - assembly:
+      type: file
+      description: The assembly as modified by gfastats
+      pattern: "*.{fasta,fastq,gfa}.gz"
+authors:
+  - "@mahesh-panchal"
+maintainers:
+  - "@mahesh-panchal"
diff --git a/modules/nf-core/merquryfk/merquryfk/environment.yml b/modules/nf-core/merquryfk/merquryfk/environment.yml
new file mode 100644
index 0000000..44a5ee9
--- /dev/null
+++ b/modules/nf-core/merquryfk/merquryfk/environment.yml
@@ -0,0 +1,5 @@
+name: merquryfk_merquryfk
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf
new file mode 100644
index 0000000..ac163da
--- /dev/null
+++ b/modules/nf-core/merquryfk/merquryfk/main.nf
@@ -0,0 +1,58 @@
+process MERQURYFK_MERQURYFK { // Runs MerquryFK on a FastK .ktab table, an assembly and haplotigs; collects completeness stats, bed, qv tables and optional spectra plots
+    tag "$meta.id"
+    label 'process_medium'
+
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2'
+
+    input:
+    tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly), path(haplotigs) // fastk_hist is staged but never referenced in the script; only the fastk_ktab entry ending in ".ktab" is passed on the command line
+
+    output:
+    tuple val(meta), path("${prefix}.completeness.stats") , emit: stats
+    tuple val(meta), path("${prefix}.*_only.bed")         , emit: bed
+    tuple val(meta), path("${prefix}.*.qv")               , emit: assembly_qv
+    tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png,  optional: true
+    tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf,  optional: true
+    tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png,  optional: true
+    tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf,  optional: true
+    tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png,  optional: true
+    tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf,  optional: true
+    tuple val(meta), path("${prefix}.qv")                 , emit: qv
+    tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true
+    tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true
+    tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true
+    tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true
+    tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true
+    tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true
+    path "versions.yml"                                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "MERQURYFK_MERQURYFK module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: '' // extra MerquryFK options taken from task.ext.args (empty if unset)
+    prefix = task.ext.prefix ?: "${meta.id}" // assigned without 'def'; NOTE(review): presumably so the output: patterns above can resolve ${prefix} - confirm
+    def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    """
+    MerquryFK \\
+        $args \\
+        -T$task.cpus \\
+        ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\
+        $assembly \\
+        $haplotigs \\
+        $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastk: $FASTK_VERSION
+        merquryfk: $MERQURY_VERSION
+        r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/merquryfk/merquryfk/meta.yml b/modules/nf-core/merquryfk/merquryfk/meta.yml
new file mode 100644
index 0000000..7d4af79
--- /dev/null
+++ b/modules/nf-core/merquryfk/merquryfk/meta.yml
@@ -0,0 +1,112 @@
+name: "merquryfk_merquryfk"
+description: FastK based version of Merqury
+keywords:
+  - Merqury
+  - reference-free
+  - assembly evaluation
+tools:
+  - "merquryfk":
+      description: "FastK based version of Merqury"
+      homepage: "https://github.com/thegenemyers/MERQURY.FK"
+      tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK"
+      licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE"
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fastk_hist:
+      type: file
+      description: A histogram file from the program FastK
+      pattern: "*.hist"
+  - fastk_ktab:
+      type: file
+      description: K-mer table (ktab) files from the program FastK (option -t)
+      pattern: "*.ktab*"
+  - assembly:
+      type: file
+      description: Genome (primary) assembly files (fasta format)
+      pattern: "*.fasta"
+  - haplotigs:
+      type: file
+      description: Assembly haplotigs (fasta format)
+      pattern: "*.fasta"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - stats:
+      type: file
+      description: Assembly statistics file
+      pattern: "*.completeness.stats"
+  - bed:
+      type: file
+      description: Assembly only kmer positions not supported by reads in bed format
+      pattern: "*_only.bed"
+  - spectra_cn_fl_png:
+      type: file
+      description: "Unstacked copy number spectra filled plot in PNG format"
+      pattern: "*.spectra-cn.fl.png"
+  - spectra_cn_ln_png:
+      type: file
+      description: "Unstacked copy number spectra line plot in PNG format"
+      pattern: "*.spectra-cn.ln.png"
+  - spectra_cn_st_png:
+      type: file
+      description: "Stacked copy number spectra line plot in PNG format"
+      pattern: "*.spectra-cn.st.png"
+  - spectra_asm_fl_png:
+      type: file
+      description: "Unstacked assembly spectra filled plot in PNG format"
+      pattern: "*.spectra-asm.fl.png"
+  - spectra_asm_ln_png:
+      type: file
+      description: "Unstacked assembly spectra line plot in PNG format"
+      pattern: "*.spectra-asm.ln.png"
+  - spectra_asm_st_png:
+      type: file
+      description: "Stacked assembly spectra line plot in PNG format"
+      pattern: "*.spectra-asm.st.png"
+  - spectra_cn_fl_pdf:
+      type: file
+      description: "Unstacked copy number spectra filled plot in PDF format"
+      pattern: "*.spectra-cn.fl.pdf"
+  - spectra_cn_ln_pdf:
+      type: file
+      description: "Unstacked copy number spectra line plot in PDF format"
+      pattern: "*.spectra-cn.ln.pdf"
+  - spectra_cn_st_pdf:
+      type: file
+      description: "Stacked copy number spectra line plot in PDF format"
+      pattern: "*.spectra-cn.st.pdf"
+  - spectra_asm_fl_pdf:
+      type: file
+      description: "Unstacked assembly spectra filled plot in PDF format"
+      pattern: "*.spectra-asm.fl.pdf"
+  - spectra_asm_ln_pdf:
+      type: file
+      description: "Unstacked assembly spectra line plot in PDF format"
+      pattern: "*.spectra-asm.ln.pdf"
+  - spectra_asm_st_pdf:
+      type: file
+      description: "Stacked assembly spectra line plot in PDF format"
+      pattern: "*.spectra-asm.st.pdf"
+  - assembly_qv:
+      type: file
+      description: "error and qv table for each scaffold of the assembly"
+      pattern: "*.qv"
+  - qv:
+      type: file
+      description: "error and qv of each assembly as a whole"
+      pattern: "*.qv"
+authors:
+  - "@mahesh-panchal"
+maintainers:
+  - "@mahesh-panchal"
diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml
new file mode 100644
index 0000000..41e8fe9
--- /dev/null
+++ b/modules/nf-core/minimap2/align/environment.yml
@@ -0,0 +1,11 @@
+name: minimap2_align
+
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+
+dependencies:
+  - bioconda::htslib=1.20
+  - bioconda::minimap2=2.28
+  - bioconda::samtools=1.20
diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf
new file mode 100644
index 0000000..d82dc14
--- /dev/null
+++ b/modules/nf-core/minimap2/align/main.nf
@@ -0,0 +1,78 @@
+process MINIMAP2_ALIGN {
+    tag "$meta.id"
+    label 'process_high'
+    // Aligns reads to a reference with minimap2, writing PAF or (via samtools sort) BAM output.
+    // Note: the versions here need to match the versions used in the mulled container below and minimap2/index
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' :
+        'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }"
+    // Inputs are positional: two (meta, file) tuples followed by four plain values that steer the output format.
+    input:
+    tuple val(meta), path(reads)
+    tuple val(meta2), path(reference)
+    val bam_format
+    val bam_index_extension
+    val cigar_paf_format
+    val cigar_bam
+    // Every alignment output is optional; which ones appear depends on bam_format and bam_index_extension.
+    output:
+    tuple val(meta), path("*.paf")                       , optional: true, emit: paf
+    tuple val(meta), path("*.bam")                       , optional: true, emit: bam
+    tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index
+    path "versions.yml"                                  , emit: versions
+    // Runs unless task.ext.when is set and evaluates to false.
+    when:
+    task.ext.when == null || task.ext.when
+    // args -> minimap2, args2 -> samtools sort, args3 -> samtools reset, args4 -> samtools fastq
+    script:
+    def args  = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def args3 = task.ext.args3 ?: ''
+    def args4 = task.ext.args4 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" // with an extension, the sort target names both the BAM and its index file
+    def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" // BAM: pipe minimap2 -a output into samtools sort; otherwise write PAF directly
+    def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' // -c is only added for PAF output
+    def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' // -L is only added for BAM output
+    def bam_input = "${reads.extension}".matches('sam|bam|cram') // true when the reads extension is exactly sam, bam or cram
+    def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' // alignment-format reads are piped through samtools reset | samtools fastq
+    def query = bam_input ? "-" : reads // "-" is passed as the query when reads arrive through that pipe
+    def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) // no reference: reads become the target, except for sam/bam/cram input, which aborts
+
+    """
+    $samtools_reset_fastq \\
+    minimap2 \\
+        $args \\
+        -t $task.cpus \\
+        $target \\
+        $query \\
+        $cigar_paf \\
+        $set_cigar_bam \\
+        $bam_output
+
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        minimap2: \$(minimap2 --version 2>&1)
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+    // Stub: touches empty placeholder outputs; its versions.yml records minimap2 only (no samtools entry).
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf"
+    def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : ""
+    def bam_input = "${reads.extension}".matches('sam|bam|cram')
+    def target = reference ?: (bam_input ? error("BAM input requires reference") : reads)
+    // target is not used in the stub script below; it is evaluated only for its error(...) side effect.
+    """
+    touch $output_file
+    ${bam_index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        minimap2: \$(minimap2 --version 2>&1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml
new file mode 100644
index 0000000..8996f88
--- /dev/null
+++ b/modules/nf-core/minimap2/align/meta.yml
@@ -0,0 +1,84 @@
+name: minimap2_align
+description: A versatile pairwise aligner for genomic and spliced nucleotide sequences
+keywords:
+  - align
+  - fasta
+  - fastq
+  - genome
+  - paf
+  - reference
+tools:
+  - minimap2:
+      description: |
+        A versatile pairwise aligner for genomic and spliced nucleotide sequences.
+      homepage: https://github.com/lh3/minimap2
+      documentation: https://github.com/lh3/minimap2#uguide
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FASTA or FASTQ files of size 1 and 2 for single-end and
+        paired-end data, respectively, or a SAM/BAM/CRAM file (converted to FASTQ with samtools).
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'test_ref']
+  - reference:
+      type: file
+      description: |
+        Reference database in FASTA format.
+  - bam_format:
+      type: boolean
+      description: Specify that output should be in BAM format
+  - bam_index_extension:
+      type: string
+      description: BAM alignment index extension (e.g. "bai")
+  - cigar_paf_format:
+      type: boolean
+      description: Specify that output CIGAR should be in PAF format
+  - cigar_bam:
+      type: boolean
+      description: |
+        Write CIGAR with >65535 ops at the CG tag (minimap2 -L; only applied to BAM output). This is recommended when
+        alignments may exceed the BAM CIGAR limit (https://github.com/lh3/minimap2#working-with-65535-cigar-operations)
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - paf:
+      type: file
+      description: Alignment in PAF format
+      pattern: "*.paf"
+  - bam:
+      type: file
+      description: Alignment in BAM format
+      pattern: "*.bam"
+  - index:
+      type: file
+      description: BAM alignment index
+      pattern: "*.bam.*"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@heuermh"
+  - "@sofstam"
+  - "@sateeshperi"
+  - "@jfy133"
+  - "@fellen31"
+maintainers:
+  - "@heuermh"
+  - "@sofstam"
+  - "@sateeshperi"
+  - "@jfy133"
+  - "@fellen31"
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test
new file mode 100644
index 0000000..4072c17
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test
@@ -0,0 +1,441 @@
+nextflow_process {
+
+    name "Test Process MINIMAP2_ALIGN"
+    script "../main.nf"
+    process "MINIMAP2_ALIGN"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "minimap2"
+    tag "minimap2/align"
+
+    test("sarscov2 - fastq, fasta, true, [], false, false") {
+        // Single-end FASTQ against the sarscov2 FASTA reference; BAM output requested, no index extension.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // The snapshot pins the BAM header, the reads MD5 and versions rather than the raw BAM file.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    bam(process.out.bam[0][1]).getHeader(),
+                    bam(process.out.bam[0][1]).getReadsMD5(),
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fastq, fasta, true, 'bai', false, false") {
+        // Single-end FASTQ against the FASTA reference; BAM output with a 'bai' index requested via input[3].
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = 'bai'
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // Besides header, reads MD5 and versions, the snapshot pins the name of the emitted index file.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    bam(process.out.bam[0][1]).getHeader(),
+                    bam(process.out.bam[0][1]).getReadsMD5(),
+                    file(process.out.index[0][1]).name,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") {
+        // Two FASTQ files passed as a list in input[0]; BAM output requested, no index extension.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // NOTE(review): the test name lists five values but six inputs are set; the name is also the snapshot key.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    bam(process.out.bam[0][1]).getHeader(),
+                    bam(process.out.bam[0][1]).getReadsMD5(),
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fastq, [], true, false, false") {
+        // Empty reference slot in input[1]; main.nf then uses the reads themselves as the alignment target.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    []
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // NOTE(review): the test name lists five values but six inputs are set; the name is also the snapshot key.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    bam(process.out.bam[0][1]).getHeader(),
+                    bam(process.out.bam[0][1]).getReadsMD5(),
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam, fasta, true, [], false, false") {
+        // BAM reads (homo_sapiens test data) as input[0], aligned to the sarscov2 FASTA; BAM output, no index.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // The snapshot pins the BAM header, the reads MD5 and versions rather than the raw BAM file.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    bam(process.out.bam[0][1]).getHeader(),
+                    bam(process.out.bam[0][1]).getReadsMD5(),
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam, fasta, true, 'bai', false, false") {
+        // BAM reads as input[0], aligned to the sarscov2 FASTA; BAM output with a 'bai' index requested via input[3].
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = 'bai'
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // Besides header, reads MD5 and versions, the snapshot pins the name of the emitted index file.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    bam(process.out.bam[0][1]).getHeader(),
+                    bam(process.out.bam[0][1]).getReadsMD5(),
+                    file(process.out.index[0][1]).name,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam, [], true, false, false") {
+        // BAM reads with an empty reference slot: main.nf calls error("BAM input requires reference") for this combination.
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    []
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // Only failure is asserted; no snapshot is taken. NOTE(review): the name lists five values for six inputs.
+        then {
+            assertAll(
+                { assert process.failed }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fastq, fasta, true, [], false, false - stub") {
+        // Stub run of the FASTQ + FASTA, BAM-output case without an index extension.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // The whole process.out map is snapshotted.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") {
+        // Stub run of the FASTQ + FASTA, BAM-output case with a 'bai' index extension in input[3].
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = 'bai'
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // The whole process.out map is snapshotted.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fastq, fasta, false, [], false, false - stub") {
+        // Stub run with input[2] = false, i.e. the stub touches a .paf file instead of a .bam file.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = false
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // The whole process.out map is snapshotted.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam, fasta, true, [], false, false - stub") {
+        // Stub run with BAM reads as input[0] and a FASTA reference; BAM output, no index extension.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // The whole process.out map is snapshotted.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") {
+        // Stub run with BAM reads as input[0] and a FASTA reference; BAM output with a 'bai' index extension.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = true
+                input[3] = 'bai'
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // The whole process.out map is snapshotted.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - bam, [], true, false, false - stub") {
+        // Stub run with BAM reads and an empty reference slot; the stub block also evaluates the error(...) check.
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'test_ref' ], // meta map
+                    []
+                ]
+                input[2] = true
+                input[3] = []
+                input[4] = false
+                input[5] = false
+                """
+            }
+        }
+        // Only failure is asserted; no snapshot is taken. NOTE(review): the name lists five values for six inputs.
+        then {
+            assertAll(
+                { assert process.failed }
+            )
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
new file mode 100644
index 0000000..12264a8
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
@@ -0,0 +1,476 @@
+{
+    "sarscov2 - bam, fasta, true, 'bai', false, false": {
+        "content": [
+            [
+                "@HD\tVN:1.6\tSO:coordinate",
+                "@SQ\tSN:MT192765.1\tLN:29829",
+                "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -",
+                "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index"
+            ],
+            "5d426b9a5f5b2c54f1d7f1e4c238ae94",
+            "test.bam.bai",
+            [
+                "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-25T09:03:00.827260362"
+    },
+    "sarscov2 - bam, fasta, true, 'bai', false, false - stub": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "index": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "paf": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-23T11:21:37.92353539"
+    },
+    "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "index": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "paf": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-03T11:29:44.669021368"
+    },
+    "sarscov2 - fastq, fasta, false, [], false, false - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ],
+                "bam": [
+                    
+                ],
+                "index": [
+                    
+                ],
+                "paf": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-03T11:15:52.738781039"
+    },
+    "sarscov2 - fastq, fasta, true, [], false, false - stub": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "index": [
+                    
+                ],
+                "paf": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-06-03T11:15:23.033808223"
+    },
+    "sarscov2 - [fastq1, fastq2], fasta, true, false, false": {
+        "content": [
+            [
+                "@HD\tVN:1.6\tSO:coordinate",
+                "@SQ\tSN:MT192765.1\tLN:29829",
+                "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz",
+                "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+            ],
+            "1bc392244f228bf52cf0b5a8f6a654c9",
+            [
+                "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-23T11:18:18.964586894"
+    },
+    "sarscov2 - fastq, fasta, true, [], false, false": {
+        "content": [
+            [
+                "@HD\tVN:1.6\tSO:coordinate",
+                "@SQ\tSN:MT192765.1\tLN:29829",
+                "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz",
+                "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+            ],
+            "f194745c0ccfcb2a9c0aee094a08750",
+            [
+                "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-23T11:17:48.667488325"
+    },
+    "sarscov2 - fastq, fasta, true, 'bai', false, false": {
+        "content": [
+            [
+                "@HD\tVN:1.6\tSO:coordinate",
+                "@SQ\tSN:MT192765.1\tLN:29829",
+                "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz",
+                "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index"
+            ],
+            "f194745c0ccfcb2a9c0aee094a08750",
+            "test.bam.bai",
+            [
+                "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-23T11:18:02.517416733"
+    },
+    "sarscov2 - bam, fasta, true, [], false, false": {
+        "content": [
+            [
+                "@HD\tVN:1.6\tSO:coordinate",
+                "@SQ\tSN:MT192765.1\tLN:29829",
+                "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -",
+                "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+            ],
+            "5d426b9a5f5b2c54f1d7f1e4c238ae94",
+            [
+                "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-25T09:02:49.64829488"
+    },
+    "sarscov2 - bam, fasta, true, [], false, false - stub": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "index": [
+                    
+                ],
+                "paf": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-23T11:21:22.162291795"
+    },
+    "sarscov2 - fastq, [], true, false, false": {
+        "content": [
+            [
+                "@HD\tVN:1.6\tSO:coordinate",
+                "@SQ\tSN:ERR5069949.2151832\tLN:150",
+                "@SQ\tSN:ERR5069949.576388\tLN:77",
+                "@SQ\tSN:ERR5069949.501486\tLN:146",
+                "@SQ\tSN:ERR5069949.1331889\tLN:132",
+                "@SQ\tSN:ERR5069949.2161340\tLN:80",
+                "@SQ\tSN:ERR5069949.973930\tLN:79",
+                "@SQ\tSN:ERR5069949.2417063\tLN:150",
+                "@SQ\tSN:ERR5069949.376959\tLN:151",
+                "@SQ\tSN:ERR5069949.1088785\tLN:149",
+                "@SQ\tSN:ERR5069949.1066259\tLN:147",
+                "@SQ\tSN:ERR5069949.2832676\tLN:139",
+                "@SQ\tSN:ERR5069949.2953930\tLN:151",
+                "@SQ\tSN:ERR5069949.324865\tLN:151",
+                "@SQ\tSN:ERR5069949.2185111\tLN:150",
+                "@SQ\tSN:ERR5069949.937422\tLN:151",
+                "@SQ\tSN:ERR5069949.2431709\tLN:150",
+                "@SQ\tSN:ERR5069949.1246538\tLN:148",
+                "@SQ\tSN:ERR5069949.1189252\tLN:98",
+                "@SQ\tSN:ERR5069949.2216307\tLN:147",
+                "@SQ\tSN:ERR5069949.3273002\tLN:148",
+                "@SQ\tSN:ERR5069949.3277445\tLN:151",
+                "@SQ\tSN:ERR5069949.3022231\tLN:147",
+                "@SQ\tSN:ERR5069949.184542\tLN:151",
+                "@SQ\tSN:ERR5069949.540529\tLN:149",
+                "@SQ\tSN:ERR5069949.686090\tLN:150",
+                "@SQ\tSN:ERR5069949.2787556\tLN:106",
+                "@SQ\tSN:ERR5069949.2650879\tLN:150",
+                "@SQ\tSN:ERR5069949.2064910\tLN:149",
+                "@SQ\tSN:ERR5069949.2328704\tLN:150",
+                "@SQ\tSN:ERR5069949.1067032\tLN:150",
+                "@SQ\tSN:ERR5069949.3338256\tLN:151",
+                "@SQ\tSN:ERR5069949.1412839\tLN:147",
+                "@SQ\tSN:ERR5069949.1538968\tLN:150",
+                "@SQ\tSN:ERR5069949.147998\tLN:94",
+                "@SQ\tSN:ERR5069949.366975\tLN:106",
+                "@SQ\tSN:ERR5069949.1372331\tLN:151",
+                "@SQ\tSN:ERR5069949.1709367\tLN:129",
+                "@SQ\tSN:ERR5069949.2388984\tLN:150",
+                "@SQ\tSN:ERR5069949.1132353\tLN:150",
+                "@SQ\tSN:ERR5069949.1151736\tLN:151",
+                "@SQ\tSN:ERR5069949.479807\tLN:150",
+                "@SQ\tSN:ERR5069949.2176303\tLN:151",
+                "@SQ\tSN:ERR5069949.2772897\tLN:151",
+                "@SQ\tSN:ERR5069949.1020777\tLN:122",
+                "@SQ\tSN:ERR5069949.465452\tLN:151",
+                "@SQ\tSN:ERR5069949.1704586\tLN:149",
+                "@SQ\tSN:ERR5069949.1258508\tLN:151",
+                "@SQ\tSN:ERR5069949.986441\tLN:119",
+                "@SQ\tSN:ERR5069949.2674295\tLN:148",
+                "@SQ\tSN:ERR5069949.885966\tLN:79",
+                "@SQ\tSN:ERR5069949.2342766\tLN:151",
+                "@SQ\tSN:ERR5069949.3122970\tLN:127",
+                "@SQ\tSN:ERR5069949.3279513\tLN:72",
+                "@SQ\tSN:ERR5069949.309410\tLN:151",
+                "@SQ\tSN:ERR5069949.532979\tLN:149",
+                "@SQ\tSN:ERR5069949.2888794\tLN:151",
+                "@SQ\tSN:ERR5069949.2205229\tLN:150",
+                "@SQ\tSN:ERR5069949.786562\tLN:151",
+                "@SQ\tSN:ERR5069949.919671\tLN:151",
+                "@SQ\tSN:ERR5069949.1328186\tLN:151",
+                "@SQ\tSN:ERR5069949.870926\tLN:149",
+                "@SQ\tSN:ERR5069949.2257580\tLN:151",
+                "@SQ\tSN:ERR5069949.3249622\tLN:77",
+                "@SQ\tSN:ERR5069949.611123\tLN:125",
+                "@SQ\tSN:ERR5069949.651338\tLN:142",
+                "@SQ\tSN:ERR5069949.169513\tLN:92",
+                "@SQ\tSN:ERR5069949.155944\tLN:150",
+                "@SQ\tSN:ERR5069949.2033605\tLN:150",
+                "@SQ\tSN:ERR5069949.2730382\tLN:142",
+                "@SQ\tSN:ERR5069949.2125592\tLN:150",
+                "@SQ\tSN:ERR5069949.1062611\tLN:151",
+                "@SQ\tSN:ERR5069949.1778133\tLN:151",
+                "@SQ\tSN:ERR5069949.3057020\tLN:95",
+                "@SQ\tSN:ERR5069949.2972968\tLN:141",
+                "@SQ\tSN:ERR5069949.2734474\tLN:149",
+                "@SQ\tSN:ERR5069949.856527\tLN:151",
+                "@SQ\tSN:ERR5069949.2098070\tLN:151",
+                "@SQ\tSN:ERR5069949.1552198\tLN:150",
+                "@SQ\tSN:ERR5069949.2385514\tLN:150",
+                "@SQ\tSN:ERR5069949.2270078\tLN:151",
+                "@SQ\tSN:ERR5069949.114870\tLN:150",
+                "@SQ\tSN:ERR5069949.2668880\tLN:147",
+                "@SQ\tSN:ERR5069949.257821\tLN:139",
+                "@SQ\tSN:ERR5069949.2243023\tLN:150",
+                "@SQ\tSN:ERR5069949.2605155\tLN:146",
+                "@SQ\tSN:ERR5069949.1340552\tLN:151",
+                "@SQ\tSN:ERR5069949.1561137\tLN:150",
+                "@SQ\tSN:ERR5069949.2361683\tLN:149",
+                "@SQ\tSN:ERR5069949.2521353\tLN:150",
+                "@SQ\tSN:ERR5069949.1261808\tLN:149",
+                "@SQ\tSN:ERR5069949.2734873\tLN:98",
+                "@SQ\tSN:ERR5069949.3017828\tLN:107",
+                "@SQ\tSN:ERR5069949.573706\tLN:150",
+                "@SQ\tSN:ERR5069949.1980512\tLN:151",
+                "@SQ\tSN:ERR5069949.1014693\tLN:150",
+                "@SQ\tSN:ERR5069949.3184655\tLN:150",
+                "@SQ\tSN:ERR5069949.29668\tLN:89",
+                "@SQ\tSN:ERR5069949.3258358\tLN:151",
+                "@SQ\tSN:ERR5069949.1476386\tLN:151",
+                "@SQ\tSN:ERR5069949.2415814\tLN:150",
+                "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz",
+                "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+            ],
+            "16c1c651f8ec67383bcdee3c55aed94f",
+            [
+                "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.2"
+        },
+        "timestamp": "2024-07-23T11:18:34.246998277"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml
new file mode 100644
index 0000000..39dba37
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/tags.yml
@@ -0,0 +1,2 @@
+minimap2/align:
+  - "modules/nf-core/minimap2/align/**"
diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml
new file mode 100644
index 0000000..cd366d6
--- /dev/null
+++ b/modules/nf-core/samtools/merge/environment.yml
@@ -0,0 +1,8 @@
+name: samtools_merge
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::samtools=1.20
+  - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf
new file mode 100644
index 0000000..693b1d8
--- /dev/null
+++ b/modules/nf-core/samtools/merge/main.nf
@@ -0,0 +1,61 @@
+process SAMTOOLS_MERGE { // Merge all alignment files staged for one sample into ${prefix}.<input extension> with `samtools merge`
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' :
+        'biocontainers/samtools:1.20--h50ea8bc_0' }"
+
+    input:
+    tuple val(meta), path(input_files, stageAs: "?/*") // meta.id names the task and the default prefix; files are staged under "?/*" sub-directories
+    tuple val(meta2), path(fasta) // optional reference: forwarded as --reference only when non-empty
+    tuple val(meta3), path(fai)   // staged alongside the fasta; never named on the command line
+
+    output:
+    tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam
+    tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram
+    tuple val(meta), path("*.csi")         , optional:true, emit: csi
+    tuple val(meta), path("*.crai")        , optional:true, emit: crai
+    path  "versions.yml"                                  , emit: versions
+
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script: // runs samtools merge with task.cpus-1 threads, then records the samtools version in versions.yml
+    def args = task.ext.args   ?: ''
+    prefix   = task.ext.prefix ?: "${meta.id}" // declared without `def` because the output: paths interpolate ${prefix}
+    def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() // output format follows the first input's extension
+    def reference = fasta ? "--reference ${fasta}" : ""
+    """
+    samtools \\
+        merge \\
+        --threads ${task.cpus-1} \\
+        $args \\
+        ${reference} \\
+        ${prefix}.${file_type} \\
+        $input_files
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub: // touches placeholder outputs; samtools is only invoked to report its version
+    def args = task.ext.args   ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}" // same naming rule as script:, so task.ext.prefix is honoured under -stub
+    def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension()
+    def index_type = file_type == "bam" ? "csi" : "crai"
+    def index = args.contains("--write-index") ? "touch ${prefix}.${index_type}" : "" // NOTE(review): the real-run snapshot shows test.bam.csi, the stub creates test.csi - name kept because the stub snapshot pins it
+    """
+    touch ${prefix}.${file_type}
+    ${index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml
new file mode 100644
index 0000000..2e8f3db
--- /dev/null
+++ b/modules/nf-core/samtools/merge/meta.yml
@@ -0,0 +1,83 @@
+name: samtools_merge
+description: Merge BAM or CRAM file
+keywords:
+  - merge
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input_files:
+      type: file
+      description: BAM/CRAM file
+      pattern: "*.{bam,cram,sam}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fasta:
+      type: file
+      description: Reference file the CRAM was created with (optional)
+      pattern: "*.{fasta,fa}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fai:
+      type: file
+      description: Index of the reference file the CRAM was created with (optional)
+      pattern: "*.fai"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.{bam}"
+  - cram:
+      type: file
+      description: CRAM file
+      pattern: "*.{cram}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - csi:
+      type: file
+      description: BAM index file (optional)
+      pattern: "*.csi"
+  - crai:
+      type: file
+      description: CRAM index file (optional)
+      pattern: "*.crai"
+authors:
+  - "@drpatelh"
+  - "@yuukiiwa"
+  - "@maxulysse"
+  - "@FriederikeHanssen"
+  - "@ramprasadn"
+maintainers:
+  - "@drpatelh"
+  - "@yuukiiwa"
+  - "@maxulysse"
+  - "@FriederikeHanssen"
+  - "@ramprasadn"
diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config
new file mode 100644
index 0000000..8c5668c
--- /dev/null
+++ b/modules/nf-core/samtools/merge/tests/index.config
@@ -0,0 +1,3 @@
+process {
+    ext.args = "--write-index"
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test
new file mode 100644
index 0000000..40b36e8
--- /dev/null
+++ b/modules/nf-core/samtools/merge/tests/main.nf.test
@@ -0,0 +1,137 @@
+nextflow_process { // nf-test suite for the SAMTOOLS_MERGE process defined in ../main.nf
+
+    name "Test Process SAMTOOLS_MERGE"
+    script "../main.nf"
+    process "SAMTOOLS_MERGE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/merge"
+
+    test("bams") { // three sarscov2 BAMs, no reference; expects a BAM plus a CSI and empty CRAM/CRAI channels
+
+        config "./index.config" // sets ext.args = "--write-index"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ]
+                ])
+                input[1] = [[],[]]
+                input[2] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, // only the file name is snapshotted, not the content
+                { assert snapshot(process.out.cram).match("bams_cram") },
+                { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, // only the file name is snapshotted, not the content
+                { assert snapshot(process.out.crai).match("bams_crai") },
+                { assert snapshot(process.out.versions).match("bams_versions") }
+            )
+        }
+    }
+
+    test("crams") { // two homo_sapiens CRAMs merged with genome fasta + fai; expects a CRAM plus a CRAI and empty BAM/CSI channels
+
+        config "./index.config" // sets ext.args = "--write-index"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ]
+                ])
+                input[1] = Channel.of([
+                    [ id:'genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = Channel.of([
+                    [ id:'genome' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, // only the file name is snapshotted, not the content
+                { assert snapshot(process.out.bam).match("crams_bam") },
+                { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, // only the file name is snapshotted, not the content
+                { assert snapshot(process.out.csi).match("crams_csi") },
+                { assert snapshot(process.out.versions).match("crams_versions") }
+            )
+        }
+    }
+
+    test("bam") { // single BAM and no config, so --write-index is not set; both index channels are snapshotted as whole channels
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ]
+                ])
+                input[1] = [[],[]]
+                input[2] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, // only the file name is snapshotted, not the content
+                { assert snapshot(process.out.cram).match("bam_cram") },
+                { assert snapshot(process.out.crai).match("bam_crai") },
+                { assert snapshot(process.out.csi).match("bam_csi") },
+                { assert snapshot(process.out.versions).match("bam_versions") }
+            )
+        }
+    }
+
+    test("bams_stub") { // same inputs and config as "bams", but executes the stub: section of the process
+
+        config "./index.config" // sets ext.args = "--write-index"
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ]
+                ])
+                input[1] = [[],[]]
+                input[2] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, // only the file name is snapshotted, not the content
+                { assert snapshot(process.out.cram).match("bams_stub_cram") },
+                { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, // only the file name is snapshotted, not the content
+                { assert snapshot(process.out.crai).match("bams_stub_crai") },
+                { assert snapshot(process.out.versions).match("bams_stub_versions") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap
new file mode 100644
index 0000000..17bc846
--- /dev/null
+++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap
@@ -0,0 +1,228 @@
+{
+    "crams_cram": {
+        "content": [
+            "test.cram"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:00.647389"
+    },
+    "bams_stub_cram": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:19.937013"
+    },
+    "bams_crai": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:49:24.928616"
+    },
+    "bams_bam": {
+        "content": [
+            "test.bam"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:49:24.923289"
+    },
+    "bams_cram": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:49:24.925716"
+    },
+    "crams_csi": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:00.655959"
+    },
+    "bam_bam": {
+        "content": [
+            "test.bam"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:10.319539"
+    },
+    "bam_versions": {
+        "content": [
+            [
+                "versions.yml:md5,84dab54b9812780df48f5cecef690c34"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-28T15:46:35.851936597"
+    },
+    "bams_csi": {
+        "content": [
+            "test.bam.csi"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:49:24.92719"
+    },
+    "bams_stub_csi": {
+        "content": [
+            "test.csi"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:19.940498"
+    },
+    "bam_crai": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:10.328852"
+    },
+    "bams_stub_versions": {
+        "content": [
+            [
+                "versions.yml:md5,84dab54b9812780df48f5cecef690c34"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-28T15:46:41.405707643"
+    },
+    "bam_cram": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:10.324219"
+    },
+    "bams_stub_bam": {
+        "content": [
+            "test.bam"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:19.933153"
+    },
+    "bams_versions": {
+        "content": [
+            [
+                "versions.yml:md5,84dab54b9812780df48f5cecef690c34"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-28T15:45:51.695689923"
+    },
+    "crams_bam": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:00.650652"
+    },
+    "crams_versions": {
+        "content": [
+            [
+                "versions.yml:md5,84dab54b9812780df48f5cecef690c34"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-28T15:46:30.185392319"
+    },
+    "bam_csi": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:10.33292"
+    },
+    "crams_crai": {
+        "content": [
+            "test.cram.crai"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:00.653512"
+    },
+    "bams_stub_crai": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.04.3"
+        },
+        "timestamp": "2024-02-12T18:50:19.943839"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml
new file mode 100644
index 0000000..b869abc
--- /dev/null
+++ b/modules/nf-core/samtools/merge/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/merge:
+  - "modules/nf-core/samtools/merge/**"
diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml
new file mode 100644
index 0000000..36a12ea
--- /dev/null
+++ b/modules/nf-core/samtools/sort/environment.yml
@@ -0,0 +1,8 @@
+name: samtools_sort
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::samtools=1.20
+  - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf
new file mode 100644
index 0000000..8e01909
--- /dev/null
+++ b/modules/nf-core/samtools/sort/main.nf
@@ -0,0 +1,73 @@
+process SAMTOOLS_SORT { // Concatenate the input alignment file(s) with `samtools cat` and sort the stream into ${prefix}.<bam|cram|sam>
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' :
+        'biocontainers/samtools:1.20--h50ea8bc_0' }"
+
+    input:
+    tuple val(meta) , path(bam)   // meta.id names the task and the default output prefix
+    tuple val(meta2), path(fasta) // optional reference: forwarded as --reference only when non-empty
+
+    output:
+    tuple val(meta), path("*.bam"),     emit: bam,  optional: true
+    tuple val(meta), path("*.cram"),    emit: cram, optional: true
+    tuple val(meta), path("*.crai"),    emit: crai, optional: true
+    tuple val(meta), path("*.csi"),     emit: csi,  optional: true
+    path  "versions.yml"          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script: // pipes `samtools cat` into `samtools sort`, then records the samtools version in versions.yml
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def extension = args.contains("--output-fmt sam") ? "sam" : // output extension is derived from the --output-fmt value in ext.args
+                    args.contains("--output-fmt cram") ? "cram" :
+                    "bam" // NOTE(review): a "sam" result matches none of the output: globs above - confirm this is intended
+    def reference = fasta ? "--reference ${fasta}" : ""
+    if ("$bam" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" // compare against the real output name, not only ".bam"
+
+    """
+    samtools cat \\
+        --threads $task.cpus \\
+        ${bam} \\
+    | \\
+    samtools sort \\
+        $args \\
+        -T ${prefix} \\
+        --threads $task.cpus \\
+        ${reference} \\
+        -o ${prefix}.${extension} \\
+        -
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub: // touches placeholder outputs; an index is created for bam/cram whether or not ext.args requests one
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def extension = args.contains("--output-fmt sam") ? "sam" :
+                    args.contains("--output-fmt cram") ? "cram" :
+                    "bam"
+    """
+    touch ${prefix}.${extension}
+    if [ "${extension}" == "bam" ];
+    then
+        touch ${prefix}.${extension}.csi
+    elif [ "${extension}" == "cram" ];
+    then
+        touch ${prefix}.${extension}.crai
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml
new file mode 100644
index 0000000..341a7d0
--- /dev/null
+++ b/modules/nf-core/samtools/sort/meta.yml
@@ -0,0 +1,71 @@
+name: samtools_sort
+description: Sort SAM/BAM/CRAM file
+keywords:
+  - sort
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file(s)
+      pattern: "*.{bam,cram,sam}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fasta:
+      type: file
+      description: Reference genome FASTA file
+      pattern: "*.{fa,fasta,fna}"
+      optional: true
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Sorted BAM file
+      pattern: "*.{bam}"
+  - cram:
+      type: file
+      description: Sorted CRAM file
+      pattern: "*.{cram}"
+  - crai:
+      type: file
+      description: CRAM index file (optional)
+      pattern: "*.crai"
+  - csi:
+      type: file
+      description: BAM index file (optional)
+      pattern: "*.csi"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@matthdsm"
+maintainers:
+  - "@drpatelh"
+  - "@ewels"
+  - "@matthdsm"
diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test
new file mode 100644
index 0000000..c2ea9c7
--- /dev/null
+++ b/modules/nf-core/samtools/sort/tests/main.nf.test
@@ -0,0 +1,128 @@
+nextflow_process { // nf-test suite for the SAMTOOLS_SORT process defined in ../main.nf
+
+    name "Test Process SAMTOOLS_SORT"
+    script "../main.nf"
+    process "SAMTOOLS_SORT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/sort"
+
+    test("bam") { // sorts one sarscov2 BAM with the sarscov2 genome fasta as reference
+
+        config "./nextflow.config" // NOTE(review): config not visible here; the stub snapshot name test.sorted.bam suggests it sets a ".sorted" prefix - confirm
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+                ])
+                input[1] = Channel.of([
+                    [ id:'fasta' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                        process.out.bam,
+                        process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, // keep the meta map, reduce each file to its name
+                        process.out.versions
+                ).match()}
+            )
+        }
+    }
+
+    test("cram") { // sorts one homo_sapiens CRAM with the homo_sapiens genome fasta as reference
+
+        config "./nextflow_cram.config" // NOTE(review): config not visible here; presumably selects CRAM output via ext.args - confirm
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+                ])
+                input[1] = Channel.of([
+                    [ id:'fasta' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                    process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, // keep the meta map, reduce each file to its name
+                    process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } }, // keep the meta map, reduce each file to its name
+                    process.out.versions
+                ).match()}
+            )
+        }
+    }
+
+    test("bam - stub") { // same inputs and config as "bam", but executes the stub: section of the process
+
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+                ])
+                input[1] = Channel.of([
+                    [ id:'fasta' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() } // snapshots every output channel in one go
+            )
+        }
+    }
+
+    test("cram - stub") { // same inputs and config as "cram", but executes the stub: section of the process
+
+        options "-stub"
+        config "./nextflow_cram.config"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+                ])
+                input[1] = Channel.of([
+                    [ id:'fasta' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() } // snapshots every output channel in one go
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap
new file mode 100644
index 0000000..da38d5d
--- /dev/null
+++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap
@@ -0,0 +1,192 @@
+{
+    "cram": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.sorted.cram"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.sorted.cram.crai"
+                ]
+            ],
+            [
+                "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T17:19:37.196205"
+    },
+    "bam - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "crai": [
+                    
+                ],
+                "cram": [
+                    
+                ],
+                "csi": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:54:46.580756"
+    },
+    "cram - stub": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    
+                ],
+                "4": [
+                    "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+                ],
+                "bam": [
+                    
+                ],
+                "crai": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "cram": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "csi": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:57:30.505698"
+    },
+    "bam": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.sorted.bam.csi"
+                ]
+            ],
+            [
+                "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:54:25.872954"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config
new file mode 100644
index 0000000..f642771
--- /dev/null
+++ b/modules/nf-core/samtools/sort/tests/nextflow.config
@@ -0,0 +1,8 @@
+process {
+
+    withName: SAMTOOLS_SORT {
+        ext.prefix  = { "${meta.id}.sorted" }
+        ext.args    = "--write-index"
+    }
+
+}
diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config
new file mode 100644
index 0000000..3a8c018
--- /dev/null
+++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config
@@ -0,0 +1,8 @@
+process {
+
+    withName: SAMTOOLS_SORT {
+        ext.prefix  = { "${meta.id}.sorted" }
+        ext.args    = "--write-index --output-fmt cram"
+    }
+
+}
diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml
new file mode 100644
index 0000000..cd63ea2
--- /dev/null
+++ b/modules/nf-core/samtools/sort/tests/tags.yml
@@ -0,0 +1,3 @@
+samtools/sort:
+  - modules/nf-core/samtools/sort/**
+  - tests/modules/nf-core/samtools/sort/**
diff --git a/nextflow.config b/nextflow.config
index fc630a6..3fb2d75 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -11,14 +11,7 @@ params {
 
     // TODO nf-core: Specify your pipeline's command line flags
     // Input options
-    input                      = null
-
-    // MultiQC options
-    multiqc_config             = null
-    multiqc_title              = null
-    multiqc_logo               = null
-    max_multiqc_email_size     = '25.MB'
-    multiqc_methods_description = null
+    input                        = null
 
     // Boilerplate options
     outdir                       = null
@@ -268,3 +261,25 @@ def check_max(obj, type) {
         }
     }
 }
+
+/**
+ * Returns a channel with the path if it's defined, otherwise returns a default channel.
+ *
+ * @param path             The path to include into the channel
+ * @param default_channel  A channel to use as the default if no path is defined.
+ * @return                 A channel with a path, or the default channel
+ */
+def readWithDefault( String path, Object default_channel ) {
+    path ? Channel.fromPath( path, checkIfExists: true ) : default_channel
+}
+
+/**
+ * Returns a channel with the file defined by the path resolved against the directory.
+ *
+ * @param path  The path of the file relative to the directory in dir
+ * @param dir   A channel with a directory.
+ * @return      A channel with a path relative to the dir path
+ */
+def resolveFileFromDir ( String path, Object dir ){
+    dir.map{ results -> file( results.resolve( path ) ) }
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b0fd6d6..55b26b3 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -17,11 +17,11 @@
                     "format": "file-path",
                     "exists": true,
                     "schema": "assets/schema_input.json",
-                    "mimetype": "text/csv",
-                    "pattern": "^\\S+\\.csv$",
-                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
+                    "mimetype": "text/yaml",
+                    "pattern": "^\\S+\\.yaml$",
+                    "description": "Path to yaml file containing information about the samples in the experiment.",
                     "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.",
-                    "fa_icon": "fas fa-file-csv"
+                    "fa_icon": "fas fa-file-yaml"
                 },
                 "outdir": {
                     "type": "string",
diff --git a/workflows/ear.nf b/workflows/ear.nf
index 67d3ef6..b35a949 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -4,12 +4,20 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { FASTQC                 } from '../modules/nf-core/fastqc/main'
-include { MULTIQC                } from '../modules/nf-core/multiqc/main'
-include { paramsSummaryMap       } from 'plugin/nf-validation'
-include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_ear_pipeline'
+include { NEXTFLOW_RUN as CURATIONPRETEXT   } from '../modules/local/nextflow/run'
+include { NEXTFLOW_RUN as BLOBTOOLKIT       } from '../modules/local/nextflow/run'
+
+include { YAML_INPUT                        } from '../subworkflows/local/yaml_input'
+include { GENERATE_SAMPLESHEET              } from '../modules/local/generate_samplesheet'
+include { GFASTATS                          } from '../modules/nf-core/gfastats/main'
+include { PE_MAPPING                        } from '../subworkflows/local/pe_mapping'
+include { SE_MAPPING                        } from '../subworkflows/local/se_mapping'
+include { SAMTOOLS_SORT                     } from '../modules/nf-core/samtools/sort/main'
+
+include { paramsSummaryMap                  } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc              } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML            } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText            } from '../subworkflows/local/utils_nfcore_ear_pipeline'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -20,21 +28,180 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_ear_
 workflow EAR {
 
     take:
-    ch_samplesheet // channel: samplesheet read in from --input
+    ch_input
 
     main:
 
-    ch_versions = Channel.empty()
-    ch_multiqc_files = Channel.empty()
+    ch_versions     = Channel.empty()
+    ch_align_bam    = Channel.empty()
+
+    //
+    // MODULE: YAML_INPUT
+    //
+    YAML_INPUT(ch_input)
+    reference = YAML_INPUT.out.reference
+    reference.view()
+
+    //
+    // MODULE: Run Sanger-ToL/CurationPretext
+    //         - This was built using: https://github.com/mahesh-panchal/nf-cascade
+    //
+    CURATIONPRETEXT(
+        "sanger-tol/curationpretext",
+        [
+            "-r 1.0.0",
+            "--input",
+            reference,
+            "--longread",
+            YAML_INPUT.out.longread_dir,
+            "--cram",
+            YAML_INPUT.out.cpretext_hic_dir,
+            "$params.outdir/curationpretext",
+            "-profile singularity,sanger"
+        ].join(" ").trim(),                                            // workflow opts
+        Channel.value([]),  //readWithDefault( params.demo.params_file, Channel.value([]) ), // params file
+        Channel.value([]),  // samplesheet - not used by this pipeline
+        Channel.value([])   //readWithDefault( params.demo.add_config, Channel.value([]) ),  // custom config
+
+    )
+
+    //
+    // MODULE: ASSEMBLY STATISTICS FOR THE FASTA
+    //
+    GFASTATS(
+        YAML_INPUT.out.reference,
+        "fasta",
+        [],
+        [],
+        [],
+        [],
+        [],
+        []
+    )
+
+    // //
+    // // LOGIC:  REFORMAT A BUNCH OF CHANNELS FOR MERQURYFK
+    // //
+    // YAML_INPUT.out.reference
+    //     .combine()
+    //     .combine()
+    //     .combine()
+    //     .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab ->
+    //         tuple(  meta,
+    //                 fastk_hist,
+    //                 fastk_ktab,
+    //                 primary,
+    //                 haplotigs
+    //         )
+    //     }
+    //     .set { merquryfk_input }
+
+    // //
+    // // MODULE: MERQURYFK PLOTS OF GENOME
+    // //
+
+    // MERQURYFK(
+    //     merquryfk_input
+    // )
+
+    //
+    // LOGIC: SANGER-TOL/BLOBTOOLKIT expects the pacbio data to be already mapped
+    //
+    platform = YAML_INPUT.out.longread_type
+
+    YAML_INPUT.out.sample_id
+        .combine(YAML_INPUT.out.longread_dir)
+        .set {pacbio_tuple}
+
+    if ( platform.filter { it == "hifi" } || platform.filter { it == "clr" } || platform.filter { it == "ont" } ) {
+        //
+        // SUBWORKFLOW: SINGLE END MAPPING FOR ALIGNING LONGREAD DATA
+        //
+        SE_MAPPING (
+            YAML_INPUT.out.reference,
+            pacbio_tuple,
+            platform
+        )
+        ch_versions = ch_versions.mix(SE_MAPPING.out.versions)
+
+        ch_align_bam
+            .mix( SE_MAPPING.out.mapped_bam )
+            .set { merged_bam }
+    }
+    else if ( platform.filter { it == "illumina" } ) {
+        //
+        // SUBWORKFLOW: PAIRED END MAPPING FOR ALIGNING ILLUMINA SHORT READ DATA
+        //
+        PE_MAPPING  (
+            YAML_INPUT.out.reference,
+            pacbio_tuple,
+            platform
+        )
+        ch_versions = ch_versions.mix(PE_MAPPING.out.versions)
+
+        ch_align_bam
+            .mix( PE_MAPPING.out.mapped_bam )
+            .set { merged_bam }
+    }
 
     //
-    // MODULE: Run FastQC
+    // MODULE: SORT MAPPED BAM
     //
-    FASTQC (
-        ch_samplesheet
+    SAMTOOLS_SORT (
+        merged_bam,
+        YAML_INPUT.out.reference
+    )
+    ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions )
+
+    //
+    // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
+    //
+    YAML_INPUT.out.sample_id
+        .combine(merged_bam)
+        .map{ sample_id, pacbio_path ->
+            tuple(  [id: sample_id],
+                    pacbio_path
+            )
+        }
+        .set { samplesheet_input }
+
+
+    GENERATE_SAMPLESHEET(
+        samplesheet_input
     )
-    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
-    ch_versions = ch_versions.mix(FASTQC.out.versions.first())
+
+    //
+    // MODULE: Run Sanger-ToL/BlobToolKit
+    //         - This was built using: https://github.com/mahesh-panchal/nf-cascade
+    //
+    // BLOBTOOLKIT(
+    //     "sanger-tol/blobtoolkit",
+    //     [
+    //         "-r 0.4.0",
+    //         "--input",
+    //         GENERATE_SAMPLESHEET.out.csv,
+    //         "--fasta",
+    //         reference,
+    //         "--accession",
+    //         YAML_INPUT.out.btk_gca_accession,
+    //         "-taxon",
+    //         YAML_INPUT.out.btk_taxid,
+    //         "--taxdump",
+    //         YAML_INPUT.out.btk_ncbi_taxonomy_path,
+    //         "--blastp",
+    //         YAML_INPUT.out.btk_nt_diamond_database,
+    //         "--blastn",
+    //         YAML_INPUT.out.btk_nt_database,
+    //         "--blastx",
+    //         YAML_INPUT.out.btk_nt_diamond_database,
+    //         "$params.outdir/blobtoolkit",
+    //         "-profile singularity,sanger"
+    //     ].join(" ").trim(),                                                                 // workflow opts
+    //     Channel.value([]),//readWithDefault( params.demo.params_file, Channel.value([]) ),  // params file
+    //     Channel.value([]),//readWithDefault( params.demo.input, Channel.value([]) ),        // samplesheet
+    //     Channel.value([])//readWithDefault( params.demo.add_config, Channel.value([]) ),    // custom config
+
+    // )
 
     //
     // Collate and save software versions
@@ -47,47 +214,13 @@ workflow EAR {
             newLine: true
         ).set { ch_collated_versions }
 
-    //
-    // MODULE: MultiQC
-    //
-    ch_multiqc_config        = Channel.fromPath(
-        "$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-    ch_multiqc_custom_config = params.multiqc_config ?
-        Channel.fromPath(params.multiqc_config, checkIfExists: true) :
-        Channel.empty()
-    ch_multiqc_logo          = params.multiqc_logo ?
-        Channel.fromPath(params.multiqc_logo, checkIfExists: true) :
-        Channel.empty()
-
     summary_params      = paramsSummaryMap(
         workflow, parameters_schema: "nextflow_schema.json")
     ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
 
-    ch_multiqc_custom_methods_description = params.multiqc_methods_description ?
-        file(params.multiqc_methods_description, checkIfExists: true) :
-        file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-    ch_methods_description                = Channel.value(
-        methodsDescriptionText(ch_multiqc_custom_methods_description))
-
-    ch_multiqc_files = ch_multiqc_files.mix(
-        ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
-    ch_multiqc_files = ch_multiqc_files.mix(
-        ch_methods_description.collectFile(
-            name: 'methods_description_mqc.yaml',
-            sort: true
-        )
-    )
 
-    MULTIQC (
-        ch_multiqc_files.collect(),
-        ch_multiqc_config.toList(),
-        ch_multiqc_custom_config.toList(),
-        ch_multiqc_logo.toList()
-    )
 
     emit:
-    multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
     versions       = ch_versions                 // channel: [ path(versions.yml) ]
 }
 

From d7b8491d6675823befeee1df0c006f1dae1e0b3f Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 5 Aug 2024 10:16:41 +0100
Subject: [PATCH 02/52] Last weeks addition

---
 subworkflows/local/pe_mapping.nf | 116 +++++++++++++++++++++++++++++++
 subworkflows/local/se_mapping.nf | 115 ++++++++++++++++++++++++++++++
 subworkflows/local/yaml_input.nf |  44 ++++++++++++
 3 files changed, 275 insertions(+)
 create mode 100644 subworkflows/local/pe_mapping.nf
 create mode 100644 subworkflows/local/se_mapping.nf
 create mode 100644 subworkflows/local/yaml_input.nf

diff --git a/subworkflows/local/pe_mapping.nf b/subworkflows/local/pe_mapping.nf
new file mode 100644
index 0000000..3c41670
--- /dev/null
+++ b/subworkflows/local/pe_mapping.nf
@@ -0,0 +1,116 @@
+include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_ILLUMINA } from '../../modules/nf-core/minimap2/align/main'
+include { SAMTOOLS_MERGE                            } from '../../modules/nf-core/samtools/merge/main'
+
+workflow PE_MAPPING {
+
+    take:
+    reference_tuple          // Channel [ val(meta), path(file) ]
+    pacbio_tuple             // Channel [ val(meta), val( str ) ]
+    reads_type               // Channel val( str )
+
+    main:
+    ch_versions     = Channel.empty()
+
+
+    //
+    // PROCESS: GETS PACBIO READ PATHS FROM READS_PATH
+    //
+    ch_grabbed_reads_path       = GrabFiles( pacbio_tuple )
+
+    ch_grabbed_reads_path
+        .map { meta, files ->
+            tuple( files )
+        }
+        .flatten()
+        .set { ch_reads_path }
+
+    //
+    // PROCESS: MAKE MINIMAP INPUT CHANNEL
+    //
+    reference_tuple
+        .combine( ch_reads_path )
+        .combine( reads_type )
+        .map { meta, ref, reads_path, reads_type ->
+            tuple(
+                [   id          : meta.id,
+                    single_end  : false,
+                    readtype: reads_type.toString()
+                ],
+                reads_path,
+                ref,
+                true,
+                false,
+                false,
+                reads_type
+            )
+        }
+        .set { pe_input }
+
+    //
+    // PROCESS: MULTIMAP SPLITS pe_input INTO THE [ meta, reads ] TUPLE, THE REFERENCE AND THE BOOLEAN ARGUMENTS FOR MINIMAP2
+    //
+    pe_input
+        .multiMap { meta, reads_path, ref, bam_output, cigar_paf, cigar_bam, reads_type ->
+            read_tuple          : tuple( meta, reads_path )
+            ref                 : ref
+            bool_bam_ouput      : bam_output
+            bool_cigar_paf      : cigar_paf
+            bool_cigar_bam      : cigar_bam
+        }
+        .set { illumina_input }
+
+    //
+    // MODULE: PAIRED END READ MAPPING USING MINIMAP
+    //
+    MINIMAP2_ALIGN_ILLUMINA (
+        illumina_input.read_tuple,
+        illumina_input.ref,
+        illumina_input.bool_bam_ouput,
+        [],
+        illumina_input.bool_cigar_paf,
+        illumina_input.bool_cigar_bam
+    )
+    ch_versions = ch_versions.mix(MINIMAP2_ALIGN_ILLUMINA.out.versions)
+
+    ch_bams = MINIMAP2_ALIGN_ILLUMINA.out.bam
+
+    ch_bams
+        .map { meta, file ->
+            tuple( file )
+        }
+        .collect()
+        .map { file ->
+            tuple (
+                [ id    : file[0].toString().split('/')[-1].split('_')[0] ], // Change sample ID
+                file
+            )
+        }
+        .set { collected_files_for_merge }
+
+    //
+    // MODULE: MERGE ALL OUTPUT BAM
+    //
+    SAMTOOLS_MERGE(
+        collected_files_for_merge,
+        reference_tuple,
+        [[],[]]
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions)
+
+    emit:
+    versions           = ch_versions.ifEmpty(null)
+    mapped_bam         = SAMTOOLS_MERGE.out.bam
+}
+
+process GrabFiles {
+    tag "${meta.id}"
+    executor 'local'
+
+    input:
+    tuple val(meta), path("in")
+
+    output:
+    tuple val(meta), path("in/*.{fa,fasta}.{gz}")
+
+    "true"
+}
\ No newline at end of file
diff --git a/subworkflows/local/se_mapping.nf b/subworkflows/local/se_mapping.nf
new file mode 100644
index 0000000..0340425
--- /dev/null
+++ b/subworkflows/local/se_mapping.nf
@@ -0,0 +1,115 @@
+include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_SE        } from '../../modules/nf-core/minimap2/align/main'
+include { SAMTOOLS_MERGE                             } from '../../modules/nf-core/samtools/merge/main'
+
+workflow SE_MAPPING {
+
+    take:
+    reference_tuple          // Channel [ val(meta), path(file) ]
+    pacbio_tuple             // Channel [ val(meta), path(file) ]
+    reads_type                // Channel val( str )
+
+    main:
+    ch_versions     = Channel.empty()
+    ch_align_bams   = Channel.empty()
+
+    //
+    // PROCESS: GETS PACBIO READ PATHS FROM READS_PATH
+    //
+    ch_grabbed_reads_path       = GrabFiles( pacbio_tuple )
+
+    ch_grabbed_reads_path
+        .map { meta, files ->
+            tuple( files )
+        }
+        .flatten()
+        .set { ch_reads_path }
+
+    //
+    // PROCESS: MAKE MINIMAP INPUT CHANNEL AND MAKE BRANCHES BASED ON INPUT READ TYPE
+    //
+    reference_tuple
+        .combine( ch_reads_path )
+        .combine( reads_type )
+        .map { meta, ref, reads_path, reads_type ->
+            tuple(
+                [   id          : meta.id,
+                    single_end  : true,
+                    readtype    : reads_type.toString()
+                ],
+                reads_path,
+                ref,
+                true,
+                false,
+                false,
+                reads_type
+            )
+        }
+        .set { minimap_se_input }
+
+    //
+    // PROCESS: MULTIMAP TO MAKE BOOLEAN ARGUMENTS FOR MINIMAP HIFI MAPPING INPUT
+    //
+    minimap_se_input
+        .multiMap { meta, reads_path, ref, bam_output, cigar_paf, cigar_bam, reads_type ->
+            read_tuple          : tuple( meta, reads_path)
+            ref                 : ref
+            bool_bam_ouput      : bam_output
+            bool_cigar_paf      : cigar_paf
+            bool_cigar_bam      : cigar_bam
+        }
+        .set { se_input }
+
+    //
+    // MODULE: MAPPING THE DIFFERENT TYPES OF SINGLE END READS AGAINST THE REFERENCE
+    //
+    MINIMAP2_ALIGN_SE (
+            se_input.read_tuple,
+            se_input.ref,
+            se_input.bool_bam_ouput,
+            [],
+            se_input.bool_cigar_paf,
+            se_input.bool_cigar_bam
+    )
+    ch_versions = ch_versions.mix(MINIMAP2_ALIGN_SE.out.versions)
+    ch_bams = MINIMAP2_ALIGN_SE.out.bam
+
+    ch_bams
+        .map { meta, file ->
+            tuple( file )
+        }
+        .collect()
+        .map { file ->
+            tuple (
+                [ id    : file[0].toString().split('/')[-1].split('_')[0] ], // Change sample ID
+                file
+            )
+        }
+        .set { collected_files_for_merge }
+
+    //
+    // MODULE: MERGE ALL OUTPUT BAM
+    //
+    SAMTOOLS_MERGE(
+        collected_files_for_merge,
+        reference_tuple,
+        [[],[]]
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions)
+
+    emit:
+    versions       = ch_versions.ifEmpty(null)
+    mapped_bam     = SAMTOOLS_MERGE.out.bam
+}
+
+process GrabFiles {
+    tag "${meta.id}"
+    executor 'local'
+
+    input:
+    tuple val(meta), path("in")
+
+    output:
+    tuple val(meta), path("in/*.{fa,fasta,fna}.{gz}")
+
+    "true"
+}
\ No newline at end of file
diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
new file mode 100644
index 0000000..da75cb7
--- /dev/null
+++ b/subworkflows/local/yaml_input.nf
@@ -0,0 +1,44 @@
+#!/usr/bin/env nextflow
+
+import groovy.yaml.YamlSlurper
+
+workflow YAML_INPUT {
+    take:
+    input_file          // params.input
+
+    main:
+    ch_versions             = Channel.empty()
+
+    inputs                  = new YamlSlurper().parse(file(params.input))
+
+    emit:
+    //
+    // LOGIC: Building generic channels - reference is emitted as a single [ [id: assembly_id], path ] tuple
+    //
+    sample_id               = Channel.of(inputs.assembly_id)
+    longread_type           = Channel.of(inputs.longread.type)
+    longread_dir            = Channel.of(inputs.longread.dir)
+    reference               = Channel.of([[id: inputs.assembly_id], file(inputs.reference_file, checkIfExists: true)])
+
+    //
+    // LOGIC: Building CurationPretext specific channels
+    //
+    cpretext_aligner        = Channel.of(inputs.curationpretext.aligner)
+    cpretext_telomere_motif = Channel.of([inputs.assembly_id], inputs.curationpretext.telomere_motif)
+    cpretext_hic_dir        = Channel.of([inputs.assembly_id], inputs.curationpretext.hic_dir)
+
+    //
+    // LOGIC: Building BlobToolKit specific channels
+    //
+    btk_nt_database         = Channel.of([inputs.assembly_id], inputs.btk.nt_database)
+    btk_nt_database_prefix  = Channel.of(inputs.btk.nt_database_prefix)
+    btk_nt_diamond_database = Channel.of(inputs.btk.diamond_nt_database_path)
+    btk_un_diamond_database = Channel.of(inputs.btk.diamond_uniprot_database_path)
+    btk_ncbi_taxonomy_path  = Channel.of(inputs.btk.ncbi_taxonomy_path)
+    btk_ncbi_lineage_path   = Channel.of(inputs.btk.ncbi_rankedlineage_path)
+    btk_btk_yaml            = Channel.of(inputs.btk.btk_yaml)
+    btk_taxid               = Channel.of([inputs.assembly_id], inputs.btk.taxid)
+    btk_gca_accession       = Channel.of(inputs.btk.gca_accession)
+
+    versions                = ch_versions.ifEmpty(null)
+}

From ab69ccd5c212dbd4dbb671d5676b875b4b476ca6 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 5 Aug 2024 10:16:58 +0100
Subject: [PATCH 03/52] Last weeks addition

---
 bin/generate_samplesheet.py | 44 +++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100755 bin/generate_samplesheet.py

diff --git a/bin/generate_samplesheet.py b/bin/generate_samplesheet.py
new file mode 100755
index 0000000..12af705
--- /dev/null
+++ b/bin/generate_samplesheet.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import argparse
+
+"""
+A simple script to generate a csv file required for the sanger-tol/blobtoolkit pipeline-module.
+
+Required inputs are the sample ID and the mapped BAM file generated from the PacBio data and the input FASTA assembly.
+
+Written by Damon-Lee Pointon (dp24/DLBPointon)
+"""
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Generate a csv file for BTK")
+    parser.add_argument("sample_name", type=str, help="Name of sample")
+    parser.add_argument(
+        "mapped_bam_file",
+        type=str,
+        help="Path containing the mapped BAM generated with PacBio data and the ASCC input assembly",
+    )
+    parser.add_argument("-v", "--version", action="version", version="1.0.0")
+    return parser.parse_args()
+
+
+def main():
+    """Write samplesheet.csv: a header row plus one pacbio row for the mapped BAM."""
+    args = parse_args()
+    if not args.mapped_bam_file.endswith(".bam"):
+        sys.exit("I was expecting a mapped BAM file")
+
+    rows = [
+        "sample,datatype,datafile\n",
+        f"{args.sample_name},pacbio,{args.mapped_bam_file}\n",
+    ]
+
+    with open("samplesheet.csv", "w") as file:
+        file.write("".join(rows))
+
+
+if __name__ == "__main__":
+    main()

From c458efb415a4be7a8cb8a3783025036f9baff2cb Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 5 Aug 2024 10:17:12 +0100
Subject: [PATCH 04/52] Last weeks addition

---
 assets/btk_draft.yaml | 17 +++++++++++++++++
 assets/test.yaml      | 19 +++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 assets/btk_draft.yaml
 create mode 100755 assets/test.yaml

diff --git a/assets/btk_draft.yaml b/assets/btk_draft.yaml
new file mode 100644
index 0000000..0e02351
--- /dev/null
+++ b/assets/btk_draft.yaml
@@ -0,0 +1,17 @@
+assembly:
+  level: bar
+settings:
+  foo: 0
+similarity:
+  diamond_blastx:
+    foo: 0
+taxon:
+  class: class_name
+  family: family_name
+  genus: genus_name
+  kingdom: kingdom_name
+  name: species_name
+  order: order_name
+  phylum: phylum_name
+  superkingdom: superkingdom_name
+  taxid: 0
diff --git a/assets/test.yaml b/assets/test.yaml
new file mode 100755
index 0000000..e2a9c79
--- /dev/null
+++ b/assets/test.yaml
@@ -0,0 +1,19 @@
+assembly_id: Oscheius_DF5033
+reference_file: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta
+longread:
+  type: hifi
+  dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
+curationpretext:
+  aligner: minimap2
+  telomere_motif: TTAGG
+  hic_dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/
+btk:
+  nt_database: /home/runner/work/ascc/ascc/blastdb/
+  nt_database_prefix: tiny_plasmodium_blastdb.fa
+  diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond.dmnd
+  diamond_nt_database_path: /home/runner/work/ascc/ascc/diamond.dmnd
+  ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/
+  ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp
+  btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml
+  taxid: 352914
+  gca_accession: GCA_0001

From 04adc75a35d6d7cbbcfc508fe13536d016fc03fa Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 7 Aug 2024 14:20:13 +0100
Subject: [PATCH 05/52] Adding 2 pipeline nesting method for btk

---
 modules/local/sanger_tol_btk.nf | 107 ++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 modules/local/sanger_tol_btk.nf

diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
new file mode 100644
index 0000000..fec146c
--- /dev/null
+++ b/modules/local/sanger_tol_btk.nf
@@ -0,0 +1,107 @@
+process SANGER_TOL_BTK {
+    tag "$meta.id"
+    label 'process_low'
+
+    input:
+    tuple val(meta), path(reference, stageAs: "REFERENCE.fa")
+    tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet
+    tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv")
+    path blastp, stageAs: "blastp.dmnd"
+    path blastn
+    path blastx
+    path btk_config_file
+    path tax_dump
+    path btk_yaml, stageAs: "BTK.yaml"
+    val busco_lineages
+    val taxon
+    val gca_accession
+
+    output:
+    tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/draft"),  emit: dataset
+    path("${meta.id}_btk_out/blobtoolkit/plots"),                   emit: plots
+    path("${meta.id}_btk_out/blobtoolkit/draft/summary.json.gz"),   emit: summary_json
+    path("${meta.id}_btk_out/busco"),                               emit: busco_data
+    path("${meta.id}_btk_out/multiqc"),                             emit: multiqc_report
+    path("blobtoolkit_pipeline_info"),                              emit: pipeline_info
+    path "versions.yml",                                            emit: versions
+
+    script:
+    def prefix              =   task.ext.prefix         ?:  "${meta.id}"
+    def args                =   task.ext.args           ?:  ""
+    def executor            =   task.ext.executor       ?:  ""
+    def profiles            =   task.ext.profiles       ?:  ""
+    def get_version         =   task.ext.version_data   ?:  "UNKNOWN - SETTING NOT SET"
+    def btk_config          =   btk_config_file         ? "-c $btk_config_file"         : ""
+    def pipeline_version    =   task.ext.version        ?: "main"
+    // YAML used to avoid the use of GCA accession number
+    //    https://github.com/sanger-tol/blobtoolkit/issues/77
+
+    // Seems to be an issue where a nested pipeline can't see the files in the same directory
+    // Running realpath gets around this but the files copied into the folder are
+    // now just wasted space.
+
+    // outdir should be an arg
+
+    //        --accession draft \\
+
+    // blastx and blastp use the same database hence the StageAs
+
+
+    """
+    $executor 'nextflow run sanger-tol/blobtoolkit \\
+        -r $pipeline_version \\
+        -profile  $profiles \\
+        --input "\$(realpath $samplesheet_csv)" \\
+        --outdir ${prefix}_btk_out \\
+        --fasta "\$(realpath REFERENCE.fa)" \\
+        --yaml "\$(realpath BTK.yaml)" \\
+        --busco_lineages $busco_lineages \\
+        --taxon $taxon \\
+        --taxdump "\$(realpath $tax_dump)" \\
+        --blastp "\$(realpath blastp.dmnd)" \\
+        --blastn "\$(realpath $blastn)" \\
+        --blastx "\$(realpath $blastx)" \\
+        $btk_config \\
+        $args'
+
+    mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Blobtoolkit: $pipeline_version
+        Nextflow: \$(nextflow -v | cut -d " " -f3)
+        executor system: $get_version
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix              =   task.ext.prefix         ?:  "${meta.id}"
+    def pipeline_version    =   task.ext.version        ?: "main"
+
+    """
+    mkdir -p ${prefix}_btk_out/blobtoolkit/$gca_accession
+    touch ${prefix}_btk_out/blobtoolkit/$gca_accession/test.json.gz
+
+    mkdir ${prefix}_btk_out/blobtoolkit/plots
+    touch ${prefix}_btk_out/blobtoolkit/plots/test.png
+
+    mkdir ${prefix}_btk_out/busco
+    touch ${prefix}_btk_out/busco/test.batch_summary.txt
+    touch ${prefix}_btk_out/busco/test.fasta.txt
+    touch ${prefix}_btk_out/busco/test.json
+
+    mkdir ${prefix}_btk_out/multiqc
+    mkdir ${prefix}_btk_out/multiqc/multiqc_data
+    mkdir ${prefix}_btk_out/multiqc/multiqc_plots
+    touch ${prefix}_btk_out/multiqc/multiqc_report.html
+
+    mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Blobtoolkit: $pipeline_version
+        Nextflow: \$(nextflow -v | cut -d " " -f3)
+        executor system: ${task.ext.version_data ?: 'UNKNOWN - SETTING NOT SET'}
+    END_VERSIONS
+    """
+}

From ba74c101264753f2b6df53c07f036191f0db3642 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 7 Aug 2024 14:20:31 +0100
Subject: [PATCH 06/52] Updates to add BTK

---
 assets/test.yaml                 |  17 ++--
 conf/modules.config              |  13 +--
 subworkflows/local/se_mapping.nf |  18 ++---
 subworkflows/local/yaml_input.nf |  77 +++++++++++++++---
 workflows/ear.nf                 | 135 ++++++++++++++++++++-----------
 5 files changed, 178 insertions(+), 82 deletions(-)

diff --git a/assets/test.yaml b/assets/test.yaml
index e2a9c79..f0a0fd5 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -1,5 +1,6 @@
 assembly_id: Oscheius_DF5033
-reference_file: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta
+reference_hap1: /nfs/users/nfs_d/dp24/sanger-tol-ear/test.fa
+reference_hap2: /nfs/users/nfs_d/dp24/sanger-tol-ear/test.fa
 longread:
   type: hifi
   dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
@@ -7,13 +8,17 @@ curationpretext:
   aligner: minimap2
   telomere_motif: TTAGG
   hic_dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/
+merquryfk:
+  fastk_hist: "./"
+  fastk_ktab: "./"
 btk:
-  nt_database: /home/runner/work/ascc/ascc/blastdb/
+  nt_database: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_blast_tiny_testdb/blastdb/
   nt_database_prefix: tiny_plasmodium_blastdb.fa
-  diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond.dmnd
-  diamond_nt_database_path: /home/runner/work/ascc/ascc/diamond.dmnd
-  ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/
-  ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp
+  diamond_uniprot_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_diamond_tiny_testdb/ascc_tinytest_diamond_db.dmnd
+  diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd
+  ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump
+  ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp
   btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml
   taxid: 352914
   gca_accession: GCA_0001
+  lineages: "diptera_odb10,insecta_odb10"
diff --git a/conf/modules.config b/conf/modules.config
index d203d2b..388b183 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -18,17 +18,12 @@ process {
         saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
     ]
 
-    withName: FASTQC {
-        ext.args = '--quiet'
+    withName: GFASTATS {
+        ext.args = '--nstar-report'
     }
 
-    withName: 'MULTIQC' {
-        ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
-        publishDir = [
-            path: { "${params.outdir}/multiqc" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
+    withName: MERQURYFK_MERQURYFK {
+        ext.args        = "-P."
     }
 
 }
diff --git a/subworkflows/local/se_mapping.nf b/subworkflows/local/se_mapping.nf
index 0340425..8c7ad52 100644
--- a/subworkflows/local/se_mapping.nf
+++ b/subworkflows/local/se_mapping.nf
@@ -15,22 +15,22 @@ workflow SE_MAPPING {
     //
     // PROCESS: GETS PACBIO READ PATHS FROM READS_PATH
     //
-    ch_grabbed_reads_path       = GrabFiles( pacbio_tuple )
+    ch_grabbed_reads_path       = GrabFiles(pacbio_tuple)
 
     ch_grabbed_reads_path
-        .map { meta, files ->
-            tuple( files )
+        .map {meta, files ->
+            tuple(files)
         }
         .flatten()
-        .set { ch_reads_path }
+        .set {ch_reads_path}
 
     //
     // PROCESS: MAKE MINIMAP INPUT CHANNEL AND MAKE BRANCHES BASED ON INPUT READ TYPE
     //
-    reference_tuple
-        .combine( ch_reads_path )
+    ch_reads_path
+        .combine( reference_tuple )
         .combine( reads_type )
-        .map { meta, ref, reads_path, reads_type ->
+        .map { reads_path, meta, ref, reads_type ->
             tuple(
                 [   id          : meta.id,
                     single_end  : true,
@@ -44,7 +44,7 @@ workflow SE_MAPPING {
                 reads_type
             )
         }
-        .set { minimap_se_input }
+        .set {minimap_se_input}
 
     //
     // PROCESS: MULTIMAP TO MAKE BOOLEAN ARGUMENTS FOR MINIMAP HIFI MAPPING INPUT
@@ -52,7 +52,7 @@ workflow SE_MAPPING {
     minimap_se_input
         .multiMap { meta, reads_path, ref, bam_output, cigar_paf, cigar_bam, reads_type ->
             read_tuple          : tuple( meta, reads_path)
-            ref                 : ref
+            ref                 : tuple( meta, ref)
             bool_bam_ouput      : bam_output
             bool_cigar_paf      : cigar_paf
             bool_cigar_bam      : cigar_bam
diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
index da75cb7..51350dd 100644
--- a/subworkflows/local/yaml_input.nf
+++ b/subworkflows/local/yaml_input.nf
@@ -7,25 +7,81 @@ workflow YAML_INPUT {
     input_file          // params.input
 
     main:
-    ch_versions             = Channel.empty()
+    ch_versions                 = Channel.empty()
 
-    inputs                  = new YamlSlurper().parse(file(params.input))
+    inputs                      = new YamlSlurper().parse(file(params.input))
+
+    sample_id                   = Channel.of(inputs.assembly_id)
+    longread_type               = Channel.of(inputs.longread.type)
+    longread_dir                = Channel.of(inputs.longread.dir)
+
+    sample_id
+        .combine(longread_dir)
+        .map{sample, dir ->
+                tuple([id: sample],
+                dir
+            )
+        }
+        .set {pacbio_tuple}
+
+    reference_1                 = Channel.fromPath(inputs.reference_hap1, checkIfExists: true)
+    reference_2                 = Channel.fromPath(inputs.reference_hap2, checkIfExists: true)
+
+    reference_1
+        .combine(sample_id)
+        .map{ref, sample_id ->
+            tuple([id:sample_id], ref)
+        }
+        .set{reference_hap1}
+
+
+
+    cpretext_aligner            = Channel.of(inputs.curationpretext.aligner)
+    cpretext_telomere_motif_raw = Channel.of(inputs.curationpretext.telomere_motif)
+    cpretext_hic_dir_raw        = Channel.of(inputs.curationpretext.hic_dir)
+
+    sample_id
+        .combine(cpretext_telomere_motif_raw)
+        .map{sample, dir ->
+                tuple([id: sample],
+                dir
+            )
+        }
+        .set {cpretext_telomere_motif}
+
+    sample_id
+        .combine(cpretext_hic_dir_raw)
+        .map{sample, dir ->
+                tuple([id: sample],
+                dir
+            )
+        }
+        .set {cpretext_hic_dir}
 
     emit:
     //
     // LOGIC: Building generic channels
     //
-    sample_id               = Channel.of(inputs.assembly_id)
-    longread_type           = Channel.of(inputs.longread.type)
-    longread_dir            = Channel.of(inputs.longread.dir)
-    reference               = Channel.fromPath([inputs.assembly_id], inputs.reference_file, checkIfExists: true)
+    sample_id
+    longread_type                                               // val(data)
+    longread_dir            = inputs.longread.dir               // DataVariable
+    pacbio_tuple                                                // tuple (meta), path(file)
+    reference_hap1          = reference_hap1                    // tuple (meta), path(file)
+    reference_hap2          = reference_2                       // DataVariable
+    reference_path          = inputs.reference_hap1             // DataVariable
 
     //
     // LOGIC: Building CurationPretext specific channels
     //
-    cpretext_aligner        = Channel.of(inputs.curationpretext.aligner)
-    cpretext_telomere_motif = Channel.of([inputs.assembly_id], inputs.curationpretext.telomere_motif)
-    cpretext_hic_dir        = Channel.of([inputs.assembly_id], inputs.curationpretext.hic_dir)
+    cpretext_aligner
+    cpretext_telomere_motif
+    cpretext_hic_dir_raw    = inputs.curationpretext.hic_dir    // DataVariable
+
+    //
+    // LOGIC: MERQURY CHANNELS
+    //
+    fastk_hist              = Channel.of(inputs.merquryfk.fastk_hist)
+    fastk_ktab              = Channel.of(inputs.merquryfk.fastk_ktab)
 
     //
     // LOGIC: Building BlobToolKit specific channels
@@ -36,9 +92,10 @@ workflow YAML_INPUT {
     btk_un_diamond_database = Channel.of(inputs.btk.diamond_uniprot_database_path)
     btk_ncbi_taxonomy_path  = Channel.of(inputs.btk.ncbi_taxonomy_path)
     btk_ncbi_lineage_path   = Channel.of(inputs.btk.ncbi_rankedlineage_path)
-    btk_btk_yaml            = Channel.of(inputs.btk.btk_yaml)
+    btk_yaml                = Channel.of(inputs.btk.btk_yaml)
     btk_taxid               = Channel.of([inputs.assembly_id], inputs.btk.taxid)
     btk_gca_accession       = Channel.of(inputs.btk.gca_accession)
+    busco_lineages          = Channel.of(inputs.btk.lineages)
 
     versions                = ch_versions.ifEmpty(null)
 }
diff --git a/workflows/ear.nf b/workflows/ear.nf
index b35a949..ac0193e 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -6,6 +6,7 @@
 
 include { NEXTFLOW_RUN as CURATIONPRETEXT   } from '../modules/local/nextflow/run'
 include { NEXTFLOW_RUN as BLOBTOOLKIT       } from '../modules/local/nextflow/run'
+include { SANGER_TOL_BTK                    } from '../modules/local/sanger_tol_btk'
 
 include { YAML_INPUT                        } from '../subworkflows/local/yaml_input'
 include { GENERATE_SAMPLESHEET              } from '../modules/local/generate_samplesheet'
@@ -39,13 +40,15 @@ workflow EAR {
     // MODULE: YAML_INPUT
     //
     YAML_INPUT(ch_input)
-    reference = YAML_INPUT.out.reference
-    reference.view()
 
     //
     // MODULE: Run Sanger-ToL/CurationPretext
     //         - This was built using: https://github.com/mahesh-panchal/nf-cascade
     //
+    reference = YAML_INPUT.out.reference_path.get()
+    hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get()
+    longread_dir = YAML_INPUT.out.longread_dir.get()
+
     CURATIONPRETEXT(
         "sanger-tol/curationpretext",
         [
@@ -53,23 +56,23 @@ workflow EAR {
             "--input",
             reference,
             "--longread",
-            YAML_INPUT.out.longread_dir,
+            longread_dir,
             "--cram",
-            YAML_INPUT.out.cpretext_hic_dir,
-            "$params.outdir/curationpretext",
+            hic_dir,
             "-profile singularity,sanger"
-        ].join(" ").trim(),                                            // workflow opts
+        ].join(" ").trim(), // workflow opts
         Channel.value([]),  //readWithDefault( params.demo.params_file, Channel.value([]) ), // params file
         Channel.value([]),  // samplesheet - not used by this pipeline
         Channel.value([])   //readWithDefault( params.demo.add_config, Channel.value([]) ),  // custom config
-
+        //"$params.outdir/curationpretext",
     )
 
     //
     // MODULE: ASSEMBLY STATISTICS FOR THE FASTA
     //
+
     GFASTATS(
-        YAML_INPUT.out.reference,
+        YAML_INPUT.out.reference_hap1,
         "fasta",
         [],
         [],
@@ -79,38 +82,45 @@ workflow EAR {
         []
     )
 
-    // //
-    // // LOGIC:  REFORMAT A BUNCH OF CHANNELS FOR MERQUERYFK
-    // //
-    // YAML_INPUT.out.reference
-    //     .combine()
-    //     .combine()
-    //     .combine()
-    //     .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab ->
-    //         tuple(  meta,
-    //                 fastk_hist,
-    //                 fastk_ktab,
-    //                 primary,
-    //                 haplotigs
-    //         )
-    //     }
-    //     .set { merquryfk_input }
-
-    // //
-    // // MODULE: MERQURYFK PLOTS OF GENOME
-    // //
-
-    // MERQURYFK(
-    //     merquryfk_input
-    // )
+    //
+    // LOGIC:  REFORMAT A BUNCH OF CHANNELS FOR MERQUERYFK
+    //
+
+    if (params.reference_hap2) {
+        YAML_INPUT.out.reference_hap1
+            .combine(YAML_INPUT.out.reference_hap2)
+            .combine(YAML_INPUT.out.fastk_hist)
+            .combine(YAML_INPUT.out.fastk_ktab)
+            .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab ->
+                tuple(  meta,
+                        fastk_hist,
+                        fastk_ktab,
+                        primary,
+                        haplotigs
+                )
+            }
+            .set { merquryfk_input }
+
+        //
+        // MODULE: MERQURYFK PLOTS OF GENOME
+        //
+
+        MERQURYFK(
+            merquryfk_input
+        )
+    }
 
     //
-    // LOGIC: SANGER-TOL/BLOBTOOLKIT expects the pacbio data to be already mapped
+    // LOGIC: SANGER-TOL/BLOBTOOLKIT expects the pacbio data to be already mapped -> this has been changed but seeing as BTK and genomenote need it then we may as well keep it.
+    //          This is also a requirement for genomenote
     //
     platform = YAML_INPUT.out.longread_type
 
     YAML_INPUT.out.sample_id
         .combine(YAML_INPUT.out.longread_dir)
+        .map{ sample, dir ->
+            tuple([id: sample], dir )
+        }
         .set {pacbio_tuple}
 
     if ( platform.filter { it == "hifi" } || platform.filter { it == "clr" } || platform.filter { it == "ont" } ) {
@@ -118,8 +128,8 @@ workflow EAR {
         // SUBWORKFLOW: SINGLE END MAPPING FOR ALIGNING LONGREAD DATA
         //
         SE_MAPPING (
-            YAML_INPUT.out.reference,
-            pacbio_tuple,
+            YAML_INPUT.out.reference_hap1,
+            YAML_INPUT.out.pacbio_tuple,
             platform
         )
         ch_versions = ch_versions.mix(SE_MAPPING.out.versions)
@@ -133,8 +143,8 @@ workflow EAR {
         // SUBWORKFLOW: PAIRED END MAPPING FOR ALIGNING LONGREAD DATA
         //
         PE_MAPPING  (
-            YAML_INPUT.out.reference,
-            pacbio_tuple,
+            YAML_INPUT.out.reference_hap1,
+            YAML_INPUT.out.pacbio_tuple,
             platform
         )
         ch_versions = ch_versions.mix(PE_MAPPING.out.versions)
@@ -149,7 +159,7 @@ workflow EAR {
     //
     SAMTOOLS_SORT (
         merged_bam,
-        YAML_INPUT.out.reference
+        YAML_INPUT.out.reference_hap1
     )
     ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions )
 
@@ -174,35 +184,50 @@ workflow EAR {
     // MODULE: Run Sanger-ToL/BlobToolKit
     //         - This was built using: https://github.com/mahesh-panchal/nf-cascade
     //
+
     // BLOBTOOLKIT(
     //     "sanger-tol/blobtoolkit",
     //     [
-    //         "-r 0.4.0",
+    //         "-r 0.5.0",
     //         "--input",
     //         GENERATE_SAMPLESHEET.out.csv,
     //         "--fasta",
     //         reference,
-    //         "--accession",
-    //         YAML_INPUT.out.btk_gca_accession,
+    //         "--yaml",
+    //         btk_yaml,
     //         "-taxon",
-    //         YAML_INPUT.out.btk_taxid,
+    //         btk_taxon,
     //         "--taxdump",
-    //         YAML_INPUT.out.btk_ncbi_taxonomy_path,
+    //         btk_taxdump,
     //         "--blastp",
-    //         YAML_INPUT.out.btk_nt_diamond_database,
+    //         btk_blastp,
     //         "--blastn",
-    //         YAML_INPUT.out.btk_nt_database,
+    //         btk_blastn,
     //         "--blastx",
-    //         YAML_INPUT.out.btk_nt_diamond_database,
-    //         "$params.outdir/blobtoolkit",
+    //         btk_uniprot,
     //         "-profile singularity,sanger"
     //     ].join(" ").trim(),                                                                 // workflow opts
     //     Channel.value([]),//readWithDefault( params.demo.params_file, Channel.value([]) ),  // params file
     //     Channel.value([]),//readWithDefault( params.demo.input, Channel.value([]) ),        // samplesheet
     //     Channel.value([])//readWithDefault( params.demo.add_config, Channel.value([]) ),    // custom config
-
     // )
 
+        SANGER_TOL_BTK (
+            YAML_INPUT.out.reference_hap1,
+            samplesheet_input,
+            GENERATE_SAMPLESHEET.out.csv,
+            YAML_INPUT.out.btk_un_diamond_database,
+            YAML_INPUT.out.btk_nt_diamond_database,
+            YAML_INPUT.out.btk_un_diamond_database,
+            [],
+            YAML_INPUT.out.btk_ncbi_taxonomy_path,
+            YAML_INPUT.out.btk_yaml,
+            YAML_INPUT.out.busco_lineages,
+            YAML_INPUT.out.btk_taxid,
+            'GCA_0001'
+        )
+        ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
+
     //
     // Collate and save software versions
     //
@@ -224,6 +249,20 @@ workflow EAR {
     versions       = ch_versions                 // channel: [ path(versions.yml) ]
 }
 
+
+process RenameDatabase {
+    tag "Rename DMND Database"
+    executor 'local'
+
+    input:
+    path db_path, stageAs: "UN.dmnd" // NOTE(review): qualifier was missing; staging under the output name is assumed to be the intended "rename" - confirm
+
+    output:
+    path "UN.dmnd"
+
+    "true"
+}
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     THE END

From 18c3e15ac6a6fe9d7253a53d7a44482abc146539 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 8 Aug 2024 15:34:52 +0100
Subject: [PATCH 07/52] Updates and additions

---
 conf/modules.config | 4 ++++
 workflows/ear.nf    | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index 388b183..405d1d6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -26,4 +26,8 @@ process {
         ext.args        = "-P."
     }
 
+    withName: SAMTOOLS_SORT {
+        ext.prefix      = { "${meta.id}_sorted"}
+    }
+
 }
diff --git a/workflows/ear.nf b/workflows/ear.nf
index ac0193e..1df63e5 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -134,6 +134,8 @@ workflow EAR {
         )
         ch_versions = ch_versions.mix(SE_MAPPING.out.versions)
 
+        SE_MAPPING.out.mapped_bam.view()
+
         ch_align_bam
             .mix( SE_MAPPING.out.mapped_bam )
             .set { merged_bam }
@@ -154,6 +156,8 @@ workflow EAR {
             .set { merged_bam }
     }
 
+    merged_bam.view()
+
     //
     // MODULE: SORT MAPPED BAM
     //

From 24700823fc282b9518ef2fde5a80ad727a56cb62 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 8 Aug 2024 15:35:24 +0100
Subject: [PATCH 08/52] Updates and additions

---
 workflows/ear.nf | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/workflows/ear.nf b/workflows/ear.nf
index 1df63e5..f068a6e 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -216,21 +216,21 @@ workflow EAR {
     //     Channel.value([])//readWithDefault( params.demo.add_config, Channel.value([]) ),    // custom config
     // )
 
-        SANGER_TOL_BTK (
-            YAML_INPUT.out.reference_hap1,
-            samplesheet_input,
-            GENERATE_SAMPLESHEET.out.csv,
-            YAML_INPUT.out.btk_un_diamond_database,
-            YAML_INPUT.out.btk_nt_diamond_database,
-            YAML_INPUT.out.btk_un_diamond_database,
-            [],
-            YAML_INPUT.out.btk_ncbi_taxonomy_path,
-            YAML_INPUT.out.btk_yaml,
-            YAML_INPUT.out.busco_lineages,
-            YAML_INPUT.out.btk_taxid,
-            'GCA_0001'
-        )
-        ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
+    SANGER_TOL_BTK (
+        YAML_INPUT.out.reference_hap1,
+        samplesheet_input,
+        GENERATE_SAMPLESHEET.out.csv,
+        YAML_INPUT.out.btk_un_diamond_database,
+        YAML_INPUT.out.btk_nt_diamond_database,
+        YAML_INPUT.out.btk_un_diamond_database,
+        [],
+        YAML_INPUT.out.btk_ncbi_taxonomy_path,
+        YAML_INPUT.out.btk_yaml,
+        YAML_INPUT.out.busco_lineages,
+        YAML_INPUT.out.btk_taxid,
+        'GCA_0001'
+    )
+    ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
 
     //
     // Collate and save software versions

From 56760f8f38f412727309ec22d195feb43ea7678e Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 9 Aug 2024 13:16:51 +0100
Subject: [PATCH 09/52] Updates to complete skeleton of pipeline

---
 assets/test.yaml                 |  4 +--
 conf/modules.config              |  8 +++++
 modules/local/sanger_tol_btk.nf  | 60 +++++++++++++++-----------------
 subworkflows/local/yaml_input.nf |  6 ++--
 workflows/ear.nf                 | 52 ++++++++-------------------
 5 files changed, 56 insertions(+), 74 deletions(-)

diff --git a/assets/test.yaml b/assets/test.yaml
index f0a0fd5..d4da164 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -1,6 +1,6 @@
 assembly_id: Oscheius_DF5033
-reference_hap1: /nfs/users/nfs_d/dp24/sanger-tol-ear/test.fa
-reference_hap2: /nfs/users/nfs_d/dp24/sanger-tol-ear/test.fa
+reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
+reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
 longread:
   type: hifi
   dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
diff --git a/conf/modules.config b/conf/modules.config
index 405d1d6..a96a69f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -30,4 +30,12 @@ process {
         ext.prefix      = { "${meta.id}_sorted"}
     }
 
+    withName: SANGER_TOL_BTK {
+        ext.args            = "--blastx_outext 'txt'"
+        ext.executor        = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'"
+        ext.profiles        = "singularity,sanger"
+        ext.version_data    = "\$(lsid | head -n1 | cut -d ',' -f 1)"   // key must match task.ext.version_data in SANGER_TOL_BTK; bash expands it inside the versions.yml heredoc
+        ext.version         = "draft_assemblies"
+    }
+
 }
diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index fec146c..27e3ca0 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -17,34 +17,32 @@ process SANGER_TOL_BTK {
     val gca_accession
 
     output:
-    tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/draft"),  emit: dataset
-    path("${meta.id}_btk_out/blobtoolkit/plots"),                   emit: plots
-    path("${meta.id}_btk_out/blobtoolkit/draft/summary.json.gz"),   emit: summary_json
-    path("${meta.id}_btk_out/busco"),                               emit: busco_data
-    path("${meta.id}_btk_out/multiqc"),                             emit: multiqc_report
-    path("blobtoolkit_pipeline_info"),                              emit: pipeline_info
-    path "versions.yml",                                            emit: versions
+    tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/REFERENCE"),      emit: dataset
+    path("${meta.id}_btk_out/blobtoolkit/plots"),                           emit: plots
+    path("${meta.id}_btk_out/blobtoolkit/REFERENCE/summary.json.gz"),     emit: summary_json
+    path("${meta.id}_btk_out/busco"),                                       emit: busco_data
+    path("${meta.id}_btk_out/multiqc"),                                     emit: multiqc_report
+    path("blobtoolkit_pipeline_info"),                                      emit: pipeline_info
+    path "versions.yml",                                                    emit: versions
 
     script:
-    def prefix              =   task.ext.prefix         ?:  "${meta.id}"
     def args                =   task.ext.args           ?:  ""
     def executor            =   task.ext.executor       ?:  ""
     def profiles            =   task.ext.profiles       ?:  ""
     def get_version         =   task.ext.version_data   ?:  "UNKNOWN - SETTING NOT SET"
     def btk_config          =   btk_config_file         ? "-c $btk_config_file"         : ""
-    def pipeline_version    =   task.ext.version        ?: "main"
+    def pipeline_version    =   task.ext.version        ?: "draft_assemblies"
     // YAML used to avoid the use of GCA accession number
     //    https://github.com/sanger-tol/blobtoolkit/issues/77
 
     // Seems to be an issue where a nested pipeline can't see the files in the same directory
     // Running realpath gets around this but the files copied into the folder are
-    // now just wasted space.
+    // now just wasted space. Should be fixed with using Mahesh's method of nesting but
+    // this is proving a bit complicated with BTK
 
     // outdir should be an arg
 
-    //        --accession draft \\
-
-    // blastx and blastp use the same database hence the StageAs
+    // blastx and blastp can use the same database hence the StageAs
 
 
     """
@@ -52,9 +50,8 @@ process SANGER_TOL_BTK {
         -r $pipeline_version \\
         -profile  $profiles \\
         --input "\$(realpath $samplesheet_csv)" \\
-        --outdir ${prefix}_btk_out \\
-        --fasta "\$(realpath REFERENCE.fa)" \\
-        --yaml "\$(realpath BTK.yaml)" \\
+        --outdir ${meta.id}_btk_out \\
+        --fasta ./REFERENCE.fa \\
         --busco_lineages $busco_lineages \\
         --taxon $taxon \\
         --taxdump "\$(realpath $tax_dump)" \\
@@ -64,7 +61,7 @@ process SANGER_TOL_BTK {
         $btk_config \\
         $args'
 
-    mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
+    mv ${meta.id}_btk_out/pipeline_info blobtoolkit_pipeline_info
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -75,27 +72,26 @@ process SANGER_TOL_BTK {
     """
 
     stub:
-    def prefix              =   task.ext.prefix         ?:  "${meta.id}"
-    def pipeline_version    =   task.ext.version        ?: "main"
+    def pipeline_version    =   task.ext.version        ?: "draft_assemblies"
 
     """
-    mkdir -p ${prefix}_btk_out/blobtoolkit/$gca_accession
-    touch ${prefix}_btk_out/blobtoolkit/$gca_accession/test.json.gz
+    mkdir -p ${meta.id}_btk_out/blobtoolkit/${meta.id}_out
+    touch ${meta.id}_btk_out/blobtoolkit/${meta.id}_out/test.json.gz
 
-    mkdir ${prefix}_btk_out/blobtoolkit/plots
-    touch ${prefix}_btk_out/blobtoolkit/plots/test.png
+    mkdir ${meta.id}_btk_out/blobtoolkit/plots
+    touch ${meta.id}_btk_out/blobtoolkit/plots/test.png
 
-    mkdir ${prefix}_btk_out/busco
-    touch ${prefix}_btk_out/busco/test.batch_summary.txt
-    touch ${prefix}_btk_out/busco/test.fasta.txt
-    touch ${prefix}_btk_out/busco/test.json
+    mkdir ${meta.id}_btk_out/busco
+    touch ${meta.id}_btk_out/busco/test.batch_summary.txt
+    touch ${meta.id}_btk_out/busco/test.fasta.txt
+    touch ${meta.id}_btk_out/busco/test.json
 
-    mkdir ${prefix}_btk_out/multiqc
-    mkdir ${prefix}_btk_out/multiqc/multiqc_data
-    mkdir ${prefix}_btk_out/multiqc/multiqc_plots
-    touch ${prefix}_btk_out/multiqc/multiqc_report.html
+    mkdir ${meta.id}_btk_out/multiqc
+    mkdir ${meta.id}_btk_out/multiqc/multiqc_data
+    mkdir ${meta.id}_btk_out/multiqc/multiqc_plots
+    touch ${meta.id}_btk_out/multiqc/multiqc_report.html
 
-    mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
+    mv ${meta.id}_btk_out/pipeline_info blobtoolkit_pipeline_info
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
index 51350dd..687c5db 100644
--- a/subworkflows/local/yaml_input.nf
+++ b/subworkflows/local/yaml_input.nf
@@ -86,14 +86,14 @@ workflow YAML_INPUT {
     //
     // LOGIC: Building BlobToolKit specific channels
     //
-    btk_nt_database         = Channel.of([inputs.assembly_id], inputs.btk.nt_database)
+    btk_nt_database         = Channel.of(inputs.btk.nt_database)
     btk_nt_database_prefix  = Channel.of(inputs.btk.nt_database_prefix)
-    btk_nt_diamond_database = Channel.of(inputs.btk.diamond_nt_database_path)
+    btk_nt_diamond_database = Channel.of(inputs.btk.diamond_nr_database_path)
     btk_un_diamond_database = Channel.of(inputs.btk.diamond_uniprot_database_path)
     btk_ncbi_taxonomy_path  = Channel.of(inputs.btk.ncbi_taxonomy_path)
     btk_ncbi_lineage_path   = Channel.of(inputs.btk.ncbi_rankedlineage_path)
     btk_yaml                = Channel.of(inputs.btk.btk_yaml)
-    btk_taxid               = Channel.of([inputs.assembly_id], inputs.btk.taxid)
+    btk_taxid               = Channel.of(inputs.btk.taxid)
     btk_gca_accession       = Channel.of(inputs.btk.gca_accession)
     busco_lineages          = Channel.of(inputs.btk.lineages)
 
diff --git a/workflows/ear.nf b/workflows/ear.nf
index f068a6e..9f90920 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -134,8 +134,6 @@ workflow EAR {
         )
         ch_versions = ch_versions.mix(SE_MAPPING.out.versions)
 
-        SE_MAPPING.out.mapped_bam.view()
-
         ch_align_bam
             .mix( SE_MAPPING.out.mapped_bam )
             .set { merged_bam }
@@ -156,8 +154,6 @@ workflow EAR {
             .set { merged_bam }
     }
 
-    merged_bam.view()
-
     //
     // MODULE: SORT MAPPED BAM
     //
@@ -172,56 +168,38 @@ workflow EAR {
     //
     YAML_INPUT.out.sample_id
         .combine(merged_bam)
-        .map{ sample_id, pacbio_path ->
+        .map{ sample_id, pacbio_meta, pacbio_path ->
             tuple(  [id: sample_id],
                     pacbio_path
             )
         }
-        .set { samplesheet_input }
+        .set { mapped_bam }
 
 
     GENERATE_SAMPLESHEET(
-        samplesheet_input
+        mapped_bam
     )
+    ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
 
     //
     // MODULE: Run Sanger-ToL/BlobToolKit
-    //         - This was built using: https://github.com/mahesh-panchal/nf-cascade
     //
-
-    // BLOBTOOLKIT(
-    //     "sanger-tol/blobtoolkit",
-    //     [
-    //         "-r 0.5.0",
-    //         "--input",
-    //         GENERATE_SAMPLESHEET.out.csv,
-    //         "--fasta",
-    //         reference,
-    //         "--yaml",
-    //         btk_yaml,
-    //         "-taxon",
-    //         btk_taxon,
-    //         "--taxdump",
-    //         btk_taxdump,
-    //         "--blastp",
-    //         btk_blastp,
-    //         "--blastn",
-    //         btk_blastn,
-    //         "--blastx",
-    //         btk_uniprot,
-    //         "-profile singularity,sanger"
-    //     ].join(" ").trim(),                                                                 // workflow opts
-    //     Channel.value([]),//readWithDefault( params.demo.params_file, Channel.value([]) ),  // params file
-    //     Channel.value([]),//readWithDefault( params.demo.input, Channel.value([]) ),        // samplesheet
-    //     Channel.value([])//readWithDefault( params.demo.add_config, Channel.value([]) ),    // custom config
-    // )
+    YAML_INPUT.out.reference_hap1.view{ it -> "Reference: $it"}
+    mapped_bam.view{ it -> "samplesheet: $it"}
+    GENERATE_SAMPLESHEET.out.csv.view{ it -> "samplesheetcsv: $it"}
+    YAML_INPUT.out.btk_un_diamond_database.view{ it -> "un diamond: $it"}
+    YAML_INPUT.out.btk_nt_database.view{ it -> "nt diamond: $it"}
+    YAML_INPUT.out.btk_ncbi_taxonomy_path.view{ it -> "Taxdump: $it"}
+    YAML_INPUT.out.btk_yaml.view{ it -> "btk_yaml: $it"}
+    YAML_INPUT.out.busco_lineages.view{ it -> "lineages: $it"}
+    YAML_INPUT.out.btk_taxid.view{ it -> "TAXID: $it"}
 
     SANGER_TOL_BTK (
         YAML_INPUT.out.reference_hap1,
-        samplesheet_input,
+        mapped_bam,
         GENERATE_SAMPLESHEET.out.csv,
         YAML_INPUT.out.btk_un_diamond_database,
-        YAML_INPUT.out.btk_nt_diamond_database,
+        YAML_INPUT.out.btk_nt_database,
         YAML_INPUT.out.btk_un_diamond_database,
         [],
         YAML_INPUT.out.btk_ncbi_taxonomy_path,

From e24bba1ecd3e549554b62a30da5e0f0a261ff9df Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 19 Aug 2024 11:02:31 +0100
Subject: [PATCH 10/52] Updating nesting and fixing stuff

---
 assets/idCulLati1.yaml                      |  26 +++
 assets/real_pdf.yaml                        |  45 +++++
 assets/template_pdf.yaml                    |  45 +++++
 assets/test.yaml                            |   1 +
 conf/base.config                            |   4 +
 conf/modules.config                         |  22 ++-
 conf/sanger-tol-btk.config                  |   7 +
 modules.json                                |  47 +++--
 modules/local/sanger_tol_btk.nf             |  11 +-
 modules/local/sanger_tol_cpretext.nf        |  50 ++++++
 modules/nf-core/merquryfk/merquryfk/main.nf |   7 +-
 subworkflows/local/main_mapping.nf          |  77 ++++++++
 subworkflows/local/yaml_input.nf            |  62 ++++---
 workflows/ear.nf                            | 188 ++++++++------------
 14 files changed, 428 insertions(+), 164 deletions(-)
 create mode 100644 assets/idCulLati1.yaml
 create mode 100644 assets/real_pdf.yaml
 create mode 100644 assets/template_pdf.yaml
 create mode 100644 conf/sanger-tol-btk.config
 create mode 100644 modules/local/sanger_tol_cpretext.nf
 create mode 100644 subworkflows/local/main_mapping.nf

diff --git a/assets/idCulLati1.yaml b/assets/idCulLati1.yaml
new file mode 100644
index 0000000..85479be
--- /dev/null
+++ b/assets/idCulLati1.yaml
@@ -0,0 +1,26 @@
+assembly_id: idCulLati1_ear
+reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/primary.fa
+reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/hap2.fa
+mapped_bam: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/mapped_bam.bam
+longread:
+  type: hifi
+  dir: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/fasta/
+curationpretext:
+  aligner: minimap2
+  telomere_motif: TTAGG
+  hic_dir: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati2/hic-arima2/
+merquryfk:
+  fastk_hist: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/idCulLati1.k31.hist
+  fastk_ktab: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/
+btk:
+  nt_database: /data/blastdb/Supported/NT/current
+  nt_database_prefix: nt
+  diamond_uniprot_database_path: /lustre/scratch123/tol/resources/uniprot_reference_proteomes/latest/reference_proteomes.dmnd
+  diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd
+  ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump/
+  ncbi_rankedlineage_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump/rankedlineage.dmp
+  btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml
+  taxid: 1464561
+  gca_accession: GCA_0001
+  lineages: "insecta_odb10"
+  config: /nfs/treeoflife-01/teams/tola/users/dp24/ear/conf/sanger-tol-btk.config
diff --git a/assets/real_pdf.yaml b/assets/real_pdf.yaml
new file mode 100644
index 0000000..8f8d4a0
--- /dev/null
+++ b/assets/real_pdf.yaml
@@ -0,0 +1,45 @@
+# SAMPLE INFORMATION
+ToLID: idCulLati1
+Species: Culex laticinctus
+Sex: XX
+Submitter: Michael Paulini
+Affiliation: WSI
+Tags: ERGA-BGE
+
+# SEQUENCING DATA
+DATA:
+  - PacBio HiFi: 51x
+  - Arima v2: 152x
+
+# GENOME PROFILING DATA
+PROFILING:
+  GenomeScope:
+    version: 2.0
+    results_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/
+
+# ASSEMBLY DATA
+ASSEMBLIES:
+  Pre-curation:
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|]
+    pri:
+      gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats
+      busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt
+      merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/
+
+  Curated:
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|, TreeVal_v1.1]
+    pri:
+      gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.fa.gfastats
+      busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.insecta_odb10.busco/short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt
+      merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.ccs.merquryk/
+      hic_FullMap_png: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1_normal_snapshots/idCulLati1.1_normal_FullMap.png
+      hic_FullMap_link: https://tolqc.cog.sanger.ac.uk/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1_normal.pretext
+      blobplot_cont_png: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1_primary_curated_btk_busco.blob.circle.png
+
+# CURATION NOTES
+NOTES:
+  Obs_Haploid_num: 3
+  Obs_Sex: XX
+  Interventions_per_Gb: 430
+  Contamination_notes: "Total length of scaffolds removed: 989,717 (0.1 %)\nScaffolds removed: 1 (0.2 %)\nLargest scaffold removed: (989,717)\nFCS-GX contaminant species (number of scaffolds; total length of scaffolds):\nWolbachia endosymbiont (group B) of Melanostoma mellinum, a-proteobacteria (1; 989,717)"
+  Other_notes: "Chromosomes named by size"
diff --git a/assets/template_pdf.yaml b/assets/template_pdf.yaml
new file mode 100644
index 0000000..3779c19
--- /dev/null
+++ b/assets/template_pdf.yaml
@@ -0,0 +1,45 @@
+# SAMPLE INFORMATION
+ToLID: <SAMPLE_ID>
+Species: <LATIN_NAME>
+Sex: <EXPECTED_SEX>
+Submitter: <CURATOR>
+Affiliation: WSI
+Tags: ERGA-BGE
+
+# SEQUENCING DATA
+DATA:
+  - PacBio HiFi: <PACBIO_COVERAGE>
+  - Arima v2: <ARIMA_COVERAGE>
+
+# GENOME PROFILING DATA
+PROFILING:
+  GenomeScope:
+    version: 2.0
+    results_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/
+
+# ASSEMBLY DATA
+ASSEMBLIES:
+  Pre-curation:
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|]
+    pri:
+      gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats
+      busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt
+      merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/
+
+  Curated:
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|, TreeVal_v1.1]
+    pri:
+      gfastats--nstar-report_txt: idCulLati1.1.primary.curated.fa.gfastats
+      busco_short_summary_txt: short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt
+      merqury_folder: <POST_CURATION_MERQURY_FOLDER>
+      hic_FullMap_png: <CURATION_PRETEXT_PRETEXT_MAP_PNG>
+      hic_FullMap_link: https://tolqc.cog.sanger.ac.uk/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1_normal.pretext
+      blobplot_cont_png: idCulLati1.1_primary_curated_btk_busco.blob.circle.png
+
+# CURATION NOTES
+NOTES:
+  Obs_Haploid_num: <OBSERVED_HAPLOID_CHROMOSOME_COUNT>
+  Obs_Sex: <OBSERVED_SEX>
+  Interventions_per_Gb: <MANUAL_INTERVENTIONS_PER_GB>
+  Contamination_notes: "Total length of scaffolds removed: 989,717 (0.1 %)\nScaffolds removed: 1 (0.2 %)\nLargest scaffold removed: (989,717)\nFCS-GX contaminant species (number of scaffolds; total length of scaffolds):\nWolbachia endosymbiont (group B) of Melanostoma mellinum, a-proteobacteria (1; 989,717)"
+  Other_notes: "Chromosomes named by size"
diff --git a/assets/test.yaml b/assets/test.yaml
index d4da164..6a5299a 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -4,6 +4,7 @@ reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/as
 longread:
   type: hifi
   dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
+mapped_bam: idCulLati1/mapped_bam.bam
 curationpretext:
   aligner: minimap2
   telomere_motif: TTAGG
diff --git a/conf/base.config b/conf/base.config
index 4136c84..e609a9e 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -19,6 +19,10 @@ process {
     maxRetries    = 1
     maxErrors     = '-1'
 
+    withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" {
+        time    = { check_max( 70.h  * task.attempt, 'time'   ) }
+    }
+
     // Process-specific resource requirements
     // NOTE - Please try and re-use the labels below as much as possible.
     //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
diff --git a/conf/modules.config b/conf/modules.config
index a96a69f..d31543e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -12,11 +12,13 @@
 
 process {
 
-    publishDir = [
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
+    withName: "GFASTATS|MERQURYFK_MERQURYFK|SANGER_TOL_BTK|SANGER_TOL_CPRETEXT|CURATION_PRETEXT" {
+        publishDir = [
+            path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
 
     withName: GFASTATS {
         ext.args = '--nstar-report'
@@ -31,11 +33,19 @@ process {
     }
 
     withName: SANGER_TOL_BTK {
-        ext.args            = "--blastx_outext 'txt'"
+        ext.args            = ""
         ext.executor        = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'"
         ext.profiles        = "singularity,sanger"
         ext.get_versions    = "lsid | head -n1 | cut -d ',' -f 1"
         ext.version         = "draft_assemblies"
     }
 
+    withName: SANGER_TOL_CPRETEXT {
+        ext.args            = ""
+        ext.executor        = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'"
+        ext.profiles        = "singularity,sanger"
+        ext.get_versions    = "lsid | head -n1 | cut -d ',' -f 1"
+        ext.version         = "1.0.0"
+    }
+
 }
diff --git a/conf/sanger-tol-btk.config b/conf/sanger-tol-btk.config
new file mode 100644
index 0000000..247dbbd
--- /dev/null
+++ b/conf/sanger-tol-btk.config
@@ -0,0 +1,7 @@
+process {
+    withName: '.*RUN_BLASTN:BLASTN_TAXON' {    // select by process name, not label: RUN_BLASTN:BLASTN_TAXON is a subworkflow:process path
+        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
+        memory = { check_max( 10.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
+    }
+}
\ No newline at end of file
diff --git a/modules.json b/modules.json
index 3b0db89..45499e0 100644
--- a/modules.json
+++ b/modules.json
@@ -8,42 +8,59 @@
                     "busco/busco": {
                         "branch": "master",
                         "git_sha": "17486961b8b1ab1aae258c83a7e947b40d8ab670",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "gfastats": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "merquryfk/merquryfk": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ],
+                        "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff"
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/merge": {
                         "branch": "master",
                         "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "46eca555142d6e597729fcb682adcc791796f514",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     }
                 }
             },
@@ -52,20 +69,26 @@
                     "utils_nextflow_pipeline": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
                         "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "utils_nfvalidation_plugin": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     }
                 }
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index 27e3ca0..4582179 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -9,7 +9,7 @@ process SANGER_TOL_BTK {
     path blastp, stageAs: "blastp.dmnd"
     path blastn
     path blastx
-    path btk_config_file
+    path config_file
     path tax_dump
     path btk_yaml, stageAs: "BTK.yaml"
     val busco_lineages
@@ -19,7 +19,7 @@ process SANGER_TOL_BTK {
     output:
     tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/REFERENCE"),      emit: dataset
     path("${meta.id}_btk_out/blobtoolkit/plots"),                           emit: plots
-    path("${meta.id}_btk_out/blobtoolkit/REFERENCE/summary.json.gz"),     emit: summary_json
+    path("${meta.id}_btk_out/blobtoolkit/REFERENCE/summary.json.gz"),       emit: summary_json
     path("${meta.id}_btk_out/busco"),                                       emit: busco_data
     path("${meta.id}_btk_out/multiqc"),                                     emit: multiqc_report
     path("blobtoolkit_pipeline_info"),                                      emit: pipeline_info
@@ -30,7 +30,7 @@ process SANGER_TOL_BTK {
     def executor            =   task.ext.executor       ?:  ""
     def profiles            =   task.ext.profiles       ?:  ""
     def get_version         =   task.ext.version_data   ?:  "UNKNOWN - SETTING NOT SET"
-    def btk_config          =   btk_config_file         ? "-c $btk_config_file"         : ""
+    def config              =   config_file             ? "-c $config_file"         : ""
     def pipeline_version    =   task.ext.version        ?: "draft_assemblies"
     // YAML used to avoid the use of GCA accession number
     //    https://github.com/sanger-tol/blobtoolkit/issues/77
@@ -58,8 +58,9 @@ process SANGER_TOL_BTK {
         --blastp "\$(realpath blastp.dmnd)" \\
         --blastn "\$(realpath $blastn)" \\
         --blastx "\$(realpath $blastx)" \\
-        $btk_config \\
-        $args'
+        $config \\
+        $args \\
+        -resume'
 
     mv ${meta.id}_btk_out/pipeline_info blobtoolkit_pipeline_info
 
diff --git a/modules/local/sanger_tol_cpretext.nf b/modules/local/sanger_tol_cpretext.nf
new file mode 100644
index 0000000..eec53ae
--- /dev/null
+++ b/modules/local/sanger_tol_cpretext.nf
@@ -0,0 +1,50 @@
+process SANGER_TOL_CPRETEXT {  // Launches sanger-tol/curationpretext as a nested `nextflow run` and records versions
+    tag "$reference"
+    label 'process_low'
+
+    input:
+    path(reference)         // passed to the nested run as --input
+    path(longread_dir)      // passed to the nested run as --longread
+    path(cram_dir)          // passed to the nested run as --cram
+    path(config_file)       // optional; when set it is added to the nested run as `-c <file>`
+
+    output:
+    tuple val(reference), path("*_out/*"),      emit: dataset
+    path "versions.yml",                        emit: versions
+
+    script:
+    def pipeline_name                       =   "sanger-tol/curationpretext" // TODO: should come from task.ext rather than being hard-coded
+    def (pipeline_prefix,pipeline_suffix)   =   pipeline_name.split('/')    // pipeline_prefix is not used below
+    def args                                =   task.ext.args               ?:  ""
+    def executor                            =   task.ext.executor           ?:  ""
+    def profiles                            =   task.ext.profiles           ?:  ""
+    def get_version                         =   task.ext.version_data       ?:  "UNKNOWN - SETTING NOT SET"
+    def config                              =   config_file                 ? "-c $config_file"         : ""
+    def pipeline_version                    =   task.ext.version            ?: "draft_assemblies"
+
+    // The nested run is given absolute paths (realpath) because a nested pipeline seems unable to see
+    // the files staged in this task directory; the staged copies are therefore just wasted space.
+    // Using Mahesh's method of nesting should fix this, but that was noted as complicated for BTK.
+    // NOTE(review): ext.version_data is read above while conf/modules.config sets ext.get_versions;
+    // the "draft_assemblies" default matches SANGER_TOL_BTK - confirm both are intended for this pipeline.
+    // TODO: outdir should be an arg
+    """
+    $executor 'nextflow run $pipeline_name \\
+        -r $pipeline_version \\
+        -profile  $profiles \\
+        --input "\$(realpath $reference)" \\
+        --outdir ${reference}_${pipeline_suffix}_out \\
+        --longread "\$(realpath $longread_dir)" \\
+        --cram "\$(realpath $cram_dir)" \\
+        $args \\
+        $config \\
+        -resume'
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        $pipeline_suffix: $pipeline_version
+        Nextflow: \$(nextflow -v | cut -d " " -f3)
+        executor system: $get_version
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf
index ac163da..f0e78cc 100644
--- a/modules/nf-core/merquryfk/merquryfk/main.nf
+++ b/modules/nf-core/merquryfk/merquryfk/main.nf
@@ -39,11 +39,16 @@ process MERQURYFK_MERQURYFK {
     prefix = task.ext.prefix ?: "${meta.id}"
     def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
     def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    // Passing the link in through FASTK works; however, passing it in through YAML_INPUT results in the file not being found.
+    // This seems to be because the file is in a sub-folder rather than directly in the folder merqury is running in.
     """
+    cp ${fastk_ktab}/*ktab . && cp ${fastk_ktab}/.*ktab.* .
+
     MerquryFK \\
         $args \\
         -T$task.cpus \\
-        ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\
+        *.ktab \\
         $assembly \\
         $haplotigs \\
         $prefix
diff --git a/subworkflows/local/main_mapping.nf b/subworkflows/local/main_mapping.nf
new file mode 100644
index 0000000..28c100f
--- /dev/null
+++ b/subworkflows/local/main_mapping.nf
@@ -0,0 +1,77 @@
+include { SE_MAPPING        } from './se_mapping'
+include { PE_MAPPING        } from './pe_mapping'
+
+include { SAMTOOLS_SORT     } from '../../modules/nf-core/samtools/sort/main'
+
+
+workflow MAIN_MAPPING {
+    // Aligns the read set to reference_hap1 via SE_MAPPING or PE_MAPPING and emits the BAM keyed by sample id.
+    take:
+    sample_id               // val(sample_id)
+    platform                // val(data_type)
+    reference_hap1          // tuple val(meta), path(reference)
+    pacbio_tuple            // tuple val(meta), path(longread_path)
+
+    main:
+    ch_align_bam    = Channel.empty()
+    ch_versions     = Channel.empty()
+
+    //
+    // LOGIC: SANGER-TOL/BLOBTOOLKIT expected the pacbio data to be already mapped. This has since changed, but as BTK and genomenote both need it we may as well keep it.
+    //          NOTE(review): `platform.filter { ... }` yields a channel object rather than a boolean, so the first branch below looks like it is always taken - confirm.
+    //
+
+    if ( platform.filter { it == "hifi" } || platform.filter { it == "clr" } || platform.filter { it == "ont" } ) {
+        //
+        // SUBWORKFLOW: SINGLE END MAPPING FOR ALIGNING LONGREAD DATA
+        //
+        SE_MAPPING (
+            reference_hap1,
+            pacbio_tuple,
+            platform
+        )
+        ch_versions = ch_versions.mix(SE_MAPPING.out.versions)
+
+        ch_align_bam
+            .mix( SE_MAPPING.out.mapped_bam )
+            .set { merged_bam }
+    }
+    else if ( platform.filter { it == "illumina" } ) {
+        //
+        // SUBWORKFLOW: PAIRED END MAPPING FOR ALIGNING ILLUMINA DATA (reads still arrive via pacbio_tuple)
+        //
+        PE_MAPPING  (
+            reference_hap1,
+            pacbio_tuple,
+            platform
+        )
+        ch_versions = ch_versions.mix(PE_MAPPING.out.versions)
+
+        ch_align_bam
+            .mix( PE_MAPPING.out.mapped_bam )
+            .set { merged_bam }
+    }
+
+    //
+    // MODULE: SORT MAPPED BAM
+    //         NOTE(review): only SAMTOOLS_SORT.out.versions is used below; mapped_bam is built from the unsorted merged_bam - confirm this is intended.
+    SAMTOOLS_SORT (
+        merged_bam,
+        reference_hap1
+    )
+    ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions )
+
+    sample_id
+        .combine(merged_bam)
+        .map{ sample_id, pacbio_meta, pacbio_path ->
+            tuple(  [id: sample_id],
+                    pacbio_path
+            )
+        }
+        .set { mapped_bam }
+
+    emit:
+    mapped_bam                        // channel: tuple val(meta), path(mapped_bam)
+    versions       = ch_versions      // channel: [ path(versions.yml) ]
+
+}
\ No newline at end of file
diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
index 687c5db..6561d27 100644
--- a/subworkflows/local/yaml_input.nf
+++ b/subworkflows/local/yaml_input.nf
@@ -4,7 +4,7 @@ import groovy.yaml.YamlSlurper
 
 workflow YAML_INPUT {
     take:
-    input_file          // params.input
+    input_file                  // params.input
 
     main:
     ch_versions                 = Channel.empty()
@@ -58,44 +58,62 @@ workflow YAML_INPUT {
         }
         .set {cpretext_hic_dir}
 
+
+    if (params.mapped) {
+        bam_path = Channel.of(inputs.mapped_bam)
+
+        sample_id
+            .combine(bam_path)
+            .map{ sample, dir ->
+                tuple([id: sample],
+                    dir
+                )
+            }
+            .set {mapped_bam}
+    } else {
+        mapped_bam = [[],[]]
+    }
+
     emit:
     //
     // LOGIC: Building generic channels
     //
     sample_id
-    longread_type                                               // val(data)
-    longread_dir            = inputs.longread.dir               // DataVariable
-    pacbio_tuple                                                // tuple (meta), path(file)
-    reference_hap1          = reference_hap1                    // tuple (meta), path(file)
-    reference_hap2          = reference_2                       // DataVariable
-    reference_path          = inputs.reference_hap1             // DataVariable
+    longread_type                                                   // val(data)
+    longread_dir                = inputs.longread.dir               // DataVariable
+    pacbio_tuple                                                    // tuple (meta), path(file)
+    reference_hap1                                                  // tuple (meta), path(file)
+    reference_hap2              = reference_2                       // DataVariable
+    reference_path              = inputs.reference_hap1             // DataVariable
+    mapped_bam
 
     //
     // LOGIC: Building CurationPretext specific channels
     //
     cpretext_aligner
     cpretext_telomere_motif
-    cpretext_hic_dir_raw    = inputs.curationpretext.hic_dir    // DataVariable
+    cpretext_hic_dir_raw        = inputs.curationpretext.hic_dir    // DataVariable
 
     //
     // LOGIC: MERQURY CHANNELS
     //
-    fastk_hist              = Channel.of(inputs.merquryfk.fastk_hist)
-    fastk_ktab              = Channel.of(inputs.merquryfk.fastk_ktab)
+    fastk_hist                  = Channel.fromPath(inputs.merquryfk.fastk_hist)
+    fastk_ktab                  = Channel.fromPath(inputs.merquryfk.fastk_ktab, hidden: true)
 
     //
     // LOGIC: Building BlobToolKit specific channels
     //
-    btk_nt_database         = Channel.of(inputs.btk.nt_database)
-    btk_nt_database_prefix  = Channel.of(inputs.btk.nt_database_prefix)
-    btk_nt_diamond_database = Channel.of(inputs.btk.diamond_nr_database_path)
-    btk_un_diamond_database = Channel.of(inputs.btk.diamond_uniprot_database_path)
-    btk_ncbi_taxonomy_path  = Channel.of(inputs.btk.ncbi_taxonomy_path)
-    btk_ncbi_lineage_path   = Channel.of(inputs.btk.ncbi_rankedlineage_path)
-    btk_yaml                = Channel.of(inputs.btk.btk_yaml)
-    btk_taxid               = Channel.of(inputs.btk.taxid)
-    btk_gca_accession       = Channel.of(inputs.btk.gca_accession)
-    busco_lineages          = Channel.of(inputs.btk.lineages)
-
-    versions                = ch_versions.ifEmpty(null)
+    btk_nt_database             = Channel.of(inputs.btk.nt_database)
+    btk_nt_database_prefix      = Channel.of(inputs.btk.nt_database_prefix)
+    btk_nt_diamond_database     = Channel.of(inputs.btk.diamond_nr_database_path)
+    btk_un_diamond_database     = Channel.of(inputs.btk.diamond_uniprot_database_path)
+    btk_ncbi_taxonomy_path      = Channel.of(inputs.btk.ncbi_taxonomy_path)
+    btk_ncbi_lineage_path       = Channel.of(inputs.btk.ncbi_rankedlineage_path)
+    btk_yaml                    = Channel.of(inputs.btk.btk_yaml)
+    btk_taxid                   = Channel.of(inputs.btk.taxid)
+    btk_gca_accession           = Channel.of(inputs.btk.gca_accession)
+    busco_lineages              = Channel.of(inputs.btk.lineages)
+    btk_config                  = Channel.fromPath(inputs.btk.config)
+
+    versions                    = ch_versions.ifEmpty(null)
 }
diff --git a/workflows/ear.nf b/workflows/ear.nf
index 9f90920..9062d84 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -4,16 +4,16 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { NEXTFLOW_RUN as CURATIONPRETEXT   } from '../modules/local/nextflow/run'
-include { NEXTFLOW_RUN as BLOBTOOLKIT       } from '../modules/local/nextflow/run'
+// include { NEXTFLOW_RUN as CURATIONPRETEXT   } from '../modules/local/nextflow/run'
+// include { NEXTFLOW_RUN as BLOBTOOLKIT       } from '../modules/local/nextflow/run'
 include { SANGER_TOL_BTK                    } from '../modules/local/sanger_tol_btk'
+include { SANGER_TOL_CPRETEXT               } from '../modules/local/sanger_tol_cpretext'
 
 include { YAML_INPUT                        } from '../subworkflows/local/yaml_input'
 include { GENERATE_SAMPLESHEET              } from '../modules/local/generate_samplesheet'
 include { GFASTATS                          } from '../modules/nf-core/gfastats/main'
-include { PE_MAPPING                        } from '../subworkflows/local/pe_mapping'
-include { SE_MAPPING                        } from '../subworkflows/local/se_mapping'
-include { SAMTOOLS_SORT                     } from '../modules/nf-core/samtools/sort/main'
+include { MAIN_MAPPING                      } from '../subworkflows/local/main_mapping'
+include { MERQURYFK_MERQURYFK               } from '../modules/nf-core/merquryfk/merquryfk/main'
 
 include { paramsSummaryMap                  } from 'plugin/nf-validation'
 include { paramsSummaryMultiqc              } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -32,7 +32,7 @@ workflow EAR {
     ch_input
 
     main:
-
+    params.mapped   = false
     ch_versions     = Channel.empty()
     ch_align_bam    = Channel.empty()
 
@@ -45,32 +45,39 @@ workflow EAR {
     // MODULE: Run Sanger-ToL/CurationPretext
     //         - This was built using: https://github.com/mahesh-panchal/nf-cascade
     //
-    reference = YAML_INPUT.out.reference_path.get()
-    hic_dir = YAML_INPUT.out.cpretext_hic_dir_raw.get()
-    longread_dir = YAML_INPUT.out.longread_dir.get()
-
-    CURATIONPRETEXT(
-        "sanger-tol/curationpretext",
-        [
-            "-r 1.0.0",
-            "--input",
-            reference,
-            "--longread",
-            longread_dir,
-            "--cram",
-            hic_dir,
-            "-profile singularity,sanger"
-        ].join(" ").trim(), // workflow opts
-        Channel.value([]),  //readWithDefault( params.demo.params_file, Channel.value([]) ), // params file
-        Channel.value([]),  // samplesheet - not used by this pipeline
-        Channel.value([])   //readWithDefault( params.demo.add_config, Channel.value([]) ),  // custom config
-        //"$params.outdir/curationpretext",
+    reference       = YAML_INPUT.out.reference_path.get()
+    hic_dir         = YAML_INPUT.out.cpretext_hic_dir_raw.get()
+    longread_dir    = YAML_INPUT.out.longread_dir.get()
+
+    // CURATIONPRETEXT(
+    //     "sanger-tol/curationpretext",
+    //     [
+    //         "-r 1.0.0",
+    //         "--input",
+    //         reference,
+    //         "--longread",
+    //         longread_dir,
+    //         "--cram",
+    //         hic_dir,
+    //         "-profile singularity,sanger"
+    //     ].join(" ").trim(), // workflow opts
+    //     Channel.value([]),  //readWithDefault( params.demo.params_file, Channel.value([]) ), // params file
+    //     Channel.value([]),  // samplesheet - not used by this pipeline
+    //     Channel.value([])   //readWithDefault( params.demo.add_config, Channel.value([]) ),  // custom config
+    // )
+
+    SANGER_TOL_CPRETEXT(
+        reference,
+        longread_dir,
+        hic_dir,
+        []
     )
+    ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
+
 
     //
     // MODULE: ASSEMBLY STATISTICS FOR THE FASTA
     //
-
     GFASTATS(
         YAML_INPUT.out.reference_hap1,
         "fasta",
@@ -81,127 +88,70 @@ workflow EAR {
         [],
         []
     )
+    ch_versions = ch_versions.mix( GFASTATS.out.versions )
+
 
     //
     // LOGIC:  REFORMAT A BUNCH OF CHANNELS FOR MERQUERYFK
     //
-
-    if (params.reference_hap2) {
-        YAML_INPUT.out.reference_hap1
-            .combine(YAML_INPUT.out.reference_hap2)
-            .combine(YAML_INPUT.out.fastk_hist)
-            .combine(YAML_INPUT.out.fastk_ktab)
-            .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab ->
-                tuple(  meta,
-                        fastk_hist,
-                        fastk_ktab,
-                        primary,
-                        haplotigs
-                )
-            }
-            .set { merquryfk_input }
-
-        //
-        // MODULE: MERQURYFK PLOTS OF GENOME
-        //
-
-        MERQURYFK(
-            merquryfk_input
-        )
-    }
-
+    YAML_INPUT.out.reference_hap1
+        .combine(YAML_INPUT.out.reference_hap2)
+        .combine(YAML_INPUT.out.fastk_hist)
+        .combine(YAML_INPUT.out.fastk_ktab)
+        .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab ->
+            tuple(  meta,
+                    fastk_hist,
+                    fastk_ktab,
+                    primary,
+                    haplotigs
+            )
+        }
+        .set { merquryfk_input }
     //
-    // LOGIC: SANGER-TOL/BLOBTOOLKIT expects the pacbio data to be already mapped -> this has been changed but seeing as BTK and genomenote need it then we may as well keep it.
-    //          This is also a requirement for genomenote
+    // MODULE: MERQURYFK PLOTS OF GENOME
     //
-    platform = YAML_INPUT.out.longread_type
-
-    YAML_INPUT.out.sample_id
-        .combine(YAML_INPUT.out.longread_dir)
-        .map{ sample, dir ->
-            tuple([id: sample], dir )
-        }
-        .set {pacbio_tuple}
+    MERQURYFK_MERQURYFK(
+        merquryfk_input
+    )
+    ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
 
-    if ( platform.filter { it == "hifi" } || platform.filter { it == "clr" } || platform.filter { it == "ont" } ) {
-        //
-        // SUBWORKFLOW: SINGLE END MAPPING FOR ALIGNING LONGREAD DATA
-        //
-        SE_MAPPING (
-            YAML_INPUT.out.reference_hap1,
-            YAML_INPUT.out.pacbio_tuple,
-            platform
-        )
-        ch_versions = ch_versions.mix(SE_MAPPING.out.versions)
 
-        ch_align_bam
-            .mix( SE_MAPPING.out.mapped_bam )
-            .set { merged_bam }
-    }
-    else if ( platform.filter { it == "illumina" } ) {
+    ch_mapped_bam = YAML_INPUT.out.mapped_bam
+    if (!params.mapped) {
         //
-        // SUBWORKFLOW: PAIRED END MAPPING FOR ALIGNING LONGREAD DATA
+        // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
+        //              This allows us to more easily bypass the mapping if we already have a sorted and mapped bam
         //
-        PE_MAPPING  (
+        MAIN_MAPPING (
+            YAML_INPUT.out.sample_id,
+            YAML_INPUT.out.longread_type,
             YAML_INPUT.out.reference_hap1,
             YAML_INPUT.out.pacbio_tuple,
-            platform
         )
-        ch_versions = ch_versions.mix(PE_MAPPING.out.versions)
-
-        ch_align_bam
-            .mix( PE_MAPPING.out.mapped_bam )
-            .set { merged_bam }
+        ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
+        ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
     }
 
-    //
-    // MODULE: SORT MAPPED BAM
-    //
-    SAMTOOLS_SORT (
-        merged_bam,
-        YAML_INPUT.out.reference_hap1
-    )
-    ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions )
-
     //
     // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
     //
-    YAML_INPUT.out.sample_id
-        .combine(merged_bam)
-        .map{ sample_id, pacbio_meta, pacbio_path ->
-            tuple(  [id: sample_id],
-                    pacbio_path
-            )
-        }
-        .set { mapped_bam }
-
 
     GENERATE_SAMPLESHEET(
-        mapped_bam
+        ch_mapped_bam
     )
     ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
 
     //
     // MODULE: Run Sanger-ToL/BlobToolKit
     //
-    YAML_INPUT.out.reference_hap1.view{ it -> "Reference: $it"}
-    mapped_bam.view{ it -> "samplesheet: $it"}
-    GENERATE_SAMPLESHEET.out.csv.view{ it -> "samplesheetcsv: $it"}
-    YAML_INPUT.out.btk_un_diamond_database.view{ it -> "un diamond: $it"}
-    YAML_INPUT.out.btk_nt_database.view{ it -> "nt diamond: $it"}
-    YAML_INPUT.out.btk_ncbi_taxonomy_path.view{ it -> "Taxdump: $it"}
-    YAML_INPUT.out.btk_yaml.view{ it -> "btk_yaml: $it"}
-    YAML_INPUT.out.busco_lineages.view{ it -> "lineages: $it"}
-    YAML_INPUT.out.btk_taxid.view{ it -> "TAXID: $it"}
-
     SANGER_TOL_BTK (
         YAML_INPUT.out.reference_hap1,
-        mapped_bam,
+        ch_mapped_bam,
         GENERATE_SAMPLESHEET.out.csv,
         YAML_INPUT.out.btk_un_diamond_database,
         YAML_INPUT.out.btk_nt_database,
         YAML_INPUT.out.btk_un_diamond_database,
-        [],
+        YAML_INPUT.out.btk_config,
         YAML_INPUT.out.btk_ncbi_taxonomy_path,
         YAML_INPUT.out.btk_yaml,
         YAML_INPUT.out.busco_lineages,
@@ -225,13 +175,15 @@ workflow EAR {
         workflow, parameters_schema: "nextflow_schema.json")
     ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
 
-
-
     emit:
     versions       = ch_versions                 // channel: [ path(versions.yml) ]
 }
 
 
+//
+// MODULE: THERE ARE TWO DATABASES WHICH ARE FREQUENTLY THE SAME DATABASE
+//          THIS STOPS NAME CONFLICTS BEFORE THEY ARE COPIED TO THE SAME PLACE
+//
 process RenameDatabase {
     tag "Rename DMND Database"
     executor 'local'

From 66a5f0911d92a11f29d541befd8132c5d6301db9 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 19 Aug 2024 11:04:12 +0100
Subject: [PATCH 11/52] adding merqury_fk

---
 .../merquryfk/merquryfk-merquryfk.diff        | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff

diff --git a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff
new file mode 100644
index 0000000..751b30b
--- /dev/null
+++ b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff
@@ -0,0 +1,23 @@
+Changes in module 'nf-core/merquryfk/merquryfk'
+--- modules/nf-core/merquryfk/merquryfk/main.nf
++++ modules/nf-core/merquryfk/merquryfk/main.nf
+@@ -39,11 +39,16 @@
+     prefix = task.ext.prefix ?: "${meta.id}"
+     def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+     def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
++
++    // Passing in the link through FASTK works, however passing in through YAML_INPUT results in being unable to find file
++    // seems as though it is because it is in a folder rather directly in the folder merqury is running in.
+     """
++    cp ${fastk_ktab}/*ktab . && cp ${fastk_ktab}/.*ktab.* .
++
+     MerquryFK \\
+         $args \\
+         -T$task.cpus \\
+-        ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\
++        *.ktab \\
+         $assembly \\
+         $haplotigs \\
+         $prefix
+
+************************************************************

From 2fe82afa28e04889e2919b0ab2ba0b0043976edc Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 13:21:40 +0100
Subject: [PATCH 12/52] Updating documentation

---
 CHANGELOG.md | 34 +++++++++++++++++---
 README.md    | 91 +++++++++++++++++++++++++++++++---------------------
 2 files changed, 84 insertions(+), 41 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3d90bc..3173f7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,15 +2,39 @@
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type).
 
-## v1.0dev - [date]
+## v1.0.0 - Aquatic Bahamut [21/08/2024]
 
 Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template.
+The current pipeline represents the minimum viable product (MVP) for ear.
 
-### `Added`
+### Added
+- GFASTATS to generate statistics on the input primary genome.
+- MERQURY_FK to generate kmer graphs and analyses of the primary, haplotype and merged assembly.
+- BLOBTOOLKIT to generate busco files and blobtoolkit dataset/plots.
+- CURATIONPRETEXT to generate pretext plots and pngs.
 
-### `Fixed`
+### Parameters
 
-### `Dependencies`
+| Old parameter   | New parameter |
+| --------------- | ------------- |
+|                 | --mapped      |
 
-### `Deprecated`
+### Software dependencies
+
+| Dependency  | Old version   | New version   |
+| ----------- | ------------- | ------------- |
+| sanger-tol/blobtoolkit* |               | draft_assemblies        |
+| sanger-tol/curationpretext* |   |  1.0.0 (UNSC Cradle) |
+| GFASTATS |  | 1.3.6--hdcf5f25_3   |
+| MERQURY_FK  | | 1.2   |
+| MINIMAP2_ALIGN |  | 2.28  |
+| SAMTOOLS_MERGE |  | 1.20--h50ea8bc_0 |
+| SAMTOOLS_SORT  |  | 1.20--h50ea8bc_0 |
+
+
+- Note: for pipelines, please check their own CHANGELOG file for a full list of software dependencies.
+
+### Dependencies
+The pipeline depends on a number of databases which are noted in  [README](README.md) and [USAGE](docs/usage.md).
diff --git a/README.md b/README.md
index 506512d..652eba6 100644
--- a/README.md
+++ b/README.md
@@ -10,51 +10,74 @@
 
 ## Introduction
 
-**sanger-tol/ear** is a bioinformatics pipeline that ...
+**sanger-tol/ear** is a bioinformatics pipeline that generates the data files required for the generation of ERGA Assembly Reports. Sanger-tol/ear nests two other sanger-tol pipelines (blobtoolkit and curationpretext).
 
-<!-- TODO nf-core:
-   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the
-   major pipeline sections and the types of output it produces. You're giving an overview to someone new
-   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction
--->
-
-<!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core
-     workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   -->
-<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
-
-1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
-2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
+1. Read the input yaml file (YAML_INPUT)
+2. Run GFASTATS (GFASTATS)
+3. Run MERQURYFK_MERQURYFK (MERQURYFK)
+4. Run MAIN_MAPPING, longread single-end/paired-end mapping 
+5. Run GENERATE_SAMPLESHEET, generate a csv file required for SANGER_TOL_BTK.
+6. Run SANGER_TOL_BTK, also known as SANGER-TOL/BLOBTOOLKIT a subpipeline for SANGER-TOL/EAR
+7. Run SANGER_TOL_CPRETEXT, also known as SANGER-TOL/CURATIONPRETEXT a subpipeline for SANGER-TOL/EAR.
 
 ## Usage
 
 > [!NOTE]
 > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
 
-<!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.
-     Explain what rows and columns represent. For instance (please edit as appropriate):
-
-First, prepare a samplesheet with your input data that looks as follows:
-
-`samplesheet.csv`:
-
-```csv
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
+The sanger-tol/ear pipeline requires a number of databases in place in order to run the blobtoolkit pipeline.
+These include:
+   - A blast nt database
+   - A Diamond blast uniprot database
+   - A Diamond blast nr database
+   - An NCBI taxdump
+   - An NCBI rankedlineage.dmp
+
+Next, a yaml file containing the following should then be completed:
+
+```yaml
+# General values for all subpipelines and modules
+assembly_id: <NAME OF ASSEMBLY>
+reference_hap1: <LOCATION OF PRIMARY ASSEMBLY FILE .FA>
+reference_hap2: <LOCATION OF HAPLOTYPE ASSEMBLY FILE .FA>
+reference_haplotigs: <LOCATION OF THE HAPLOTIGS FILE, REMOVED DURING CURATION .FA>
+
+# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore it and the pipeline will create it.
+mapped_bam: <MAPPED BAM .BAM>
+
+merquryfk:
+  fastk_hist: <THE PATH TO THE .HIST FILE>
+  fastk_ktab: <PATH TO THE DIRECTORY CONTAINING THE KTAB FILES, ENSURE THE HIDDEN FILES ARE HERE TOO>
+
+# Used by both subpipelines
+longread:
+  type: <hifi|clr|ont|illumina>
+  dir: <DIRECTORY OF LONGREAD FILES .FASTA.GZ>
+curationpretext:
+  aligner: <minimap2|BWAMEM>
+  telomere_motif: <TELOMERE MOTIF OF SAMPLE>
+  hic_dir: <DIRECTORY OF HIC READ FILES .CRAM AND .CRAI>
+btk:
+  taxid: 1464561
+  lineages: <CSV LIST OF DATABASES TO USE: "insecta_odb10,diptera_odb10">
+  gca_accession: GCA_0001 <DEFAULT, DO NOT CHANGE UNLESS YOU HAVE A GCA_ACCESSION FOR YOUR SPECIES>
+  nt_database: <DIRECTORY CONTAINING BLAST DB>
+  nt_database_prefix: <BLASTDB PREFIX>
+  diamond_uniprot_database_path: <PATH TO reference_proteomes.dmnd FROM UNIPROT>
+  diamond_nr_database_path: <PATH TO nr.dmnd>
+  ncbi_taxonomy_path: <DIRECTORY CONTAINING THE TAXDUMP>
+  ncbi_rankedlineage_path: <FOLDER CONTAINING THE rankedlineage.dmp FILE>
+  config: <PATH TO ear/conf/sanger-tol-btk.config TO OVERWRITE PROCESS LIMITS>
 ```
 
-Each row represents a fastq file (single-end) or a pair of fastq files (paired end).
-
--->
 
 Now, you can run the pipeline using:
 
-<!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
-
 ```bash
-nextflow run sanger-tol/ear \
-   -profile <docker/singularity/.../institute> \
-   --input samplesheet.csv \
-   --outdir <OUTDIR>
+nextflow run sanger-tol/ear -profile <singularity,docker> \
+   --input assets/idCulLati1.yaml \
+   --outdir test-truth \
+   --mapped TRUE # OPTIONAL
 ```
 
 > [!WARNING]
@@ -65,10 +88,6 @@ nextflow run sanger-tol/ear \
 
 sanger-tol/ear was originally written by DLBPointon.
 
-We thank the following people for their extensive assistance in the development of this pipeline:
-
-<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
-
 ## Contributions and Support
 
 If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).

From 03b074c04b4e8368ed3c935819ce62bc9cb7c66a Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 13:21:49 +0100
Subject: [PATCH 13/52] Updating documentation

---
 docs/output.md |  70 ++++++++++++------
 docs/usage.md  | 192 +++++++++++++++++++++++++++++++++++++------------
 2 files changed, 195 insertions(+), 67 deletions(-)

diff --git a/docs/output.md b/docs/output.md
index 335ec21..f5a9c8b 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -6,54 +6,80 @@ This document describes the output produced by the pipeline. Most of the plots a
 
 The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
 
-<!-- TODO nf-core: Write this documentation describing your workflow's output -->
-
 ## Pipeline overview
 
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
 
-- [FastQC](#fastqc) - Raw read QC
-- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
+- [GFASTATS](#gfastats) - Collect statistics on the curated primary assembly
+- [MERQURYFK](#merquryfk) - Generate kmer plots for the curated assembly using previous run information
+- [SANGER_TOL_BTK](#sanger_tol_btk) - Run Blobtoolkit to generate plots and short_summary.txt from BUSCO.
+- [SANGER_TOL_CPRETEXT](#sanger_tol_cpretext) - Run Curationpretext to generate Pretext files and accessory tracks.
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
 
-### FastQC
+### GFASTATS
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `gfastats/`
+  - `*.assembly.summary`: Assembly metrics of the input primary file.
+  - `*_fasta.gz`: GZipped primary assembly file.
+
+</details>
+
+[GFASTATS](https://github.com/vgl-hub/gfastats) is a single fast and exhaustive tool for summary statistics and simultaneous *fa* (fasta, fastq, gfa [.gz]) genome assembly file manipulation.
+
+### MERQURYFK
 
 <details markdown="1">
 <summary>Output files</summary>
 
-- `fastqc/`
-  - `*_fastqc.html`: FastQC report containing quality metrics.
-  - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
+- `merquryfk/`
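+  - `*.completeness.stats`: K-mer completeness statistics for the assembly.
+  - `*{"primary","haplotype",""}_only.bed`: BED file of k-mer positions found only in the assembly, i.e. not supported by the reads.
+  - `*{"primary","haplotype",""}.qv`: Error and quality value (QV) estimates for the assembly.
+  - `*.spectra-asm.{fl,ln,st}.png`: Assembly spectra plots (filled, line and stacked).
+  - `*{"primary","haplotype"}.spectra-cn.{fl,ln,st}.png`: Copy-number spectra plots (filled, line and stacked).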
 
 </details>
 
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
+[MERQURYFK](https://github.com/thegenemyers/MERQURY.FK) is a FastK based version of Merqury.
+
+Merqury is a novel tool for reference-free assembly evaluation based on efficient k-mer set operations. By comparing k-mers in a de novo assembly to those found in unassembled high-accuracy reads, Merqury estimates base-level accuracy and completeness.
+
+
+### SANGER_TOL_BTK
 
-![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
+<details markdown="1">
+<summary>Output files</summary>
 
-![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png)
+- `sanger/*_blobtoolkit_out/`
+  - `blobtoolkit/plots/*png`: Blobtoolkit plots 
+  - `blobtoolkit/{ASSEMBLY_NAME}/*.json.gz`: Blobtoolkit dataset for use in BTK_viewer.
+  - `busco/*_odb10/*.{tsv,tar.gz,json,txt}`: Busco output
+  - `multiqc/`: MultiQC plots/data and report.html.
+  - [`pipeline_info`](#pipeline-information)
+
+</details>
 
-![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
+[SANGER_TOL_BTK](https://pipelines.tol.sanger.ac.uk/blobtoolkit) is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes.
 
-:::note
-The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
-:::
 
-### MultiQC
+### SANGER_TOL_CPRETEXT
 
 <details markdown="1">
 <summary>Output files</summary>
 
-- `multiqc/`
-  - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
-  - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
-  - `multiqc_plots/`: directory containing static images from the report in various formats.
+- `sanger/*_curationpretext_out/`
+  - `accessory_files/*.{bigWig,bed,bedgraph}`: Track files describing Telomere, gap, coverage data across the genome. 
+  - `pretext_maps_raw`: Pre-accessory file ingestion pretext files.
+  - `pretext_maps_processed`: Post-accessory file ingestion pretext files, e.g. the final output.
+  - [`pipeline_info`](#pipeline-information)
 
 </details>
 
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory.
+[SANGER_TOL_CPRETEXT](https://pipelines.tol.sanger.ac.uk/curationpretext) is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://pipelines.tol.sanger.ac.uk/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.
 
-Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>.
 
 ### Pipeline information
 
diff --git a/docs/usage.md b/docs/usage.md
index 42521d3..b703d3e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -6,60 +6,179 @@
 
 <!-- TODO nf-core: Add documentation about anything specific to running your pipeline. For general topics, please point to (and add to) the main nf-core website. -->
 
-## Samplesheet input
+## Yaml input
 
-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+You will need to create a YAML file with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location.
 
 ```bash
 --input '[path to samplesheet file]'
 ```
 
-### Multiple runs of the same sample
+The structure of this file should be as follows:
 
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
+```yaml
+# General values for all subpipelines and modules
+assembly_id: <NAME OF ASSEMBLY>
+reference_hap1: <LOCATION OF PRIMARY ASSEMBLY FILE .FA>
+reference_hap2: <LOCATION OF HAPLOTYPE ASSEMBLY FILE .FA>
+reference_haplotigs: <LOCATION OF THE HAPLOTIGS FILE, REMOVED DURING CURATION .FA>
+
+# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore it and the pipeline will create it.
+mapped_bam: <MAPPED BAM .BAM>
+
+merquryfk:
+  fastk_hist: <THE PATH TO THE .HIST FILE>
+  fastk_ktab: <PATH TO THE DIRECTORY CONTAINING THE KTAB FILES, ENSURE THE HIDDEN FILES ARE HERE TOO>
+
+# Used by both subpipelines
+longread:
+  type: <hifi|clr|ont|illumina>
+  dir: <DIRECTORY OF LONGREAD FILES .FASTA.GZ>
+curationpretext:
+  aligner: <minimap2|BWAMEM>
+  telomere_motif: <TELOMERE MOTIF OF SAMPLE>
+  hic_dir: <DIRECTORY OF HIC READ FILES .CRAM AND .CRAI>
+btk:
+  taxid: 1464561
+  lineages: <CSV LIST OF DATABASES TO USE: "insecta_odb10,diptera_odb10">
+  gca_accession: GCA_0001 <DEFAULT, DO NOT CHANGE UNLESS YOU HAVE A GCA_ACCESSION FOR YOUR SPECIES>
+  nt_database: <DIRECTORY CONTAINING BLAST DB>
+  nt_database_prefix: <BLASTDB PREFIX>
+  diamond_uniprot_database_path: <PATH TO reference_proteomes.dmnd FROM UNIPROT>
+  diamond_nr_database_path: <PATH TO nr.dmnd>
+  ncbi_taxonomy_path: <DIRECTORY CONTAINING THE TAXDUMP>
+  ncbi_rankedlineage_path: <FOLDER CONTAINING THE rankedlineage.dmp FILE>
+  config: <PATH TO ear/conf/sanger-tol-btk.config TO OVERWRITE PROCESS LIMITS>
+```
+
+## Database download and setup (Taken from sanger-tol/blobtoolkit)
+
+The BlobToolKit pipeline can be run in many different ways. The default way requires access to several databases:
+
+1. [NCBI taxdump database](https://www.ncbi.nlm.nih.gov/taxonomy)
+2. [NCBI nucleotide BLAST database](https://blast.ncbi.nlm.nih.gov/doc/blast-help/downloadblastdata.html#databases)
+3. [UniProt reference proteomes database](https://www.uniprot.org)
+4. [BUSCO database](https://busco.ezlab.org)
+
+It is a good idea to put a date suffix for each database location so you know at a glance whether you are using the latest version. We are using the `YYYY_MM` format as we do not expect the databases to be updated more frequently than once a month. However, feel free to use `DATE=YYYY_MM_DD` or a different format if you prefer.
+
+### 1. NCBI taxdump database
 
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
+Create the database directory and move into the directory:
+
+```bash
+DATE=2023_03
+TAXDUMP=/path/to/databases/taxdump_${DATE}
+mkdir -p $TAXDUMP
+cd $TAXDUMP
 ```
 
-### Full samplesheet
+Retrieve and decompress the NCBI taxdump:
 
-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
+```bash
+curl -L ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar xzf -
+```
 
-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+### 2. NCBI nucleotide BLAST database
 
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
+Create the database directory and move into the directory:
+
+```bash
+DATE=2023_03
+NT=/path/to/databases/nt_${DATE}
+mkdir -p $NT
+cd $NT
+```
+
+Retrieve the NCBI blast nt database (version 5) files and tar gunzip them. We are using the `&&` syntax to ensure that each command completes without error before the next one is run:
+
+```bash
+wget "ftp://ftp.ncbi.nlm.nih.gov/blast/db/v5/nt.???.tar.gz" -P $NT/ &&
+for file in $NT/*.tar.gz; do
+    tar xf $file -C $NT && rm $file;
+done
+```
+
+### 3. UniProt reference proteomes database
+
+You need [diamond blast](https://github.com/bbuchfink/diamond) installed for this step. The easiest way is probably using [conda](https://anaconda.org/bioconda/diamond). Make sure you have the latest version of Diamond (>2.x.x) otherwise the `--taxonnames` argument may not work.
+
+Create the database directory and move into the directory:
+
+```bash
+DATE=2023_03
+UNIPROT=/path/to/databases/uniprot_${DATE}
+mkdir -p $UNIPROT
+cd $UNIPROT
+```
+
+The UniProt `Refseq_Proteomes_YYYY_MM.tar.gz` file is very large (>160 GB) and will take a long time to download. The command below looks complex because it needs to get around the problem of using wildcards with wget and curl.
+
+```bash
+wget -q -O $UNIPROT/reference_proteomes.tar.gz \
+  ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/$(curl \
+    -vs ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/ 2>&1 | \
+    awk '/tar.gz/ {print $9}')
+tar xf reference_proteomes.tar.gz
+
+# Create a single fasta file with all the fasta files from each subdirectory:
+touch reference_proteomes.fasta.gz
+find . -mindepth 2 | grep "fasta.gz" | grep -v 'DNA' | grep -v 'additional' | xargs cat >> reference_proteomes.fasta.gz
+
+# create the accession-to-taxid map for all reference proteome sequences:
+printf "accession\taccession.version\ttaxid\tgi\n" > reference_proteomes.taxid_map
+zcat */*/*.idmapping.gz | grep "NCBI_TaxID" | awk '{print $1 "\t" $1 "\t" $3 "\t" 0}' >> reference_proteomes.taxid_map
+
+# create the taxon aware diamond blast database
+diamond makedb -p 16 --in reference_proteomes.fasta.gz --taxonmap reference_proteomes.taxid_map --taxonnodes $TAXDUMP/nodes.dmp --taxonnames $TAXDUMP/names.dmp -d reference_proteomes.dmnd
 ```
 
-| Column    | Description                                                                                                                                                                            |
-| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+### 4. BUSCO databases
+
+Create the database directory and move into the directory:
+
+```bash
+DATE=2023_03
+BUSCO=/path/to/databases/busco_${DATE}
+mkdir -p $BUSCO
+cd $BUSCO
+```
+
+Download BUSCO data and lineages to allow BUSCO to run in offline mode:
+
+```bash
+wget -r -nH https://busco-data.ezlab.org/v5/data/
+# the trailing slash after data is important. Otherwise wget doesn't get the subdirectories
+
+# tar gunzip all folders that have been stored as tar.gz, in the same parent directories as where they were stored:
+find v5/data -name "*.tar.gz" | while read -r TAR; do tar -C `dirname $TAR` -xzf $TAR; done
+```
+
+If you have [GNU parallel](https://www.gnu.org/software/parallel/) installed, you can also use the command below which will run faster as it will run the decompression commands in parallel:
+
+```bash
+find v5/data -name "*.tar.gz" | parallel "cd {//}; tar -xzf {/}"
+```
+
+## Blobtoolkit - YAML File and Nextflow configuration
+
+As in the Snakemake version, [a YAML configuration file](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/blobtoolkit-pipeline/src#configuration) is needed to generate the metadata summary. This YAML config file can be generated with a genome accession value for released assemblies (for example, GCA_XXXXXXXXX.X) or can be passed for draft assemblies (for example, [GCA_922984935.2.yaml](assets/test/GCA_922984935.2.yaml)) using the `--yaml` parameter. Even for draft assemblies, a placeholder value should be passed with the `--accession` parameter.
+
+The data in the YAML is currently ignored in the Nextflow pipeline version. The YAML file is retained only to allow compatibility with the BlobDir dataset generated by the [Snakemake version](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/blobtoolkit-pipeline/src). The taxonomic information in the YAML file can be obtained from [NCBI Taxonomy](https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/).
 
-An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
 ## Running the pipeline
 
 The typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run sanger-tol/ear --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
+nextflow run sanger-tol/ear --input assets/test.yaml --outdir ./results  -profile docker
 ```
 
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
 
+> Please note that conda is not supported for all of the tools used by this pipeline; this limits use to docker or singularity.
+
 Note that the pipeline will create the following files in your working directory:
 
 ```bash
@@ -77,23 +196,6 @@ Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <
 Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
 :::
 
-The above pipeline run specified with a params file in yaml format:
-
-```bash
-nextflow run sanger-tol/ear -profile docker -params-file params.yaml
-```
-
-with `params.yaml` containing:
-
-```yaml
-input: './samplesheet.csv'
-outdir: './results/'
-genome: 'GRCh37'
-<...>
-```
-
-You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
-
 ### Updating the pipeline
 
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

From 8b582b6a9f793621ece916a6c0bac6677f297225 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 13:27:25 +0100
Subject: [PATCH 14/52] Module config update

---
 conf/modules.config | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index d31543e..137b892 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -12,7 +12,7 @@
 
 process {
 
-    withName: "GFASTATS|MERQURYFK_MERQURYFK|SANGER_TOL_BTK|SANGER_TOL_CPRETEXT|CURATION_PRETEXT" {
+    withName: "GFASTATS|MERQURYFK_MERQURYFK|SANGER_TOL_BTK|SANGER_TOL_CPRETEXT" {
         publishDir = [
             path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
             mode: params.publish_dir_mode,
@@ -21,18 +21,19 @@ process {
     }
 
     withName: GFASTATS {
-        ext.args = '--nstar-report'
+        ext.args            = '--nstar-report'
     }
 
     withName: MERQURYFK_MERQURYFK {
-        ext.args        = "-P."
+        ext.args            = "-P."
     }
 
     withName: SAMTOOLS_SORT {
-        ext.prefix      = { "${meta.id}_sorted"}
+        ext.prefix          = { "${meta.id}_sorted"}
     }
 
     withName: SANGER_TOL_BTK {
+        ext.pipeline_name   = "sanger-tol/blobtoolkit"
         ext.args            = ""
         ext.executor        = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'"
         ext.profiles        = "singularity,sanger"
@@ -41,6 +42,7 @@ process {
     }
 
     withName: SANGER_TOL_CPRETEXT {
+        ext.pipeline_name   = "sanger-tol/curationpretext"
         ext.args            = ""
         ext.executor        = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'"
         ext.profiles        = "singularity,sanger"

From e39ca7db7c872eaa61fa4eed4ab2c0195613f871 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 13:28:10 +0100
Subject: [PATCH 15/52] Updating subworkflows

---
 subworkflows/local/yaml_input.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
index 6561d27..916c003 100644
--- a/subworkflows/local/yaml_input.nf
+++ b/subworkflows/local/yaml_input.nf
@@ -109,7 +109,6 @@ workflow YAML_INPUT {
     btk_un_diamond_database     = Channel.of(inputs.btk.diamond_uniprot_database_path)
     btk_ncbi_taxonomy_path      = Channel.of(inputs.btk.ncbi_taxonomy_path)
     btk_ncbi_lineage_path       = Channel.of(inputs.btk.ncbi_rankedlineage_path)
-    btk_yaml                    = Channel.of(inputs.btk.btk_yaml)
     btk_taxid                   = Channel.of(inputs.btk.taxid)
     btk_gca_accession           = Channel.of(inputs.btk.gca_accession)
     busco_lineages              = Channel.of(inputs.btk.lineages)

From 427c6d6b9785704b68d9ed423e5006be24546e64 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 13:29:05 +0100
Subject: [PATCH 16/52] Updating the main workflow file

---
 workflows/ear.nf | 92 ++++++++++++++++++------------------------------
 1 file changed, 35 insertions(+), 57 deletions(-)

diff --git a/workflows/ear.nf b/workflows/ear.nf
index 9062d84..1c93b33 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -4,17 +4,20 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-// include { NEXTFLOW_RUN as CURATIONPRETEXT   } from '../modules/local/nextflow/run'
-// include { NEXTFLOW_RUN as BLOBTOOLKIT       } from '../modules/local/nextflow/run'
+// Subpipeline imports
 include { SANGER_TOL_BTK                    } from '../modules/local/sanger_tol_btk'
 include { SANGER_TOL_CPRETEXT               } from '../modules/local/sanger_tol_cpretext'
 
+// Subworkflow imports
 include { YAML_INPUT                        } from '../subworkflows/local/yaml_input'
+include { MAIN_MAPPING                      } from '../subworkflows/local/main_mapping'
+
+// Module imports
 include { GENERATE_SAMPLESHEET              } from '../modules/local/generate_samplesheet'
 include { GFASTATS                          } from '../modules/nf-core/gfastats/main'
-include { MAIN_MAPPING                      } from '../subworkflows/local/main_mapping'
 include { MERQURYFK_MERQURYFK               } from '../modules/nf-core/merquryfk/merquryfk/main'
 
+// Plugin imports
 include { paramsSummaryMap                  } from 'plugin/nf-validation'
 include { paramsSummaryMultiqc              } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { softwareVersionsToYAML            } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -36,44 +39,13 @@ workflow EAR {
     ch_versions     = Channel.empty()
     ch_align_bam    = Channel.empty()
 
+
     //
     // MODULE: YAML_INPUT
+    //          - YAML_INPUT SHOULD BE REWORKED TO BE SMARTER
     //
     YAML_INPUT(ch_input)
 
-    //
-    // MODULE: Run Sanger-ToL/CurationPretext
-    //         - This was built using: https://github.com/mahesh-panchal/nf-cascade
-    //
-    reference       = YAML_INPUT.out.reference_path.get()
-    hic_dir         = YAML_INPUT.out.cpretext_hic_dir_raw.get()
-    longread_dir    = YAML_INPUT.out.longread_dir.get()
-
-    // CURATIONPRETEXT(
-    //     "sanger-tol/curationpretext",
-    //     [
-    //         "-r 1.0.0",
-    //         "--input",
-    //         reference,
-    //         "--longread",
-    //         longread_dir,
-    //         "--cram",
-    //         hic_dir,
-    //         "-profile singularity,sanger"
-    //     ].join(" ").trim(), // workflow opts
-    //     Channel.value([]),  //readWithDefault( params.demo.params_file, Channel.value([]) ), // params file
-    //     Channel.value([]),  // samplesheet - not used by this pipeline
-    //     Channel.value([])   //readWithDefault( params.demo.add_config, Channel.value([]) ),  // custom config
-    // )
-
-    SANGER_TOL_CPRETEXT(
-        reference,
-        longread_dir,
-        hic_dir,
-        []
-    )
-    ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
-
 
     //
     // MODULE: ASSEMBLY STATISTICS FOR THE FASTA
@@ -107,6 +79,8 @@ workflow EAR {
             )
         }
         .set { merquryfk_input }
+
+
     //
     // MODULE: MERQURYFK PLOTS OF GENOME
     //
@@ -116,7 +90,10 @@ workflow EAR {
     ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
 
 
-    ch_mapped_bam = YAML_INPUT.out.mapped_bam
+    //
+    // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE
+    //          SKIP THE MAPPING SUBWORKFLOW
+    //
     if (!params.mapped) {
         //
         // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
@@ -130,17 +107,20 @@ workflow EAR {
         )
         ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
         ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
+    } else {
+        ch_mapped_bam = YAML_INPUT.out.mapped_bam
     }
 
+
     //
     // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
     //
-
     GENERATE_SAMPLESHEET(
         ch_mapped_bam
     )
     ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
 
+
     //
     // MODULE: Run Sanger-ToL/BlobToolKit
     //
@@ -153,13 +133,29 @@ workflow EAR {
         YAML_INPUT.out.btk_un_diamond_database,
         YAML_INPUT.out.btk_config,
         YAML_INPUT.out.btk_ncbi_taxonomy_path,
-        YAML_INPUT.out.btk_yaml,
         YAML_INPUT.out.busco_lineages,
         YAML_INPUT.out.btk_taxid,
         'GCA_0001'
     )
     ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
 
+
+    //
+    // MODULE: Run Sanger-ToL/CurationPretext
+    //
+    reference       = YAML_INPUT.out.reference_path.get()
+    hic_dir         = YAML_INPUT.out.cpretext_hic_dir_raw.get()
+    longread_dir    = YAML_INPUT.out.longread_dir.get()
+
+    SANGER_TOL_CPRETEXT(
+        reference,
+        longread_dir,
+        hic_dir,
+        []
+    )
+    ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
+
+
     //
     // Collate and save software versions
     //
@@ -179,24 +175,6 @@ workflow EAR {
     versions       = ch_versions                 // channel: [ path(versions.yml) ]
 }
 
-
-//
-// MODULE: THERE ARE TWO DATABASES WHICH ARE FREQUENTLY THE SAME DATABASE
-//          THIS STOPS NAME CONFLICTS BEFORE THEY ARE COPIED TO THE SAME PLACE
-//
-process RenameDatabase {
-    tag "Rename DMND Database"
-    executor 'local'
-
-    input:
-    db_path
-
-    output:
-    path "UN.dmnd"
-
-    "true"
-}
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     THE END

From b443920ea3a40d652c1eb4de824d1cc3a941cb21 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 13:29:47 +0100
Subject: [PATCH 17/52] Update to sample yaml and files

---
 assets/idCulLati1.yaml                        |  19 +-
 main.nf                                       |   4 +-
 modules.json                                  |  21 -
 modules/local/sanger_tol_btk.nf               |  49 +-
 modules/local/sanger_tol_cpretext.nf          |  36 +-
 modules/nf-core/busco/busco/environment.yml   |   7 -
 modules/nf-core/busco/busco/main.nf           | 107 ---
 modules/nf-core/busco/busco/meta.yml          |  98 ---
 .../nf-core/busco/busco/tests/main.nf.test    | 419 ------------
 .../busco/busco/tests/main.nf.test.snap       | 230 -------
 .../busco/tests/nextflow.augustus.config      |   5 -
 .../nf-core/busco/busco/tests/nextflow.config |   5 -
 .../busco/busco/tests/nextflow.metaeuk.config |   5 -
 .../nf-core/busco/busco/tests/old_test.yml    | 624 ------------------
 modules/nf-core/busco/busco/tests/tags.yml    |   2 -
 modules/nf-core/fastqc/environment.yml        |   7 -
 modules/nf-core/fastqc/main.nf                |  61 --
 modules/nf-core/fastqc/meta.yml               |  57 --
 modules/nf-core/fastqc/tests/main.nf.test     | 212 ------
 .../nf-core/fastqc/tests/main.nf.test.snap    |  88 ---
 modules/nf-core/fastqc/tests/tags.yml         |   2 -
 modules/nf-core/multiqc/environment.yml       |   7 -
 modules/nf-core/multiqc/main.nf               |  55 --
 modules/nf-core/multiqc/meta.yml              |  58 --
 modules/nf-core/multiqc/tests/main.nf.test    |  84 ---
 .../nf-core/multiqc/tests/main.nf.test.snap   |  41 --
 modules/nf-core/multiqc/tests/tags.yml        |   2 -
 nextflow.config                               |   1 +
 nextflow_schema.json                          |   2 +-
 29 files changed, 73 insertions(+), 2235 deletions(-)
 delete mode 100644 modules/nf-core/busco/busco/environment.yml
 delete mode 100644 modules/nf-core/busco/busco/main.nf
 delete mode 100644 modules/nf-core/busco/busco/meta.yml
 delete mode 100644 modules/nf-core/busco/busco/tests/main.nf.test
 delete mode 100644 modules/nf-core/busco/busco/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/busco/busco/tests/nextflow.augustus.config
 delete mode 100644 modules/nf-core/busco/busco/tests/nextflow.config
 delete mode 100644 modules/nf-core/busco/busco/tests/nextflow.metaeuk.config
 delete mode 100644 modules/nf-core/busco/busco/tests/old_test.yml
 delete mode 100644 modules/nf-core/busco/busco/tests/tags.yml
 delete mode 100644 modules/nf-core/fastqc/environment.yml
 delete mode 100644 modules/nf-core/fastqc/main.nf
 delete mode 100644 modules/nf-core/fastqc/meta.yml
 delete mode 100644 modules/nf-core/fastqc/tests/main.nf.test
 delete mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/fastqc/tests/tags.yml
 delete mode 100644 modules/nf-core/multiqc/environment.yml
 delete mode 100644 modules/nf-core/multiqc/main.nf
 delete mode 100644 modules/nf-core/multiqc/meta.yml
 delete mode 100644 modules/nf-core/multiqc/tests/main.nf.test
 delete mode 100644 modules/nf-core/multiqc/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/multiqc/tests/tags.yml

diff --git a/assets/idCulLati1.yaml b/assets/idCulLati1.yaml
index 85479be..ea48cc2 100644
--- a/assets/idCulLati1.yaml
+++ b/assets/idCulLati1.yaml
@@ -1,7 +1,17 @@
+# General values for all subpipelines and modules
 assembly_id: idCulLati1_ear
 reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/primary.fa
 reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/hap2.fa
+reference_haplotigs: /
+
+# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore.
 mapped_bam: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/mapped_bam.bam
+
+merquryfk:
+  fastk_hist: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/idCulLati1.k31.hist
+  fastk_ktab: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/
+
+# Used by both subpipelines
 longread:
   type: hifi
   dir: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/fasta/
@@ -9,10 +19,10 @@ curationpretext:
   aligner: minimap2
   telomere_motif: TTAGG
   hic_dir: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati2/hic-arima2/
-merquryfk:
-  fastk_hist: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/idCulLati1.k31.hist
-  fastk_ktab: /lustre/scratch122/tol/data/a/5/e/1/6/d/Culex_laticinctus/genomic_data/idCulLati1/pacbio/kmer/k31/
 btk:
+  taxid: 1464561
+  lineages: "insecta_odb10"
+  gca_accession: GCA_0001
   nt_database: /data/blastdb/Supported/NT/current
   nt_database_prefix: nt
   diamond_uniprot_database_path: /lustre/scratch123/tol/resources/uniprot_reference_proteomes/latest/reference_proteomes.dmnd
@@ -20,7 +30,4 @@ btk:
   ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump/
   ncbi_rankedlineage_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump/rankedlineage.dmp
   btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml
-  taxid: 1464561
-  gca_accession: GCA_0001
-  lineages: "insecta_odb10"
   config: /nfs/treeoflife-01/teams/tola/users/dp24/ear/conf/sanger-tol-btk.config
diff --git a/main.nf b/main.nf
index 3b7bca7..1e4ece0 100644
--- a/main.nf
+++ b/main.nf
@@ -29,7 +29,7 @@ include { EAR  } from './workflows/ear'
 workflow SANGERTOL_EAR {
 
     take:
-    samplesheet // channel: samplesheet read in from --input
+    input_yaml // channel: input_yaml read in from --input
 
     main:
 
@@ -37,7 +37,7 @@ workflow SANGERTOL_EAR {
     // WORKFLOW: Run pipeline
     //
     EAR (
-        samplesheet
+        input_yaml
     )
 
 
diff --git a/modules.json b/modules.json
index 45499e0..23ee7d4 100644
--- a/modules.json
+++ b/modules.json
@@ -5,20 +5,6 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
-                    "busco/busco": {
-                        "branch": "master",
-                        "git_sha": "17486961b8b1ab1aae258c83a7e947b40d8ab670",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
-                    "fastqc": {
-                        "branch": "master",
-                        "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
                     "gfastats": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
@@ -41,13 +27,6 @@
                             "modules"
                         ]
                     },
-                    "multiqc": {
-                        "branch": "master",
-                        "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
-                        "installed_by": [
-                            "modules"
-                        ]
-                    },
                     "samtools/merge": {
                         "branch": "master",
                         "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index 4582179..6ce9191 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -7,46 +7,49 @@ process SANGER_TOL_BTK {
     tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet
     tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv")
     path blastp, stageAs: "blastp.dmnd"
-    path blastn
+    path blastn, stageAs: ""
     path blastx
     path config_file
     path tax_dump
-    path btk_yaml, stageAs: "BTK.yaml"
     val busco_lineages
     val taxon
     val gca_accession
 
     output:
-    tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/REFERENCE"),      emit: dataset
-    path("${meta.id}_btk_out/blobtoolkit/plots"),                           emit: plots
-    path("${meta.id}_btk_out/blobtoolkit/REFERENCE/summary.json.gz"),       emit: summary_json
-    path("${meta.id}_btk_out/busco"),                                       emit: busco_data
-    path("${meta.id}_btk_out/multiqc"),                                     emit: multiqc_report
-    path("blobtoolkit_pipeline_info"),                                      emit: pipeline_info
-    path "versions.yml",                                                    emit: versions
+    tuple val(meta), path("*_out/blobtoolkit/REFERENCE"),      emit: dataset
+    path("*_out/blobtoolkit/plots"),                           emit: plots
+    path("*_out/blobtoolkit/REFERENCE/summary.json.gz"),       emit: summary_json
+    path("*_out/busco"),                                       emit: busco_data
+    path("*_out/multiqc"),                                     emit: multiqc_report
+    path("*_out/blobtoolkit_pipeline_info"),                   emit: pipeline_info
+    path "versions.yml",                                       emit: versions
 
     script:
-    def args                =   task.ext.args           ?:  ""
-    def executor            =   task.ext.executor       ?:  ""
-    def profiles            =   task.ext.profiles       ?:  ""
-    def get_version         =   task.ext.version_data   ?:  "UNKNOWN - SETTING NOT SET"
-    def config              =   config_file             ? "-c $config_file"         : ""
-    def pipeline_version    =   task.ext.version        ?: "draft_assemblies"
-    // YAML used to avoid the use of GCA accession number
-    //    https://github.com/sanger-tol/blobtoolkit/issues/77
+    def pipeline_name                       =   task.ext.pipeline_name
+    def (pipeline_prefix,pipeline_suffix)   =   pipeline_name.split('/')
+    def output_dir                          =   "${meta.id}_${pipeline_suffix}_out"
+    def args                                =   task.ext.args           ?:  ""
+    def executor                            =   task.ext.executor       ?:  ""
+    def profiles                            =   task.ext.profiles       ?:  ""
+    def get_version                         =   task.ext.version_data   ?:  "UNKNOWN - SETTING NOT SET"
+    def config                              =   config_file             ? "-c $config_file"         : ""
+    def pipeline_version                    =   task.ext.version        ?: "main"
 
     // Seems to be an issue where a nested pipeline can't see the files in the same directory
     // Running realpath gets around this but the files copied into the folder are
     // now just wasted space. Should be fixed with using Mahesh's method of nesting but
     // this is proving a bit complicated with BTK
 
-    // outdir should be an arg
-
     // blastx and blastp can use the same database hence the StageAs
 
+    // Running these as unique jobs means we don't have to worry about multiple pipeline
+    // head jobs running in the same initial Nextflow head, this balloons memory
+    // for LSF we can use -Is -tty to keep the output of this sub-pipeline in
+    // terminal, keeping the job open until the pipeline completes
 
+    // the final cat appends the sub-pipeline's versions file to this process's versions.yml
     """
-    $executor 'nextflow run sanger-tol/blobtoolkit \\
+    $executor 'nextflow run $pipeline_name \\
         -r $pipeline_version \\
         -profile  $profiles \\
         --input "\$(realpath $samplesheet_csv)" \\
@@ -62,18 +65,18 @@ process SANGER_TOL_BTK {
         $args \\
         -resume'
 
-    mv ${meta.id}_btk_out/pipeline_info blobtoolkit_pipeline_info
-
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         Blobtoolkit: $pipeline_version
         Nextflow: \$(nextflow -v | cut -d " " -f3)
         executor system: $get_version
     END_VERSIONS
+
+    cat ${output_dir}/pipeline_info/software_version.yml >> versions.yml
     """
 
     stub:
-    def pipeline_version    =   task.ext.version        ?: "draft_assemblies"
+    def pipeline_version    =   task.ext.version        ?: "main"
 
     """
     mkdir -p ${meta.id}_btk_out/blobtoolkit/${meta.id}_out
diff --git a/modules/local/sanger_tol_cpretext.nf b/modules/local/sanger_tol_cpretext.nf
index eec53ae..5b986e1 100644
--- a/modules/local/sanger_tol_cpretext.nf
+++ b/modules/local/sanger_tol_cpretext.nf
@@ -9,36 +9,60 @@ process SANGER_TOL_CPRETEXT {
     path(config_file)
 
     output:
-    tuple val(reference), path("*_out/*"),      emit: dataset
-    path "versions.yml",                        emit: versions
+    tuple val(reference), path("*_out/*"),  emit: dataset
+    path "versions.yml",                    emit: versions
 
     script:
-    def pipeline_name                       =   "sanger-tol/curationpretext" // should be a task.ext.args
+    def pipeline_name                       =   task.ext.pipeline_name      ?:  "sanger-tol/curationpretext" // fall back to the previously hard-coded pipeline so split() never sees null
     def (pipeline_prefix,pipeline_suffix)   =   pipeline_name.split('/')
+    def output_dir                          =   "${reference}_${pipeline_suffix}_out"
     def args                                =   task.ext.args               ?:  ""
     def executor                            =   task.ext.executor           ?:  ""
     def profiles                            =   task.ext.profiles           ?:  ""
     def get_version                         =   task.ext.version_data       ?:  "UNKNOWN - SETTING NOT SET"
     def config                              =   config_file                 ? "-c $config_file"         : ""
-    def pipeline_version                    =   task.ext.version            ?: "draft_assemblies"
+    def pipeline_version                    =   task.ext.version            ?: "main"
 
     // Seems to be an issue where a nested pipeline can't see the files in the same directory
     // Running realpath gets around this but the files copied into the folder are
     // now just wasted space. Should be fixed with using Mahesh's method of nesting but
     // this is proving a bit complicated with BTK
 
-    // outdir should be an arg
+    // Running these as unique jobs means we don't have to worry about multiple pipeline
+    // head jobs running in the same initial Nextflow head, this balloons memory
+    // for LSF we can use -Is -tty to keep the output of this sub-pipeline in
+    // terminal, keeping the job open until the pipeline completes
+
+    // the final cat appends the sub-pipeline's versions file to this process's versions.yml
     """
     $executor 'nextflow run $pipeline_name \\
         -r $pipeline_version \\
         -profile  $profiles \\
         --input "\$(realpath $reference)" \\
-        --outdir ${reference}_${pipeline_suffix}_out \\
+        --outdir $output_dir \\
         --longread "\$(realpath $longread_dir)" \\
         --cram "\$(realpath $cram_dir)" \\
         $args \\
         $config \\
         -resume'
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        $pipeline_suffix: $pipeline_version
+        Nextflow: \$(nextflow -v | cut -d " " -f3)
+        executor system: $get_version
+    END_VERSIONS
+
+    cat ${output_dir}/pipeline_info/software_version.yml >> versions.yml
+    """
+
+    stub:
+    def pipeline_version                    =   task.ext.version        ?: "main"
+    def (pipeline_prefix,pipeline_suffix)   =   (task.ext.pipeline_name ?: "sanger-tol/curationpretext").split('/') // pipeline_name is declared only in script:, so read it from task.ext here
+    def output_dir                          =   "${reference}_${pipeline_suffix}_out"
+    """
+    mkdir ${output_dir}
+    touch ${output_dir}/reference.txt
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/busco/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml
deleted file mode 100644
index 06a5d93..0000000
--- a/modules/nf-core/busco/busco/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: busco_busco
-channels:
-  - conda-forge
-  - bioconda
-  - defaults
-dependencies:
-  - bioconda::busco=5.7.1
diff --git a/modules/nf-core/busco/busco/main.nf b/modules/nf-core/busco/busco/main.nf
deleted file mode 100644
index f7c1a66..0000000
--- a/modules/nf-core/busco/busco/main.nf
+++ /dev/null
@@ -1,107 +0,0 @@
-process BUSCO_BUSCO {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0':
-        'biocontainers/busco:5.7.1--pyhdfd78af_0' }"
-
-    input:
-    tuple val(meta), path(fasta, stageAs:'tmp_input/*')
-    val mode                              // Required:    One of genome, proteins, or transcriptome
-    val lineage                           // Required:    lineage to check against, "auto" enables --auto-lineage instead
-    path busco_lineages_path              // Recommended: path to busco lineages - downloads if not set
-    path config_file                      // Optional:    busco configuration file
-
-    output:
-    tuple val(meta), path("*-busco.batch_summary.txt")                , emit: batch_summary
-    tuple val(meta), path("short_summary.*.txt")                      , emit: short_summaries_txt   , optional: true
-    tuple val(meta), path("short_summary.*.json")                     , emit: short_summaries_json  , optional: true
-    tuple val(meta), path("*-busco/*/run_*/full_table.tsv")           , emit: full_table            , optional: true
-    tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv")   , emit: missing_busco_list    , optional: true
-    tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins  , optional: true
-    tuple val(meta), path("*-busco/*/run_*/busco_sequences")          , emit: seq_dir
-    tuple val(meta), path("*-busco/*/translated_proteins")            , emit: translated_dir        , optional: true
-    tuple val(meta), path("*-busco")                                  , emit: busco_dir
-    path "versions.yml"                                               , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    if ( mode !in [ 'genome', 'proteins', 'transcriptome' ] ) {
-        error "Mode must be one of 'genome', 'proteins', or 'transcriptome'."
-    }
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}-${lineage}"
-    def busco_config = config_file ? "--config $config_file" : ''
-    def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}"
-    def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : ''
-    """
-    # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
-    # Check for container variable initialisation script and source it.
-    if [ -f "/usr/local/env-activate.sh" ]; then
-        set +u  # Otherwise, errors out because of various unbound variables
-        . "/usr/local/env-activate.sh"
-        set -u
-    fi
-
-    # If the augustus config directory is not writable, then copy to writeable area
-    if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then
-        # Create writable tmp directory for augustus
-        AUG_CONF_DIR=\$( mktemp -d -p \$PWD )
-        cp -r \$AUGUSTUS_CONFIG_PATH/* \$AUG_CONF_DIR
-        export AUGUSTUS_CONFIG_PATH=\$AUG_CONF_DIR
-        echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}"
-    fi
-
-    # Ensure the input is uncompressed
-    INPUT_SEQS=input_seqs
-    mkdir "\$INPUT_SEQS"
-    cd "\$INPUT_SEQS"
-    for FASTA in ../tmp_input/*; do
-        if [ "\${FASTA##*.}" == 'gz' ]; then
-            gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .gz )
-        else
-            ln -s "\$FASTA" .
-        fi
-    done
-    cd ..
-
-    busco \\
-        --cpu $task.cpus \\
-        --in "\$INPUT_SEQS" \\
-        --out ${prefix}-busco \\
-        --mode $mode \\
-        $busco_lineage \\
-        $busco_lineage_dir \\
-        $busco_config \\
-        $args
-
-    # clean up
-    rm -rf "\$INPUT_SEQS"
-
-    # Move files to avoid staging/publishing issues
-    mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt
-    mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found."
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix      = task.ext.prefix ?: "${meta.id}-${lineage}"
-    def fasta_name  = files(fasta).first().name - '.gz'
-    """
-    touch ${prefix}-busco.batch_summary.txt
-    mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml
deleted file mode 100644
index 29745d2..0000000
--- a/modules/nf-core/busco/busco/meta.yml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: busco_busco
-description: Benchmarking Universal Single Copy Orthologs
-keywords:
-  - quality control
-  - genome
-  - transcriptome
-  - proteome
-tools:
-  - busco:
-      description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB.
-      homepage: https://busco.ezlab.org/
-      documentation: https://busco.ezlab.org/busco_userguide.html
-      tool_dev_url: https://gitlab.com/ezlab/busco
-      doi: "10.1007/978-1-4939-9173-0_14"
-      licence: ["MIT"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - fasta:
-      type: file
-      description: Nucleic or amino acid sequence file in FASTA format.
-      pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}"
-  - mode:
-      type: string
-      description: The mode to run Busco in. One of genome, proteins, or transcriptome
-      pattern: "{genome,proteins,transcriptome}"
-  - lineage:
-      type: string
-      description: The BUSCO lineage to use, or "auto" to automatically select lineage
-  - busco_lineages_path:
-      type: directory
-      description: Path to local BUSCO lineages directory.
-  - config_file:
-      type: file
-      description: Path to BUSCO config file.
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - batch_summary:
-      type: file
-      description: Summary of all sequence files analyzed
-      pattern: "*-busco.batch_summary.txt"
-  - short_summaries_txt:
-      type: file
-      description: Short Busco summary in plain text format
-      pattern: "short_summary.*.txt"
-  - short_summaries_json:
-      type: file
-      description: Short Busco summary in JSON format
-      pattern: "short_summary.*.json"
-  - busco_dir:
-      type: directory
-      description: BUSCO lineage specific output
-      pattern: "*-busco"
-  - full_table:
-      type: file
-      description: Full BUSCO results table
-      pattern: "full_table.tsv"
-  - missing_busco_list:
-      type: file
-      description: List of missing BUSCOs
-      pattern: "missing_busco_list.tsv"
-  - single_copy_proteins:
-      type: file
-      description: Fasta file of single copy proteins (transcriptome mode)
-      pattern: "single_copy_proteins.faa"
-  - seq_dir:
-      type: directory
-      description: BUSCO sequence directory
-      pattern: "busco_sequences"
-  - translated_dir:
-      type: directory
-      description: Six frame translations of each transcript made by the transcriptome mode
-      pattern: "translated_dir"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@priyanka-surana"
-  - "@charles-plessy"
-  - "@mahesh-panchal"
-  - "@muffato"
-  - "@jvhagey"
-  - "@gallvp"
-maintainers:
-  - "@priyanka-surana"
-  - "@charles-plessy"
-  - "@mahesh-panchal"
-  - "@muffato"
-  - "@jvhagey"
-  - "@gallvp"
diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test
deleted file mode 100644
index 16b708b..0000000
--- a/modules/nf-core/busco/busco/tests/main.nf.test
+++ /dev/null
@@ -1,419 +0,0 @@
-nextflow_process {
-
-    name "Test Process BUSCO_BUSCO"
-    script "../main.nf"
-    process "BUSCO_BUSCO"
-
-    tag "modules"
-    tag "modules_nfcore"
-    tag "busco"
-    tag "busco/busco"
-
-    test("test_busco_genome_single_fasta") {
-
-        config './nextflow.config'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
-                ]
-                input[1] = 'genome'
-                input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues
-                input[3] = [] // Download busco lineage
-                input[4] = [] // No config
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(path(process.out.short_summaries_txt[0][1]).text) {
-                assert contains('BUSCO version')
-                assert contains('The lineage dataset is')
-                assert contains('BUSCO was run in mode')
-                assert contains('Complete BUSCOs')
-                assert contains('Missing BUSCOs')
-                assert contains('Dependencies and versions')
-            }
-
-            with(path(process.out.short_summaries_json[0][1]).text) {
-                assert contains('one_line_summary')
-                assert contains('mode')
-                assert contains('dataset')
-            }
-
-            assert snapshot(
-                    process.out.batch_summary[0][1],
-                    process.out.full_table[0][1],
-                    process.out.missing_busco_list[0][1],
-                    process.out.versions[0]
-                ).match()
-
-            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
-                assert contains('single_copy_busco_sequences.tar.gz')
-                assert contains('multi_copy_busco_sequences.tar.gz')
-                assert contains('fragmented_busco_sequences.tar.gz')
-            }
-
-            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
-                assert contains('DEBUG:busco.run_BUSCO')
-                assert contains('Results from dataset')
-                assert contains('how to cite BUSCO')
-            }
-
-            assert process.out.single_copy_proteins == []
-            assert process.out.translated_dir == []
-        }
-    }
-
-    test("test_busco_genome_multi_fasta") {
-
-        config './nextflow.config'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    [
-                        file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true),
-                        file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)
-                    ]
-                ]
-                input[1] = 'genome'
-                input[2] = 'bacteria_odb10'
-                input[3] = []
-                input[4] = []
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(path(process.out.short_summaries_txt[0][1][0]).text) {
-                assert contains('BUSCO version')
-                assert contains('The lineage dataset is')
-                assert contains('BUSCO was run in mode')
-                assert contains('Complete BUSCOs')
-                assert contains('Missing BUSCOs')
-                assert contains('Dependencies and versions')
-            }
-
-            with(path(process.out.short_summaries_txt[0][1][1]).text) {
-                assert contains('BUSCO version')
-                assert contains('The lineage dataset is')
-                assert contains('BUSCO was run in mode')
-                assert contains('Complete BUSCOs')
-                assert contains('Missing BUSCOs')
-                assert contains('Dependencies and versions')
-            }
-
-            with(path(process.out.short_summaries_json[0][1][0]).text) {
-                assert contains('one_line_summary')
-                assert contains('mode')
-                assert contains('dataset')
-            }
-
-            with(path(process.out.short_summaries_json[0][1][1]).text) {
-                assert contains('one_line_summary')
-                assert contains('mode')
-                assert contains('dataset')
-            }
-
-            assert snapshot(
-                    process.out.batch_summary[0][1],
-                    process.out.full_table[0][1],
-                    process.out.missing_busco_list[0][1],
-                    process.out.versions[0]
-                ).match()
-
-            with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) {
-                assert contains('single_copy_busco_sequences.tar.gz')
-                assert contains('multi_copy_busco_sequences.tar.gz')
-                assert contains('fragmented_busco_sequences.tar.gz')
-            }
-
-            with(file(process.out.seq_dir[0][1][1]).listFiles().collect { it.name }) {
-                assert contains('single_copy_busco_sequences.tar.gz')
-                assert contains('multi_copy_busco_sequences.tar.gz')
-                assert contains('fragmented_busco_sequences.tar.gz')
-            }
-
-            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
-                assert contains('DEBUG:busco.run_BUSCO')
-                assert contains('Results from dataset')
-                assert contains('how to cite BUSCO')
-            }
-
-            assert process.out.single_copy_proteins == []
-            assert process.out.translated_dir == []
-        }
-
-    }
-
-    test("test_busco_eukaryote_metaeuk") {
-
-        config './nextflow.metaeuk.config'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
-                ]
-                input[1] = 'genome'
-                input[2] = 'eukaryota_odb10'
-                input[3] = []
-                input[4] = []
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(path(process.out.short_summaries_txt[0][1]).text) {
-                assert contains('BUSCO version')
-                assert contains('The lineage dataset is')
-                assert contains('BUSCO was run in mode')
-                assert contains('Complete BUSCOs')
-                assert contains('Missing BUSCOs')
-                assert contains('Dependencies and versions')
-            }
-
-            with(path(process.out.short_summaries_json[0][1]).text) {
-                assert contains('one_line_summary')
-                assert contains('mode')
-                assert contains('dataset')
-            }
-
-            assert snapshot(
-                    process.out.batch_summary[0][1],
-                    process.out.full_table[0][1],
-                    process.out.missing_busco_list[0][1],
-                    process.out.versions[0]
-                ).match()
-
-            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
-                assert contains('single_copy_busco_sequences.tar.gz')
-                assert contains('multi_copy_busco_sequences.tar.gz')
-                assert contains('fragmented_busco_sequences.tar.gz')
-            }
-
-            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
-                assert contains('DEBUG:busco.run_BUSCO')
-                assert contains("'use_augustus', 'False'")
-                assert contains("'use_metaeuk', 'True'") // METAEUK
-                assert contains('Results from dataset')
-                assert contains('how to cite BUSCO')
-
-            }
-
-            assert process.out.single_copy_proteins == []
-            assert process.out.translated_dir == []
-        }
-
-    }
-
-    test("test_busco_eukaryote_augustus") {
-
-        config './nextflow.augustus.config'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
-                ]
-                input[1] = 'genome'
-                input[2] = 'eukaryota_odb10'
-                input[3] = []
-                input[4] = []
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            assert snapshot(
-                    process.out.batch_summary[0][1],
-                    process.out.versions[0]
-                ).match()
-
-            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
-                assert contains('DEBUG:busco.run_BUSCO')
-                assert contains("'use_augustus', 'True'")
-                assert contains("'use_metaeuk', 'False'") // AUGUSTUS
-                assert contains('Augustus did not recognize any genes')
-
-            }
-
-            assert process.out.short_summaries_json == []
-            assert process.out.short_summaries_txt == []
-            assert process.out.missing_busco_list == []
-            assert process.out.full_table == []
-            assert process.out.single_copy_proteins == []
-            assert process.out.translated_dir == []
-        }
-
-    }
-
-    test("test_busco_protein") {
-
-        config './nextflow.config'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true)
-                ]
-                input[1] = 'proteins'
-                input[2] = 'bacteria_odb10'
-                input[3] = []
-                input[4] = []
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(path(process.out.short_summaries_txt[0][1]).text) {
-                assert contains('BUSCO version')
-                assert contains('The lineage dataset is')
-                assert contains('BUSCO was run in mode')
-                assert contains('Complete BUSCOs')
-                assert contains('Missing BUSCOs')
-                assert contains('Dependencies and versions')
-            }
-
-            with(path(process.out.short_summaries_json[0][1]).text) {
-                assert contains('one_line_summary')
-                assert contains('mode')
-                assert contains('dataset')
-            }
-
-            assert snapshot(
-                    process.out.batch_summary[0][1],
-                    process.out.full_table[0][1],
-                    process.out.missing_busco_list[0][1],
-                    process.out.versions[0]
-                ).match()
-
-            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
-                assert contains('single_copy_busco_sequences.tar.gz')
-                assert contains('multi_copy_busco_sequences.tar.gz')
-                assert contains('fragmented_busco_sequences.tar.gz')
-            }
-
-            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
-                assert contains('DEBUG:busco.run_BUSCO')
-                assert contains('Results from dataset')
-                assert contains('how to cite BUSCO')
-            }
-
-            assert process.out.single_copy_proteins == []
-            assert process.out.translated_dir == []
-        }
-
-    }
-
-    test("test_busco_transcriptome") {
-
-        config './nextflow.config'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)
-                ]
-                input[1] = 'transcriptome'
-                input[2] = 'bacteria_odb10'
-                input[3] = []
-                input[4] = []
-                """
-            }
-        }
-
-        then {
-            assert process.success
-
-            with(path(process.out.short_summaries_txt[0][1]).text) {
-                assert contains('BUSCO version')
-                assert contains('The lineage dataset is')
-                assert contains('BUSCO was run in mode')
-                assert contains('Complete BUSCOs')
-                assert contains('Missing BUSCOs')
-                assert contains('Dependencies and versions')
-            }
-
-            with(path(process.out.short_summaries_json[0][1]).text) {
-                assert contains('one_line_summary')
-                assert contains('mode')
-                assert contains('dataset')
-            }
-
-            assert snapshot(
-                    process.out.batch_summary[0][1],
-                    process.out.full_table[0][1],
-                    process.out.missing_busco_list[0][1],
-                    process.out.translated_dir[0][1],
-                    process.out.single_copy_proteins[0][1],
-                    process.out.versions[0]
-                ).match()
-
-            with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
-                assert contains('single_copy_busco_sequences.tar.gz')
-                assert contains('multi_copy_busco_sequences.tar.gz')
-                assert contains('fragmented_busco_sequences.tar.gz')
-            }
-
-            with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
-                assert contains('DEBUG:busco.run_BUSCO')
-                assert contains('Results from dataset')
-                assert contains('how to cite BUSCO')
-            }
-        }
-
-    }
-
-    test("minimal-stub") {
-
-        options '-stub'
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
-                ]
-                input[1] = 'genome'
-                input[2] = 'bacteria_odb10'
-                input[3] = []
-                input[4] = []
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-    }
-
-}
diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap
deleted file mode 100644
index 1b6411b..0000000
--- a/modules/nf-core/busco/busco/tests/main.nf.test.snap
+++ /dev/null
@@ -1,230 +0,0 @@
-{
-    "minimal-stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "2": [
-                    
-                ],
-                "3": [
-                    
-                ],
-                "4": [
-                    
-                ],
-                "5": [
-                    
-                ],
-                "6": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            
-                        ]
-                    ]
-                ],
-                "7": [
-                    
-                ],
-                "8": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            [
-                                [
-                                    [
-                                        
-                                    ]
-                                ]
-                            ]
-                        ]
-                    ]
-                ],
-                "9": [
-                    "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-                ],
-                "batch_summary": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "busco_dir": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            [
-                                [
-                                    [
-                                        
-                                    ]
-                                ]
-                            ]
-                        ]
-                    ]
-                ],
-                "full_table": [
-                    
-                ],
-                "missing_busco_list": [
-                    
-                ],
-                "seq_dir": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            
-                        ]
-                    ]
-                ],
-                "short_summaries_json": [
-                    
-                ],
-                "short_summaries_txt": [
-                    
-                ],
-                "single_copy_proteins": [
-                    
-                ],
-                "translated_dir": [
-                    
-                ],
-                "versions": [
-                    "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-05-03T13:28:04.451297"
-    },
-    "test_busco_eukaryote_augustus": {
-        "content": [
-            "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89",
-            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-05-03T13:26:36.974986"
-    },
-    "test_busco_genome_single_fasta": {
-        "content": [
-            "test-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be",
-            "full_table.tsv:md5,638fe7590f442c57361554dae330eca1",
-            "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a",
-            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-05-03T13:22:45.07816"
-    },
-    "test_busco_genome_multi_fasta": {
-        "content": [
-            "test-bacteria_odb10-busco.batch_summary.txt:md5,fcd3c208913e8abda3d6742c43fec5fa",
-            [
-                "full_table.tsv:md5,c657edcc7d0de0175869717551df6e83",
-                "full_table.tsv:md5,638fe7590f442c57361554dae330eca1"
-            ],
-            [
-                "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112",
-                "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a"
-            ],
-            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-05-03T13:23:50.255602"
-    },
-    "test_busco_eukaryote_metaeuk": {
-        "content": [
-            "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6",
-            "full_table.tsv:md5,92b1b1d5cb5ea0e2093d16f00187e8c7",
-            "missing_busco_list.tsv:md5,0352e563de290bf804c708323c35a9e3",
-            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-05-03T13:25:38.159041"
-    },
-    "test_busco_transcriptome": {
-        "content": [
-            "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64",
-            "full_table.tsv:md5,1b2ce808fdafa744c56b5f781551272d",
-            "missing_busco_list.tsv:md5,a6931b6470262b997b8b99ea0f1d14a4",
-            [
-                "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0",
-                "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195",
-                "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc",
-                "1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a",
-                "143460at2.faa:md5,d887431fd988a5556a523440f02d9594",
-                "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5",
-                "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67",
-                "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248",
-                "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57",
-                "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5",
-                "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0",
-                "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08",
-                "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5",
-                "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829",
-                "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc",
-                "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999",
-                "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845",
-                "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793",
-                "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d",
-                "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69",
-                "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb",
-                "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c"
-            ],
-            "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764",
-            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-05-03T13:27:53.992893"
-    },
-    "test_busco_protein": {
-        "content": [
-            "test-bacteria_odb10-busco.batch_summary.txt:md5,f5a782378f9f94a748aa907381fdef91",
-            "full_table.tsv:md5,812ab6a0496fccab774643cf40c4f2a8",
-            "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112",
-            "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-05-03T13:27:12.724862"
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/busco/busco/tests/nextflow.augustus.config b/modules/nf-core/busco/busco/tests/nextflow.augustus.config
deleted file mode 100644
index 84daa69..0000000
--- a/modules/nf-core/busco/busco/tests/nextflow.augustus.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
-    withName: 'BUSCO_BUSCO' {
-        ext.args = '--tar --augustus'
-    }
-}
diff --git a/modules/nf-core/busco/busco/tests/nextflow.config b/modules/nf-core/busco/busco/tests/nextflow.config
deleted file mode 100644
index 1ec3fec..0000000
--- a/modules/nf-core/busco/busco/tests/nextflow.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
-    withName: 'BUSCO_BUSCO' {
-        ext.args = '--tar'
-    }
-}
diff --git a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config
deleted file mode 100644
index c141844..0000000
--- a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
-    withName: 'BUSCO_BUSCO' {
-        ext.args = '--tar --metaeuk'
-    }
-}
diff --git a/modules/nf-core/busco/busco/tests/old_test.yml b/modules/nf-core/busco/busco/tests/old_test.yml
deleted file mode 100644
index 75177f5..0000000
--- a/modules/nf-core/busco/busco/tests/old_test.yml
+++ /dev/null
@@ -1,624 +0,0 @@
-- name: busco test_busco_genome_single_fasta
-  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config
-  tags:
-    - busco
-  files:
-    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt
-      contains:
-        - "BUSCO version"
-        - "The lineage dataset is"
-        - "BUSCO was run in mode"
-        - "Complete BUSCOs"
-        - "Missing BUSCOs"
-        - "Dependencies and versions"
-    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
-      md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log
-      md5sum: 9caf1a1434414c78562eb0bbb9c0e53f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log
-      contains:
-        - "# hmmsearch :: search profile(s) against a sequence database"
-        - "# target sequence database:"
-        - "Internal pipeline statistics summary:"
-        - "[ok]"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log
-      md5sum: 538510cfc7483498210f01e53fe035ad
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log
-      md5sum: 61050b0706addc9498b2088a2d6efa9a
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint
-      contains:
-        - "Tool: prodigal"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
-      md5sum: 836e9a80d33d8b89168f07ddc13ee991
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
-      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
-      md5sum: 836e9a80d33d8b89168f07ddc13ee991
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
-      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
-      md5sum: 538510cfc7483498210f01e53fe035ad
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
-      md5sum: 61050b0706addc9498b2088a2d6efa9a
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint
-      contains:
-        - "Tool: bbtools"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv
-      md5sum: c56edab1dc1522e993c25ae2b730799f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv
-      md5sum: b533ef30270f27160acce85a22d01bf5
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "lineage_dataset"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt
-      contains:
-        - "# BUSCO version is:"
-        - "Results:"
-        - "busco:"
-    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
-      contains:
-        - "DEBUG:busco.run_BUSCO"
-        - "Results from dataset"
-        - "how to cite BUSCO"
-    - path: output/busco/versions.yml
-
-- name: busco test_busco_genome_multi_fasta
-  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config
-  tags:
-    - busco
-  files:
-    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt
-      contains:
-        - "BUSCO version"
-        - "The lineage dataset is"
-        - "BUSCO was run in mode"
-        - "Complete BUSCOs"
-        - "Missing BUSCOs"
-        - "Dependencies and versions"
-    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt
-      contains:
-        - "BUSCO version"
-        - "The lineage dataset is"
-        - "BUSCO was run in mode"
-        - "Complete BUSCOs"
-        - "Missing BUSCOs"
-        - "Dependencies and versions"
-    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
-      md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log
-      md5sum: 8f047bdb33264d22a83920bc2c63f29a
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log
-      contains:
-        - "# hmmsearch :: search profile(s) against a sequence database"
-        - "# target sequence database:"
-        - "Internal pipeline statistics summary:"
-        - "[ok]"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log
-      md5sum: c1fdc6977332f53dfe7f632733bb4585
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log
-      md5sum: 50752acb1c5a20be886bfdfc06635bcb
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint
-      contains:
-        - "Tool: prodigal"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa
-      md5sum: 8166471fc5f08c82fd5643ab42327f9d
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna
-      md5sum: ddc508a18f60e7f3314534df50cdf8ca
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
-      md5sum: 8166471fc5f08c82fd5643ab42327f9d
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
-      md5sum: ddc508a18f60e7f3314534df50cdf8ca
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
-      md5sum: c1fdc6977332f53dfe7f632733bb4585
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
-      md5sum: 50752acb1c5a20be886bfdfc06635bcb
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa
-      md5sum: e56fd59c38248dc21ac94355dca98121
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna
-      md5sum: b365f84bf99c68357952e0b98ed7ce42
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log
-      md5sum: e5f14d7925ba14a0f9850542f3739894
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log
-      md5sum: d41971bfc1b621d4ffd2633bc47017ea
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint
-      contains:
-        - "Tool: bbtools"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv
-      md5sum: c9651b88b10871abc260ee655898e828
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv
-      md5sum: 9939309df2da5419de88c32d1435c779
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt
-      contains:
-        - "# BUSCO version is:"
-        - "Results:"
-        - "busco:"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log
-      md5sum: 9caf1a1434414c78562eb0bbb9c0e53f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log
-      contains:
-        - "# hmmsearch :: search profile(s) against a sequence database"
-        - "# target sequence database:"
-        - "Internal pipeline statistics summary:"
-        - "[ok]"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log
-      md5sum: 538510cfc7483498210f01e53fe035ad
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log
-      md5sum: 61050b0706addc9498b2088a2d6efa9a
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint
-      contains:
-        - "Tool: prodigal"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
-      md5sum: 836e9a80d33d8b89168f07ddc13ee991
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
-      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
-      md5sum: 836e9a80d33d8b89168f07ddc13ee991
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
-      md5sum: 20eeb75f86842e6e136f02bca8b73a9f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
-      md5sum: 538510cfc7483498210f01e53fe035ad
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
-      md5sum: 61050b0706addc9498b2088a2d6efa9a
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint
-      contains:
-        - "Tool: bbtools"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv
-      md5sum: c56edab1dc1522e993c25ae2b730799f
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv
-      md5sum: b533ef30270f27160acce85a22d01bf5
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt
-      contains:
-        - "# BUSCO version is:"
-        - "Results:"
-        - "busco:"
-    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
-      contains:
-        - "DEBUG:busco.run_BUSCO"
-        - "Results from dataset"
-        - "how to cite BUSCO"
-    - path: output/busco/versions.yml
-
-- name: busco test_busco_eukaryote_metaeuk
-  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config
-  tags:
-    - busco
-  files:
-    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt
-      contains:
-        - "BUSCO version"
-        - "The lineage dataset is"
-        - "BUSCO was run in mode"
-        - "Complete BUSCOs"
-        - "Missing BUSCOs"
-        - "Dependencies and versions"
-    - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt
-      md5sum: ff6d8277e452a83ce9456bbee666feb6
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log
-      md5sum: e63debaa653f18f7405d936050abc093
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint
-      contains:
-        - "Tool: bbtools"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv
-      md5sum: bd880e90b9e5620a58943a3e0f9ff16b
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint
-      contains:
-        - "Tool: metaeuk"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa
-      md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv
-      md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt
-      contains:
-        - "# BUSCO version is:"
-        - "Results:"
-        - "busco:"
-    - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log
-      contains:
-        - "DEBUG:busco.run_BUSCO"
-        - "Results from dataset"
-        - "how to cite BUSCO"
-    - path: output/busco/versions.yml
-
-- name: busco test_busco_eukaryote_augustus
-  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config
-  tags:
-    - busco
-  files:
-    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt
-      contains:
-        - "BUSCO version"
-        - "The lineage dataset is"
-        - "BUSCO was run in mode"
-        - "Complete BUSCOs"
-        - "Missing BUSCOs"
-        - "Dependencies and versions"
-    - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt
-      md5sum: ff6d8277e452a83ce9456bbee666feb6
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log
-      md5sum: e63debaa653f18f7405d936050abc093
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log
-      contains:
-        - "metaeuk"
-        - "easy-predict"
-        - "Compute score and coverage"
-        - "Time for processing:"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log
-      contains:
-        - "metaeuk"
-        - "easy-predict"
-        - "Compute score and coverage"
-        - "Time for processing:"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint
-      contains:
-        - "Tool: bbtools"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv
-      md5sum: bd880e90b9e5620a58943a3e0f9ff16b
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint
-      contains:
-        - "Tool: metaeuk"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa
-      md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv
-      md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt
-      contains:
-        - "# BUSCO version is:"
-        - "Results:"
-        - "busco:"
-    - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log
-      contains:
-        - "DEBUG:busco.run_BUSCO"
-        - "Results from dataset"
-        - "how to cite BUSCO"
-    - path: output/busco/versions.yml
-
-- name: busco test_busco_protein
-  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config
-  tags:
-    - busco
-  files:
-    - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt
-      contains:
-        - "BUSCO version"
-        - "The lineage dataset is"
-        - "BUSCO was run in mode"
-        - "Complete BUSCOs"
-        - "Missing BUSCOs"
-        - "Dependencies and versions"
-    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
-      md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d
-    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
-      contains:
-        - "DEBUG:busco.run_BUSCO"
-        - "Results from dataset"
-        - "how to cite BUSCO"
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log
-      contains:
-        - "# hmmsearch :: search profile(s) against a sequence database"
-        - "# target sequence database:"
-        - "Internal pipeline statistics summary:"
-        - "[ok]"
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv
-      md5sum: 0e34f1011cd83ea1d5d5103ec62b8922
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv
-      md5sum: 9939309df2da5419de88c32d1435c779
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt
-      contains:
-        - "# BUSCO version is:"
-        - "Results:"
-        - "busco:"
-    - path: output/busco/versions.yml
-
-- name: busco test_busco_transcriptome
-  command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config
-  tags:
-    - busco
-  files:
-    - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt
-      contains:
-        - "BUSCO version"
-        - "The lineage dataset is"
-        - "BUSCO was run in mode"
-        - "Complete BUSCOs"
-        - "Missing BUSCOs"
-        - "Dependencies and versions"
-    - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
-      md5sum: 46118ecf60d1b87d22b96d80f4f03632
-    - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
-      contains:
-        - "DEBUG:busco.run_BUSCO"
-        - "Results from dataset"
-        - "how to cite BUSCO"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint
-      contains:
-        - "Tool: makeblastdb"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb
-      md5sum: 3788c017fe5e6f0f58224e9cdd21822b
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr
-      md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not
-      md5sum: 0c340e376c7e85d19f82ec1a833e6a6e
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq
-      md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf
-      md5sum: de1250813f0c7affc6d12dac9d0fb6bb
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto
-      md5sum: ff74bd41f9cc9b011c63a32c4f7693bf
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log
-      contains:
-        - "# hmmsearch :: search profile(s) against a sequence database"
-        - "# target sequence database:"
-        - "Internal pipeline statistics summary:"
-        - "[ok]"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log
-      contains:
-        - "Building a new DB"
-        - "Adding sequences from FASTA"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint
-      contains:
-        - "Tool: tblastn"
-        - "Completed"
-        - "jobs"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv
-      md5sum: cc30eed321944af293452bdbcfc24292
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp
-      md5sum: 73e9c65fc83fedc58f57f09b08f08238
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp
-      md5sum: 7fa4cc7955ec0cc36330a221c579b975
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp
-      md5sum: 6f1601c875d019e3f6f1f98ed8e988d4
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp
-      md5sum: 3f8e034686cd240c2330650d791bcae2
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp
-      md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp
-      md5sum: 7d463e0e6cf7169bc9077d8dc776dda1
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp
-      md5sum: 2288edf7fa4f88f51b4cf4d94086f77e
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp
-      md5sum: 029906abbad6d87fc57830dd548cac24
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp
-      md5sum: 4937f3b348774a31b1160a00297c29cc
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp
-      md5sum: afcb20ba4c466479d6b91c8c62251e1f
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp
-      md5sum: 2e1e823ce017345bd998191a39fa9924
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp
-      md5sum: 08c2d82c34ecffbe1c638b410349412e
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp
-      md5sum: cd9b63cf93524284781535c888313764
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp
-      md5sum: d1929b742b24ebe379bf4801ca882dca
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp
-      md5sum: 69215765b010c05336538cb322c900b3
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp
-      md5sum: 6feaa1cc3b0899a147ea9d466878f3e3
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp
-      md5sum: 13625eae14e860a96ce17cd4e37e9d01
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp
-      md5sum: e14b2484649b0dbc8926815c207b806d
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp
-      md5sum: 6902c93691df00e690faea914c71839e
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp
-      md5sum: 0a0d9d38a83acbd5ad43c29cdf429988
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv
-      contains:
-        - "TBLASTN"
-        - "BLAST processed"
-        - "queries"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv
-      md5sum: 24df25199e13c88bd892fc3e7b541ca0
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv
-      md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json
-      contains:
-        - "one_line_summary"
-        - "mode"
-        - "dataset"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt
-      contains:
-        - "# BUSCO version is:"
-        - "Results:"
-        - "busco:"
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa
-      md5sum: e04b9465733577ae6e4bccb7aa01e720
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa
-      md5sum: 7333c39a20258f20c7019ea0cd83157c
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa
-      md5sum: ebb481e77a824685fbe04d8a2f3a0d7d
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa
-      md5sum: 34621c7d499034e8f8e6b92fd4020a93
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa
-      md5sum: aa89ca381c1c70c9c4e1380351ca7c2a
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa
-      md5sum: f2e91d78b8dd3722840378789f29e8c8
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa
-      md5sum: 73c25aef5c9cba7f4151804941b146ea
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa
-      md5sum: cda556018d1f84ebe517e89f6fc107d0
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa
-      md5sum: a9096c9fb8b25c78a72871ab0463acdc
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa
-      md5sum: e463d25ce186c0cebfd749474f3a4c64
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa
-      md5sum: f2cfd241590c6d8377286d6135480937
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa
-      md5sum: 586569546fb9861502468e3d9ba2775c
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa
-      md5sum: 24c658bee14ad84b062d81ad96642eb8
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa
-      md5sum: 0b8e26ddf5149bbd8805be7af125208d
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa
-      md5sum: 159320712ee01fb2ccb31a25df44eead
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa
-      md5sum: 812629c0b06ac3d18661c2ca78de0c08
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa
-      md5sum: f7ff4e1591342d30b77392a2e84b57d9
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa
-      md5sum: 7b34a24fc49c540d46fcf96ff5129564
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa
-      md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa
-      md5sum: 613af7a3fea30ea2bece66f603b9284a
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa
-      md5sum: a7cd1b13c9ef91c7ef4e31614166f197
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa
-      md5sum: fe313ffd5efdb0fed887a04fba352552
-    - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa
-      md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0
-    - path: output/busco/versions.yml
diff --git a/modules/nf-core/busco/busco/tests/tags.yml b/modules/nf-core/busco/busco/tests/tags.yml
deleted file mode 100644
index 7c4d283..0000000
--- a/modules/nf-core/busco/busco/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-busco/busco:
-  - "modules/nf-core/busco/busco/**"
diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml
deleted file mode 100644
index 1787b38..0000000
--- a/modules/nf-core/fastqc/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: fastqc
-channels:
-  - conda-forge
-  - bioconda
-  - defaults
-dependencies:
-  - bioconda::fastqc=0.12.1
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
deleted file mode 100644
index d79f1c8..0000000
--- a/modules/nf-core/fastqc/main.nf
+++ /dev/null
@@ -1,61 +0,0 @@
-process FASTQC {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
-        'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
-
-    input:
-    tuple val(meta), path(reads)
-
-    output:
-    tuple val(meta), path("*.html"), emit: html
-    tuple val(meta), path("*.zip") , emit: zip
-    path  "versions.yml"           , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    // Make list of old name and new name pairs to use for renaming in the bash while loop
-    def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
-    def rename_to = old_new_pairs*.join(' ').join(' ')
-    def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
-
-    def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB')
-    // FastQC memory value allowed range (100 - 10000)
-    def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
-
-    """
-    printf "%s %s\\n" $rename_to | while read old_name new_name; do
-        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
-    done
-
-    fastqc \\
-        $args \\
-        --threads $task.cpus \\
-        --memory $fastqc_memory \\
-        $renamed_files
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.html
-    touch ${prefix}.zip
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
deleted file mode 100644
index ee5507e..0000000
--- a/modules/nf-core/fastqc/meta.yml
+++ /dev/null
@@ -1,57 +0,0 @@
-name: fastqc
-description: Run FastQC on sequenced reads
-keywords:
-  - quality control
-  - qc
-  - adapters
-  - fastq
-tools:
-  - fastqc:
-      description: |
-        FastQC gives general quality metrics about your reads.
-        It provides information about the quality score distribution
-        across your reads, the per base sequence content (%A/C/G/T).
-        You get information about adapter contamination and other
-        overrepresented sequences.
-      homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
-      documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
-      licence: ["GPL-2.0-only"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - reads:
-      type: file
-      description: |
-        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
-        respectively.
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - html:
-      type: file
-      description: FastQC report
-      pattern: "*_{fastqc.html}"
-  - zip:
-      type: file
-      description: FastQC report archive
-      pattern: "*_{fastqc.zip}"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@drpatelh"
-  - "@grst"
-  - "@ewels"
-  - "@FelixKrueger"
-maintainers:
-  - "@drpatelh"
-  - "@grst"
-  - "@ewels"
-  - "@FelixKrueger"
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
deleted file mode 100644
index 70edae4..0000000
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ /dev/null
@@ -1,212 +0,0 @@
-nextflow_process {
-
-    name "Test Process FASTQC"
-    script "../main.nf"
-    process "FASTQC"
-
-    tag "modules"
-    tag "modules_nfcore"
-    tag "fastqc"
-
-    test("sarscov2 single-end [fastq]") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of([
-                    [ id: 'test', single_end:true ],
-                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
-                ])
-                """
-            }
-        }
-
-        then {
-            assertAll (
-            { assert process.success },
-
-            // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
-            // looks like this: <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
-            // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
-
-            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
-            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
-            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-
-            { assert snapshot(process.out.versions).match("fastqc_versions_single") }
-            )
-        }
-    }
-
-    test("sarscov2 paired-end [fastq]") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of([
-                    [id: 'test', single_end: false], // meta map
-                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
-                ])
-                """
-            }
-        }
-
-        then {
-            assertAll (
-            { assert process.success },
-
-            { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
-            { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
-            { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
-            { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
-            { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-            { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-
-            { assert snapshot(process.out.versions).match("fastqc_versions_paired") }
-            )
-        }
-    }
-
-    test("sarscov2 interleaved [fastq]") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of([
-                    [id: 'test', single_end: false], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
-                ])
-            """
-            }
-        }
-
-        then {
-            assertAll (
-            { assert process.success },
-
-            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
-            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
-            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-
-            { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") }
-            )
-        }
-    }
-
-    test("sarscov2 paired-end [bam]") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of([
-                    [id: 'test', single_end: false], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
-                ])
-                """
-            }
-        }
-
-        then {
-            assertAll (
-            { assert process.success },
-
-            { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
-            { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
-            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-
-            { assert snapshot(process.out.versions).match("fastqc_versions_bam") }
-            )
-        }
-    }
-
-    test("sarscov2 multiple [fastq]") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of([
-                    [id: 'test', single_end: false], // meta map
-                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
-                ])
-                """
-            }
-        }
-
-        then {
-            assertAll (
-            { assert process.success },
-
-            { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
-            { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
-            { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
-            { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
-            { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
-            { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
-            { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
-            { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
-            { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-            { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-            { assert path(process.out.html[0][1][2]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-            { assert path(process.out.html[0][1][3]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-
-            { assert snapshot(process.out.versions).match("fastqc_versions_multiple") }
-            )
-        }
-    }
-
-    test("sarscov2 custom_prefix") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of([
-                    [ id:'mysample', single_end:true ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
-                ])
-                """
-            }
-        }
-
-        then {
-            assertAll (
-            { assert process.success },
-
-            { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
-            { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
-            { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
-
-            { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") }
-            )
-        }
-    }
-
-    test("sarscov2 single-end [fastq] - stub") {
-
-        options "-stub"
-
-        when {
-            process {
-                """
-                input[0] = Channel.of([
-                    [ id: 'test', single_end:true ],
-                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
-                ])
-                """
-            }
-        }
-
-        then {
-            assertAll (
-            { assert process.success },
-            { assert snapshot(process.out.html.collect { file(it[1]).getName() } +
-                                process.out.zip.collect { file(it[1]).getName() } +
-                                process.out.versions ).match("fastqc_stub") }
-            )
-        }
-    }
-
-}
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
deleted file mode 100644
index 86f7c31..0000000
--- a/modules/nf-core/fastqc/tests/main.nf.test.snap
+++ /dev/null
@@ -1,88 +0,0 @@
-{
-    "fastqc_versions_interleaved": {
-        "content": [
-            [
-                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-01-31T17:40:07.293713"
-    },
-    "fastqc_stub": {
-        "content": [
-            [
-                "test.html",
-                "test.zip",
-                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-01-31T17:31:01.425198"
-    },
-    "fastqc_versions_multiple": {
-        "content": [
-            [
-                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-01-31T17:40:55.797907"
-    },
-    "fastqc_versions_bam": {
-        "content": [
-            [
-                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-01-31T17:40:26.795862"
-    },
-    "fastqc_versions_single": {
-        "content": [
-            [
-                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-01-31T17:39:27.043675"
-    },
-    "fastqc_versions_paired": {
-        "content": [
-            [
-                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-01-31T17:39:47.584191"
-    },
-    "fastqc_versions_custom_prefix": {
-        "content": [
-            [
-                "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-01-31T17:41:14.576531"
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml
deleted file mode 100644
index 7834294..0000000
--- a/modules/nf-core/fastqc/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-fastqc:
-  - modules/nf-core/fastqc/**
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
deleted file mode 100644
index ca39fb6..0000000
--- a/modules/nf-core/multiqc/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: multiqc
-channels:
-  - conda-forge
-  - bioconda
-  - defaults
-dependencies:
-  - bioconda::multiqc=1.21
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
deleted file mode 100644
index 47ac352..0000000
--- a/modules/nf-core/multiqc/main.nf
+++ /dev/null
@@ -1,55 +0,0 @@
-process MULTIQC {
-    label 'process_single'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.21--pyhdfd78af_0' }"
-
-    input:
-    path  multiqc_files, stageAs: "?/*"
-    path(multiqc_config)
-    path(extra_multiqc_config)
-    path(multiqc_logo)
-
-    output:
-    path "*multiqc_report.html", emit: report
-    path "*_data"              , emit: data
-    path "*_plots"             , optional:true, emit: plots
-    path "versions.yml"        , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def config = multiqc_config ? "--config $multiqc_config" : ''
-    def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
-    def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : ''
-    """
-    multiqc \\
-        --force \\
-        $args \\
-        $config \\
-        $extra_config \\
-        $logo \\
-        .
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
-    END_VERSIONS
-    """
-
-    stub:
-    """
-    mkdir multiqc_data
-    touch multiqc_plots
-    touch multiqc_report.html
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
deleted file mode 100644
index 45a9bc3..0000000
--- a/modules/nf-core/multiqc/meta.yml
+++ /dev/null
@@ -1,58 +0,0 @@
-name: multiqc
-description: Aggregate results from bioinformatics analyses across many samples into a single report
-keywords:
-  - QC
-  - bioinformatics tools
-  - Beautiful stand-alone HTML report
-tools:
-  - multiqc:
-      description: |
-        MultiQC searches a given directory for analysis logs and compiles a HTML report.
-        It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.
-      homepage: https://multiqc.info/
-      documentation: https://multiqc.info/docs/
-      licence: ["GPL-3.0-or-later"]
-input:
-  - multiqc_files:
-      type: file
-      description: |
-        List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
-  - multiqc_config:
-      type: file
-      description: Optional config yml for MultiQC
-      pattern: "*.{yml,yaml}"
-  - extra_multiqc_config:
-      type: file
-      description: Second optional config yml for MultiQC. Will override common sections in multiqc_config.
-      pattern: "*.{yml,yaml}"
-  - multiqc_logo:
-      type: file
-      description: Optional logo file for MultiQC
-      pattern: "*.{png}"
-output:
-  - report:
-      type: file
-      description: MultiQC report file
-      pattern: "multiqc_report.html"
-  - data:
-      type: directory
-      description: MultiQC data dir
-      pattern: "multiqc_data"
-  - plots:
-      type: file
-      description: Plots created by MultiQC
-      pattern: "*_data"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@abhi18av"
-  - "@bunop"
-  - "@drpatelh"
-  - "@jfy133"
-maintainers:
-  - "@abhi18av"
-  - "@bunop"
-  - "@drpatelh"
-  - "@jfy133"
diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test
deleted file mode 100644
index f1c4242..0000000
--- a/modules/nf-core/multiqc/tests/main.nf.test
+++ /dev/null
@@ -1,84 +0,0 @@
-nextflow_process {
-
-    name "Test Process MULTIQC"
-    script "../main.nf"
-    process "MULTIQC"
-
-    tag "modules"
-    tag "modules_nfcore"
-    tag "multiqc"
-
-    test("sarscov2 single-end [fastqc]") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true))
-                input[1] = []
-                input[2] = []
-                input[3] = []
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
-                { assert process.out.data[0] ==~ ".*/multiqc_data" },
-                { assert snapshot(process.out.versions).match("multiqc_versions_single") }
-            )
-        }
-
-    }
-
-    test("sarscov2 single-end [fastqc] [config]") {
-
-        when {
-            process {
-                """
-                input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true))
-                input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true))
-                input[2] = []
-                input[3] = []
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert process.out.report[0] ==~ ".*/multiqc_report.html" },
-                { assert process.out.data[0] ==~ ".*/multiqc_data" },
-                { assert snapshot(process.out.versions).match("multiqc_versions_config") }
-            )
-        }
-    }
-
-    test("sarscov2 single-end [fastqc] - stub") {
-
-        options "-stub"
-
-        when {
-            process {
-                """
-                input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true))
-                input[1] = []
-                input[2] = []
-                input[3] = []
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out.report.collect { file(it).getName() } +
-                                process.out.data.collect { file(it).getName() } +
-                                process.out.plots.collect { file(it).getName() } +
-                                process.out.versions ).match("multiqc_stub") }
-            )
-        }
-
-    }
-}
diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap
deleted file mode 100644
index bfebd80..0000000
--- a/modules/nf-core/multiqc/tests/main.nf.test.snap
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-    "multiqc_versions_single": {
-        "content": [
-            [
-                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-02-29T08:48:55.657331"
-    },
-    "multiqc_stub": {
-        "content": [
-            [
-                "multiqc_report.html",
-                "multiqc_data",
-                "multiqc_plots",
-                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-02-29T08:49:49.071937"
-    },
-    "multiqc_versions_config": {
-        "content": [
-            [
-                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
-            ]
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-02-29T08:49:25.457567"
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml
deleted file mode 100644
index bea6c0d..0000000
--- a/modules/nf-core/multiqc/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-multiqc:
-  - modules/nf-core/multiqc/**
diff --git a/nextflow.config b/nextflow.config
index 3fb2d75..e39cd2f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -12,6 +12,7 @@ params {
     // TODO nf-core: Specify your pipeline's command line flags
     // Input options
     input                        = null
+    mapped                       = false
 
     // Boilerplate options
     outdir                       = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 55b26b3..f198603 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -20,7 +20,7 @@
                     "mimetype": "text/yaml",
                     "pattern": "^\\S+\\.yaml$",
                     "description": "Path to yaml file containing information about the samples in the experiment.",
-                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.",
+                    "help_text": "You will need to create a yaml file which contains data on the sample at hand",
                     "fa_icon": "fas fa-file-yaml"
                 },
                 "outdir": {

From c89745e8979026276e52d8e6a6b13c72057dda33 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 14:01:29 +0100
Subject: [PATCH 18/52] Add CI test-data downloads and runner paths in test.yaml

---
 .github/workflows/ci.yml | 66 ++++++++++++++++++++++++++++++++++++++--
 assets/test.yaml         | 27 ++++++++--------
 2 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e2003fe..7c98354 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,6 +10,8 @@ on:
 
 env:
   NXF_ANSI_LOG: false
+  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
+  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
 
 concurrency:
   group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
@@ -27,6 +29,29 @@ jobs:
           - "23.04.0"
           - "latest-everything"
     steps:
+      - name: Get branch names
+        # Pulls the names of current branches in repo
+        # steps.branch-names.outputs.current_branch is used later and returns the name of the branch the PR is made FROM not to
+        id: branch-names
+        uses: tj-actions/branch-names@v8
+
+      - name: Setup apptainer
+        uses: eWaterCycle/setup-apptainer@main
+
+      - name: Set up Singularity
+        run: |
+          mkdir -p $NXF_SINGULARITY_CACHEDIR
+          mkdir -p $NXF_SINGULARITY_LIBRARYDIR
+
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+  
+      - name: Install nf-core
+        run: |
+          pip install nf-core
+
       - name: Check out pipeline code
         uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
 
@@ -35,8 +60,44 @@ jobs:
         with:
           version: "${{ matrix.NXF_VER }}"
 
-      - name: Disk space cleanup
-        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      - name: NF-Core Download - download singularity containers
+        run: |
+          nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
+
+      # - name: NF-Core Download - download singularity containers
+      #   run: |
+      #     nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
+
+      # - name: NF-Core Download - download singularity containers
+      #   run: |
+      #     nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
+      - name: Download Tiny test data
+        # Download A fungal test data set that is full enough to show some real output.
+        run: |
+          curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf -
+
+      - name: Download the NCBI taxdump database
+        run: |
+          mkdir ncbi_taxdump
+          curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf -
+
+      - name: Download the BUSCO lineage database
+        run: |
+          mkdir busco_database
+          curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf -
+
+      - name: Download the subset of NT database
+        run: |
+          mkdir NT_database
+          curl -L https://ftp.ncbi.nlm.nih.gov/blast/db/18S_fungal_sequences.tar.gz | tar -C NT_database -xzf -
+
+      - name: Download the subset of Diamond database
+        run: |
+          mkdir diamond
+          wget -c https://tolit.cog.sanger.ac.uk/test-data/resources/diamond/UP000000212_1234679_tax.dmnd -O diamond/UP000000212_1234679_tax.dmnd
+
+      # - name: Disk space cleanup
+      #   uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
 
       - name: Run pipeline with test data
         # TODO nf-core: You can customise CI pipeline run tests as required
@@ -44,3 +105,4 @@ jobs:
         # Remember that you can parallelise this by using strategy.matrix
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          ls ./results/*/*
diff --git a/assets/test.yaml b/assets/test.yaml
index 6a5299a..ba87caf 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -1,25 +1,24 @@
-assembly_id: Oscheius_DF5033
-reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
-reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
+assembly_id: grTriPseu1
+reference_hap1: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
+reference_hap2: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
 longread:
   type: hifi
-  dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
+  dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/
 mapped_bam: idCulLati1/mapped_bam.bam
 curationpretext:
   aligner: minimap2
-  telomere_motif: TTAGG
-  hic_dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/
+  telomere_motif: TTAGGG
+  hic_dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/hic-arima/
 merquryfk:
   fastk_hist: "./"
   fastk_ktab: "./"
 btk:
-  nt_database: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_blast_tiny_testdb/blastdb/
-  nt_database_prefix: tiny_plasmodium_blastdb.fa
-  diamond_uniprot_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20240704_diamond_tiny_testdb/ascc_tinytest_diamond_db.dmnd
-  diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd
-  ncbi_taxonomy_path: /lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump
-  ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp
-  btk_yaml: /nfs/users/nfs_d/dp24/sanger-tol-ear/assets/btk_draft.yaml
+  nt_database: /home/runner/work/ascc/ascc/NT_database/
+  nt_database_prefix: 18S_fungal_sequences
+  diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
+  diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
+  ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/
+  ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp
   taxid: 352914
   gca_accession: GCA_0001
-  lineages: "diptera_odb10,insecta_odb10"
+  lineages: "fungi_odb10"

From fd77e2f7f93429ea6ee6a3fc65ddd75ceeb35d7c Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 14:13:29 +0100
Subject: [PATCH 19/52] Enable sub-pipeline container downloads and set test input

---
 .github/workflows/ci.yml | 14 ++++++++------
 conf/test.config         | 14 +++++---------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7c98354..a3dbb17 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -60,17 +60,19 @@ jobs:
         with:
           version: "${{ matrix.NXF_VER }}"
 
+      # This will only download the main pipeline containers, subpipelines need their own nf-download
       - name: NF-Core Download - download singularity containers
         run: |
           nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
 
-      # - name: NF-Core Download - download singularity containers
-      #   run: |
-      #     nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
+      - name: NF-Core Download - download singularity containers
+        run: |
+          nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
+
+      - name: NF-Core Download - download singularity containers
+        run: |
+          nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
 
-      # - name: NF-Core Download - download singularity containers
-      #   run: |
-      #     nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
       - name: Download Tiny test data
         # Download A fungal test data set that is full enough to show some real output.
         run: |
diff --git a/conf/test.config b/conf/test.config
index 024498b..7313f18 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -15,14 +15,10 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
-
-    // Input data
-    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input  = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
-
+    max_cpus    = 2
+    max_memory  = '6.GB'
+    max_time    = '6.h'
+    input       = "${projectDir}/assets/test.yaml"
+    outdir      = "results"
     
 }

From f8f9456fb32cecbf5c70f5d7dae82335c7616cf8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 14:15:55 +0100
Subject: [PATCH 20/52] Update sub-pipeline revisions for CI container downloads

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a3dbb17..200afd0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -67,11 +67,11 @@ jobs:
 
       - name: NF-Core Download - download singularity containers
         run: |
-          nf-core download sanger-tol/blobtoolkit --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
+          nf-core download sanger-tol/blobtoolkit --revision draft_assemblies --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
 
       - name: NF-Core Download - download singularity containers
         run: |
-          nf-core download sanger-tol/curationpretext --revision draft_assembly --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
+          nf-core download sanger-tol/curationpretext --revision 1.0.0 --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
 
       - name: Download Tiny test data
         # Download A fungal test data set that is full enough to show some real output.

From 098dae1b158f830708e4d73eace84c791faee836 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 15:08:38 +0100
Subject: [PATCH 21/52] Bump Nextflow version due to errors on GitHub Actions

---
 .github/workflows/ci.yml | 2 +-
 nextflow.config          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 200afd0..abd9874 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
     strategy:
       matrix:
         NXF_VER:
-          - "23.04.0"
+          - "24.04.5"
           - "latest-everything"
     steps:
       - name: Get branch names
diff --git a/nextflow.config b/nextflow.config
index e39cd2f..a2b702d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -222,8 +222,8 @@ manifest {
     homePage        = 'https://github.com/sanger-tol/ear'
     description     = """ERGA Assembly Report pipeline"""
     mainScript      = 'main.nf'
-    nextflowVersion = '!>=23.04.0'
-    version         = '1.0dev'
+    nextflowVersion = '!>=24.04.0'
+    version         = '1.0'
     doi             = ''
 }
 

From 44b080927efb4f1eb552023d7ded987ace4708a8 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 21 Aug 2024 16:31:19 +0100
Subject: [PATCH 22/52] Multi-hap support

---
 .github/workflows/ci.yml                      |   2 +-
 assets/idCulLati1.yaml                        |   2 +-
 conf/modules.config                           |   4 +
 modules.json                                  |  39 ++--
 modules/nf-core/cat/cat/environment.yml       |   7 +
 modules/nf-core/cat/cat/main.nf               |  78 +++++++
 modules/nf-core/cat/cat/meta.yml              |  36 ++++
 modules/nf-core/cat/cat/tests/main.nf.test    | 191 ++++++++++++++++++
 .../nf-core/cat/cat/tests/main.nf.test.snap   | 147 ++++++++++++++
 .../cat/tests/nextflow_unzipped_zipped.config |   6 +
 .../cat/tests/nextflow_zipped_unzipped.config |   8 +
 modules/nf-core/cat/cat/tests/tags.yml        |   2 +
 subworkflows/local/yaml_input.nf              |   2 +
 workflows/ear.nf                              |  33 ++-
 14 files changed, 527 insertions(+), 30 deletions(-)
 create mode 100644 modules/nf-core/cat/cat/environment.yml
 create mode 100644 modules/nf-core/cat/cat/main.nf
 create mode 100644 modules/nf-core/cat/cat/meta.yml
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test
 create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
 create mode 100644 modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
 create mode 100644 modules/nf-core/cat/cat/tests/tags.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index abd9874..61b0cbf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
     strategy:
       matrix:
         NXF_VER:
-          - "24.04.5"
+          - "24.04.2"
           - "latest-everything"
     steps:
       - name: Get branch names
diff --git a/assets/idCulLati1.yaml b/assets/idCulLati1.yaml
index ea48cc2..404f4a5 100644
--- a/assets/idCulLati1.yaml
+++ b/assets/idCulLati1.yaml
@@ -2,7 +2,7 @@
 assembly_id: idCulLati1_ear
 reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/primary.fa
 reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/hap2.fa
-reference_haplotigs: /
+reference_haplotigs: /nfs/treeoflife-01/teams/tola/users/dp24/ear/haplotigs.fa
 
 # If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore.
 mapped_bam: /nfs/treeoflife-01/teams/tola/users/dp24/ear/idCulLati1/mapped_bam.bam
diff --git a/conf/modules.config b/conf/modules.config
index 137b892..73e83bb 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -20,6 +20,10 @@ process {
         ]
     }
 
+    withName: CAT_CAT {
+        ext.prefix          = 'combined_haplos.fa'
+    }
+
     withName: GFASTATS {
         ext.args            = '--nstar-report'
     }
diff --git a/modules.json b/modules.json
index 23ee7d4..b93de71 100644
--- a/modules.json
+++ b/modules.json
@@ -5,41 +5,36 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "cat/cat": {
+                        "branch": "master",
+                        "git_sha": "5bb8ca085e17549e185e1823495ab8d20727a805",
+                        "installed_by": ["modules"]
+                    },
                     "gfastats": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "merquryfk/merquryfk": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ],
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff"
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/merge": {
                         "branch": "master",
                         "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "46eca555142d6e597729fcb682adcc791796f514",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     }
                 }
             },
@@ -48,26 +43,20 @@
                     "utils_nextflow_pipeline": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
                         "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfvalidation_plugin": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     }
                 }
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml
new file mode 100644
index 0000000..17a04ef
--- /dev/null
+++ b/modules/nf-core/cat/cat/environment.yml
@@ -0,0 +1,7 @@
+name: cat_cat
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::pigz=2.3.4
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
new file mode 100644
index 0000000..2862c64
--- /dev/null
+++ b/modules/nf-core/cat/cat/main.nf
@@ -0,0 +1,78 @@
+process CAT_CAT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pigz:2.3.4' :
+        'biocontainers/pigz:2.3.4' }"
+
+    input:
+    tuple val(meta), path(files_in)
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: file_out
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def file_list = files_in.collect { it.toString() }
+
+    // choose appropriate concatenation tool depending on input and output format
+
+    // | input     | output     | command1 | command2 |
+    // |-----------|------------|----------|----------|
+    // | gzipped   | gzipped    | cat      |          |
+    // | ungzipped | ungzipped  | cat      |          |
+    // | gzipped   | ungzipped  | zcat     |          |
+    // | ungzipped | gzipped    | cat      | pigz     |
+
+    // Use input file ending as default
+    prefix   = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
+    out_zip  = prefix.endsWith('.gz')
+    in_zip   = file_list[0].endsWith('.gz')
+    command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
+    command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : ''
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    $command1 \\
+        $args \\
+        ${file_list.join(' ')} \\
+        $command2 \\
+        > ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
+    stub:
+    def file_list   = files_in.collect { it.toString() }
+    prefix          = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+    if(file_list.contains(prefix.trim())) {
+        error "The name of the input file can't be the same as for the output prefix in the " +
+        "module CAT_CAT (currently `$prefix`). Please choose a different one."
+    }
+    """
+    touch $prefix
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
+
+// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz
+def getFileSuffix(filename) {
+    def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
+    return match ? match[0][1] : filename.substring(filename.lastIndexOf('.'))
+}
diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml
new file mode 100644
index 0000000..00a8db0
--- /dev/null
+++ b/modules/nf-core/cat/cat/meta.yml
@@ -0,0 +1,36 @@
+name: cat_cat
+description: A module for concatenation of gzipped or uncompressed files
+keywords:
+  - concatenate
+  - gzip
+  - cat
+tools:
+  - cat:
+      description: Just concatenation
+      documentation: https://man7.org/linux/man-pages/man1/cat.1.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - files_in:
+      type: file
+      description: List of compressed / uncompressed files
+      pattern: "*"
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - file_out:
+      type: file
+      description: Concatenated file. Will be gzipped if file_out ends with ".gz"
+      pattern: "${file_out}"
+authors:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
+maintainers:
+  - "@erikrikarddaniel"
+  - "@FriederikeHanssen"
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
new file mode 100644
index 0000000..9cb1617
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -0,0 +1,191 @@
+nextflow_process {
+
+    name "Test Process CAT_CAT"
+    script "../main.nf"
+    process "CAT_CAT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cat"
+    tag "cat/cat"
+
+    test("test_cat_name_conflict") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'genome', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") },
+                { assert snapshot(process.out.versions).match() }
+            )
+        }
+    }
+
+    test("test_cat_unzipped_unzipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+
+    test("test_cat_zipped_zipped") {
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    lines[0..5],
+                    lines.size(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("test_cat_zipped_unzipped") {
+        config './nextflow_zipped_unzipped.config'
+
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("test_cat_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    lines[0..5],
+                    lines.size(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("test_cat_one_file_unzipped_zipped") {
+        config './nextflow_unzipped_zipped.config'
+        when {
+            params {
+                outdir   = "${outputDir}"
+            }
+            process {
+                """
+                input[0] =
+                    [
+                        [ id:'test', single_end:true ],
+                        [
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                        ]
+                    ]
+                """
+            }
+        }
+        then {
+            def lines = path(process.out.file_out.get(0).get(1)).linesGzip
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    lines[0..5],
+                    lines.size(),
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap
new file mode 100644
index 0000000..b7623ee
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap
@@ -0,0 +1,147 @@
+{
+    "test_cat_unzipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2023-10-16T14:32:18.500464399"
+    },
+    "test_cat_zipped_unzipped": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ],
+                "file_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2023-10-16T14:32:49.642741302"
+    },
+    "test_cat_zipped_zipped": {
+        "content": [
+            [
+                "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab",
+                "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1",
+                "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1",
+                "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1",
+                "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1"
+            ],
+            78,
+            [
+                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:51:46.802978"
+    },
+    "test_cat_name_conflict": {
+        "content": [
+            [
+                
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:51:29.45394"
+    },
+    "test_cat_one_file_unzipped_zipped": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ],
+            374,
+            [
+                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:52:02.774016"
+    },
+    "test_cat_unzipped_zipped": {
+        "content": [
+            [
+                ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome",
+                "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT",
+                "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG",
+                "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG",
+                "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT",
+                "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG"
+            ],
+            375,
+            [
+                "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T11:51:57.581523"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
new file mode 100644
index 0000000..ec26b0f
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config
@@ -0,0 +1,6 @@
+
+process {
+    withName: CAT_CAT {
+        ext.prefix = 'cat.txt.gz'
+    }
+}
diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
new file mode 100644
index 0000000..fbc7978
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config
@@ -0,0 +1,8 @@
+
+process {
+
+    withName: CAT_CAT {
+        ext.prefix = 'cat.txt'
+    }
+
+}
diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml
new file mode 100644
index 0000000..37b578f
--- /dev/null
+++ b/modules/nf-core/cat/cat/tests/tags.yml
@@ -0,0 +1,2 @@
+cat/cat:
+  - modules/nf-core/cat/cat/**
diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
index 916c003..4e3cc9e 100644
--- a/subworkflows/local/yaml_input.nf
+++ b/subworkflows/local/yaml_input.nf
@@ -26,6 +26,7 @@ workflow YAML_INPUT {
 
     reference_1                 = Channel.fromPath(inputs.reference_hap1, checkIfExists: true)
     reference_2                 = Channel.fromPath(inputs.reference_hap2, checkIfExists: true)
+    reference_3                 = Channel.fromPath(inputs.reference_haplotigs, checkIfExists: true) // NOTE(review): presumably errors when reference_haplotigs is absent from the YAML - confirm the key is mandatory
 
     reference_1
         .combine(sample_id)
@@ -84,6 +85,7 @@ workflow YAML_INPUT {
     pacbio_tuple                                                    // tuple (meta), path(file)
     reference_hap1                                                  // tuple (meta), path(file)
     reference_hap2              = reference_2                       // DataVariable
+    reference_haplotigs         = reference_3
     reference_path              = inputs.reference_hap1             // DataVariable
     mapped_bam
 
diff --git a/workflows/ear.nf b/workflows/ear.nf
index 1c93b33..091697a 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -13,6 +13,7 @@ include { YAML_INPUT                        } from '../subworkflows/local/yaml_i
 include { MAIN_MAPPING                      } from '../subworkflows/local/main_mapping'
 
 // Module imports
+include { CAT_CAT                           } from '../modules/nf-core/cat/cat/main' 
 include { GENERATE_SAMPLESHEET              } from '../modules/local/generate_samplesheet'
 include { GFASTATS                          } from '../modules/nf-core/gfastats/main'
 include { MERQURYFK_MERQURYFK               } from '../modules/nf-core/merquryfk/merquryfk/main'
@@ -47,6 +48,32 @@ workflow EAR {
     YAML_INPUT(ch_input)
 
 
+    //
+    // LOGIC: IF HAPLOTIGS IS EMPTY THEN PASS ON HAPLOTYPE ASSEMBLY
+    //          IF HAPLOTIGS EXISTS THEN MERGE WITH HAPLOTYPE ASSEMBLY
+    // NOTE(review): ifEmpty() is a channel operator that returns a channel, not a boolean; this condition looks always-true, so the else branch may be unreachable - confirm
+    if (YAML_INPUT.out.reference_haplotigs.ifEmpty(true)) {
+        YAML_INPUT.out.sample_id
+            .combine(YAML_INPUT.out.reference_hap2)
+            .combine(YAML_INPUT.out.reference_haplotigs)
+            .map{ sample_id, file1, file2 ->    // file1 = hap2 assembly, file2 = haplotigs (order of the combine calls above)
+                tuple(
+                    [   id: sample_id   ],
+                    [file1, file2]
+                )
+            }
+            .set {
+                cat_cat_input
+            }
+
+        CAT_CAT(cat_cat_input)
+        ch_versions = ch_versions.mix( CAT_CAT.out.versions )
+
+        ch_haplotype_fasta  = CAT_CAT.out.file_out    // NOTE(review): presumably emits [ meta, file ] - confirm against the CAT_CAT output definition
+    } else {
+        ch_haplotype_fasta = YAML_INPUT.out.reference_hap2    // NOTE(review): presumably a bare path without meta, unlike CAT_CAT.out.file_out - confirm both branches yield the same shape
+    }
+
     //
     // MODULE: ASSEMBLY STATISTICS FOR THE FASTA
     //
@@ -67,11 +94,11 @@ workflow EAR {
     // LOGIC:  REFORMAT A BUNCH OF CHANNELS FOR MERQUERYFK
     //
     YAML_INPUT.out.reference_hap1
-        .combine(YAML_INPUT.out.reference_hap2)
+        .combine(ch_haplotype_fasta)
         .combine(YAML_INPUT.out.fastk_hist)
         .combine(YAML_INPUT.out.fastk_ktab)
-        .map{ meta, primary, haplotigs, fastk_hist, fastk_ktab ->
-            tuple(  meta,
+        .map{ meta1, primary, meta2, haplotigs, fastk_hist, fastk_ktab ->
+            tuple(  meta1,
                     fastk_hist,
                     fastk_ktab,
                     primary,

From eec25397a932c3d786a24fe6c197465872ecfd2d Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 2 Sep 2024 15:02:00 +0100
Subject: [PATCH 23/52] Corrected GFASTATS error caused by a faulty flag

---
 assets/test.yaml                       |  1 +
 modules.json                           | 39 ++++++++---
 modules/local/sanger_tol_btk.nf        | 12 +++-
 modules/local/sanger_tol_cpretext.nf   | 10 ++-
 modules/nf-core/gfastats/gfastats.diff | 38 ++++++++++
 modules/nf-core/gfastats/main.nf       | 15 ++--
 workflows/ear.nf                       | 96 +++++++++++++-------------
 7 files changed, 139 insertions(+), 72 deletions(-)
 create mode 100644 modules/nf-core/gfastats/gfastats.diff

diff --git a/assets/test.yaml b/assets/test.yaml
index ba87caf..4175309 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -1,6 +1,7 @@
 assembly_id: grTriPseu1
 reference_hap1: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
 reference_hap2: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
+reference_haplotigs: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
 longread:
   type: hifi
   dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/
diff --git a/modules.json b/modules.json
index b93de71..ef2ff42 100644
--- a/modules.json
+++ b/modules.json
@@ -8,33 +8,46 @@
                     "cat/cat": {
                         "branch": "master",
                         "git_sha": "5bb8ca085e17549e185e1823495ab8d20727a805",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "gfastats": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ],
+                        "patch": "modules/nf-core/gfastats/gfastats.diff"
                     },
                     "merquryfk/merquryfk": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": ["modules"],
+                        "installed_by": [
+                            "modules"
+                        ],
                         "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff"
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/merge": {
                         "branch": "master",
                         "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "46eca555142d6e597729fcb682adcc791796f514",
-                        "installed_by": ["modules"]
+                        "installed_by": [
+                            "modules"
+                        ]
                     }
                 }
             },
@@ -43,20 +56,26 @@
                     "utils_nextflow_pipeline": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
                         "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     },
                     "utils_nfvalidation_plugin": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": [
+                            "subworkflows"
+                        ]
                     }
                 }
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index 6ce9191..c9cb6a9 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -21,7 +21,7 @@ process SANGER_TOL_BTK {
     path("*_out/blobtoolkit/REFERENCE/summary.json.gz"),       emit: summary_json
     path("*_out/busco"),                                       emit: busco_data
     path("*_out/multiqc"),                                     emit: multiqc_report
-    path("*_out/blobtoolkit_pipeline_info"),                   emit: pipeline_info
+    path("*_out/pipeline_info/blobtoolkit"),                   emit: pipeline_info
     path "versions.yml",                                       emit: versions
 
     script:
@@ -71,10 +71,16 @@ process SANGER_TOL_BTK {
         Nextflow: \$(nextflow -v | cut -d " " -f3)
         executor system: $get_version
     END_VERSIONS
-
-    printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml
     """
 
+    // INFILE=${output_dir}/pipeline_info/software_versions.yml
+    // IFS=\$'\n'
+    // echo "$pipeline_name:" >> versions.yml
+    // for LINE in \$(cat "\$INFILE")
+    // do
+    //     echo "  \${LINE}" >> versions.yml
+    // done
+
     stub:
     def pipeline_version    =   task.ext.version        ?: "main"
 
diff --git a/modules/local/sanger_tol_cpretext.nf b/modules/local/sanger_tol_cpretext.nf
index 5b986e1..f9b12d8 100644
--- a/modules/local/sanger_tol_cpretext.nf
+++ b/modules/local/sanger_tol_cpretext.nf
@@ -52,10 +52,16 @@ process SANGER_TOL_CPRETEXT {
         Nextflow: \$(nextflow -v | cut -d " " -f3)
         executor system: $get_version
     END_VERSIONS
-
-    printf "%s/t" <${output_dir}/pipeline_info/software_version.yml >> versions.yml
     """
 
+    // INFILE=${output_dir}/pipeline_info/software_versions.yml
+    // IFS=\$'\n'
+    // echo "$pipeline_name:" >> versions.yml
+    // for LINE in \$(cat "\$INFILE")
+    // do
+    //     echo "  \$LINE" >> versions.yml
+    // done
+
     stub:
     def pipeline_version                    =   task.ext.version        ?: "main"
     def (pipeline_prefix,pipeline_suffix)   =   pipeline_name.split('/')
diff --git a/modules/nf-core/gfastats/gfastats.diff b/modules/nf-core/gfastats/gfastats.diff
new file mode 100644
index 0000000..0f108e1
--- /dev/null
+++ b/modules/nf-core/gfastats/gfastats.diff
@@ -0,0 +1,38 @@
+Changes in module 'nf-core/gfastats'
+--- modules/nf-core/gfastats/main.nf
++++ modules/nf-core/gfastats/main.nf
+@@ -19,7 +19,6 @@
+ 
+     output:
+     tuple val(meta), path("*.assembly_summary"), emit: assembly_summary
+-    tuple val(meta), path("*.${out_fmt}.gz")   , emit: assembly
+     path "versions.yml"                        , emit: versions
+ 
+     when:
+@@ -32,18 +31,16 @@
+     def ibed = include_bed ? "--include-bed $include_bed" : ""
+     def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : ""
+     def sak  = instructions ? "--swiss-army-knife $instructions" : ""
++
++    // Arguments have been removed due to causing errors with output values being 0
++    // out-format seemed to be the main cause of this, in testing
++    // Even using the main branch of the github repo yielded the same error.
++
+     """
+     gfastats \\
+-        $args \\
++        --nstar-report \\
+         --threads $task.cpus \\
+-        $agp \\
+-        $ibed \\
+-        $ebed \\
+-        $sak \\
+-        --out-format ${prefix}.${out_fmt}.gz \\
+         $assembly \\
+-        $genome_size \\
+-        $target \\
+         > ${prefix}.assembly_summary
+ 
+     cat <<-END_VERSIONS > versions.yml
+
+************************************************************
diff --git a/modules/nf-core/gfastats/main.nf b/modules/nf-core/gfastats/main.nf
index 8db239a..37a811e 100644
--- a/modules/nf-core/gfastats/main.nf
+++ b/modules/nf-core/gfastats/main.nf
@@ -19,7 +19,6 @@ process GFASTATS {
 
     output:
     tuple val(meta), path("*.assembly_summary"), emit: assembly_summary
-    tuple val(meta), path("*.${out_fmt}.gz")   , emit: assembly
     path "versions.yml"                        , emit: versions
 
     when:
@@ -32,18 +31,16 @@ process GFASTATS {
     def ibed = include_bed ? "--include-bed $include_bed" : ""
     def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : ""
     def sak  = instructions ? "--swiss-army-knife $instructions" : ""
+
+    // Arguments have been removed due to causing errors with output values being 0
+    // out-format seemed to be the main cause of this, in testing
+    // Even using the main branch of the github repo yielded the same error.
+
     """
     gfastats \\
-        $args \\
+        --nstar-report \\
         --threads $task.cpus \\
-        $agp \\
-        $ibed \\
-        $ebed \\
-        $sak \\
-        --out-format ${prefix}.${out_fmt}.gz \\
         $assembly \\
-        $genome_size \\
-        $target \\
         > ${prefix}.assembly_summary
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/workflows/ear.nf b/workflows/ear.nf
index 091697a..4b7db54 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -111,60 +111,60 @@ workflow EAR {
     //
     // MODULE: MERQURYFK PLOTS OF GENOME
     //
-    MERQURYFK_MERQURYFK(
-        merquryfk_input
-    )
-    ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
+    // MERQURYFK_MERQURYFK(
+    //     merquryfk_input
+    // )
+    // ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
 
 
     //
     // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE
     //          SKIP THE MAPPING SUBWORKFLOW
     //
-    if (!params.mapped) {
-        //
-        // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
-        //              This allows us to more esily bypass the mapping if we already have a sorted and mapped bam
-        //
-        MAIN_MAPPING (
-            YAML_INPUT.out.sample_id,
-            YAML_INPUT.out.longread_type,
-            YAML_INPUT.out.reference_hap1,
-            YAML_INPUT.out.pacbio_tuple,
-        )
-        ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
-        ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
-    } else {
-        ch_mapped_bam = YAML_INPUT.out.mapped_bam
-    }
+    // if (!params.mapped) {
+    //     //
+    //     // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
+    //     //              This allows us to more esily bypass the mapping if we already have a sorted and mapped bam
+    //     //
+    //     MAIN_MAPPING (
+    //         YAML_INPUT.out.sample_id,
+    //         YAML_INPUT.out.longread_type,
+    //         YAML_INPUT.out.reference_hap1,
+    //         YAML_INPUT.out.pacbio_tuple,
+    //     )
+    //     ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
+    //     ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
+    // } else {
+    //     ch_mapped_bam = YAML_INPUT.out.mapped_bam
+    // }
 
 
     //
     // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
     //
-    GENERATE_SAMPLESHEET(
-        ch_mapped_bam
-    )
-    ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
+    // GENERATE_SAMPLESHEET(
+    //     ch_mapped_bam
+    // )
+    // ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
 
 
-    //
-    // MODULE: Run Sanger-ToL/BlobToolKit
-    //
-    SANGER_TOL_BTK (
-        YAML_INPUT.out.reference_hap1,
-        ch_mapped_bam,
-        GENERATE_SAMPLESHEET.out.csv,
-        YAML_INPUT.out.btk_un_diamond_database,
-        YAML_INPUT.out.btk_nt_database,
-        YAML_INPUT.out.btk_un_diamond_database,
-        YAML_INPUT.out.btk_config,
-        YAML_INPUT.out.btk_ncbi_taxonomy_path,
-        YAML_INPUT.out.busco_lineages,
-        YAML_INPUT.out.btk_taxid,
-        'GCA_0001'
-    )
-    ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
+    // //
+    // // MODULE: Run Sanger-ToL/BlobToolKit
+    // //
+    // SANGER_TOL_BTK (
+    //     YAML_INPUT.out.reference_hap1,
+    //     ch_mapped_bam,
+    //     GENERATE_SAMPLESHEET.out.csv,
+    //     YAML_INPUT.out.btk_un_diamond_database,
+    //     YAML_INPUT.out.btk_nt_database,
+    //     YAML_INPUT.out.btk_un_diamond_database,
+    //     YAML_INPUT.out.btk_config,
+    //     YAML_INPUT.out.btk_ncbi_taxonomy_path,
+    //     YAML_INPUT.out.busco_lineages,
+    //     YAML_INPUT.out.btk_taxid,
+    //     'GCA_0001'
+    // )
+    // ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
 
 
     //
@@ -174,13 +174,13 @@ workflow EAR {
     hic_dir         = YAML_INPUT.out.cpretext_hic_dir_raw.get()
     longread_dir    = YAML_INPUT.out.longread_dir.get()
 
-    SANGER_TOL_CPRETEXT(
-        reference,
-        longread_dir,
-        hic_dir,
-        []
-    )
-    ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
+    // SANGER_TOL_CPRETEXT(
+    //     reference,
+    //     longread_dir,
+    //     hic_dir,
+    //     []
+    // )
+    // ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
 
 
     //

From 0ff29b3a421a2c390ba2821cd6ac62ec67006de1 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Mon, 2 Sep 2024 15:26:19 +0100
Subject: [PATCH 24/52] Uncomment workflow

---
 workflows/ear.nf | 96 ++++++++++++++++++++++++------------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/workflows/ear.nf b/workflows/ear.nf
index 4b7db54..091697a 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -111,60 +111,60 @@ workflow EAR {
     //
     // MODULE: MERQURYFK PLOTS OF GENOME
     //
-    // MERQURYFK_MERQURYFK(
-    //     merquryfk_input
-    // )
-    // ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
+    MERQURYFK_MERQURYFK(
+        merquryfk_input
+    )
+    ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
 
 
     //
     // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE
     //          SKIP THE MAPPING SUBWORKFLOW
     //
-    // if (!params.mapped) {
-    //     //
-    //     // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
-    //     //              This allows us to more esily bypass the mapping if we already have a sorted and mapped bam
-    //     //
-    //     MAIN_MAPPING (
-    //         YAML_INPUT.out.sample_id,
-    //         YAML_INPUT.out.longread_type,
-    //         YAML_INPUT.out.reference_hap1,
-    //         YAML_INPUT.out.pacbio_tuple,
-    //     )
-    //     ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
-    //     ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
-    // } else {
-    //     ch_mapped_bam = YAML_INPUT.out.mapped_bam
-    // }
+    if (!params.mapped) {
+        //
+        // SUBWORKFLOW: MAIN_MAPPING CONTAINS ALL THE MAPPING LOGIC
+        //              This allows us to more easily bypass the mapping if we already have a sorted and mapped bam
+        //
+        MAIN_MAPPING (
+            YAML_INPUT.out.sample_id,
+            YAML_INPUT.out.longread_type,
+            YAML_INPUT.out.reference_hap1,
+            YAML_INPUT.out.pacbio_tuple,
+        )
+        ch_versions = ch_versions.mix( MAIN_MAPPING.out.versions )
+        ch_mapped_bam = MAIN_MAPPING.out.mapped_bam
+    } else {
+        ch_mapped_bam = YAML_INPUT.out.mapped_bam
+    }
 
 
     //
     // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
     //
-    // GENERATE_SAMPLESHEET(
-    //     ch_mapped_bam
-    // )
-    // ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
+    GENERATE_SAMPLESHEET(
+        ch_mapped_bam
+    )
+    ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
 
 
-    // //
-    // // MODULE: Run Sanger-ToL/BlobToolKit
-    // //
-    // SANGER_TOL_BTK (
-    //     YAML_INPUT.out.reference_hap1,
-    //     ch_mapped_bam,
-    //     GENERATE_SAMPLESHEET.out.csv,
-    //     YAML_INPUT.out.btk_un_diamond_database,
-    //     YAML_INPUT.out.btk_nt_database,
-    //     YAML_INPUT.out.btk_un_diamond_database,
-    //     YAML_INPUT.out.btk_config,
-    //     YAML_INPUT.out.btk_ncbi_taxonomy_path,
-    //     YAML_INPUT.out.busco_lineages,
-    //     YAML_INPUT.out.btk_taxid,
-    //     'GCA_0001'
-    // )
-    // ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
+    //
+    // MODULE: Run Sanger-ToL/BlobToolKit
+    //
+    SANGER_TOL_BTK (
+        YAML_INPUT.out.reference_hap1,
+        ch_mapped_bam,
+        GENERATE_SAMPLESHEET.out.csv,
+        YAML_INPUT.out.btk_un_diamond_database,
+        YAML_INPUT.out.btk_nt_database,
+        YAML_INPUT.out.btk_un_diamond_database,
+        YAML_INPUT.out.btk_config,
+        YAML_INPUT.out.btk_ncbi_taxonomy_path,
+        YAML_INPUT.out.busco_lineages,
+        YAML_INPUT.out.btk_taxid,
+        'GCA_0001'
+    )
+    ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
 
 
     //
@@ -174,13 +174,13 @@ workflow EAR {
     hic_dir         = YAML_INPUT.out.cpretext_hic_dir_raw.get()
     longread_dir    = YAML_INPUT.out.longread_dir.get()
 
-    // SANGER_TOL_CPRETEXT(
-    //     reference,
-    //     longread_dir,
-    //     hic_dir,
-    //     []
-    // )
-    // ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
+    SANGER_TOL_CPRETEXT(
+        reference,
+        longread_dir,
+        hic_dir,
+        []
+    )
+    ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
 
 
     //

From d5448fae161f39443c0374760c2ced3d00e29a4c Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Thu, 12 Sep 2024 11:46:09 +0100
Subject: [PATCH 25/52] Update ci.yml

---
 .github/workflows/ci.yml | 31 ++-----------------------------
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 61b0cbf..1d1ce2e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -65,39 +65,12 @@ jobs:
         run: |
           nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
 
-      - name: NF-Core Download - download singularity containers
-        run: |
-          nf-core download sanger-tol/blobtoolkit --revision draft_assemblies --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
-
-      - name: NF-Core Download - download singularity containers
-        run: |
-          nf-core download sanger-tol/curationpretext --revision 1.0.0 --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
-
       - name: Download Tiny test data
         # Download A fungal test data set that is full enough to show some real output.
+        # Needs a kmer db for merqury
         run: |
           curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf -
 
-      - name: Download the NCBI taxdump database
-        run: |
-          mkdir ncbi_taxdump
-          curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf -
-
-      - name: Download the BUSCO lineage database
-        run: |
-          mkdir busco_database
-          curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf -
-
-      - name: Download the subset of NT database
-        run: |
-          mkdir NT_database
-          curl -L https://ftp.ncbi.nlm.nih.gov/blast/db/18S_fungal_sequences.tar.gz | tar -C NT_database -xzf -
-
-      - name: Download the subset of Diamond database
-        run: |
-          mkdir diamond
-          wget -c https://tolit.cog.sanger.ac.uk/test-data/resources/diamond/UP000000212_1234679_tax.dmnd -O diamond/UP000000212_1234679_tax.dmnd
-
       # - name: Disk space cleanup
       #   uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
 
@@ -106,5 +79,5 @@ jobs:
         # For example: adding multiple test runs with different parameters
         # Remember that you can parallelise this by using strategy.matrix
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          nextflow run ${GITHUB_WORKSPACE} -profile test,singularity --outdir ./results
           ls ./results/*/*

From f476781b483508c3d3f6853dc37cf868b13b61b5 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 13:30:27 +0100
Subject: [PATCH 26/52] Updating the CICD and adding steps

---
 .github/workflows/ci.yml        | 30 +-----------
 assets/test.yaml                | 32 +++++++-----
 modules/local/sanger_tol_btk.nf | 12 +++--
 nextflow.config                 |  2 +-
 workflows/ear.nf                | 86 +++++++++++++++++++--------------
 5 files changed, 80 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 61b0cbf..e1f61fc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -65,38 +65,11 @@ jobs:
         run: |
           nf-core download sanger-tol/ear --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
 
-      - name: NF-Core Download - download singularity containers
-        run: |
-          nf-core download sanger-tol/blobtoolkit --revision draft_assemblies --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
-
-      - name: NF-Core Download - download singularity containers
-        run: |
-          nf-core download sanger-tol/curationpretext --revision 1.0.0 --compress none -d --force --outdir sanger-ear --container-cache-utilisation amend --container-system singularity
-
       - name: Download Tiny test data
         # Download A fungal test data set that is full enough to show some real output.
         run: |
           curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf -
-
-      - name: Download the NCBI taxdump database
-        run: |
-          mkdir ncbi_taxdump
-          curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf -
-
-      - name: Download the BUSCO lineage database
-        run: |
-          mkdir busco_database
-          curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf -
-
-      - name: Download the subset of NT database
-        run: |
-          mkdir NT_database
-          curl -L https://ftp.ncbi.nlm.nih.gov/blast/db/18S_fungal_sequences.tar.gz | tar -C NT_database -xzf -
-
-      - name: Download the subset of Diamond database
-        run: |
-          mkdir diamond
-          wget -c https://tolit.cog.sanger.ac.uk/test-data/resources/diamond/UP000000212_1234679_tax.dmnd -O diamond/UP000000212_1234679_tax.dmnd
+          cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
 
       # - name: Disk space cleanup
       #   uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
@@ -107,4 +80,3 @@ jobs:
         # Remember that you can parallelise this by using strategy.matrix
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
-          ls ./results/*/*
diff --git a/assets/test.yaml b/assets/test.yaml
index 4175309..e7cd829 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -1,25 +1,33 @@
+# General values for all subpipelines and modules
 assembly_id: grTriPseu1
-reference_hap1: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
-reference_hap2: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
-reference_haplotigs: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
+reference_hap1: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa
+reference_hap2: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
+reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa
+
+# If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore.
+mapped_bam: []
+
+merquryfk:
+  fastk_hist: "./"
+  fastk_ktab: "./"
+
+# Used by both subpipelines
 longread:
   type: hifi
-  dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/
-mapped_bam: idCulLati1/mapped_bam.bam
+  dir: /lustre/scratch122/tol/data/d/0/d/1/f/e/Anisus_vorticulus/genomic_data/xgAniVori1/pacbio/fasta/
+
 curationpretext:
   aligner: minimap2
   telomere_motif: TTAGGG
-  hic_dir: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/hic-arima/
-merquryfk:
-  fastk_hist: "./"
-  fastk_ktab: "./"
+  hic_dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/hic-arima/
 btk:
+  taxid: 352914
+  gca_accession: GCA_0001
+  lineages: "fungi_odb10"
   nt_database: /home/runner/work/ascc/ascc/NT_database/
   nt_database_prefix: 18S_fungal_sequences
   diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
   diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd
   ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/
   ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp
-  taxid: 352914
-  gca_accession: GCA_0001
-  lineages: "fungi_odb10"
+  config: /home/runner/work/ear/ear/conf/sanger-tol-btk.config
diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index c9cb6a9..c8a5776 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -17,11 +17,11 @@ process SANGER_TOL_BTK {
 
     output:
     tuple val(meta), path("*_out/blobtoolkit/REFERENCE"),      emit: dataset
-    path("*_out/blobtoolkit/plots"),                           emit: plots
-    path("*_out/blobtoolkit/REFERENCE/summary.json.gz"),       emit: summary_json
-    path("*_out/busco"),                                       emit: busco_data
-    path("*_out/multiqc"),                                     emit: multiqc_report
-    path("*_out/pipeline_info/blobtoolkit"),                   emit: pipeline_info
+    path "*_out/blobtoolkit/plots" ,                           emit: plots
+    path "*_out/blobtoolkit/REFERENCE/summary.json.gz",       emit: summary_json
+    path "*_out/busco",                                       emit: busco_data
+    path "*_out/multiqc",                                     emit: multiqc_report
+    path "*_out/pipeline_info/blobtoolkit",                   emit: pipeline_info
     path "versions.yml",                                       emit: versions
 
     script:
@@ -111,3 +111,5 @@ process SANGER_TOL_BTK {
     END_VERSIONS
     """
 }
+
+}
diff --git a/nextflow.config b/nextflow.config
index a2b702d..9302ce8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -178,7 +178,7 @@ singularity.registry = 'quay.io'
 
 // Nextflow plugins
 plugins {
-    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id: 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }
 
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
diff --git a/workflows/ear.nf b/workflows/ear.nf
index 091697a..34310ba 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -40,6 +40,13 @@ workflow EAR {
     ch_versions     = Channel.empty()
     ch_align_bam    = Channel.empty()
 
+    exclude_steps   = params.steps ? params.steps.split(",") : ""
+
+    full_list       = ["btk", "cpretext"]
+
+    if (!full_list.containsAll(exclude_steps)) {
+        exit 1, "There is an extra argument given on Command Line: \n Check contents of: $exclude_steps\nMaster list is: $full_list"
+    }
 
     //
     // MODULE: YAML_INPUT
@@ -140,48 +147,57 @@ workflow EAR {
 
 
     //
-    // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
+    // LOGIC: STEP TO STOP BTK RUNNING IF SPECIFIED BY USER
     //
-    GENERATE_SAMPLESHEET(
-        ch_mapped_bam
-    )
-    ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
+    if (!exclude_steps.contains('btk')) {
 
+        //
+        // MODULE: GENERATE_SAMPLESHEET creates a csv for the blobtoolkit pipeline
+        //
+        GENERATE_SAMPLESHEET(
+            ch_mapped_bam
+        )
+        ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )
 
-    //
-    // MODULE: Run Sanger-ToL/BlobToolKit
-    //
-    SANGER_TOL_BTK (
-        YAML_INPUT.out.reference_hap1,
-        ch_mapped_bam,
-        GENERATE_SAMPLESHEET.out.csv,
-        YAML_INPUT.out.btk_un_diamond_database,
-        YAML_INPUT.out.btk_nt_database,
-        YAML_INPUT.out.btk_un_diamond_database,
-        YAML_INPUT.out.btk_config,
-        YAML_INPUT.out.btk_ncbi_taxonomy_path,
-        YAML_INPUT.out.busco_lineages,
-        YAML_INPUT.out.btk_taxid,
-        'GCA_0001'
-    )
-    ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
 
+        //
+        // MODULE: Run Sanger-ToL/BlobToolKit
+        //
+        SANGER_TOL_BTK (
+            YAML_INPUT.out.reference_hap1,
+            ch_mapped_bam,
+            GENERATE_SAMPLESHEET.out.csv,
+            YAML_INPUT.out.btk_un_diamond_database,
+            YAML_INPUT.out.btk_nt_database,
+            YAML_INPUT.out.btk_un_diamond_database,
+            YAML_INPUT.out.btk_config,
+            YAML_INPUT.out.btk_ncbi_taxonomy_path,
+            YAML_INPUT.out.busco_lineages,
+            YAML_INPUT.out.btk_taxid,
+            'GCA_0001'
+        )
+        ch_versions              = ch_versions.mix(SANGER_TOL_BTK.out.versions)
+    }
 
     //
-    // MODULE: Run Sanger-ToL/CurationPretext
+    // LOGIC: STEP TO STOP CURATION_PRETEXT RUNNING IF SPECIFIED BY USER
     //
-    reference       = YAML_INPUT.out.reference_path.get()
-    hic_dir         = YAML_INPUT.out.cpretext_hic_dir_raw.get()
-    longread_dir    = YAML_INPUT.out.longread_dir.get()
-
-    SANGER_TOL_CPRETEXT(
-        reference,
-        longread_dir,
-        hic_dir,
-        []
-    )
-    ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
-
+    if (!exclude_steps.contains('cpretext')) {
+        //
+        // MODULE: Run Sanger-ToL/CurationPretext
+        //
+        reference       = YAML_INPUT.out.reference_path.get()
+        hic_dir         = YAML_INPUT.out.cpretext_hic_dir_raw.get()
+        longread_dir    = YAML_INPUT.out.longread_dir.get()
+
+        SANGER_TOL_CPRETEXT(
+            reference,
+            longread_dir,
+            hic_dir,
+            []
+        )
+        ch_versions = ch_versions.mix( SANGER_TOL_CPRETEXT.out.versions )
+    }
 
     //
     // Collate and save software versions

From 53242fe097068b0dffa3c26e968c634a942aca21 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 13:40:04 +0100
Subject: [PATCH 27/52] Extra }

---
 modules/local/sanger_tol_btk.nf | 2 --
 1 file changed, 2 deletions(-)

diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index c8a5776..d7675fb 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -111,5 +111,3 @@ process SANGER_TOL_BTK {
     END_VERSIONS
     """
 }
-
-}

From 73399cb13d0a7fc07b5f13c9651fef83666d0882 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 13:48:48 +0100
Subject: [PATCH 28/52] Fix steps

---
 .github/workflows/ci.yml | 3 ++-
 workflows/ear.nf         | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a501dff..1311aea 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -79,5 +79,6 @@ jobs:
         # TODO nf-core: You can customise CI pipeline run tests as required
         # For example: adding multiple test runs with different parameters
         # Remember that you can parallelise this by using strategy.matrix
+        # Skip BTK and CPRETEXT as they are already tested on their repos.
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext
diff --git a/workflows/ear.nf b/workflows/ear.nf
index 34310ba..fb8b441 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -42,7 +42,7 @@ workflow EAR {
 
     exclude_steps   = params.steps ? params.steps.split(",") : ""
 
-    full_list       = ["btk", "cpretext"]
+    full_list       = ["btk", "cpretext", ""]
 
     if (!full_list.containsAll(exclude_steps)) {
         exit 1, "There is an extra argument given on Command Line: \n Check contents of: $exclude_steps\nMaster list is: $full_list"

From 0a441e7092b7cac5abda9d8b03c064103178fad4 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 13:54:50 +0100
Subject: [PATCH 29/52] Wrong Pacbio path

---
 assets/test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/test.yaml b/assets/test.yaml
index e7cd829..4cbec2b 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -14,7 +14,7 @@ merquryfk:
 # Used by both subpipelines
 longread:
   type: hifi
-  dir: /lustre/scratch122/tol/data/d/0/d/1/f/e/Anisus_vorticulus/genomic_data/xgAniVori1/pacbio/fasta/
+  dir: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/genomic_data/pacbio/
 
 curationpretext:
   aligner: minimap2

From b0fe91f707f6f2456eead753fc7a8232bb173e62 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 13:59:19 +0100
Subject: [PATCH 30/52] Wrong Pacbio path

---
 assets/test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/test.yaml b/assets/test.yaml
index 4cbec2b..47947d9 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -14,7 +14,7 @@ merquryfk:
 # Used by both subpipelines
 longread:
   type: hifi
-  dir: /nfs/treeoflife-01/teams/tola/users/dp24/ear/TreeValTinyData/genomic_data/pacbio/
+  dir: /home/runner/work/ear/ear/TreeValTinyData/genomic_data/pacbio/
 
 curationpretext:
   aligner: minimap2

From a758e817ce9413305c365614192542491dc9a78b Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 14:09:12 +0100
Subject: [PATCH 31/52] Correct collision

---
 .github/workflows/ci.yml | 1 +
 assets/test.yaml         | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1311aea..9f1beaf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -71,6 +71,7 @@ jobs:
         run: |
           curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf -
           cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
+          cp TreeValTinyData/assembly/draft/grTriPseu1.fa TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa
 
       # - name: Disk space cleanup
       #   uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
diff --git a/assets/test.yaml b/assets/test.yaml
index 47947d9..0f5f6ed 100755
--- a/assets/test.yaml
+++ b/assets/test.yaml
@@ -2,7 +2,7 @@
 assembly_id: grTriPseu1
 reference_hap1: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa
 reference_hap2: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-hap.fa
-reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1.fa
+reference_haplotigs: /home/runner/work/ear/ear/TreeValTinyData/assembly/draft/grTriPseu1-all_hap.fa
 
 # If a mapped bam already exists use the below + --mapped TRUE on the nextflow command else ignore.
 mapped_bam: []

From f429c473e8ebbe4ec7c54631ec1a3d5b4d2f5d2d Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 16:27:48 +0100
Subject: [PATCH 32/52] Update for linting

---
 .github/workflows/ci.yml             |  4 +--
 .nf-core.yml                         |  1 +
 README.md                            |  2 +-
 conf/sanger-tol-btk.config           |  2 +-
 conf/test.config                     |  2 +-
 modules.json                         | 38 ++++++++--------------------
 modules/local/nextflow/run/main.nf   |  2 +-
 modules/local/sanger_tol_btk.nf      | 14 +++++-----
 modules/local/sanger_tol_cpretext.nf |  2 +-
 nextflow.config                      |  1 +
 nextflow_schema.json                 | 35 +++++--------------------
 subworkflows/local/main_mapping.nf   |  2 +-
 subworkflows/local/pe_mapping.nf     |  2 +-
 subworkflows/local/se_mapping.nf     |  2 +-
 workflows/ear.nf                     | 24 +++++++++++-------
 15 files changed, 51 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9f1beaf..95c5d8c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
     strategy:
       matrix:
         NXF_VER:
-          - "24.04.2"
+          - "24.04.0"
           - "latest-everything"
     steps:
       - name: Get branch names
@@ -82,4 +82,4 @@ jobs:
         # Remember that you can parallelise this by using strategy.matrix
         # Skip BTK and CPRETEXT as they are already tested on their repos.
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps btk,cpretext,merquryfk
diff --git a/.nf-core.yml b/.nf-core.yml
index 9a35f55..fd05354 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -4,6 +4,7 @@ lint:
     - assets/nf-core-ear_logo_light.png
     - docs/images/nf-core-ear_logo_light.png
     - docs/images/nf-core-ear_logo_dark.png
+    - lib/nfcore_external_java_deps.jar
     - .github/ISSUE_TEMPLATE/config.yml
     - .github/workflows/awstest.yml
     - .github/workflows/awsfulltest.yml
diff --git a/README.md b/README.md
index 652eba6..697f288 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 [![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
 [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
 
-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
diff --git a/conf/sanger-tol-btk.config b/conf/sanger-tol-btk.config
index 247dbbd..553ad56 100644
--- a/conf/sanger-tol-btk.config
+++ b/conf/sanger-tol-btk.config
@@ -4,4 +4,4 @@ process {
         memory = { check_max( 10.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 16.h  * task.attempt, 'time'    ) }
     }
-}
\ No newline at end of file
+}
diff --git a/conf/test.config b/conf/test.config
index 7313f18..06d069f 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -20,5 +20,5 @@ params {
     max_time    = '6.h'
     input       = "${projectDir}/assets/test.yaml"
     outdir      = "results"
-    
+
 }
diff --git a/modules.json b/modules.json
index ef2ff42..d4e081b 100644
--- a/modules.json
+++ b/modules.json
@@ -8,46 +8,34 @@
                     "cat/cat": {
                         "branch": "master",
                         "git_sha": "5bb8ca085e17549e185e1823495ab8d20727a805",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "gfastats": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ],
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/gfastats/gfastats.diff"
                     },
                     "merquryfk/merquryfk": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ],
+                        "installed_by": ["modules"],
                         "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff"
                     },
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/merge": {
                         "branch": "master",
                         "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "samtools/sort": {
                         "branch": "master",
                         "git_sha": "46eca555142d6e597729fcb682adcc791796f514",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     }
                 }
             },
@@ -56,26 +44,20 @@
                     "utils_nextflow_pipeline": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
                         "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfvalidation_plugin": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     }
                 }
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/local/nextflow/run/main.nf b/modules/local/nextflow/run/main.nf
index cc522bc..af6ba65 100644
--- a/modules/local/nextflow/run/main.nf
+++ b/modules/local/nextflow/run/main.nf
@@ -35,4 +35,4 @@ process NEXTFLOW_RUN {
     output:
     path "results"  , emit: output
     val process.text, emit: log
-}
\ No newline at end of file
+}
diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index d7675fb..699ee04 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -16,13 +16,13 @@ process SANGER_TOL_BTK {
     val gca_accession
 
     output:
-    tuple val(meta), path("*_out/blobtoolkit/REFERENCE"),      emit: dataset
-    path "*_out/blobtoolkit/plots" ,                           emit: plots
-    path "*_out/blobtoolkit/REFERENCE/summary.json.gz",       emit: summary_json
-    path "*_out/busco",                                       emit: busco_data
-    path "*_out/multiqc",                                     emit: multiqc_report
-    path "*_out/pipeline_info/blobtoolkit",                   emit: pipeline_info
-    path "versions.yml",                                       emit: versions
+    tuple val(meta), path("*_out/blobtoolkit/REFERENCE"),   emit: dataset
+    path "*_out/blobtoolkit/plots" ,                        emit: plots
+    path "*_out/blobtoolkit/REFERENCE/summary.json.gz",     emit: summary_json
+    path "*_out/busco",                                     emit: busco_data
+    path "*_out/multiqc",                                   emit: multiqc_report
+    path "*_out/pipeline_info/blobtoolkit",                 emit: pipeline_info
+    path "versions.yml",                                    emit: versions
 
     script:
     def pipeline_name                       =   task.ext.pipeline_name
diff --git a/modules/local/sanger_tol_cpretext.nf b/modules/local/sanger_tol_cpretext.nf
index f9b12d8..b073039 100644
--- a/modules/local/sanger_tol_cpretext.nf
+++ b/modules/local/sanger_tol_cpretext.nf
@@ -45,7 +45,7 @@ process SANGER_TOL_CPRETEXT {
         $args \\
         $config \\
         -resume'
-    
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         $pipeline_suffix: $pipeline_version
diff --git a/nextflow.config b/nextflow.config
index 9302ce8..e564534 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -13,6 +13,7 @@ params {
     // Input options
     input                        = null
     mapped                       = false
+    steps                        = ""
 
     // Boilerplate options
     outdir                       = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f198603..eee6164 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -36,9 +36,14 @@
                     "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
                     "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
                 },
-                "multiqc_title": {
+                "mapped": {
+                    "type": "boolean",
+                    "description": "Set to true when a pre-mapped BAM file is supplied as input.",
+                    "fa_icon": "fas fa-file-signature"
+                },
+                "steps": {
                     "type": "string",
-                    "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
+                    "description": "Comma-separated list of steps to skip. Valid values: btk, cpretext, merquryfk.",
                     "fa_icon": "fas fa-file-signature"
                 }
             }
@@ -168,14 +173,6 @@
                     "fa_icon": "fas fa-remove-format",
                     "hidden": true
                 },
-                "max_multiqc_email_size": {
-                    "type": "string",
-                    "description": "File size limit when attaching MultiQC reports to summary emails.",
-                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
-                    "default": "25.MB",
-                    "fa_icon": "fas fa-file-upload",
-                    "hidden": true
-                },
                 "monochrome_logs": {
                     "type": "boolean",
                     "description": "Do not use coloured log outputs.",
@@ -189,24 +186,6 @@
                     "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
                     "hidden": true
                 },
-                "multiqc_config": {
-                    "type": "string",
-                    "format": "file-path",
-                    "description": "Custom config file to supply to MultiQC.",
-                    "fa_icon": "fas fa-cog",
-                    "hidden": true
-                },
-                "multiqc_logo": {
-                    "type": "string",
-                    "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
-                    "fa_icon": "fas fa-image",
-                    "hidden": true
-                },
-                "multiqc_methods_description": {
-                    "type": "string",
-                    "description": "Custom MultiQC yaml file containing HTML including a methods description.",
-                    "fa_icon": "fas fa-cog"
-                },
                 "validate_params": {
                     "type": "boolean",
                     "description": "Boolean whether to validate parameters against the schema at runtime",
diff --git a/subworkflows/local/main_mapping.nf b/subworkflows/local/main_mapping.nf
index 28c100f..0531201 100644
--- a/subworkflows/local/main_mapping.nf
+++ b/subworkflows/local/main_mapping.nf
@@ -74,4 +74,4 @@ workflow MAIN_MAPPING {
     mapped_bam                        // channel: tuple val(meta), path(mapped_bam)
     versions       = ch_versions      // channel: [ path(versions.yml) ]
 
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/pe_mapping.nf b/subworkflows/local/pe_mapping.nf
index 3c41670..358be3b 100644
--- a/subworkflows/local/pe_mapping.nf
+++ b/subworkflows/local/pe_mapping.nf
@@ -113,4 +113,4 @@ process GrabFiles {
     tuple val(meta), path("in/*.{fa,fasta}.{gz}")
 
     "true"
-}
\ No newline at end of file
+}
diff --git a/subworkflows/local/se_mapping.nf b/subworkflows/local/se_mapping.nf
index 8c7ad52..c3307d4 100644
--- a/subworkflows/local/se_mapping.nf
+++ b/subworkflows/local/se_mapping.nf
@@ -112,4 +112,4 @@ process GrabFiles {
     tuple val(meta), path("in/*.{fa,fasta,fna}.{gz}")
 
     "true"
-}
\ No newline at end of file
+}
diff --git a/workflows/ear.nf b/workflows/ear.nf
index fb8b441..6a87e9e 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -13,7 +13,7 @@ include { YAML_INPUT                        } from '../subworkflows/local/yaml_i
 include { MAIN_MAPPING                      } from '../subworkflows/local/main_mapping'
 
 // Module imports
-include { CAT_CAT                           } from '../modules/nf-core/cat/cat/main' 
+include { CAT_CAT                           } from '../modules/nf-core/cat/cat/main'
 include { GENERATE_SAMPLESHEET              } from '../modules/local/generate_samplesheet'
 include { GFASTATS                          } from '../modules/nf-core/gfastats/main'
 include { MERQURYFK_MERQURYFK               } from '../modules/nf-core/merquryfk/merquryfk/main'
@@ -42,10 +42,10 @@ workflow EAR {
 
     exclude_steps   = params.steps ? params.steps.split(",") : ""
 
-    full_list       = ["btk", "cpretext", ""]
+    full_list       = ["btk", "cpretext", "merquryfk", ""]
 
     if (!full_list.containsAll(exclude_steps)) {
-        exit 1, "There is an extra argument given on Command Line: \n Check contents of: $exclude_steps\nMaster list is: $full_list"
+        exit 1, "There is an extra argument given on Command Line: \nCheck contents of: $exclude_steps\nMaster list is: $full_list"
     }
 
     //
@@ -58,7 +58,7 @@ workflow EAR {
     //
     // LOGIC: IF HAPLOTIGS IS EMPTY THEN PASS ON HALPLOTYPE ASSEMBLY
     //          IF HAPLOTIGS EXISTS THEN MERGE WITH HAPLOTYPE ASSEMBLY
-    // 
+    //
     if (YAML_INPUT.out.reference_haplotigs.ifEmpty(true)) {
         YAML_INPUT.out.sample_id
             .combine(YAML_INPUT.out.reference_hap2)
@@ -116,13 +116,19 @@ workflow EAR {
 
 
     //
-    // MODULE: MERQURYFK PLOTS OF GENOME
+    // LOGIC: STEP TO STOP MERQURY_FK RUNNING IF SPECIFIED BY USER
     //
-    MERQURYFK_MERQURYFK(
-        merquryfk_input
-    )
-    ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
+    if (!exclude_steps.contains('merquryfk')) {
 
+        //
+        // MODULE: MERQURYFK PLOTS OF GENOME
+        //
+        merquryfk_input.view()
+        MERQURYFK_MERQURYFK(
+            merquryfk_input
+        )
+        ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions )
+    }
 
     //
     // LOGIC: IF A MAPPED BAM FILE EXISTS AND THE FLAG `mapped` IS TRUE

From 7f1dd548a13e48a582973a18b07fb46febdc5f97 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 16:32:43 +0100
Subject: [PATCH 33/52] PRETTIER

---
 .github/workflows/ci.yml |  2 +-
 .nf-core.yml             |  2 +-
 CHANGELOG.md             | 31 +++++++++++++++++--------------
 README.md                | 14 +++++++-------
 assets/real_pdf.yaml     |  4 ++--
 docs/output.md           | 11 ++++-------
 docs/usage.md            |  1 -
 7 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 95c5d8c..c347e38 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,7 +47,7 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: "3.10"
-  
+
       - name: Install nf-core
         run: |
           pip install nf-core
diff --git a/.nf-core.yml b/.nf-core.yml
index fd05354..d9fe12b 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -4,7 +4,7 @@ lint:
     - assets/nf-core-ear_logo_light.png
     - docs/images/nf-core-ear_logo_light.png
     - docs/images/nf-core-ear_logo_dark.png
-    - lib/nfcore_external_java_deps.jar 
+    - lib/nfcore_external_java_deps.jar
     - .github/ISSUE_TEMPLATE/config.yml
     - .github/workflows/awstest.yml
     - .github/workflows/awsfulltest.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3173f7c..9106bfd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/)
 The current pipeline means the MVP for ear.
 
 ### Added
+
 GFASTATS to generate statistics on the input primary genome.
 MERQURY_FK to generate kmer graphs and analyses of the primary, haplotype and merged assembly.
 BLOBTOOLKIT to generate busco files and blobtoolkit dataset/plots.
@@ -17,24 +18,26 @@ CURATIONPRETEXT to generate pretext plots and pngs.
 
 ### Parameters
 
-| Old parameter   | New parameter |
-| --------------- | ------------- |
-|                 | --mapped      |
+| Old parameter | New parameter |
+| ------------- | ------------- |
+|               | --mapped      |
 
 ### Software dependencies
 
-| Dependency  | Old version   | New version   |
-| ----------- | ------------- | ------------- |
-| sanger-tol/blobtoolkit* |               | draft_assemblies        |
-| sanger-tol/curationpretext* |   |  1.0.0 (UNSC Cradle) |
-| GFASTATS |  | 1.3.6--hdcf5f25_3   |
-| MERQUERY_FK  | | 1.2   |
-| MINIMAP2_ALIGN |  | 2.28  |
-| SAMTOOLS_MERGE |  | 1.20--h50ea8bc_0 |
-| SAMTOOLS_SORT  |  | 1.20--h50ea8bc_0 |
-| 
+| Dependency                   | Old version | New version         |
+| ---------------------------- | ----------- | ------------------- |
+| sanger-tol/blobtoolkit\*     |             | draft_assemblies    |
+| sanger-tol/curationpretext\* |             | 1.0.0 (UNSC Cradle) |
+| GFASTATS                     |             | 1.3.6--hdcf5f25_3   |
+| MERQUERY_FK                  |             | 1.2                 |
+| MINIMAP2_ALIGN               |             | 2.28                |
+| SAMTOOLS_MERGE               |             | 1.20--h50ea8bc_0    |
+| SAMTOOLS_SORT                |             | 1.20--h50ea8bc_0    |
+
+|
 
 - Note: for pipelines, please check their own CHANGELOG file for a full list of software dependencies.
 
 ### Dependencies
-The pipeline depends on a number of databases which are noted in  [README](README.md) and [USAGE](docs/usage.md).
+
+The pipeline depends on a number of databases which are noted in [README](README.md) and [USAGE](docs/usage.md).
diff --git a/README.md b/README.md
index 697f288..5665771 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
 1. Read the input yaml file (YAML_INPUT)
 2. Run GFASTATS (GFASTARS)
 3. Run MERQURYFK_MERQURYFK (MERQURYFK)
-4. Run MAIN_MAPPING, longread single-end/paired-end mapping 
+4. Run MAIN_MAPPING, longread single-end/paired-end mapping
 5. Run GENERATE_SAMPLESHEET, generate a csv file required for SANGER_TOL_BTK.
 6. Run SANGER_TOL_BTK, also known as SANGER-TOL/BLOBTOOLKIT a subpipline for SANGER-TOL/EAR
 7. Run SANGER_TOL_CPRETEXT, also known as SANGER-TOL/CURATIONPRETEXT a subpipeline for SANGER-TOL/EAR.
@@ -27,11 +27,12 @@
 
 The sanger-tol/ear pipeline requires a number of databases in place in order to run the blobtoolkit pipeline.
 These include:
-   - A blast nt database
-   - A Diamond blast uniprot database
-   - A Diamond blast nr database
-   - An NCBI taxdump
-   - An NCBI rankedlineage.dmp
+
+- A blast nt database
+- A Diamond blast uniprot database
+- A Diamond blast nr database
+- An NCBI taxdump
+- An NCBI rankedlineage.dmp
 
 Next, a yaml file containing the following should then be completed:
 
@@ -70,7 +71,6 @@ btk:
   config: <PATH TO ear/conf/sanger-tol-btk.config TO OVERWRITE PROCESS LIMITS>
 ```
 
-
 Now, you can run the pipeline using:
 
 ```bash
diff --git a/assets/real_pdf.yaml b/assets/real_pdf.yaml
index 8f8d4a0..19c4c35 100644
--- a/assets/real_pdf.yaml
+++ b/assets/real_pdf.yaml
@@ -20,14 +20,14 @@ PROFILING:
 # ASSEMBLY DATA
 ASSEMBLIES:
   Pre-curation:
-    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|]
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|]
     pri:
       gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats
       busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt
       merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/
 
   Curated:
-    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|, TreeVal_v1.1]
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1]
     pri:
       gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.fa.gfastats
       busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/assembly/curated/idCulLati1.1/ear/idCulLati1.1.primary.curated.insecta_odb10.busco/short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt
diff --git a/docs/output.md b/docs/output.md
index f5a9c8b..dac22bd 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -27,7 +27,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 </details>
 
-[GFASTATS](https://github.com/vgl-hub/gfastats) is a single fast and exhaustive tool for summary statistics and simultaneous *fa* (fasta, fastq, gfa [.gz]) genome assembly file manipulation.
+[GFASTATS](https://github.com/vgl-hub/gfastats) is a single fast and exhaustive tool for summary statistics and simultaneous _fa_ (fasta, fastq, gfa [.gz]) genome assembly file manipulation.
 
 ### MERQURYFK
 
@@ -35,7 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 <summary>Output files</summary>
 
 - `merquryfk/`
-  - `*.completeness.stats`: 
+  - `*.completeness.stats`:
   - `*{"primary","haplotype",""}_only.bed`:
   - `*{"primary","haplotype",""}.qv`:
   - `*.spectra-asm.{fl,ln,st}.png`:
@@ -47,14 +47,13 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 Merqury is a novel tool for reference-free assembly evaluation based on efficient k-mer set operations. By comparing k-mers in a de novo assembly to those found in unassembled high-accuracy reads, Merqury estimates base-level accuracy and completeness.
 
-
 ## SANGER_TOL_BTK
 
 <details markdown="1">
 <summary>Output files</summary>
 
 - `sanger/*_blobtoolkit_out/`
-  - `blobtoolkit/plots/*png`: Blobtoolkit plots 
+  - `blobtoolkit/plots/*png`: Blobtoolkit plots
   - `blobtoolkit/{ASSEMBLY_NAME}/*.json.gz`: Blobtoolkit dataset for use in BTK_viewer.
   - `busco/*_odb10/*.{tsv,tar.gz,json,txt}`: Busco output
   - `muliqc/`: MultiQC plots/data and report.html.
@@ -64,14 +63,13 @@ Merqury is a novel tool for reference-free assembly evaluation based on efficien
 
 [SANGER_TOL_BTK](https://pipelines.tol.sanger.ac.uk/blobtoolkit) is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes.
 
-
 ## SANGER_TOL_CPRETEXT
 
 <details markdown="1">
 <summary>Output files</summary>
 
 - `sanger/*_curationpretext_out/`
-  - `accessory_files/*.{bigWig,bed,bedgraph}`: Track files describing Telomere, gap, coverage data across the genome. 
+  - `accessory_files/*.{bigWig,bed,bedgraph}`: Track files describing Telomere, gap, coverage data across the genome.
   - `pretext_maps_raw`: Pre-accessory file ingestion pretext files.
   - `pretext_maps_processed`: Post-accessory file ingestion pretext files, e.g. the final output.
   - [`pipeline_info`](#pipeline-information)
@@ -80,7 +78,6 @@ Merqury is a novel tool for reference-free assembly evaluation based on efficien
 
 [SANGER_TOL_CPRETEXT](https://pipelines.tol.sanger.ac.uk/curationpretext) is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://pipelines.tol.sanger.ac.uk/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes.
 
-
 ### Pipeline information
 
 <details markdown="1">
diff --git a/docs/usage.md b/docs/usage.md
index b703d3e..a1e62af 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -166,7 +166,6 @@ As in the Snakemake version [a YAML configuration file](https://github.com/blobt
 
 The data in the YAML is currently ignored in the Nextflow pipeline version. The YAML file is retained only to allow compatibility with the BlobDir dataset generated by the [Snakemake version](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/blobtoolkit-pipeline/src). The taxonomic information in the YAML file can be obtained from [NCBI Taxonomy](https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/).
 
-
 ## Running the pipeline
 
 The typical command for running the pipeline is as follows:

From a8c8189a6cc5c98a769a33767e070e15d75ffdf2 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 16:36:21 +0100
Subject: [PATCH 34/52] Template yaml PRETTIER fix

---
 README.md                | 1 -
 assets/template_pdf.yaml | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 5665771..5b61fe6 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,6 @@
 [![GitHub Actions CI Status](https://github.com/sanger-tol/ear/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/ci.yml)
 [![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
 [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
-
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
diff --git a/assets/template_pdf.yaml b/assets/template_pdf.yaml
index 3779c19..5688f4e 100644
--- a/assets/template_pdf.yaml
+++ b/assets/template_pdf.yaml
@@ -20,14 +20,14 @@ PROFILING:
 # ASSEMBLY DATA
 ASSEMBLIES:
   Pre-curation:
-    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|]
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|]
     pri:
       gfastats--nstar-report_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.fa.gz.gfastats
       busco_short_summary_txt: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.insecta_odb10.busco/short_summary.specific.insecta_odb10.out_scaffolds_final.insecta_odb10.busco.txt
       merqury_folder: /lustre/scratch123/tol/tolqc/data/erga-bge/insects/Culex_laticinctus/working/idCulLati1.hifiasm.20240430/scaffolding/yahs/out.break.yahs/out_scaffolds_final.ccs.merquryk/
 
   Curated:
-    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e,  yahs_v1.2a.2|, TreeVal_v1.1]
+    pipeline: [hifiasm_v0.19.8-r603|--primary, purge_dups_v1.2.5|-e, yahs_v1.2a.2|, TreeVal_v1.1]
     pri:
       gfastats--nstar-report_txt: idCulLati1.1.primary.curated.fa.gfastats
       busco_short_summary_txt: short_summary.specific.insecta_odb10.idCulLati1.1.primary.curated.insecta_odb10.busco.txt

From 229f0a97d80006c52c30fef7cb358cd83d7e8c3e Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 20:12:43 +0100
Subject: [PATCH 35/52] Updates

---
 modules/local/sanger_tol_btk.nf | 2 +-
 workflows/ear.nf                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index 699ee04..4f0bb06 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -7,7 +7,7 @@ process SANGER_TOL_BTK {
     tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet
     tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv")
     path blastp, stageAs: "blastp.dmnd"
-    path blastn, stageAs: ""
+    path blastn,
     path blastx
     path config_file
     path tax_dump
diff --git a/workflows/ear.nf b/workflows/ear.nf
index 6a87e9e..9f1c434 100644
--- a/workflows/ear.nf
+++ b/workflows/ear.nf
@@ -81,6 +81,7 @@ workflow EAR {
         ch_haplotype_fasta = YAML_INPUT.out.reference_hap2
     }
 
+
     //
     // MODULE: ASSEMBLY STATISTICS FOR THE FASTA
     //
@@ -123,7 +124,6 @@ workflow EAR {
         //
         // MODULE: MERQURYFK PLOTS OF GENOME
         //
-        merquryfk_input.view()
         MERQURYFK_MERQURYFK(
             merquryfk_input
         )

From f64792560028421487b125ddcd80419a087d67bb Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 20:14:00 +0100
Subject: [PATCH 36/52] Testing addition of ncbidatasets summary module

---
 modules/local/ncbidatasets/summary/main.nf | 52 ++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 modules/local/ncbidatasets/summary/main.nf

diff --git a/modules/local/ncbidatasets/summary/main.nf b/modules/local/ncbidatasets/summary/main.nf
new file mode 100644
index 0000000..bb9191f
--- /dev/null
+++ b/modules/local/ncbidatasets/summary/main.nf
@@ -0,0 +1,52 @@
+process NCBIDATASETS_DOWNLOAD {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "conda-forge::ncbi-datasets-cli=15.11.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ncbi-datasets-pylib:15.11.0--pyhdfd78af_0':
+        'staphb/ncbi-datasets:15.11.0' }"
+
+    input:
+    val(input_data)
+
+    output:
+    val(output_data)    , emit: taxonomy
+    path "versions.yml" , emit: versions
+    
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def valid_commands = ["taxonomy", "taxon"]
+    if (!valid_commands.contains(meta.command)) {
+        error "Unsupported command: ${meta.command} "
+    }
+
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id.replaceAll(' ', '_')}"
+
+    """
+
+    [ -e /usr/local/ssl/cacert.pem ] && export SSL_CERT_FILE=/usr/local/ssl/cacert.pem
+
+    datasets summary \\
+        ${meta.command} "${meta.latin_name}" ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' )
+    END_VERSIONS
+    """
+
+    stub:
+    def args    = task.ext.args     ?: ''
+    def prefix  = task.ext.prefix   ?: "${meta.id.replaceAll(' ', '_')}"
+    """
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' )
+    END_VERSIONS
+    """
+}
\ No newline at end of file

From c3475ee7660348f48c0e345f72cd37a503e715c2 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Wed, 18 Sep 2024 20:20:32 +0100
Subject: [PATCH 37/52] not touching ncbi datasets cli

---
 modules/local/ncbidatasets/summary/main.nf | 52 ----------------------
 1 file changed, 52 deletions(-)
 delete mode 100644 modules/local/ncbidatasets/summary/main.nf

diff --git a/modules/local/ncbidatasets/summary/main.nf b/modules/local/ncbidatasets/summary/main.nf
deleted file mode 100644
index bb9191f..0000000
--- a/modules/local/ncbidatasets/summary/main.nf
+++ /dev/null
@@ -1,52 +0,0 @@
-process NCBIDATASETS_DOWNLOAD {
-    tag "$meta.id"
-    label 'process_single'
-
-    conda "conda-forge::ncbi-datasets-cli=15.11.0"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/ncbi-datasets-pylib:15.11.0--pyhdfd78af_0':
-        'staphb/ncbi-datasets:15.11.0' }"
-
-    input:
-    val(input_data)
-
-    output:
-    val(output_data)    , emit: taxonomy
-    path "versions.yml" , emit: versions
-    
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def valid_commands = ["taxonomy", "taxon"]
-    if (!valid_commands.contains(meta.command)) {
-        error "Unsupported command: ${meta.command} "
-    }
-
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id.replaceAll(' ', '_')}"
-
-    """
-
-    [ -e /usr/local/ssl/cacert.pem ] && export SSL_CERT_FILE=/usr/local/ssl/cacert.pem
-
-    datasets summary \\
-        ${meta.command} "${meta.latin_name}" ${args}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' )
-    END_VERSIONS
-    """
-
-    stub:
-    def args    = task.ext.args     ?: ''
-    def prefix  = task.ext.prefix   ?: "${meta.id.replaceAll(' ', '_')}"
-    """
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        ncbi-datasets-cli: \$(echo \$(datasets --version 2>&1) | sed 's/datasets version: //' )
-    END_VERSIONS
-    """
-}
\ No newline at end of file

From 1b68e566398215510c8bf3e8f4b49e24b4ac733a Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Thu, 19 Sep 2024 09:14:41 +0100
Subject: [PATCH 38/52] Bug Fix for extra comma in btk module

---
 modules/local/sanger_tol_btk.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/sanger_tol_btk.nf b/modules/local/sanger_tol_btk.nf
index 4f0bb06..543e693 100644
--- a/modules/local/sanger_tol_btk.nf
+++ b/modules/local/sanger_tol_btk.nf
@@ -7,7 +7,7 @@ process SANGER_TOL_BTK {
     tuple val(meta1), path(bam) // Name needs to remain the same as previous process as they are referenced in the samplesheet
     tuple val(meta2), path(samplesheet_csv, stageAs: "SAMPLESHEET.csv")
     path blastp, stageAs: "blastp.dmnd"
-    path blastn,
+    path blastn
     path blastx
     path config_file
     path tax_dump

From e5b60b54c00eff45ff2ddd26b3bff7c503ab4a2b Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 20 Sep 2024 12:44:02 +0100
Subject: [PATCH 39/52] Adding MINIMAP2 resource fix

---
 conf/base.config | 54 +++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index e609a9e..aa5a770 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -11,16 +11,22 @@
 process {
 
     // TODO nf-core: Check the defaults for all processes
-    cpus   = { check_max( 1    * task.attempt, 'cpus'   ) }
-    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
-    time   = { check_max( 4.h  * task.attempt, 'time'   ) }
+    cpus                = { check_max( 1    * task.attempt, 'cpus'   ) }
+    memory              = { check_max( 6.GB * task.attempt, 'memory' ) }
+    time                = { check_max( 4.h  * task.attempt, 'time'   ) }
 
-    errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
-    maxRetries    = 1
-    maxErrors     = '-1'
+    errorStrategy       = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
+    maxRetries          = 1
+    maxErrors           = '-1'
 
     withName: "SANGER_TOL_CPRETEXT|SANGER_TOL_BTK" {
-        time    = { check_max( 70.h  * task.attempt, 'time'   ) }
+        time            = { check_max( 70.h  * task.attempt, 'time'   ) }
+    }
+
+    withName: "MINIMAP2_ALIGN_SE" { // resources derived from reference.size(); cpus fixed at 16
+        cpus            = { check_max( 16                  , 'cpus'    ) }
+        memory          = { check_max( 1.GB     * ( reference.size() < 2e9 ? 40 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) ) * task.attempt , 'memory') } // attempt multiplies BOTH branches so a retry of a small reference also gets more memory
+        time            = { check_max( 1.h      * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48), 'time'      ) } // 10 / 30 / 48 hour tiers; not scaled by attempt
     }
 
     // Process-specific resource requirements
@@ -31,36 +37,36 @@ process {
     // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
-        cpus   = { check_max( 1                  , 'cpus'    ) }
-        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h  * task.attempt, 'time'    ) }
+        cpus            = { check_max( 1                  , 'cpus'    ) }
+        memory          = { check_max( 6.GB * task.attempt, 'memory'  ) }
+        time            = { check_max( 4.h  * task.attempt, 'time'    ) }
     }
     withLabel:process_low {
-        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h   * task.attempt, 'time'    ) }
+        cpus            = { check_max( 2     * task.attempt, 'cpus'    ) }
+        memory          = { check_max( 12.GB * task.attempt, 'memory'  ) }
+        time            = { check_max( 4.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_medium {
-        cpus   = { check_max( 6     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 36.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 8.h   * task.attempt, 'time'    ) }
+        cpus            = { check_max( 6     * task.attempt, 'cpus'    ) }
+        memory          = { check_max( 36.GB * task.attempt, 'memory'  ) }
+        time            = { check_max( 8.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_high {
-        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
+        cpus            = { check_max( 12    * task.attempt, 'cpus'    ) }
+        memory          = { check_max( 72.GB * task.attempt, 'memory'  ) }
+        time            = { check_max( 16.h  * task.attempt, 'time'    ) }
     }
     withLabel:process_long {
-        time   = { check_max( 20.h  * task.attempt, 'time'    ) }
+        time            = { check_max( 20.h  * task.attempt, 'time'    ) }
     }
     withLabel:process_high_memory {
-        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+        memory          = { check_max( 200.GB * task.attempt, 'memory' ) }
     }
     withLabel:error_ignore {
-        errorStrategy = 'ignore'
+        errorStrategy   = 'ignore'
     }
     withLabel:error_retry {
-        errorStrategy = 'retry'
-        maxRetries    = 2
+        errorStrategy   = 'retry'
+        maxRetries      = 2
     }
 }

From 870bf41e0c54e18949edd016d53cbf4dc31824c0 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 20 Sep 2024 12:44:36 +0100
Subject: [PATCH 40/52] Updating documentation

---
 CHANGELOG.md             |   6 +-
 CITATIONS.md             |  24 ++++++--
 LICENSE                  |   4 +-
 README.md                |   8 ++-
 assets/samplesheet.csv   |   3 -
 assets/schema_input.json | 130 ++++++++++++++++++++++++++++++++++++---
 6 files changed, 154 insertions(+), 21 deletions(-)
 delete mode 100644 assets/samplesheet.csv

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9106bfd..9959669 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ The current pipeline means the MVP for ear.
 
 GFASTATS to generate statistics on the input primary genome.
 MERQURY_FK to generate kmer graphs and analyses of the primary, haplotype and merged assembly.
+MAIN_MAPPING, a small mapping subworkflow that can work with single and paired reads.
 BLOBTOOLKIT to generate busco files and blobtoolkit dataset/plots.
 CURATIONPRETEXT to generate pretext plots and pngs.
 
@@ -21,12 +22,13 @@ CURATIONPRETEXT to generate pretext plots and pngs.
 | Old parameter | New parameter |
 | ------------- | ------------- |
 |               | --mapped      |
+|               | --steps       |
 
 ### Software dependencies
 
 | Dependency                   | Old version | New version         |
 | ---------------------------- | ----------- | ------------------- |
-| sanger-tol/blobtoolkit\*     |             | draft_assemblies    |
+| sanger-tol/blobtoolkit\*     |             | 0.6.0 (Bellsprout)  |
 | sanger-tol/curationpretext\* |             | 1.0.0 (UNSC Cradle) |
 | GFASTATS                     |             | 1.3.6--hdcf5f25_3   |
 | MERQUERY_FK                  |             | 1.2                 |
@@ -36,7 +38,7 @@ CURATIONPRETEXT to generate pretext plots and pngs.
 
 |
 
-- Note: for pipelines, please check their own CHANGELOG file for a full list of software dependencies.
+\* for pipelines, please check their own CHANGELOG file for a full list of software dependencies.
 
 ### Dependencies
 
diff --git a/CITATIONS.md b/CITATIONS.md
index c0cf948..28e3ca8 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,13 +10,29 @@
 
 ## Pipeline tools
 
-- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+- [GFastar/GFastats](https://www.biorxiv.org/content/10.1101/2022.03.24.485682v1)
 
-  > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
+  > Formenti, G., Abueg, L., Brajuka, N., Gallardo, C., Giani, A., Fedrigo, O., Jarvis, ED. (2022). Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs. bioRxiv. doi: https://doi.org/10.1101/2022.03.24.485682
 
-- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+- [Merqury_FK](https://github.com/thegenemyers/MERQURY.FK)
 
-  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
+  > Myers, G., Rhie, A. (2024). MerquryFK & KatFK. [online]. https://github.com/thegenemyers/MERQURY.FK. (Accessed on 20 September 2024).
+
+- [Minimap2](https://pubmed.ncbi.nlm.nih.gov/34623391/)
+
+  > Li, H. 2021. ‘New strategies to improve MINIMAP2 alignment accuracy’, Bioinformatics, 37(23), pp. 4572–4574. doi:10.1093/bioinformatics/btab705.
+
+- [Samtools](https://pubmed.ncbi.nlm.nih.gov/33590861/)
+
+  > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819.
+
+- [sanger-tol/blobtoolkit](https://zenodo.org/records/13758882)
+
+  > Muffato, M., Butt, Z., Challis, R., Kumar, S., Qi, G., Ramos Díaz, A., Surana, P., & Yates, B. (2024). sanger-tol/blobtoolkit: v0.6.0 – Bellsprout (0.6.0). Zenodo. https://doi.org/10.5281/zenodo.13758882
+
+- [sanger-tol/curationpretext](https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0)
+
+  > Pointon, DLB. (2024). sanger-tol/curationpretext: v1.0.0 (UNSC Cradle). [online]. https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0. (Accessed on 20 September 2024).
 
 ## Software packaging/containerisation tools
 
diff --git a/LICENSE b/LICENSE
index 967fdcd..138ff19 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) DLBPointon
+Copyright (c) 2022 - 2023 Genome Research Ltd.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
index 5b61fe6..b8e17ab 100644
--- a/README.md
+++ b/README.md
@@ -59,8 +59,9 @@ curationpretext:
   hic_dir: <DIRECTORY OF HIC READ FILES .CRAM AND .CRAI>
 btk:
   taxid: 1464561
-  lineages: <CSV LIST OF DATABASES TO USE: "insecta_odb10,diptera_odb10">
-  gca_accession: GCA_0001 <DEFAULT, DO NOT CHANGE UNLESS YOU HAVE A GCA_ACCESSION FOR YOUR SPECIES>
+  lineages: < CSV LIST OF DATABASES TO USE: "insecta_odb10,diptera_odb10">
+  gca_accession: GCA_0001 <DEFAULT, DO NOT CHANGE UNLESS YOU HAVE A GCA_ACCESSION FOR YOUR SPECIES >
+
   nt_database: <DIRECTORY CONTAINING BLAST DB>
   nt_database_prefix: <BLASTDB PREFIX>
   diamond_uniprot_database_path: <PATH TO reference_proteomes.dmnd FROM UNIPROT>
@@ -76,7 +77,8 @@ Now, you can run the pipeline using:
 nextflow run sanger-tol/ear -profile <singularity,docker> \\
    --input assets/idCulLati1.yaml \\
    --mapped TRUE \\ # OPTIONAL
-   --outdir test-truth
+   --steps ["", "btk", "cpretext", "merquryfk"] \\ # OPTIONAL CSV LIST OF STEPS TO EXCLUDE FROM EXECUTION
+   --outdir test
 ```
 
 > [!WARNING]
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
deleted file mode 100644
index 5f653ab..0000000
--- a/assets/samplesheet.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 8012bf6..61d2b74 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -13,21 +13,137 @@
                 "errorMessage": "Sample name must be provided and cannot contain spaces",
                 "meta": ["id"]
             },
-            "fastq_1": {
+            "reference_hap1": {
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+                "pattern": "^\\S+\\.f(ast)?a$",
+                "errorMessage": "Primary assembly input file, decompressed"
             },
-            "fastq_2": {
+            "reference_hap2": {
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+                "pattern": "^\\S+\\.f(ast)?a$",
+                "errorMessage": "Haplotype assembly input file, decompressed"
+            },
+            "reference_haplotigs": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.f(ast)?a$",
+                "errorMessage": "Haplotigs removed from Primary Assembly input file during curation, decompressed"
+            },
+            "mapped_bam": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.bam$",
+                "errorMessage": "Optional mapped bam file used to skip mapping of pacbio files"
+            },
+            "merquryfk": {
+                "type": "object",
+                "properties": {
+                    "fastk_hist": {
+                        "type": "string",
+                        "format": "file-path",
+                        "exists": true,
+                        "pattern": "^\\S+\\.hist$",
+                        "errorMessage": "Path to hist file"
+                    },
+                    "fastk_ktab": {
+                        "type": "string",
+                        "errorMessage": "Directory containing ktab files"
+                    }
+                }
+            },
+            "longread": {
+                "type": "object",
+                "properties": {
+                    "dir": {
+                        "type": "string",
+                        "errorMessage": "Path to folder containing fasta.gz files"
+                    },
+                    "type": {
+                        "type": "string",
+                        "errorMessage": "type of longread data"
+                    }
+                }
+            },
+            "curationpretext": {
+                "type": "object",
+                "properties": {
+                    "aligner": {
+                        "type": "string",
+                        "errorMessage": "Aligner"
+                    },
+                    "telomere_motif": {
+                        "type": "string",
+                        "errorMessage": "Telomere motif for telomere search"
+                    },
+                    "hic_dir": {
+                        "type": "string",
+                        "errorMessage": "Directory of the cram data"
+                    }
+                }
+            },
+            "btk": {
+                "type": "object",
+                "properties": {
+                    "taxid": {
+                        "type": "string",
+                        "errorMessage": "NCBI Taxid of organism"
+                    },
+                    "lineages": {
+                        "type": "string",
+                        "errorMessage": "CSV list of BUSCO lineages to run against"
+                    },
+                    "gca_accession": {
+                        "type": "string",
+                        "errorMessage": "gca_accession if applicable"
+                    },
+                    "nt_database": {
+                        "type": "string",
+                        "errorMessage": "nt database directory"
+                    },
+                    "nt_database_prefix": {
+                        "type": "string",
+                        "errorMessage": "Prefix for nt database"
+                    },
+                    "diamond_uniprot_database_path": {
+                        "type": "string",
+                        "format": "file-path",
+                        "exists": true,
+                        "pattern": "^\\S+\\.dmnd$",
+                        "errorMessage": "Diamond protein database"
+                    },
+                    "diamond_nr_database_path": {
+                        "type": "string",
+                        "format": "file-path",
+                        "exists": true,
+                        "pattern": "^\\S+\\.dmnd$",
+                        "errorMessage": "Diamond nr (non-redundant) protein database"
+                    },
+                    "ncbi_taxonomy_path": {
+                        "type": "string",
+                        "errorMessage": "Directory for tax2taxid"
+                    },
+                    "ncbi_rankedlineage_path": {
+                        "type": "string",
+                        "format": "file-path",
+                        "exists": true,
+                        "pattern": "^\\S+\\.dmp$",
+                        "errorMessage": "Taxonomy dump"
+                    },
+                    "config": {
+                        "type": "string",
+                        "format": "file-path",
+                        "pattern": "^\\S+\\.config$",
+                        "errorMessage": "Extra configuration file for Blobtoolkit pipeline"
+                    }
+                }
             }
         },
-        "required": ["sample", "fastq_1"]
+        "required": ["sample", "reference_hap1", "reference_hap2"]
     }
 }

From 93d17c240a0ab2dd1bf1ff35c2359b9a74d87068 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Fri, 20 Sep 2024 12:53:03 +0100
Subject: [PATCH 41/52] Fix LICENSE lint

---
 .nf-core.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.nf-core.yml b/.nf-core.yml
index d9fe12b..407734e 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -10,6 +10,7 @@ lint:
     - .github/workflows/awsfulltest.yml
     - conf/igenomes.config
   files_unchanged:
+    - LICENSE
     - CODE_OF_CONDUCT.md
     - assets/nf-core-ear_logo_light.png
     - docs/images/nf-core-ear_logo_light.png

From fb826a379a2081d6fea67ecbff20a25c3dd60fde Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Fri, 20 Sep 2024 12:59:53 +0100
Subject: [PATCH 42/52] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9959669..0ebb5ba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type).
 
-## v1.0.0 - Aquatic Bahamut [21/08/2024]
+## v1.0.0 - Robert Beiny [20/09/2024]
 
 Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template.
 The current pipeline means the MVP for ear.

From 828cf7cfcbb40f217582c3c4a186a0174a9d5aff Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Fri, 20 Sep 2024 13:50:37 +0100
Subject: [PATCH 43/52] Update nextflow.config

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index e564534..83055b9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -224,7 +224,7 @@ manifest {
     description     = """ERGA Assembly Report pipeline"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=24.04.0'
-    version         = '1.0'
+    version         = '0.6.0'
     doi             = ''
 }
 

From c39373703d8a8495f5ffab327556f17867e8d8a0 Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 1 Oct 2024 14:59:08 +0100
Subject: [PATCH 44/52] Update README.md

Adding DOI
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b8e17ab..5c42432 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 [![GitHub Actions CI Status](https://github.com/sanger-tol/ear/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/ci.yml)
-[![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![GitHub Actions Linting Status](https://github.com/sanger-tol/ear/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/ear/actions/workflows/linting.yml)[![DOI](https://zenodo.org/badge/833605808.svg)](https://doi.org/10.5281/zenodo.13819520)
 [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)

From e7e4fc3e6873d69278c682b10d46347aba53e383 Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 1 Oct 2024 15:00:23 +0100
Subject: [PATCH 45/52] Update nextflow.config

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 83055b9..6e8499e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -225,7 +225,7 @@ manifest {
     mainScript      = 'main.nf'
     nextflowVersion = '!>=24.04.0'
     version         = '0.6.0'
-    doi             = ''
+    doi             = 'https://zenodo.org/records/13819520'
 }
 
 // Load modules.config for DSL2 module specific options

From 79a12a03ec0998299575ac4b5ce935bbc955703d Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 8 Oct 2024 08:42:18 +0100
Subject: [PATCH 46/52] Update modules.config

Moving btk to tagged version 0.6.0
---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 73e83bb..90a7a0e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -42,7 +42,7 @@ process {
         ext.executor        = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'"
         ext.profiles        = "singularity,sanger"
         ext.get_versions    = "lsid | head -n1 | cut -d ',' -f 1"
-        ext.version         = "draft_assemblies"
+        ext.version         = "0.6.0"
     }
 
     withName: SANGER_TOL_CPRETEXT {

From 2fbf4124573a7dfb3329af5ff15c3c90b1ef5755 Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 8 Oct 2024 08:44:39 +0100
Subject: [PATCH 47/52] Update CHANGELOG.md

Update
---
 CHANGELOG.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ebb5ba..d921035 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type).
 
+## v1.0.1 - Robert Beiny H1 [08/10/2024]
+- Blobtookit version was specified in the wrong location, so defaulted to a development branch "draft_assemblies", this has now been updated to v0.6.0.
+- Zenodo DOI has now been added to the repo.
+
 ## v1.0.0 - Robert Beiny [20/09/2024]
 
 Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template.
@@ -36,8 +40,6 @@ CURATIONPRETEXT to generate pretext plots and pngs.
 | SAMTOOLS_MERGE               |             | 1.20--h50ea8bc_0    |
 | SAMTOOLS_SORT                |             | 1.20--h50ea8bc_0    |
 
-|
-
 \* for pipelines, please check their own CHANGELOG file for a full list of software dependencies.
 
 ### Dependencies

From a126d8a0708db088e17a460e53c63448a3cc526b Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 8 Oct 2024 08:46:01 +0100
Subject: [PATCH 48/52] Update nextflow.config

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 6e8499e..c278c76 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -224,7 +224,7 @@ manifest {
     description     = """ERGA Assembly Report pipeline"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=24.04.0'
-    version         = '0.6.0'
+    version         = '0.6.1'
     doi             = 'https://zenodo.org/records/13819520'
 }
 

From 2d2ac6bec02cda6d711bee1628280b15bfe6e7eb Mon Sep 17 00:00:00 2001
From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com>
Date: Tue, 8 Oct 2024 08:46:25 +0100
Subject: [PATCH 49/52] Update CHANGELOG.md

Correct version information
---
 CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d921035..2bb9758 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,11 +4,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type).
 
-## v1.0.1 - Robert Beiny H1 [08/10/2024]
+## v0.6.1 - Robert Beiny H1 [08/10/2024]
 - Blobtookit version was specified in the wrong location, so defaulted to a development branch "draft_assemblies", this has now been updated to v0.6.0.
 - Zenodo DOI has now been added to the repo.
 
-## v1.0.0 - Robert Beiny [20/09/2024]
+## v0.6.0 - Robert Beiny [20/09/2024]
 
 Initial release of sanger-tol/ear, created with the [nf-core](https://nf-co.re/) template.
 The current pipeline means the MVP for ear.

From dfb79a09ca41119c2befbfd5a719779cde65b1c1 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 8 Oct 2024 08:54:07 +0100
Subject: [PATCH 50/52] updating version info and fixing linting error

---
 conf/base.config | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/conf/base.config b/conf/base.config
index aa5a770..f600868 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -23,7 +23,7 @@ process {
         time            = { check_max( 70.h  * task.attempt, 'time'   ) }
     }
 
-        withName: "MINIMAP2_ALIGN_SE" {
+    withName: "MINIMAP2_ALIGN_SE" {
         cpus            = { check_max( 16                  , 'cpus'    ) }
         memory          = { check_max( 1.GB     * ( reference.size() < 2e9 ? 40 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * task.attempt ) , 'memory') }
         time            = { check_max( 1.h      * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 48), 'time'      ) }
@@ -36,35 +36,43 @@ process {
     //        adding in your local modules too.
     // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+
     withLabel:process_single {
         cpus            = { check_max( 1                  , 'cpus'    ) }
         memory          = { check_max( 6.GB * task.attempt, 'memory'  ) }
         time            = { check_max( 4.h  * task.attempt, 'time'    ) }
     }
+
     withLabel:process_low {
         cpus            = { check_max( 2     * task.attempt, 'cpus'    ) }
         memory          = { check_max( 12.GB * task.attempt, 'memory'  ) }
         time            = { check_max( 4.h   * task.attempt, 'time'    ) }
     }
+
     withLabel:process_medium {
         cpus            = { check_max( 6     * task.attempt, 'cpus'    ) }
         memory          = { check_max( 36.GB * task.attempt, 'memory'  ) }
         time            = { check_max( 8.h   * task.attempt, 'time'    ) }
     }
+
     withLabel:process_high {
         cpus            = { check_max( 12    * task.attempt, 'cpus'    ) }
         memory          = { check_max( 72.GB * task.attempt, 'memory'  ) }
         time            = { check_max( 16.h  * task.attempt, 'time'    ) }
     }
+
     withLabel:process_long {
         time            = { check_max( 20.h  * task.attempt, 'time'    ) }
     }
+
     withLabel:process_high_memory {
         memory          = { check_max( 200.GB * task.attempt, 'memory' ) }
     }
+
     withLabel:error_ignore {
         errorStrategy   = 'ignore'
     }
+
     withLabel:error_retry {
         errorStrategy   = 'retry'
         maxRetries      = 2

From 01baf9287701c33ddf0e025efc068a19b9e0f6b3 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 8 Oct 2024 08:54:58 +0100
Subject: [PATCH 51/52] Prettier

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2bb9758..ff4ec69 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 Naming based on: [Mythical creatures](https://en.wikipedia.org/wiki/List_of_legendary_creatures_by_type).
 
 ## v0.6.1 - Robert Beiny H1 [08/10/2024]
+
 - Blobtookit version was specified in the wrong location, so defaulted to a development branch "draft_assemblies", this has now been updated to v0.6.0.
 - Zenodo DOI has now been added to the repo.
 

From 8127c7c2c811e2084d94737e801d6145748b3544 Mon Sep 17 00:00:00 2001
From: DLBPointon <damonlbp@hotmail.co.uk>
Date: Tue, 8 Oct 2024 08:56:18 +0100
Subject: [PATCH 52/52] New line for editorconfig

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 138ff19..ac4a5f3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.