nf-core · trianglegrrl · May 1, 2025 · May 1, 2025 · May 1, 2025 · May 1, 2025
diff --git a/.prettierignore b/.prettierignore
@@ -3,6 +3,7 @@ adaptivecard.json
 slackreport.json
 .nextflow*
 work/
+docs/development/manual_tests.md
 data/
 results/
 .DS_Store

diff --git a/conf/modules.config b/conf/modules.config
@@ -1752,4 +1752,15 @@ process {
             ]
         ]
     }
+
+    // Select the phylotree handed to Haplogrep3 via `--tree`.
+    // An explicit `params.human_mtdna_phylotree` always takes precedence; otherwise the
+    // tree is derived from `params.human_mtdna_reference`: 'rsrs' (case-insensitive)
+    // selects the RSRS tree, any other value (including unset) selects the rCRS tree.
+    withName: 'NFCORE_EAGER:EAGER:CLASSIFY_MTDNA_HAPLOGROUP:HAPLOGREP3_CLASSIFY' {
+        ext.args = {
+            def phylotree = params.human_mtdna_phylotree ?: (params.human_mtdna_reference?.toLowerCase() == 'rsrs' ? 'phylotree-rsrs@17.1' : 'phylotree-rcrs@17.2')
+            "--tree ${phylotree}"
+        }
+    }
 }
diff --git a/docs/development/code_conventions.md b/docs/development/code_conventions.md
@@ -27,11 +27,11 @@ The alias should ideally make it intuitive to understand which subworkflow the m
 
 - The unique module names specified above should make it possible to always configure modules without the need for a regex/glob when using `withName`. Exception to this is modules named within nf-core subworkflows, which should be configured with a regex/glob.
 - The order of attributes within configuration blocks should always be the following:
-  1.  tag (mandatory)
-  2.  ext.args\* (optional. Followed by ext.args{2,3,...} in ascending order)
-  3.  ext.prefix (optional)
-  4.  publishDir (optional)
-  5.  any other attributes go to the end.
+  1. tag (mandatory)
+  2. ext.args\* (optional. Followed by ext.args{2,3,...} in ascending order)
+  3. ext.prefix (optional)
+  4. publishDir (optional)
+  5. any other attributes go to the end.
 - NEVER use `meta.id` in module configuration (`tag`,`ext.*`), but instead the full explicit combination of unique attributes expected. `meta.sample_id` is fine to use and is equivalent to `meta.id`, but should be supplemented by `meta.library_id` and `meta.lane` etc, as required.
 - Every process that is reference-specific MUST include `${meta.reference}` in its `tag` and `ext.prefix` attributes. This is to avoid confusion when running the pipeline with multiple references.
   - Tags that include reference and sample information should be formatted as `${meta.reference}|${meta.sample_id}_*`. Reference specific attributes go on the left-hand-side of the tag, data-specific attributes on the right-hand-side.

diff --git a/docs/development/dev_docs.md b/docs/development/dev_docs.md
@@ -16,7 +16,7 @@ To add new input files or options to the reference sheet, you have to complete a
 
 ### Multi-reference input workflow
 
-1. Add new column named <SOFTWARE_FILETYPE> and test data to the test reference sheet (https://github.com/nf-core/test-datasets/blob/eager/reference/reference_sheet_multiref.csv).
+1. Add new column named <SOFTWARE_FILETYPE> and test data to the test reference sheet (<https://github.com/nf-core/test-datasets/blob/eager/reference/reference_sheet_multiref.csv>).
 2. Read in new input via nf-validation plugin within the reference_indexing_multi local subworkflow.
     1. Add new "property" <SOFTWARE_FILETYPE> to the fasta validation schema (assets/schema_fasta.json).
         1. Add "type" of your object, e.g. `"type": "string"` for file paths and `"type": "integer"` for numbers.

diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md
@@ -1,3 +1,4 @@
+<!-- markdownlint-disable -->
 # Manual Tests
 
 Here is a list of manual tests we can run with the expect output commands
@@ -1133,3 +1134,18 @@ nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume --
 ## Expect: BAM input shows up in FastQC -> mapping results.
 nextflow run main.nf -profile test,docker --outdir ./results -w work/ --convert_inputbam --skip_deduplication -resume -ansi-log false -dump-channels
 ```
+
+### MTDNA HAPLOGROUP CLASSIFICATION
+
+```bash
+#### MTDNA HAPLOGROUP CLASSIFICATION with default settings
+## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
+## Expect: The haplogroup .txt file contains at minimum columns for rank, name, quality, range, and details of the haplogroup assignment
+nextflow run main.nf -profile docker,test --outdir ./results/mtdna_haplogroup_test --run_genotyping --genotyping_tool ug --genotyping_source raw --run_mtdna_haplogroup_classification -resume
+
+#### MTDNA HAPLOGROUP CLASSIFICATION with specific arguments
+## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
+## Expect: The haplogroup assignment may differ based on the classification settings
+nextflow run main.nf -profile docker,test --outdir ./results/mtdna_haplogroup_test --run_genotyping --genotyping_tool ug --genotyping_source raw --run_mtdna_haplogroup_classification --human_mtdna_reference rsrs --human_mtdna_phylotree phylotree-rsrs@17.1 -resume
+```
+<!-- markdownlint-enable -->
diff --git a/docs/usage.md b/docs/usage.md
@@ -107,7 +107,7 @@ Only the `reference_name`, and `fasta` columns are mandatory, whereas all other
 
 Files for `fai`, `dict`, `mapper_index` will be generated by the pipeline for you if not specified.
 
-A real-world example could look as follows, where a user-supplied `.dict` file and `circular_target ` and `mitochondrion_header` are not specified:
+A real-world example could look as follows, where a user-supplied `.dict` file and `circular_target` and `mitochondrion_header` are not specified:
 
 ```txt
 reference_name,fasta,fai,dict,mapper_index,circular_target,mitochondrion
@@ -217,7 +217,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
 - `apptainer`
   - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
 - `wave`
-  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later).
+  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later).
 - `conda`
   - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
 

diff --git a/modules.json b/modules.json
@@ -180,6 +180,11 @@
                         "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208",
                         "installed_by": ["modules"]
                     },
+                    "haplogrep3/classify": {
+                        "branch": "master",
+                        "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
+                        "installed_by": ["modules"]
+                    },
                     "kraken2/kraken2": {
                         "branch": "master",
                         "git_sha": "653218e79ffa76fde20319e9062f8b8da5cf7555",

diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
diff --git a/modules/nf-core/haplogrep3/classify/environment.yml b/modules/nf-core/haplogrep3/classify/environment.yml
diff --git a/modules/nf-core/haplogrep3/classify/main.nf b/modules/nf-core/haplogrep3/classify/main.nf
diff --git a/modules/nf-core/haplogrep3/classify/meta.yml b/modules/nf-core/haplogrep3/classify/meta.yml
diff --git a/modules/nf-core/haplogrep3/classify/tests/main.nf.test b/modules/nf-core/haplogrep3/classify/tests/main.nf.test
diff --git a/modules/nf-core/haplogrep3/classify/tests/main.nf.test.snap b/modules/nf-core/haplogrep3/classify/tests/main.nf.test.snap
diff --git a/modules/nf-core/haplogrep3/classify/tests/nextflow.config b/modules/nf-core/haplogrep3/classify/tests/nextflow.config
diff --git a/nextflow.config b/nextflow.config
@@ -249,6 +249,11 @@ params {
     run_sexdeterrmine                                                = false
     sexdeterrmine_bedfile                                            = null
 
+    // mtDNA haplogroup classification
+    run_mtdna_haplogroup_classification                              = false
+    human_mtdna_reference                                            = 'rcrs'
+    human_mtdna_phylotree                                            = null
+
     // Genotyping
     run_genotyping                           = false
     genotyping_tool                          = null