From d999df94a8ec84def6bb95f63a832baef1fd44e9 Mon Sep 17 00:00:00 2001
From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Tue, 31 Oct 2023 16:48:20 +0100
Subject: [PATCH] Update cellranger_mapping workflow to use fromState/toState
 (#610)

* Update cellranger_mapping workflow to use fromState/toState

* Update CHANGELOG
---
 CHANGELOG.md                                  | 22 +++---
 .../cellranger_mapping/config.vsh.yaml        |  4 +-
 .../cellranger_mapping/integration_test.sh    |  4 +-
 .../ingestion/cellranger_mapping/main.nf      | 77 ++++++++++---------
 4 files changed, 57 insertions(+), 50 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0090f0b454f..36a22a22a58 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,18 +2,17 @@
 
 ## BREAKING CHANGES
 
-This project now uses viash version 0.8.0 to build components and workflows. Moving to 0.8.0 involved the following changes:
+* This project now uses viash version 0.8.0 to build components and workflows. Moving to 0.8.0 involved the following changes:
 
-* Bump viash version to 0.8.0 (PR #598) in the project configuration.
+    * Bump viash version to 0.8.0 (PR #598) in the project configuration.
+    * The `concat` component has been deprecated and will be removed in a future release. Its functionality has been copied to the `concatenate_h5mu` component because the name is in conflict with the `concat` operator from nextflow (PR #598).
+    * All pipelines no longer use the anonymous workflow. Instead, these workflows were given a name which was added to the viash config as the entrypoint to the pipeline (PR #598).
+    * Removed the `workflows` folder and moved its contents to new locations (PR #605):
+        1. The `resources_test_scripts` folder now resides in the root of the project. 
+        2. All workflows have been moved to the `src/workflows` folder.
+        3. Adjust GitHub Actions to account for new workflow paths.
 
-* The `concat` component had been deprecated and will be removed in a future release. It's functionality has been copied to the `concatenate_h5mu` component because the name is in conflict with the `concat` operator from nextflow (PR #598).
-
-* All pipelines no longer use the anonymous workflow. Instead, these workflows were given a name which was added to the viash config as the entrypoint to the pipeline (PR #598).
-
-* Removed the `workflows` folder and moved its contents to new locations (PR #605):
-    1. The `resources_test_scripts` folder now resides in the root of the project. 
-    2. All workflows have been moved to the `src/workflows` folder.
-    3. Adjust GitHub Actions to account for new workflow paths.
+* Renamed `obsm_metrics` to `uns_metrics` for the `cellranger_mapping` workflow because the cellranger metrics are stored in `.uns` and not `.obsm` (PR #610).
 
 ## NEW FUNCTIONALITY
 
@@ -25,6 +24,9 @@ This project now uses viash version 0.8.0 to build components and workflows. Mov
 
 * Refactored `cellranger_multi` workflow to use `fromState` and `toState` functionality (PR #609).
 
+* Refactored `cellranger_mapping` workflow to use `fromState` and `toState` functionality (PR #610).
+
+
 # openpipelines 0.12.0
 
 ## BREAKING CHANGES
diff --git a/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml b/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml
index 28000d32037..50ac52db196 100644
--- a/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml
+++ b/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml
@@ -45,9 +45,9 @@ functionality:
           description: "The output from Cell Ranger, converted to h5mu."
           required: true
           example: output.h5mu
-        - name: "--obsm_metrics"
+        - name: "--uns_metrics"
           type: string
-          description: Name of the .obsm slot under which to QC metrics (if any).
+          description: Name of the .uns slot under which to store QC metrics (if any).
           default: "metrics_summary"
         - name: "--output_type"
           type: string
diff --git a/src/workflows/ingestion/cellranger_mapping/integration_test.sh b/src/workflows/ingestion/cellranger_mapping/integration_test.sh
index be498acab4e..2142754a49b 100755
--- a/src/workflows/ingestion/cellranger_mapping/integration_test.sh
+++ b/src/workflows/ingestion/cellranger_mapping/integration_test.sh
@@ -12,9 +12,9 @@ export NXF_VER=21.10.6
 
 nextflow \
   run . \
-  -main-script workflows/ingestion/cellranger_mapping/main.nf \
+  -main-script src/workflows/ingestion/cellranger_mapping/main.nf \
   -entry test_wf \
   -resume \
   -profile docker,no_publish \
-  -c workflows/utils/labels_ci.config \
+  -c src/workflows/utils/labels_ci.config \
   -with-trace work/trace.txt
\ No newline at end of file
diff --git a/src/workflows/ingestion/cellranger_mapping/main.nf b/src/workflows/ingestion/cellranger_mapping/main.nf
index d755fe4b2df..7f2234a2721 100644
--- a/src/workflows/ingestion/cellranger_mapping/main.nf
+++ b/src/workflows/ingestion/cellranger_mapping/main.nf
@@ -29,54 +29,59 @@ workflow run_wf {
   output_ch = input_ch
     | preprocessInputs("config": config)
     // split params for downstream components
-    | setWorkflowArguments(
-      cellranger_count: [
+    | cellranger_count.run(
+      fromState: [
         "input": "input",
+        "output": "output_raw",
         "expect_cells": "expect_cells",
         "chemistry": "chemistry",
         "secondary_analysis": "secondary_analysis",
         "generate_bam": "generate_bam",
-        "include_introns": "include_introns"
+        "include_introns": "include_introns",
+        "reference": "reference"
       ],
-      from_10xh5_to_h5mu: [ 
-        "output": "output_h5mu",
-        "obsm_metrics": "obsm_metrics",
-        "output_type": "output_type",
-      ]
+      toState: [
+        "input": "output",
+        "output_raw": "output"
+      ],
+      auto: [ publish: true ]
     )
-
-    | getWorkflowArguments(key: "cellranger_count")
-    | cellranger_count.run(auto: [ publish: true ])
-    | pmap {id, data ->
-        def new_data = ["input": data.output]
-        [id, new_data]
-    }
     // split output dir into map
-    | cellranger_count_split
+    | cellranger_count_split.run(
+      fromState: {id, state ->
+        def stateMapping = [
+          "input": state.input,
+        ]
+        def outputType = state.output_type == "raw" ? "raw_h5" : "filtered_h5" // 'def' keeps this closure-local (no shared script-level variable)
+        stateMapping += [(outputType): "\$id.\$key.${outputType}.h5"] // parenthesised key: use the variable's value, not the literal "outputType"
+        stateMapping += ["metrics_summary": "\$id.\$key.metrics_summary.csv"]
+        return stateMapping
+      },
+      toState: {id, output, state ->
+        def outputFile = state.output_type == "raw" ? output.raw_h5 : output.filtered_h5
+        def newState = state + [ "input": outputFile, "metrics_summary": output.metrics_summary ] // keep the metrics file so the next step can read state.metrics_summary
+        return newState
+      }
+    )
     // convert to h5mu
-    | pmap { id, output_data, other_args -> 
-      input_data = other_args.from_10xh5_to_h5mu.output_type == "filtered" ? 
-        output_data.filtered_h5 : output_data.raw_h5
-      // combine new data for from_10xh5_to_h5mu
-      new_data =
-        [ 
-          input: input_data, 
-          input_metrics_summary: output_data.metrics_summary
-        ] +
-        other_args.from_10xh5_to_h5mu
-
-      // store output to fourth field to return as output
-      [ id, new_data, other_args, output_data ]
-    }
     | from_10xh5_to_h5mu.run(
+      fromState: {id, state ->
+        [
+          "input": state.input,
+          "output_compression": "gzip",
+          "output": state.output_h5mu,
+          "uns_metrics": state.uns_metrics,
+          "input_metrics_summary": state.metrics_summary
+        ]
+      },
+      toState: { id, output, state ->
+        [
+          "output_raw": state.output_raw,
+          "output_h5mu": output.output
+        ]
+      },
       auto: [ publish: true ],
-      args: [ output_compression: "gzip" ]
     )
-    
-    // return output map
-    | pmap { id, data, other_args, output_data ->
-      [ id, output_data + [h5mu: data] ]
-    }
 
   emit:
   output_ch