From d999df94a8ec84def6bb95f63a832baef1fd44e9 Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Tue, 31 Oct 2023 16:48:20 +0100 Subject: [PATCH] Update cellranger_mapping workflow to use fromState/toState (#610) * Update cellranger_mapping workflow to use fromState/toState * Update CHANGELOG --- CHANGELOG.md | 22 +++--- .../cellranger_mapping/config.vsh.yaml | 4 +- .../cellranger_mapping/integration_test.sh | 4 +- .../ingestion/cellranger_mapping/main.nf | 77 ++++++++++--------- 4 files changed, 57 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0090f0b454f..36a22a22a58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,18 +2,17 @@ ## BREAKING CHANGES -This project now uses viash version 0.8.0 to build components and workflows. Moving to 0.8.0 involved the following changes: +* This project now uses viash version 0.8.0 to build components and workflows. Moving to 0.8.0 involved the following changes: -* Bump viash version to 0.8.0 (PR #598) in the project configuration. + * Bump viash version to 0.8.0 (PR #598) in the project configuration. + * The `concat` component has been deprecated and will be removed in a future release. Its functionality has been copied to the `concatenate_h5mu` component because the name is in conflict with the `concat` operator from nextflow (PR #598). + * All pipelines no longer use the anonymous workflow. Instead, these workflows were given a name which was added to the viash config as the entrypoint to the pipeline (PR #598). + * Removed the `workflows` folder and moved its contents to new locations (PR #605): + 1. The `resources_test_scripts` folder now resides in the root of the project. + 2. All workflows have been moved to the `src/workflows` folder. + 3. Adjust GitHub Actions to account for new workflow paths. -* The `concat` component had been deprecated and will be removed in a future release. 
It's functionality has been copied to the `concatenate_h5mu` component because the name is in conflict with the `concat` operator from nextflow (PR #598). - -* All pipelines no longer use the anonymous workflow. Instead, these workflows were given a name which was added to the viash config as the entrypoint to the pipeline (PR #598). - -* Removed the `workflows` folder and moved its contents to new locations (PR #605): - 1. The `resources_test_scripts` folder now resides in the root of the project. - 2. All workflows have been moved to the `src/workflows` folder. - 3. Adjust GitHub Actions to account for new workflow paths. +* Renamed `obsm_metrics` to `uns_metrics` for the `cellranger_mapping` workflow because the cellranger metrics are stored in `.uns` and not `.obsm` (PR #610). ## NEW FUNCTIONALITY @@ -25,6 +24,9 @@ This project now uses viash version 0.8.0 to build components and workflows. Mov * Refactored `cellranger_multi` workflow to use `fromState` and `toState` functionality (PR #609). +* Refactored `cellranger_mapping` workflow to use `fromState` and `toState` functionality (PR #610). + + # openpipelines 0.12.0 ## BREAKING CHANGES diff --git a/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml b/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml index 28000d32037..50ac52db196 100644 --- a/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml +++ b/src/workflows/ingestion/cellranger_mapping/config.vsh.yaml @@ -45,9 +45,9 @@ functionality: description: "The output from Cell Ranger, converted to h5mu." required: true example: output.h5mu - - name: "--obsm_metrics" + - name: "--uns_metrics" type: string - description: Name of the .obsm slot under which to QC metrics (if any). + description: Name of the .uns slot under which to store QC metrics (if any). 
default: "metrics_summary" - name: "--output_type" type: string diff --git a/src/workflows/ingestion/cellranger_mapping/integration_test.sh b/src/workflows/ingestion/cellranger_mapping/integration_test.sh index be498acab4e..2142754a49b 100755 --- a/src/workflows/ingestion/cellranger_mapping/integration_test.sh +++ b/src/workflows/ingestion/cellranger_mapping/integration_test.sh @@ -12,9 +12,9 @@ export NXF_VER=21.10.6 nextflow \ run . \ - -main-script workflows/ingestion/cellranger_mapping/main.nf \ + -main-script src/workflows/ingestion/cellranger_mapping/main.nf \ -entry test_wf \ -resume \ -profile docker,no_publish \ - -c workflows/utils/labels_ci.config \ + -c src/workflows/utils/labels_ci.config \ -with-trace work/trace.txt \ No newline at end of file diff --git a/src/workflows/ingestion/cellranger_mapping/main.nf b/src/workflows/ingestion/cellranger_mapping/main.nf index d755fe4b2df..7f2234a2721 100644 --- a/src/workflows/ingestion/cellranger_mapping/main.nf +++ b/src/workflows/ingestion/cellranger_mapping/main.nf @@ -29,54 +29,59 @@ workflow run_wf { output_ch = input_ch | preprocessInputs("config": config) // split params for downstream components - | setWorkflowArguments( - cellranger_count: [ + | cellranger_count.run( + fromState: [ "input": "input", + "output": "output_raw", "expect_cells": "expect_cells", "chemistry": "chemistry", "secondary_analysis": "secondary_analysis", "generate_bam": "generate_bam", - "include_introns": "include_introns" + "include_introns": "include_introns", + "reference": "reference" ], - from_10xh5_to_h5mu: [ - "output": "output_h5mu", - "obsm_metrics": "obsm_metrics", - "output_type": "output_type", - ] + toState: [ + "input": "output", + "output_raw": "output" + ], + auto: [ publish: true ] ) - - | getWorkflowArguments(key: "cellranger_count") - | cellranger_count.run(auto: [ publish: true ]) - | pmap {id, data -> - def new_data = ["input": data.output] - [id, new_data] - } // split output dir into map - | 
cellranger_count_split + | cellranger_count_split.run( + fromState: {id, state -> + def stateMapping = [ + "input": state.input, + ] + outputType = state.output_type == "raw" ? "raw_h5" : "filtered_h5" + stateMapping += [outputType: "\$id.\$key.${outputType}.h5"] + stateMapping += ["metrics_summary": "\$id.\$key.metrics_summary.csv"] + return stateMapping + }, + toState: {id, output, state -> + def outputFile = state.output_type == "raw" ? output.raw_h5 : output.filtered_h5 + def newState = state + [ "input": outputFile ] + return newState + } + ) // convert to h5mu - | pmap { id, output_data, other_args -> - input_data = other_args.from_10xh5_to_h5mu.output_type == "filtered" ? - output_data.filtered_h5 : output_data.raw_h5 - // combine new data for from_10xh5_to_h5mu - new_data = - [ - input: input_data, - input_metrics_summary: output_data.metrics_summary - ] + - other_args.from_10xh5_to_h5mu - - // store output to fourth field to return as output - [ id, new_data, other_args, output_data ] - } | from_10xh5_to_h5mu.run( + fromState: {id, state -> + [ + "input": state.input, + "output_compression": "gzip", + "output": state.output_h5mu, + "uns_metrics": state.uns_metrics, + "input_metrics_summary": state.metrics_summary + ] + }, + toState: { id, output, state -> + [ + "output_raw": state.output_raw, + "output_h5mu": output.output + ] + }, auto: [ publish: true ], - args: [ output_compression: "gzip" ] ) - - // return output map - | pmap { id, data, other_args, output_data -> - [ id, output_data + [h5mu: data] ] - } emit: output_ch