
Prevent leiden component from running in workflows when no resolutions are given (#583)

* Do not run leiden when no resolutions are given

* Undo chmod

* Update CHANGELOG

* Formatting
DriesSchaumont authored Oct 9, 2023
1 parent dea329e commit 046363b
Showing 18 changed files with 474 additions and 179 deletions.
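The workflow diffs below implement this by splitting each integration workflow's channel on the leiden_resolution state entry: events that carry resolutions run through leiden, events with an empty list bypass it, and the two branches are merged again before the downstream steps. A minimal, self-contained Nextflow sketch of that pattern (illustrative sample IDs and a stub map step stand in for the real components; this is not the repository's run_wf):

nextflow.enable.dsl = 2

workflow {
  // Two events: one with resolutions, one with an empty list.
  input_ch = Channel.fromList([
    [ "sample_a", [ leiden_resolution: [0.25, 1.0] ] ],
    [ "sample_b", [ leiden_resolution: [] ] ]
  ])

  // Groovy treats an empty list as falsy, so filtering on the value itself
  // separates "has resolutions" from "no resolutions".
  with_leiden_ch = input_ch
    | filter { id, state -> state.leiden_resolution }
    | map { id, state -> [ id, state + [ clustered: true ] ] }   // stand-in for leiden.run(...)

  without_leiden_ch = input_ch
    | filter { id, state -> !state.leiden_resolution }

  // Downstream steps (umap, publishing) consume the merged channel.
  with_leiden_ch
    | mix(without_leiden_ch)
    | view { "Merged event: $it" }
}

In the bbknn_leiden and harmony_leiden diffs below, umap and the final publish step run on the merged channel, so an event with leiden_resolution: [] still produces a published h5mu, only without the leiden cluster annotations.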
CHANGELOG.md: 2 changes (2 additions, 0 deletions)
@@ -22,6 +22,8 @@

* `correction/cellbender_remove_background`: change base image to `nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04` and downgrade MuData to 0.2.1 because it is the oldest version that uses python 3.7 (PR #575).

* Several integration workflows: prevent leiden from being executed when no resolutions are provided (PR #583).

## BUG FIXES

* `transform/clr`: raise an error when CLR fails to return the requested output (PR #579).
src/cluster/leiden/config.vsh.yaml: 1 change (1 addition, 0 deletions)
@@ -66,6 +66,7 @@ functionality:
A parameter value controlling the coarseness of the clustering. Higher values lead to more clusters.
Multiple values will result in clustering being performed multiple times.
default: [1]
required: true
multiple: true
resources:
- type: python_script
workflows/multiomics/integration/bbknn_leiden/config.vsh.yaml: 3 changes (3 additions, 0 deletions)
@@ -99,4 +99,7 @@ functionality:
- type: nextflow_script
path: main.nf
entrypoint: test_wf
- type: nextflow_script
path: main.nf
entrypoint: test_wf2
- path: /resources_test/pbmc_1k_protein_v3
Integration test script for bbknn_leiden:
@@ -14,5 +14,10 @@ nextflow run . \
-main-script workflows/multiomics/integration/bbknn_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf \
-with-trace work/trace.txt \
-with-dag workflows/multiomics/integration/bbknn_leiden/graph.dot
-c workflows/utils/labels_ci.config

nextflow run . \
-main-script workflows/multiomics/integration/bbknn_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf2 \
-c workflows/utils/labels_ci.config
workflows/multiomics/integration/bbknn_leiden/main.nf: 92 changes (70 additions, 22 deletions)
@@ -27,7 +27,7 @@ workflow run_wf {
input_ch

main:
output_ch = input_ch
bbknn_ch = input_ch
| preprocessInputs("config": config)

// compute bbknn graph
@@ -64,7 +64,8 @@
"input": "output"
]
)

with_leiden_ch = bbknn_ch
| filter{id, state -> state.leiden_resolution}
// run leiden on the bbknn graph
| leiden.run(
fromState: [
@@ -78,30 +79,32 @@
"input": "output"
]
)
// move obsm leiden cluster dataframe to obs
| move_obsm_to_obs.run(
fromState:
[
"input": "input",
"obsm_key": "obs_cluster",
"modality": "modality",
],
toState: ["input": "output"]
)

without_leiden_ch = bbknn_ch
| filter{id, state -> !state.leiden_resolution}

output_ch = with_leiden_ch.mix(without_leiden_ch)
// run umap on the bbknn graph
| umap.run(
fromState: [
"input": "input",
"uns_neighbors": "uns_output",
"obsm_output": "obsm_umap",
"modality": "modality"
],
toState: [
"input": "output"
]
)

// move obsm leiden cluster dataframe to obs
| move_obsm_to_obs.run(
fromState: { id, state ->
[
input: state.input,
obsm_key: state.obs_cluster,
modality: state.modality,
output: state.output,
output_compression: "gzip"
]
[
"input": state.input,
"uns_neighbors": state.uns_output,
"obsm_output": state.obsm_umap,
"modality": state.modality,
"output": state.output,
"output_compression": "gzip"
]
},
toState: { id, output, state ->
[ output: output.output ]
@@ -128,6 +131,51 @@ workflow test_wf {
]
]

output_ch =
channelFromParams(testParams, config)
| view { "Input: $it" }
| run_wf
| view { tup ->
assert tup.size() == 2 : "outputs should contain two elements; [id, output]"

// check id
def id = tup[0]
assert id == "foo" : "ID should be 'foo'. Found: ${id}"

// check output
def output = tup[1]
assert output instanceof Map: "Output should be a map. Found: ${output}"
assert "output" in output : "Output should contain key 'output'. Found: ${output}"

// check h5mu
def output_h5mu = output.output
assert output_h5mu.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output}"

"Output: $output"
}
| toList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain 1 event"
}
//| check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
}

workflow test_wf2 {
// allow changing the resources_test dir
params.resources_test = params.rootDir + "/resources_test"

// or when running from s3: params.resources_test = "s3://openpipelines-data/"
testParams = [
param_list: [
[
id: "foo",
input: params.resources_test + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
layer: "log_normalized",
leiden_resolution: []
]
]
]

output_ch =
channelFromParams(testParams, config)
| view { "Input: $it" }
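Another recurring change in this main.nf and in harmony_leiden's below is switching a component's fromState from the map form, which only selects and renames state keys, to the closure form, so that fixed values such as the output filename and output_compression can be injected for the published step. A plain Groovy sketch of the difference, reusing the state keys from the diff above with made-up values (this is not the openpipelines/VDSL3 machinery itself):

// The state keys mirror the diff above; the values are illustrative.
def state = [
  input     : "sample.h5mu",
  modality  : "rna",
  uns_output: "neighbors",
  obsm_umap : "X_umap",
  output    : "sample.final.h5mu"
]

// Map form: [component_argument: state_key] selects and renames keys, nothing more.
def fromStateMap = [
  "input"        : "input",
  "uns_neighbors": "uns_output",
  "obsm_output"  : "obsm_umap",
  "modality"     : "modality"
]
def argsFromMap = fromStateMap.collectEntries { argName, stateKey -> [ argName, state[stateKey] ] }

// Closure form: arbitrary Groovy, so constants can sit next to the renamed keys.
def fromStateClosure = { id, st ->
  [
    input             : st.input,
    uns_neighbors     : st.uns_output,
    obsm_output       : st.obsm_umap,
    modality          : st.modality,
    output            : st.output,
    output_compression: "gzip"
  ]
}
def argsFromClosure = fromStateClosure("foo", state)

println argsFromMap       // selected and renamed keys only
println argsFromClosure   // renamed keys plus the injected constants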
workflows/multiomics/integration/harmony_leiden/config.vsh.yaml:
@@ -103,4 +103,7 @@ functionality:
- type: nextflow_script
path: main.nf
entrypoint: test_wf
- type: nextflow_script
path: main.nf
entrypoint: test_wf2
- path: /resources_test/pbmc_1k_protein_v3
Integration test script for harmony_leiden:
@@ -14,5 +14,11 @@ nextflow run . \
-main-script workflows/multiomics/integration/harmony_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf \
-with-trace work/trace.txt \
-with-dag workflows/multiomics/integration/harmony_leiden/graph.dot
-c workflows/utils/labels_ci.config

nextflow run . \
-main-script workflows/multiomics/integration/harmony_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf2 \
-c workflows/utils/labels_ci.config

workflows/multiomics/integration/harmony_leiden/main.nf: 77 changes (62 additions, 15 deletions)
@@ -28,7 +28,7 @@ workflow run_wf {
input_ch

main:
output_ch = input_ch
neighbors_ch = input_ch
| preprocessInputs("config": config)

// run harmonypy
@@ -57,6 +57,8 @@
toState: ["input": "output"]
)

with_leiden_ch = neighbors_ch
| filter{id, state -> state.leiden_resolution}
// run leiden clustering
| leiden.run(
fromState: [
@@ -68,30 +70,37 @@
],
toState: ["input": "output"]
)

// run umap
| umap.run(
fromState: [
"input": "input",
"modality": "modality",
"obsm_input": "obsm_integrated",
"obsm_output": "obsm_umap",
"uns_neighbors": "uns_neighbors"
],
toState: ["input": "output"]
)

// move obsm to obs
| move_obsm_to_obs.run(
fromState:
[
"input": "input",
"obsm_key": "obs_cluster",
"modality": "modality",
],
toState: ["input": "output"]
)

without_leiden_ch = neighbors_ch
| filter{id, state -> !state.leiden_resolution}

output_ch = with_leiden_ch.mix(without_leiden_ch)
// run umap
| umap.run(
fromState: { id, state ->
[
"input": state.input,
"modality": state.modality,
"obsm_key": state.obs_cluster,
"obsm_input": state.obsm_integrated,
"obsm_output": state.obsm_umap,
"uns_neighbors": state.uns_neighbors,
"output": state.output,
"output_compression": "gzip"
]
},
toState: { id, output, state ->
[ output: output.output ]
},
auto: [ publish: true ]
)

@@ -135,3 +144,41 @@ workflow test_wf {
}
//| check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
}

workflow test_wf2 {
// allow changing the resources_test dir
params.resources_test = params.rootDir + "/resources_test"

// or when running from s3: params.resources_test = "s3://openpipelines-data/"
testParams = [
param_list: [
[
id: "foo",
input: params.resources_test + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
layer: "log_normalized",
obs_covariates: "sample_id",
embedding: "X_pca",
leiden_resolution: [],
output: "foo.final.h5mu"
]
]
]

output_ch =
channelFromParams(testParams, config)
| view { "Input: $it" }
| run_wf
| view { output ->
assert output.size() == 2 : "outputs should contain two elements; [id, file]"
assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1]}"
"Output: $output"
}
| toList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain 1 event"
assert (output_list.collect({it[0]}) as Set).equals(["foo"] as Set): "Output ID should be same as input ID"
assert (output_list.collect({it[1].output.getFileName().toString()}) as Set).equals(["foo.final.h5mu"] as Set)
}
//| check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
}

workflows/multiomics/integration/scanorama_leiden/config.vsh.yaml:
@@ -115,4 +115,7 @@ functionality:
- type: nextflow_script
path: main.nf
entrypoint: test_wf
- type: nextflow_script
path: main.nf
entrypoint: test_wf2
- path: /resources_test/pbmc_1k_protein_v3
Integration test script for scanorama_leiden:
@@ -13,6 +13,9 @@ export NXF_VER=21.10.6
nextflow run . \
-main-script workflows/multiomics/integration/scanorama_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf \
-with-trace work/trace.txt \
-with-dag workflows/multiomics/integration/scanorama_leiden/graph.dot
-entry test_wf

nextflow run . \
-main-script workflows/multiomics/integration/scanorama_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf2
