
Prevent leiden component from running in workflows when no resolutions are given (#583)

* Do not run leiden when no resolutions are given

* Undo chmod

* Update CHANGELOG

* Formatting
DriesSchaumont authored Oct 9, 2023
1 parent dea329e commit 046363b
Showing 18 changed files with 474 additions and 179 deletions.
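The workflow diffs below implement this by splitting each integration workflow's channel on the leiden_resolution state entry: events that carry resolutions run through leiden, events with an empty list bypass it, and the two branches are merged again before the downstream steps. A minimal, self-contained Nextflow sketch of that pattern (illustrative sample IDs and a stub map step stand in for the real components; this is not the repository's run_wf):

nextflow.enable.dsl = 2

workflow {
  // Two events: one with resolutions, one with an empty list.
  input_ch = Channel.fromList([
    [ "sample_a", [ leiden_resolution: [0.25, 1.0] ] ],
    [ "sample_b", [ leiden_resolution: [] ] ]
  ])

  // Groovy treats an empty list as falsy, so filtering on the value itself
  // separates "has resolutions" from "no resolutions".
  with_leiden_ch = input_ch
    | filter { id, state -> state.leiden_resolution }
    | map { id, state -> [ id, state + [ clustered: true ] ] }   // stand-in for leiden.run(...)

  without_leiden_ch = input_ch
    | filter { id, state -> !state.leiden_resolution }

  // Downstream steps (umap, publishing) consume the merged channel.
  with_leiden_ch
    | mix(without_leiden_ch)
    | view { "Merged event: $it" }
}

In the bbknn_leiden and harmony_leiden diffs below, umap and the final publish step run on the merged channel, so an event with leiden_resolution: [] still produces a published h5mu, only without the leiden cluster annotations.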
CHANGELOG.md: 2 changes (2 additions, 0 deletions)
@@ -22,6 +22,8 @@

* `correction/cellbender_remove_background`: change base image to `nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04` and downgrade MuData to 0.2.1 because it is the oldest version that uses python 3.7 (PR #575).

* Several integration workflows: prevent leiden from being executed when no resolutions are provided (PR #583).

## BUG FIXES

* `transform/clr`: raise an error when CLR fails to return the requested output (PR #579).
src/cluster/leiden/config.vsh.yaml: 1 change (1 addition, 0 deletions)
@@ -66,6 +66,7 @@ functionality:
A parameter value controlling the coarseness of the clustering. Higher values lead to more clusters.
Multiple values will result in clustering being performed multiple times.
default: [1]
required: true
multiple: true
resources:
- type: python_script
workflows/multiomics/integration/bbknn_leiden/config.vsh.yaml: 3 changes (3 additions, 0 deletions)
@@ -99,4 +99,7 @@ functionality:
- type: nextflow_script
path: main.nf
entrypoint: test_wf
- type: nextflow_script
path: main.nf
entrypoint: test_wf2
- path: /resources_test/pbmc_1k_protein_v3
Integration test script for bbknn_leiden:
@@ -14,5 +14,10 @@ nextflow run . \
-main-script workflows/multiomics/integration/bbknn_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf \
-with-trace work/trace.txt \
-with-dag workflows/multiomics/integration/bbknn_leiden/graph.dot
-c workflows/utils/labels_ci.config

nextflow run . \
-main-script workflows/multiomics/integration/bbknn_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf2 \
-c workflows/utils/labels_ci.config
workflows/multiomics/integration/bbknn_leiden/main.nf: 92 changes (70 additions, 22 deletions)
@@ -27,7 +27,7 @@ workflow run_wf {
input_ch

main:
output_ch = input_ch
bbknn_ch = input_ch
| preprocessInputs("config": config)

// compute bbknn graph
@@ -64,7 +64,8 @@
"input": "output"
]
)

with_leiden_ch = bbknn_ch
| filter{id, state -> state.leiden_resolution}
// run leiden on the bbknn graph
| leiden.run(
fromState: [
@@ -78,30 +79,32 @@
"input": "output"
]
)
// move obsm leiden cluster dataframe to obs
| move_obsm_to_obs.run(
fromState:
[
"input": "input",
"obsm_key": "obs_cluster",
"modality": "modality",
],
toState: ["input": "output"]
)

without_leiden_ch = bbknn_ch
| filter{id, state -> !state.leiden_resolution}

output_ch = with_leiden_ch.mix(without_leiden_ch)
// run umap on the bbknn graph
| umap.run(
fromState: [
"input": "input",
"uns_neighbors": "uns_output",
"obsm_output": "obsm_umap",
"modality": "modality"
],
toState: [
"input": "output"
]
)

// move obsm leiden cluster dataframe to obs
| move_obsm_to_obs.run(
fromState: { id, state ->
[
input: state.input,
obsm_key: state.obs_cluster,
modality: state.modality,
output: state.output,
output_compression: "gzip"
]
[
"input": state.input,
"uns_neighbors": state.uns_output,
"obsm_output": state.obsm_umap,
"modality": state.modality,
"output": state.output,
"output_compression": "gzip"
]
},
toState: { id, output, state ->
[ output: output.output ]
@@ -128,6 +131,51 @@ workflow test_wf {
]
]

output_ch =
channelFromParams(testParams, config)
| view { "Input: $it" }
| run_wf
| view { tup ->
assert tup.size() == 2 : "outputs should contain two elements; [id, output]"

// check id
def id = tup[0]
assert id == "foo" : "ID should be 'foo'. Found: ${id}"

// check output
def output = tup[1]
assert output instanceof Map: "Output should be a map. Found: ${output}"
assert "output" in output : "Output should contain key 'output'. Found: ${output}"

// check h5mu
def output_h5mu = output.output
assert output_h5mu.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output}"

"Output: $output"
}
| toList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain 1 event"
}
//| check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
}

workflow test_wf2 {
// allow changing the resources_test dir
params.resources_test = params.rootDir + "/resources_test"

// or when running from s3: params.resources_test = "s3://openpipelines-data/"
testParams = [
param_list: [
[
id: "foo",
input: params.resources_test + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
layer: "log_normalized",
leiden_resolution: []
]
]
]

output_ch =
channelFromParams(testParams, config)
| view { "Input: $it" }
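Another recurring change in this main.nf and in harmony_leiden's below is switching a component's fromState from the map form, which only selects and renames state keys, to the closure form, so that fixed values such as the output filename and output_compression can be injected for the published step. A plain Groovy sketch of the difference, reusing the state keys from the diff above with made-up values (this is not the openpipelines/VDSL3 machinery itself):

// The state keys mirror the diff above; the values are illustrative.
def state = [
  input     : "sample.h5mu",
  modality  : "rna",
  uns_output: "neighbors",
  obsm_umap : "X_umap",
  output    : "sample.final.h5mu"
]

// Map form: [component_argument: state_key] selects and renames keys, nothing more.
def fromStateMap = [
  "input"        : "input",
  "uns_neighbors": "uns_output",
  "obsm_output"  : "obsm_umap",
  "modality"     : "modality"
]
def argsFromMap = fromStateMap.collectEntries { argName, stateKey -> [ argName, state[stateKey] ] }

// Closure form: arbitrary Groovy, so constants can sit next to the renamed keys.
def fromStateClosure = { id, st ->
  [
    input             : st.input,
    uns_neighbors     : st.uns_output,
    obsm_output       : st.obsm_umap,
    modality          : st.modality,
    output            : st.output,
    output_compression: "gzip"
  ]
}
def argsFromClosure = fromStateClosure("foo", state)

println argsFromMap       // selected and renamed keys only
println argsFromClosure   // renamed keys plus the injected constants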
workflows/multiomics/integration/harmony_leiden/config.vsh.yaml:
@@ -103,4 +103,7 @@ functionality:
- type: nextflow_script
path: main.nf
entrypoint: test_wf
- type: nextflow_script
path: main.nf
entrypoint: test_wf2
- path: /resources_test/pbmc_1k_protein_v3
Integration test script for harmony_leiden:
@@ -14,5 +14,11 @@ nextflow run . \
-main-script workflows/multiomics/integration/harmony_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf \
-with-trace work/trace.txt \
-with-dag workflows/multiomics/integration/harmony_leiden/graph.dot
-c workflows/utils/labels_ci.config

nextflow run . \
-main-script workflows/multiomics/integration/harmony_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf2 \
-c workflows/utils/labels_ci.config

workflows/multiomics/integration/harmony_leiden/main.nf: 77 changes (62 additions, 15 deletions)
@@ -28,7 +28,7 @@ workflow run_wf {
input_ch

main:
output_ch = input_ch
neighbors_ch = input_ch
| preprocessInputs("config": config)

// run harmonypy
@@ -57,6 +57,8 @@
toState: ["input": "output"]
)

with_leiden_ch = neighbors_ch
| filter{id, state -> state.leiden_resolution}
// run leiden clustering
| leiden.run(
fromState: [
@@ -68,30 +70,37 @@
],
toState: ["input": "output"]
)

// run umap
| umap.run(
fromState: [
"input": "input",
"modality": "modality",
"obsm_input": "obsm_integrated",
"obsm_output": "obsm_umap",
"uns_neighbors": "uns_neighbors"
],
toState: ["input": "output"]
)

// move obsm to obs
| move_obsm_to_obs.run(
fromState:
[
"input": "input",
"obsm_key": "obs_cluster",
"modality": "modality",
],
toState: ["input": "output"]
)

without_leiden_ch = neighbors_ch
| filter{id, state -> !state.leiden_resolution}

output_ch = with_leiden_ch.mix(without_leiden_ch)
// run umap
| umap.run(
fromState: { id, state ->
[
"input": state.input,
"modality": state.modality,
"obsm_key": state.obs_cluster,
"obsm_input": state.obsm_integrated,
"obsm_output": state.obsm_umap,
"uns_neighbors": state.uns_neighbors,
"output": state.output,
"output_compression": "gzip"
]
},
toState: { id, output, state ->
[ output: output.output ]
},
auto: [ publish: true ]
)

@@ -135,3 +144,41 @@ workflow test_wf {
}
//| check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
}

workflow test_wf2 {
// allow changing the resources_test dir
params.resources_test = params.rootDir + "/resources_test"

// or when running from s3: params.resources_test = "s3://openpipelines-data/"
testParams = [
param_list: [
[
id: "foo",
input: params.resources_test + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
layer: "log_normalized",
obs_covariates: "sample_id",
embedding: "X_pca",
leiden_resolution: [],
output: "foo.final.h5mu"
]
]
]

output_ch =
channelFromParams(testParams, config)
| view { "Input: $it" }
| run_wf
| view { output ->
assert output.size() == 2 : "outputs should contain two elements; [id, file]"
assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1]}"
"Output: $output"
}
| toList()
| map { output_list ->
assert output_list.size() == 1 : "output channel should contain 1 event"
assert (output_list.collect({it[0]}) as Set).equals(["foo"] as Set): "Output ID should be same as input ID"
assert (output_list.collect({it[1].output.getFileName().toString()}) as Set).equals(["foo.final.h5mu"] as Set)
}
//| check_format(args: {""}) // todo: check whether output h5mu has the right slots defined
}

workflows/multiomics/integration/scanorama_leiden/config.vsh.yaml:
@@ -115,4 +115,7 @@ functionality:
- type: nextflow_script
path: main.nf
entrypoint: test_wf
- type: nextflow_script
path: main.nf
entrypoint: test_wf2
- path: /resources_test/pbmc_1k_protein_v3
Integration test script for scanorama_leiden:
@@ -13,6 +13,9 @@ export NXF_VER=21.10.6
nextflow run . \
-main-script workflows/multiomics/integration/scanorama_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf \
-with-trace work/trace.txt \
-with-dag workflows/multiomics/integration/scanorama_leiden/graph.dot
-entry test_wf

nextflow run . \
-main-script workflows/multiomics/integration/scanorama_leiden/main.nf \
-profile docker,no_publish \
-entry test_wf2
