Commit

Backport PR #837 to 1.0.x (#842)
* Concatenate_h5mu: fix writing multi-dimensional annotation frames. (#837)

* Concatenate_h5mu: fix writing multidimensional annotation frames.

* Undo some changes

* Update test

* Add PR number

* Trigger CI

* Update CHANGELOG

* deploy: 57add3f

* Update CHANGELOG

---------

Co-authored-by: DriesSchaumont <[email protected]>
DriesSchaumont and DriesSchaumont authored Jul 22, 2024
1 parent 0ead6c6 commit 2963adb
Showing 573 changed files with 2,368 additions and 2,251 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,12 @@
# openpipelines 1.0.2

## BUG FIXES

* `dataflow/concatenate_h5mu`: fix writing out multidimensional annotation dataframes (e.g. `.varm`) whose
dtype changed as a result of adding more observations during concatenation, causing a `TypeError`.
One notable example is when a multidimensional annotation dataframe is present in one sample but missing
from another, causing the missing values to be filled with `NA` (PR #842, backported from PR #837).

# openpipelines 1.0.1

## BUG FIXES
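To illustrate the failure mode described in the changelog entry above, here is a minimal, self-contained pandas sketch. It does not use the component's actual code; the frame, column name and values are invented for illustration. NA-filling a boolean annotation column during a join promotes it to `object` dtype, which the HDF5-backed writers reject, and casting it to a nullable pandas dtype makes it writable again.

```python
# Hypothetical reproduction of the dtype issue (not the component's code);
# the frame, column name and values below are invented for illustration.
import pandas as pd

varm_df = pd.DataFrame({"is_marker": [True, False]}, index=["gene_a", "gene_b"])
print(varm_df["is_marker"].dtype)  # bool

# After concatenation, the .varm frame is reindexed to the union of var names;
# the missing row is filled with NaN and the column is promoted to `object`.
union_index = pd.Index(["gene_a", "gene_b", "gene_c"])
joined = varm_df.reindex(union_index)
print(joined["is_marker"].dtype)  # object -> not writable to HDF5 as-is

# Casting to a nullable pandas dtype restores a writable column.
fixed = joined["is_marker"].astype(pd.BooleanDtype())
print(fixed.dtype)  # boolean
```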
10 changes: 9 additions & 1 deletion src/dataflow/concatenate_h5mu/script.py
@@ -220,7 +220,15 @@ def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnD
output_index = getattr(output, matrix_name).index
conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, output_index)
if concatenated_matrix.empty:
concatenated_matrix.index = output_index
concatenated_matrix.index = output_index

# Even though the varm and obsm matrices that were already present were not modified,
# joining the observations might have changed their dtype as well,
# so they also need to be cast to a writable dtype.
for multidim_name, multidim_data in getattr(output, f"{matrix_name}m").items():
new_data = cast_to_writeable_dtype(multidim_data) if isinstance(multidim_data, pd.DataFrame) else multidim_data
getattr(output, f"{matrix_name}m")[multidim_name] = new_data

# Write the conflicts to the output
for conflict_name, conflict_data in conflicts.items():
getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data
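The loop above calls the component's `cast_to_writeable_dtype` helper, whose implementation is not shown in this diff. As a rough idea of what such a helper could do (an assumption, not the repository's actual implementation), a sketch might convert every column of a DataFrame to its nullable pandas equivalent so that mudata can serialize it:

```python
# Illustrative sketch only -- NOT the repository's cast_to_writeable_dtype.
# It shows the general idea: convert columns that were promoted to `object`
# or NaN-filled floats back to nullable pandas dtypes that can be written.
import pandas as pd


def cast_to_writeable_dtype_sketch(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` whose columns use nullable (writable) dtypes."""
    result = df.copy()
    for column in result.columns:
        # convert_dtypes() picks nullable equivalents (Int64, boolean, string, ...)
        # where possible and leaves already-writable columns untouched.
        result[column] = result[column].convert_dtypes()
    return result
```

In the actual component, the result of this cast is assigned back into the `.varm`/`.obsm` slots, as the loop in the diff shows.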
62 changes: 57 additions & 5 deletions src/dataflow/concatenate_h5mu/test.py
@@ -478,9 +478,14 @@ def test_concat_different_columns_per_modality_and_per_sample(run_component, sam
non_shared_features = data_sample1.var_names.difference(data_sample2.var_names)
assert concatenated_data.var.loc[non_shared_features, 'mod2:Feat4'].isna().all()

@pytest.mark.parametrize("test_value,expected", [("bar", "bar"), (True, True), (0.1, 0.1), (np.nan, pd.NA)])
@pytest.mark.parametrize("test_value,test_value_dtype,expected", [("bar", "str", "bar"),
(True, pd.BooleanDtype(), True),
(1, pd.Int16Dtype(), 1),
(0.1, float, 0.1),
(0.1, np.float64, 0.1),
(np.nan, np.float64, pd.NA)])
def test_concat_remove_na(run_component, sample_1_h5mu, sample_2_h5mu,
write_mudata_to_file, random_h5mu_path, test_value, expected,
write_mudata_to_file, random_h5mu_path, test_value, test_value_dtype, expected,
change_column_contents):
"""
Test concatenation of samples where the column from one sample contains NA values
@@ -492,7 +497,7 @@ def test_concat_remove_na(run_component, sample_1_h5mu, sample_2_h5mu,
"""
change_column_contents(sample_1_h5mu, 'var', 'Shared_feat', {'mod1': np.nan, 'mod2': np.nan})
change_column_contents(sample_2_h5mu, 'var', 'Shared_feat', {'mod1': test_value, 'mod2': np.nan})

sample_2_h5mu.var['Shared_feat'] = sample_2_h5mu.var['Shared_feat'].astype(test_value_dtype)
output_path = random_h5mu_path()

run_component([
@@ -547,9 +552,17 @@ def test_concat_invalid_h5_error_includes_path(run_component, tmp_path,
err.value.stdout.decode('utf-8'))


@pytest.mark.parametrize("test_value_1,test_value_2,expected", [(1, "1", pd.CategoricalDtype(categories=['1.0', '1']))])
@pytest.mark.parametrize("test_value_1,value_1_dtype,test_value_2,value_2_dtype,expected",
[(1, float, "1", str, pd.CategoricalDtype(categories=['1.0', '1'])),
(1, np.float64, "1", str, pd.CategoricalDtype(categories=['1.0', '1'])),
(1, pd.Int16Dtype(), 2.0, pd.Int16Dtype(), pd.Int64Dtype()),
(True, bool, False, bool, pd.BooleanDtype()),
(True, pd.BooleanDtype(), False, bool, pd.BooleanDtype()),
("foo", str, "bar", str, pd.CategoricalDtype(categories=['bar', 'foo'])),
]
)
def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_column_contents,
sample_1_h5mu, sample_2_h5mu, test_value_1, test_value_2,
sample_1_h5mu, sample_2_h5mu, test_value_1, value_1_dtype, test_value_2, value_2_dtype,
expected, random_h5mu_path):
"""
Test joining columns with different dtypes to make sure that the result is writable.
@@ -561,7 +574,10 @@ def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_
for the test column in mod2 is still writable.
"""
change_column_contents(sample_1_h5mu, "var", "test_col", {"mod1": test_value_1, "mod2": test_value_1})
sample_1_h5mu.var['test_col'] = sample_1_h5mu.var['test_col'].astype(value_1_dtype)
change_column_contents(sample_2_h5mu, "var", "test_col", {"mod1": test_value_2, "mod2": test_value_2})
sample_2_h5mu.var['test_col'] = sample_2_h5mu.var['test_col'].astype(value_2_dtype)

output_file = random_h5mu_path()
run_component([
"--input_id", "sample1;sample2",
@@ -573,6 +589,40 @@ def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_
concatenated_data = md.read(output_file)
assert concatenated_data['mod2'].var['test_col'].dtype == expected


@pytest.mark.parametrize("test_value,value_dtype,expected",
[(1, float, pd.Int64Dtype()),
(1, np.float64, pd.Int64Dtype()),
(1, pd.Int16Dtype(), pd.Int16Dtype()),
(True, bool, pd.BooleanDtype()),
(True, pd.BooleanDtype(), pd.BooleanDtype()),
("foo", str, pd.CategoricalDtype(categories=['foo'])),
]
)
def test_concat_dtypes_per_modality_multidim(run_component, write_mudata_to_file,
sample_1_h5mu, sample_2_h5mu, test_value, value_dtype,
expected, random_h5mu_path):
"""
Test that the result of concatenation is still writable when the input already contains
data in .varm and this data is kept. Because observations are joined, the dtype of this
data may change and the result might no longer be writable.
"""

sample_1_h5mu['mod1'].varm['test_df'] = pd.DataFrame(index=sample_1_h5mu['mod1'].var_names)
sample_1_h5mu['mod1'].varm['test_df']['test_col'] = test_value
sample_1_h5mu['mod1'].varm['test_df']['test_col'] = sample_1_h5mu['mod1'].varm['test_df']['test_col'].astype(value_dtype)

output_file = random_h5mu_path()
run_component([
"--input_id", "sample1;sample2",
"--input", write_mudata_to_file(sample_1_h5mu),
"--input", write_mudata_to_file(sample_2_h5mu),
"--output", output_file,
"--other_axis_mode", "move"
])
concatenated_data = md.read(output_file)
assert concatenated_data['mod1'].varm['test_df']['test_col'].dtype == expected

@pytest.mark.parametrize("test_value_1,test_value_2,expected", [(1, "1", pd.CategoricalDtype(categories=['1.0', '1']))])
def test_concat_dtypes_global(run_component, write_mudata_to_file, change_column_contents,
sample_1_h5mu, sample_2_h5mu, test_value_1, test_value_2,
@@ -622,6 +672,8 @@ def test_non_overlapping_modalities(run_component, sample_2_h5mu, sample_3_h5mu,
"--output", output_path,
"--other_axis_mode", "move"
])
output_data = md.read(output_path)
assert set(output_data.mod.keys()) == {"mod1", "mod2", "mod3"}


def test_resolve_annotation_conflict_missing_column(run_component, sample_1_h5mu,
6 changes: 3 additions & 3 deletions target/docker/annotate/popv/.config.vsh.yaml
@@ -1,7 +1,7 @@
functionality:
name: "popv"
namespace: "annotate"
version: "1.0.1"
version: "1.0.2"
authors:
- name: "Matthias Beyens"
roles:
@@ -352,6 +352,6 @@ info:
output: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv"
executable: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv/popv"
viash_version: "0.8.6"
git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
git_tag: "1.0.0-1-g8ba584e550"
git_tag: "1.0.1-3-g57add3ff13"
12 changes: 6 additions & 6 deletions target/docker/annotate/popv/popv
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

# popv 1.0.1
# popv 1.0.2
#
# This wrapper script is auto-generated by viash 0.8.6 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -174,7 +174,7 @@ VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "popv 1.0.1"
echo "popv 1.0.2"
echo ""
echo "Performs popular major vote cell typing on single cell sequence data using"
echo "multiple algorithms. Note that this is a one-shot version of PopV."
@@ -503,10 +503,10 @@ RUN cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \
LABEL org.opencontainers.image.authors="Matthias Beyens, Robrecht Cannoodt"
LABEL org.opencontainers.image.description="Companion container for running component annotate popv"
LABEL org.opencontainers.image.created="2024-06-18T14:14:56Z"
LABEL org.opencontainers.image.created="2024-07-22T07:16:09Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
LABEL org.opencontainers.image.version="1.0.1"
LABEL org.opencontainers.image.revision="57add3ff137ac44d67bc7456cb9146bc6dc633cc"
LABEL org.opencontainers.image.version="1.0.2"
VIASHDOCKER
}
@@ -661,7 +661,7 @@ while [[ $# -gt 0 ]]; do
shift 1
;;
--version)
echo "popv 1.0.1"
echo "popv 1.0.2"
exit
;;
--input)
6 changes: 3 additions & 3 deletions target/docker/cluster/leiden/.config.vsh.yaml
@@ -1,7 +1,7 @@
functionality:
name: "leiden"
namespace: "cluster"
version: "1.0.1"
version: "1.0.2"
authors:
- name: "Dries De Maeyer"
roles:
@@ -230,6 +230,6 @@ info:
output: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden"
executable: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden/leiden"
viash_version: "0.8.6"
git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
git_tag: "1.0.0-1-g8ba584e550"
git_tag: "1.0.1-3-g57add3ff13"
12 changes: 6 additions & 6 deletions target/docker/cluster/leiden/leiden
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

# leiden 1.0.1
# leiden 1.0.2
#
# This wrapper script is auto-generated by viash 0.8.6 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -173,7 +173,7 @@ VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "leiden 1.0.1"
echo "leiden 1.0.2"
echo ""
echo "Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy"
echo "framework] [Wolf18]."
@@ -460,10 +460,10 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries De Maeyer"
LABEL org.opencontainers.image.description="Companion container for running component cluster leiden"
LABEL org.opencontainers.image.created="2024-06-18T14:14:57Z"
LABEL org.opencontainers.image.created="2024-07-22T07:16:05Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
LABEL org.opencontainers.image.version="1.0.1"
LABEL org.opencontainers.image.revision="57add3ff137ac44d67bc7456cb9146bc6dc633cc"
LABEL org.opencontainers.image.version="1.0.2"
VIASHDOCKER
}
@@ -618,7 +618,7 @@ while [[ $# -gt 0 ]]; do
shift 1
;;
--version)
echo "leiden 1.0.1"
echo "leiden 1.0.2"
exit
;;
--input)
6 changes: 3 additions & 3 deletions target/docker/compression/compress_h5mu/.config.vsh.yaml
@@ -1,7 +1,7 @@
functionality:
name: "compress_h5mu"
namespace: "compression"
version: "1.0.1"
version: "1.0.2"
authors:
- name: "Dries Schaumont"
roles:
@@ -171,6 +171,6 @@ info:
output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu"
executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu/compress_h5mu"
viash_version: "0.8.6"
git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
git_tag: "1.0.0-1-g8ba584e550"
git_tag: "1.0.1-3-g57add3ff13"
12 changes: 6 additions & 6 deletions target/docker/compression/compress_h5mu/compress_h5mu
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

# compress_h5mu 1.0.1
# compress_h5mu 1.0.2
#
# This wrapper script is auto-generated by viash 0.8.6 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -173,7 +173,7 @@ VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "compress_h5mu 1.0.1"
echo "compress_h5mu 1.0.2"
echo ""
echo "Compress a MuData file."
echo ""
@@ -423,10 +423,10 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component compression compress_h5mu"
LABEL org.opencontainers.image.created="2024-06-18T14:14:58Z"
LABEL org.opencontainers.image.created="2024-07-22T07:16:06Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
LABEL org.opencontainers.image.version="1.0.1"
LABEL org.opencontainers.image.revision="57add3ff137ac44d67bc7456cb9146bc6dc633cc"
LABEL org.opencontainers.image.version="1.0.2"
VIASHDOCKER
}
@@ -581,7 +581,7 @@ while [[ $# -gt 0 ]]; do
shift 1
;;
--version)
echo "compress_h5mu 1.0.1"
echo "compress_h5mu 1.0.2"
exit
;;
--input)
6 changes: 3 additions & 3 deletions target/docker/compression/tar_extract/.config.vsh.yaml
@@ -1,7 +1,7 @@
functionality:
name: "tar_extract"
namespace: "compression"
version: "1.0.1"
version: "1.0.2"
arguments:
- type: "file"
name: "--input"
@@ -107,6 +107,6 @@ info:
output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract"
executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract/tar_extract"
viash_version: "0.8.6"
git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
git_tag: "1.0.0-1-g8ba584e550"
git_tag: "1.0.1-3-g57add3ff13"
12 changes: 6 additions & 6 deletions target/docker/compression/tar_extract/tar_extract
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

# tar_extract 1.0.1
# tar_extract 1.0.2
#
# This wrapper script is auto-generated by viash 0.8.6 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -170,7 +170,7 @@ VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "tar_extract 1.0.1"
echo "tar_extract 1.0.2"
echo ""
echo "Extract files from a tar archive"
echo ""
@@ -421,10 +421,10 @@ ENTRYPOINT []
RUN :
LABEL org.opencontainers.image.description="Companion container for running component compression tar_extract"
LABEL org.opencontainers.image.created="2024-06-18T14:14:58Z"
LABEL org.opencontainers.image.created="2024-07-22T07:16:06Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
LABEL org.opencontainers.image.version="1.0.1"
LABEL org.opencontainers.image.revision="57add3ff137ac44d67bc7456cb9146bc6dc633cc"
LABEL org.opencontainers.image.version="1.0.2"
VIASHDOCKER
}
@@ -579,7 +579,7 @@ while [[ $# -gt 0 ]]; do
shift 1
;;
--version)
echo "tar_extract 1.0.1"
echo "tar_extract 1.0.2"
exit
;;
--input)
6 changes: 3 additions & 3 deletions target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml
@@ -1,7 +1,7 @@
functionality:
name: "from_10xh5_to_h5mu"
namespace: "convert"
version: "1.0.1"
version: "1.0.2"
authors:
- name: "Robrecht Cannoodt"
roles:
@@ -277,6 +277,6 @@ info:
output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xh5_to_h5mu"
executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu"
viash_version: "0.8.6"
git_commit: "8ba584e550ac93c484a8b6ddea0c44618d02cf3f"
git_commit: "57add3ff137ac44d67bc7456cb9146bc6dc633cc"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
git_tag: "1.0.0-1-g8ba584e550"
git_tag: "1.0.1-3-g57add3ff13"
