From 075df2fb4681abb4a493c0fa58e1d437ff679989 Mon Sep 17 00:00:00 2001
From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
Date: Tue, 7 Jan 2025 10:53:34 +0100
Subject: [PATCH] Fix compatibility with SciPy 1.15.0 (#945)

---
 CHANGELOG.md                                  | 4 +++-
 src/filter/subset_obsp/script.py              | 5 +++++
 src/metadata/grep_annotation_column/script.py | 8 ++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e21fe92dc29..86640cde7bc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,8 @@
 # openpipelines 2.1.0
 
-## MINOR CHANGES
+## MINOR CHANGES
+
+* `grep_annotation_column` and `subset_obsp`: Fix compatibility with SciPy 1.15.0 (PR #945).
 
 * `popv`: Pin numpy<2 after new release of scvi-tools (PR #946).
 
diff --git a/src/filter/subset_obsp/script.py b/src/filter/subset_obsp/script.py
index 24ce002f586..909502a59b9 100644
--- a/src/filter/subset_obsp/script.py
+++ b/src/filter/subset_obsp/script.py
@@ -31,6 +31,11 @@ def main():
     # the index dimensions remain unaltered, but .obsp columns will be subset
     obsp = adata.obsp[par["input_obsp_key"]]
     idx = adata.obs[par["input_obs_key"]].astype(str) == par["input_obs_value"]
+    # A Series object cannot be used as an indexer for a scipy sparse array
+    # when the data type is a pandas boolean extension array because
+    # extension arrays do not define .nonzero()
+    # See https://github.com/pandas-dev/pandas/issues/46025
+    idx = idx.to_numpy(dtype="bool", na_value=False)
     obsm_subset = obsp[:, idx]
 
     logger.info(f"Writing subset obsp matrix to .obsm {par['output_obsm_key']}")
diff --git a/src/metadata/grep_annotation_column/script.py b/src/metadata/grep_annotation_column/script.py
index f5c29db0357..555591e672f 100644
--- a/src/metadata/grep_annotation_column/script.py
+++ b/src/metadata/grep_annotation_column/script.py
@@ -11,6 +11,7 @@
 ### VIASH START
 par = {
     "input": "./resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu",
+    "input_layer": None,
     "modality": "rna",
     "matrix": "var",
     "input_column": "gene_symbol",
@@ -21,6 +22,8 @@
     "output_fraction_column": "fraction_test",
     "output_compression": "gzip",
 }
+
+meta = {"resources_dir": "src/utils"}
 ### VIASH END
 sys.path.append(meta["resources_dir"])
 from setup_logger import setup_logger
@@ -90,6 +93,11 @@ def main(par):
     logger.info("Applying regex search.")
     grep_result = annotation_column.str.contains(par["regex_pattern"], regex=True)
     logger.info("Search results: %s", grep_result.value_counts())
+    # A Series object cannot be used as an indexer for a scipy sparse array
+    # when the data type is a pandas boolean extension array because
+    # extension arrays do not define .nonzero()
+    # See https://github.com/pandas-dev/pandas/issues/46025
+    grep_result = grep_result.to_numpy(dtype="bool", na_value=False)
 
     other_axis_attribute = {"var": "obs", "obs": "var"}
     if par["output_fraction_column"]: