Unpin numpy 2 (#3115)
* Unpin numpy 2

* float64 and harmonize metrics code

* Skip tests for old skmisc

* Fix parallel tests

* fix numpy 2 reprs

* add relnote

* (fix): release notes version

---------

Co-authored-by: Ilan Gold <[email protected]>
flying-sheep and ilan-gold authored Jul 2, 2024
1 parent 2046129 commit 4b090c0
Showing 10 changed files with 162 additions and 102 deletions.
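Background for the diffs below (editor-added context, not text from the commit): NumPy 2.0 removed the `np.float_` alias in favor of `np.float64` and changed how scalars and booleans print, which is most likely what the "float64" and "fix numpy 2 reprs" bullets above refer to. A minimal sketch of the differences, assuming NumPy >= 2.0 is installed:

```python
# Sketch of the NumPy 2.0 changes this commit works around (assumes numpy>=2.0).
import numpy as np

# np.float_ was removed in NumPy 2.0; np.float64 is the spelling that works on 1.x and 2.x.
x = np.asarray([1.0, 2.0], dtype=np.float64)

# Scalar and boolean reprs now include the type, which breaks doctest-style output checks.
print(repr(np.float64(1.0)))  # NumPy 2.x: "np.float64(1.0)"; NumPy 1.x: "1.0"
print(repr(np.bool_(True)))   # NumPy 2.x: "np.True_";        NumPy 1.x: "True"
```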
2 changes: 1 addition & 1 deletion docs/release-notes/1.10.3.md
@@ -10,7 +10,7 @@
 ```

 * Fix `subset=True` of {func}`~scanpy.pp.highly_variable_genes` when `flavor` is `seurat` or `cell_ranger`, and `batch_key!=None` {pr}`3042` {smaller}`E Roellin`

+* Add compatibility with {mod}`numpy` 2.0 {pr}`3065` and {pr}`3115` {smaller}`P Angerer`

 ```{rubric} Performance
 ```
4 changes: 1 addition & 3 deletions pyproject.toml
@@ -48,9 +48,7 @@ classifiers = [
 ]
 dependencies = [
     "anndata>=0.8",
-    # TODO: remove <2 requirement once PyNNDescent releases this fix:
-    # https://github.com/lmcinnes/pynndescent/issues/241
-    "numpy>=1.23,<2",
+    "numpy>=1.23",
     "matplotlib>=3.6",
     "pandas >=1.5",
     "scipy>=1.8",
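Not part of this diff, only an illustration of what dropping the `<2` pin implies: a project that must now run against both NumPy 1.x and 2.x can gate version-specific behavior at runtime with `numpy.lib.NumpyVersion` (the `IS_NUMPY_2` name is hypothetical):

```python
# Hypothetical runtime gate for code that supports NumPy 1.x and 2.x alike.
import numpy as np
from numpy.lib import NumpyVersion

IS_NUMPY_2 = NumpyVersion(np.__version__) >= "2.0.0"
print(IS_NUMPY_2)
```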
8 changes: 4 additions & 4 deletions src/scanpy/_utils/compute/is_constant.py
@@ -81,7 +81,7 @@ def is_constant(
 def _(a: NDArray, axis: Literal[0, 1] | None = None) -> bool | NDArray[np.bool_]:
     # Should eventually support nd, not now.
     if axis is None:
-        return (a == a.flat[0]).all()
+        return bool((a == a.flat[0]).all())
     if axis == 0:
         return _is_constant_rows(a.T)
     elif axis == 1:
@@ -116,9 +116,9 @@ def _is_constant_csr_rows(
     indptr: NDArray[np.integer],
     shape: tuple[int, int],
 ):
-    N = len(indptr) - 1
-    result = np.ones(N, dtype=np.bool_)
-    for i in range(N):
+    n = len(indptr) - 1
+    result = np.ones(n, dtype=np.bool_)
+    for i in range(n):
         start = indptr[i]
         stop = indptr[i + 1]
         if stop - start == shape[1]:
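Why the `bool(...)` cast in the first hunk matters, as a small editor-added sketch (assuming NumPy >= 2.0): `.all()` returns an `np.bool_`, whose repr changed to `np.True_`/`np.False_` in NumPy 2, while the function is annotated to return a plain `bool`.

```python
# Sketch: raw np.bool_ result vs. the bool() cast (assumes NumPy >= 2.0).
import numpy as np

a = np.zeros((2, 3))
raw = (a == a.flat[0]).all()          # np.bool_; repr is "np.True_" on NumPy 2.x
cast = bool((a == a.flat[0]).all())   # plain Python bool; repr is "True" everywhere

print(repr(raw), repr(cast))
```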
112 changes: 72 additions & 40 deletions src/scanpy/metrics/_gearys_c.py
@@ -1,3 +1,5 @@
+"""Geary's C autocorrelation."""
+
 from __future__ import annotations

 from functools import singledispatch
@@ -87,7 +89,7 @@ def gearys_c(
     Examples
     --------
-    Calculate Gearys C for each components of a dimensionality reduction:
+    Calculate Geary’s C for each components of a dimensionality reduction:

     .. code:: python
@@ -135,29 +137,38 @@ def gearys_c(


 @numba.njit(cache=True, parallel=True)
-def _gearys_c_vec(data, indices, indptr, x):
+def _gearys_c_vec(
+    data: np.ndarray,
+    indices: np.ndarray,
+    indptr: np.ndarray,
+    x: np.ndarray,
+) -> float:
     W = data.sum()
     return _gearys_c_vec_W(data, indices, indptr, x, W)


 @numba.njit(cache=True, parallel=True)
-def _gearys_c_vec_W(data, indices, indptr, x, W):
-    N = len(indptr) - 1
-    x = x.astype(np.float_)
+def _gearys_c_vec_W(
+    data: np.ndarray,
+    indices: np.ndarray,
+    indptr: np.ndarray,
+    x: np.ndarray,
+    W: np.float64,
+):
+    n = len(indptr) - 1
+    x = x.astype(np.float64)
     x_bar = x.mean()

     total = 0.0
-    for i in numba.prange(N):
+    for i in numba.prange(n):
         s = slice(indptr[i], indptr[i + 1])
         i_indices = indices[s]
         i_data = data[s]
         total += np.sum(i_data * ((x[i] - x[i_indices]) ** 2))

-    numer = (N - 1) * total
+    numer = (n - 1) * total
     denom = 2 * W * ((x - x_bar) ** 2).sum()
-    C = numer / denom
-
-    return C
+    return numer / denom


 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
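The numba kernels in this file all evaluate the same statistic. As a plain-Python reference (an editor-added sketch mirroring `_gearys_c_vec_W` above; the helper name `gearys_c_dense_reference` is hypothetical), the loop computes C = (n - 1) * sum_ij w_ij * (x_i - x_j)^2 / (2 * W * sum_i (x_i - x_bar)^2), with W = sum_ij w_ij:

```python
# Reference sketch of the quantity the kernel above computes, without numba.
import numpy as np
from scipy import sparse


def gearys_c_dense_reference(g: sparse.csr_matrix, x: np.ndarray) -> float:
    """Geary's C of a dense vector `x` over a sparse CSR weight matrix `g`."""
    x = x.astype(np.float64)
    n = x.shape[0]
    W = g.data.sum()
    total = 0.0
    for i in range(n):
        start, stop = g.indptr[i], g.indptr[i + 1]
        neighbors = g.indices[start:stop]  # columns with nonzero weight in row i
        weights = g.data[start:stop]
        total += np.sum(weights * (x[i] - x[neighbors]) ** 2)
    return (n - 1) * total / (2 * W * ((x - x.mean()) ** 2).sum())
```

The jitted versions replace the outer `for` with `numba.prange` so that rows are processed in parallel.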
@@ -172,36 +183,47 @@ def _gearys_c_vec_W(data, indices, indptr, x, W):


 @numba.njit(cache=True)
-def _gearys_c_inner_sparse_x_densevec(g_data, g_indices, g_indptr, x, W):
+def _gearys_c_inner_sparse_x_densevec(
+    g_data: np.ndarray,
+    g_indices: np.ndarray,
+    g_indptr: np.ndarray,
+    x: np.ndarray,
+    W: np.float64,
+) -> float:
     x_bar = x.mean()
     total = 0.0
-    N = len(x)
-    for i in numba.prange(N):
+    n = len(x)
+    for i in numba.prange(n):
         s = slice(g_indptr[i], g_indptr[i + 1])
         i_indices = g_indices[s]
         i_data = g_data[s]
         total += np.sum(i_data * ((x[i] - x[i_indices]) ** 2))
-    numer = (N - 1) * total
+    numer = (n - 1) * total
     denom = 2 * W * ((x - x_bar) ** 2).sum()
-    C = numer / denom
-    return C
+    return numer / denom


 @numba.njit(cache=True)
 def _gearys_c_inner_sparse_x_sparsevec(  # noqa: PLR0917
-    g_data, g_indices, g_indptr, x_data, x_indices, N, W
-):
-    x = np.zeros(N, dtype=np.float_)
+    g_data: np.ndarray,
+    g_indices: np.ndarray,
+    g_indptr: np.ndarray,
+    x_data: np.ndarray,
+    x_indices: np.ndarray,
+    n: int,
+    W: np.float64,
+) -> float:
+    x = np.zeros(n, dtype=np.float64)
     x[x_indices] = x_data
-    x_bar = np.sum(x_data) / N
+    x_bar = np.sum(x_data) / n
     total = 0.0
-    N = len(x)
-    for i in numba.prange(N):
+    n = len(x)
+    for i in numba.prange(n):
         s = slice(g_indptr[i], g_indptr[i + 1])
         i_indices = g_indices[s]
         i_data = g_data[s]
         total += np.sum(i_data * ((x[i] - x[i_indices]) ** 2))
-    numer = (N - 1) * total
+    numer = (n - 1) * total
     # Expanded from 2 * W * ((x_k - x_k_bar) ** 2).sum(), but uses sparsity
     # to skip some calculations
     # fmt: off
@@ -210,43 +232,53 @@ def _gearys_c_inner_sparse_x_sparsevec(  # noqa: PLR0917
         * (
             np.sum(x_data ** 2)
             - np.sum(x_data * x_bar * 2)
-            + (x_bar ** 2) * N
+            + (x_bar ** 2) * n
         )
     )
     # fmt: on
-    C = numer / denom
-    return C
+    return numer / denom


 @numba.njit(cache=True, parallel=True)
-def _gearys_c_mtx(g_data, g_indices, g_indptr, X):
-    M, N = X.shape
-    assert N == len(g_indptr) - 1
+def _gearys_c_mtx(
+    g_data: np.ndarray,
+    g_indices: np.ndarray,
+    g_indptr: np.ndarray,
+    X: np.ndarray,
+) -> np.ndarray:
+    m, n = X.shape
+    assert n == len(g_indptr) - 1
     W = g_data.sum()
-    out = np.zeros(M, dtype=np.float_)
-    for k in numba.prange(M):
-        x = X[k, :].astype(np.float_)
+    out = np.zeros(m, dtype=np.float64)
+    for k in numba.prange(m):
+        x = X[k, :].astype(np.float64)
         out[k] = _gearys_c_inner_sparse_x_densevec(g_data, g_indices, g_indptr, x, W)
     return out


 @numba.njit(cache=True, parallel=True)
 def _gearys_c_mtx_csr(  # noqa: PLR0917
-    g_data, g_indices, g_indptr, x_data, x_indices, x_indptr, x_shape
-):
-    M, N = x_shape
+    g_data: np.ndarray,
+    g_indices: np.ndarray,
+    g_indptr: np.ndarray,
+    x_data: np.ndarray,
+    x_indices: np.ndarray,
+    x_indptr: np.ndarray,
+    x_shape: tuple,
+) -> np.ndarray:
+    m, n = x_shape
     W = g_data.sum()
-    out = np.zeros(M, dtype=np.float_)
+    out = np.zeros(m, dtype=np.float64)
     x_data_list = np.split(x_data, x_indptr[1:-1])
     x_indices_list = np.split(x_indices, x_indptr[1:-1])
-    for k in numba.prange(M):
+    for k in numba.prange(m):
         out[k] = _gearys_c_inner_sparse_x_sparsevec(
             g_data,
             g_indices,
             g_indptr,
             x_data_list[k],
             x_indices_list[k],
-            N,
+            n,
             W,
         )
     return out
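The `# fmt: off` block above relies on the expansion sum_i (x_i - x_bar)^2 = sum(x^2) - 2 * x_bar * sum(x) + n * x_bar^2, so only the stored entries of the sparse vector contribute to the two sums. A quick editor-added check of that identity on a dense array:

```python
# Verifies the algebraic identity behind the sparsity-friendly denominator above.
import numpy as np

rng = np.random.default_rng(0)
x = rng.random(100)
x[x < 0.9] = 0.0  # mostly zeros, like a sparse expression vector

x_bar = x.mean()
lhs = ((x - x_bar) ** 2).sum()
rhs = np.sum(x**2) - 2 * x_bar * np.sum(x) + x.size * x_bar**2
assert np.isclose(lhs, rhs)
```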
Expand All @@ -261,15 +293,15 @@ def _gearys_c_mtx_csr( # noqa: PLR0917
def _gearys_c(g: sparse.csr_matrix, vals: np.ndarray | sparse.spmatrix) -> np.ndarray:
assert g.shape[0] == g.shape[1], "`g` should be a square adjacency matrix"
vals = _resolve_vals(vals)
g_data = g.data.astype(np.float_, copy=False)
g_data = g.data.astype(np.float64, copy=False)
if isinstance(vals, sparse.csr_matrix):
assert g.shape[0] == vals.shape[1]
new_vals, idxer, full_result = _check_vals(vals)
result = _gearys_c_mtx_csr(
g_data,
g.indices,
g.indptr,
new_vals.data.astype(np.float_, copy=False),
new_vals.data.astype(np.float64, copy=False),
new_vals.indices,
new_vals.indptr,
new_vals.shape,
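To connect back to the docstring excerpt above, a usage sketch (editor-added; it assumes the public `sc.metrics.gearys_c` accepts an `obsm` keyword and that a neighbor graph has been computed, as in scanpy's documentation):

```python
# Usage sketch: Geary's C per PCA component on a small bundled dataset.
import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()  # ships with PCA already in .obsm["X_pca"]
sc.pp.neighbors(adata)                 # graph whose weights play the role of `g` above

c_per_component = sc.metrics.gearys_c(adata, obsm="X_pca")
print(c_per_component.shape, c_per_component.dtype)  # expected: (n_pcs,) float64
```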