From 30365a359c61b1a3484d77c08b9fbf80d00022c4 Mon Sep 17 00:00:00 2001 From: Eljas Roellin <65244425+eroell@users.noreply.github.com> Date: Thu, 19 Oct 2023 17:41:30 +0200 Subject: [PATCH] Backport PR #2698: Fix for highly_variable_genes flavor=seurat modifying layer --- docs/release-notes/1.9.6.md | 4 +++- .../preprocessing/_highly_variable_genes.py | 7 ++++++- scanpy/tests/test_highly_variable_genes.py | 20 +++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/release-notes/1.9.6.md b/docs/release-notes/1.9.6.md index b4bd868911..63fbe87644 100644 --- a/docs/release-notes/1.9.6.md +++ b/docs/release-notes/1.9.6.md @@ -5,4 +5,6 @@ - Allow {func}`scanpy.pl.scatter` to accept a {class}`str` palette name {pr}`2571` {smaller}`P Angerer` - Make {func}`scanpy.external.tl.palantir` compatible with palantir >=1.3 {pr}`2672` {smaller}`DJ Otto` -- Fix {func}`scanpy.pl.pca` when when `return_fig=True` and `annotate_var_explained=True` {pr}`2682` {smaller}`J Wagner` +- Fix {func}`scanpy.pl.pca` when `return_fig=True` and `annotate_var_explained=True` {pr}`2682` {smaller}`J Wagner` +- Temp fix for {issue}`2680` by skipping `seaborn` version 0.13.0 {pr}`2661` {smaller}`P Angerer` +- Fix {func}`scanpy.pp.highly_variable_genes` to not modify the used layer when `flavor=seurat` {pr}`2698` {smaller}`E Roellin` diff --git a/scanpy/preprocessing/_highly_variable_genes.py b/scanpy/preprocessing/_highly_variable_genes.py index eb792c1eb9..45e83732ec 100644 --- a/scanpy/preprocessing/_highly_variable_genes.py +++ b/scanpy/preprocessing/_highly_variable_genes.py @@ -194,9 +194,14 @@ def _highly_variable_genes_single_batch( """ X = adata.layers[layer] if layer is not None else adata.X if flavor == 'seurat': + X = X.copy() if 'log1p' in adata.uns_keys() and adata.uns['log1p'].get('base') is not None: X *= np.log(adata.uns['log1p']['base']) - X = np.expm1(X) + # use out if possible. only possible since we copy X + if isinstance(X, np.ndarray): + np.expm1(X, out=X) + else: + X = np.expm1(X) mean, var = materialize_as_ndarray(_get_mean_var(X)) # now actually compute the dispersion diff --git a/scanpy/tests/test_highly_variable_genes.py b/scanpy/tests/test_highly_variable_genes.py index c469428b86..a46ef0eee1 100644 --- a/scanpy/tests/test_highly_variable_genes.py +++ b/scanpy/tests/test_highly_variable_genes.py @@ -72,6 +72,26 @@ def test_highly_variable_genes_basic(): assert np.all(np.isin(colnames, hvg_df.columns)) +@pytest.mark.parametrize('base', [None, 10]) +@pytest.mark.parametrize('flavor', ['seurat', 'cell_ranger']) +def test_highly_variable_genes_keep_layer(base, flavor): + adata = pbmc3k() + # cell_ranger flavor can raise error if many 0 genes + sc.pp.filter_genes(adata, min_counts=1) + + sc.pp.log1p(adata, base=base) + X_orig = adata.X.copy() + + if flavor == 'seurat': + sc.pp.highly_variable_genes(adata, n_top_genes=50, flavor=flavor) + elif flavor == 'cell_ranger': + sc.pp.highly_variable_genes(adata, flavor=flavor) + else: + assert False + + assert np.allclose(X_orig.A, adata.X.A) + + def _check_pearson_hvg_columns(output_df, n_top_genes): assert pd.api.types.is_float_dtype(output_df['residual_variances'].dtype)