Fix DataFrames in obsm (#95)

theislab · Jul 13, 2023 · 637e30f · 637e30f
1 parent 165fe0c
commit 637e30f
Show file tree

Hide file tree

Showing 13 changed files with 115 additions and 45 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@
 /docs/_build/
 _version.py
 __pycache__/
+.pytest_cache/
 
 # Jupyter
 /.ipynb_checkpoints/

diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -1,12 +1,13 @@
 version: 2
 build:
-  image: latest
-sphinx:
-  configuration: docs/conf.py
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
 python:
-  version: 3.7
   install:
   - method: pip
     path: .
     extra_requirements:
     - doc
+sphinx:
+  configuration: docs/conf.py
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,13 @@
+{
+    "[python]": {
+        "editor.formatOnSave": true,
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        //"editor.codeActionsOnSave": {
+        //    "source.fixAll.ruff": true,
+        //},
+    },
+    "python.testing.pytestArgs": [],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "python.terminal.activateEnvironment": false,
+}
diff --git a/pyproject.toml b/pyproject.toml
@@ -17,16 +17,15 @@ urls.'Documentation' = 'https://icb-anndata2ri.readthedocs-hosted.com/'
 urls.'Source Code' = 'https://github.com/theislab/anndata2ri'
 urls.'Issue Tracker' = 'https://github.com/theislab/anndata2ri/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc'
 dynamic = ['version']
-requires-python = '>= 3.7'
+requires-python = '>= 3.8'
 dependencies = [
-    'get_version',
     'rpy2 >= 3.4.3',
     'tzlocal',  # for pandas2ri
     'anndata',
 ]
 
 [project.optional-dependencies]
-dev = ['pre-commit']
+dev = ['pre-commit', 'setuptools-scm']
 test = [
     'pytest',
     'pytest-faulthandler',
@@ -37,7 +36,7 @@ doc = [
     'sphinx>=3.0',
     'sphinx-autodoc-typehints',
     'scanpydoc',
-    'sphinx-rtd-theme>=0.5',  # Already a dep but 0.5 is prettier
+    'sphinx-rtd-theme>=0.5',
     'lxml',  # For scraping the R link info
     'importlib_metadata; python_version < "3.8"',
 ]
@@ -49,6 +48,38 @@ raw-options = { local_scheme = 'no-local-version' }  # be able to publish dev ve
 [tool.hatch.build.hooks.vcs]
 version-file = 'src/anndata2ri/_version.py'
 
+[tool.hatch.envs.docs]
+features = ['doc']
+[tool.hatch.envs.docs.scripts]
+build = 'sphinx-build -M html docs docs/_build'
+
+[[tool.hatch.envs.test.matrix]]
+python = ['3.8', '3.9', '3.10', '3.11']
+[tool.hatch.envs.test]
+features = ['test']
+[tool.hatch.envs.test.scripts]
+run = 'pytest -vv {args}'
+
+[tool.pytest.ini_options]
+addopts = [
+    '--import-mode=importlib',
+    '-panndata2ri.test_utils',
+    # TODO '-Werror',
+]
+filterwarnings = [
+    # eventlet 0.24.1 imports dns.hash: https://github.com/eventlet/eventlet/pull/563
+    'ignore::DeprecationWarning:dns.hash',
+    # igraph 0.7.1post6 imports SafeConfigParser: https://github.com/igraph/python-igraph/pull/203
+    'ignore::DeprecationWarning:igraph.configuration',
+    # ipywidgets 7.4.2 imports ABCs from collections: https://github.com/jupyter-widgets/ipywidgets/pull/2395
+    'ignore::DeprecationWarning:ipywidgets.widgets.widget_selection',
+    # jinja2 2.10.1 imports ABCs from collections: https://github.com/pallets/jinja/pull/867
+    'ignore::DeprecationWarning:jinja2.utils',
+    'ignore::DeprecationWarning:jinja2.runtime',
+    # rpy2 3.0.2 imports ABCs from collections: https://bitbucket.org/rpy2/rpy2/pull-requests/74/fix-deprecationwarning/diff
+    'ignore::DeprecationWarning:rpy2.rinterface_lib.sexp',
+]
+
 [tool.black]
 line-length = 120
 skip-string-normalization = true

diff --git a/pytest.ini b/pytest.ini
diff --git a/src/anndata2ri/conv.py b/src/anndata2ri/conv.py
@@ -1,15 +1,31 @@
-from typing import Optional
+from __future__ import annotations
 
+import numpy as np
+import pandas as pd
 from rpy2.robjects import conversion, numpy2ri, pandas2ri
 from rpy2.robjects.conversion import overlay_converter
 
 from . import scipy2ri
 
 
-original_converter: Optional[conversion.Converter] = None
+original_converter: conversion.Converter | None = None
 converter = conversion.Converter('original anndata conversion')
 
-mat_converter = numpy2ri.converter + scipy2ri.converter
+_mat_converter = numpy2ri.converter + scipy2ri.converter
+
+
+def mat_py2rpy(obj: np.ndarray | pd.DataFrame) -> np.ndarray:
+    """Convert a matrix-like Python object to R using ``_mat_converter``.
+
+    A :class:`pandas.DataFrame` is first checked to hold only numeric columns
+    and is then turned into a plain array with ``DataFrame.to_numpy`` before
+    conversion. Any other object is passed to ``_mat_converter.py2rpy`` as is.
+
+    Raises:
+        ValueError: If ``obj`` is a DataFrame with at least one non-numeric
+            column; the message lists the offending column labels.
+    """
+    # NOTE(review): the return annotation is kept from the original signature;
+    # the value is whatever ``_mat_converter.py2rpy`` produces - confirm its type.
+    if isinstance(obj, pd.DataFrame):
+        # ``is_numeric_dtype`` is used instead of ``dtype <= np.number``: that
+        # comparison makes NumPy convert the abstract ``np.number`` to a concrete
+        # dtype (deprecated) and is not defined for pandas extension dtypes.
+        non_num = [col for col, dtype in obj.dtypes.items() if not pd.api.types.is_numeric_dtype(dtype)]
+        if non_num:
+            msg = f'DataFrame contains non-numeric columns {non_num}'
+            raise ValueError(msg)
+        obj = obj.to_numpy()
+    return _mat_converter.py2rpy(obj)
+
+
+mat_rpy2py = _mat_converter.rpy2py
 
 
 def full_converter() -> conversion.Converter:

diff --git a/src/anndata2ri/py2r.py b/src/anndata2ri/py2r.py
@@ -10,7 +10,7 @@
 from rpy2.robjects.vectors import ListVector
 
 from . import conv_name
-from .conv import converter, full_converter, mat_converter
+from .conv import converter, full_converter, mat_py2rpy
 from .rpy2_ext import importr
 
 
@@ -52,8 +52,8 @@ def py2rpy_anndata(obj: AnnData) -> RS4:
         s4v = importr('S4Vectors')
         sce = importr('SingleCellExperiment')
         # TODO: sparse
-        x = {} if obj.X is None else dict(X=mat_converter.py2rpy(obj.X.T))
-        layers = {k: mat_converter.py2rpy(v.T) for k, v in obj.layers.items()}
+        x = {} if obj.X is None else dict(X=mat_py2rpy(obj.X.T))
+        layers = {k: mat_py2rpy(v.T) for k, v in obj.layers.items()}
         assays = ListVector({**x, **layers})
 
         row_args = {k: pandas2ri.py2rpy(v) for k, v in obj.var.items()}
@@ -70,7 +70,7 @@ def py2rpy_anndata(obj: AnnData) -> RS4:
         with localconverter(full_converter() + dict_converter):
             metadata = ListVector(obj.uns.items())
 
-        rd_args = {conv_name.scanpy2sce(k): mat_converter.py2rpy(obj.obsm[k]) for k in obj.obsm.keys()}
+        rd_args = {conv_name.scanpy2sce(k): mat_py2rpy(obj.obsm[k]) for k in obj.obsm.keys()}
         reduced_dims = s4v.SimpleList(**rd_args)
 
         return sce.SingleCellExperiment(

diff --git a/src/anndata2ri/r2py.py b/src/anndata2ri/r2py.py
@@ -11,7 +11,7 @@
 from rpy2.robjects.robject import RSlots
 
 from . import conv_name
-from .conv import converter, full_converter, mat_converter
+from .conv import converter, full_converter, mat_rpy2py
 from .rpy2_ext import importr
 from .scipy2ri import supported_r_matrix_classes
 from .scipy2ri.r2py import rmat_to_spmat
@@ -78,9 +78,9 @@ def rpy2py_single_cell_experiment(obj: SexpS4) -> AnnData:
         def convert_mats(attr: str, mats: Mapping[str, Sexp], *, transpose: bool = False):
             rv = []
             for n, mat in mats.items():
-                conv = mat_converter.rpy2py(mat)
+                conv = mat_rpy2py(mat)
                 if isinstance(conv, RS4):
-                    cls_names = mat_converter.rpy2py(conv.slots['class']).tolist()
+                    cls_names = mat_rpy2py(conv.slots['class']).tolist()
                     raise TypeError(f'Cannot convert {attr} “{n}” of type(s) {cls_names} to Python')
                 rv.append(conv.T if transpose else conv)
             return rv
@@ -89,7 +89,7 @@ def convert_mats(attr: str, mats: Mapping[str, Sexp], *, transpose: bool = False
         if not isinstance(assay_names, NULLType):
             assay_names = [str(a) for a in se.assayNames(obj)]
             # The assays can be stored in an env or elsewise so we don’t use obj.slots['assays']
-            assays = convert_mats(f'assay', {n: se.assay(obj, n) for n in assay_names}, transpose=True)
+            assays = convert_mats('assay', {n: se.assay(obj, n) for n in assay_names}, transpose=True)
             # There’s SingleCellExperiment with no assays
             exprs, layers = assays[0], dict(zip(assay_names[1:], assays[1:]))
             assert len(exprs.shape) == 2, exprs.shape

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,2 @@
+# the following line is only necessary for IDEs
+from anndata2ri.test_utils import py2r, r2py  # noqa: F401
diff --git a/tests/test_py2rpy.py b/tests/test_py2rpy.py
@@ -1,15 +1,15 @@
-from typing import List
-from warnings import WarningMessage, catch_warnings, simplefilter
+from warnings import catch_warnings, simplefilter
 
 import numpy as np
 import pytest
 import scanpy as sc
 from anndata import AnnData
+from pandas import DataFrame
 from rpy2.robjects import baseenv, globalenv
+from rpy2.robjects.conversion import localconverter
 
 import anndata2ri
 from anndata2ri.rpy2_ext import importr
-from anndata2ri.test_utils import py2r  # noqa
 
 
 def mk_ad_simple():
@@ -56,9 +56,33 @@ def test_py2rpy2_numpy_pbmc68k():
 
     try:
         anndata2ri.activate()
-        with catch_warnings(record=True) as logs:  # type: List[WarningMessage]
+        with catch_warnings(record=True) as logs:
             simplefilter('ignore', DeprecationWarning)
             globalenv['adata'] = pbmc68k_reduced()
         assert len(logs) == 0, [m.message for m in logs]
     finally:
         anndata2ri.deactivate()
+
+
+@pytest.mark.parametrize('attr', ['X', 'layers', 'obsm'])
+def test_dfs(attr):
+    """An AnnData whose X, layer, or obsm entry is a DataFrame can be assigned into R."""
+    adata = mk_ad_simple()
+
+    def as_df(mat):
+        # Wrap a matrix in a DataFrame indexed by the observation names.
+        return DataFrame(mat, index=adata.obs_names)
+
+    if attr == 'X':
+        adata.X = as_df(adata.X)
+    elif attr == 'layers':
+        adata.layers['X2'] = as_df(adata.X)
+    elif attr == 'obsm':
+        adata.obsm['X_pca'] = as_df(adata.obsm['X_pca'])
+    else:
+        assert False, attr
+
+    # Assigning into the R global environment triggers the py2rpy conversion.
+    with localconverter(anndata2ri.converter):
+        globalenv['adata_obsm_pd'] = adata
+
+
+def test_df_error():
+    """Converting an AnnData with a non-numeric obsm DataFrame column raises a ValueError naming it."""
+    adata = mk_ad_simple()
+    # Column ``b`` holds strings; ``a`` and ``c`` are numeric.
+    mixed = DataFrame({'a': [1, 2], 'b': ['a', 'b'], 'c': [1.0, 2.0]}, index=adata.obs_names)
+    adata.obsm['stuff'] = mixed
+    expected = r"DataFrame contains non-numeric columns \['b'\]"
+    with pytest.raises(ValueError, match=expected):
+        anndata2ri.converter.py2rpy(adata)
diff --git a/tests/test_rpy2py.py b/tests/test_rpy2py.py
@@ -7,7 +7,6 @@
 
 import anndata2ri
 from anndata2ri.rpy2_ext import importr
-from anndata2ri.test_utils import r2py  # noqa
 
 
 as_ = getattr(importr('methods'), 'as')

diff --git a/tests/test_scipy_py2rpy.py b/tests/test_scipy_py2rpy.py
@@ -4,7 +4,6 @@
 from scipy import sparse
 
 from anndata2ri import scipy2ri
-from anndata2ri.test_utils import conversions_py2rpy
 
 
 mats = [
@@ -19,12 +18,11 @@
 
 
 @pytest.mark.parametrize('typ', ['l', 'd'])
-@pytest.mark.parametrize('conversion', conversions_py2rpy)
 @pytest.mark.parametrize('shape,dataset,cls', mats)
-def test_py2rpy(typ, conversion, shape, dataset, cls):
+def test_py2rpy(py2r, typ, shape, dataset, cls):
     if typ == 'l':
         dataset = dataset.astype(bool)
-    sm = conversion(scipy2ri, dataset)
+    sm = py2r(scipy2ri, dataset)
     assert f'{typ}{cls}Matrix' in set(sm.rclass)
     assert tuple(baseenv['dim'](sm)) == shape
 

diff --git a/tests/test_scipy_rpy2py.py b/tests/test_scipy_rpy2py.py
@@ -9,7 +9,6 @@
 
 from anndata2ri import scipy2ri
 from anndata2ri.rpy2_ext import importr
-from anndata2ri.test_utils import ConversionModule, conversions_rpy2py
 
 
 matrix = importr('Matrix')
@@ -54,17 +53,16 @@
 ]
 
 
-@pytest.mark.parametrize('conversion', conversions_rpy2py)
 @pytest.mark.parametrize('shape,cls,dtype,arr,dataset', mats)
 def test_py2rpy(
-    conversion: Callable[[ConversionModule, Callable[[], Sexp]], sparse.spmatrix],
+    r2py,
     shape: Tuple[int, int],
     cls: Type[sparse.spmatrix],
     dtype: np.dtype,
     arr: np.ndarray,
     dataset: Callable[[], Sexp],
 ):
-    sm = conversion(scipy2ri, dataset)
+    sm = r2py(scipy2ri, dataset)
     assert isinstance(sm, cls)
     assert sm.shape == shape
     assert np.allclose(sm.toarray(), np.array(arr))
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# the following line is only necessary for IDEs
		from anndata2ri.test_utils import py2r, r2py # noqa: F401