Skip to content

Commit

Permalink
Fix DataFrames in obsm (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
flying-sheep authored Jul 13, 2023
1 parent 165fe0c commit 637e30f
Show file tree
Hide file tree
Showing 13 changed files with 115 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
/docs/_build/
_version.py
__pycache__/
.pytest_cache/

# Jupyter
/.ipynb_checkpoints/
Expand Down
9 changes: 5 additions & 4 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
version: 2
build:
image: latest
sphinx:
configuration: docs/conf.py
os: ubuntu-22.04
tools:
python: "3.11"
python:
version: 3.7
install:
- method: pip
path: .
extra_requirements:
- doc
sphinx:
configuration: docs/conf.py
13 changes: 13 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"[python]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "ms-python.black-formatter",
//"editor.codeActionsOnSave": {
// "source.fixAll.ruff": true,
//},
},
"python.testing.pytestArgs": [],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.terminal.activateEnvironment": false,
}
39 changes: 35 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,15 @@ urls.'Documentation' = 'https://icb-anndata2ri.readthedocs-hosted.com/'
urls.'Source Code' = 'https://github.com/theislab/anndata2ri'
urls.'Issue Tracker' = 'https://github.com/theislab/anndata2ri/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc'
dynamic = ['version']
requires-python = '>= 3.7'
requires-python = '>= 3.8'
dependencies = [
'get_version',
'rpy2 >= 3.4.3',
'tzlocal', # for pandas2ri
'anndata',
]

[project.optional-dependencies]
dev = ['pre-commit']
dev = ['pre-commit', 'setuptools-scm']
test = [
'pytest',
'pytest-faulthandler',
Expand All @@ -37,7 +36,7 @@ doc = [
'sphinx>=3.0',
'sphinx-autodoc-typehints',
'scanpydoc',
'sphinx-rtd-theme>=0.5', # Already a dep but 0.5 is prettier
'sphinx-rtd-theme>=0.5',
'lxml', # For scraping the R link info
'importlib_metadata; python_version < "3.8"',
]
Expand All @@ -49,6 +48,38 @@ raw-options = { local_scheme = 'no-local-version' } # be able to publish dev ve
[tool.hatch.build.hooks.vcs]
version-file = 'src/anndata2ri/_version.py'

[tool.hatch.envs.docs]
features = ['doc']
[tool.hatch.envs.docs.scripts]
build = 'sphinx-build -M html docs docs/_build'

[[tool.hatch.envs.test.matrix]]
python = ['3.8', '3.9', '3.10', '3.11']
[tool.hatch.envs.test]
features = ['test']
[tool.hatch.envs.test.scripts]
run = 'pytest -vv {args}'

[tool.pytest.ini_options]
addopts = [
'--import-mode=importlib',
'-panndata2ri.test_utils',
# TODO '-Werror',
]
filterwarnings = [
# eventlet 0.24.1 imports dns.hash: https://github.com/eventlet/eventlet/pull/563
'ignore::DeprecationWarning:dns.hash',
# igraph 0.7.1post6 imports SafeConfigParser: https://github.com/igraph/python-igraph/pull/203
'ignore::DeprecationWarning:igraph.configuration',
# ipywidgets 7.4.2 imports ABCs from collections: https://github.com/jupyter-widgets/ipywidgets/pull/2395
'ignore::DeprecationWarning:ipywidgets.widgets.widget_selection',
# jinja2 2.10.1 imports ABCs from collections: https://github.com/pallets/jinja/pull/867
'ignore::DeprecationWarning:jinja2.utils',
'ignore::DeprecationWarning:jinja2.runtime',
# rpy2 3.0.2 imports ABCs from collections: https://bitbucket.org/rpy2/rpy2/pull-requests/74/fix-deprecationwarning/diff
'ignore::DeprecationWarning:rpy2.rinterface_lib.sexp',
]

[tool.black]
line-length = 120
skip-string-normalization = true
Expand Down
13 changes: 0 additions & 13 deletions pytest.ini

This file was deleted.

22 changes: 19 additions & 3 deletions src/anndata2ri/conv.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,31 @@
from typing import Optional
from __future__ import annotations

import numpy as np
import pandas as pd
from rpy2.robjects import conversion, numpy2ri, pandas2ri
from rpy2.robjects.conversion import overlay_converter

from . import scipy2ri


original_converter: Optional[conversion.Converter] = None
original_converter: conversion.Converter | None = None
converter = conversion.Converter('original anndata conversion')

mat_converter = numpy2ri.converter + scipy2ri.converter
_mat_converter = numpy2ri.converter + scipy2ri.converter


def mat_py2rpy(obj: np.ndarray | pd.DataFrame):
    """Convert a matrix-like Python object to R via the matrix converter.

    A :class:`pandas.DataFrame` is validated and turned into a NumPy array first;
    any other object is handed to ``_mat_converter.py2rpy`` unchanged.

    Parameters
    ----------
    obj
        Matrix-like object, or a data frame containing only numeric columns.

    Returns
    -------
    The result of ``_mat_converter.py2rpy`` for the (possibly converted) input.

    Raises
    ------
    ValueError
        If ``obj`` is a data frame with at least one non-numeric column.
        The offending column labels are listed in the message.
    """
    if isinstance(obj, pd.DataFrame):
        # A column counts as numeric when it has a plain NumPy dtype that can be
        # safely cast to float64 (booleans, integers, floats up to 64 bit).
        # The check is spelled out explicitly instead of comparing dtypes against
        # the abstract ``np.number`` class: that comparison relies on an implicit
        # class-to-dtype coercion that NumPy has deprecated, and it is not defined
        # for pandas extension dtypes (those are reported as non-numeric here).
        non_num = [
            col
            for col, dtype in obj.dtypes.items()
            if not (isinstance(dtype, np.dtype) and np.can_cast(dtype, np.float64))
        ]
        if non_num:
            msg = f'DataFrame contains non-numeric columns {non_num}'
            raise ValueError(msg)
        obj = obj.to_numpy()
    return _mat_converter.py2rpy(obj)


mat_rpy2py = _mat_converter.rpy2py


def full_converter() -> conversion.Converter:
Expand Down
8 changes: 4 additions & 4 deletions src/anndata2ri/py2r.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from rpy2.robjects.vectors import ListVector

from . import conv_name
from .conv import converter, full_converter, mat_converter
from .conv import converter, full_converter, mat_py2rpy
from .rpy2_ext import importr


Expand Down Expand Up @@ -52,8 +52,8 @@ def py2rpy_anndata(obj: AnnData) -> RS4:
s4v = importr('S4Vectors')
sce = importr('SingleCellExperiment')
# TODO: sparse
x = {} if obj.X is None else dict(X=mat_converter.py2rpy(obj.X.T))
layers = {k: mat_converter.py2rpy(v.T) for k, v in obj.layers.items()}
x = {} if obj.X is None else dict(X=mat_py2rpy(obj.X.T))
layers = {k: mat_py2rpy(v.T) for k, v in obj.layers.items()}
assays = ListVector({**x, **layers})

row_args = {k: pandas2ri.py2rpy(v) for k, v in obj.var.items()}
Expand All @@ -70,7 +70,7 @@ def py2rpy_anndata(obj: AnnData) -> RS4:
with localconverter(full_converter() + dict_converter):
metadata = ListVector(obj.uns.items())

rd_args = {conv_name.scanpy2sce(k): mat_converter.py2rpy(obj.obsm[k]) for k in obj.obsm.keys()}
rd_args = {conv_name.scanpy2sce(k): mat_py2rpy(obj.obsm[k]) for k in obj.obsm.keys()}
reduced_dims = s4v.SimpleList(**rd_args)

return sce.SingleCellExperiment(
Expand Down
8 changes: 4 additions & 4 deletions src/anndata2ri/r2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from rpy2.robjects.robject import RSlots

from . import conv_name
from .conv import converter, full_converter, mat_converter
from .conv import converter, full_converter, mat_rpy2py
from .rpy2_ext import importr
from .scipy2ri import supported_r_matrix_classes
from .scipy2ri.r2py import rmat_to_spmat
Expand Down Expand Up @@ -78,9 +78,9 @@ def rpy2py_single_cell_experiment(obj: SexpS4) -> AnnData:
def convert_mats(attr: str, mats: Mapping[str, Sexp], *, transpose: bool = False):
rv = []
for n, mat in mats.items():
conv = mat_converter.rpy2py(mat)
conv = mat_rpy2py(mat)
if isinstance(conv, RS4):
cls_names = mat_converter.rpy2py(conv.slots['class']).tolist()
cls_names = mat_rpy2py(conv.slots['class']).tolist()
raise TypeError(f'Cannot convert {attr}{n}” of type(s) {cls_names} to Python')
rv.append(conv.T if transpose else conv)
return rv
Expand All @@ -89,7 +89,7 @@ def convert_mats(attr: str, mats: Mapping[str, Sexp], *, transpose: bool = False
if not isinstance(assay_names, NULLType):
assay_names = [str(a) for a in se.assayNames(obj)]
# The assays can be stored in an env or elsewise so we don’t use obj.slots['assays']
assays = convert_mats(f'assay', {n: se.assay(obj, n) for n in assay_names}, transpose=True)
assays = convert_mats('assay', {n: se.assay(obj, n) for n in assay_names}, transpose=True)
# There’s SingleCellExperiment with no assays
exprs, layers = assays[0], dict(zip(assay_names[1:], assays[1:]))
assert len(exprs.shape) == 2, exprs.shape
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# the following line is only necessary for IDEs
from anndata2ri.test_utils import py2r, r2py # noqa: F401
32 changes: 28 additions & 4 deletions tests/test_py2rpy.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from typing import List
from warnings import WarningMessage, catch_warnings, simplefilter
from warnings import catch_warnings, simplefilter

import numpy as np
import pytest
import scanpy as sc
from anndata import AnnData
from pandas import DataFrame
from rpy2.robjects import baseenv, globalenv
from rpy2.robjects.conversion import localconverter

import anndata2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import py2r # noqa


def mk_ad_simple():
Expand Down Expand Up @@ -56,9 +56,33 @@ def test_py2rpy2_numpy_pbmc68k():

try:
anndata2ri.activate()
with catch_warnings(record=True) as logs: # type: List[WarningMessage]
with catch_warnings(record=True) as logs:
simplefilter('ignore', DeprecationWarning)
globalenv['adata'] = pbmc68k_reduced()
assert len(logs) == 0, [m.message for m in logs]
finally:
anndata2ri.deactivate()


@pytest.mark.parametrize('attr', ['X', 'layers', 'obsm'])
def test_dfs(attr):
    """Smoke test: an AnnData holding a DataFrame in X, layers or obsm converts.

    The selected slot is replaced by a DataFrame indexed by the observation
    names; the object is then assigned into R's global environment under the
    package converter. No assertion is made on the result - the test only
    checks that the assignment does not raise.
    """
    adata = mk_ad_simple()

    def as_frame(values):
        # Wrap the raw matrix so that its rows are labelled like the observations.
        return DataFrame(values, index=adata.obs_names)

    if attr == 'X':
        adata.X = as_frame(adata.X)
    elif attr == 'layers':
        adata.layers['X2'] = as_frame(adata.X)
    elif attr == 'obsm':
        adata.obsm['X_pca'] = as_frame(adata.obsm['X_pca'])
    else:
        # Guards against a parametrization value without a matching branch.
        assert False, attr

    with localconverter(anndata2ri.converter):
        globalenv['adata_obsm_pd'] = adata


def test_df_error():
    """A DataFrame in obsm with a non-numeric column is rejected with a ValueError.

    Only column ``b`` holds strings, so it is the only label expected in the
    error message.
    """
    adata = mk_ad_simple()
    mixed_frame = DataFrame(dict(a=[1, 2], b=list('ab'), c=[1.0, 2.0]), index=adata.obs_names)
    adata.obsm['stuff'] = mixed_frame

    expected_msg = r"DataFrame contains non-numeric columns \['b'\]"
    with pytest.raises(ValueError, match=expected_msg):
        anndata2ri.converter.py2rpy(adata)
1 change: 0 additions & 1 deletion tests/test_rpy2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import anndata2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import r2py # noqa


as_ = getattr(importr('methods'), 'as')
Expand Down
6 changes: 2 additions & 4 deletions tests/test_scipy_py2rpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from scipy import sparse

from anndata2ri import scipy2ri
from anndata2ri.test_utils import conversions_py2rpy


mats = [
Expand All @@ -19,12 +18,11 @@


@pytest.mark.parametrize('typ', ['l', 'd'])
@pytest.mark.parametrize('conversion', conversions_py2rpy)
@pytest.mark.parametrize('shape,dataset,cls', mats)
def test_py2rpy(typ, conversion, shape, dataset, cls):
def test_py2rpy(py2r, typ, shape, dataset, cls):
if typ == 'l':
dataset = dataset.astype(bool)
sm = conversion(scipy2ri, dataset)
sm = py2r(scipy2ri, dataset)
assert f'{typ}{cls}Matrix' in set(sm.rclass)
assert tuple(baseenv['dim'](sm)) == shape

Expand Down
6 changes: 2 additions & 4 deletions tests/test_scipy_rpy2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from anndata2ri import scipy2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import ConversionModule, conversions_rpy2py


matrix = importr('Matrix')
Expand Down Expand Up @@ -54,17 +53,16 @@
]


@pytest.mark.parametrize('conversion', conversions_rpy2py)
@pytest.mark.parametrize('shape,cls,dtype,arr,dataset', mats)
def test_py2rpy(
conversion: Callable[[ConversionModule, Callable[[], Sexp]], sparse.spmatrix],
r2py,
shape: Tuple[int, int],
cls: Type[sparse.spmatrix],
dtype: np.dtype,
arr: np.ndarray,
dataset: Callable[[], Sexp],
):
sm = conversion(scipy2ri, dataset)
sm = r2py(scipy2ri, dataset)
assert isinstance(sm, cls)
assert sm.shape == shape
assert np.allclose(sm.toarray(), np.array(arr))
Expand Down

0 comments on commit 637e30f

Please sign in to comment.