Skip to content

Commit

Permalink
Convert factors to pd.Categorical (#41)
Browse files Browse the repository at this point in the history
  • Loading branch information
flying-sheep authored Jan 29, 2020
1 parent a2bcfcc commit 2dbc4b3
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 11 deletions.
2 changes: 1 addition & 1 deletion anndata2ri/py2r.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.vectors import ListVector
from rpy2.robjects.methods import RS4
from rpy2.robjects.packages import importr

from . import conv_name
from .conv import converter, mat_converter, full_converter
from .rpy2_ext import importr


class NotConvertedWarning(Warning):
Expand Down
6 changes: 3 additions & 3 deletions anndata2ri/r2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from anndata import AnnData

from rpy2.rinterface import NULLType, SexpS4
from rpy2.robjects import default_converter
from rpy2.robjects import default_converter, pandas2ri
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.robject import RSlots
from rpy2.robjects.packages import importr

from . import conv_name
from .conv import converter, mat_converter, full_converter
from .rpy2_ext import importr
from .scipy2ri import supported_r_matrix_classes
from .scipy2ri.r2py import rmat_to_spmat

Expand All @@ -35,7 +35,7 @@ def rpy2py_data_frame(obj: SexpS4) -> pd.DataFrame:
"""
S4 DataFrame class, not data.frame
"""
with localconverter(default_converter):
with localconverter(default_converter + pandas2ri.converter):
slots = RSlots(obj)
columns = dict(slots["listData"].items())
rownames = slots["rownames"]
Expand Down
18 changes: 18 additions & 0 deletions anndata2ri/rpy2_ext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from functools import lru_cache
from typing import Optional, Union

from rpy2.robjects import packages, Environment


@lru_cache()
def importr(name: str) -> packages.Package:
    """Import the R package ``name`` via rpy2, memoizing the result.

    Thin wrapper around ``rpy2.robjects.packages.importr`` decorated with
    ``lru_cache``, so repeated requests for the same package name reuse the
    previously returned object instead of importing again.
    """
    package = packages.importr(name)
    return package


@lru_cache()
def data(package: str, name: Optional[str] = None) -> Union[packages.PackageData, Environment]:
    """Return the datasets of an R package, or fetch a single one by name.

    With only ``package`` given, the result of ``packages.data`` for the
    (cached) imported package is returned. With ``name`` given as well, the
    named dataset is fetched from that collection.

    Both call forms are memoized through ``lru_cache``.
    """
    if name is not None:
        # Go through the cached one-argument form so the dataset collection
        # is reused, and only the fetch happens here.
        collection = data(package)
        return collection.fetch(name)
    return packages.data(importr(package))
3 changes: 2 additions & 1 deletion anndata2ri/scipy2ri/py2r.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from rpy2.robjects import default_converter, numpy2ri, baseenv
from rpy2.robjects import Vector, BoolVector, IntVector, FloatVector
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.packages import importr, Package
from rpy2.robjects.packages import Package
from scipy import sparse

from ..rpy2_ext import importr
from .conv import converter


Expand Down
2 changes: 1 addition & 1 deletion tests/test_py2rpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import scanpy as sc
from anndata import AnnData
from rpy2.robjects import baseenv, globalenv
from rpy2.robjects.packages import importr

import anndata2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import conversions_py2rpy


Expand Down
20 changes: 16 additions & 4 deletions tests/test_rpy2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
import pandas as pd
from anndata import AnnData
from rpy2.robjects import r, conversion
from rpy2.robjects.packages import importr, data

import anndata2ri
from anndata2ri.rpy2_ext import importr, data
from anndata2ri.test_utils import conversions_rpy2py

as_ = getattr(importr("methods"), "as")
se = importr("SummarizedExperiment")
sce = importr("SingleCellExperiment")
sc_rna_seq_data = data(importr("scRNAseq"))
as_ = getattr(importr("methods"), "as")
sumex_allen = data("scRNAseq", "allen")["allen"]


def check_allen(adata):
Expand All @@ -23,7 +23,6 @@ def check_example(adata):
assert adata.obsm["X_pca"].shape == (100, 5)


sumex_allen = sc_rna_seq_data.fetch("allen")["allen"]
code_example = """
local({
ncells <- 100
Expand Down Expand Up @@ -60,3 +59,16 @@ def test_convert_empty_df_with_rows(convert):

df_py = convert(anndata2ri, lambda: conversion.rpy2py(df))
assert isinstance(df_py, pd.DataFrame)


@pytest.mark.parametrize("convert", conversions_rpy2py)
def test_convert_factor(convert):
    """Check that an R factor column in ``colData`` ends up as a ``pd.Categorical`` in ``obs``."""
    r_code = """
SingleCellExperiment::SingleCellExperiment(
assays = list(counts = matrix(rpois(6*4, 5), ncol=4)),
colData = S4Vectors::DataFrame(a_factor = factor(c(rep('A', 3), rep('B', 1))))
)
"""
    ad = convert(anndata2ri, lambda: r(r_code))

    # The factor must arrive as a categorical, not as plain strings or codes.
    observed = ad.obs["a_factor"].values
    assert isinstance(observed, pd.Categorical)

    # Three cells in level "A" followed by one cell in level "B".
    expected = pd.Categorical.from_codes([0, 0, 0, 1], ["A", "B"])
    assert all(observed == expected)
2 changes: 1 addition & 1 deletion tests/test_scipy_rpy2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import pytest
from rpy2.rinterface import Sexp
from rpy2.robjects import baseenv, r, numpy2ri
from rpy2.robjects.packages import importr
from scipy import sparse

from anndata2ri import scipy2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import conversions_rpy2py, ConversionModule


Expand Down

0 comments on commit 2dbc4b3

Please sign in to comment.