From 60c9aebfc68a9d6e4b0b5efbeb0ce2467f9242d6 Mon Sep 17 00:00:00 2001 From: Scott Gigante <84813314+scottgigante-immunai@users.noreply.github.com> Date: Fri, 1 Jul 2022 04:44:13 -0400 Subject: [PATCH] Upgrade rpy2 to >= 3.4.3 (#80) --- .github/workflows/run_tests.yml | 5 +- pyproject.toml | 2 +- src/anndata2ri/r2py.py | 15 ++-- src/anndata2ri/scipy2ri/py2r.py | 144 +++++++++++++++++++++----------- 4 files changed, 107 insertions(+), 59 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index d55b6ac..1abf61d 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -40,8 +40,9 @@ jobs: - name: Install system dependencies if: runner.os == 'Linux' run: | - sudo apt-get update -qq - sudo apt-get install -y pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg libhdf5-dev + sudo apt-get update -qq --allow-releaseinfo-change + sudo apt-get install -y --no-install-recommends \ + pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg libhdf5-dev - name: Set up Python uses: actions/setup-python@v1 diff --git a/pyproject.toml b/pyproject.toml index fed6335..8b68d48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dynamic = ['version'] requires-python = '>= 3.7' dependencies = [ 'get_version', - 'rpy2 >= 3.4, < 3.4.3', + 'rpy2 >= 3.4.3', 'tzlocal', # for pandas2ri 'anndata', ] diff --git a/src/anndata2ri/r2py.py b/src/anndata2ri/r2py.py index 367535e..599910f 100644 --- a/src/anndata2ri/r2py.py +++ b/src/anndata2ri/r2py.py @@ -4,7 +4,7 @@ import pandas as pd from anndata import AnnData from rpy2.rinterface import IntSexpVector, NULLType, Sexp, SexpS4, baseenv -from rpy2.robjects import default_converter, pandas2ri +from rpy2.robjects import default_converter, numpy2ri, pandas2ri from rpy2.robjects.conversion import localconverter from rpy2.robjects.robject import RSlots @@ -39,11 +39,16 @@ def rpy2py_vector(v): return v if isinstance(v, IntSexpVector): assert v._R_SIZEOF_ELT == 4, 'R integer size changed away from 32 bit' + r = pd.array(v, dtype=pd.Int32Dtype()) + v_is_na = numpy2ri.rpy2py(baseenv['is.na'](v)).astype(bool) if 'factor' in v.rclass: - r = pandas2ri.rpy2py(v) - else: - r = pd.array(v, dtype=pd.Int32Dtype()) - r[np.array(baseenv['is.na'](v), dtype=bool)] = pd.NA + levels = numpy2ri.rpy2py(baseenv['levels'](v)) + codes = r.to_numpy() - 1 + # temporarily set NA values to a valid index + codes[v_is_na] = 0 + codes = codes.astype(int) + r = pd.array(levels[codes], dtype=pd.CategoricalDtype(levels)) + r[v_is_na] = pd.NA return r return pandas2ri.rpy2py(v) diff --git a/src/anndata2ri/scipy2ri/py2r.py b/src/anndata2ri/scipy2ri/py2r.py index 88f5500..9986b0e 100644 --- a/src/anndata2ri/scipy2ri/py2r.py +++ b/src/anndata2ri/scipy2ri/py2r.py @@ -1,28 +1,29 @@ from functools import wraps -from typing import Any, Callable, Optional, Tuple, Type +from typing import Callable, Optional import numpy as np from rpy2.rinterface import Sexp -from rpy2.robjects import BoolVector, FloatVector, IntVector, Vector, baseenv, default_converter, numpy2ri +from rpy2.robjects import default_converter, numpy2ri from rpy2.robjects.conversion import localconverter -from rpy2.robjects.packages import Package +from rpy2.robjects.packages import Package, SignatureTranslatedAnonymousPackage from scipy import sparse from ..rpy2_ext import importr from .conv import converter -methods: Optional[Package] = None -as_logical: Optional[Callable[[Any], BoolVector]] = None -as_integer: Optional[Callable[[Any], IntVector]] = None -as_double: Optional[Callable[[Any], FloatVector]] = None +matrix: Optional[SignatureTranslatedAnonymousPackage] = None +base: Optional[Package] = None -def get_type_conv(dtype: np.dtype) -> Tuple[str, Callable[[np.ndarray], Sexp], Type[Vector]]: +def get_type_conv(dtype: np.dtype) -> Callable[[np.ndarray], Sexp]: + global base + if base is None: + base = importr('base') if np.issubdtype(dtype, np.floating): - return 'd', as_double, FloatVector + return base.as_double elif np.issubdtype(dtype, np.bool_): - return 'l', as_logical, BoolVector + return base.as_logical else: raise ValueError(f'Unknown dtype {dtype!r} cannot be converted to ?gRMatrix.') @@ -30,16 +31,61 @@ def get_type_conv(dtype: np.dtype) -> Tuple[str, Callable[[np.ndarray], Sexp], T def py2r_context(f): @wraps(f) def wrapper(obj): - global methods, as_logical, as_integer, as_double - if methods is None: + global as_logical, as_integer, as_double, matrix + if matrix is None: importr('Matrix') # make class available - methods = importr('methods') - as_logical = baseenv['as.logical'] - as_integer = baseenv['as.integer'] - as_double = baseenv['as.double'] - - with localconverter(default_converter + numpy2ri.converter): - return f(obj) + matrix = SignatureTranslatedAnonymousPackage( + """ + sparse_matrix <- function(x, conv_data, dims, ...) { + Matrix::sparseMatrix( + ..., + x=conv_data(x), + dims=as.integer(dims), + index1=FALSE + ) + } + + from_csc <- function(i, p, x, dims, conv_data) { + sparse_matrix( + i=as.integer(i), + p=as.integer(p), + x=x, + conv_data=conv_data, + dims=dims, + repr="C" + ) + } + + from_csr <- function(j, p, x, dims, conv_data) { + sparse_matrix( + j=as.integer(j), + p=as.integer(p), + x=x, + conv_data=conv_data, + dims=dims, + repr="R" + ) + } + + from_coo <- function(i, j, x, dims, conv_data) { + sparse_matrix( + i=as.integer(i), + j=as.integer(j), + x=x, + conv_data=conv_data, + dims=dims, + repr="T" + ) + } + + from_dia <- function(n, x, conv_data) { + Matrix::Diagonal(n=as.integer(n), x=conv_data(x)) + } + """, + 'matrix', + ) + + return f(obj) return wrapper @@ -48,56 +94,52 @@ def wrapper(obj): @py2r_context def csc_to_rmat(csc: sparse.csc_matrix): csc.sort_indices() - t, conv_data, _ = get_type_conv(csc.dtype) - return methods.new( - f'{t}gCMatrix', - i=as_integer(csc.indices), - p=as_integer(csc.indptr), - x=conv_data(csc.data), - Dim=as_integer(list(csc.shape)), - ) + conv_data = get_type_conv(csc.dtype) + with localconverter(default_converter + numpy2ri.converter): + return matrix.from_csc(i=csc.indices, p=csc.indptr, x=csc.data, dims=list(csc.shape), conv_data=conv_data) @converter.py2rpy.register(sparse.csr_matrix) @py2r_context def csr_to_rmat(csr: sparse.csr_matrix): csr.sort_indices() - t, conv_data, _ = get_type_conv(csr.dtype) - return methods.new( - f'{t}gRMatrix', - j=as_integer(csr.indices), - p=as_integer(csr.indptr), - x=conv_data(csr.data), - Dim=as_integer(list(csr.shape)), - ) + conv_data = get_type_conv(csr.dtype) + with localconverter(default_converter + numpy2ri.converter): + return matrix.from_csr( + j=csr.indices, + p=csr.indptr, + x=csr.data, + conv_data=conv_data, + dims=list(csr.shape), + ) @converter.py2rpy.register(sparse.coo_matrix) @py2r_context def coo_to_rmat(coo: sparse.coo_matrix): - t, conv_data, _ = get_type_conv(coo.dtype) - return methods.new( - f'{t}gTMatrix', - i=as_integer(coo.row), - j=as_integer(coo.col), - x=conv_data(coo.data), - Dim=as_integer(list(coo.shape)), - ) + conv_data = get_type_conv(coo.dtype) + with localconverter(default_converter + numpy2ri.converter): + return matrix.from_coo( + i=coo.row, + j=coo.col, + x=coo.data, + conv_data=conv_data, + dims=list(coo.shape), + ) @converter.py2rpy.register(sparse.dia_matrix) @py2r_context def dia_to_rmat(dia: sparse.dia_matrix): - t, conv_data, vec_cls = get_type_conv(dia.dtype) + conv_data = get_type_conv(dia.dtype) if len(dia.offsets) > 1: raise ValueError( 'Cannot convert a dia_matrix with more than 1 diagonal to a *diMatrix. ' f'R diagonal matrices only support 1 diagonal, but this has {len(dia.offsets)}.' ) - is_unit = np.all(dia.data == 1) - return methods.new( - f'{t}diMatrix', - x=vec_cls([]) if is_unit else conv_data(dia.data), - diag='U' if is_unit else 'N', - Dim=as_integer(list(dia.shape)), - ) + with localconverter(default_converter + numpy2ri.converter): + return matrix.from_dia( + n=dia.shape[0], + x=dia.data, + conv_data=conv_data, + )