Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into remove-pandas-backend-from-ibis-inte…
Browse files Browse the repository at this point in the history
…gration-tests
  • Loading branch information
Matt711 authored Feb 7, 2025
2 parents e9a8f64 + 6a03229 commit e64548e
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 3 deletions.
3 changes: 2 additions & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,6 +39,7 @@
#include <thrust/pair.h>

#include <algorithm>
#include <functional>
#include <type_traits>

/**
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/copying/pack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@

#include <rmm/cuda_stream_view.hpp>

#include <algorithm>
#include <functional>
#include <memory>
#include <utility>
#include <vector>

namespace cudf {
namespace detail {

Expand Down
81 changes: 81 additions & 0 deletions python/cudf/cudf/pandas/_wrappers/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
import abc
import copyreg
import functools
import importlib
import os
import pickle
Expand Down Expand Up @@ -37,6 +38,7 @@
_FunctionProxy,
_maybe_wrap_result,
_Unusable,
is_proxy_object,
make_final_proxy_type as _make_final_proxy_type,
make_intermediate_proxy_type as _make_intermediate_proxy_type,
register_proxy_func,
Expand Down Expand Up @@ -1734,6 +1736,85 @@ def _unpickle_obj(pickled_args):
return obj


# Keep references to the unpatched cudf constructors (and the Index
# metaclass ``__call__``) so the wrappers defined below can delegate
# to the original implementations.
_original_Series_init = cudf.Series.__init__
_original_DataFrame_init = cudf.DataFrame.__init__
_original_Index_init = cudf.Index.__init__
_original_IndexMeta_call = cudf.core.index.IndexMeta.__call__


def wrap_init(original_init):
    """Build a replacement ``__init__`` that accepts cudf.pandas proxies.

    The returned function unwraps a proxy ``data`` argument to its GPU
    object before delegating to ``original_init``.  When the proxy is
    the only argument and its GPU object is already an instance of the
    class being constructed, the new instance adopts that object's
    ``__dict__`` and ``original_init`` is not called at all.

    Parameters
    ----------
    original_init
        The ``__init__`` being wrapped; called as
        ``original_init(self, data, *args, **kwargs)``.

    Returns
    -------
    The wrapping function, decorated with ``functools.wraps``.
    """

    @functools.wraps(original_init)
    def wrapped_init(self, data=None, *args, **kwargs):
        if not is_proxy_object(data):
            # Nothing to unwrap: plain pass-through.
            original_init(self, data, *args, **kwargs)
            return

        gpu_data = data.as_gpu_object()
        only_data_given = not args and not kwargs
        if only_data_given and isinstance(gpu_data, type(self)):
            # Short-circuit: the proxy already wraps an object of the
            # type being constructed, so reuse its state instead of
            # running the full constructor.
            # It is a common case in `cuml` and `xgboost`.
            # For perf impact see:
            # https://github.com/rapidsai/cudf/pull/17878/files#r1936469215
            # NOTE(review): the short-circuit is assumed to apply only to
            # proxy inputs, as the original comment describes -- confirm.
            self.__dict__.update(gpu_data.__dict__)
            return

        original_init(self, gpu_data, *args, **kwargs)

    return wrapped_init


def wrap_call(original_call):
    """Build a replacement metaclass ``__call__`` that unwraps proxies.

    The returned function converts a proxy ``data`` argument to its GPU
    object and then forwards everything to ``original_call``, returning
    whatever that call returns.

    Parameters
    ----------
    original_call
        The ``__call__`` being wrapped; invoked as
        ``original_call(cls, data, *args, **kwargs)``.

    Returns
    -------
    The wrapping function, decorated with ``functools.wraps``.
    """

    @functools.wraps(original_call)
    def wrapped_call(cls, data, *args, **kwargs):
        unwrapped = data.as_gpu_object() if is_proxy_object(data) else data
        return original_call(cls, unwrapped, *args, **kwargs)

    return wrapped_call


@functools.wraps(_original_DataFrame_init)
def DataFrame_init_(
    self, data=None, index=None, columns=None, *args, **kwargs
):
    """Proxy-aware replacement for ``cudf.DataFrame.__init__``.

    Proxy arguments are unwrapped before delegating to the original
    constructor: ``data`` and ``index`` to their GPU objects, ``columns``
    to its CPU object.  When ``data`` was a proxy whose GPU object is
    already an instance of the class being constructed and no other
    argument was supplied, the new instance adopts that object's
    ``__dict__`` and the original constructor is skipped.

    Parameters
    ----------
    data
        Input data, possibly a cudf.pandas proxy.  Defaults to ``None``
        so that constructing without data (e.g. ``cudf.DataFrame()``)
        keeps working once this function is installed as ``__init__``.
    index
        Optional index, possibly a proxy.
    columns
        Optional column labels, possibly a proxy.
    *args, **kwargs
        Forwarded unchanged to the original ``__init__``.
    """
    data_is_proxy = is_proxy_object(data)

    if data_is_proxy:
        data = data.as_gpu_object()
    if is_proxy_object(index):
        index = index.as_gpu_object()
    if is_proxy_object(columns):
        # Unlike ``data`` and ``index``, column labels are unwrapped to
        # the CPU object.
        columns = columns.as_cpu_object()

    nothing_else_given = (
        index is None and columns is None and not args and not kwargs
    )
    if data_is_proxy and isinstance(data, type(self)) and nothing_else_given:
        # Short-circuit: reuse the already-built frame's state instead of
        # running the full constructor again.
        self.__dict__.update(data.__dict__)
        return

    _original_DataFrame_init(self, data, index, columns, *args, **kwargs)


def initial_setup():
    """
    One-time setup hook for the cudf.pandas wrappers.

    Installs the proxy-aware constructors on ``cudf.Series``,
    ``cudf.Index`` and ``cudf.DataFrame``, wraps
    ``IndexMeta.__call__``, and then switches cudf into pandas
    compatibility mode.
    """
    patches = (
        (cudf.Series, "__init__", wrap_init(_original_Series_init)),
        (cudf.Index, "__init__", wrap_init(_original_Index_init)),
        (cudf.DataFrame, "__init__", DataFrame_init_),
        (
            cudf.core.index.IndexMeta,
            "__call__",
            wrap_call(_original_IndexMeta_call),
        ),
    )
    for owner, attr_name, replacement in patches:
        setattr(owner, attr_name, replacement)

    cudf.set_option("mode.pandas_compatible", True)


# Register ``_reduce_obj`` as the copyreg reducer for pd.Timestamp.
copyreg.dispatch_table[pd.Timestamp] = _reduce_obj
# same reducer/unpickler can be used for Timedelta:
copyreg.dispatch_table[pd.Timedelta] = _reduce_obj
4 changes: 2 additions & 2 deletions python/cudf/cudf/pandas/module_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,10 +595,10 @@ def install(
)
mode = deduce_cudf_pandas_mode(slow_lib, fast_lib)
if mode.use_fast_lib:
pandas_wrappers = importlib.import_module(
lib_wrappers = importlib.import_module(
f".._wrappers.{mode.slow_lib}", __name__
)
pandas_wrappers.cudf.set_option("mode.pandas_compatible", True)
lib_wrappers.initial_setup()
try:
(self,) = (
p
Expand Down
62 changes: 62 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import collections
import contextlib
import copy
import cProfile
import datetime
import operator
import os
import pathlib
import pickle
import pstats
import subprocess
import tempfile
import time
Expand Down Expand Up @@ -1910,6 +1912,66 @@ def test_series_dtype_property():
assert expected == actual


def assert_functions_called(profiler, functions):
    """Assert that every name in ``functions`` shows up in the profile.

    Parameters
    ----------
    profiler
        A ``cProfile.Profile`` that has already collected data.
    functions
        Iterable of function names exactly as ``pstats`` reports them,
        e.g. ``"as_gpu_object"`` or
        ``"<method 'update' of 'dict' objects>"``.

    Raises
    ------
    AssertionError
        If at least one requested name was not recorded; the message
        lists the missing names and everything that was recorded.
    """
    stats = pstats.Stats(profiler)

    # pstats keys are (filename, lineno, func_name) tuples; only the
    # function name is compared.
    called_functions = {func_name for _, _, func_name in stats.stats}

    missing = [name for name in functions if name not in called_functions]
    assert not missing, (
        f"expected function(s) not called: {missing}; "
        f"recorded calls: {sorted(called_functions)}"
    )


def test_cudf_series_from_cudf_pandas():
    """cudf.Series built from a proxy Series matches its GPU object.

    The profile must show both ``as_gpu_object`` and ``dict.update``
    being called during construction.
    """
    proxy_series = xpd.Series([1, 2, 3])
    expected_calls = [
        "as_gpu_object",
        "<method 'update' of 'dict' objects>",
    ]

    with cProfile.Profile() as profiler:
        gpu_series = cudf.Series(proxy_series)

    assert_functions_called(profiler, expected_calls)
    tm.assert_equal(proxy_series.as_gpu_object(), gpu_series)


def test_cudf_dataframe_from_cudf_pandas():
    """cudf.DataFrame accepts proxy frames and containers of proxy Series.

    Three inputs are covered: a proxy DataFrame (profiled to confirm
    ``as_gpu_object`` and ``dict.update`` are called), a dict of proxy
    Series, and a list of proxy Series.
    """
    # Case 1: a proxy DataFrame passed directly.
    proxy_frame = xpd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    expected_calls = [
        "as_gpu_object",
        "<method 'update' of 'dict' objects>",
    ]
    with cProfile.Profile() as profiler:
        gpu_frame = cudf.DataFrame(proxy_frame)
    assert_functions_called(profiler, expected_calls)
    tm.assert_frame_equal(proxy_frame.as_gpu_object(), gpu_frame)

    # Case 2: a dict whose values are proxy Series.
    proxy_frame = xpd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    series_by_name = {
        "a": xpd.Series([1, 2, 3]),
        "b": xpd.Series([1, 2, 3]),
    }
    gpu_frame = cudf.DataFrame(series_by_name)
    tm.assert_frame_equal(proxy_frame.as_gpu_object(), gpu_frame)

    # Case 3: a list of proxy Series, one per row.
    proxy_frame = xpd.DataFrame({0: [1, 2, 3], 1: [1, 2, 3]})
    row_series = [
        xpd.Series([1, 1]),
        xpd.Series([2, 2]),
        xpd.Series([3, 3]),
    ]
    gpu_frame = cudf.DataFrame(row_series)
    tm.assert_frame_equal(proxy_frame.as_gpu_object(), gpu_frame)


def test_cudf_index_from_cudf_pandas():
    """cudf.Index built from a proxy Index matches its GPU object.

    The profile must show ``as_gpu_object`` being called during
    construction.
    """
    proxy_index = xpd.Index([1, 2, 3])

    with cProfile.Profile() as profiler:
        gpu_index = cudf.Index(proxy_index)

    assert_functions_called(profiler, ["as_gpu_object"])
    tm.assert_equal(proxy_index.as_gpu_object(), gpu_index)


def test_numpy_data_access():
s = pd.Series([1, 2, 3])
xs = xpd.Series([1, 2, 3])
Expand Down

0 comments on commit e64548e

Please sign in to comment.