From 9d6953a6fa3c81bd6103d2fc21231cc47c906f88 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 19 Feb 2025 09:00:46 -0500 Subject: [PATCH 1/8] Remove deprecated single component datetime extract APIs (#18010) Follows up #17221 to remove the deprecated APIs. Note: This should have been removed in 25.02. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/18010 --- cpp/include/cudf/datetime.hpp | 191 +----------------- cpp/include/cudf/detail/datetime.hpp | 92 +-------- cpp/src/datetime/datetime_ops.cu | 152 +------------- python/pylibcudf/pylibcudf/datetime.pxd | 14 +- python/pylibcudf/pylibcudf/datetime.pyi | 3 - python/pylibcudf/pylibcudf/datetime.pyx | 80 +------- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 32 +-- .../pylibcudf/tests/test_datetime.py | 22 +- 8 files changed, 7 insertions(+), 579 deletions(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 1f6e86d0389..f385ede96b9 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,195 +54,6 @@ enum class datetime_component : uint8_t { NANOSECOND }; -/** - * @brief Extracts year from any datetime type and returns an int16_t - * cudf::column. 
- * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t years - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_year( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts month from any datetime type and returns an int16_t - * cudf::column. - * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t months - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_month( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts day from any datetime type and returns an int16_t - * cudf::column. 
- * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t days - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_day( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts a weekday from any datetime type and returns an int16_t - * cudf::column. - * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t days - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_weekday( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts hour from any datetime type and returns an int16_t - * cudf::column. 
- * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t hours - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_hour( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts minute from any datetime type and returns an int16_t - * cudf::column. - * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t minutes - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_minute( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts second from any datetime type and returns an int16_t - * cudf::column. 
- * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t seconds - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_second( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts millisecond fraction from any datetime type and returns an int16_t - * cudf::column. - * - * A millisecond fraction is only the 3 digits that make up the millisecond portion of a duration. - * For example, the millisecond fraction of 1.234567890 seconds is 234. - * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t milliseconds - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_millisecond_fraction( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts microsecond fraction from any datetime type and returns an int16_t - * cudf::column. - * - * A microsecond fraction is only the 3 digits that make up the microsecond portion of a duration. - * For example, the microsecond fraction of 1.234567890 seconds is 567. 
- * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t microseconds - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_microsecond_fraction( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - -/** - * @brief Extracts nanosecond fraction from any datetime type and returns an int16_t - * cudf::column. - * - * A nanosecond fraction is only the 3 digits that make up the nanosecond portion of a duration. - * For example, the nanosecond fraction of 1.234567890 seconds is 890. - * - * @deprecated Deprecated in 24.12, to be removed in 25.02 - * - * @param column cudf::column_view of the input datetime values - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate device memory of the returned column - * - * @returns cudf::column of the extracted int16_t nanoseconds - * @throw cudf::logic_error if input column datatype is not TIMESTAMP - */ -[[deprecated]] std::unique_ptr extract_nanosecond_fraction( - cudf::column_view const& column, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); - /** * @brief Extracts the specified datetime component from any datetime type and * returns an int16_t cudf::column. 
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index df3050d6494..2b01231deab 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,96 +25,6 @@ namespace CUDF_EXPORT cudf { namespace datetime { namespace detail { -/** - * @copydoc cudf::extract_year(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_year(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_month(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_month(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_day(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_day(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_weekday(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_hour(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_hour(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_minute(cudf::column_view const&, rmm::cuda_stream_view, - * 
rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_minute(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_second(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_second(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_millisecond_fraction(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_microsecond_fraction(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, rmm::cuda_stream_view, - * rmm::device_async_resource_ref) - * - */ -std::unique_ptr extract_nanosecond_fraction(cudf::column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - /** * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component, * rmm::cuda_stream_view, rmm::device_async_resource_ref) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index a497cedb3bc..62f702ac147 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -436,76 +436,6 @@ std::unique_ptr round_general(rounding_function round_kind, column.type(), dispatch_round{}, round_kind, component, column, stream, mr); } -std::unique_ptr extract_year(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr); -} - -std::unique_ptr extract_month(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr); -} - -std::unique_ptr extract_day(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr); -} - -std::unique_ptr extract_weekday(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr); -} - -std::unique_ptr extract_hour(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr); -} - -std::unique_ptr extract_minute(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr); -} - -std::unique_ptr extract_second(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr); -} - -std::unique_ptr extract_millisecond_fraction(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr); -} - 
-std::unique_ptr extract_microsecond_fraction(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr); -} - -std::unique_ptr extract_nanosecond_fraction(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr); -} - std::unique_ptr last_day_of_month(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) @@ -598,62 +528,6 @@ std::unique_ptr round_datetimes(column_view const& column, return detail::round_general(detail::rounding_function::ROUND, freq, column, stream, mr); } -std::unique_ptr extract_year(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_year(column, stream, mr); -} - -std::unique_ptr extract_month(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_month(column, stream, mr); -} - -std::unique_ptr extract_day(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_day(column, stream, mr); -} - -std::unique_ptr extract_weekday(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_weekday(column, stream, mr); -} - -std::unique_ptr extract_hour(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_hour(column, stream, mr); -} - -std::unique_ptr extract_minute(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return 
detail::extract_minute(column, stream, mr); -} - -std::unique_ptr extract_second(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_second(column, stream, mr); -} - std::unique_ptr extract_datetime_component(cudf::column_view const& column, datetime_component component, rmm::cuda_stream_view stream, @@ -663,30 +537,6 @@ std::unique_ptr extract_datetime_component(cudf::column_view const return detail::extract_datetime_component(column, component, stream, mr); } -std::unique_ptr extract_millisecond_fraction(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_millisecond_fraction(column, stream, mr); -} - -std::unique_ptr extract_microsecond_fraction(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_microsecond_fraction(column, stream, mr); -} - -std::unique_ptr extract_nanosecond_fraction(column_view const& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::extract_nanosecond_fraction(column, stream, mr); -} - std::unique_ptr last_day_of_month(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 335ef435f9b..ce295990d26 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from pylibcudf.column cimport Column from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency @@ -8,18 +8,6 @@ ctypedef fused ColumnOrScalar: Column Scalar -cpdef Column extract_millisecond_fraction( - Column input -) - -cpdef Column extract_microsecond_fraction( - Column input -) - -cpdef Column extract_nanosecond_fraction( - Column input -) - cpdef Column extract_datetime_component( Column input, datetime_component component diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi index 6a3ae7953d9..8eedaeefe61 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyi +++ b/python/pylibcudf/pylibcudf/datetime.pyi @@ -26,9 +26,6 @@ class RoundingFrequency(IntEnum): MICROSECOND = ... NANOSECOND = ... -def extract_millisecond_fraction(input: Column) -> Column: ... -def extract_microsecond_fraction(input: Column) -> Column: ... -def extract_nanosecond_fraction(input: Column) -> Column: ... def extract_datetime_component( input: Column, component: DatetimeComponent ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index b100e3e22d0..15aee4c3e9e 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column @@ -9,9 +9,6 @@ from pylibcudf.libcudf.datetime cimport ( day_of_year as cpp_day_of_year, days_in_month as cpp_days_in_month, extract_datetime_component as cpp_extract_datetime_component, - extract_microsecond_fraction as cpp_extract_microsecond_fraction, - extract_millisecond_fraction as cpp_extract_millisecond_fraction, - extract_nanosecond_fraction as cpp_extract_nanosecond_fraction, extract_quarter as cpp_extract_quarter, floor_datetimes as cpp_floor_datetimes, is_leap_year as cpp_is_leap_year, @@ -37,9 +34,6 @@ __all__ = [ "day_of_year", "days_in_month", "extract_datetime_component", - "extract_microsecond_fraction", - "extract_millisecond_fraction", - "extract_nanosecond_fraction", "extract_quarter", "floor_datetimes", "is_leap_year", @@ -47,78 +41,6 @@ __all__ = [ "round_datetimes", ] -cpdef Column extract_millisecond_fraction( - Column input -): - """ - Extract the millisecond from a datetime column. - - For details, see :cpp:func:`extract_millisecond_fraction`. - - Parameters - ---------- - input : Column - The column to extract the millisecond from. - - Returns - ------- - Column - Column with the extracted milliseconds. - """ - cdef unique_ptr[column] result - - with nogil: - result = cpp_extract_millisecond_fraction(input.view()) - return Column.from_libcudf(move(result)) - -cpdef Column extract_microsecond_fraction( - Column input -): - """ - Extract the microsecond fraction from a datetime column. - - For details, see :cpp:func:`extract_microsecond_fraction`. - - Parameters - ---------- - input : Column - The column to extract the microsecond fraction from. - - Returns - ------- - Column - Column with the extracted microsecond fractions. 
- """ - cdef unique_ptr[column] result - - with nogil: - result = cpp_extract_microsecond_fraction(input.view()) - return Column.from_libcudf(move(result)) - -cpdef Column extract_nanosecond_fraction( - Column input -): - """ - Extract the nanosecond fraction from a datetime column. - - For details, see :cpp:func:`extract_nanosecond_fraction`. - - Parameters - ---------- - input : Column - The column to extract the nanosecond fraction from. - - Returns - ------- - Column - Column with the extracted nanosecond fractions. - """ - cdef unique_ptr[column] result - - with nogil: - result = cpp_extract_nanosecond_fraction(input.view()) - return Column.from_libcudf(move(result)) - cpdef Column extract_datetime_component( Column input, datetime_component component diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index 049a1b06c2e..7dacab668b6 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t, uint8_t from libcpp.memory cimport unique_ptr @@ -21,36 +21,6 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: MICROSECOND NANOSECOND - cdef unique_ptr[column] extract_year( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_month( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_day( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_weekday( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_hour( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_minute( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_second( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_millisecond_fraction( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_microsecond_fraction( - const column_view& column - ) except +libcudf_exception_handler - cdef unique_ptr[column] extract_nanosecond_fraction( - const column_view& column - ) except +libcudf_exception_handler cdef unique_ptr[column] extract_datetime_component( const column_view& column, datetime_component component diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index f5f24ef28e2..6251a4bbb86 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
import calendar import datetime @@ -77,26 +77,6 @@ def test_extract_datetime_component(datetime_column, component): assert_column_eq(expect, got) -@pytest.mark.parametrize( - "datetime_func", - [ - "extract_millisecond_fraction", - "extract_microsecond_fraction", - "extract_nanosecond_fraction", - ], -) -def test_datetime_extracting_functions(datetime_column, datetime_func): - pa_col = plc.interop.to_arrow(datetime_column) - got = getattr(plc.datetime, datetime_func)(datetime_column) - kwargs = {} - attr = datetime_func.split("_")[1] - if attr == "weekday": - kwargs = {"count_from_zero": False} - attr = "day_of_week" - expect = getattr(pc, attr)(pa_col, **kwargs).cast(pa.int16()) - assert_column_eq(expect, got) - - @pytest.mark.parametrize( "op", [ From d660873068bd9a54d9a78f6eabd3eaf53e0296b1 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 19 Feb 2025 11:16:34 -0500 Subject: [PATCH 2/8] Refactor math_ops.cu dispatcher logic (#18006) Refactors the type-dispatcher logic and cleans up the code in `math_ops.cu` for unary operations. Three of the four dispatch functors had the same logic except for the supported types SFINAE clause. Also correcting the code for handling RINT properly created a 4th common functor. These have been refactored into a single functor and separated from the supported-types checks. The single functor now accepts the transform function as well as the supported-types expression. Also, the 2nd dispatcher call for dictionary was replaced with an if-statement to help simplify the code and minimize maintenance syncing up the supported-types clauses correctly. One side effect is that more ops are now supported appropriately with dictionary types. 
Referenced cleanup needed here: https://github.com/rapidsai/cudf/pull/17560#discussion_r1934160760 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Matthew Murray (https://github.com/Matt711) - Shruti Shivakumar (https://github.com/shrshi) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/18006 --- cpp/src/unary/math_ops.cu | 323 +++++++++++++------------------------- 1 file changed, 112 insertions(+), 211 deletions(-) diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 4e96f900bf3..aead6710082 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -27,9 +27,9 @@ #include +#include #include -#include #include namespace cudf { @@ -42,7 +42,7 @@ struct DeviceSin { template __device__ T operator()(T data) { - return std::sin(data); + return cuda::std::sin(data); } }; @@ -50,7 +50,7 @@ struct DeviceCos { template __device__ T operator()(T data) { - return std::cos(data); + return cuda::std::cos(data); } }; @@ -58,7 +58,7 @@ struct DeviceTan { template __device__ T operator()(T data) { - return std::tan(data); + return cuda::std::tan(data); } }; @@ -66,7 +66,7 @@ struct DeviceArcSin { template __device__ T operator()(T data) { - return std::asin(data); + return cuda::std::asin(data); } }; @@ -74,7 +74,7 @@ struct DeviceArcCos { template __device__ T operator()(T data) { - return std::acos(data); + return cuda::std::acos(data); } }; @@ -82,7 +82,7 @@ struct DeviceArcTan { template __device__ T operator()(T data) { - return std::atan(data); + return cuda::std::atan(data); } }; @@ -90,7 +90,7 @@ struct DeviceSinH { template __device__ T operator()(T data) { - return std::sinh(data); + return cuda::std::sinh(data); } }; @@ -98,7 +98,7 @@ struct DeviceCosH { template __device__ T operator()(T data) { - return std::cosh(data); + return cuda::std::cosh(data); } }; @@ -106,7 +106,7 @@ struct DeviceTanH { template __device__ T operator()(T data) { - return std::tanh(data); + 
return cuda::std::tanh(data); } }; @@ -114,7 +114,7 @@ struct DeviceArcSinH { template __device__ T operator()(T data) { - return std::asinh(data); + return cuda::std::asinh(data); } }; @@ -122,7 +122,7 @@ struct DeviceArcCosH { template __device__ T operator()(T data) { - return std::acosh(data); + return cuda::std::acosh(data); } }; @@ -130,7 +130,7 @@ struct DeviceArcTanH { template __device__ T operator()(T data) { - return std::atanh(data); + return cuda::std::atanh(data); } }; @@ -140,7 +140,7 @@ struct DeviceExp { template __device__ T operator()(T data) { - return std::exp(data); + return cuda::std::exp(data); } }; @@ -148,7 +148,7 @@ struct DeviceLog { template __device__ T operator()(T data) { - return std::log(data); + return cuda::std::log(data); } }; @@ -156,7 +156,7 @@ struct DeviceSqrt { template __device__ T operator()(T data) { - return std::sqrt(data); + return cuda::std::sqrt(data); } }; @@ -164,7 +164,7 @@ struct DeviceCbrt { template __device__ T operator()(T data) { - return std::cbrt(data); + return cuda::std::cbrt(data); } }; @@ -174,7 +174,7 @@ struct DeviceCeil { template __device__ T operator()(T data) { - return std::ceil(data); + return cuda::std::ceil(data); } }; @@ -182,7 +182,7 @@ struct DeviceFloor { template __device__ T operator()(T data) { - return std::floor(data); + return cuda::std::floor(data); } }; @@ -190,7 +190,7 @@ struct DeviceAbs { template std::enable_if_t, T> __device__ operator()(T data) { - return std::abs(data); + return cuda::std::abs(data); } template std::enable_if_t, T> __device__ operator()(T data) @@ -199,18 +199,13 @@ struct DeviceAbs { } }; -struct DeviceRInt { - template - std::enable_if_t, T> __device__ operator()(T data) - { - return std::rint(data); - } +// round float to int - // Dummy to handle other types, will never be executed +struct DeviceRInt { template - std::enable_if_t, T> __device__ operator()(T data) + __device__ T operator()(T data) { - return data; + return cuda::std::rint(data); } }; @@ 
-238,7 +233,7 @@ struct DeviceNot { struct DeviceNegate { template - T __device__ operator()(T data) + __device__ T operator()(T data) { return -data; } @@ -350,7 +345,6 @@ std::unique_ptr transform_fn(InputIterator begin, null_count, stream, mr); - if (size == 0) return output; auto output_view = output->mutable_view(); thrust::transform(rmm::exec_policy(stream), begin, end, output_view.begin(), UFN{}); @@ -358,6 +352,19 @@ std::unique_ptr transform_fn(InputIterator begin, return output; } +template +std::unique_ptr transform_fn(cudf::column_view const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return transform_fn(input.begin(), + input.end(), + detail::copy_bitmask(input, stream, mr), + input.null_count(), + stream, + mr); +} + template std::unique_ptr transform_fn(cudf::dictionary_column_view const& input, rmm::cuda_stream_view stream, @@ -377,136 +384,52 @@ std::unique_ptr transform_fn(cudf::dictionary_column_view const& i output->view(), dictionary::detail::get_indices_type_for_size(output->size()), stream, mr); } -template -struct MathOpDispatcher { - template >* = nullptr> - std::unique_ptr operator()(cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - return transform_fn(input.begin(), - input.end(), - cudf::detail::copy_bitmask(input, stream, mr), - input.null_count(), - stream, - mr); - } - - struct dictionary_dispatch { - template >* = nullptr> - std::unique_ptr operator()(cudf::dictionary_column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - return transform_fn(input, stream, mr); - } - - template - std::enable_if_t, std::unique_ptr> operator()(Args&&...) 
- { - CUDF_FAIL("dictionary keys must be numeric for this operation"); - } - }; - - template < - typename T, - std::enable_if_t and std::is_same_v>* = nullptr> - std::unique_ptr operator()(cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - if (input.is_empty()) return empty_like(input); - auto dictionary_col = dictionary_column_view(input); - return type_dispatcher( - dictionary_col.keys().type(), dictionary_dispatch{}, dictionary_col, stream, mr); - } - - template - std::enable_if_t and !std::is_same_v, - std::unique_ptr> - operator()(Args&&...) - { - CUDF_FAIL("Unsupported data type for operation"); - } +template +struct ArithmeticOps { + static constexpr bool is_supported() { return std::is_arithmetic_v; } }; -template -struct NegateOpDispatcher { - template - static constexpr bool is_supported() - { - return std::is_signed_v || cudf::is_duration(); - } - - template ()>* = nullptr> - std::unique_ptr operator()(cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - return transform_fn(input.begin(), - input.end(), - cudf::detail::copy_bitmask(input, stream, mr), - input.null_count(), - stream, - mr); - } - - template - std::enable_if_t(), std::unique_ptr> operator()(Args&&...) 
- { - CUDF_FAIL("Unsupported data type for negate operation"); - } +template +struct NegateOps { + static constexpr bool is_supported() { return std::is_signed_v || cudf::is_duration(); } }; -template -struct BitwiseOpDispatcher { - template >* = nullptr> - std::unique_ptr operator()(cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - return transform_fn(input.begin(), - input.end(), - cudf::detail::copy_bitmask(input, stream, mr), - input.null_count(), - stream, - mr); - } - - struct dictionary_dispatch { - template >* = nullptr> - std::unique_ptr operator()(cudf::dictionary_column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - return transform_fn(input, stream, mr); - } +template +struct BitWiseOps { + static constexpr bool is_supported() { return std::is_integral_v; } +}; - template - std::enable_if_t, std::unique_ptr> operator()(Args&&...) - { - CUDF_FAIL("dictionary keys type not supported for this operation"); - } - }; +template +struct FloatOnlyOps { + static constexpr bool is_supported() { return std::is_floating_point_v; } +}; - template and std::is_same_v>* = nullptr> +/** + * @brief Generic math-ops dispatcher + * + * Performs a transform on the input data using the operator defined by UFN. + * The Supported type determines which types are allowed by the operator. 
+ * + * @tparam UFN The actual operator to perform on the input data + * @tparam Supported Contains the 'is_supported()' function + */ +template typename Supported> +struct MathOpDispatcher { + template ::is_supported()>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - if (input.is_empty()) return empty_like(input); - auto dictionary_col = dictionary_column_view(input); - return type_dispatcher( - dictionary_col.keys().type(), dictionary_dispatch{}, dictionary_col, stream, mr); + return (input.type().id() == type_id::DICTIONARY32) + ? transform_fn(cudf::dictionary_column_view(input), stream, mr) + : transform_fn(input, stream, mr); } template - std::enable_if_t and !std::is_same_v, - std::unique_ptr> - operator()(Args&&...) + std::enable_if_t::is_supported(), std::unique_ptr> operator()( + Args&&...) { - CUDF_FAIL("Unsupported datatype for operation"); + CUDF_FAIL("Unsupported data type for this operation"); } }; @@ -525,54 +448,26 @@ struct LogicalOpDispatcher { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return transform_fn(input.begin(), - input.end(), - cudf::detail::copy_bitmask(input, stream, mr), - input.null_count(), - - stream, - mr); - } - - struct dictionary_dispatch { - template ()>* = nullptr> - std::unique_ptr operator()(cudf::dictionary_column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - auto dictionary_view = cudf::column_device_view::create(input.parent(), stream); + if (input.type().id() == type_id::DICTIONARY32) { + auto dictionary_view = cudf::column_device_view::create(input, stream); auto dictionary_itr = dictionary::detail::make_dictionary_iterator(*dictionary_view); return transform_fn(dictionary_itr, dictionary_itr + input.size(), - cudf::detail::copy_bitmask(input.parent(), stream, mr), + cudf::detail::copy_bitmask(input, stream, mr), input.null_count(), stream, mr); } - - 
template - std::enable_if_t(), std::unique_ptr> operator()(Args&&...) - { - CUDF_FAIL("dictionary keys type not supported for this operation"); - } - }; - - template () and std::is_same_v>* = nullptr> - std::unique_ptr operator()(cudf::column_view const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - if (input.is_empty()) return make_empty_column(cudf::data_type{cudf::type_id::BOOL8}); - auto dictionary_col = dictionary_column_view(input); - return type_dispatcher( - dictionary_col.keys().type(), dictionary_dispatch{}, dictionary_col, stream, mr); + return transform_fn(input.begin(), + input.end(), + cudf::detail::copy_bitmask(input, stream, mr), + input.null_count(), + stream, + mr); } template - std::enable_if_t() and !std::is_same_v, - std::unique_ptr> - operator()(Args&&...) + std::enable_if_t(), std::unique_ptr> operator()(Args&&...) { CUDF_FAIL("Unsupported datatype for operation"); } @@ -614,79 +509,85 @@ std::unique_ptr unary_operation(cudf::column_view const& input, if (cudf::is_fixed_point(input.type())) return type_dispatcher(input.type(), detail::FixedPointOpDispatcher{}, input, op, stream, mr); + if (input.is_empty()) { + return op == cudf::unary_operator::NOT ? make_empty_column(type_id::BOOL8) : empty_like(input); + } + + // dispatch on the keys if dictionary saves a 2nd dispatch later + auto dispatch_type = input.type().id() == type_id::DICTIONARY32 + ? 
dictionary_column_view(input).keys().type() + : input.type(); + switch (op) { case cudf::unary_operator::SIN: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::COS: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::TAN: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::ARCSIN: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::ARCCOS: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::ARCTAN: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::SINH: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::COSH: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::TANH: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::ARCSINH: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::ARCCOSH: return cudf::type_dispatcher( - 
input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::ARCTANH: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::EXP: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::LOG: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::SQRT: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::CBRT: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::CEIL: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::FLOOR: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::ABS: return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::RINT: - CUDF_EXPECTS( - (input.type().id() == type_id::FLOAT32) or (input.type().id() == type_id::FLOAT64), - "rint expects floating point values"); return cudf::type_dispatcher( - input.type(), detail::MathOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::BIT_INVERT: return cudf::type_dispatcher( - 
input.type(), detail::BitwiseOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); case cudf::unary_operator::NOT: return cudf::type_dispatcher( - input.type(), detail::LogicalOpDispatcher{}, input, stream, mr); + dispatch_type, detail::LogicalOpDispatcher{}, input, stream, mr); case cudf::unary_operator::NEGATE: return cudf::type_dispatcher( - input.type(), detail::NegateOpDispatcher{}, input, stream, mr); + dispatch_type, MathOpDispatcher{}, input, stream, mr); default: CUDF_FAIL("Undefined unary operation"); } } From c99f393b61a41893b02709ecdc166f7f2a1fbcb2 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 19 Feb 2025 13:31:45 -0500 Subject: [PATCH 3/8] Skip the failing connectorx polars tests (#18037) In #18015, we tried skipping the failing polars tests and applying the workaround mentioned in polars issue 21274. But pip is [unable to solve our test environment](https://github.com/rapidsai/cudf/actions/runs/13406947992/job/37463788766) in that case. This PR just skips the tests because we only need to do one or the other, not both. 
--- python/cudf_polars/cudf_polars/testing/plugin.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 0b52cf1c61c..e56d906833f 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -214,6 +214,10 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/streaming/test_streaming_group_by.py::test_streaming_group_by_literal[1]": "May segfault w/the legacy streaming engine", # Fails in CI, but passes locally "tests/unit/streaming/test_streaming.py::test_streaming_streamable_functions": "RuntimeError: polars_python::sql::PySQLContext is unsendable, but is being dropped on another thread", + # TODO: Remove once when we support polars 1.23 + "tests/unit/io/database/test_read.py::test_read_database[uri: connectorx]": "ValueError: arrow2", + "tests/unit/io/database/test_read.py::test_read_database_cx_credentials[fakedb://123:456@account/database/schema?warehouse=warehouse&role=role]": "ValueError: arrow2", + "tests/unit/io/database/test_read.py::test_read_database_cx_credentials[fakedb://my#%us3r:p433w0rd@not_a_real_host:9999/database]": "ValueError: arrow2", } From e500794479c3b1a23c1a12c8425d9120424871f8 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 19 Feb 2025 10:47:37 -0800 Subject: [PATCH 4/8] remove pip install --- ci/test_cudf_polars_polars_tests.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh index 909abbe9d1e..3466edacfc5 100755 --- a/ci/test_cudf_polars_polars_tests.sh +++ b/ci/test_cudf_polars_polars_tests.sh @@ -27,8 +27,6 @@ git clone https://github.com/pola-rs/polars.git --branch "${TAG}" --depth 1 # Install requirements for running polars tests rapids-logger "Install polars test requirements" rapids-pip-retry install -r polars/py-polars/requirements-dev.txt -r 
polars/py-polars/requirements-ci.txt -# TODO: Workaround until https://github.com/pola-rs/polars/issues/21274 is fixed. -rapids-pip-retry install connectorx==0.4.1 # shellcheck disable=SC2317 function set_exitcode() From 3117dc26b8466ac8e2c64574ab0b26cc621a44ff Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 19 Feb 2025 14:45:15 -0500 Subject: [PATCH 5/8] Bump polars version to <1.23 (#17986) The PR upgrades the Polars version to 1.22. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - James Lamb (https://github.com/jameslamb) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/17986 --- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-128_arch-x86_64.yaml | 2 +- conda/recipes/cudf-polars/meta.yaml | 2 +- dependencies.yaml | 2 +- python/cudf_polars/cudf_polars/dsl/ir.py | 43 ++++++++++++++++--- .../cudf_polars/cudf_polars/dsl/translate.py | 28 +++++++++--- .../cudf_polars/cudf_polars/testing/plugin.py | 3 ++ python/cudf_polars/pyproject.toml | 2 +- python/cudf_polars/tests/test_mapfunction.py | 13 +++++- 9 files changed, 78 insertions(+), 19 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 09eb9949f1d..4ec6ef1883a 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -66,7 +66,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.20,<1.22 +- polars>=1.20,<1.23 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<20.0.0a0 diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml index 56cef28ac61..dcf96a02a36 100644 --- a/conda/environments/all_cuda-128_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -64,7 +64,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.20,<1.22 +- polars>=1.20,<1.23 
- pre-commit - pyarrow>=14.0.0,<20.0.0a0 - pydata-sphinx-theme>=0.15.4 diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml index fb7ab9332d8..1d36ab2a3e4 100644 --- a/conda/recipes/cudf-polars/meta.yaml +++ b/conda/recipes/cudf-polars/meta.yaml @@ -43,7 +43,7 @@ requirements: run: - python - pylibcudf ={{ version }} - - polars >=1.20,<1.22 + - polars >=1.20,<1.23 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/dependencies.yaml b/dependencies.yaml index 7188e10b058..c8893fc8b49 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -803,7 +803,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.20,<1.22 + - polars>=1.20,<1.23 run_cudf_polars_experimental: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 8f12a4a7570..603f51e9d40 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -1650,6 +1650,16 @@ def do_evaluate(cls, schema: Schema, df: DataFrame) -> DataFrame: return DataFrame(columns) +class MergeSorted(IR): + """Merge sorted operation.""" + + def __init__(self, schema: Schema, left: IR, right: IR, key: str): + # libcudf merge is not stable wrt order of inputs, since + # it uses a priority queue to manage the tables it produces. + # See: https://github.com/rapidsai/cudf/issues/16010 + raise NotImplementedError("MergeSorted not yet implemented") + + class MapFunction(IR): """Apply some function to a dataframe.""" @@ -1663,13 +1673,10 @@ class MapFunction(IR): _NAMES: ClassVar[frozenset[str]] = frozenset( [ "rechunk", - # libcudf merge is not stable wrt order of inputs, since - # it uses a priority queue to manage the tables it produces. 
- # See: https://github.com/rapidsai/cudf/issues/16010 - # "merge_sorted", "rename", "explode", "unpivot", + "row_index", ] ) @@ -1678,8 +1685,12 @@ def __init__(self, schema: Schema, name: str, options: Any, df: IR): self.name = name self.options = options self.children = (df,) - if self.name not in MapFunction._NAMES: - raise NotImplementedError(f"Unhandled map function {self.name}") + if ( + self.name not in MapFunction._NAMES + ): # pragma: no cover; need more polars rust functions + raise NotImplementedError( + f"Unhandled map function {self.name}" + ) # pragma: no cover if self.name == "explode": (to_explode,) = self.options if len(to_explode) > 1: @@ -1716,6 +1727,9 @@ def __init__(self, schema: Schema, name: str, options: Any, df: IR): variable_name, value_name, ) + elif self.name == "row_index": + col_name, offset = options + self.options = (col_name, offset) self._non_child_args = (schema, name, self.options) @classmethod @@ -1781,6 +1795,23 @@ def do_evaluate( Column(value_column, name=value_name), ] ) + elif name == "row_index": + col_name, offset = options + dtype = schema[col_name] + step = plc.interop.from_arrow( + pa.scalar(1, type=plc.interop.to_arrow(dtype)) + ) + init = plc.interop.from_arrow( + pa.scalar(offset, type=plc.interop.to_arrow(dtype)) + ) + index_col = Column( + plc.filling.sequence(df.num_rows, init, step), + is_sorted=plc.types.Sorted.YES, + order=plc.types.Order.ASCENDING, + null_order=plc.types.NullOrder.AFTER, + name=col_name, + ) + return DataFrame([index_col, *df.columns]) else: raise AssertionError("Should never be reached") # pragma: no cover diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 4ed36e463f3..22f97f2bf52 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR: # IR is versioned with major.minor, minor is bumped for 
backwards # compatible changes (e.g. adding new nodes), major is bumped for # incompatible changes (e.g. renaming nodes). - if (version := self.visitor.version()) >= (5, 1): + if (version := self.visitor.version()) >= (6, 1): e = NotImplementedError( f"No support for polars IR {version=}" ) # pragma: no cover; no such version for now. @@ -299,7 +299,7 @@ def _( # Join key dtypes are dependent on the schema of the left and # right inputs, so these must be translated with the relevant # input active. - def adjust_literal_dtype(literal: expr.Literal) -> expr.Literal: + def adjust_literal_dtype(literal: expr.Literal) -> expr.Literal: # pragma: no cover if literal.dtype.id() == plc.types.TypeId.INT32: plc_int64 = plc.types.DataType(plc.types.TypeId.INT64) return expr.Literal( @@ -308,7 +308,7 @@ def adjust_literal_dtype(literal: expr.Literal) -> expr.Literal: ) return literal - def maybe_adjust_binop(e) -> expr.Expr: + def maybe_adjust_binop(e) -> expr.Expr: # pragma: no cover if isinstance(e.value, expr.BinOp): left, right = e.value.children if isinstance(left, expr.Col) and isinstance(right, expr.Literal): @@ -323,10 +323,10 @@ def translate_expr_and_maybe_fix_binop_args(translator, exprs): ] with set_node(translator.visitor, node.input_left): + # TODO: There's bug in the polars type coercion phase. + # Use translate_named_expr directly once our minimum + # supported polars version is 1.22 inp_left = translator.translate_ir(n=None) - # TODO: There's bug in the polars type coercion phase. Use - # translate_named_expr directly once it is resolved. 
- # Tracking issue: https://github.com/pola-rs/polars/issues/20935 left_on = translate_expr_and_maybe_fix_binop_args(translator, node.left_on) with set_node(translator.visitor, node.input_right): inp_right = translator.translate_ir(n=None) @@ -463,6 +463,21 @@ def _( return ir.Projection(schema, translator.translate_ir(n=node.input)) +@_translate_ir.register +def _( + node: pl_ir.MergeSorted, translator: Translator, schema: dict[str, plc.DataType] +) -> ir.IR: + inp_left = translator.translate_ir(n=node.input_left) + inp_right = translator.translate_ir(n=node.input_right) + key = node.key + return ir.MergeSorted( + schema, + inp_left, + inp_right, + key, + ) + + @_translate_ir.register def _( node: pl_ir.MapFunction, translator: Translator, schema: dict[str, plc.DataType] @@ -472,7 +487,6 @@ def _( schema, name, options, - # TODO: merge_sorted breaks this pattern translator.translate_ir(n=node.input), ) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 48629af920d..cf1bfbe8a69 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -193,6 +193,9 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/test_cse.py::test_cse_predicate_self_join": "Debug output on stderr doesn't match", "tests/unit/test_empty.py::test_empty_9137": "Mismatching dtypes, needs cudf#15852", "tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised", + "tests/unit/io/test_multiscan.py::test_include_file_paths[scan_parquet-write_parquet]": "Need to expose include_file_paths xref: cudf#18012", + "tests/unit/io/test_multiscan.py::test_include_file_paths[scan_csv-write_csv]": "Need to expose include_file_paths xref: cudf#18012", + "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics[False]": "Debug output on stderr doesn't match", # Maybe flaky, order-dependent? 
"tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order", "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero", diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 805d7925bb4..872c08a66f9 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.20,<1.22", + "polars>=1.20,<1.23", "pylibcudf==25.4.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/cudf_polars/tests/test_mapfunction.py b/python/cudf_polars/tests/test_mapfunction.py index 63aa1c573a9..7a9f4a56545 100644 --- a/python/cudf_polars/tests/test_mapfunction.py +++ b/python/cudf_polars/tests/test_mapfunction.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. 
# SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -93,3 +93,14 @@ def test_unpivot_defaults(): ) q = df.unpivot(index="d") assert_gpu_result_equal(q) + + +def test_with_row_index_defaults(): + lf = pl.LazyFrame( + { + "a": [1, 3, 5], + "b": [2, 4, 6], + } + ) + q = lf.with_row_index() + assert_gpu_result_equal(q) From abffae8fa2bd43d3285d0ec1f684cbad9582dc9d Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 19 Feb 2025 21:09:36 -0600 Subject: [PATCH 6/8] Prevent setting custom attributes to `ColumnMethods` (#18005) Fixes: #17750 This PR disallows setting custom attributes to `ColumnMethods` Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/18005 --- python/cudf/cudf/core/column/methods.py | 8 +++++++- python/cudf/cudf/tests/test_list.py | 7 +++++++ python/cudf/cudf/tests/test_string.py | 12 ++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index a91c080fe21..b42e4419d72 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from __future__ import annotations @@ -93,3 +93,9 @@ def _return_or_inplace( return cudf.Index._from_column(new_col, name=self._parent.name) else: return self._parent._mimic_inplace(new_col, inplace=False) + + def __setattr__(self, key, value): + if key in {"_parent", "_column"}: + super().__setattr__(key, value) + else: + raise AttributeError(f"You cannot add any new attribute '{key}'") diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 3ffbd5ff2a8..3de733f1de2 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -956,6 +956,13 @@ def test_empty_nested_list_uninitialized_offsets_memory_usage(): assert ser.memory_usage() == 8 +def test_list_methods_setattr(): + ser = cudf.Series([["a", "b", "c"], ["d", "e", "f"]]) + + with pytest.raises(AttributeError): + ser.list.a = "b" + + def test_dataframe_list_round_trip(): data = [{"text": "hello", "list_col": np.asarray([1, 2], dtype="uint32")}] cudf_arrow = cudf.DataFrame(data).to_arrow() diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 809fedfde7b..164fcb06624 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -3575,3 +3575,15 @@ def test_replace_invalid_scalar_repl(): ser = cudf.Series(["1"]) with pytest.raises(TypeError): ser.str.replace("1", 2) + + +def test_string_methods_setattr(): + ser = cudf.Series(["ab", "cd", "ef"]) + pser = ser.to_pandas() + + assert_exceptions_equal( + lfunc=ser.str.__setattr__, + rfunc=pser.str.__setattr__, + lfunc_args_and_kwargs=(("a", "b"),), + rfunc_args_and_kwargs=(("a", "b"),), + ) From 3c06da355e22162d167912a093b39c465cf4057a Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 19 Feb 2025 22:46:45 -0500 Subject: [PATCH 7/8] Expose `num_rows_per_source` (IO metadata) to pylibcudf (#18049) Closes #18048 Authors: - Matthew Murray (https://github.com/Matt711) 
Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/18049 --- python/pylibcudf/pylibcudf/io/types.pyi | 2 ++ python/pylibcudf/pylibcudf/io/types.pyx | 10 ++++++- .../pylibcudf/tests/io/test_types.py | 26 ++++++++++++++++++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi index 63fa9d1ff79..1463f4d0073 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyi +++ b/python/pylibcudf/pylibcudf/io/types.pyi @@ -101,6 +101,8 @@ class TableWithMetadata: def child_names(self) -> ChildNameSpec: ... @property def per_file_user_data(self) -> list[Mapping[str, str]]: ... + @property + def num_rows_per_source(self) -> list[int]: ... class SourceInfo: def __init__( diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 458595ca0e0..83330cf14ff 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. from cpython.buffer cimport PyBUF_READ from cpython.memoryview cimport PyMemoryView_FromMemory @@ -401,6 +401,14 @@ cdef class TableWithMetadata: """ return self.metadata.per_file_user_data + @property + def num_rows_per_source(self): + """ + Returns a list containing the number + of rows for each file being read in. + """ + return self.metadata.num_rows_per_source + cdef class SourceInfo: """A class containing details on a source to read from. diff --git a/python/pylibcudf/pylibcudf/tests/io/test_types.py b/python/pylibcudf/pylibcudf/tests/io/test_types.py index a7642556bf2..b14e7770e7b 100644 --- a/python/pylibcudf/pylibcudf/tests/io/test_types.py +++ b/python/pylibcudf/pylibcudf/tests/io/test_types.py @@ -1,13 +1,28 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. 
import gc import weakref import pyarrow as pa +import pytest import pylibcudf as plc +@pytest.fixture +def parquet_data(tmp_path): + tbl1 = pa.Table.from_pydict({"a": [3, 1, 4], "b": [1, 5, 9]}) + tbl2 = pa.Table.from_pydict({"a": [1, 6], "b": [1, 8]}) + + path1 = tmp_path / "tbl1.parquet" + path2 = tmp_path / "tbl2.parquet" + + pa.parquet.write_table(tbl1, path1) + pa.parquet.write_table(tbl2, path2) + + return [path1, path2] + + def test_gc_with_table_and_column_input_metadata(): class Foo(plc.io.types.TableInputMetadata): def __del__(self): @@ -26,3 +41,12 @@ def __del__(self): gc.collect() assert weak_tbl_meta() is None + + +def test_num_rows_per_resource(parquet_data): + source = plc.io.SourceInfo(parquet_data) + options = plc.io.parquet.ParquetReaderOptions.builder(source).build() + assert plc.io.parquet.read_parquet(options).num_rows_per_source == [3, 2] + + +# TODO: Test more IO types From eb5c309d24a9267656bb33d93ff90e4a2b12af89 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 19 Feb 2025 22:03:02 -0800 Subject: [PATCH 8/8] Pass more dtype objects to `astype` calls (#18044) Broken off from https://github.com/rapidsai/cudf/pull/17978 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/18044 --- python/cudf/cudf/core/column/categorical.py | 14 ++++-------- python/cudf/cudf/core/column/column.py | 2 +- python/cudf/cudf/core/dtypes.py | 2 +- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/core/join/_join_helpers.py | 5 +++-- python/cudf/cudf/core/tools/datetimes.py | 4 ++-- python/cudf/cudf/tests/test_dataframe.py | 24 +++++++++++++++------ 8 files changed, 30 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index a789d5d5ab1..a57ff9a7817 100644 --- 
a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -811,21 +811,15 @@ def to_pandas( def to_arrow(self) -> pa.Array: """Convert to PyArrow Array.""" - # arrow doesn't support unsigned codes + # pyarrow.Table doesn't support unsigned codes signed_type = ( min_signed_type(self.codes.max()) if self.codes.size > 0 - else np.int8 + else np.dtype(np.int8) ) - codes = self.codes.astype(signed_type) - categories = self.categories - - out_indices = codes.to_arrow() - out_dictionary = categories.to_arrow() - return pa.DictionaryArray.from_arrays( - out_indices, - out_dictionary, + self.codes.astype(signed_type).to_arrow(), + self.categories.to_arrow(), ordered=self.ordered, ) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index d281076690a..06dc4058115 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1629,7 +1629,7 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase: elif isinstance(dtype, IntervalDtype): result = self.as_interval_column(dtype) elif isinstance(dtype, (ListDtype, StructDtype)): - if not self.dtype == dtype: + if self.dtype != dtype: raise NotImplementedError( f"Casting {self.dtype} columns not currently supported" ) diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 983950580d0..12a9cce9f1c 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -262,7 +262,7 @@ def _init_categories( getattr(categories, "dtype", None), (cudf.IntervalDtype, pd.IntervalDtype), ): - dtype = "object" # type: Any + dtype = CUDF_STRING_DTYPE else: dtype = None diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 8ce8dfd2198..8587bff2e32 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -3135,7 +3135,7 @@ def __init__( data = column.as_column(data) else: data = column.as_column( - data, 
dtype="category" if dtype is None else dtype + data, dtype=cudf.CategoricalDtype() if dtype is None else dtype ) # dtype has already been taken care dtype = None diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index ac4303394f7..9c48b31a309 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -6517,7 +6517,7 @@ def convert_dtypes( for col in self._columns: if col.dtype.kind == "f": col = col.fillna(0) - as_int = col.astype("int64") + as_int = col.astype(np.dtype(np.int64)) if cp.allclose(col, as_int): cols.append(as_int) continue diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index 854c44ff1a1..c329bf11d97 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -114,7 +114,8 @@ def _match_join_keys( if how == "left" and rcol.fillna(0).can_cast_safely(ltype): return lcol, rcol.astype(ltype) - + elif common_type is None: + common_type = np.dtype(np.float64) return lcol.astype(common_type), rcol.astype(common_type) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 546abfc4d3d..4478be2fd04 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -369,9 +369,9 @@ def _process_col( elif col.dtype.kind == "O": if unit not in (None, "ns") or col.null_count == len(col): try: - col = col.astype(dtype="int64") + col = col.astype(np.dtype(np.int64)) except ValueError: - col = col.astype(dtype="float64") + col = col.astype(np.dtype(np.float64)) return _process_col( col=col, unit=unit, diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 05bc221bf9d..15c11db5a84 100644 --- 
a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -4343,21 +4343,27 @@ def test_as_column_types(): assert_eq(pds, gds) - col = column.as_column(cudf.Series([], dtype="float64"), dtype="float32") + col = column.as_column( + cudf.Series([], dtype="float64"), dtype=np.dtype(np.float32) + ) assert_eq(col.dtype, np.dtype("float32")) gds = cudf.Series._from_column(col) pds = pd.Series(pd.Series([], dtype="float32")) assert_eq(pds, gds) - col = column.as_column(cudf.Series([], dtype="float64"), dtype="str") + col = column.as_column( + cudf.Series([], dtype="float64"), dtype=cudf.dtype("str") + ) assert_eq(col.dtype, np.dtype("object")) gds = cudf.Series._from_column(col) pds = pd.Series(pd.Series([], dtype="str")) assert_eq(pds, gds) - col = column.as_column(cudf.Series([], dtype="float64"), dtype="object") + col = column.as_column( + cudf.Series([], dtype="float64"), dtype=cudf.dtype("str") + ) assert_eq(col.dtype, np.dtype("object")) gds = cudf.Series._from_column(col) pds = pd.Series(pd.Series([], dtype="object")) @@ -4366,7 +4372,7 @@ def test_as_column_types(): pds = pd.Series(np.array([1, 2, 3]), dtype="float32") gds = cudf.Series._from_column( - column.as_column(np.array([1, 2, 3]), dtype="float32") + column.as_column(np.array([1, 2, 3]), dtype=np.dtype(np.float32)) ) assert_eq(pds, gds) @@ -4389,14 +4395,18 @@ def test_as_column_types(): pds = pd.Series([1.2, 18.0, 9.0], dtype="float32") gds = cudf.Series._from_column( - column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="float32") + column.as_column( + cudf.Series([1.2, 18.0, 9.0]), dtype=np.dtype(np.float32) + ) ) assert_eq(pds, gds) pds = pd.Series([1.2, 18.0, 9.0], dtype="str") gds = cudf.Series._from_column( - column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="str") + column.as_column( + cudf.Series([1.2, 18.0, 9.0]), dtype=cudf.dtype("str") + ) ) assert_eq(pds, gds) @@ -5228,7 +5238,7 @@ def test_empty_df_astype(dtype): ) def 
test_series_astype_error_handling(errors): sr = cudf.Series(["random", "words"]) - got = sr.astype("datetime64", errors=errors) + got = sr.astype("datetime64[ns]", errors=errors) assert_eq(sr, got)