From 9d6953a6fa3c81bd6103d2fc21231cc47c906f88 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed, 19 Feb 2025 09:00:46 -0500
Subject: [PATCH 1/8] Remove deprecated single component datetime extract APIs
 (#18010)

Follows up #17221 to remove the deprecated APIs.

Note: This should have been removed in 25.02.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/18010
---
 cpp/include/cudf/datetime.hpp                 | 191 +-----------------
 cpp/include/cudf/detail/datetime.hpp          |  92 +--------
 cpp/src/datetime/datetime_ops.cu              | 152 +-------------
 python/pylibcudf/pylibcudf/datetime.pxd       |  14 +-
 python/pylibcudf/pylibcudf/datetime.pyi       |   3 -
 python/pylibcudf/pylibcudf/datetime.pyx       |  80 +-------
 .../pylibcudf/pylibcudf/libcudf/datetime.pxd  |  32 +--
 .../pylibcudf/tests/test_datetime.py          |  22 +-
 8 files changed, 7 insertions(+), 579 deletions(-)

diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp
index 1f6e86d0389..f385ede96b9 100644
--- a/cpp/include/cudf/datetime.hpp
+++ b/cpp/include/cudf/datetime.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -54,195 +54,6 @@ enum class datetime_component : uint8_t {
   NANOSECOND
 };
 
-/**
- * @brief  Extracts year from any datetime type and returns an int16_t
- * cudf::column.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t years
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_year(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts month from any datetime type and returns an int16_t
- * cudf::column.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t months
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_month(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts day from any datetime type and returns an int16_t
- * cudf::column.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t days
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_day(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts a weekday from any datetime type and returns an int16_t
- * cudf::column.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t days
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_weekday(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts hour from any datetime type and returns an int16_t
- * cudf::column.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t hours
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_hour(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts minute from any datetime type and returns an int16_t
- * cudf::column.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t minutes
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_minute(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts second from any datetime type and returns an int16_t
- * cudf::column.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t seconds
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_second(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts millisecond fraction from any datetime type and returns an int16_t
- * cudf::column.
- *
- * A millisecond fraction is only the 3 digits that make up the millisecond portion of a duration.
- * For example, the millisecond fraction of 1.234567890 seconds is 234.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t milliseconds
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_millisecond_fraction(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts microsecond fraction from any datetime type and returns an int16_t
- * cudf::column.
- *
- * A microsecond fraction is only the 3 digits that make up the microsecond portion of a duration.
- * For example, the microsecond fraction of 1.234567890 seconds is 567.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t microseconds
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_microsecond_fraction(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
-/**
- * @brief  Extracts nanosecond fraction from any datetime type and returns an int16_t
- * cudf::column.
- *
- * A nanosecond fraction is only the 3 digits that make up the nanosecond portion of a duration.
- * For example, the nanosecond fraction of 1.234567890 seconds is 890.
- *
- * @deprecated Deprecated in 24.12, to be removed in 25.02
- *
- * @param column cudf::column_view of the input datetime values
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate device memory of the returned column
- *
- * @returns cudf::column of the extracted int16_t nanoseconds
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- */
-[[deprecated]] std::unique_ptr<cudf::column> extract_nanosecond_fraction(
-  cudf::column_view const& column,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
 /**
  * @brief Extracts the specified datetime component from any datetime type and
  * returns an int16_t cudf::column.
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp
index df3050d6494..2b01231deab 100644
--- a/cpp/include/cudf/detail/datetime.hpp
+++ b/cpp/include/cudf/detail/datetime.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,96 +25,6 @@
 namespace CUDF_EXPORT cudf {
 namespace datetime {
 namespace detail {
-/**
- * @copydoc cudf::extract_year(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_year(cudf::column_view const& column,
-                                           rmm::cuda_stream_view stream,
-                                           rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_month(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_month(cudf::column_view const& column,
-                                            rmm::cuda_stream_view stream,
-                                            rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_day(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_day(cudf::column_view const& column,
-                                          rmm::cuda_stream_view stream,
-                                          rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_weekday(cudf::column_view const& column,
-                                              rmm::cuda_stream_view stream,
-                                              rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_hour(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_hour(cudf::column_view const& column,
-                                           rmm::cuda_stream_view stream,
-                                           rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_minute(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_minute(cudf::column_view const& column,
-                                             rmm::cuda_stream_view stream,
-                                             rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_second(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_second(cudf::column_view const& column,
-                                             rmm::cuda_stream_view stream,
-                                             rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_millisecond_fraction(cudf::column_view const& column,
-                                                           rmm::cuda_stream_view stream,
-                                                           rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_microsecond_fraction(cudf::column_view const& column,
-                                                           rmm::cuda_stream_view stream,
-                                                           rmm::device_async_resource_ref mr);
-
-/**
- * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, rmm::cuda_stream_view,
- * rmm::device_async_resource_ref)
- *
- */
-std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view const& column,
-                                                          rmm::cuda_stream_view stream,
-                                                          rmm::device_async_resource_ref mr);
-
 /**
  * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component,
  * rmm::cuda_stream_view, rmm::device_async_resource_ref)
diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu
index a497cedb3bc..62f702ac147 100644
--- a/cpp/src/datetime/datetime_ops.cu
+++ b/cpp/src/datetime/datetime_ops.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -436,76 +436,6 @@ std::unique_ptr<column> round_general(rounding_function round_kind,
     column.type(), dispatch_round{}, round_kind, component, column, stream, mr);
 }
 
-std::unique_ptr<column> extract_year(column_view const& column,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr);
-}
-
-std::unique_ptr<column> extract_month(column_view const& column,
-                                      rmm::cuda_stream_view stream,
-                                      rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr);
-}
-
-std::unique_ptr<column> extract_day(column_view const& column,
-                                    rmm::cuda_stream_view stream,
-                                    rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr);
-}
-
-std::unique_ptr<column> extract_weekday(column_view const& column,
-                                        rmm::cuda_stream_view stream,
-                                        rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr);
-}
-
-std::unique_ptr<column> extract_hour(column_view const& column,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr);
-}
-
-std::unique_ptr<column> extract_minute(column_view const& column,
-                                       rmm::cuda_stream_view stream,
-                                       rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr);
-}
-
-std::unique_ptr<column> extract_second(column_view const& column,
-                                       rmm::cuda_stream_view stream,
-                                       rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr);
-}
-
-std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
-                                                     rmm::cuda_stream_view stream,
-                                                     rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr);
-}
-
-std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
-                                                     rmm::cuda_stream_view stream,
-                                                     rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr);
-}
-
-std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
-                                                    rmm::cuda_stream_view stream,
-                                                    rmm::device_async_resource_ref mr)
-{
-  return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr);
-}
-
 std::unique_ptr<column> last_day_of_month(column_view const& column,
                                           rmm::cuda_stream_view stream,
                                           rmm::device_async_resource_ref mr)
@@ -598,62 +528,6 @@ std::unique_ptr<column> round_datetimes(column_view const& column,
   return detail::round_general(detail::rounding_function::ROUND, freq, column, stream, mr);
 }
 
-std::unique_ptr<column> extract_year(column_view const& column,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_year(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_month(column_view const& column,
-                                      rmm::cuda_stream_view stream,
-                                      rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_month(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_day(column_view const& column,
-                                    rmm::cuda_stream_view stream,
-                                    rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_day(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_weekday(column_view const& column,
-                                        rmm::cuda_stream_view stream,
-                                        rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_weekday(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_hour(column_view const& column,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_hour(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_minute(column_view const& column,
-                                       rmm::cuda_stream_view stream,
-                                       rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_minute(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_second(column_view const& column,
-                                       rmm::cuda_stream_view stream,
-                                       rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_second(column, stream, mr);
-}
-
 std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
                                                          datetime_component component,
                                                          rmm::cuda_stream_view stream,
@@ -663,30 +537,6 @@ std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const
   return detail::extract_datetime_component(column, component, stream, mr);
 }
 
-std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
-                                                     rmm::cuda_stream_view stream,
-                                                     rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_millisecond_fraction(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
-                                                     rmm::cuda_stream_view stream,
-                                                     rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_microsecond_fraction(column, stream, mr);
-}
-
-std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
-                                                    rmm::cuda_stream_view stream,
-                                                    rmm::device_async_resource_ref mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::extract_nanosecond_fraction(column, stream, mr);
-}
-
 std::unique_ptr<column> last_day_of_month(column_view const& column,
                                           rmm::cuda_stream_view stream,
                                           rmm::device_async_resource_ref mr)
diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd
index 335ef435f9b..ce295990d26 100644
--- a/python/pylibcudf/pylibcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/datetime.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 from pylibcudf.column cimport Column
 from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency
@@ -8,18 +8,6 @@ ctypedef fused ColumnOrScalar:
     Column
     Scalar
 
-cpdef Column extract_millisecond_fraction(
-    Column input
-)
-
-cpdef Column extract_microsecond_fraction(
-    Column input
-)
-
-cpdef Column extract_nanosecond_fraction(
-    Column input
-)
-
 cpdef Column extract_datetime_component(
     Column input,
     datetime_component component
diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi
index 6a3ae7953d9..8eedaeefe61 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyi
+++ b/python/pylibcudf/pylibcudf/datetime.pyi
@@ -26,9 +26,6 @@ class RoundingFrequency(IntEnum):
     MICROSECOND = ...
     NANOSECOND = ...
 
-def extract_millisecond_fraction(input: Column) -> Column: ...
-def extract_microsecond_fraction(input: Column) -> Column: ...
-def extract_nanosecond_fraction(input: Column) -> Column: ...
 def extract_datetime_component(
     input: Column, component: DatetimeComponent
 ) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx
index b100e3e22d0..15aee4c3e9e 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyx
+++ b/python/pylibcudf/pylibcudf/datetime.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from pylibcudf.libcudf.column.column cimport column
@@ -9,9 +9,6 @@ from pylibcudf.libcudf.datetime cimport (
     day_of_year as cpp_day_of_year,
     days_in_month as cpp_days_in_month,
     extract_datetime_component as cpp_extract_datetime_component,
-    extract_microsecond_fraction as cpp_extract_microsecond_fraction,
-    extract_millisecond_fraction as cpp_extract_millisecond_fraction,
-    extract_nanosecond_fraction as cpp_extract_nanosecond_fraction,
     extract_quarter as cpp_extract_quarter,
     floor_datetimes as cpp_floor_datetimes,
     is_leap_year as cpp_is_leap_year,
@@ -37,9 +34,6 @@ __all__ = [
     "day_of_year",
     "days_in_month",
     "extract_datetime_component",
-    "extract_microsecond_fraction",
-    "extract_millisecond_fraction",
-    "extract_nanosecond_fraction",
     "extract_quarter",
     "floor_datetimes",
     "is_leap_year",
@@ -47,78 +41,6 @@ __all__ = [
     "round_datetimes",
 ]
 
-cpdef Column extract_millisecond_fraction(
-    Column input
-):
-    """
-    Extract the millisecond from a datetime column.
-
-    For details, see :cpp:func:`extract_millisecond_fraction`.
-
-    Parameters
-    ----------
-    input : Column
-        The column to extract the millisecond from.
-
-    Returns
-    -------
-    Column
-        Column with the extracted milliseconds.
-    """
-    cdef unique_ptr[column] result
-
-    with nogil:
-        result = cpp_extract_millisecond_fraction(input.view())
-    return Column.from_libcudf(move(result))
-
-cpdef Column extract_microsecond_fraction(
-    Column input
-):
-    """
-    Extract the microsecond fraction from a datetime column.
-
-    For details, see :cpp:func:`extract_microsecond_fraction`.
-
-    Parameters
-    ----------
-    input : Column
-        The column to extract the microsecond fraction from.
-
-    Returns
-    -------
-    Column
-        Column with the extracted microsecond fractions.
-    """
-    cdef unique_ptr[column] result
-
-    with nogil:
-        result = cpp_extract_microsecond_fraction(input.view())
-    return Column.from_libcudf(move(result))
-
-cpdef Column extract_nanosecond_fraction(
-    Column input
-):
-    """
-    Extract the nanosecond fraction from a datetime column.
-
-    For details, see :cpp:func:`extract_nanosecond_fraction`.
-
-    Parameters
-    ----------
-    input : Column
-        The column to extract the nanosecond fraction from.
-
-    Returns
-    -------
-    Column
-        Column with the extracted nanosecond fractions.
-    """
-    cdef unique_ptr[column] result
-
-    with nogil:
-        result = cpp_extract_nanosecond_fraction(input.view())
-    return Column.from_libcudf(move(result))
-
 cpdef Column extract_datetime_component(
     Column input,
     datetime_component component
diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
index 049a1b06c2e..7dacab668b6 100644
--- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t, uint8_t
 from libcpp.memory cimport unique_ptr
@@ -21,36 +21,6 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
         MICROSECOND
         NANOSECOND
 
-    cdef unique_ptr[column] extract_year(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_month(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_day(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_weekday(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_hour(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_minute(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_second(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_millisecond_fraction(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_microsecond_fraction(
-        const column_view& column
-    ) except +libcudf_exception_handler
-    cdef unique_ptr[column] extract_nanosecond_fraction(
-        const column_view& column
-    ) except +libcudf_exception_handler
     cdef unique_ptr[column] extract_datetime_component(
         const column_view& column,
         datetime_component component
diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py
index f5f24ef28e2..6251a4bbb86 100644
--- a/python/pylibcudf/pylibcudf/tests/test_datetime.py
+++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 import calendar
 import datetime
@@ -77,26 +77,6 @@ def test_extract_datetime_component(datetime_column, component):
     assert_column_eq(expect, got)
 
 
-@pytest.mark.parametrize(
-    "datetime_func",
-    [
-        "extract_millisecond_fraction",
-        "extract_microsecond_fraction",
-        "extract_nanosecond_fraction",
-    ],
-)
-def test_datetime_extracting_functions(datetime_column, datetime_func):
-    pa_col = plc.interop.to_arrow(datetime_column)
-    got = getattr(plc.datetime, datetime_func)(datetime_column)
-    kwargs = {}
-    attr = datetime_func.split("_")[1]
-    if attr == "weekday":
-        kwargs = {"count_from_zero": False}
-        attr = "day_of_week"
-    expect = getattr(pc, attr)(pa_col, **kwargs).cast(pa.int16())
-    assert_column_eq(expect, got)
-
-
 @pytest.mark.parametrize(
     "op",
     [

From d660873068bd9a54d9a78f6eabd3eaf53e0296b1 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 19 Feb 2025 11:16:34 -0500
Subject: [PATCH 2/8] Refactor math_ops.cu dispatcher logic (#18006)

Refactors the type-dispatcher logic and cleans up the code in `math_ops.cu` for unary operations.
Three of the four dispatch functors had the same logic except for the supported-types SFINAE clause.
Also, correcting the code to handle RINT properly created a fourth common functor.
These have been refactored into a single functor and separated from the supported-types checks.
The single functor now accepts the transform function as well as the supported-types expression.
Also, the second dispatcher call for dictionary was replaced with an if-statement to help simplify the code and to reduce the maintenance needed to keep the supported-types clauses correctly in sync.

One side effect is that more ops are now supported appropriately with dictionary types.

Referenced cleanup needed here: https://github.com/rapidsai/cudf/pull/17560#discussion_r1934160760

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Shruti Shivakumar (https://github.com/shrshi)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/18006
---
 cpp/src/unary/math_ops.cu | 323 +++++++++++++-------------------------
 1 file changed, 112 insertions(+), 211 deletions(-)

diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu
index 4e96f900bf3..aead6710082 100644
--- a/cpp/src/unary/math_ops.cu
+++ b/cpp/src/unary/math_ops.cu
@@ -27,9 +27,9 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
+#include <cuda/std/cmath>
 #include <thrust/transform.h>
 
-#include <cmath>
 #include <type_traits>
 
 namespace cudf {
@@ -42,7 +42,7 @@ struct DeviceSin {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::sin(data);
+    return cuda::std::sin(data);
   }
 };
 
@@ -50,7 +50,7 @@ struct DeviceCos {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::cos(data);
+    return cuda::std::cos(data);
   }
 };
 
@@ -58,7 +58,7 @@ struct DeviceTan {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::tan(data);
+    return cuda::std::tan(data);
   }
 };
 
@@ -66,7 +66,7 @@ struct DeviceArcSin {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::asin(data);
+    return cuda::std::asin(data);
   }
 };
 
@@ -74,7 +74,7 @@ struct DeviceArcCos {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::acos(data);
+    return cuda::std::acos(data);
   }
 };
 
@@ -82,7 +82,7 @@ struct DeviceArcTan {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::atan(data);
+    return cuda::std::atan(data);
   }
 };
 
@@ -90,7 +90,7 @@ struct DeviceSinH {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::sinh(data);
+    return cuda::std::sinh(data);
   }
 };
 
@@ -98,7 +98,7 @@ struct DeviceCosH {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::cosh(data);
+    return cuda::std::cosh(data);
   }
 };
 
@@ -106,7 +106,7 @@ struct DeviceTanH {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::tanh(data);
+    return cuda::std::tanh(data);
   }
 };
 
@@ -114,7 +114,7 @@ struct DeviceArcSinH {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::asinh(data);
+    return cuda::std::asinh(data);
   }
 };
 
@@ -122,7 +122,7 @@ struct DeviceArcCosH {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::acosh(data);
+    return cuda::std::acosh(data);
   }
 };
 
@@ -130,7 +130,7 @@ struct DeviceArcTanH {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::atanh(data);
+    return cuda::std::atanh(data);
   }
 };
 
@@ -140,7 +140,7 @@ struct DeviceExp {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::exp(data);
+    return cuda::std::exp(data);
   }
 };
 
@@ -148,7 +148,7 @@ struct DeviceLog {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::log(data);
+    return cuda::std::log(data);
   }
 };
 
@@ -156,7 +156,7 @@ struct DeviceSqrt {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::sqrt(data);
+    return cuda::std::sqrt(data);
   }
 };
 
@@ -164,7 +164,7 @@ struct DeviceCbrt {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::cbrt(data);
+    return cuda::std::cbrt(data);
   }
 };
 
@@ -174,7 +174,7 @@ struct DeviceCeil {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::ceil(data);
+    return cuda::std::ceil(data);
   }
 };
 
@@ -182,7 +182,7 @@ struct DeviceFloor {
   template <typename T>
   __device__ T operator()(T data)
   {
-    return std::floor(data);
+    return cuda::std::floor(data);
   }
 };
 
@@ -190,7 +190,7 @@ struct DeviceAbs {
   template <typename T>
   std::enable_if_t<std::is_signed_v<T>, T> __device__ operator()(T data)
   {
-    return std::abs(data);
+    return cuda::std::abs(data);
   }
   template <typename T>
   std::enable_if_t<!std::is_signed_v<T>, T> __device__ operator()(T data)
@@ -199,18 +199,13 @@ struct DeviceAbs {
   }
 };
 
-struct DeviceRInt {
-  template <typename T>
-  std::enable_if_t<std::is_floating_point_v<T>, T> __device__ operator()(T data)
-  {
-    return std::rint(data);
-  }
+// round float to int
 
-  // Dummy to handle other types, will never be executed
+struct DeviceRInt {
   template <typename T>
-  std::enable_if_t<!std::is_floating_point_v<T>, T> __device__ operator()(T data)
+  __device__ T operator()(T data)
   {
-    return data;
+    return cuda::std::rint(data);
   }
 };
 
@@ -238,7 +233,7 @@ struct DeviceNot {
 
 struct DeviceNegate {
   template <typename T>
-  T __device__ operator()(T data)
+  __device__ T operator()(T data)
   {
     return -data;
   }
@@ -350,7 +345,6 @@ std::unique_ptr<cudf::column> transform_fn(InputIterator begin,
                             null_count,
                             stream,
                             mr);
-  if (size == 0) return output;
 
   auto output_view = output->mutable_view();
   thrust::transform(rmm::exec_policy(stream), begin, end, output_view.begin<OutputType>(), UFN{});
@@ -358,6 +352,19 @@ std::unique_ptr<cudf::column> transform_fn(InputIterator begin,
   return output;
 }
 
+template <typename T, typename UFN>
+std::unique_ptr<cudf::column> transform_fn(cudf::column_view const& input,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::device_async_resource_ref mr)
+{
+  return transform_fn<T, UFN>(input.begin<T>(),
+                              input.end<T>(),
+                              detail::copy_bitmask(input, stream, mr),
+                              input.null_count(),
+                              stream,
+                              mr);
+}
+
 template <typename T, typename UFN>
 std::unique_ptr<cudf::column> transform_fn(cudf::dictionary_column_view const& input,
                                            rmm::cuda_stream_view stream,
@@ -377,136 +384,52 @@ std::unique_ptr<cudf::column> transform_fn(cudf::dictionary_column_view const& i
     output->view(), dictionary::detail::get_indices_type_for_size(output->size()), stream, mr);
 }
 
-template <typename UFN>
-struct MathOpDispatcher {
-  template <typename T, std::enable_if_t<std::is_arithmetic_v<T>>* = nullptr>
-  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
-                                           rmm::cuda_stream_view stream,
-                                           rmm::device_async_resource_ref mr)
-  {
-    return transform_fn<T, UFN>(input.begin<T>(),
-                                input.end<T>(),
-                                cudf::detail::copy_bitmask(input, stream, mr),
-                                input.null_count(),
-                                stream,
-                                mr);
-  }
-
-  struct dictionary_dispatch {
-    template <typename T, std::enable_if_t<std::is_arithmetic_v<T>>* = nullptr>
-    std::unique_ptr<cudf::column> operator()(cudf::dictionary_column_view const& input,
-                                             rmm::cuda_stream_view stream,
-                                             rmm::device_async_resource_ref mr)
-    {
-      return transform_fn<T, UFN>(input, stream, mr);
-    }
-
-    template <typename T, typename... Args>
-    std::enable_if_t<!std::is_arithmetic_v<T>, std::unique_ptr<cudf::column>> operator()(Args&&...)
-    {
-      CUDF_FAIL("dictionary keys must be numeric for this operation");
-    }
-  };
-
-  template <
-    typename T,
-    std::enable_if_t<!std::is_arithmetic_v<T> and std::is_same_v<T, dictionary32>>* = nullptr>
-  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
-                                           rmm::cuda_stream_view stream,
-                                           rmm::device_async_resource_ref mr)
-  {
-    if (input.is_empty()) return empty_like(input);
-    auto dictionary_col = dictionary_column_view(input);
-    return type_dispatcher(
-      dictionary_col.keys().type(), dictionary_dispatch{}, dictionary_col, stream, mr);
-  }
-
-  template <typename T, typename... Args>
-  std::enable_if_t<!std::is_arithmetic_v<T> and !std::is_same_v<T, dictionary32>,
-                   std::unique_ptr<cudf::column>>
-  operator()(Args&&...)
-  {
-    CUDF_FAIL("Unsupported data type for operation");
-  }
+template <typename T>
+struct ArithmeticOps {
+  static constexpr bool is_supported() { return std::is_arithmetic_v<T>; }
 };
 
-template <typename UFN>
-struct NegateOpDispatcher {
-  template <typename T>
-  static constexpr bool is_supported()
-  {
-    return std::is_signed_v<T> || cudf::is_duration<T>();
-  }
-
-  template <typename T, std::enable_if_t<is_supported<T>()>* = nullptr>
-  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
-                                           rmm::cuda_stream_view stream,
-                                           rmm::device_async_resource_ref mr)
-  {
-    return transform_fn<T, UFN>(input.begin<T>(),
-                                input.end<T>(),
-                                cudf::detail::copy_bitmask(input, stream, mr),
-                                input.null_count(),
-                                stream,
-                                mr);
-  }
-
-  template <typename T, typename... Args>
-  std::enable_if_t<!is_supported<T>(), std::unique_ptr<cudf::column>> operator()(Args&&...)
-  {
-    CUDF_FAIL("Unsupported data type for negate operation");
-  }
+template <typename T>
+struct NegateOps {
+  static constexpr bool is_supported() { return std::is_signed_v<T> || cudf::is_duration<T>(); }
 };
 
-template <typename UFN>
-struct BitwiseOpDispatcher {
-  template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
-  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
-                                           rmm::cuda_stream_view stream,
-                                           rmm::device_async_resource_ref mr)
-  {
-    return transform_fn<T, UFN>(input.begin<T>(),
-                                input.end<T>(),
-                                cudf::detail::copy_bitmask(input, stream, mr),
-                                input.null_count(),
-                                stream,
-                                mr);
-  }
-
-  struct dictionary_dispatch {
-    template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
-    std::unique_ptr<cudf::column> operator()(cudf::dictionary_column_view const& input,
-                                             rmm::cuda_stream_view stream,
-                                             rmm::device_async_resource_ref mr)
-    {
-      return transform_fn<T, UFN>(input, stream, mr);
-    }
+template <typename T>
+struct BitWiseOps {
+  static constexpr bool is_supported() { return std::is_integral_v<T>; }
+};
 
-    template <typename T, typename... Args>
-    std::enable_if_t<!std::is_integral_v<T>, std::unique_ptr<cudf::column>> operator()(Args&&...)
-    {
-      CUDF_FAIL("dictionary keys type not supported for this operation");
-    }
-  };
+template <typename T>
+struct FloatOnlyOps {
+  static constexpr bool is_supported() { return std::is_floating_point_v<T>; }
+};
 
-  template <typename T,
-            std::enable_if_t<!std::is_integral_v<T> and std::is_same_v<T, dictionary32>>* = nullptr>
+/**
+ * @brief Generic math-ops dispatcher
+ *
+ * Performs a transform on the input data using the operator defined by UFN.
+ * The Supported type determines which types are allowed by the operator.
+ *
+ * @tparam UFN The actual operator to perform on the input data
+ * @tparam Supported Contains the 'is_supported()' function
+ */
+template <typename UFN, template <typename> typename Supported>
+struct MathOpDispatcher {
+  template <typename T, std::enable_if_t<Supported<T>::is_supported()>* = nullptr>
   std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
                                            rmm::cuda_stream_view stream,
                                            rmm::device_async_resource_ref mr)
   {
-    if (input.is_empty()) return empty_like(input);
-    auto dictionary_col = dictionary_column_view(input);
-    return type_dispatcher(
-      dictionary_col.keys().type(), dictionary_dispatch{}, dictionary_col, stream, mr);
+    return (input.type().id() == type_id::DICTIONARY32)
+             ? transform_fn<T, UFN>(cudf::dictionary_column_view(input), stream, mr)
+             : transform_fn<T, UFN>(input, stream, mr);
   }
 
   template <typename T, typename... Args>
-  std::enable_if_t<!std::is_integral_v<T> and !std::is_same_v<T, dictionary32>,
-                   std::unique_ptr<cudf::column>>
-  operator()(Args&&...)
+  std::enable_if_t<!Supported<T>::is_supported(), std::unique_ptr<cudf::column>> operator()(
+    Args&&...)
   {
-    CUDF_FAIL("Unsupported datatype for operation");
+    CUDF_FAIL("Unsupported data type for this operation");
   }
 };
 
@@ -525,54 +448,26 @@ struct LogicalOpDispatcher {
                                            rmm::cuda_stream_view stream,
                                            rmm::device_async_resource_ref mr)
   {
-    return transform_fn<bool, UFN>(input.begin<T>(),
-                                   input.end<T>(),
-                                   cudf::detail::copy_bitmask(input, stream, mr),
-                                   input.null_count(),
-
-                                   stream,
-                                   mr);
-  }
-
-  struct dictionary_dispatch {
-    template <typename T, std::enable_if_t<is_supported<T>()>* = nullptr>
-    std::unique_ptr<cudf::column> operator()(cudf::dictionary_column_view const& input,
-                                             rmm::cuda_stream_view stream,
-                                             rmm::device_async_resource_ref mr)
-    {
-      auto dictionary_view = cudf::column_device_view::create(input.parent(), stream);
+    if (input.type().id() == type_id::DICTIONARY32) {
+      auto dictionary_view = cudf::column_device_view::create(input, stream);
       auto dictionary_itr  = dictionary::detail::make_dictionary_iterator<T>(*dictionary_view);
       return transform_fn<bool, UFN>(dictionary_itr,
                                      dictionary_itr + input.size(),
-                                     cudf::detail::copy_bitmask(input.parent(), stream, mr),
+                                     cudf::detail::copy_bitmask(input, stream, mr),
                                      input.null_count(),
                                      stream,
                                      mr);
     }
-
-    template <typename T, typename... Args>
-    std::enable_if_t<!is_supported<T>(), std::unique_ptr<cudf::column>> operator()(Args&&...)
-    {
-      CUDF_FAIL("dictionary keys type not supported for this operation");
-    }
-  };
-
-  template <typename T,
-            std::enable_if_t<!is_supported<T>() and std::is_same_v<T, dictionary32>>* = nullptr>
-  std::unique_ptr<cudf::column> operator()(cudf::column_view const& input,
-                                           rmm::cuda_stream_view stream,
-                                           rmm::device_async_resource_ref mr)
-  {
-    if (input.is_empty()) return make_empty_column(cudf::data_type{cudf::type_id::BOOL8});
-    auto dictionary_col = dictionary_column_view(input);
-    return type_dispatcher(
-      dictionary_col.keys().type(), dictionary_dispatch{}, dictionary_col, stream, mr);
+    return transform_fn<bool, UFN>(input.begin<T>(),
+                                   input.end<T>(),
+                                   cudf::detail::copy_bitmask(input, stream, mr),
+                                   input.null_count(),
+                                   stream,
+                                   mr);
   }
 
   template <typename T, typename... Args>
-  std::enable_if_t<!is_supported<T>() and !std::is_same_v<T, dictionary32>,
-                   std::unique_ptr<cudf::column>>
-  operator()(Args&&...)
+  std::enable_if_t<!is_supported<T>(), std::unique_ptr<cudf::column>> operator()(Args&&...)
   {
     CUDF_FAIL("Unsupported datatype for operation");
   }
@@ -614,79 +509,85 @@ std::unique_ptr<cudf::column> unary_operation(cudf::column_view const& input,
   if (cudf::is_fixed_point(input.type()))
     return type_dispatcher(input.type(), detail::FixedPointOpDispatcher{}, input, op, stream, mr);
 
+  if (input.is_empty()) {
+    return op == cudf::unary_operator::NOT ? make_empty_column(type_id::BOOL8) : empty_like(input);
+  }
+
+  // dispatch on the keys if dictionary saves a 2nd dispatch later
+  auto dispatch_type = input.type().id() == type_id::DICTIONARY32
+                         ? dictionary_column_view(input).keys().type()
+                         : input.type();
+
   switch (op) {
     case cudf::unary_operator::SIN:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceSin>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceSin, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::COS:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceCos>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceCos, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::TAN:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceTan>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceTan, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::ARCSIN:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceArcSin>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceArcSin, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::ARCCOS:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceArcCos>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceArcCos, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::ARCTAN:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceArcTan>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceArcTan, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::SINH:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceSinH>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceSinH, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::COSH:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceCosH>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceCosH, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::TANH:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceTanH>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceTanH, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::ARCSINH:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceArcSinH>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceArcSinH, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::ARCCOSH:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceArcCosH>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceArcCosH, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::ARCTANH:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceArcTanH>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceArcTanH, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::EXP:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceExp>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceExp, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::LOG:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceLog>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceLog, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::SQRT:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceSqrt>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceSqrt, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::CBRT:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceCbrt>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceCbrt, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::CEIL:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceCeil>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceCeil, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::FLOOR:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceFloor>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceFloor, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::ABS:
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceAbs>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceAbs, ArithmeticOps>{}, input, stream, mr);
     case cudf::unary_operator::RINT:
-      CUDF_EXPECTS(
-        (input.type().id() == type_id::FLOAT32) or (input.type().id() == type_id::FLOAT64),
-        "rint expects floating point values");
       return cudf::type_dispatcher(
-        input.type(), detail::MathOpDispatcher<detail::DeviceRInt>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceRInt, FloatOnlyOps>{}, input, stream, mr);
     case cudf::unary_operator::BIT_INVERT:
       return cudf::type_dispatcher(
-        input.type(), detail::BitwiseOpDispatcher<detail::DeviceInvert>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceInvert, BitWiseOps>{}, input, stream, mr);
     case cudf::unary_operator::NOT:
       return cudf::type_dispatcher(
-        input.type(), detail::LogicalOpDispatcher<detail::DeviceNot>{}, input, stream, mr);
+        dispatch_type, detail::LogicalOpDispatcher<DeviceNot>{}, input, stream, mr);
     case cudf::unary_operator::NEGATE:
       return cudf::type_dispatcher(
-        input.type(), detail::NegateOpDispatcher<detail::DeviceNegate>{}, input, stream, mr);
+        dispatch_type, MathOpDispatcher<DeviceNegate, NegateOps>{}, input, stream, mr);
     default: CUDF_FAIL("Undefined unary operation");
   }
 }

From c99f393b61a41893b02709ecdc166f7f2a1fbcb2 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed, 19 Feb 2025 13:31:45 -0500
Subject: [PATCH 3/8] Skip the failing connectorx polars tests (#18037)

In #18015, we tried skipping the failing polars tests and applying the
workaround mentioned in polars issue 21274. But pip is [unable to solve
our test
environment](https://github.com/rapidsai/cudf/actions/runs/13406947992/job/37463788766)
in that case. This PR just skips the tests because we only need to do
one or the other, not both.
---
 python/cudf_polars/cudf_polars/testing/plugin.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index 0b52cf1c61c..e56d906833f 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -214,6 +214,10 @@ def pytest_configure(config: pytest.Config) -> None:
     "tests/unit/streaming/test_streaming_group_by.py::test_streaming_group_by_literal[1]": "May segfault w/the legacy streaming engine",
     # Fails in CI, but passes locally
     "tests/unit/streaming/test_streaming.py::test_streaming_streamable_functions": "RuntimeError: polars_python::sql::PySQLContext is unsendable, but is being dropped on another thread",
+    # TODO: Remove once when we support polars 1.23
+    "tests/unit/io/database/test_read.py::test_read_database[uri: connectorx]": "ValueError: arrow2",
+    "tests/unit/io/database/test_read.py::test_read_database_cx_credentials[fakedb://123:456@account/database/schema?warehouse=warehouse&role=role]": "ValueError: arrow2",
+    "tests/unit/io/database/test_read.py::test_read_database_cx_credentials[fakedb://my#%us3r:p433w0rd@not_a_real_host:9999/database]": "ValueError: arrow2",
 }
 
 

From e500794479c3b1a23c1a12c8425d9120424871f8 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 19 Feb 2025 10:47:37 -0800
Subject: [PATCH 4/8] remove pip install

---
 ci/test_cudf_polars_polars_tests.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh
index 909abbe9d1e..3466edacfc5 100755
--- a/ci/test_cudf_polars_polars_tests.sh
+++ b/ci/test_cudf_polars_polars_tests.sh
@@ -27,8 +27,6 @@ git clone https://github.com/pola-rs/polars.git --branch "${TAG}" --depth 1
 # Install requirements for running polars tests
 rapids-logger "Install polars test requirements"
 rapids-pip-retry install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt
-# TODO: Workaround until https://github.com/pola-rs/polars/issues/21274 is fixed.
-rapids-pip-retry install connectorx==0.4.1
 
 # shellcheck disable=SC2317
 function set_exitcode()

From 3117dc26b8466ac8e2c64574ab0b26cc621a44ff Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed, 19 Feb 2025 14:45:15 -0500
Subject: [PATCH 5/8] Bump polars version to <1.23 (#17986)

This PR raises the upper bound on the supported Polars version to allow 1.22 (`polars>=1.20,<1.23`).

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - James Lamb (https://github.com/jameslamb)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/17986
---
 .../all_cuda-118_arch-x86_64.yaml             |  2 +-
 .../all_cuda-128_arch-x86_64.yaml             |  2 +-
 conda/recipes/cudf-polars/meta.yaml           |  2 +-
 dependencies.yaml                             |  2 +-
 python/cudf_polars/cudf_polars/dsl/ir.py      | 43 ++++++++++++++++---
 .../cudf_polars/cudf_polars/dsl/translate.py  | 28 +++++++++---
 .../cudf_polars/cudf_polars/testing/plugin.py |  3 ++
 python/cudf_polars/pyproject.toml             |  2 +-
 python/cudf_polars/tests/test_mapfunction.py  | 13 +++++-
 9 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 09eb9949f1d..4ec6ef1883a 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -66,7 +66,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.4dev0
 - pandoc
-- polars>=1.20,<1.22
+- polars>=1.20,<1.23
 - pre-commit
 - ptxcompiler
 - pyarrow>=14.0.0,<20.0.0a0
diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml
index 56cef28ac61..dcf96a02a36 100644
--- a/conda/environments/all_cuda-128_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -64,7 +64,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.4dev0
 - pandoc
-- polars>=1.20,<1.22
+- polars>=1.20,<1.23
 - pre-commit
 - pyarrow>=14.0.0,<20.0.0a0
 - pydata-sphinx-theme>=0.15.4
diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml
index fb7ab9332d8..1d36ab2a3e4 100644
--- a/conda/recipes/cudf-polars/meta.yaml
+++ b/conda/recipes/cudf-polars/meta.yaml
@@ -43,7 +43,7 @@ requirements:
   run:
     - python
     - pylibcudf ={{ version }}
-    - polars >=1.20,<1.22
+    - polars >=1.20,<1.23
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
 
 test:
diff --git a/dependencies.yaml b/dependencies.yaml
index 7188e10b058..c8893fc8b49 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -803,7 +803,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - polars>=1.20,<1.22
+          - polars>=1.20,<1.23
   run_cudf_polars_experimental:
     common:
       - output_types: [conda, requirements, pyproject]
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 8f12a4a7570..603f51e9d40 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -1650,6 +1650,16 @@ def do_evaluate(cls, schema: Schema, df: DataFrame) -> DataFrame:
         return DataFrame(columns)
 
 
+class MergeSorted(IR):
+    """Merge sorted operation."""
+
+    def __init__(self, schema: Schema, left: IR, right: IR, key: str):
+        # libcudf merge is not stable wrt order of inputs, since
+        # it uses a priority queue to manage the tables it produces.
+        # See: https://github.com/rapidsai/cudf/issues/16010
+        raise NotImplementedError("MergeSorted not yet implemented")
+
+
 class MapFunction(IR):
     """Apply some function to a dataframe."""
 
@@ -1663,13 +1673,10 @@ class MapFunction(IR):
     _NAMES: ClassVar[frozenset[str]] = frozenset(
         [
             "rechunk",
-            # libcudf merge is not stable wrt order of inputs, since
-            # it uses a priority queue to manage the tables it produces.
-            # See: https://github.com/rapidsai/cudf/issues/16010
-            # "merge_sorted",
             "rename",
             "explode",
             "unpivot",
+            "row_index",
         ]
     )
 
@@ -1678,8 +1685,12 @@ def __init__(self, schema: Schema, name: str, options: Any, df: IR):
         self.name = name
         self.options = options
         self.children = (df,)
-        if self.name not in MapFunction._NAMES:
-            raise NotImplementedError(f"Unhandled map function {self.name}")
+        if (
+            self.name not in MapFunction._NAMES
+        ):  # pragma: no cover; need more polars rust functions
+            raise NotImplementedError(
+                f"Unhandled map function {self.name}"
+            )  # pragma: no cover
         if self.name == "explode":
             (to_explode,) = self.options
             if len(to_explode) > 1:
@@ -1716,6 +1727,9 @@ def __init__(self, schema: Schema, name: str, options: Any, df: IR):
                 variable_name,
                 value_name,
             )
+        elif self.name == "row_index":
+            col_name, offset = options
+            self.options = (col_name, offset)
         self._non_child_args = (schema, name, self.options)
 
     @classmethod
@@ -1781,6 +1795,23 @@ def do_evaluate(
                     Column(value_column, name=value_name),
                 ]
             )
+        elif name == "row_index":
+            col_name, offset = options
+            dtype = schema[col_name]
+            step = plc.interop.from_arrow(
+                pa.scalar(1, type=plc.interop.to_arrow(dtype))
+            )
+            init = plc.interop.from_arrow(
+                pa.scalar(offset, type=plc.interop.to_arrow(dtype))
+            )
+            index_col = Column(
+                plc.filling.sequence(df.num_rows, init, step),
+                is_sorted=plc.types.Sorted.YES,
+                order=plc.types.Order.ASCENDING,
+                null_order=plc.types.NullOrder.AFTER,
+                name=col_name,
+            )
+            return DataFrame([index_col, *df.columns])
         else:
             raise AssertionError("Should never be reached")  # pragma: no cover
 
diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py
index 4ed36e463f3..22f97f2bf52 100644
--- a/python/cudf_polars/cudf_polars/dsl/translate.py
+++ b/python/cudf_polars/cudf_polars/dsl/translate.py
@@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR:
         # IR is versioned with major.minor, minor is bumped for backwards
         # compatible changes (e.g. adding new nodes), major is bumped for
         # incompatible changes (e.g. renaming nodes).
-        if (version := self.visitor.version()) >= (5, 1):
+        if (version := self.visitor.version()) >= (6, 1):
             e = NotImplementedError(
                 f"No support for polars IR {version=}"
             )  # pragma: no cover; no such version for now.
@@ -299,7 +299,7 @@ def _(
     # Join key dtypes are dependent on the schema of the left and
     # right inputs, so these must be translated with the relevant
     # input active.
-    def adjust_literal_dtype(literal: expr.Literal) -> expr.Literal:
+    def adjust_literal_dtype(literal: expr.Literal) -> expr.Literal:  # pragma: no cover
         if literal.dtype.id() == plc.types.TypeId.INT32:
             plc_int64 = plc.types.DataType(plc.types.TypeId.INT64)
             return expr.Literal(
@@ -308,7 +308,7 @@ def adjust_literal_dtype(literal: expr.Literal) -> expr.Literal:
             )
         return literal
 
-    def maybe_adjust_binop(e) -> expr.Expr:
+    def maybe_adjust_binop(e) -> expr.Expr:  # pragma: no cover
         if isinstance(e.value, expr.BinOp):
             left, right = e.value.children
             if isinstance(left, expr.Col) and isinstance(right, expr.Literal):
@@ -323,10 +323,10 @@ def translate_expr_and_maybe_fix_binop_args(translator, exprs):
         ]
 
     with set_node(translator.visitor, node.input_left):
+        # TODO: There's a bug in the polars type coercion phase.
+        # Use translate_named_expr directly once our minimum
+        # supported polars version is 1.22
         inp_left = translator.translate_ir(n=None)
-        # TODO: There's bug in the polars type coercion phase. Use
-        # translate_named_expr directly once it is resolved.
-        # Tracking issue: https://github.com/pola-rs/polars/issues/20935
         left_on = translate_expr_and_maybe_fix_binop_args(translator, node.left_on)
     with set_node(translator.visitor, node.input_right):
         inp_right = translator.translate_ir(n=None)
@@ -463,6 +463,21 @@ def _(
     return ir.Projection(schema, translator.translate_ir(n=node.input))
 
 
+@_translate_ir.register
+def _(
+    node: pl_ir.MergeSorted, translator: Translator, schema: dict[str, plc.DataType]
+) -> ir.IR:
+    inp_left = translator.translate_ir(n=node.input_left)
+    inp_right = translator.translate_ir(n=node.input_right)
+    key = node.key
+    return ir.MergeSorted(
+        schema,
+        inp_left,
+        inp_right,
+        key,
+    )
+
+
 @_translate_ir.register
 def _(
     node: pl_ir.MapFunction, translator: Translator, schema: dict[str, plc.DataType]
@@ -472,7 +487,6 @@ def _(
         schema,
         name,
         options,
-        # TODO: merge_sorted breaks this pattern
         translator.translate_ir(n=node.input),
     )
 
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index 48629af920d..cf1bfbe8a69 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -193,6 +193,9 @@ def pytest_configure(config: pytest.Config) -> None:
     "tests/unit/test_cse.py::test_cse_predicate_self_join": "Debug output on stderr doesn't match",
     "tests/unit/test_empty.py::test_empty_9137": "Mismatching dtypes, needs cudf#15852",
     "tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised",
+    "tests/unit/io/test_multiscan.py::test_include_file_paths[scan_parquet-write_parquet]": "Need to expose include_file_paths xref: cudf#18012",
+    "tests/unit/io/test_multiscan.py::test_include_file_paths[scan_csv-write_csv]": "Need to expose include_file_paths xref: cudf#18012",
+    "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics[False]": "Debug output on stderr doesn't match",
     # Maybe flaky, order-dependent?
     "tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order",
     "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero",
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index 805d7925bb4..872c08a66f9 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -19,7 +19,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.10"
 dependencies = [
-    "polars>=1.20,<1.22",
+    "polars>=1.20,<1.23",
     "pylibcudf==25.4.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
diff --git a/python/cudf_polars/tests/test_mapfunction.py b/python/cudf_polars/tests/test_mapfunction.py
index 63aa1c573a9..7a9f4a56545 100644
--- a/python/cudf_polars/tests/test_mapfunction.py
+++ b/python/cudf_polars/tests/test_mapfunction.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
@@ -93,3 +93,14 @@ def test_unpivot_defaults():
     )
     q = df.unpivot(index="d")
     assert_gpu_result_equal(q)
+
+
+def test_with_row_index_defaults():
+    lf = pl.LazyFrame(
+        {
+            "a": [1, 3, 5],
+            "b": [2, 4, 6],
+        }
+    )
+    q = lf.with_row_index()
+    assert_gpu_result_equal(q)

From abffae8fa2bd43d3285d0ec1f684cbad9582dc9d Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 19 Feb 2025 21:09:36 -0600
Subject: [PATCH 6/8] Prevent setting custom attributes to `ColumnMethods`
 (#18005)

Fixes: #17750

This PR disallows setting custom attributes on `ColumnMethods` objects.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/18005
---
 python/cudf/cudf/core/column/methods.py |  8 +++++++-
 python/cudf/cudf/tests/test_list.py     |  7 +++++++
 python/cudf/cudf/tests/test_string.py   | 12 ++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py
index a91c080fe21..b42e4419d72 100644
--- a/python/cudf/cudf/core/column/methods.py
+++ b/python/cudf/cudf/core/column/methods.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -93,3 +93,9 @@ def _return_or_inplace(
                 return cudf.Index._from_column(new_col, name=self._parent.name)
             else:
                 return self._parent._mimic_inplace(new_col, inplace=False)
+
+    def __setattr__(self, key, value):
+        if key in {"_parent", "_column"}:
+            super().__setattr__(key, value)
+        else:
+            raise AttributeError(f"You cannot add any new attribute '{key}'")
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 3ffbd5ff2a8..3de733f1de2 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -956,6 +956,13 @@ def test_empty_nested_list_uninitialized_offsets_memory_usage():
     assert ser.memory_usage() == 8
 
 
+def test_list_methods_setattr():
+    ser = cudf.Series([["a", "b", "c"], ["d", "e", "f"]])
+
+    with pytest.raises(AttributeError):
+        ser.list.a = "b"
+
+
 def test_dataframe_list_round_trip():
     data = [{"text": "hello", "list_col": np.asarray([1, 2], dtype="uint32")}]
     cudf_arrow = cudf.DataFrame(data).to_arrow()
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 809fedfde7b..164fcb06624 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -3575,3 +3575,15 @@ def test_replace_invalid_scalar_repl():
     ser = cudf.Series(["1"])
     with pytest.raises(TypeError):
         ser.str.replace("1", 2)
+
+
+def test_string_methods_setattr():
+    ser = cudf.Series(["ab", "cd", "ef"])
+    pser = ser.to_pandas()
+
+    assert_exceptions_equal(
+        lfunc=ser.str.__setattr__,
+        rfunc=pser.str.__setattr__,
+        lfunc_args_and_kwargs=(("a", "b"),),
+        rfunc_args_and_kwargs=(("a", "b"),),
+    )

From 3c06da355e22162d167912a093b39c465cf4057a Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed, 19 Feb 2025 22:46:45 -0500
Subject: [PATCH 7/8] Expose `num_rows_per_source` (IO metadata) to pylibcudf
 (#18049)

Closes #18048

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/18049
---
 python/pylibcudf/pylibcudf/io/types.pyi       |  2 ++
 python/pylibcudf/pylibcudf/io/types.pyx       | 10 ++++++-
 .../pylibcudf/tests/io/test_types.py          | 26 ++++++++++++++++++-
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi
index 63fa9d1ff79..1463f4d0073 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyi
+++ b/python/pylibcudf/pylibcudf/io/types.pyi
@@ -101,6 +101,8 @@ class TableWithMetadata:
     def child_names(self) -> ChildNameSpec: ...
     @property
     def per_file_user_data(self) -> list[Mapping[str, str]]: ...
+    @property
+    def num_rows_per_source(self) -> list[int]: ...
 
 class SourceInfo:
     def __init__(
diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
index 458595ca0e0..83330cf14ff 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 from cpython.buffer cimport PyBUF_READ
 from cpython.memoryview cimport PyMemoryView_FromMemory
@@ -401,6 +401,14 @@ cdef class TableWithMetadata:
         """
         return self.metadata.per_file_user_data
 
+    @property
+    def num_rows_per_source(self):
+        """
+        Returns a list containing the number
+        of rows for each file being read in.
+        """
+        return self.metadata.num_rows_per_source
+
 
 cdef class SourceInfo:
     """A class containing details on a source to read from.
diff --git a/python/pylibcudf/pylibcudf/tests/io/test_types.py b/python/pylibcudf/pylibcudf/tests/io/test_types.py
index a7642556bf2..b14e7770e7b 100644
--- a/python/pylibcudf/pylibcudf/tests/io/test_types.py
+++ b/python/pylibcudf/pylibcudf/tests/io/test_types.py
@@ -1,13 +1,28 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 import gc
 import weakref
 
 import pyarrow as pa
+import pytest
 
 import pylibcudf as plc
 
 
+@pytest.fixture
+def parquet_data(tmp_path):
+    tbl1 = pa.Table.from_pydict({"a": [3, 1, 4], "b": [1, 5, 9]})
+    tbl2 = pa.Table.from_pydict({"a": [1, 6], "b": [1, 8]})
+
+    path1 = tmp_path / "tbl1.parquet"
+    path2 = tmp_path / "tbl2.parquet"
+
+    pa.parquet.write_table(tbl1, path1)
+    pa.parquet.write_table(tbl2, path2)
+
+    return [path1, path2]
+
+
 def test_gc_with_table_and_column_input_metadata():
     class Foo(plc.io.types.TableInputMetadata):
         def __del__(self):
@@ -26,3 +41,12 @@ def __del__(self):
     gc.collect()
 
     assert weak_tbl_meta() is None
+
+
+def test_num_rows_per_resource(parquet_data):
+    source = plc.io.SourceInfo(parquet_data)
+    options = plc.io.parquet.ParquetReaderOptions.builder(source).build()
+    assert plc.io.parquet.read_parquet(options).num_rows_per_source == [3, 2]
+
+
+# TODO: Test more IO types

From eb5c309d24a9267656bb33d93ff90e4a2b12af89 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 19 Feb 2025 22:03:02 -0800
Subject: [PATCH 8/8] Pass more dtype objects to `astype` calls (#18044)

Broken off from https://github.com/rapidsai/cudf/pull/17978

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Matthew Murray (https://github.com/Matt711)

URL: https://github.com/rapidsai/cudf/pull/18044
---
 python/cudf/cudf/core/column/categorical.py | 14 ++++--------
 python/cudf/cudf/core/column/column.py      |  2 +-
 python/cudf/cudf/core/dtypes.py             |  2 +-
 python/cudf/cudf/core/index.py              |  2 +-
 python/cudf/cudf/core/indexed_frame.py      |  2 +-
 python/cudf/cudf/core/join/_join_helpers.py |  5 +++--
 python/cudf/cudf/core/tools/datetimes.py    |  4 ++--
 python/cudf/cudf/tests/test_dataframe.py    | 24 +++++++++++++++------
 8 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index a789d5d5ab1..a57ff9a7817 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -811,21 +811,15 @@ def to_pandas(
 
     def to_arrow(self) -> pa.Array:
         """Convert to PyArrow Array."""
-        # arrow doesn't support unsigned codes
+        # pyarrow.Table doesn't support unsigned codes
         signed_type = (
             min_signed_type(self.codes.max())
             if self.codes.size > 0
-            else np.int8
+            else np.dtype(np.int8)
         )
-        codes = self.codes.astype(signed_type)
-        categories = self.categories
-
-        out_indices = codes.to_arrow()
-        out_dictionary = categories.to_arrow()
-
         return pa.DictionaryArray.from_arrays(
-            out_indices,
-            out_dictionary,
+            self.codes.astype(signed_type).to_arrow(),
+            self.categories.to_arrow(),
             ordered=self.ordered,
         )
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d281076690a..06dc4058115 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1629,7 +1629,7 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase:
             elif isinstance(dtype, IntervalDtype):
                 result = self.as_interval_column(dtype)
             elif isinstance(dtype, (ListDtype, StructDtype)):
-                if not self.dtype == dtype:
+                if self.dtype != dtype:
                     raise NotImplementedError(
                         f"Casting {self.dtype} columns not currently supported"
                     )
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 983950580d0..12a9cce9f1c 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -262,7 +262,7 @@ def _init_categories(
             getattr(categories, "dtype", None),
             (cudf.IntervalDtype, pd.IntervalDtype),
         ):
-            dtype = "object"  # type: Any
+            dtype = CUDF_STRING_DTYPE
         else:
             dtype = None
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 8ce8dfd2198..8587bff2e32 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -3135,7 +3135,7 @@ def __init__(
             data = column.as_column(data)
         else:
             data = column.as_column(
-                data, dtype="category" if dtype is None else dtype
+                data, dtype=cudf.CategoricalDtype() if dtype is None else dtype
             )
             # dtype has already been taken care
             dtype = None
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index ac4303394f7..9c48b31a309 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -6517,7 +6517,7 @@ def convert_dtypes(
             for col in self._columns:
                 if col.dtype.kind == "f":
                     col = col.fillna(0)
-                    as_int = col.astype("int64")
+                    as_int = col.astype(np.dtype(np.int64))
                     if cp.allclose(col, as_int):
                         cols.append(as_int)
                         continue
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 854c44ff1a1..c329bf11d97 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
@@ -114,7 +114,8 @@ def _match_join_keys(
 
     if how == "left" and rcol.fillna(0).can_cast_safely(ltype):
         return lcol, rcol.astype(ltype)
-
+    elif common_type is None:
+        common_type = np.dtype(np.float64)
     return lcol.astype(common_type), rcol.astype(common_type)
 
 
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 546abfc4d3d..4478be2fd04 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -369,9 +369,9 @@ def _process_col(
     elif col.dtype.kind == "O":
         if unit not in (None, "ns") or col.null_count == len(col):
             try:
-                col = col.astype(dtype="int64")
+                col = col.astype(np.dtype(np.int64))
             except ValueError:
-                col = col.astype(dtype="float64")
+                col = col.astype(np.dtype(np.float64))
             return _process_col(
                 col=col,
                 unit=unit,
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 05bc221bf9d..15c11db5a84 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -4343,21 +4343,27 @@ def test_as_column_types():
 
     assert_eq(pds, gds)
 
-    col = column.as_column(cudf.Series([], dtype="float64"), dtype="float32")
+    col = column.as_column(
+        cudf.Series([], dtype="float64"), dtype=np.dtype(np.float32)
+    )
     assert_eq(col.dtype, np.dtype("float32"))
     gds = cudf.Series._from_column(col)
     pds = pd.Series(pd.Series([], dtype="float32"))
 
     assert_eq(pds, gds)
 
-    col = column.as_column(cudf.Series([], dtype="float64"), dtype="str")
+    col = column.as_column(
+        cudf.Series([], dtype="float64"), dtype=cudf.dtype("str")
+    )
     assert_eq(col.dtype, np.dtype("object"))
     gds = cudf.Series._from_column(col)
     pds = pd.Series(pd.Series([], dtype="str"))
 
     assert_eq(pds, gds)
 
-    col = column.as_column(cudf.Series([], dtype="float64"), dtype="object")
+    col = column.as_column(
+        cudf.Series([], dtype="float64"), dtype=cudf.dtype("str")
+    )
     assert_eq(col.dtype, np.dtype("object"))
     gds = cudf.Series._from_column(col)
     pds = pd.Series(pd.Series([], dtype="object"))
@@ -4366,7 +4372,7 @@ def test_as_column_types():
 
     pds = pd.Series(np.array([1, 2, 3]), dtype="float32")
     gds = cudf.Series._from_column(
-        column.as_column(np.array([1, 2, 3]), dtype="float32")
+        column.as_column(np.array([1, 2, 3]), dtype=np.dtype(np.float32))
     )
 
     assert_eq(pds, gds)
@@ -4389,14 +4395,18 @@ def test_as_column_types():
 
     pds = pd.Series([1.2, 18.0, 9.0], dtype="float32")
     gds = cudf.Series._from_column(
-        column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="float32")
+        column.as_column(
+            cudf.Series([1.2, 18.0, 9.0]), dtype=np.dtype(np.float32)
+        )
     )
 
     assert_eq(pds, gds)
 
     pds = pd.Series([1.2, 18.0, 9.0], dtype="str")
     gds = cudf.Series._from_column(
-        column.as_column(cudf.Series([1.2, 18.0, 9.0]), dtype="str")
+        column.as_column(
+            cudf.Series([1.2, 18.0, 9.0]), dtype=cudf.dtype("str")
+        )
     )
 
     assert_eq(pds, gds)
@@ -5228,7 +5238,7 @@ def test_empty_df_astype(dtype):
 )
 def test_series_astype_error_handling(errors):
     sr = cudf.Series(["random", "words"])
-    got = sr.astype("datetime64", errors=errors)
+    got = sr.astype("datetime64[ns]", errors=errors)
     assert_eq(sr, got)