From 45a73291d4b9aa9f668405549ecce6e5df29eb7d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 6 Jan 2025 14:29:59 -0600 Subject: [PATCH 1/2] remove find_package(Python) in libcudf build (#17683) Nothing in `libcudf`'s CMake should need a Python interpreter or linking to Python components. This proposes removing the `find(Python)` there, to simplify that build: https://github.com/rapidsai/cudf/blob/955b1f4566abccf920a022dc78a1e654acf0de16/python/libcudf/CMakeLists.txt#L37-L38 Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17683 --- .github/CODEOWNERS | 7 +++---- python/libcudf/CMakeLists.txt | 5 +---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5e2f46714d9..e0b315f34fc 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,10 +8,9 @@ notebooks/ @rapidsai/cudf-python-codeowners python/dask_cudf/ @rapidsai/cudf-dask-codeowners #cmake code owners -cpp/CMakeLists.txt @rapidsai/cudf-cmake-codeowners -cpp/libcudf_kafka/CMakeLists.txt @rapidsai/cudf-cmake-codeowners -**/cmake/ @rapidsai/cudf-cmake-codeowners -*.cmake @rapidsai/cudf-cmake-codeowners +CMakeLists.txt @rapidsai/cudf-cmake-codeowners +**/cmake/ @rapidsai/cudf-cmake-codeowners +*.cmake @rapidsai/cudf-cmake-codeowners #java code owners java/ @rapidsai/cudf-java-codeowners diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index 5f9a04d3cee..259492b98d1 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -34,9 +34,6 @@ endif() unset(cudf_FOUND) -# Find Python early so that later commands can use it -find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) - set(BUILD_TESTS OFF) set(BUILD_BENCHMARKS OFF) set(CUDF_BUILD_TESTUTIL OFF) From b81d9e17fbffbb912e0128148f556bf7af41b6ab Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:25:05 -0800 Subject: [PATCH 2/2] Fix cudf.polars sum of empty not equalling zero (#17685) closes #17681 (We have a similar carve-out in cudf classic due to `sum([]) == 0` in Python) Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/17685 --- .../cudf_polars/dsl/expressions/aggregation.py | 14 +++++++++++++- python/cudf_polars/tests/expressions/test_agg.py | 8 +++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py index b88b109a975..92f39abe71e 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py @@ -91,7 +91,7 @@ def __init__( op = partial(self._reduce, request=req) elif name in {"min", "max"}: op = partial(op, propagate_nans=options) - elif name in {"count", "first", "last"}: + elif name in {"count", "sum", "first", "last"}: pass else: raise NotImplementedError( @@ -180,6 +180,18 @@ def _count(self, column: Column) -> Column: ) ) + def _sum(self, column: Column) -> Column: + if column.obj.size() == 0: + return Column( + plc.Column.from_scalar( + plc.interop.from_arrow( + pa.scalar(0, type=plc.interop.to_arrow(self.dtype)) + ), + 1, + ) + ) + return self._reduce(column, request=plc.aggregation.sum()) + def _min(self, column: Column, *, propagate_nans: bool) -> Column: if propagate_nans and column.nan_count > 0: return Column( diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py index 86cb2352dcc..15ad845ea78 100644 --- a/python/cudf_polars/tests/expressions/test_agg.py +++ b/python/cudf_polars/tests/expressions/test_agg.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -148,3 +148,9 @@ def test_agg_singleton(op): q = df.select(op(pl.col("a"))) assert_gpu_result_equal(q) + + +def test_sum_empty_zero(): + df = pl.LazyFrame({"a": pl.Series(values=[], dtype=pl.Int32())}) + q = df.select(pl.col("a").sum()) + assert_gpu_result_equal(q)