Skip to content

Commit

Permalink
Bump Polars version to <1.18 (#17632)
Browse files Browse the repository at this point in the history
This PR upgrades the Polars version to 1.17. It xfails some polars tests due to known issues and adds the `maintain_order` param to joins (not implemented yet).

Notable change

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

URL: #17632
  • Loading branch information
Matt711 authored Jan 9, 2025
1 parent 76f1c8b commit cb77046
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 16 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.15
- polars>=1.11,<1.18
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<19.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.15
- polars>=1.11,<1.18
- pre-commit
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -43,7 +43,7 @@ requirements:
run:
- python
- pylibcudf ={{ version }}
- polars >=1.11,<1.15
- polars >=1.11,<1.18
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=1.11,<1.15
- polars>=1.11,<1.18
run_cudf_polars_experimental:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
42 changes: 36 additions & 6 deletions python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
"""
DSL nodes for the LogicalPlan of polars.
Expand Down Expand Up @@ -34,9 +34,11 @@
from cudf_polars.utils.versions import POLARS_VERSION_GT_112

if TYPE_CHECKING:
from collections.abc import Callable, Hashable, MutableMapping, Sequence
from collections.abc import Callable, Hashable, Iterable, MutableMapping, Sequence
from typing import Literal

from polars.polars import _expr_nodes as pl_expr

from cudf_polars.typing import Schema


Expand Down Expand Up @@ -1019,7 +1021,27 @@ class ConditionalJoin(IR):
__slots__ = ("ast_predicate", "options", "predicate")
_non_child = ("schema", "predicate", "options")
predicate: expr.Expr
options: tuple
"""Expression predicate to join on"""
options: tuple[
tuple[
str,
pl_expr.Operator | Iterable[pl_expr.Operator],
],
bool,
tuple[int, int] | None,
str,
bool,
Literal["none", "left", "right", "left_right", "right_left"],
]
"""
tuple of options:
- predicates: tuple of ir join type (eg. ie_join) and (In)Equality conditions
- join_nulls: do nulls compare equal?
- slice: optional slice to perform after joining.
- suffix: string suffix for right columns if names match
- coalesce: should key columns be coalesced (only makes sense for outer joins)
- maintain_order: which DataFrame row order to preserve, if any
"""

def __init__(
self, schema: Schema, predicate: expr.Expr, options: tuple, left: IR, right: IR
Expand All @@ -1029,22 +1051,24 @@ def __init__(
self.options = options
self.children = (left, right)
self.ast_predicate = to_ast(predicate)
_, join_nulls, zlice, suffix, coalesce = self.options
_, join_nulls, zlice, suffix, coalesce, maintain_order = self.options
# Preconditions from polars
assert not join_nulls
assert not coalesce
assert maintain_order == "none"
if self.ast_predicate is None:
raise NotImplementedError(
f"Conditional join with predicate {predicate}"
) # pragma: no cover; polars never delivers expressions we can't handle
self._non_child_args = (self.ast_predicate, zlice, suffix)
self._non_child_args = (self.ast_predicate, zlice, suffix, maintain_order)

@classmethod
def do_evaluate(
cls,
predicate: plc.expressions.Expression,
zlice: tuple[int, int] | None,
suffix: str,
maintain_order: Literal["none", "left", "right", "left_right", "right_left"],
left: DataFrame,
right: DataFrame,
) -> DataFrame:
Expand Down Expand Up @@ -1088,6 +1112,7 @@ class Join(IR):
tuple[int, int] | None,
str,
bool,
Literal["none", "left", "right", "left_right", "right_left"],
]
"""
tuple of options:
Expand All @@ -1096,6 +1121,7 @@ class Join(IR):
- slice: optional slice to perform after joining.
- suffix: string suffix for right columns if names match
- coalesce: should key columns be coalesced (only makes sense for outer joins)
- maintain_order: which DataFrame row order to preserve, if any
"""

def __init__(
Expand All @@ -1113,6 +1139,9 @@ def __init__(
self.options = options
self.children = (left, right)
self._non_child_args = (self.left_on, self.right_on, self.options)
# TODO: Implement maintain_order
if options[5] != "none":
raise NotImplementedError("maintain_order not implemented yet")
if any(
isinstance(e.value, expr.Literal)
for e in itertools.chain(self.left_on, self.right_on)
Expand Down Expand Up @@ -1222,12 +1251,13 @@ def do_evaluate(
tuple[int, int] | None,
str,
bool,
Literal["none", "left", "right", "left_right", "right_left"],
],
left: DataFrame,
right: DataFrame,
) -> DataFrame:
"""Evaluate and return a dataframe."""
how, join_nulls, zlice, suffix, coalesce = options
how, join_nulls, zlice, suffix, coalesce, _ = options
if how == "cross":
# Separate implementation, since cross_join returns the
# result, not the gather maps
Expand Down
4 changes: 2 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""Translate polars IR representation to ours."""
Expand Down Expand Up @@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR:
# IR is versioned with major.minor, minor is bumped for backwards
# compatible changes (e.g. adding new nodes), major is bumped for
# incompatible changes (e.g. renaming nodes).
if (version := self.visitor.version()) >= (4, 0):
if (version := self.visitor.version()) >= (4, 3):
e = NotImplementedError(
f"No support for polars IR {version=}"
) # pragma: no cover; no such version for now.
Expand Down
21 changes: 21 additions & 0 deletions python/cudf_polars/cudf_polars/testing/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ def pytest_configure(config: pytest.Config) -> None:
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR",
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR",
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[<lambda>-write_csv]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[read_parquet-<lambda>]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>0]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>2]": "Need to add include_file_path to IR",
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed",
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed",
"tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly",
Expand All @@ -140,6 +145,22 @@ def pytest_configure(config: pytest.Config) -> None:
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values",
"tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852",
"tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype",
Expand Down
4 changes: 2 additions & 2 deletions python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

[build-system]
build-backend = "rapids_build_backend.build"
Expand All @@ -19,7 +19,7 @@ authors = [
license = { text = "Apache 2.0" }
requires-python = ">=3.10"
dependencies = [
"polars>=1.11,<1.15",
"polars>=1.11,<1.18",
"pylibcudf==25.2.*,>=0.0.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
Expand Down
11 changes: 10 additions & 1 deletion python/cudf_polars/tests/test_join.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand Down Expand Up @@ -53,6 +53,15 @@ def right():
)


@pytest.mark.parametrize(
"maintain_order", ["left", "left_right", "right_left", "right"]
)
def test_join_maintain_order_param_unsupported(left, right, maintain_order):
q = left.join(right, on=pl.col("a"), how="inner", maintain_order=maintain_order)

assert_ir_translation_raises(q, NotImplementedError)


@pytest.mark.parametrize(
"join_expr",
[
Expand Down

0 comments on commit cb77046

Please sign in to comment.