From 925bb4972099af4c63c49ded0789f88f7c833627 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 11 Jun 2024 10:30:26 +0000 Subject: [PATCH 1/3] fix(rust): Reject non-integral offset and length in AExpr::Slice - Fixes #16855 --- .../logical_plan/conversion/type_coercion/mod.rs | 10 ++++++++++ crates/polars/tests/it/lazy/expressions/slice.rs | 2 +- py-polars/tests/unit/expr/test_exprs.py | 13 +++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/crates/polars-plan/src/logical_plan/conversion/type_coercion/mod.rs b/crates/polars-plan/src/logical_plan/conversion/type_coercion/mod.rs index ccaa3ba64e3a..51176b246839 100644 --- a/crates/polars-plan/src/logical_plan/conversion/type_coercion/mod.rs +++ b/crates/polars-plan/src/logical_plan/conversion/type_coercion/mod.rs @@ -440,6 +440,16 @@ impl OptimizationRule for TypeCoercionRule { options, }) }, + AExpr::Slice { offset, length, .. } => { + let input_schema = get_schema(lp_arena, lp_node); + let (_, offset_dtype) = + unpack!(get_aexpr_and_type(expr_arena, offset, &input_schema)); + polars_ensure!(offset_dtype.is_integer(), InvalidOperation: "offset must be integral for slice, not {}", offset_dtype); + let (_, length_dtype) = + unpack!(get_aexpr_and_type(expr_arena, length, &input_schema)); + polars_ensure!(length_dtype.is_integer(), InvalidOperation: "length must be integral for slice, not {}", length_dtype); + None + }, _ => None, }; Ok(out) diff --git a/crates/polars/tests/it/lazy/expressions/slice.rs b/crates/polars/tests/it/lazy/expressions/slice.rs index 3cc1010d5bb5..52ac5422a935 100644 --- a/crates/polars/tests/it/lazy/expressions/slice.rs +++ b/crates/polars/tests/it/lazy/expressions/slice.rs @@ -15,7 +15,7 @@ fn test_slice_args() -> PolarsResult<()> { ]? .lazy() .group_by_stable([col("groups")]) - .agg([col("vals").slice(lit(0i64), len() * lit(0.2))]) + .agg([col("vals").slice(lit(0i64), (len() * lit(0.2)).cast(DataType::Int32))]) .collect()?; let out = df.column("vals")?.explode()?; diff --git a/py-polars/tests/unit/expr/test_exprs.py b/py-polars/tests/unit/expr/test_exprs.py index e8fe369db24f..020879154411 100644 --- a/py-polars/tests/unit/expr/test_exprs.py +++ b/py-polars/tests/unit/expr/test_exprs.py @@ -732,3 +732,16 @@ def test_replace_no_cse() -> None: .explain() ) assert "POLARS_CSER" not in plan + + +def test_slice_rejects_non_integral() -> None: + df = pl.LazyFrame({"a": [0, 1, 2, 3], "b": [1.5, 2, 3, 4]}) + + with pytest.raises(pl.exceptions.InvalidOperationError): + df.select(pl.col("a").slice(pl.col("b").slice(0, 1), None)).collect() + + with pytest.raises(pl.exceptions.InvalidOperationError): + df.select(pl.col("a").slice(0, pl.col("b").slice(1, 2))).collect() + + with pytest.raises(pl.exceptions.InvalidOperationError): + df.select(pl.col("a").slice(pl.lit("1"), None)).collect() From e4a5b6dc79a38dbb8ba57a6162aa40fd5203a046 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 11 Jun 2024 11:09:07 +0000 Subject: [PATCH 2/3] Support unsigned integer expressions in Expr.tail --- py-polars/polars/expr/expr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 712f119325d6..3fbc3ae01a8b 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -4851,7 +4851,11 @@ def tail(self, n: int | Expr = 10) -> Self: │ 7 │ └─────┘ """ - offset = -self._from_pyexpr(parse_into_expression(n)) + # This cast enables tail with expressions that return unsigned integers, + # for which negate otherwise raises InvalidOperationError. + offset = -self._from_pyexpr( + parse_into_expression(n).cast(Int64, strict=False, wrap_numerical=True) + ) return self.slice(offset, n) def limit(self, n: int | Expr = 10) -> Self: From cbe409199ed8f0ea940a4ae6b0bc9ddb6bfe57af Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 11 Jun 2024 11:09:26 +0000 Subject: [PATCH 3/3] Update tests --- py-polars/tests/unit/dataframe/test_df.py | 2 +- py-polars/tests/unit/expr/test_exprs.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index b2f2fdfd7a6d..826176f702d3 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -1976,7 +1976,7 @@ def test_group_by_slice_expression_args() -> None: out = ( df.group_by("groups", maintain_order=True) - .agg([pl.col("vals").slice(pl.len() * 0.1, (pl.len() // 5))]) + .agg([pl.col("vals").slice((pl.len() * 0.1).cast(int), (pl.len() // 5))]) .explode("vals") ) diff --git a/py-polars/tests/unit/expr/test_exprs.py b/py-polars/tests/unit/expr/test_exprs.py index 020879154411..ffee9858a825 100644 --- a/py-polars/tests/unit/expr/test_exprs.py +++ b/py-polars/tests/unit/expr/test_exprs.py @@ -678,7 +678,7 @@ def test_head() -> None: assert df.select(pl.col("a").head(10)).to_dict(as_series=False) == { "a": [1, 2, 3, 4, 5] } - assert df.select(pl.col("a").head(pl.len() / 2)).to_dict(as_series=False) == { + assert df.select(pl.col("a").head(pl.len() // 2)).to_dict(as_series=False) == { "a": [1, 2] } @@ -690,7 +690,7 @@ def test_tail() -> None: assert df.select(pl.col("a").tail(10)).to_dict(as_series=False) == { "a": [1, 2, 3, 4, 5] } - assert df.select(pl.col("a").tail(pl.len() / 2)).to_dict(as_series=False) == { + assert df.select(pl.col("a").tail(pl.len() // 2)).to_dict(as_series=False) == { "a": [4, 5] }