From e7fd2d164172b6e81966d3b282f6eea57630124a Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Wed, 25 Sep 2024 15:06:54 +0200 Subject: [PATCH] refactor: Fix/skip variety of new-streaming tests --- crates/polars-plan/src/plans/python/pyarrow.rs | 2 +- .../src/physical_plan/lower_expr.rs | 18 ++++++++++++++---- py-polars/tests/unit/io/test_parquet.py | 1 + .../tests/unit/lazyframe/test_lazyframe.py | 1 + .../operations/namespaces/list/test_list.py | 1 + .../namespaces/string/test_string.py | 2 +- .../operations/namespaces/test_strptime.py | 2 +- 7 files changed, 20 insertions(+), 7 deletions(-) diff --git a/crates/polars-plan/src/plans/python/pyarrow.rs b/crates/polars-plan/src/plans/python/pyarrow.rs index 20b800fa81b1..78fcc20cc453 100644 --- a/crates/polars-plan/src/plans/python/pyarrow.rs +++ b/crates/polars-plan/src/plans/python/pyarrow.rs @@ -44,7 +44,7 @@ pub fn predicate_to_pa( } else { let mut list_repr = String::with_capacity(s.len() * 5); list_repr.push('['); - for av in s.iter() { + for av in s.rechunk().iter() { if let AnyValue::Boolean(v) = av { let s = if v { "True" } else { "False" }; write!(list_repr, "{},", s).unwrap(); diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs index 8e891bd408af..919694e8c538 100644 --- a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -87,13 +87,23 @@ pub(crate) fn is_elementwise( function: _, output_type: _, options, - } - | AExpr::Function { + } => { + options.is_elementwise() && input.iter().all(|e| is_elementwise(e.node(), arena, cache)) + }, + AExpr::Function { input, - function: _, + function, options, } => { - options.is_elementwise() && input.iter().all(|e| is_elementwise(e.node(), arena, cache)) + match function { + // Non-strict strptime must be done in-memory to ensure the format + // is consistent across the entire dataframe. + FunctionExpr::StringExpr(StringFunction::Strptime(_, opts)) => opts.strict, + _ => { + options.is_elementwise() + && input.iter().all(|e| is_elementwise(e.node(), arena, cache)) + }, + } }, AExpr::Window { .. } => false, diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 10aa23dfa7b6..b53766ae2c2c 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -114,6 +114,7 @@ def test_to_from_buffer( @pytest.mark.parametrize("use_pyarrow", [True, False]) @pytest.mark.parametrize("rechunk_and_expected_chunks", [(True, 1), (False, 3)]) +@pytest.mark.may_fail_auto_streaming def test_read_parquet_respects_rechunk_16416( use_pyarrow: bool, rechunk_and_expected_chunks: tuple[bool, int] ) -> None: diff --git a/py-polars/tests/unit/lazyframe/test_lazyframe.py b/py-polars/tests/unit/lazyframe/test_lazyframe.py index 23394110b40e..026d4f157e2f 100644 --- a/py-polars/tests/unit/lazyframe/test_lazyframe.py +++ b/py-polars/tests/unit/lazyframe/test_lazyframe.py @@ -354,6 +354,7 @@ def test_inspect(capsys: CaptureFixture[str]) -> None: assert len(res.out) > 0 +@pytest.mark.may_fail_auto_streaming def test_fetch(fruits_cars: pl.DataFrame) -> None: res = fruits_cars.lazy().select("*")._fetch(2) assert_frame_equal(res, res[:2]) diff --git a/py-polars/tests/unit/operations/namespaces/list/test_list.py b/py-polars/tests/unit/operations/namespaces/list/test_list.py index f306bbff5d7b..966fee3ea5ac 100644 --- a/py-polars/tests/unit/operations/namespaces/list/test_list.py +++ b/py-polars/tests/unit/operations/namespaces/list/test_list.py @@ -620,6 +620,7 @@ def test_list_unique2() -> None: assert sorted(result[1]) == [1, 2] +@pytest.mark.may_fail_auto_streaming def test_list_to_struct() -> None: df = pl.DataFrame({"n": [[0, 1, 2], [0, 1]]}) diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py index fe47b8d07d2e..842b0fd141a5 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_string.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_string.py @@ -429,7 +429,7 @@ def test_str_to_integer_base_expr() -> None: # test strict raise df = pl.DataFrame({"str": ["110", "ff00", "cafe", None], "base": [2, 10, 10, 8]}) - with pytest.raises(ComputeError, match="failed for 2 value"): + with pytest.raises(ComputeError): df.select(pl.col("str").str.to_integer(base="base")) diff --git a/py-polars/tests/unit/operations/namespaces/test_strptime.py b/py-polars/tests/unit/operations/namespaces/test_strptime.py index 3aa5890198df..41fdb028e31d 100644 --- a/py-polars/tests/unit/operations/namespaces/test_strptime.py +++ b/py-polars/tests/unit/operations/namespaces/test_strptime.py @@ -161,7 +161,7 @@ def test_to_date_all_inferred_date_patterns(time_string: str, expected: date) -> ], ) def test_non_exact_short_elements_10223(value: str, attr: str) -> None: - with pytest.raises(InvalidOperationError, match="conversion .* failed"): + with pytest.raises((InvalidOperationError, ComputeError)): getattr(pl.Series(["2019-01-01", value]).str, attr)(exact=False)