From a7b933ac98504fbade5c2db9566182b1694674d0 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:14:33 +0100 Subject: [PATCH] fix: Revert length check of `patterns` in `str.extract_many()` (#20953) --- .../src/dsl/function_expr/strings.rs | 1 - .../namespaces/string/test_string.py | 28 ++++++------------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index 46d4b524c14a..f3d93605b501 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -466,7 +466,6 @@ fn extract_many( ascii_case_insensitive: bool, overlapping: bool, ) -> PolarsResult { - _check_same_length(s, "extract_many")?; let ca = s[0].str()?; let patterns = &s[1]; diff --git a/py-polars/tests/unit/operations/namespaces/string/test_string.py b/py-polars/tests/unit/operations/namespaces/string/test_string.py index f951629f15ad..3671ac483a5f 100644 --- a/py-polars/tests/unit/operations/namespaces/string/test_string.py +++ b/py-polars/tests/unit/operations/namespaces/string/test_string.py @@ -1824,21 +1824,17 @@ def test_replace_lit_n_char_13385( def test_extract_many() -> None: - df = pl.DataFrame({"values": ["discontent"]}) + df = pl.DataFrame({"values": ["discontent", "foobar"]}) patterns = ["winter", "disco", "onte", "discontent"] - assert ( - df.with_columns( - pl.col("values") - .str.extract_many(patterns, overlapping=False) - .alias("matches"), - pl.col("values") - .str.extract_many(patterns, overlapping=True) - .alias("matches_overlapping"), - ) + assert df.with_columns( + pl.col("values").str.extract_many(patterns, overlapping=False).alias("matches"), + pl.col("values") + .str.extract_many(patterns, overlapping=True) + .alias("matches_overlapping"), ).to_dict(as_series=False) == { - "values": ["discontent"], - "matches": [["disco"]], - "matches_overlapping": [["disco", "onte", "discontent"]], + "values": ["discontent", "foobar"], + "matches": [["disco"], []], + "matches_overlapping": [["disco", "onte", "discontent"], []], } # many patterns @@ -1865,12 +1861,6 @@ def test_extract_many() -> None: assert f2.to_list() == [[0], [0, 5]] -def test_str_extract_many_wrong_length() -> None: - df = pl.DataFrame({"num": ["-10", "-1", "0"]}) - with pytest.raises(ComputeError, match="should have equal or unit length"): - df.select(pl.col("num").str.extract_many(pl.Series(["a", "b"]))) - - def test_json_decode_raise_on_data_type_mismatch_13061() -> None: assert_series_equal( pl.Series(["null", "null"]).str.json_decode(infer_schema_length=1),