diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index fbc735e71497..125fc62bb99e 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1026,6 +1026,18 @@ impl Column { } } + // @NOTE: This can theoretically be pushed into the previous operation but is it really + // worth it... probably not... + if let Some((limit, limit_dsc)) = options.limit { + let limit = limit.min(length); + + if limit_dsc { + values = values.drain((length - limit) as usize..).collect(); + } else { + values.truncate(limit as usize); + } + } + IdxCa::from_vec(self.name().clone(), values) } diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 46e4779cdef8..9cac26d1e7df 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -1014,7 +1014,7 @@ def test_multiple_column_sort() -> None: pl.DataFrame({"a": [3, 2, 1], "b": ["b", "a", "a"]}), ) assert_frame_equal( - df.sort("b", descending=True), + df.sort("b", descending=True, maintain_order=True), pl.DataFrame({"a": [3, 1, 2], "b": ["b", "a", "a"]}), ) assert_frame_equal( diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index b44255aba244..b28dd269c19c 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -825,6 +825,8 @@ def test_cat_preserve_lexical_ordering_on_concat() -> None: assert df2["x"].dtype == dtype +# TODO: Bug see: https://github.com/pola-rs/polars/issues/20440 +@pytest.mark.may_fail_auto_streaming def test_cat_append_lexical_sorted_flag() -> None: df = pl.DataFrame({"x": [0, 1, 1], "y": ["B", "B", "A"]}).with_columns( pl.col("y").cast(pl.Categorical(ordering="lexical")) diff --git a/py-polars/tests/unit/operations/namespaces/test_categorical.py 
b/py-polars/tests/unit/operations/namespaces/test_categorical.py index 7bd457871b86..9f60ff4f7be9 100644 --- a/py-polars/tests/unit/operations/namespaces/test_categorical.py +++ b/py-polars/tests/unit/operations/namespaces/test_categorical.py @@ -29,6 +29,8 @@ def test_global_and_local( yield +# @TODO: Bug, see https://github.com/pola-rs/polars/issues/20440 +@pytest.mark.may_fail_auto_streaming def test_categorical_lexical_sort() -> None: df = pl.DataFrame( {"cats": ["z", "z", "k", "a", "b"], "vals": [3, 1, 2, 2, 3]} diff --git a/py-polars/tests/unit/operations/test_interpolate_by.py b/py-polars/tests/unit/operations/test_interpolate_by.py index bcfaf562426c..07bee621c505 100644 --- a/py-polars/tests/unit/operations/test_interpolate_by.py +++ b/py-polars/tests/unit/operations/test_interpolate_by.py @@ -105,17 +105,15 @@ def test_interpolate_by_leading_nulls() -> None: } ) result = df.select(pl.col("values").interpolate_by("times")) - expected = pl.DataFrame( - {"values": [None, None, None, 1.0, 1.7999999999999998, 4.6, 5.0]} - ) + expected = pl.DataFrame({"values": [None, None, None, 1.0, 1.8, 4.6, 5.0]}) assert_frame_equal(result, expected) result = ( - df.sort("times", descending=True) + df.sort("times", maintain_order=True, descending=True) .with_columns(pl.col("values").interpolate_by("times")) .sort("times", maintain_order=True) .drop("times") ) - assert_frame_equal(result, expected) + assert_frame_equal(result, expected, check_exact=False) @pytest.mark.parametrize("dataset", ["floats", "dates"]) diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py index 3bc0649a72ea..fd384408e247 100644 --- a/py-polars/tests/unit/operations/test_join.py +++ b/py-polars/tests/unit/operations/test_join.py @@ -285,11 +285,18 @@ def test_join_on_cast() -> None: df_b = pl.DataFrame({"a": [-2, -3, 3, 10]}) - assert df_a.join(df_b, on=pl.col("a").cast(pl.Int64)).to_dict(as_series=False) == { - "index": [1, 2, 3, 5], - "a": [-2, 3, 
3, 10], - "a_right": [-2, 3, 3, 10], - } + assert_frame_equal( + df_a.join(df_b, on=pl.col("a").cast(pl.Int64)), + pl.DataFrame( + { + "index": [1, 2, 3, 5], + "a": [-2, 3, 3, 10], + "a_right": [-2, 3, 3, 10], + } + ), + check_row_order=False, + check_dtypes=False, + ) assert df_a.lazy().join( df_b.lazy(), on=pl.col("a").cast(pl.Int64) ).collect().to_dict(as_series=False) == {