From f4df861e43aa67b4a924cb27f3ebb03ab9db64f4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 5 Jan 2025 17:16:58 +0000 Subject: [PATCH 1/4] fix: nw.lit(date, dtype=nw.Date) --- narwhals/_pandas_like/utils.py | 5 +---- narwhals/expr.py | 2 +- narwhals/stable/v1/__init__.py | 2 +- tests/{frame => expr_and_series}/lit_test.py | 7 +++++++ tests/series_only/cast_test.py | 12 ------------ 5 files changed, 10 insertions(+), 18 deletions(-) rename tests/{frame => expr_and_series}/lit_test.py (91%) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 655e60773..24760d783 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -637,10 +637,7 @@ def narwhals_to_native_dtype( # noqa: PLR0915 else f"timedelta64[{du_time_unit}]" ) if isinstance_or_issubclass(dtype, dtypes.Date): - if dtype_backend == "pyarrow-nullable": - return "date32[pyarrow]" - msg = "Date dtype only supported for pyarrow-backed data types in pandas" - raise NotImplementedError(msg) + return "date32[pyarrow]" if isinstance_or_issubclass(dtype, dtypes.Enum): msg = "Converting to Enum is not (yet) supported" raise NotImplementedError(msg) diff --git a/narwhals/expr.py b/narwhals/expr.py index 0ab7ba20e..aa934a01f 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -7023,7 +7023,7 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: ) -def lit(value: Any, dtype: DType | None = None) -> Expr: +def lit(value: Any, dtype: DType | type[DType] | None = None) -> Expr: """Return an expression representing a literal value. Arguments: diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index ba5117425..5ffc475e5 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -2542,7 +2542,7 @@ def len() -> Expr: return _stableify(nw.len()) -def lit(value: Any, dtype: DType | None = None) -> Expr: +def lit(value: Any, dtype: DType | type[DType] | None = None) -> Expr: """Return an expression representing a literal value. Arguments: diff --git a/tests/frame/lit_test.py b/tests/expr_and_series/lit_test.py similarity index 91% rename from tests/frame/lit_test.py rename to tests/expr_and_series/lit_test.py index 8b3bcd8e2..346a6c727 100644 --- a/tests/frame/lit_test.py +++ b/tests/expr_and_series/lit_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +from datetime import date from typing import TYPE_CHECKING from typing import Any @@ -89,3 +90,9 @@ def test_lit_operation( result = df.select(expr.alias(col_name)) expected = {col_name: expected_result} assert_equal_data(result, expected) + + +def test_date_lit(constructor: Constructor) -> None: + df = nw.from_native(constructor({"a": [1]})) + result = df.with_columns(nw.lit(date(2020, 1, 1), dtype=nw.Date)).collect_schema() + assert result == {"a": nw.Int64, "literal": nw.Date} diff --git a/tests/series_only/cast_test.py b/tests/series_only/cast_test.py index 10587a084..b4051e503 100644 --- a/tests/series_only/cast_test.py +++ b/tests/series_only/cast_test.py @@ -98,18 +98,6 @@ def test_cast_date_datetime_pandas() -> None: assert df.schema == {"a": nw.Date} -@pytest.mark.skipif( - PANDAS_VERSION < (2, 0, 0), - reason="pyarrow dtype not available", -) -def test_cast_date_datetime_invalid() -> None: - # pandas: pyarrow datetime to date - dfpd = pd.DataFrame({"a": [datetime(2020, 1, 1), datetime(2020, 1, 2)]}) - df = nw.from_native(dfpd) - with pytest.raises(NotImplementedError, match="pyarrow"): - df.select(nw.col("a").cast(nw.Date)) - - @pytest.mark.filterwarnings("ignore: casting period") def test_unknown_to_int() -> None: df = pd.DataFrame({"a": pd.period_range("2000", periods=3, freq="min")}) From 6a3ae7d8a880c7e8daac4ca963ccc5a2e02d8c4b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 5 Jan 2025 17:19:09 +0000 Subject: [PATCH 2/4] throw if pyarrow not installed --- narwhals/_pandas_like/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 24760d783..03f025f78 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -637,6 +637,10 @@ def narwhals_to_native_dtype( # noqa: PLR0915 else f"timedelta64[{du_time_unit}]" ) if isinstance_or_issubclass(dtype, dtypes.Date): + try: + import pyarrow as pa # ignore-banned-import + except ModuleNotFoundError: # pragma: no cover + msg = "PyArrow>=11.0.0 is required for `Date` dtype." return "date32[pyarrow]" if isinstance_or_issubclass(dtype, dtypes.Enum): msg = "Converting to Enum is not (yet) supported" From 8f1df17f4b485436a73772d3ad6d0bf64daf06da Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 5 Jan 2025 17:19:33 +0000 Subject: [PATCH 3/4] throw if pyarrow not installed --- tests/expr_and_series/lit_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/expr_and_series/lit_test.py b/tests/expr_and_series/lit_test.py index 346a6c727..39aa72415 100644 --- a/tests/expr_and_series/lit_test.py +++ b/tests/expr_and_series/lit_test.py @@ -8,6 +8,7 @@ import pytest import narwhals.stable.v1 as nw +from tests.utils import PANDAS_VERSION from tests.utils import Constructor from tests.utils import assert_equal_data @@ -92,6 +93,7 @@ def test_lit_operation( assert_equal_data(result, expected) +@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow") def test_date_lit(constructor: Constructor) -> None: df = nw.from_native(constructor({"a": [1]})) result = df.with_columns(nw.lit(date(2020, 1, 1), dtype=nw.Date)).collect_schema() From 496f3a5fbaeea0d903ef55f016008ab603229b6e Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 5 Jan 2025 19:10:32 +0000 Subject: [PATCH 4/4] loosen dask minimum back to 2024.8 --- narwhals/_dask/utils.py | 2 ++ narwhals/utils.py | 2 +- pyproject.toml | 2 +- tests/expr_and_series/arithmetic_test.py | 5 +++++ tests/expr_and_series/binary_test.py | 7 ++++++- tests/expr_and_series/lit_test.py | 13 ++++++++++++- tests/expr_and_series/operators_test.py | 12 +++++++++++- tests/frame/select_test.py | 2 +- tests/tpch_q1_test.py | 3 +++ 9 files changed, 42 insertions(+), 6 deletions(-) diff --git a/narwhals/_dask/utils.py b/narwhals/_dask/utils.py index 88d59b532..4f2952d0b 100644 --- a/narwhals/_dask/utils.py +++ b/narwhals/_dask/utils.py @@ -136,6 +136,8 @@ def narwhals_to_native_dtype(dtype: DType | type[DType], version: Version) -> An return "category" if isinstance_or_issubclass(dtype, dtypes.Datetime): return "datetime64[us]" + if isinstance_or_issubclass(dtype, dtypes.Date): + return "date32[day][pyarrow]" if isinstance_or_issubclass(dtype, dtypes.Duration): return "timedelta64[ns]" if isinstance_or_issubclass(dtype, dtypes.List): # pragma: no cover diff --git a/narwhals/utils.py b/narwhals/utils.py index 658c0e7bf..b8e9830e1 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -293,7 +293,7 @@ def is_ibis(self) -> bool: Implementation.PYARROW: (11,), Implementation.PYSPARK: (3, 3), Implementation.POLARS: (0, 20, 3), - Implementation.DASK: (2024, 10), + Implementation.DASK: (2024, 8), Implementation.DUCKDB: (1,), Implementation.IBIS: (6,), } diff --git a/pyproject.toml b/pyproject.toml index c01ebbafa..0c2b4a9be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ cudf = ["cudf>=24.10.0"] pyarrow = ["pyarrow>=11.0.0"] pyspark = ["pyspark>=3.3.0"] polars = ["polars>=0.20.3"] -dask = ["dask[dataframe]>=2024.10"] +dask = ["dask[dataframe]>=2024.8"] duckdb = ["duckdb>=1.0"] ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"] dev = [ diff --git a/tests/expr_and_series/arithmetic_test.py b/tests/expr_and_series/arithmetic_test.py index eb38c6a14..cd82a945e 100644 --- a/tests/expr_and_series/arithmetic_test.py +++ b/tests/expr_and_series/arithmetic_test.py @@ -11,6 +11,7 @@ from hypothesis import given import narwhals.stable.v1 as nw +from tests.utils import DASK_VERSION from tests.utils import PANDAS_VERSION from tests.utils import Constructor from tests.utils import ConstructorEager @@ -67,6 +68,8 @@ def test_right_arithmetic_expr( constructor: Constructor, request: pytest.FixtureRequest, ) -> None: + if "dask" in str(constructor) and DASK_VERSION < (2024, 10): + request.applymarker(pytest.mark.xfail) if attr == "__rmod__" and any( x in str(constructor) for x in ["pandas_pyarrow", "modin_pyarrow"] ): @@ -241,6 +244,8 @@ def test_arithmetic_expr_left_literal( constructor: Constructor, request: pytest.FixtureRequest, ) -> None: + if "dask" in str(constructor) and DASK_VERSION < (2024, 10): + request.applymarker(pytest.mark.xfail) if attr == "__mod__" and any( x in str(constructor) for x in ["pandas_pyarrow", "modin_pyarrow"] ): diff --git a/tests/expr_and_series/binary_test.py b/tests/expr_and_series/binary_test.py index 3693ccebd..0808810bc 100644 --- a/tests/expr_and_series/binary_test.py +++ b/tests/expr_and_series/binary_test.py @@ -1,11 +1,16 @@ from __future__ import annotations +import pytest + import narwhals.stable.v1 as nw +from tests.utils import DASK_VERSION from tests.utils import Constructor from tests.utils import assert_equal_data -def test_expr_binary(constructor: Constructor) -> None: +def test_expr_binary(constructor: Constructor, request: pytest.FixtureRequest) -> None: + if "dask" in str(constructor) and DASK_VERSION < (2024, 10): + request.applymarker(pytest.mark.xfail) data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} df_raw = constructor(data) result = nw.from_native(df_raw).with_columns( diff --git a/tests/expr_and_series/lit_test.py b/tests/expr_and_series/lit_test.py index 39aa72415..f5be7dfbe 100644 --- a/tests/expr_and_series/lit_test.py +++ b/tests/expr_and_series/lit_test.py @@ -8,6 +8,7 @@ import pytest import narwhals.stable.v1 as nw +from tests.utils import DASK_VERSION from tests.utils import PANDAS_VERSION from tests.utils import Constructor from tests.utils import assert_equal_data @@ -84,7 +85,14 @@ def test_lit_operation( col_name: str, expr: nw.Expr, expected_result: list[int], + request: pytest.FixtureRequest, ) -> None: + if ( + "dask" in str(constructor) + and col_name in ("left_lit", "left_scalar") + and DASK_VERSION < (2024, 10) + ): + request.applymarker(pytest.mark.xfail) data = {"a": [1, 3, 2]} df_raw = constructor(data) df = nw.from_native(df_raw).lazy() @@ -94,7 +102,10 @@ def test_lit_operation( @pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow") -def test_date_lit(constructor: Constructor) -> None: +def test_date_lit(constructor: Constructor, request: pytest.FixtureRequest) -> None: + if "dask" in str(constructor): + # https://github.com/dask/dask/issues/11637 + request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor({"a": [1]})) result = df.with_columns(nw.lit(date(2020, 1, 1), dtype=nw.Date)).collect_schema() assert result == {"a": nw.Int64, "literal": nw.Date} diff --git a/tests/expr_and_series/operators_test.py b/tests/expr_and_series/operators_test.py index ff01747a6..356d81d5b 100644 --- a/tests/expr_and_series/operators_test.py +++ b/tests/expr_and_series/operators_test.py @@ -3,6 +3,7 @@ import pytest import narwhals.stable.v1 as nw +from tests.utils import DASK_VERSION from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -75,8 +76,17 @@ def test_logic_operators_expr( ], ) def test_logic_operators_expr_scalar( - constructor: Constructor, operator: str, expected: list[bool] + constructor: Constructor, + operator: str, + expected: list[bool], + request: pytest.FixtureRequest, ) -> None: + if ( + "dask" in str(constructor) + and DASK_VERSION < (2024, 10) + and operator in ("__rand__", "__ror__") + ): + request.applymarker(pytest.mark.xfail) data = {"a": [True, True, False, False]} df = nw.from_native(constructor(data)) diff --git a/tests/frame/select_test.py b/tests/frame/select_test.py index 2cb3df91d..d85697249 100644 --- a/tests/frame/select_test.py +++ b/tests/frame/select_test.py @@ -118,7 +118,7 @@ def test_missing_columns(constructor: Constructor) -> None: def test_left_to_right_broadcasting( constructor: Constructor, request: pytest.FixtureRequest ) -> None: - if "dask" in str(constructor) and DASK_VERSION < (2024, 9): + if "dask" in str(constructor) and DASK_VERSION < (2024, 10): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor({"a": [1, 1, 2], "b": [4, 5, 6]})) result = df.select(nw.col("a") + nw.col("b").sum()) diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index fd2a7d24c..cb6d48548 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -10,6 +10,7 @@ import pytest import narwhals.stable.v1 as nw +from tests.utils import DASK_VERSION from tests.utils import PANDAS_VERSION from tests.utils import assert_equal_data @@ -20,6 +21,8 @@ ) @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") def test_q1(library: str, request: pytest.FixtureRequest) -> None: + if library == "dask" and DASK_VERSION < (2024, 10): + request.applymarker(pytest.mark.xfail) if library == "pandas" and PANDAS_VERSION < (1, 5): request.applymarker(pytest.mark.xfail) elif library == "pandas":