From 31158b2b32d17e3b75d9553cc90d620eaea627c5 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Sun, 5 Jan 2025 15:53:53 +0000 Subject: [PATCH] feat: validate library minimum version in compliant objects (#1727) --- narwhals/_arrow/dataframe.py | 2 + narwhals/_arrow/series.py | 2 + narwhals/_dask/dataframe.py | 4 +- narwhals/_duckdb/dataframe.py | 18 +++++++-- narwhals/_ibis/dataframe.py | 18 +++++++-- narwhals/_pandas_like/dataframe.py | 2 + narwhals/_pandas_like/series.py | 2 + narwhals/_polars/dataframe.py | 3 ++ narwhals/_polars/series.py | 2 + narwhals/_spark_like/dataframe.py | 2 + narwhals/dependencies.py | 10 ++--- narwhals/translate.py | 14 ++++++- narwhals/utils.py | 61 ++++++++++++++++++++++++++++++ pyproject.toml | 16 +++++--- tests/expr_and_series/clip_test.py | 3 ++ 15 files changed, 140 insertions(+), 19 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index c0efa50fe..9e5ce0621 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -22,6 +22,7 @@ from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_columns_to_drop from narwhals.utils import scale_bytes +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -56,6 +57,7 @@ def __init__( self._implementation = Implementation.PYARROW self._backend_version = backend_version self._version = version + validate_backend_version(self._implementation, self._backend_version) def __narwhals_namespace__(self: Self) -> ArrowNamespace: from narwhals._arrow.namespace import ArrowNamespace diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index cf7760d49..046e26e05 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -18,6 +18,7 @@ from narwhals.utils import Implementation from narwhals.utils import generate_temporary_column_name from narwhals.utils import import_dtypes_module +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -54,6 +55,7 @@ def __init__( self._implementation = Implementation.PYARROW self._backend_version = backend_version self._version = version + validate_backend_version(self._implementation, self._backend_version) def _change_version(self: Self, version: Version) -> Self: return self.__class__( diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 6542253a0..5e652a937 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -11,11 +11,13 @@ from narwhals._dask.utils import parse_exprs_and_named_exprs from narwhals._pandas_like.utils import native_to_narwhals_dtype from narwhals._pandas_like.utils import select_columns_by_name +from narwhals.typing import CompliantLazyFrame from narwhals.utils import Implementation from narwhals.utils import flatten from narwhals.utils import generate_temporary_column_name from narwhals.utils import parse_columns_to_drop from narwhals.utils import parse_version +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -29,7 +31,6 @@ from narwhals._dask.typing import IntoDaskExpr from narwhals.dtypes import DType from narwhals.utils import Version -from narwhals.typing import CompliantLazyFrame class DaskLazyFrame(CompliantLazyFrame): @@ -44,6 +45,7 @@ def __init__( self._backend_version = backend_version self._implementation = Implementation.DASK self._version = version + validate_backend_version(self._implementation, self._backend_version) def __native_namespace__(self: Self) -> ModuleType: if self._implementation is Implementation.DASK: diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 339fca137..73dd055ca 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -6,8 +6,10 @@ from typing import Any from narwhals.dependencies import get_duckdb +from narwhals.utils import Implementation from narwhals.utils import import_dtypes_module from narwhals.utils import parse_version +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -82,9 +84,15 @@ def native_to_narwhals_dtype(duckdb_dtype: str, version: Version) -> DType: class DuckDBInterchangeFrame: - def __init__(self, df: Any, version: Version) -> None: + _implementation = Implementation.DUCKDB + + def __init__( + self, df: Any, *, backend_version: tuple[int, ...], version: Version + ) -> None: self._native_frame = df self._version = version + self._backend_version = backend_version + validate_backend_version(self._implementation, self._backend_version) def __narwhals_dataframe__(self) -> Any: return self @@ -147,10 +155,14 @@ def to_arrow(self: Self) -> pa.Table: return self._native_frame.arrow() def _change_version(self: Self, version: Version) -> Self: - return self.__class__(self._native_frame, version=version) + return self.__class__( + self._native_frame, version=version, backend_version=self._backend_version + ) def _from_native_frame(self: Self, df: Any) -> Self: - return self.__class__(df, version=self._version) + return self.__class__( + df, version=self._version, backend_version=self._backend_version + ) def collect_schema(self) -> dict[str, DType]: return { diff --git a/narwhals/_ibis/dataframe.py b/narwhals/_ibis/dataframe.py index f62a31e8b..6fe8997a9 100644 --- a/narwhals/_ibis/dataframe.py +++ b/narwhals/_ibis/dataframe.py @@ -5,7 +5,9 @@ from typing import Any from narwhals.dependencies import get_ibis +from narwhals.utils import Implementation from narwhals.utils import import_dtypes_module +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -69,9 +71,15 @@ def native_to_narwhals_dtype(ibis_dtype: Any, version: Version) -> DType: class IbisInterchangeFrame: - def __init__(self, df: Any, version: Version) -> None: + _implementation = Implementation.IBIS + + def __init__( + self, df: Any, *, backend_version: tuple[int, ...], version: Version + ) -> None: self._native_frame = df self._version = version + self._backend_version = backend_version + validate_backend_version(self._implementation, self._backend_version) def __narwhals_dataframe__(self) -> Any: return self @@ -125,10 +133,14 @@ def __getattr__(self, attr: str) -> Any: raise NotImplementedError(msg) def _change_version(self: Self, version: Version) -> Self: - return self.__class__(self._native_frame, version=version) + return self.__class__( + self._native_frame, version=version, backend_version=self._backend_version + ) def _from_native_frame(self: Self, df: Any) -> Self: - return self.__class__(df, version=self._version) + return self.__class__( + df, version=self._version, backend_version=self._backend_version + ) def collect_schema(self) -> dict[str, DType]: return { diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index c10aacec5..293f5cefe 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -27,6 +27,7 @@ from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_columns_to_drop from narwhals.utils import scale_bytes +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -59,6 +60,7 @@ def __init__( self._implementation = implementation self._backend_version = backend_version self._version = version + validate_backend_version(self._implementation, self._backend_version) def __narwhals_dataframe__(self) -> Self: return self diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index cf8972deb..8a6779828 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -24,6 +24,7 @@ from narwhals.typing import CompliantSeries from narwhals.utils import Implementation from narwhals.utils import import_dtypes_module +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -94,6 +95,7 @@ def __init__( self._implementation = implementation self._backend_version = backend_version self._version = version + validate_backend_version(self._implementation, self._backend_version) def __native_namespace__(self: Self) -> ModuleType: if self._implementation in { diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index 760b5f4b6..d5e115284 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -15,6 +15,7 @@ from narwhals.utils import Implementation from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_columns_to_drop +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -45,6 +46,7 @@ def __init__( self._backend_version = backend_version self._implementation = Implementation.POLARS self._version = version + validate_backend_version(self._implementation, self._backend_version) def __repr__(self: Self) -> str: # pragma: no cover return "PolarsDataFrame" @@ -343,6 +345,7 @@ def __init__( self._backend_version = backend_version self._implementation = Implementation.POLARS self._version = version + validate_backend_version(self._implementation, self._backend_version) def __repr__(self: Self) -> str: # pragma: no cover return "PolarsLazyFrame" diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 30cd90fd5..33572db7c 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -10,6 +10,7 @@ from narwhals._polars.utils import narwhals_to_native_dtype from narwhals._polars.utils import native_to_narwhals_dtype from narwhals.utils import Implementation +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from types import ModuleType @@ -38,6 +39,7 @@ def __init__( self._backend_version = backend_version self._implementation = Implementation.POLARS self._version = version + validate_backend_version(self._implementation, self._backend_version) def __repr__(self: Self) -> str: # pragma: no cover return "PolarsSeries" diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py index eb7118b23..e04da7f57 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -12,6 +12,7 @@ from narwhals.utils import flatten from narwhals.utils import parse_columns_to_drop from narwhals.utils import parse_version +from narwhals.utils import validate_backend_version if TYPE_CHECKING: from pyspark.sql import DataFrame @@ -37,6 +38,7 @@ def __init__( self._backend_version = backend_version self._implementation = Implementation.PYSPARK self._version = version + validate_backend_version(self._implementation, self._backend_version) def __native_namespace__(self) -> Any: # pragma: no cover if self._implementation is Implementation.PYSPARK: diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py index 0c5d11720..43904a0ba 100644 --- a/narwhals/dependencies.py +++ b/narwhals/dependencies.py @@ -87,16 +87,16 @@ def get_duckdb() -> Any: return sys.modules.get("duckdb", None) -def get_dask_expr() -> Any: - """Get dask_expr module (if already imported - else return None).""" - return sys.modules.get("dask_expr", None) - - def get_ibis() -> Any: """Get ibis module (if already imported - else return None).""" return sys.modules.get("ibis", None) +def get_dask_expr() -> Any: + """Get dask_expr module (if already imported - else return None).""" + return sys.modules.get("dask_expr", None) + + def get_pyspark() -> Any: # pragma: no cover """Get pyspark module (if already imported - else return None).""" return sys.modules.get("pyspark", None) diff --git a/narwhals/translate.py b/narwhals/translate.py index 8542a62f0..77c83b548 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -709,8 +709,13 @@ def _from_native_impl( # noqa: PLR0915 else: return native_object raise TypeError(msg) + import duckdb # ignore-banned-import + + backend_version = parse_version(duckdb.__version__) return DataFrame( - DuckDBInterchangeFrame(native_object, version=version), + DuckDBInterchangeFrame( + native_object, version=version, backend_version=backend_version + ), level="interchange", ) @@ -726,8 +731,13 @@ def _from_native_impl( # noqa: PLR0915 ) raise TypeError(msg) return native_object + import ibis # ignore-banned-import + + backend_version = parse_version(ibis.__version__) return DataFrame( - IbisInterchangeFrame(native_object, version=version), + IbisInterchangeFrame( + native_object, version=version, backend_version=backend_version + ), level="interchange", ) diff --git a/narwhals/utils.py b/narwhals/utils.py index 2125d46c4..658c0e7bf 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -16,6 +16,8 @@ from narwhals.dependencies import get_cudf from narwhals.dependencies import get_dask_dataframe +from narwhals.dependencies import get_duckdb +from narwhals.dependencies import get_ibis from narwhals.dependencies import get_modin from narwhals.dependencies import get_pandas from narwhals.dependencies import get_polars @@ -73,6 +75,10 @@ class Implementation(Enum): """Polars implementation.""" DASK = auto() """Dask implementation.""" + DUCKDB = auto() + """DuckDB implementation.""" + IBIS = auto() + """Ibis implementation.""" UNKNOWN = auto() """Unknown implementation.""" @@ -97,6 +103,8 @@ def from_native_namespace( get_pyspark_sql(): Implementation.PYSPARK, get_polars(): Implementation.POLARS, get_dask_dataframe(): Implementation.DASK, + get_duckdb(): Implementation.DUCKDB, + get_ibis(): Implementation.IBIS, } return mapping.get(native_namespace, Implementation.UNKNOWN) @@ -245,6 +253,59 @@ def is_dask(self) -> bool: """ return self is Implementation.DASK # pragma: no cover + def is_duckdb(self) -> bool: + """Return whether implementation is DuckDB. + + Returns: + Boolean. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_duckdb() + False + """ + return self is Implementation.DUCKDB # pragma: no cover + + def is_ibis(self) -> bool: + """Return whether implementation is Ibis. + + Returns: + Boolean. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.implementation.is_ibis() + False + """ + return self is Implementation.IBIS # pragma: no cover + + +MIN_VERSIONS: dict[Implementation, tuple[int, ...]] = { + Implementation.PANDAS: (0, 25, 3), + Implementation.MODIN: (0, 25, 3), + Implementation.CUDF: (24, 10), + Implementation.PYARROW: (11,), + Implementation.PYSPARK: (3, 3), + Implementation.POLARS: (0, 20, 3), + Implementation.DASK: (2024, 10), + Implementation.DUCKDB: (1,), + Implementation.IBIS: (6,), +} + + +def validate_backend_version( + implementation: Implementation, backend_version: tuple[int, ...] +) -> None: + if backend_version < (min_version := MIN_VERSIONS[implementation]): + msg = f"Minimum version of {implementation} supported by Narwhals is {min_version}, found: {backend_version}" + raise ValueError(msg) + def import_dtypes_module(version: Version) -> DTypes: if version is Version.V1: diff --git a/pyproject.toml b/pyproject.toml index a0a68cf3a..c01ebbafa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,15 +21,21 @@ classifiers = [ ] [project.optional-dependencies] -cudf = ["cudf>=24.10.0"] -modin = ["modin"] +# These should be aligned with MIN_VERSIONS in narwhals/utils.py +# Exception: modin, because `modin.__version__` isn't aligned with +# `modin.pandas.__version__`. The latter is the one that we make +# API decisions based on, so that's the one we track internally. +# We have yet to determine the minimum Modin version we support +# https://github.com/narwhals-dev/narwhals/issues/817 pandas = ["pandas>=0.25.3"] -polars = ["polars>=0.20.3"] -ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"] +modin = ["modin"] +cudf = ["cudf>=24.10.0"] pyarrow = ["pyarrow>=11.0.0"] +pyspark = ["pyspark>=3.3.0"] +polars = ["polars>=0.20.3"] dask = ["dask[dataframe]>=2024.10"] duckdb = ["duckdb>=1.0"] -pyspark = ["pyspark>=3.3.0"] +ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"] dev = [ "covdefaults", "pre-commit", diff --git a/tests/expr_and_series/clip_test.py b/tests/expr_and_series/clip_test.py index 838ca6b08..29ed6379b 100644 --- a/tests/expr_and_series/clip_test.py +++ b/tests/expr_and_series/clip_test.py @@ -57,6 +57,9 @@ def test_clip_series_expressified( ) -> None: if "modin_pyarrow" in str(constructor_eager): request.applymarker(pytest.mark.xfail) + if "cudf" in str(constructor_eager): + # https://github.com/rapidsai/cudf/issues/17682 + request.applymarker(pytest.mark.xfail) data = {"a": [1, 2, 3, -4, 5], "lb": [3, 2, 1, 1, 1], "ub": [4, 4, 2, 2, 2]} df = nw.from_native(constructor_eager(data), eager_only=True)