Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into duckdb-relational
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Jan 5, 2025
2 parents a8cfa91 + 31158b2 commit 8b6ef7c
Show file tree
Hide file tree
Showing 15 changed files with 123 additions and 22 deletions.
2 changes: 2 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from narwhals.utils import is_sequence_but_not_str
from narwhals.utils import parse_columns_to_drop
from narwhals.utils import scale_bytes
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -57,6 +58,7 @@ def __init__(
self._implementation = Implementation.PYARROW
self._backend_version = backend_version
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __narwhals_namespace__(self: Self) -> ArrowNamespace:
from narwhals._arrow.namespace import ArrowNamespace
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from narwhals.utils import Implementation
from narwhals.utils import generate_temporary_column_name
from narwhals.utils import import_dtypes_module
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -54,6 +55,7 @@ def __init__(
self._implementation = Implementation.PYARROW
self._backend_version = backend_version
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def _change_version(self: Self, version: Version) -> Self:
return self.__class__(
Expand Down
4 changes: 3 additions & 1 deletion narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
from narwhals._pandas_like.utils import native_to_narwhals_dtype
from narwhals._pandas_like.utils import select_columns_by_name
from narwhals.exceptions import ColumnNotFoundError
from narwhals.typing import CompliantLazyFrame
from narwhals.utils import Implementation
from narwhals.utils import flatten
from narwhals.utils import generate_temporary_column_name
from narwhals.utils import parse_columns_to_drop
from narwhals.utils import parse_version
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand All @@ -30,7 +32,6 @@
from narwhals._dask.typing import IntoDaskExpr
from narwhals.dtypes import DType
from narwhals.utils import Version
from narwhals.typing import CompliantLazyFrame


class DaskLazyFrame(CompliantLazyFrame):
Expand All @@ -45,6 +46,7 @@ def __init__(
self._backend_version = backend_version
self._implementation = Implementation.DASK
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __native_namespace__(self: Self) -> ModuleType:
if self._implementation is Implementation.DASK:
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from narwhals.utils import generate_temporary_column_name
from narwhals.utils import parse_columns_to_drop
from narwhals.utils import parse_version
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -46,6 +47,7 @@ def __init__(
self._native_frame: duckdb.DuckDBPyRelation = df
self._version = version
self._backend_version = backend_version
validate_backend_version(self._implementation, self._backend_version)

def __narwhals_dataframe__(self) -> Any: # pragma: no cover
# Keep around for backcompat.
Expand Down
18 changes: 15 additions & 3 deletions narwhals/_ibis/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from typing import Any

from narwhals.dependencies import get_ibis
from narwhals.utils import Implementation
from narwhals.utils import import_dtypes_module
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -69,9 +71,15 @@ def native_to_narwhals_dtype(ibis_dtype: Any, version: Version) -> DType:


class IbisInterchangeFrame:
def __init__(self, df: Any, version: Version) -> None:
_implementation = Implementation.IBIS

def __init__(
self, df: Any, *, backend_version: tuple[int, ...], version: Version
) -> None:
self._native_frame = df
self._version = version
self._backend_version = backend_version
validate_backend_version(self._implementation, self._backend_version)

def __narwhals_dataframe__(self) -> Any:
return self
Expand Down Expand Up @@ -125,10 +133,14 @@ def __getattr__(self, attr: str) -> Any:
raise NotImplementedError(msg)

def _change_version(self: Self, version: Version) -> Self:
return self.__class__(self._native_frame, version=version)
return self.__class__(
self._native_frame, version=version, backend_version=self._backend_version
)

def _from_native_frame(self: Self, df: Any) -> Self:
return self.__class__(df, version=self._version)
return self.__class__(
df, version=self._version, backend_version=self._backend_version
)

def collect_schema(self) -> dict[str, DType]:
return {
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from narwhals.utils import is_sequence_but_not_str
from narwhals.utils import parse_columns_to_drop
from narwhals.utils import scale_bytes
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -60,6 +61,7 @@ def __init__(
self._implementation = implementation
self._backend_version = backend_version
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __narwhals_dataframe__(self) -> Self:
return self
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from narwhals.typing import CompliantSeries
from narwhals.utils import Implementation
from narwhals.utils import import_dtypes_module
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -94,6 +95,7 @@ def __init__(
self._implementation = implementation
self._backend_version = backend_version
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __native_namespace__(self: Self) -> ModuleType:
if self._implementation in {
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_polars/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from narwhals.utils import Implementation
from narwhals.utils import is_sequence_but_not_str
from narwhals.utils import parse_columns_to_drop
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -45,6 +46,7 @@ def __init__(
self._backend_version = backend_version
self._implementation = Implementation.POLARS
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __repr__(self: Self) -> str: # pragma: no cover
return "PolarsDataFrame"
Expand Down Expand Up @@ -343,6 +345,7 @@ def __init__(
self._backend_version = backend_version
self._implementation = Implementation.POLARS
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __repr__(self: Self) -> str: # pragma: no cover
return "PolarsLazyFrame"
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from narwhals._polars.utils import narwhals_to_native_dtype
from narwhals._polars.utils import native_to_narwhals_dtype
from narwhals.utils import Implementation
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from types import ModuleType
Expand Down Expand Up @@ -38,6 +39,7 @@ def __init__(
self._backend_version = backend_version
self._implementation = Implementation.POLARS
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __repr__(self: Self) -> str: # pragma: no cover
return "PolarsSeries"
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_spark_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from narwhals.utils import flatten
from narwhals.utils import parse_columns_to_drop
from narwhals.utils import parse_version
from narwhals.utils import validate_backend_version

if TYPE_CHECKING:
from pyspark.sql import DataFrame
Expand All @@ -37,6 +38,7 @@ def __init__(
self._backend_version = backend_version
self._implementation = Implementation.PYSPARK
self._version = version
validate_backend_version(self._implementation, self._backend_version)

def __native_namespace__(self) -> Any: # pragma: no cover
if self._implementation is Implementation.PYSPARK:
Expand Down
10 changes: 5 additions & 5 deletions narwhals/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,16 @@ def get_duckdb() -> Any:
return sys.modules.get("duckdb", None)


def get_dask_expr() -> Any:
    """Return the already-imported `dask_expr` module, or None if it is not loaded.

    Only `sys.modules` is consulted, so calling this never triggers an import.
    """
    return sys.modules.get("dask_expr")


def get_ibis() -> Any:
    """Return the already-imported `ibis` module, or None if it is not loaded.

    Only `sys.modules` is consulted, so calling this never triggers an import.
    """
    return sys.modules.get("ibis")


def get_dask_expr() -> Any:
    """Return the already-imported `dask_expr` module, or None if it is not loaded.

    Only `sys.modules` is consulted, so calling this never triggers an import.
    """
    return sys.modules.get("dask_expr")


def get_pyspark() -> Any:  # pragma: no cover
    """Return the already-imported `pyspark` module, or None if it is not loaded.

    Only `sys.modules` is consulted, so calling this never triggers an import.
    """
    return sys.modules.get("pyspark")
Expand Down
20 changes: 12 additions & 8 deletions narwhals/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,13 +719,12 @@ def _from_native_impl( # noqa: PLR0915
),
level="interchange",
)
else:
return LazyFrame(
DuckDBLazyFrame(
native_object, backend_version=backend_version, version=version
),
level="full",
)
return LazyFrame(
DuckDBLazyFrame(
native_object, backend_version=backend_version, version=version
),
level="full",
)

# Ibis
elif is_ibis_table(native_object): # pragma: no cover
Expand All @@ -739,8 +738,13 @@ def _from_native_impl( # noqa: PLR0915
)
raise TypeError(msg)
return native_object
import ibis # ignore-banned-import

backend_version = parse_version(ibis.__version__)
return DataFrame(
IbisInterchangeFrame(native_object, version=version),
IbisInterchangeFrame(
native_object, version=version, backend_version=backend_version
),
level="interchange",
)

Expand Down
57 changes: 57 additions & 0 deletions narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from narwhals.dependencies import get_cudf
from narwhals.dependencies import get_dask_dataframe
from narwhals.dependencies import get_duckdb
from narwhals.dependencies import get_ibis
from narwhals.dependencies import get_modin
from narwhals.dependencies import get_pandas
from narwhals.dependencies import get_polars
Expand Down Expand Up @@ -76,6 +77,8 @@ class Implementation(Enum):
"""Dask implementation."""
DUCKDB = auto()
"""DuckDB implementation."""
IBIS = auto()
"""Ibis implementation."""

UNKNOWN = auto()
"""Unknown implementation."""
Expand All @@ -101,6 +104,7 @@ def from_native_namespace(
get_polars(): Implementation.POLARS,
get_dask_dataframe(): Implementation.DASK,
get_duckdb(): Implementation.DUCKDB,
get_ibis(): Implementation.IBIS,
}
return mapping.get(native_namespace, Implementation.UNKNOWN)

Expand Down Expand Up @@ -249,6 +253,59 @@ def is_dask(self) -> bool:
"""
return self is Implementation.DASK # pragma: no cover

def is_duckdb(self) -> bool:
    """Tell whether this implementation is DuckDB.

    Returns:
        Boolean.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
        >>> df = nw.from_native(df_native)
        >>> df.implementation.is_duckdb()
        False
    """
    # Identity comparison against the DUCKDB enum member.
    return self is Implementation.DUCKDB  # pragma: no cover

def is_ibis(self) -> bool:
    """Tell whether this implementation is Ibis.

    Returns:
        Boolean.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_native = pl.DataFrame({"a": [1, 2, 3]})
        >>> df = nw.from_native(df_native)
        >>> df.implementation.is_ibis()
        False
    """
    # Identity comparison against the IBIS enum member.
    return self is Implementation.IBIS  # pragma: no cover


# Minimum backend version accepted for each implementation, stored as a version
# tuple. `validate_backend_version` looks an implementation up here and raises
# when the detected backend version compares lower than the stored tuple.
# NOTE(review): the pyproject.toml optional-dependency pins are meant to stay
# aligned with these values. Modin is the stated exception: the value tracked
# here follows `modin.pandas.__version__` rather than `modin.__version__`.
MIN_VERSIONS: dict[Implementation, tuple[int, ...]] = {
Implementation.PANDAS: (0, 25, 3),
Implementation.MODIN: (0, 25, 3),
Implementation.CUDF: (24, 10),
Implementation.PYARROW: (11,),
Implementation.PYSPARK: (3, 3),
Implementation.POLARS: (0, 20, 3),
Implementation.DASK: (2024, 10),
Implementation.DUCKDB: (1,),
Implementation.IBIS: (6,),
}


def validate_backend_version(
    implementation: Implementation, backend_version: tuple[int, ...]
) -> None:
    """Check that a backend's version meets the minimum Narwhals supports.

    Arguments:
        implementation: Backend whose minimum is looked up in `MIN_VERSIONS`.
        backend_version: Detected backend version as a tuple of integers.

    Raises:
        ValueError: If `backend_version` compares lower than the minimum
            registered for `implementation`.
        KeyError: If `implementation` has no entry in `MIN_VERSIONS`.
    """
    min_version = MIN_VERSIONS[implementation]
    # Tuples compare lexicographically, e.g. (1,) < (1, 0) < (1, 1).
    if backend_version < min_version:
        msg = f"Minimum version of {implementation} supported by Narwhals is {min_version}, found: {backend_version}"
        raise ValueError(msg)


def import_dtypes_module(version: Version) -> DTypes:
if version is Version.V1:
Expand Down
16 changes: 11 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,21 @@ classifiers = [
]

[project.optional-dependencies]
cudf = ["cudf>=24.10.0"]
modin = ["modin"]
# These should be aligned with MIN_VERSIONS in narwhals/utils.py
# Exception: modin, because `modin.__version__` isn't aligned with
# `modin.pandas.__version__`. The latter is the one that we make
# API decisions based on, so that's the one we track internally.
# We have yet to determine the minimum Modin version we support
# https://github.com/narwhals-dev/narwhals/issues/817
pandas = ["pandas>=0.25.3"]
polars = ["polars>=0.20.3"]
ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"]
modin = ["modin"]
cudf = ["cudf>=24.10.0"]
pyarrow = ["pyarrow>=11.0.0"]
pyspark = ["pyspark>=3.3.0"]
polars = ["polars>=0.20.3"]
dask = ["dask[dataframe]>=2024.10"]
duckdb = ["duckdb>=1.0"]
pyspark = ["pyspark>=3.3.0"]
ibis = ["ibis-framework>=6.0.0", "rich", "packaging", "pyarrow_hotfix"]
dev = [
"covdefaults",
"pre-commit",
Expand Down
3 changes: 3 additions & 0 deletions tests/expr_and_series/clip_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ def test_clip_series_expressified(
) -> None:
if "modin_pyarrow" in str(constructor_eager):
request.applymarker(pytest.mark.xfail)
if "cudf" in str(constructor_eager):
# https://github.com/rapidsai/cudf/issues/17682
request.applymarker(pytest.mark.xfail)

data = {"a": [1, 2, 3, -4, 5], "lb": [3, 2, 1, 1, 1], "ub": [4, 4, 2, 2, 2]}
df = nw.from_native(constructor_eager(data), eager_only=True)
Expand Down

0 comments on commit 8b6ef7c

Please sign in to comment.