diff --git a/README.md b/README.md index 1cb1b453f..7e24b80fa 100644 --- a/README.md +++ b/README.md @@ -51,8 +51,8 @@ def my_agnostic_function( suppliers_native: AnyDataFrame, parts_native: AnyDataFrame, ) -> AnyDataFrame: - suppliers, pl = to_polars_api(suppliers_native, version="0.20") - parts, _ = to_polars_api(parts_native, version="0.20") + suppliers, pl = to_polars_api(suppliers_native, lazy_only=True) + parts, _ = to_polars_api(parts_native, lazy_only=True) result = ( suppliers.join(parts, left_on="city", right_on="city") .filter( diff --git a/narwhals/pandas_like/dataframe.py b/narwhals/pandas_like/dataframe.py index 475857569..3b635e064 100644 --- a/narwhals/pandas_like/dataframe.py +++ b/narwhals/pandas_like/dataframe.py @@ -32,20 +32,19 @@ def __init__( self, dataframe: Any, *, - api_version: str, implementation: str, ) -> None: self._validate_columns(dataframe.columns) self._dataframe = dataframe.reset_index(drop=True) - self._api_version = api_version self._implementation = implementation def _dispatch_to_lazy(self, method: str, *args: Any, **kwargs: Any) -> Self: return getattr(self.lazy(), method)(*args, **kwargs).collect() # type: ignore[no-any-return] def __repr__(self) -> str: # pragma: no cover - header = f" Narwhals DataFrame (api_version={self._api_version}) " + header = " Narwhals DataFrame" length = len(header) + # TODO: use `to_original_object` instead of `._dataframe` return ( "┌" + "─" * length @@ -125,7 +124,6 @@ def sort( def lazy(self) -> LazyFrame: return LazyFrame( self._dataframe, - api_version=self._api_version, implementation=self._implementation, ) @@ -151,7 +149,7 @@ def to_dict(self, *, as_series: bool = True) -> dict[str, Any]: def group_by(self, *keys: str | Iterable[str]) -> GroupBy: from narwhals.pandas_like.group_by import GroupBy - return GroupBy(self, flatten_str(*keys), api_version=self._api_version) + return GroupBy(self, flatten_str(*keys)) def join( self, @@ -179,16 +177,14 @@ def __init__( self, dataframe: Any, *, - 
api_version: str, implementation: str, ) -> None: self._validate_columns(dataframe.columns) self._dataframe = dataframe.reset_index(drop=True) - self._api_version = api_version self._implementation = implementation def __repr__(self) -> str: # pragma: no cover - header = f" Narwhals DataFrame (api_version={self._api_version}) " + header = " Narwhals DataFrame" length = len(header) return ( "┌" @@ -222,7 +218,6 @@ def _validate_booleanness(self) -> None: def _from_dataframe(self, df: Any) -> Self: return self.__class__( df, - api_version=self._api_version, implementation=self._implementation, ) @@ -298,7 +293,6 @@ def sort( def collect(self) -> DataFrame: return DataFrame( self._dataframe, - api_version=self._api_version, implementation=self._implementation, ) @@ -306,7 +300,7 @@ def collect(self) -> DataFrame: def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy: from narwhals.pandas_like.group_by import LazyGroupBy - return LazyGroupBy(self, flatten_str(*keys), api_version=self._api_version) + return LazyGroupBy(self, flatten_str(*keys)) def join( self, diff --git a/narwhals/pandas_like/expr.py b/narwhals/pandas_like/expr.py index 3f59a217e..9c4085188 100644 --- a/narwhals/pandas_like/expr.py +++ b/narwhals/pandas_like/expr.py @@ -29,7 +29,6 @@ def __init__( # noqa: PLR0913 implementation: str, ) -> None: self._call = call - self._api_version = "0.20.0" # todo self._depth = depth self._function_name = function_name self._root_names = root_names @@ -54,7 +53,6 @@ def from_column_names( lambda df: [ Series( df._dataframe.loc[:, column_name], - api_version=df._api_version, implementation=implementation, ) for column_name in column_names @@ -217,7 +215,6 @@ def ends_with(self, suffix: str) -> Expr: lambda df: [ Series( series.series.str.endswith(suffix), - api_version=df._api_version, implementation=df._implementation, ) for series in self._expr._call(df) @@ -234,7 +231,6 @@ def strip_chars(self, characters: str = " ") -> Expr: lambda df: [ Series( 
series.series.str.strip(characters), - api_version=df._api_version, implementation=df._implementation, ) for series in self._expr._call(df) diff --git a/narwhals/pandas_like/group_by.py b/narwhals/pandas_like/group_by.py index e3732cd2c..b5a03d65e 100644 --- a/narwhals/pandas_like/group_by.py +++ b/narwhals/pandas_like/group_by.py @@ -28,28 +28,22 @@ class GroupBy(GroupByProtocol): - def __init__(self, df: DataFrame, keys: list[str], api_version: str) -> None: + def __init__(self, df: DataFrame, keys: list[str]) -> None: self._df = df self._keys = list(keys) - self._api_version = api_version def agg( self, *aggs: IntoExpr | Iterable[IntoExpr], **named_aggs: IntoExpr, ) -> DataFrame: - return ( - LazyGroupBy(self._df.lazy(), self._keys, self._api_version) - .agg(*aggs, **named_aggs) - .collect() - ) + return LazyGroupBy(self._df.lazy(), self._keys).agg(*aggs, **named_aggs).collect() class LazyGroupBy(LazyGroupByProtocol): - def __init__(self, df: LazyFrame, keys: list[str], api_version: str) -> None: + def __init__(self, df: LazyFrame, keys: list[str]) -> None: self._df = df self._keys = list(keys) - self._api_version = api_version def agg( self, @@ -99,9 +93,7 @@ def agg( def _from_dataframe(self, df: DataFrame) -> LazyFrame: from narwhals.pandas_like.dataframe import LazyFrame - return LazyFrame( - df, api_version=self._api_version, implementation=self._df._implementation - ) + return LazyFrame(df, implementation=self._df._implementation) def agg_pandas( diff --git a/narwhals/pandas_like/namespace.py b/narwhals/pandas_like/namespace.py index e0e2dd150..9b476ce47 100644 --- a/narwhals/pandas_like/namespace.py +++ b/narwhals/pandas_like/namespace.py @@ -38,9 +38,7 @@ class Namespace(NamespaceProtocol): String = dtypes.String # --- not in spec --- - def __init__(self, *, api_version: str, implementation: str) -> None: - self.__dataframeapi_version__ = api_version - self.api_version = api_version + def __init__(self, *, implementation: str) -> None: 
self._implementation = implementation def _create_expr_from_callable( # noqa: PLR0913 @@ -69,7 +67,6 @@ def _create_series_from_scalar(self, value: Any, series: Series) -> Series: index=series.series.index[0:1], implementation=self._implementation, ), - api_version=self.api_version, implementation=self._implementation, ) @@ -94,7 +91,6 @@ def all(self) -> Expr: lambda df: [ Series( df._dataframe.loc[:, column_name], - api_version=df._api_version, implementation=self._implementation, ) for column_name in df.columns @@ -137,7 +133,6 @@ def len(self) -> Expr: index=[0], implementation=self._implementation, ), - api_version=df._api_version, implementation=self._implementation, ), ], @@ -173,11 +168,9 @@ def concat(self, items: Iterable[AnyDataFrame], *, how: str) -> AnyDataFrame: # if kind[0] is DataFrame: return DataFrame( # type: ignore[return-value] horizontal_concat(dfs, implementation=self._implementation), - api_version=self.api_version, implementation=self._implementation, ) return LazyFrame( # type: ignore[return-value] horizontal_concat(dfs, implementation=self._implementation), - api_version=self.api_version, implementation=self._implementation, ) diff --git a/narwhals/pandas_like/series.py b/narwhals/pandas_like/series.py index 72797ad3e..175eb7994 100644 --- a/narwhals/pandas_like/series.py +++ b/narwhals/pandas_like/series.py @@ -21,7 +21,6 @@ def __init__( self, series: Any, *, - api_version: str, implementation: str, ) -> None: """Parameters @@ -33,11 +32,10 @@ def __init__( self._name = series.name assert self._name is not None self._series = series.reset_index(drop=True) - self.api_version = api_version self._implementation = implementation def __repr__(self) -> str: # pragma: no cover - header = f" Narwhals Series (api_version={self.api_version}) " + header = " Narwhals Series" length = len(header) return ( "┌" @@ -53,7 +51,6 @@ def __repr__(self) -> str: # pragma: no cover def _from_series(self, series: Any) -> Self: return self.__class__( 
series.rename(series.name, copy=False), - api_version=self.api_version, implementation=self._implementation, ) diff --git a/narwhals/pandas_like/translate.py b/narwhals/pandas_like/translate.py index 5ca2981a3..51373d247 100644 --- a/narwhals/pandas_like/translate.py +++ b/narwhals/pandas_like/translate.py @@ -15,10 +15,10 @@ def translate( df: Any, implementation: str, - api_version: str, *, - eager: Literal[True], -) -> tuple[DataFrame, Namespace]: + eager_only: Literal[False], + lazy_only: Literal[False], +) -> tuple[LazyFrame, Namespace]: ... @@ -26,9 +26,9 @@ def translate( def translate( df: Any, implementation: str, - api_version: str, *, - eager: Literal[False], + eager_only: Literal[False], + lazy_only: Literal[True], ) -> tuple[LazyFrame, Namespace]: ... @@ -37,9 +37,20 @@ def translate( def translate( df: Any, implementation: str, - api_version: str, *, - eager: bool, + eager_only: Literal[True], + lazy_only: Literal[False], +) -> tuple[DataFrame, Namespace]: + ... + + +@overload +def translate( + df: Any, + implementation: str, + *, + eager_only: bool, + lazy_only: bool, ) -> tuple[DataFrame | LazyFrame, Namespace]: ... 
@@ -47,24 +58,22 @@ def translate( def translate( df: Any, implementation: str, - api_version: str, *, - eager: bool, + eager_only: bool, + lazy_only: bool, ) -> tuple[LazyFrame | DataFrame, Namespace]: from narwhals.pandas_like.dataframe import DataFrame from narwhals.pandas_like.dataframe import LazyFrame from narwhals.pandas_like.utils import get_namespace - if eager: + if eager_only and not lazy_only: df = DataFrame( df, - api_version=api_version, implementation=implementation, ) else: df = LazyFrame( df, - api_version=api_version, implementation=implementation, ) return df, get_namespace(df) diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index 733d37684..54eaa73a9 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -88,7 +88,7 @@ def maybe_evaluate_expr(df: DataFrame | LazyFrame, arg: Any) -> Any: def get_namespace(obj: Any) -> Namespace: from narwhals.pandas_like.namespace import Namespace - return Namespace(api_version="0.20.0", implementation=obj._implementation) + return Namespace(implementation=obj._implementation) def parse_into_exprs( diff --git a/narwhals/translate.py b/narwhals/translate.py index 7bcb03c25..e806bcecf 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -13,44 +13,66 @@ @overload def translate_frame( - df: Any, version: str, *, eager: Literal[True] + df: Any, + *, + eager_only: Literal[False] = ..., + lazy_only: Literal[False] = ..., +) -> tuple[DataFrame | LazyFrame, Namespace]: + ... + + +@overload +def translate_frame( + df: Any, + *, + eager_only: Literal[True], + lazy_only: Literal[False] = ..., ) -> tuple[DataFrame, Namespace]: ... @overload def translate_frame( - df: Any, version: str, *, eager: Literal[False] = ... + df: Any, + *, + eager_only: Literal[False] = ..., + lazy_only: Literal[True], ) -> tuple[LazyFrame, Namespace]: ... 
def translate_frame( - df: Any, version: str, *, eager: bool = False + df: Any, + *, + eager_only: bool = False, + lazy_only: bool = False, ) -> tuple[DataFrame | LazyFrame, Namespace]: + if eager_only and lazy_only: + msg = "Only one of `eager_only` and `lazy_only` can be True." + raise ValueError(msg) + if hasattr(df, "__narwhals_frame__"): - return df.__narwhals_frame__(version=version, eager=eager) # type: ignore[no-any-return] + return df.__narwhals_frame__(eager_only=eager_only, lazy_only=lazy_only) # type: ignore[no-any-return] try: import polars as pl except ModuleNotFoundError: pass else: - if isinstance(df, pl.DataFrame) and not eager: + if isinstance(df, pl.LazyFrame) and eager_only: msg = ( - "Expected LazyFrame, got DataFrame. Set `eager=False` if you function requires " - "eager execution, or make you frame lazy before passing it to this function." + "Expected DataFrame, got LazyFrame. Set `eager_only=False` if your " + "function doesn't require eager execution, or collect your frame " + "before passing it to this function." ) raise TypeError(msg) - if isinstance(df, pl.LazyFrame) and eager: + if isinstance(df, pl.DataFrame) and lazy_only: msg = ( - "Expected DataFrame, got LazyFrame. Set `eager=True` if you function doesn't " - "require eager execution, or make you frame lazy before passing it to this " - "function." + "Expected LazyFrame, got DataFrame. Set `lazy_only=False` if your " + "function doesn't need to use `.collect`, or make your frame lazy " + "before passing it to this function." 
) raise TypeError(msg) - if isinstance(df, pl.DataFrame): - return df, pl # type: ignore[return-value] - if isinstance(df, pl.LazyFrame) and not eager: + if isinstance(df, (pl.DataFrame, pl.LazyFrame)): return df, pl # type: ignore[return-value] try: import pandas as pd @@ -61,7 +83,10 @@ def translate_frame( from narwhals.pandas_like.translate import translate return translate( - df, api_version=version, implementation="pandas", eager=eager + df, + implementation="pandas", + eager_only=eager_only, + lazy_only=lazy_only, ) try: import cudf @@ -71,7 +96,9 @@ def translate_frame( if isinstance(df, cudf.DataFrame): from narwhals.pandas_like.translate import translate - return translate(df, api_version=version, implementation="cudf", eager=eager) + return translate( + df, implementation="cudf", eager_only=eager_only, lazy_only=lazy_only + ) try: import modin.pandas as mpd except ModuleNotFoundError: @@ -80,7 +107,9 @@ def translate_frame( if isinstance(df, mpd.DataFrame): from narwhals.pandas_like.translate import translate - return translate(df, api_version=version, implementation="modin", eager=eager) + return translate( + df, implementation="modin", eager_only=eager_only, lazy_only=lazy_only + ) msg = f"Could not translate DataFrame {type(df)}, please open a feature request." 
raise TypeError(msg) @@ -110,4 +139,4 @@ def get_namespace(obj: Any, implementation: str | None = None) -> Namespace: return pl # type: ignore[return-value] from narwhals.pandas_like.namespace import Namespace - return Namespace(api_version="0.20.0", implementation=obj._implementation) + return Namespace(implementation=obj._implementation) diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index 8ca8a11d5..4904b63ff 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -21,7 +21,7 @@ ) def test_q1(df_raw: Any) -> None: var_1 = datetime(1998, 9, 2) - df, pl = translate_frame(df_raw, version="0.20") + df, pl = translate_frame(df_raw, lazy_only=True) query_result = ( df.filter(pl.col("l_shipdate") <= var_1) .group_by(["l_returnflag", "l_linestatus"]) @@ -82,7 +82,7 @@ def test_q1(df_raw: Any) -> None: @mock.patch.dict(os.environ, {"NARWHALS_FORCE_GENERIC": "1"}) def test_q1_w_pandas_agg_generic_path(df_raw: Any) -> None: var_1 = datetime(1998, 9, 2) - df, pl = translate_frame(df_raw, version="0.20") + df, pl = translate_frame(df_raw, lazy_only=True) query_result = ( df.filter(pl.col("l_shipdate") <= var_1) .group_by(["l_returnflag", "l_linestatus"])