diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 289068eb14..7f57751b37 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -3,6 +3,7 @@ from narwhals.containers import is_pandas from narwhals.containers import is_polars from narwhals.containers import is_series +from narwhals.expressions import col from narwhals.translate import get_namespace from narwhals.translate import to_native from narwhals.translate import translate_any @@ -22,4 +23,5 @@ "get_implementation", "get_namespace", "to_native", + "col", ] diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/narwhals/expression.py b/narwhals/expression.py new file mode 100644 index 0000000000..e71ea137e3 --- /dev/null +++ b/narwhals/expression.py @@ -0,0 +1,105 @@ +from __future__ import annotations +from typing import Callable, Any + +def extract_native(expr, other: Any) -> Any: + if isinstance(other, NarwhalsExpr): + return other._call(expr) + return other + +class NarwhalsExpr: + def __init__(self, call: str): + self._call = call + + # --- convert --- + def alias(self, name: str) -> Self: + return self.__class__(self._expr.alias(name)) + + def cast( + self, + dtype: DType, # type: ignore[override] + ) -> Self: + return self.__class__(self._expr.cast(reverse_translate_dtype(dtype))) + + # --- binary --- + def __eq__(self, other: object) -> Expr: # type: ignore[override] + return self.__class__(self._expr.__eq__(extract_native(other))) + + def __and__(self, other: Any) -> Expr: + return self.__class__(self._expr.__and__(extract_native(other))) + + def __or__(self, other: Any) -> Expr: + return self.__class__(self._expr.__or__(extract_native(other))) + + def __add__(self, other): + return self.__class__(lambda expr: self._call(expr).__add__(extract_native(expr, other))) + + def __radd__(self, other: Any) -> Expr: + return self.__class__(self._expr.__radd__(extract_native(other))) + + def __sub__(self, other: Any) -> Expr: + return self.__class__(self._expr.__sub__(extract_native(other))) + + def __rsub__(self, other: Any) -> Expr: + return self.__class__(self._expr.__rsub__(extract_native(other))) + + def __mul__(self, other: Any) -> Expr: + return self.__class__(self._expr.__mul__(extract_native(other))) + + def __rmul__(self, other: Any) -> Expr: + return self.__class__(self._expr.__rmul__(extract_native(other))) + + def __le__(self, other: Any) -> Expr: + return self.__class__(self._expr.__le__(extract_native(other))) + + def __lt__(self, other: Any) -> Expr: + return self.__class__(self._expr.__lt__(extract_native(other))) + + def __gt__(self, other: Any) -> Expr: + return self.__class__(self._expr.__gt__(extract_native(other))) + + def __ge__(self, other: Any) -> Expr: + return self.__class__(self._expr.__ge__(extract_native(other))) + + # --- unary --- + def mean(self) -> Expr: + return self.__class__(self._expr.mean()) + + def sum(self) -> Expr: + return self.__class__(self._expr.sum()) + + def min(self) -> Expr: + return self.__class__(self._expr.min()) + + def max(self) -> Expr: + return self.__class__(self._expr.max()) + + def n_unique(self) -> Expr: + return self.__class__(self._expr.n_unique()) + + def unique(self) -> Expr: + return self.__class__(self._expr.unique()) + + # --- transform --- + def is_between( + self, lower_bound: Any, upper_bound: Any, closed: str = "both" + ) -> Expr: + return self.__class__(self._expr.is_between(lower_bound, upper_bound, closed)) # type: ignore[arg-type] + + def is_in(self, other: Any) -> Expr: + return self.__class__(self._expr.is_in(other)) + + def is_null(self) -> Expr: + return self.__class__(self._expr.is_null()) + + # --- partial reduction --- + def drop_nulls(self) -> Expr: + return self.__class__(self._expr.drop_nulls()) + + def sample(self, n: int, fraction: float, *, with_replacement: bool) -> Expr: + return self.__class__( + self._expr.sample(n, fraction=fraction, with_replacement=with_replacement) + ) + + +def col(col_name: str): + return NarwhalsExpr(lambda expr: expr(col_name)) diff --git a/narwhals/pandas_like/dataframe.py b/narwhals/pandas_like/dataframe.py index c7deb8c598..6c9823910d 100644 --- a/narwhals/pandas_like/dataframe.py +++ b/narwhals/pandas_like/dataframe.py @@ -112,7 +112,7 @@ def select( ) -> Self: new_series = evaluate_into_exprs(self, *exprs, **named_exprs) df = horizontal_concat( - [series.series for series in new_series], + [series._series for series in new_series], implementation=self._implementation, ) return self._from_dataframe(df) @@ -137,7 +137,7 @@ def with_columns( ) -> Self: new_series = evaluate_into_exprs(self, *exprs, **named_exprs) df = self._dataframe.assign( - **{series.name: series.series for series in new_series} + **{series.name: series._series for series in new_series} ) return self._from_dataframe(df) diff --git a/narwhals/pandas_like/expr.py b/narwhals/pandas_like/expr.py index 88f0ab3008..51f01dbacf 100644 --- a/narwhals/pandas_like/expr.py +++ b/narwhals/pandas_like/expr.py @@ -213,7 +213,7 @@ def ends_with(self, suffix: str) -> Expr: return Expr( lambda df: [ PandasSeries( - series.series.str.endswith(suffix), + series._series.str.endswith(suffix), implementation=df._implementation, ) for series in self._expr._call(df) @@ -229,7 +229,7 @@ def strip_chars(self, characters: str = " ") -> Expr: return Expr( lambda df: [ PandasSeries( - series.series.str.strip(characters), + series._series.str.strip(characters), implementation=df._implementation, ) for series in self._expr._call(df) diff --git a/narwhals/pandas_like/namespace.py b/narwhals/pandas_like/namespace.py index acb29f47fe..ca34f3d897 100644 --- a/narwhals/pandas_like/namespace.py +++ b/narwhals/pandas_like/namespace.py @@ -70,8 +70,8 @@ def _create_series_from_scalar( return PandasSeries( series_from_iterable( [value], - name=series.series.name, - index=series.series.index[0:1], + name=series._series.name, + index=series._series.index[0:1], implementation=self._implementation, ), implementation=self._implementation, diff --git a/narwhals/pandas_like/series.py b/narwhals/pandas_like/series.py index 1931b6d6aa..8b5e682ec4 100644 --- a/narwhals/pandas_like/series.py +++ b/narwhals/pandas_like/series.py @@ -66,13 +66,9 @@ def shape(self) -> tuple[int]: return self._series.shape # type: ignore[no-any-return] def rename(self, name: str) -> PandasSeries: - ser = self.series + ser = self._series return self._from_series(ser.rename(name, copy=False)) - @property - def series(self) -> Any: - return self._series - @property def dtype(self) -> DType: return translate_dtype(self._series.dtype) @@ -81,27 +77,27 @@ def cast( self, dtype: DType, # type: ignore[override] ) -> Self: - ser = self.series + ser = self._series dtype = reverse_translate_dtype(dtype) return self._from_series(ser.astype(dtype)) def filter(self, mask: Self) -> Self: - ser = self.series + ser = self._series return self._from_series(ser.loc[validate_column_comparand(mask)]) def item(self) -> Any: - return item(self.series) + return item(self._series) def is_between( self, lower_bound: Any, upper_bound: Any, closed: str = "both" ) -> PandasSeries: - ser = self.series + ser = self._series return self._from_series(ser.between(lower_bound, upper_bound, inclusive=closed)) def is_in(self, other: Any) -> PandasSeries: import pandas as pd - ser = self.series + ser = self._series res = ser.isin(other).convert_dtypes() res[ser.isna()] = pd.NA return self._from_series(res) @@ -110,36 +106,36 @@ def is_in(self, other: Any) -> PandasSeries: def __eq__(self, other: object) -> PandasSeries: # type: ignore[override] other = validate_column_comparand(other) - ser = self.series + ser = self._series return self._from_series((ser == other).rename(ser.name, copy=False)) def __ne__(self, other: object) -> PandasSeries: # type: ignore[override] other = validate_column_comparand(other) - ser = self.series + ser = self._series return self._from_series((ser != other).rename(ser.name, copy=False)) def __ge__(self, other: Any) -> PandasSeries: other = validate_column_comparand(other) - ser = self.series + ser = self._series return self._from_series((ser >= other).rename(ser.name, copy=False)) def __gt__(self, other: Any) -> PandasSeries: other = validate_column_comparand(other) - ser = self.series + ser = self._series return self._from_series((ser > other).rename(ser.name, copy=False)) def __le__(self, other: Any) -> PandasSeries: other = validate_column_comparand(other) - ser = self.series + ser = self._series return self._from_series((ser <= other).rename(ser.name, copy=False)) def __lt__(self, other: Any) -> PandasSeries: other = validate_column_comparand(other) - ser = self.series + ser = self._series return self._from_series((ser < other).rename(ser.name, copy=False)) def __and__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser & other).rename(ser.name, copy=False)) @@ -147,7 +143,7 @@ def __rand__(self, other: Any) -> PandasSeries: return self.__and__(other) def __or__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser | other).rename(ser.name, copy=False)) @@ -155,7 +151,7 @@ def __ror__(self, other: Any) -> PandasSeries: return self.__or__(other) def __add__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser + other).rename(ser.name, copy=False)) @@ -163,7 +159,7 @@ def __radd__(self, other: Any) -> PandasSeries: return self.__add__(other) def __sub__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser - other).rename(ser.name, copy=False)) @@ -171,7 +167,7 @@ def __rsub__(self, other: Any) -> PandasSeries: return -1 * self.__sub__(other) def __mul__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser * other).rename(ser.name, copy=False)) @@ -179,7 +175,7 @@ def __rmul__(self, other: Any) -> PandasSeries: return self.__mul__(other) def __truediv__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser / other).rename(ser.name, copy=False)) @@ -187,7 +183,7 @@ def __rtruediv__(self, other: Any) -> PandasSeries: raise NotImplementedError def __floordiv__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser // other).rename(ser.name, copy=False)) @@ -195,7 +191,7 @@ def __rfloordiv__(self, other: Any) -> PandasSeries: raise NotImplementedError def __pow__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser**other).rename(ser.name, copy=False)) @@ -203,7 +199,7 @@ def __rpow__(self, other: Any) -> PandasSeries: # pragma: no cover raise NotImplementedError def __mod__(self, other: Any) -> PandasSeries: - ser = self.series + ser = self._series other = validate_column_comparand(other) return self._from_series((ser % other).rename(ser.name, copy=False)) @@ -213,41 +209,41 @@ def __rmod__(self, other: Any) -> PandasSeries: # pragma: no cover # Unary def __invert__(self: PandasSeries) -> PandasSeries: - ser = self.series + ser = self._series return self._from_series(~ser) # Reductions def any(self) -> Any: - ser = self.series + ser = self._series return ser.any() def all(self) -> Any: - ser = self.series + ser = self._series return ser.all() def min(self) -> Any: - ser = self.series + ser = self._series return ser.min() def max(self) -> Any: - ser = self.series + ser = self._series return ser.max() def sum(self) -> Any: - ser = self.series + ser = self._series return ser.sum() def prod(self) -> Any: - ser = self.series + ser = self._series return ser.prod() def median(self) -> Any: - ser = self.series + ser = self._series return ser.median() def mean(self) -> Any: - ser = self.series + ser = self._series return ser.mean() def std( @@ -255,7 +251,7 @@ def std( *, correction: float = 1.0, ) -> Any: - ser = self.series + ser = self._series return ser.std(ddof=correction) def var( @@ -263,7 +259,7 @@ def var( *, correction: float = 1.0, ) -> Any: - ser = self.series + ser = self._series return ser.var(ddof=correction) def len(self) -> Any: @@ -272,36 +268,36 @@ def len(self) -> Any: # Transformations def is_null(self) -> PandasSeries: - ser = self.series + ser = self._series return self._from_series(ser.isna()) def drop_nulls(self) -> PandasSeries: - ser = self.series + ser = self._series return self._from_series(ser.dropna()) def n_unique(self) -> int: - ser = self.series + ser = self._series return ser.nunique() # type: ignore[no-any-return] def zip_with(self, mask: SeriesProtocol, other: SeriesProtocol) -> PandasSeries: mask = validate_column_comparand(mask) other = validate_column_comparand(other) - ser = self.series + ser = self._series return self._from_series(ser.where(mask, other)) def sample(self, n: int, fraction: float, *, with_replacement: bool) -> PandasSeries: - ser = self.series + ser = self._series return self._from_series( ser.sample(n=n, frac=fraction, with_replacement=with_replacement) ) def unique(self) -> PandasSeries: - ser = self.series + ser = self._series plx = get_namespace(self._implementation) return plx.Series(self.name, ser.unique()) # type: ignore[no-any-return, attr-defined] def is_nan(self) -> PandasSeries: - ser = self.series + ser = self._series if is_extension_array_dtype(ser.dtype): return self._from_series((ser != ser).fillna(False)) # noqa: PLR0124 return self._from_series(ser.isna()) @@ -311,24 +307,24 @@ def sort( *, descending: bool | Sequence[bool] = True, ) -> PandasSeries: - ser = self.series + ser = self._series return self._from_series( ser.sort_values(ascending=not descending).rename(self.name) ) def alias(self, name: str) -> Self: - ser = self.series + ser = self._series return self._from_series(ser.rename(name, copy=False)) def to_numpy(self) -> Any: - return self.series.to_numpy() + return self._series.to_numpy() def to_pandas(self) -> Any: if self._implementation == "pandas": - return self.series + return self._series elif self._implementation == "cudf": - return self.series.to_pandas() + return self._series.to_pandas() elif self._implementation == "modin": - return self.series._to_pandas() + return self._series._to_pandas() msg = f"Unknown implementation: {self._implementation}" raise TypeError(msg) diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index b8bc6e1bb4..479fe5f738 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -46,7 +46,7 @@ def validate_column_comparand(other: Any) -> Any: if other.len() == 1: # broadcast return other.item() - return other.series + return other._series return other @@ -97,12 +97,15 @@ def parse_into_exprs( def parse_into_expr(implementation: str, into_expr: IntoExpr) -> Expr: + from narwhals.expressions import NarwhalsExpr from narwhals.pandas_like.expr import Expr from narwhals.pandas_like.namespace import Namespace from narwhals.pandas_like.series import PandasSeries plx = Namespace(implementation=implementation) + if isinstance(into_expr, NarwhalsExpr): + return into_expr._call(plx.col) if isinstance(into_expr, str): return plx.col(into_expr) if isinstance(into_expr, Expr): diff --git a/narwhals/polars.py b/narwhals/polars.py index af2f934883..f11eb190e0 100644 --- a/narwhals/polars.py +++ b/narwhals/polars.py @@ -26,6 +26,10 @@ def extract_native(obj: Any) -> Any: + from narwhals.expressions import NarwhalsExpr + + if isinstance(obj, NarwhalsExpr): + return obj._call(pl.col) if isinstance(obj, Expr): return obj._expr if isinstance(obj, DType): @@ -61,9 +65,6 @@ def __and__(self, other: Any) -> Expr: def __or__(self, other: Any) -> Expr: return self.__class__(self._expr.__or__(extract_native(other))) - def __add__(self, other: Any) -> Expr: - return self.__class__(self._expr.__add__(extract_native(other))) - def __radd__(self, other: Any) -> Expr: return self.__class__(self._expr.__radd__(extract_native(other)))