diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index e6bb6fa65..ed9597521 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -360,7 +360,7 @@ def join( join_type="inner", right_suffix=suffix, ) - .drop([key_token]), + .drop([key_token]) ) return self._from_native_frame( @@ -370,7 +370,7 @@ def join( right_keys=right_on, join_type=how_to_join_map[how], right_suffix=suffix, - ), + ) ) def join_asof( diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 1c0d0734e..9b6277be9 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -409,9 +409,7 @@ def mode(self: Self) -> Self: return reuse_series_implementation(self, "mode") def map_batches( - self: Self, - function: Callable[[Any], Any], - return_dtype: DType | None, + self: Self, function: Callable[[Any], Any], return_dtype: DType | None ) -> Self: def func(df: ArrowDataFrame) -> list[ArrowSeries]: input_series_list = self._call(df) @@ -463,11 +461,7 @@ def cum_prod(self: Self, *, reverse: bool) -> Self: return reuse_series_implementation(self, "cum_prod", reverse=reverse) def rolling_sum( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: return reuse_series_implementation( self, @@ -478,11 +472,7 @@ def rolling_sum( ) def rolling_mean( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: return reuse_series_implementation( self, @@ -493,12 +483,7 @@ def rolling_mean( ) def rolling_var( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: return reuse_series_implementation( self, @@ -510,12 +495,7 @@ def rolling_var( ) def rolling_std( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: return reuse_series_implementation( self, diff --git a/narwhals/_arrow/group_by.py b/narwhals/_arrow/group_by.py index 11ed914fe..6d651b92c 100644 --- a/narwhals/_arrow/group_by.py +++ b/narwhals/_arrow/group_by.py @@ -50,14 +50,10 @@ def __init__( self._grouped = pa.TableGroupBy(self._df._native_frame, list(self._keys)) def agg( - self: Self, - *aggs: IntoArrowExpr, - **named_aggs: IntoArrowExpr, + self: Self, *aggs: IntoArrowExpr, **named_aggs: IntoArrowExpr ) -> ArrowDataFrame: exprs = parse_into_exprs( - *aggs, - namespace=self._df.__narwhals_namespace__(), - **named_aggs, + *aggs, namespace=self._df.__narwhals_namespace__(), **named_aggs ) for expr in exprs: if expr._output_names is None: diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index b02ad32ee..4303ddc92 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -354,10 +354,7 @@ def selectors(self: Self) -> ArrowSelectorNamespace: backend_version=self._backend_version, version=self._version ) - def when( - self: Self, - *predicates: IntoArrowExpr, - ) -> ArrowWhen: + def when(self: Self, *predicates: IntoArrowExpr) -> ArrowWhen: plx = self.__class__(backend_version=self._backend_version, version=self._version) condition = plx.all_horizontal(*predicates) return ArrowWhen(condition, self._backend_version, version=self._version) diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py index 48e837ec7..8f08e1933 100644 --- a/narwhals/_arrow/selectors.py +++ b/narwhals/_arrow/selectors.py @@ -55,7 +55,7 @@ def numeric(self: Self) -> ArrowSelector: dtypes.UInt8, dtypes.Float64, dtypes.Float32, - ], + ] ) def categorical(self: Self) -> ArrowSelector: diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 1e8d09827..8511580bc 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -956,11 +956,7 @@ def cum_prod(self: Self, *, reverse: bool) -> Self: return self._from_native_series(result) def rolling_sum( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: import pyarrow.compute as pc @@ -992,11 +988,7 @@ def rolling_sum( return result[offset:] def rolling_mean( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: import pyarrow.compute as pc @@ -1031,12 +1023,7 @@ def rolling_mean( return result[offset:] def rolling_var( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: import pyarrow.compute as pc # ignore-banned-import @@ -1083,12 +1070,7 @@ def rolling_var( return result[offset:] def rolling_std( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: return ( self.rolling_var( @@ -1149,8 +1131,7 @@ def __contains__(self: Self, other: Any) -> bool: else pa.scalar(None, type=native_series.type) ) return maybe_extract_py_scalar( # type: ignore[no-any-return] - pc.is_in(other_, native_series), - return_py_scalar=True, + pc.is_in(other_, native_series), return_py_scalar=True ) except (ArrowInvalid, ArrowNotImplementedError, ArrowTypeError) as exc: from narwhals.exceptions import InvalidOperationError @@ -1553,7 +1534,7 @@ def slice(self: Self, offset: int, length: int | None) -> ArrowSeries: return self._compliant_series._from_native_series( pc.utf8_slice_codeunits( self._compliant_series._native_series, start=offset, stop=stop - ), + ) ) def to_datetime(self: Self, format: str | None) -> ArrowSeries: # noqa: A002 diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index ca4852655..1654aa227 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -312,8 +312,7 @@ def floordiv_compat(left: Any, right: Any) -> Any: # GH 56676 has_remainder = pc.not_equal(pc.multiply(divided, right), left) has_one_negative_operand = pc.less( - pc.bit_wise_xor(left, right), - pa.scalar(0, type=divided.type), + pc.bit_wise_xor(left, right), pa.scalar(0, type=divided.type) ) result = pc.if_else( pc.and_( diff --git a/narwhals/_dask/dataframe.py b/narwhals/_dask/dataframe.py index 16053d69a..dd1d1ebb4 100644 --- a/narwhals/_dask/dataframe.py +++ b/narwhals/_dask/dataframe.py @@ -105,11 +105,7 @@ def filter(self, *predicates: DaskExpr, **constraints: Any) -> Self: mask = expr._call(self)[0] return self._from_native_frame(self._native_frame.loc[mask]) - def select( - self: Self, - *exprs: IntoDaskExpr, - **named_exprs: IntoDaskExpr, - ) -> Self: + def select(self: Self, *exprs: IntoDaskExpr, **named_exprs: IntoDaskExpr) -> Self: import dask.dataframe as dd if exprs and all(isinstance(x, str) for x in exprs) and not named_exprs: @@ -193,10 +189,7 @@ def head(self: Self, n: int) -> Self: ) def unique( - self: Self, - subset: list[str] | None, - *, - keep: Literal["any", "none"] = "any", + self: Self, subset: list[str] | None, *, keep: Literal["any", "none"] = "any" ) -> Self: if subset is not None and any(x not in self.columns for x in subset): msg = f"Column(s) {subset} not found in {self.columns}" @@ -259,7 +252,7 @@ def join( right_on=key_token, suffixes=("", suffix), ) - .drop(columns=key_token), + .drop(columns=key_token) ) if how == "anti": @@ -342,7 +335,7 @@ def join( right_on=right_on, how=how, suffixes=("", suffix), - ), + ) ) def join_asof( @@ -370,7 +363,7 @@ def join_asof( by=by, direction=strategy, suffixes=("", "_right"), - ), + ) ) def group_by(self, *by: str, drop_null_keys: bool) -> DaskLazyGroupBy: diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index cb20fa616..8fd8b7290 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -84,8 +84,7 @@ def func(df: DaskLazyFrame) -> list[dask_expr.Series]: except KeyError as e: missing_columns = [x for x in column_names if x not in df.columns] raise ColumnNotFoundError.from_missing_and_available_column_names( - missing_columns=missing_columns, - available_columns=df.columns, + missing_columns=missing_columns, available_columns=df.columns ) from e return cls( @@ -360,9 +359,7 @@ def cum_sum(self: Self, *, reverse: bool) -> Self: raise NotImplementedError(msg) return self._from_call( - lambda _input: _input.cumsum(), - "cum_sum", - returns_scalar=self._returns_scalar, + lambda _input: _input.cumsum(), "cum_sum", returns_scalar=self._returns_scalar ) def cum_count(self: Self, *, reverse: bool) -> Self: @@ -382,9 +379,7 @@ def cum_min(self: Self, *, reverse: bool) -> Self: raise NotImplementedError(msg) return self._from_call( - lambda _input: _input.cummin(), - "cum_min", - returns_scalar=self._returns_scalar, + lambda _input: _input.cummin(), "cum_min", returns_scalar=self._returns_scalar ) def cum_max(self: Self, *, reverse: bool) -> Self: @@ -393,9 +388,7 @@ def cum_max(self: Self, *, reverse: bool) -> Self: raise NotImplementedError(msg) return self._from_call( - lambda _input: _input.cummax(), - "cum_max", - returns_scalar=self._returns_scalar, + lambda _input: _input.cummax(), "cum_max", returns_scalar=self._returns_scalar ) def cum_prod(self: Self, *, reverse: bool) -> Self: @@ -410,10 +403,7 @@ def cum_prod(self: Self, *, reverse: bool) -> Self: ) def is_between( - self, - lower_bound: Self | Any, - upper_bound: Self | Any, - closed: str = "both", + self, lower_bound: Self | Any, upper_bound: Self | Any, closed: str = "both" ) -> Self: if closed == "none": closed = "neither" @@ -537,9 +527,7 @@ def func( ) def clip( - self: Self, - lower_bound: Self | Any | None, - upper_bound: Self | Any | None, + self: Self, lower_bound: Self | Any | None, upper_bound: Self | Any | None ) -> Self: return self._from_call( lambda _input, lower_bound, upper_bound: _input.clip( @@ -669,9 +657,7 @@ def is_in(self: Self, other: Any) -> Self: def null_count(self: Self) -> Self: return self._from_call( - lambda _input: _input.isna().sum(), - "null_count", - returns_scalar=True, + lambda _input: _input.isna().sum(), "null_count", returns_scalar=True ) def tail(self: Self) -> NoReturn: diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py index 243b21b71..a2ac61fd6 100644 --- a/narwhals/_dask/group_by.py +++ b/narwhals/_dask/group_by.py @@ -29,11 +29,7 @@ def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> int: def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> int: return s0.sum() # type: ignore[no-any-return] - return dd.Aggregation( - name="nunique", - chunk=chunk, - agg=agg, - ) + return dd.Aggregation(name="nunique", chunk=chunk, agg=agg) def var( @@ -81,20 +77,12 @@ def __init__( self._df = df self._keys = keys self._grouped = self._df._native_frame.groupby( - list(self._keys), - dropna=drop_null_keys, - observed=True, + list(self._keys), dropna=drop_null_keys, observed=True ) - def agg( - self, - *aggs: IntoDaskExpr, - **named_aggs: IntoDaskExpr, - ) -> DaskLazyFrame: + def agg(self, *aggs: IntoDaskExpr, **named_aggs: IntoDaskExpr) -> DaskLazyFrame: exprs = parse_into_exprs( - *aggs, - namespace=self._df.__narwhals_namespace__(), - **named_aggs, + *aggs, namespace=self._df.__narwhals_namespace__(), **named_aggs ) output_names: list[str] = copy(self._keys) for expr in exprs: @@ -109,11 +97,7 @@ def agg( output_names.extend(expr._output_names) return agg_dask( - self._df, - self._grouped, - exprs, - self._keys, - self._from_native_frame, + self._df, self._grouped, exprs, self._keys, self._from_native_frame ) def _from_native_frame(self, df: DaskLazyFrame) -> DaskLazyFrame: diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index 9a16d7f13..eeb86480e 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -305,10 +305,7 @@ def func(df: DaskLazyFrame) -> list[dask_expr.Series]: kwargs={"exprs": exprs}, ) - def when( - self, - *predicates: IntoDaskExpr, - ) -> DaskWhen: + def when(self, *predicates: IntoDaskExpr) -> DaskWhen: plx = self.__class__(backend_version=self._backend_version, version=self._version) condition = plx.all_horizontal(*predicates) return DaskWhen( diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py index 2891d84ff..50763658d 100644 --- a/narwhals/_dask/selectors.py +++ b/narwhals/_dask/selectors.py @@ -55,7 +55,7 @@ def numeric(self: Self) -> DaskSelector: dtypes.UInt8, dtypes.Float64, dtypes.Float32, - ], + ] ) def categorical(self: Self) -> DaskSelector: diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 76ff68ae0..4a0931cfe 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -94,11 +94,7 @@ def collect(self) -> Any: def head(self, n: int) -> Self: return self._from_native_frame(self._native_frame.limit(n)) - def select( - self: Self, - *exprs: Any, - **named_exprs: Any, - ) -> Self: + def select(self: Self, *exprs: Any, **named_exprs: Any) -> Self: new_columns_map = parse_exprs_and_named_exprs(self, *exprs, **named_exprs) if not new_columns_map: # TODO(marco): return empty relation with 0 columns? @@ -129,11 +125,7 @@ def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001 def lazy(self) -> Self: return self - def with_columns( - self: Self, - *exprs: Any, - **named_exprs: Any, - ) -> Self: + def with_columns(self: Self, *exprs: Any, **named_exprs: Any) -> Self: from duckdb import ColumnExpression new_columns_map = parse_exprs_and_named_exprs(self, *exprs, **named_exprs) diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 0f33ff846..cd6533293 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -259,9 +259,7 @@ def __ne__(self, other: DuckDBExpr) -> Self: # type: ignore[override] def __invert__(self) -> Self: return self._from_call( - lambda _input: ~_input, - "__invert__", - returns_scalar=self._returns_scalar, + lambda _input: ~_input, "__invert__", returns_scalar=self._returns_scalar ) def alias(self, name: str) -> Self: @@ -295,9 +293,7 @@ def mean(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("mean", _input), - "mean", - returns_scalar=True, + lambda _input: FunctionExpression("mean", _input), "mean", returns_scalar=True ) def skew(self) -> Self: @@ -352,11 +348,7 @@ def func(_input: duckdb.Expression) -> duckdb.Expression: msg = "Only linear interpolation methods are supported for DuckDB quantile." raise NotImplementedError(msg) - return self._from_call( - func, - "quantile", - returns_scalar=True, - ) + return self._from_call(func, "quantile", returns_scalar=True) def clip(self, lower_bound: Any, upper_bound: Any) -> Self: from duckdb import FunctionExpression @@ -510,19 +502,13 @@ def fill_null(self, value: Any, strategy: Any, limit: int | None) -> Self: returns_scalar=self._returns_scalar, ) - def cast( - self: Self, - dtype: DType | type[DType], - ) -> Self: + def cast(self: Self, dtype: DType | type[DType]) -> Self: def func(_input: Any, dtype: DType | type[DType]) -> Any: native_dtype = narwhals_to_native_dtype(dtype, self._version) return _input.cast(native_dtype) return self._from_call( - func, - "cast", - dtype=dtype, - returns_scalar=self._returns_scalar, + func, "cast", dtype=dtype, returns_scalar=self._returns_scalar ) @property diff --git a/narwhals/_duckdb/group_by.py b/narwhals/_duckdb/group_by.py index 0b312ff03..a32d84cdc 100644 --- a/narwhals/_duckdb/group_by.py +++ b/narwhals/_duckdb/group_by.py @@ -20,15 +20,9 @@ def __init__( self._compliant_frame = compliant_frame self._keys = keys - def agg( - self, - *aggs: IntoDuckDBExpr, - **named_aggs: IntoDuckDBExpr, - ) -> DuckDBLazyFrame: + def agg(self, *aggs: IntoDuckDBExpr, **named_aggs: IntoDuckDBExpr) -> DuckDBLazyFrame: exprs = parse_into_exprs( - *aggs, - namespace=self._compliant_frame.__narwhals_namespace__(), - **named_aggs, + *aggs, namespace=self._compliant_frame.__narwhals_namespace__(), **named_aggs ) output_names: list[str] = copy(self._keys) for expr in exprs: diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index abac2e158..fb69f90a2 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -48,9 +48,7 @@ def maybe_evaluate(df: DuckDBLazyFrame, obj: Any) -> Any: def parse_exprs_and_named_exprs( - df: DuckDBLazyFrame, - *exprs: IntoDuckDBExpr, - **named_exprs: IntoDuckDBExpr, + df: DuckDBLazyFrame, *exprs: IntoDuckDBExpr, **named_exprs: IntoDuckDBExpr ) -> dict[str, duckdb.Expression]: result_columns: dict[str, list[duckdb.Expression]] = {} for expr in exprs: @@ -145,8 +143,7 @@ def native_to_narwhals_dtype(duckdb_dtype: str, version: Version) -> DType: return dtypes.List(native_to_narwhals_dtype(match_.group(1), version)) if match_ := re.match(r"(\w+)\[(\d+)\]", duckdb_dtype): return dtypes.Array( - native_to_narwhals_dtype(match_.group(1), version), - int(match_.group(2)), + native_to_narwhals_dtype(match_.group(1), version), int(match_.group(2)) ) if duckdb_dtype.startswith("DECIMAL("): return dtypes.Decimal() diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index 99bb3bb24..00850964e 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -159,21 +159,13 @@ def infer_new_root_output_names( @overload def reuse_series_implementation( - expr: PandasLikeExprT, - attr: str, - *, - returns_scalar: bool = False, - **kwargs: Any, + expr: PandasLikeExprT, attr: str, *, returns_scalar: bool = False, **kwargs: Any ) -> PandasLikeExprT: ... @overload def reuse_series_implementation( - expr: ArrowExprT, - attr: str, - *, - returns_scalar: bool = False, - **kwargs: Any, + expr: ArrowExprT, attr: str, *, returns_scalar: bool = False, **kwargs: Any ) -> ArrowExprT: ... @@ -254,10 +246,7 @@ def reuse_series_namespace_implementation( expr: PandasLikeExprT, series_namespace: str, attr: str, **kwargs: Any ) -> PandasLikeExprT: ... def reuse_series_namespace_implementation( - expr: ArrowExprT | PandasLikeExprT, - series_namespace: str, - attr: str, - **kwargs: Any, + expr: ArrowExprT | PandasLikeExprT, series_namespace: str, attr: str, **kwargs: Any ) -> ArrowExprT | PandasLikeExprT: """Reuse Series implementation for expression. diff --git a/narwhals/_ibis/dataframe.py b/narwhals/_ibis/dataframe.py index 6fe8997a9..6acbec1ad 100644 --- a/narwhals/_ibis/dataframe.py +++ b/narwhals/_ibis/dataframe.py @@ -98,11 +98,7 @@ def to_pandas(self: Self) -> pd.DataFrame: def to_arrow(self: Self) -> pa.Table: return self._native_frame.to_pyarrow() - def select( - self: Self, - *exprs: Any, - **named_exprs: Any, - ) -> Self: + def select(self: Self, *exprs: Any, **named_exprs: Any) -> Self: if named_exprs or not all(isinstance(x, str) for x in exprs): # pragma: no cover msg = ( "`select`-ing not by name is not supported for Ibis backend.\n\n" diff --git a/narwhals/_interchange/dataframe.py b/narwhals/_interchange/dataframe.py index 562ee0749..6e26d010d 100644 --- a/narwhals/_interchange/dataframe.py +++ b/narwhals/_interchange/dataframe.py @@ -148,11 +148,7 @@ def __getattr__(self, attr: str) -> Any: ) raise NotImplementedError(msg) - def select( - self: Self, - *exprs: Any, - **named_exprs: Any, - ) -> Self: + def select(self: Self, *exprs: Any, **named_exprs: Any) -> Self: if named_exprs or not all(isinstance(x, str) for x in exprs): # pragma: no cover msg = ( "`select`-ing not by name is not supported for interchange-only level.\n\n" diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index e11c02710..99da259f3 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -200,10 +200,7 @@ def __getitem__( if all(isinstance(x, int) for x in item[1]): return self._from_native_frame(self._native_frame.iloc[item]) if all(isinstance(x, str) for x in item[1]): - indexer = ( - item[0], - self._native_frame.columns.get_indexer(item[1]), - ) + indexer = (item[0], self._native_frame.columns.get_indexer(item[1])) return self._from_native_frame(self._native_frame.iloc[indexer]) msg = ( f"Expected sequence str or int, got: {type(item[1])}" # pragma: no cover @@ -279,25 +276,13 @@ def columns(self) -> list[str]: return self._native_frame.columns.tolist() # type: ignore[no-any-return] @overload - def rows( - self, - *, - named: Literal[True], - ) -> list[dict[str, Any]]: ... + def rows(self, *, named: Literal[True]) -> list[dict[str, Any]]: ... @overload - def rows( - self, - *, - named: Literal[False] = False, - ) -> list[tuple[Any, ...]]: ... + def rows(self, *, named: Literal[False] = False) -> list[tuple[Any, ...]]: ... @overload - def rows( - self, - *, - named: bool, - ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ... + def rows(self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ... def rows( self, *, named: bool = False @@ -313,10 +298,7 @@ def rows( return self._native_frame.to_dict(orient="records") # type: ignore[no-any-return] def iter_rows( - self, - *, - named: bool = False, - buffer_size: int = 512, + self, *, named: bool = False, buffer_size: int = 512 ) -> Iterator[list[tuple[Any, ...]]] | Iterator[list[dict[str, Any]]]: # The param ``buffer_size`` is only here for compatibility with the Polars API # and has no effect on the output. @@ -343,9 +325,7 @@ def collect_schema(self) -> dict[str, DType]: # --- reshape --- def select( - self, - *exprs: IntoPandasLikeExpr, - **named_exprs: IntoPandasLikeExpr, + self, *exprs: IntoPandasLikeExpr, **named_exprs: IntoPandasLikeExpr ) -> Self: if exprs and all(isinstance(x, str) for x in exprs) and not named_exprs: # This is a simple slice => fastpath! @@ -421,9 +401,7 @@ def filter(self, *predicates: IntoPandasLikeExpr, **constraints: Any) -> Self: return self._from_native_frame(self._native_frame.loc[_mask]) def with_columns( - self, - *exprs: IntoPandasLikeExpr, - **named_exprs: IntoPandasLikeExpr, + self, *exprs: IntoPandasLikeExpr, **named_exprs: IntoPandasLikeExpr ) -> Self: index = self._native_frame.index new_columns = evaluate_into_exprs(self, *exprs, **named_exprs) @@ -502,11 +480,7 @@ def collect(self) -> PandasLikeDataFrame: def group_by(self, *keys: str, drop_null_keys: bool) -> PandasLikeGroupBy: from narwhals._pandas_like.group_by import PandasLikeGroupBy - return PandasLikeGroupBy( - self, - list(keys), - drop_null_keys=drop_null_keys, - ) + return PandasLikeGroupBy(self, list(keys), drop_null_keys=drop_null_keys) def join( self, @@ -542,7 +516,7 @@ def join( right_on=key_token, suffixes=("", suffix), ) - .drop(columns=key_token), + .drop(columns=key_token) ) else: return self._from_native_frame( @@ -550,7 +524,7 @@ def join( other._native_frame, how="cross", suffixes=("", suffix), - ), + ) ) if how == "anti": @@ -646,7 +620,7 @@ def join( right_on=right_on, how=how, suffixes=("", suffix), - ), + ) ) def join_asof( @@ -674,7 +648,7 @@ def join_asof( by=by, direction=strategy, suffixes=("", "_right"), - ), + ) ) # --- partial reduction --- diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 34d05b7eb..457c17da5 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -106,8 +106,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: except KeyError as e: missing_columns = [x for x in column_names if x not in df.columns] raise ColumnNotFoundError.from_missing_and_available_column_names( - missing_columns=missing_columns, - available_columns=df.columns, + missing_columns=missing_columns, available_columns=df.columns ) from e return cls( @@ -153,10 +152,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: kwargs={}, ) - def cast( - self, - dtype: Any, - ) -> Self: + def cast(self, dtype: Any) -> Self: return reuse_series_implementation(self, "cast", dtype=dtype) def __eq__(self, other: PandasLikeExpr | Any) -> Self: # type: ignore[override] @@ -513,9 +509,7 @@ def mode(self: Self) -> Self: return reuse_series_implementation(self, "mode") def map_batches( - self: Self, - function: Callable[[Any], Any], - return_dtype: DType | None = None, + self: Self, function: Callable[[Any], Any], return_dtype: DType | None = None ) -> Self: def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: input_series_list = self._call(df) @@ -562,11 +556,7 @@ def cum_prod(self: Self, *, reverse: bool) -> Self: return reuse_series_implementation(self, "cum_prod", reverse=reverse) def rolling_sum( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: return reuse_series_implementation( self, @@ -577,11 +567,7 @@ def rolling_sum( ) def rolling_mean( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: return reuse_series_implementation( self, @@ -592,12 +578,7 @@ def rolling_mean( ) def rolling_var( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: return reuse_series_implementation( self, @@ -609,12 +590,7 @@ def rolling_var( ) def rolling_std( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: return reuse_series_implementation( self, @@ -662,9 +638,7 @@ def __init__(self, expr: PandasLikeExpr) -> None: def get_categories(self) -> PandasLikeExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "cat", - "get_categories", + self._compliant_expr, "cat", "get_categories" ) @@ -680,12 +654,7 @@ def len_chars( ) def replace( - self, - pattern: str, - value: str, - *, - literal: bool = False, - n: int = 1, + self, pattern: str, value: str, *, literal: bool = False, n: int = 1 ) -> PandasLikeExpr: return reuse_series_namespace_implementation( self._compliant_expr, @@ -698,11 +667,7 @@ def replace( ) def replace_all( - self, - pattern: str, - value: str, - *, - literal: bool = False, + self, pattern: str, value: str, *, literal: bool = False ) -> PandasLikeExpr: return reuse_series_namespace_implementation( self._compliant_expr, @@ -715,35 +680,22 @@ def replace_all( def strip_chars(self, characters: str | None = None) -> PandasLikeExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "strip_chars", - characters=characters, + self._compliant_expr, "str", "strip_chars", characters=characters ) def starts_with(self, prefix: str) -> PandasLikeExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "starts_with", - prefix=prefix, + self._compliant_expr, "str", "starts_with", prefix=prefix ) def ends_with(self, suffix: str) -> PandasLikeExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "ends_with", - suffix=suffix, + self._compliant_expr, "str", "ends_with", suffix=suffix ) def contains(self, pattern: str, *, literal: bool) -> PandasLikeExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "contains", - pattern=pattern, - literal=literal, + self._compliant_expr, "str", "contains", pattern=pattern, literal=literal ) def slice(self, offset: int, length: int | None = None) -> PandasLikeExpr: @@ -753,24 +705,17 @@ def slice(self, offset: int, length: int | None = None) -> PandasLikeExpr: def to_datetime(self: Self, format: str | None) -> PandasLikeExpr: # noqa: A002 return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_datetime", - format=format, + self._compliant_expr, "str", "to_datetime", format=format ) def to_uppercase(self) -> PandasLikeExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_uppercase", + self._compliant_expr, "str", "to_uppercase" ) def to_lowercase(self) -> PandasLikeExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_lowercase", + self._compliant_expr, "str", "to_lowercase" ) @@ -1041,8 +986,4 @@ def __init__(self: Self, expr: PandasLikeExpr) -> None: self._expr = expr def len(self: Self) -> PandasLikeExpr: - return reuse_series_namespace_implementation( - self._expr, - "list", - "len", - ) + return reuse_series_namespace_implementation(self._expr, "list", "len") diff --git a/narwhals/_pandas_like/group_by.py b/narwhals/_pandas_like/group_by.py index a1eca5b5d..78e3ab5a4 100644 --- a/narwhals/_pandas_like/group_by.py +++ b/narwhals/_pandas_like/group_by.py @@ -64,10 +64,7 @@ def __init__( msg = "Grouping by null values is not supported in pandas < 1.0.0" raise NotImplementedError(msg) self._grouped = self._df._native_frame.groupby( - list(self._keys), - sort=False, - as_index=True, - observed=True, + list(self._keys), sort=False, as_index=True, observed=True ) else: self._grouped = self._df._native_frame.groupby( @@ -79,14 +76,10 @@ def __init__( ) def agg( - self, - *aggs: IntoPandasLikeExpr, - **named_aggs: IntoPandasLikeExpr, + self, *aggs: IntoPandasLikeExpr, **named_aggs: IntoPandasLikeExpr ) -> PandasLikeDataFrame: exprs = parse_into_exprs( - *aggs, - namespace=self._df.__narwhals_namespace__(), - **named_aggs, + *aggs, namespace=self._df.__narwhals_namespace__(), **named_aggs ) implementation: Implementation = self._df._implementation output_names: list[str] = copy(self._keys) @@ -355,10 +348,7 @@ def func(df: Any) -> Any: out_group.append(result_keys._native_series.iloc[0]) out_names.append(result_keys.name) return native_series_from_iterable( - out_group, - index=out_names, - name="", - implementation=implementation, + out_group, index=out_names, name="", implementation=implementation ) if implementation is Implementation.PANDAS and backend_version >= (2, 2): diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 212c9c938..b1af27676 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -364,10 +364,7 @@ def concat( ) raise NotImplementedError - def when( - self, - *predicates: IntoPandasLikeExpr, - ) -> PandasWhen: + def when(self, *predicates: IntoPandasLikeExpr) -> PandasWhen: plx = self.__class__( self._implementation, self._backend_version, version=self._version ) diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py index e7d7fe18d..6e3e758af 100644 --- a/narwhals/_pandas_like/selectors.py +++ b/narwhals/_pandas_like/selectors.py @@ -57,7 +57,7 @@ def numeric(self) -> PandasSelector: dtypes.UInt8, dtypes.Float64, dtypes.Float32, - ], + ] ) def categorical(self) -> PandasSelector: diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index e5c5e771e..c69ff5074 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -225,10 +225,7 @@ def scatter(self, indices: int | Sequence[int], values: Any) -> Self: s.name = self.name return self._from_native_series(s) - def cast( - self, - dtype: Any, - ) -> Self: + def cast(self, dtype: Any) -> Self: ser = self._native_series dtype = narwhals_to_native_dtype( dtype, ser.dtype, self._implementation, self._backend_version, self._version @@ -829,8 +826,7 @@ def to_numpy(self, dtype: Any = None, copy: bool | None = None) -> Any: ) if not has_missing and str(s.dtype) in PANDAS_TO_NUMPY_DTYPE_NO_MISSING: return s.to_numpy( - dtype=dtype or PANDAS_TO_NUMPY_DTYPE_NO_MISSING[str(s.dtype)], - copy=copy, + dtype=dtype or PANDAS_TO_NUMPY_DTYPE_NO_MISSING[str(s.dtype)], copy=copy ) return s.to_numpy(dtype=dtype, copy=copy) @@ -913,9 +909,7 @@ def value_counts( value_name_ = name or ("proportion" if normalize else "count") val_count = self._native_series.value_counts( - dropna=False, - sort=False, - normalize=normalize, + dropna=False, sort=False, normalize=normalize ).reset_index() val_count.columns = [index_name_, value_name_] @@ -1056,11 +1050,7 @@ def cum_prod(self: Self, *, reverse: bool) -> Self: return self._from_native_series(result) def rolling_sum( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: result = self._native_series.rolling( window=window_size, min_periods=min_periods, center=center @@ -1068,11 +1058,7 @@ def rolling_sum( return self._from_native_series(result) def rolling_mean( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, + self: Self, window_size: int, *, min_periods: int | None, center: bool ) -> Self: result = self._native_series.rolling( window=window_size, min_periods=min_periods, center=center @@ -1080,12 +1066,7 @@ def rolling_mean( return self._from_native_series(result) def rolling_var( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: result = self._native_series.rolling( window=window_size, min_periods=min_periods, center=center @@ -1093,12 +1074,7 @@ def rolling_var( return self._from_native_series(result) def rolling_std( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: result = self._native_series.rolling( window=window_size, min_periods=min_periods, center=center @@ -1161,10 +1137,7 @@ def rank( else: ranked_series = native_series.rank( - method=pd_method, - na_option="keep", - ascending=not descending, - pct=False, + method=pd_method, na_option="keep", ascending=not descending, pct=False ) return self._from_native_series(ranked_series) @@ -1212,7 +1185,7 @@ def replace( return self._compliant_series._from_native_series( self._compliant_series._native_series.str.replace( pat=pattern, repl=value, n=n, regex=not literal - ), + ) ) def replace_all( @@ -1245,7 +1218,7 @@ def contains(self, pattern: str, *, literal: bool = False) -> PandasLikeSeries: def slice(self, offset: int, length: int | None = None) -> PandasLikeSeries: stop = offset + length if length else None return self._compliant_series._from_native_series( - self._compliant_series._native_series.str.slice(start=offset, stop=stop), + self._compliant_series._native_series.str.slice(start=offset, stop=stop) ) def to_datetime(self: Self, format: str | None) -> PandasLikeSeries: # noqa: A002 diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 08d490581..39a223e6d 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -295,10 +295,7 @@ def diagonal_concat( def native_series_from_iterable( - data: Iterable[Any], - name: str, - index: Any, - implementation: Implementation, + data: Iterable[Any], name: str, index: Any, implementation: Implementation ) -> Any: """Return native series.""" if implementation in PANDAS_LIKE_IMPLEMENTATION: diff --git a/narwhals/_polars/dataframe.py b/narwhals/_polars/dataframe.py index d5e115284..ab1624f5e 100644 --- a/narwhals/_polars/dataframe.py +++ b/narwhals/_polars/dataframe.py @@ -434,9 +434,7 @@ def collect(self: Self) -> PolarsDataFrame: raise ColumnNotFoundError(str(e)) from e return PolarsDataFrame( - result, - backend_version=self._backend_version, - version=self._version, + result, backend_version=self._backend_version, version=self._version ) def group_by(self: Self, *by: str, drop_null_keys: bool) -> PolarsLazyGroupBy: diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 0e4240010..b13b336d3 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -89,12 +89,7 @@ def is_nan(self: Self) -> Self: return self._from_native_expr(self._native_expr.is_nan()) def rolling_var( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: if self._backend_version < (1,): # pragma: no cover msg = "`rolling_var` not implemented for polars older than 1.0" @@ -110,12 +105,7 @@ def rolling_var( ) def rolling_std( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: if self._backend_version < (1,): # pragma: no cover msg = "`rolling_std` not implemented for polars older than 1.0" @@ -131,9 +121,7 @@ def rolling_std( ) def map_batches( - self, - function: Callable[..., Self], - return_dtype: DType | None, + self, function: Callable[..., Self], return_dtype: DType | None ) -> Self: if return_dtype is not None: return_dtype_pl = narwhals_to_native_dtype(return_dtype, self._version) diff --git a/narwhals/_polars/group_by.py b/narwhals/_polars/group_by.py index 5bb1b58fc..1536c6112 100644 --- a/narwhals/_polars/group_by.py +++ b/narwhals/_polars/group_by.py @@ -27,7 +27,7 @@ def __init__( def agg(self: Self, *aggs: PolarsExpr, **named_aggs: PolarsExpr) -> PolarsDataFrame: aggs, named_aggs = extract_args_kwargs(aggs, named_aggs) # type: ignore[assignment] return self._compliant_frame._from_native_frame( - self._grouped.agg(*aggs, **named_aggs), + self._grouped.agg(*aggs, **named_aggs) ) def __iter__(self: Self) -> Iterator[tuple[tuple[str, ...], PolarsDataFrame]]: @@ -49,5 +49,5 @@ def __init__( def agg(self: Self, *aggs: PolarsExpr, **named_aggs: PolarsExpr) -> PolarsLazyFrame: aggs, named_aggs = extract_args_kwargs(aggs, named_aggs) # type: ignore[assignment] return self._compliant_frame._from_native_frame( - self._grouped.agg(*aggs, **named_aggs), + self._grouped.agg(*aggs, **named_aggs) ) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 33572db7c..690569aa8 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -300,12 +300,7 @@ def ewm_mean( return self._from_native_series(native_result) def rolling_var( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: if self._backend_version < (1,): # pragma: no cover msg = "`rolling_var` not implemented for polars older than 1.0" @@ -321,12 +316,7 @@ def rolling_var( ) def rolling_std( - self: Self, - window_size: int, - *, - min_periods: int | None, - center: bool, - ddof: int, + self: Self, window_size: int, *, min_periods: int | None, center: bool, ddof: int ) -> Self: if self._backend_version < (1,): # pragma: no cover msg = "`rolling_std` not implemented for polars older than 1.0" @@ -364,12 +354,7 @@ def scatter(self: Self, indices: int | Sequence[int], values: Any) -> Self: return self._from_native_series(s) def value_counts( - self: Self, - *, - sort: bool, - parallel: bool, - name: str | None, - normalize: bool, + self: Self, *, sort: bool, parallel: bool, name: str | None, normalize: bool ) -> PolarsDataFrame: from narwhals._polars.dataframe import PolarsDataFrame diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index e85132f8e..2d8a4430c 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -67,9 +67,7 @@ def extract_args_kwargs(args: Any, kwargs: Any) -> tuple[list[Any], dict[str, An @lru_cache(maxsize=16) def native_to_narwhals_dtype( - dtype: pl.DataType, - version: Version, - backend_version: tuple[int, ...], + dtype: pl.DataType, version: Version, backend_version: tuple[int, ...] ) -> DType: import polars as pl diff --git a/narwhals/_spark_like/dataframe.py b/narwhals/_spark_like/dataframe.py index e04da7f57..1d8e4a307 100644 --- a/narwhals/_spark_like/dataframe.py +++ b/narwhals/_spark_like/dataframe.py @@ -84,9 +84,7 @@ def collect(self) -> Any: ) def select( - self: Self, - *exprs: IntoSparkLikeExpr, - **named_exprs: IntoSparkLikeExpr, + self: Self, *exprs: IntoSparkLikeExpr, **named_exprs: IntoSparkLikeExpr ) -> Self: if exprs and all(isinstance(x, str) for x in exprs) and not named_exprs: # This is a simple select @@ -127,9 +125,7 @@ def collect_schema(self) -> dict[str, DType]: return self.schema def with_columns( - self: Self, - *exprs: IntoSparkLikeExpr, - **named_exprs: IntoSparkLikeExpr, + self: Self, *exprs: IntoSparkLikeExpr, **named_exprs: IntoSparkLikeExpr ) -> Self: new_columns_map = parse_exprs_and_named_exprs(self, *exprs, **named_exprs) return self._from_native_frame(self._native_frame.withColumns(new_columns_map)) @@ -195,10 +191,7 @@ def rename(self: Self, mapping: dict[str, str]) -> Self: ) def unique( - self: Self, - subset: str | list[str] | None = None, - *, - keep: Literal["any", "none"], + self: Self, subset: str | list[str] | None = None, *, keep: Literal["any", "none"] ) -> Self: if keep != "any": msg = "`LazyFrame.unique` with PySpark backend only supports `keep='any'`." diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 66826a6ab..992b55f49 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -244,9 +244,7 @@ def var(self: Self, ddof: int) -> Self: return self._from_call(func, "var", returns_scalar=True, ddof=ddof) def clip( - self, - lower_bound: Any | None = None, - upper_bound: Any | None = None, + self, lower_bound: Any | None = None, upper_bound: Any | None = None ) -> Self: def _clip(_input: Column, lower_bound: Any, upper_bound: Any) -> Column: from pyspark.sql import functions as F # noqa: N812 @@ -272,12 +270,7 @@ def _clip(_input: Column, lower_bound: Any, upper_bound: Any) -> Column: returns_scalar=self._returns_scalar, ) - def is_between( - self, - lower_bound: Any, - upper_bound: Any, - closed: str, - ) -> Self: + def is_between(self, lower_bound: Any, upper_bound: Any, closed: str) -> Self: def _is_between(_input: Column, lower_bound: Any, upper_bound: Any) -> Column: if closed == "both": return (_input >= lower_bound) & (_input <= upper_bound) @@ -328,10 +321,7 @@ def _is_in(_input: Column, values: Sequence[Any]) -> Column: return _input.isin(values) return self._from_call( - _is_in, - "is_in", - values=values, - returns_scalar=self._returns_scalar, + _is_in, "is_in", values=values, returns_scalar=self._returns_scalar ) def is_unique(self) -> Self: @@ -362,10 +352,7 @@ def _round(_input: Column, decimals: int) -> Column: return F.round(_input, decimals) return self._from_call( - _round, - "round", - decimals=decimals, - returns_scalar=self._returns_scalar, + _round, "round", decimals=decimals, returns_scalar=self._returns_scalar ) def skew(self) -> Self: diff --git a/narwhals/_spark_like/group_by.py b/narwhals/_spark_like/group_by.py index 0100500ff..36c1da2da 100644 --- a/narwhals/_spark_like/group_by.py +++ b/narwhals/_spark_like/group_by.py @@ -43,14 +43,10 @@ def __init__( self._grouped = self._df._native_frame.groupBy(*self._keys) def agg( - self, - *aggs: IntoSparkLikeExpr, - **named_aggs: IntoSparkLikeExpr, + self, *aggs: IntoSparkLikeExpr, **named_aggs: IntoSparkLikeExpr ) -> SparkLikeLazyFrame: exprs = parse_into_exprs( - *aggs, - namespace=self._df.__narwhals_namespace__(), - **named_aggs, + *aggs, namespace=self._df.__narwhals_namespace__(), **named_aggs ) output_names: list[str] = copy(self._keys) for expr in exprs: @@ -64,12 +60,7 @@ def agg( output_names.extend(expr._output_names) - return agg_pyspark( - self._grouped, - exprs, - self._keys, - self._from_native_frame, - ) + return agg_pyspark(self._grouped, exprs, self._keys, self._from_native_frame) def _from_native_frame(self, df: SparkLikeLazyFrame) -> SparkLikeLazyFrame: from narwhals._spark_like.dataframe import SparkLikeLazyFrame diff --git a/narwhals/_spark_like/namespace.py b/narwhals/_spark_like/namespace.py index d34867b00..1dd9330cf 100644 --- a/narwhals/_spark_like/namespace.py +++ b/narwhals/_spark_like/namespace.py @@ -76,10 +76,9 @@ def func(df: SparkLikeLazyFrame) -> list[Column]: cols = [c for _expr in parsed_exprs for c in _expr(df)] col_name = get_column_name(df, cols[0]) return [ - reduce( - operator.add, - (F.coalesce(col, F.lit(0)) for col in cols), - ).alias(col_name) + reduce(operator.add, (F.coalesce(col, F.lit(0)) for col in cols)).alias( + col_name + ) ] return SparkLikeExpr( # type: ignore[abstract] diff --git a/narwhals/_spark_like/utils.py b/narwhals/_spark_like/utils.py index fb3a3f3c4..439ad172f 100644 --- a/narwhals/_spark_like/utils.py +++ b/narwhals/_spark_like/utils.py @@ -19,8 +19,7 @@ @lru_cache(maxsize=16) def native_to_narwhals_dtype( - dtype: pyspark_types.DataType, - version: Version, + dtype: pyspark_types.DataType, version: Version ) -> DType: # pragma: no cover dtypes = import_dtypes_module(version=version) from pyspark.sql import types as pyspark_types diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index dd786ef3d..ed8eda37c 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -59,8 +59,7 @@ def __narwhals_namespace__(self) -> Any: def _from_compliant_dataframe(self, df: Any) -> Self: # construct, preserving properties return self.__class__( # type: ignore[call-arg] - df, - level=self._level, + df, level=self._level ) def _flatten_and_extract(self, *args: Any, **kwargs: Any) -> Any: @@ -120,17 +119,15 @@ def with_columns( ) -> Self: exprs, named_exprs = self._flatten_and_extract(*exprs, **named_exprs) return self._from_compliant_dataframe( - self._compliant_frame.with_columns(*exprs, **named_exprs), + self._compliant_frame.with_columns(*exprs, **named_exprs) ) def select( - self, - *exprs: IntoExpr | Iterable[IntoExpr], - **named_exprs: IntoExpr, + self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr ) -> Self: exprs, named_exprs = self._flatten_and_extract(*exprs, **named_exprs) return self._from_compliant_dataframe( - self._compliant_frame.select(*exprs, **named_exprs), + self._compliant_frame.select(*exprs, **named_exprs) ) def rename(self, mapping: dict[str, str]) -> Self: @@ -159,7 +156,7 @@ def filter( *predicates, **constraints ) return self._from_compliant_dataframe( - self._compliant_frame.filter(*predicates, **constraints), + self._compliant_frame.filter(*predicates, **constraints) ) def sort( @@ -369,12 +366,7 @@ def _series(self) -> type[Series[Any]]: def _lazyframe(self) -> type[LazyFrame[Any]]: return LazyFrame - def __init__( - self, - df: Any, - *, - level: Literal["full", "lazy", "interchange"], - ) -> None: + def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: self._level: Literal["full", "lazy", "interchange"] = level if hasattr(df, "__narwhals_dataframe__"): self._compliant_frame: Any = df.__narwhals_dataframe__() @@ -804,10 +796,7 @@ def get_column(self, name: str) -> Series[Any]: ] ] """ - return self._series( - self._compliant_frame.get_column(name), - level=self._level, - ) + return self._series(self._compliant_frame.get_column(name), level=self._level) def estimated_size(self, unit: SizeUnit = "b") -> int | float: """Return an estimation of the total (heap) allocated size of the `DataFrame`. @@ -1002,10 +991,7 @@ def __getitem__( return self return self._from_compliant_dataframe(self._compliant_frame[item]) if isinstance(item, str) or (isinstance(item, tuple) and len(item) == 2): - return self._series( - self._compliant_frame[item], - level=self._level, - ) + return self._series(self._compliant_frame[item], level=self._level) elif ( is_sequence_but_not_str(item) @@ -1077,10 +1063,7 @@ def to_dict( """ if as_series: return { - key: self._series( - value, - level=self._level, - ) + key: self._series(value, level=self._level) for key, value in self._compliant_frame.to_dict( as_series=as_series ).items() @@ -1631,9 +1614,7 @@ def with_columns( return super().with_columns(*exprs, **named_exprs) def select( - self, - *exprs: IntoExpr | Iterable[IntoExpr], - **named_exprs: IntoExpr, + self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr ) -> Self: r"""Select columns from this DataFrame. @@ -2847,10 +2828,7 @@ def is_duplicated(self: Self) -> Series[Any]: ] ] """ - return self._series( - self._compliant_frame.is_duplicated(), - level=self._level, - ) + return self._series(self._compliant_frame.is_duplicated(), level=self._level) def is_empty(self: Self) -> bool: r"""Check if the dataframe is empty. @@ -2948,10 +2926,7 @@ def is_unique(self: Self) -> Series[Any]: ] ] """ - return self._series( - self._compliant_frame.is_unique(), - level=self._level, - ) + return self._series(self._compliant_frame.is_unique(), level=self._level) def null_count(self: Self) -> Self: r"""Create a new DataFrame that shows the null counts per column. @@ -3557,12 +3532,7 @@ class LazyFrame(BaseFrame[FrameT]): def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame - def __init__( - self, - df: Any, - *, - level: Literal["full", "lazy", "interchange"], - ) -> None: + def __init__(self, df: Any, *, level: Literal["full", "lazy", "interchange"]) -> None: self._level = level if hasattr(df, "__narwhals_lazyframe__"): self._compliant_frame: Any = df.__narwhals_lazyframe__() @@ -3669,10 +3639,7 @@ def collect(self) -> DataFrame[Any]: 1 b 11 10 2 c 6 1 """ - return self._dataframe( - self._compliant_frame.collect(), - level="full", - ) + return self._dataframe(self._compliant_frame.collect(), level="full") def to_native(self) -> FrameT: """Convert Narwhals LazyFrame to native one. @@ -4011,9 +3978,7 @@ def with_columns( return super().with_columns(*exprs, **named_exprs) def select( - self, - *exprs: IntoExpr | Iterable[IntoExpr], - **named_exprs: IntoExpr, + self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr ) -> Self: r"""Select columns from this LazyFrame. diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 57ee762eb..98964e9ae 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -498,10 +498,7 @@ class Duration(TemporalType): Duration(time_unit='ms') """ - def __init__( - self: Self, - time_unit: Literal["us", "ns", "ms", "s"] = "us", - ) -> None: + def __init__(self: Self, time_unit: Literal["us", "ns", "ms", "s"] = "us") -> None: if time_unit not in ("s", "ms", "us", "ns"): msg = ( "invalid `time_unit`" diff --git a/narwhals/expr.py b/narwhals/expr.py index 653300da8..4e7eb2a23 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -811,9 +811,7 @@ def var(self, *, ddof: int = 1) -> Self: return self.__class__(lambda plx: self._to_compliant_expr(plx).var(ddof=ddof)) def map_batches( - self, - function: Callable[[Any], Self], - return_dtype: DType | None = None, + self, function: Callable[[Any], Self], return_dtype: DType | None = None ) -> Self: """Apply a custom python function to a whole Series or sequence of Series. diff --git a/narwhals/functions.py b/narwhals/functions.py index ed167fb0d..a753fb25c 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -192,7 +192,7 @@ def concat( first_item = items[0] plx = first_item.__narwhals_namespace__() return first_item._from_compliant_dataframe( # type: ignore[return-value] - plx.concat([df._compliant_frame for df in items], how=how), + plx.concat([df._compliant_frame for df in items], how=how) ) @@ -264,11 +264,7 @@ def new_series( ] """ return _new_series_impl( - name, - values, - dtype, - native_namespace=native_namespace, - version=Version.MAIN, + name, values, dtype, native_namespace=native_namespace, version=Version.MAIN ) @@ -396,10 +392,7 @@ def from_dict( d: [[1,4]] """ return _from_dict_impl( - data, - schema, - native_namespace=native_namespace, - version=Version.MAIN, + data, schema, native_namespace=native_namespace, version=Version.MAIN ) @@ -643,10 +636,7 @@ def from_numpy( e: [[1,3]] """ return _from_numpy_impl( - data, - schema, - native_namespace=native_namespace, - version=Version.MAIN, + data, schema, native_namespace=native_namespace, version=Version.MAIN ) @@ -888,14 +878,7 @@ def _get_deps_info() -> dict[str, str]: Returns: Mapping from dependency to version. """ - deps = ( - "pandas", - "polars", - "cudf", - "modin", - "pyarrow", - "numpy", - ) + deps = ("pandas", "polars", "cudf", "modin", "pyarrow", "numpy") from . import __version__ @@ -953,10 +936,7 @@ def get_level( def read_csv( - source: str, - *, - native_namespace: ModuleType, - **kwargs: Any, + source: str, *, native_namespace: ModuleType, **kwargs: Any ) -> DataFrame[Any]: """Read a CSV file into a DataFrame. @@ -1121,10 +1101,7 @@ def _scan_csv_impl( def read_parquet( - source: str, - *, - native_namespace: ModuleType, - **kwargs: Any, + source: str, *, native_namespace: ModuleType, **kwargs: Any ) -> DataFrame[Any]: """Read into a DataFrame from a parquet file. diff --git a/narwhals/group_by.py b/narwhals/group_by.py index 76c04fa1f..0c601bc61 100644 --- a/narwhals/group_by.py +++ b/narwhals/group_by.py @@ -111,7 +111,7 @@ def agg( """ aggs, named_aggs = self._df._flatten_and_extract(*aggs, **named_aggs) return self._df._from_compliant_dataframe( # type: ignore[return-value] - self._grouped.agg(*aggs, **named_aggs), + self._grouped.agg(*aggs, **named_aggs) ) def __iter__(self) -> Iterator[tuple[Any, DataFrameT]]: @@ -197,5 +197,5 @@ def agg( """ aggs, named_aggs = self._df._flatten_and_extract(*aggs, **named_aggs) return self._df._from_compliant_dataframe( # type: ignore[return-value] - self._grouped.agg(*aggs, **named_aggs), + self._grouped.agg(*aggs, **named_aggs) ) diff --git a/narwhals/series.py b/narwhals/series.py index 7b4cfbf6e..1b0c2ded4 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -63,10 +63,7 @@ def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame def __init__( - self: Self, - series: Any, - *, - level: Literal["full", "lazy", "interchange"], + self: Self, series: Any, *, level: Literal["full", "lazy", "interchange"] ) -> None: self._level = level if hasattr(series, "__narwhals_series__"): @@ -404,10 +401,7 @@ def _extract_native(self, arg: Any) -> Any: return arg def _from_compliant_series(self, series: Any) -> Self: - return self.__class__( - series, - level=self._level, - ) + return self.__class__(series, level=self._level) def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: """Pipe function call. @@ -789,10 +783,7 @@ def to_frame(self) -> DataFrame[Any]: ---- : [[1,2]] """ - return self._dataframe( - self._compliant_series.to_frame(), - level=self._level, - ) + return self._dataframe(self._compliant_series.to_frame(), level=self._level) def to_list(self) -> list[Any]: """Convert to list. diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index cb5d2006c..482e0800a 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -835,9 +835,7 @@ def rolling_sum( ) warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) return super().rolling_sum( - window_size=window_size, - min_periods=min_periods, - center=center, + window_size=window_size, min_periods=min_periods, center=center ) def rolling_mean( @@ -929,9 +927,7 @@ def rolling_mean( ) warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) return super().rolling_mean( - window_size=window_size, - min_periods=min_periods, - center=center, + window_size=window_size, min_periods=min_periods, center=center ) def rolling_var( @@ -1025,10 +1021,7 @@ def rolling_var( ) warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) return super().rolling_var( - window_size=window_size, - min_periods=min_periods, - center=center, - ddof=ddof, + window_size=window_size, min_periods=min_periods, center=center, ddof=ddof ) def rolling_std( @@ -1122,10 +1115,7 @@ def rolling_std( ) warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) return super().rolling_std( - window_size=window_size, - min_periods=min_periods, - center=center, - ddof=ddof, + window_size=window_size, min_periods=min_periods, center=center, ddof=ddof ) @@ -1328,9 +1318,7 @@ def rolling_sum( ) warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) return super().rolling_sum( - window_size=window_size, - min_periods=min_periods, - center=center, + window_size=window_size, min_periods=min_periods, center=center ) def rolling_mean( @@ -1421,9 +1409,7 @@ def rolling_mean( ) warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) return super().rolling_mean( - window_size=window_size, - min_periods=min_periods, - center=center, + window_size=window_size, min_periods=min_periods, center=center ) def rolling_var( @@ -1611,10 +1597,7 @@ def rolling_std( ) warn(message=msg, category=NarwhalsUnstableWarning, stacklevel=find_stacklevel()) return super().rolling_std( - window_size=window_size, - min_periods=min_periods, - center=center, - ddof=ddof, + window_size=window_size, min_periods=min_periods, center=center, ddof=ddof ) @@ -1667,19 +1650,14 @@ def _stableify( ) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series | Expr | Any: if isinstance(obj, NwDataFrame): return DataFrame( - obj._compliant_frame._change_version(Version.V1), - level=obj._level, + obj._compliant_frame._change_version(Version.V1), level=obj._level ) if isinstance(obj, NwLazyFrame): return LazyFrame( - obj._compliant_frame._change_version(Version.V1), - level=obj._level, + obj._compliant_frame._change_version(Version.V1), level=obj._level ) if isinstance(obj, NwSeries): - return Series( - obj._compliant_series._change_version(Version.V1), - level=obj._level, - ) + return Series(obj._compliant_series._change_version(Version.V1), level=obj._level) if isinstance(obj, NwExpr): return Expr(obj._to_compliant_expr) return obj diff --git a/narwhals/translate.py b/narwhals/translate.py index 8d0805a26..04aa6e6a6 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -52,14 +52,7 @@ T = TypeVar("T") -NON_TEMPORAL_SCALAR_TYPES = ( - bool, - bytes, - str, - int, - float, - complex, -) +NON_TEMPORAL_SCALAR_TYPES = (bool, bytes, str, int, float, complex) @overload @@ -431,10 +424,7 @@ def _from_native_impl( # noqa: PLR0915 msg = "Cannot only use `series_only` with dataframe" raise TypeError(msg) return native_object - return DataFrame( - native_object.__narwhals_dataframe__(), - level="full", - ) + return DataFrame(native_object.__narwhals_dataframe__(), level="full") elif hasattr(native_object, "__narwhals_lazyframe__"): if series_only: if not pass_through: @@ -446,20 +436,14 @@ def _from_native_impl( # noqa: PLR0915 msg = "Cannot only use `eager_only` or `eager_or_interchange_only` with lazyframe" raise TypeError(msg) return native_object - return LazyFrame( - native_object.__narwhals_lazyframe__(), - level="full", - ) + return LazyFrame(native_object.__narwhals_lazyframe__(), level="full") elif hasattr(native_object, "__narwhals_series__"): if not allow_series: if not pass_through: msg = "Please set `allow_series=True` or `series_only=True`" raise TypeError(msg) return native_object - return Series( - native_object.__narwhals_series__(), - level="full", - ) + return Series(native_object.__narwhals_series__(), level="full") # Polars elif is_polars_dataframe(native_object): @@ -780,8 +764,7 @@ def _from_native_impl( # noqa: PLR0915 raise TypeError(msg) return native_object return DataFrame( - InterchangeFrame(native_object, version=version), - level="interchange", + InterchangeFrame(native_object, version=version), level="interchange" ) elif not pass_through: