From 9d95b5e6542bf8dac9121b73b37fded317035870 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:18:44 +0000 Subject: [PATCH] chore: Remove some unnecessary trailing commas --- narwhals/_arrow/dataframe.py | 50 ++++----------- narwhals/_arrow/expr.py | 56 ++++------------ narwhals/_dask/expr.py | 120 +++++++---------------------------- narwhals/_duckdb/expr.py | 45 ++++--------- 4 files changed, 56 insertions(+), 215 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index f4ad2912e..e6bb6fa65 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -101,23 +101,14 @@ def row(self: Self, index: int) -> tuple[Any, ...]: return tuple(col[index] for col in self._native_frame) @overload - def rows( - self: Self, - *, - named: Literal[True], - ) -> list[dict[str, Any]]: ... + def rows(self: Self, *, named: Literal[True]) -> list[dict[str, Any]]: ... @overload - def rows( - self: Self, - *, - named: Literal[False], - ) -> list[tuple[Any, ...]]: ... + def rows(self: Self, *, named: Literal[False]) -> list[tuple[Any, ...]]: ... + @overload def rows( - self: Self, - *, - named: bool, + self: Self, *, named: bool ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ... def rows(self: Self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]: @@ -126,10 +117,7 @@ def rows(self: Self, *, named: bool) -> list[tuple[Any, ...]] | list[dict[str, A return self._native_frame.to_pylist() # type: ignore[no-any-return] def iter_rows( - self: Self, - *, - named: bool, - buffer_size: int, + self: Self, *, named: bool, buffer_size: int ) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]: df = self._native_frame num_rows = df.num_rows @@ -263,9 +251,7 @@ def __getitem__( ) start = item.start or 0 stop = item.stop if item.stop is not None else len(self._native_frame) - return self._from_native_frame( - self._native_frame.slice(start, stop - start), - ) + return self._from_native_frame(self._native_frame.slice(start, stop - start)) elif isinstance(item, Sequence) or (is_numpy_array(item) and item.ndim == 1): if ( @@ -301,11 +287,7 @@ def estimated_size(self: Self, unit: SizeUnit) -> int | float: def columns(self: Self) -> list[str]: return self._native_frame.schema.names # type: ignore[no-any-return] - def select( - self: Self, - *exprs: IntoArrowExpr, - **named_exprs: IntoArrowExpr, - ) -> Self: + def select(self: Self, *exprs: IntoArrowExpr, **named_exprs: IntoArrowExpr) -> Self: import pyarrow as pa new_series = evaluate_into_exprs(self, *exprs, **named_exprs) @@ -313,16 +295,11 @@ def select( # return empty dataframe, like Polars does return self._from_native_frame(self._native_frame.__class__.from_arrays([])) names = [s.name for s in new_series] - df = pa.Table.from_arrays( - broadcast_series(new_series), - names=names, - ) + df = pa.Table.from_arrays(broadcast_series(new_series), names=names) return self._from_native_frame(df) def with_columns( - self: Self, - *exprs: IntoArrowExpr, - **named_exprs: IntoArrowExpr, + self: Self, *exprs: IntoArrowExpr, **named_exprs: IntoArrowExpr ) -> Self: native_frame = self._native_frame new_columns = evaluate_into_exprs(self, *exprs, **named_exprs) @@ -334,9 +311,7 @@ def with_columns( col_name = col_value.name column = validate_dataframe_comparand( - length=length, - other=col_value, - backend_version=self._backend_version, + length=length, other=col_value, backend_version=self._backend_version ) native_frame = ( @@ -611,12 +586,9 @@ def is_duplicated(self: Self) -> ArrowSeries: columns = self.columns index_token = generate_temporary_column_name(n_bytes=8, columns=columns) col_token = generate_temporary_column_name( - n_bytes=8, - columns=[*columns, index_token], + n_bytes=8, columns=[*columns, index_token] ) - df = self.with_row_index(index_token)._native_frame - row_count = ( df.append_column(col_token, pa.repeat(pa.scalar(1), len(self))) .group_by(columns) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 5ae6ce6b0..1c0d0734e 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -87,8 +87,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: except KeyError as e: missing_columns = [x for x in column_names if x not in df.columns] raise ColumnNotFoundError.from_missing_and_available_column_names( - missing_columns=missing_columns, - available_columns=df.columns, + missing_columns=missing_columns, available_columns=df.columns ) from e return cls( @@ -564,9 +563,7 @@ def __init__(self: Self, expr: ArrowExpr) -> None: def get_categories(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "cat", - "get_categories", + self._compliant_expr, "cat", "get_categories" ) @@ -676,12 +673,7 @@ def len_chars(self: Self) -> ArrowExpr: ) def replace( - self: Self, - pattern: str, - value: str, - *, - literal: bool, - n: int, + self: Self, pattern: str, value: str, *, literal: bool, n: int ) -> ArrowExpr: return reuse_series_namespace_implementation( self._compliant_expr, @@ -693,13 +685,7 @@ def replace( n=n, ) - def replace_all( - self: Self, - pattern: str, - value: str, - *, - literal: bool, - ) -> ArrowExpr: + def replace_all(self: Self, pattern: str, value: str, *, literal: bool) -> ArrowExpr: return reuse_series_namespace_implementation( self._compliant_expr, "str", @@ -711,26 +697,17 @@ def replace_all( def strip_chars(self: Self, characters: str | None) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "strip_chars", - characters=characters, + self._compliant_expr, "str", "strip_chars", characters=characters ) def starts_with(self: Self, prefix: str) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "starts_with", - prefix=prefix, + self._compliant_expr, "str", "starts_with", prefix=prefix ) def ends_with(self: Self, suffix: str) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "ends_with", - suffix=suffix, + self._compliant_expr, "str", "ends_with", suffix=suffix ) def contains(self, pattern: str, *, literal: bool) -> ArrowExpr: @@ -745,24 +722,17 @@ def slice(self: Self, offset: int, length: int | None) -> ArrowExpr: def to_datetime(self: Self, format: str | None) -> ArrowExpr: # noqa: A002 return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_datetime", - format=format, + self._compliant_expr, "str", "to_datetime", format=format ) def to_uppercase(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_uppercase", + self._compliant_expr, "str", "to_uppercase" ) def to_lowercase(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation( - self._compliant_expr, - "str", - "to_lowercase", + self._compliant_expr, "str", "to_lowercase" ) @@ -931,8 +901,4 @@ def __init__(self: Self, expr: ArrowExpr) -> None: self._expr = expr def len(self: Self) -> ArrowExpr: - return reuse_series_namespace_implementation( - self._expr, - "list", - "len", - ) + return reuse_series_namespace_implementation(self._expr, "list", "len") diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index c76593404..938cbc369 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -307,11 +307,7 @@ def __invert__(self: Self) -> Self: ) def mean(self) -> Self: - return self._from_call( - lambda _input: _input.mean(), - "mean", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.mean(), "mean", returns_scalar=True) def median(self) -> Self: from narwhals.exceptions import InvalidOperationError @@ -326,18 +322,10 @@ def func(s: dask_expr.Series) -> dask_expr.Series: return self._from_call(func, "median", returns_scalar=True) def min(self) -> Self: - return self._from_call( - lambda _input: _input.min(), - "min", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.min(), "min", returns_scalar=True) def max(self) -> Self: - return self._from_call( - lambda _input: _input.max(), - "max", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.max(), "max", returns_scalar=True) def std(self, ddof: int) -> Self: return self._from_call( @@ -356,11 +344,7 @@ def var(self, ddof: int) -> Self: ) def skew(self: Self) -> Self: - return self._from_call( - lambda _input: _input.skew(), - "skew", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.skew(), "skew", returns_scalar=True) def shift(self, n: int) -> Self: return self._from_call( @@ -435,9 +419,7 @@ def is_between( closed = "neither" return self._from_call( lambda _input, lower_bound, upper_bound, closed: _input.between( - lower_bound, - upper_bound, - closed, + lower_bound, upper_bound, closed ), "is_between", lower_bound=lower_bound, @@ -447,17 +429,11 @@ def is_between( ) def sum(self) -> Self: - return self._from_call( - lambda _input: _input.sum(), - "sum", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.sum(), "sum", returns_scalar=True) def count(self) -> Self: return self._from_call( - lambda _input: _input.count(), - "count", - returns_scalar=True, + lambda _input: _input.count(), "count", returns_scalar=True ) def round(self, decimals: int) -> Self: @@ -510,9 +486,7 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> NoRetur def abs(self) -> Self: return self._from_call( - lambda _input: _input.abs(), - "abs", - returns_scalar=self._returns_scalar, + lambda _input: _input.abs(), "abs", returns_scalar=self._returns_scalar ) def all(self) -> Self: @@ -579,23 +553,17 @@ def clip( def diff(self: Self) -> Self: return self._from_call( - lambda _input: _input.diff(), - "diff", - returns_scalar=self._returns_scalar, + lambda _input: _input.diff(), "diff", returns_scalar=self._returns_scalar ) def n_unique(self: Self) -> Self: return self._from_call( - lambda _input: _input.nunique(dropna=False), - "n_unique", - returns_scalar=True, + lambda _input: _input.nunique(dropna=False), "n_unique", returns_scalar=True ) def is_null(self: Self) -> Self: return self._from_call( - lambda _input: _input.isna(), - "is_null", - returns_scalar=self._returns_scalar, + lambda _input: _input.isna(), "is_null", returns_scalar=self._returns_scalar ) def is_nan(self: Self) -> Self: @@ -606,18 +574,10 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: msg = f"`.is_nan` only supported for numeric dtypes and not {dtype}, did you mean `.is_null`?" raise InvalidOperationError(msg) - return self._from_call( - func, - "is_null", - returns_scalar=self._returns_scalar, - ) + return self._from_call(func, "is_null", returns_scalar=self._returns_scalar) def len(self: Self) -> Self: - return self._from_call( - lambda _input: _input.size, - "len", - returns_scalar=True, - ) + return self._from_call(lambda _input: _input.size, "len", returns_scalar=True) def quantile( self: Self, @@ -633,10 +593,7 @@ def func(_input: dask_expr.Series, quantile: float) -> dask_expr.Series: return _input.quantile(q=quantile, method="dask") # pragma: no cover return self._from_call( - func, - "quantile", - quantile=quantile, - returns_scalar=True, + func, "quantile", quantile=quantile, returns_scalar=True ) else: msg = "`higher`, `lower`, `midpoint`, `nearest` - interpolation methods are not supported by Dask. Please use `linear` instead." @@ -655,13 +612,10 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: first_distinct_index = _input.groupby(_name).agg({col_token: "min"})[ col_token ] - return _input[col_token].isin(first_distinct_index) return self._from_call( - func, - "is_first_distinct", - returns_scalar=self._returns_scalar, + func, "is_first_distinct", returns_scalar=self._returns_scalar ) def is_last_distinct(self: Self) -> Self: @@ -675,13 +629,10 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: implementation=self._implementation, ) last_distinct_index = _input.groupby(_name).agg({col_token: "max"})[col_token] - return _input[col_token].isin(last_distinct_index) return self._from_call( - func, - "is_last_distinct", - returns_scalar=self._returns_scalar, + func, "is_last_distinct", returns_scalar=self._returns_scalar ) def is_duplicated(self: Self) -> Self: @@ -694,11 +645,7 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: > 1 ) - return self._from_call( - func, - "is_duplicated", - returns_scalar=self._returns_scalar, - ) + return self._from_call(func, "is_duplicated", returns_scalar=self._returns_scalar) def is_unique(self: Self) -> Self: def func(_input: dask_expr.Series) -> dask_expr.Series: @@ -710,11 +657,7 @@ def func(_input: dask_expr.Series) -> dask_expr.Series: == 1 ) - return self._from_call( - func, - "is_unique", - returns_scalar=self._returns_scalar, - ) + return self._from_call(func, "is_unique", returns_scalar=self._returns_scalar) def is_in(self: Self, other: Any) -> Self: return self._from_call( @@ -788,19 +731,13 @@ def dt(self: Self) -> DaskExprDateTimeNamespace: def name(self: Self) -> DaskExprNameNamespace: return DaskExprNameNamespace(self) - def cast( - self: Self, - dtype: DType | type[DType], - ) -> Self: - def func(_input: Any, dtype: DType | type[DType]) -> Any: + def cast(self: Self, dtype: (DType | type[DType])) -> Self: + def func(_input: Any, dtype: (DType | type[DType])) -> Any: dtype = narwhals_to_native_dtype(dtype, self._version) return _input.astype(dtype) return self._from_call( - func, - "cast", - dtype=dtype, - returns_scalar=self._returns_scalar, + func, "cast", dtype=dtype, returns_scalar=self._returns_scalar ) def is_finite(self: Self) -> Self: @@ -825,12 +762,7 @@ def len_chars(self) -> DaskExpr: ) def replace( - self, - pattern: str, - value: str, - *, - literal: bool = False, - n: int = 1, + self, pattern: str, value: str, *, literal: bool = False, n: int = 1 ) -> DaskExpr: return self._compliant_expr._from_call( lambda _input, pattern, value, literal, n: _input.str.replace( @@ -844,13 +776,7 @@ def replace( returns_scalar=self._compliant_expr._returns_scalar, ) - def replace_all( - self, - pattern: str, - value: str, - *, - literal: bool = False, - ) -> DaskExpr: + def replace_all(self, pattern: str, value: str, *, literal: bool = False) -> DaskExpr: return self._compliant_expr._from_call( lambda _input, pattern, value, literal: _input.str.replace( pattern, value, n=-1, regex=not literal diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index 3956e919d..0f33ff846 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -365,9 +365,7 @@ def func( _input: duckdb.Expression, lower_bound: Any, upper_bound: Any ) -> duckdb.Expression: return FunctionExpression( - "greatest", - FunctionExpression("least", _input, upper_bound), - lower_bound, + "greatest", FunctionExpression("least", _input, upper_bound), lower_bound ) return self._from_call( @@ -407,9 +405,7 @@ def sum(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("sum", _input), - "sum", - returns_scalar=True, + lambda _input: FunctionExpression("sum", _input), "sum", returns_scalar=True ) def count(self) -> Self: @@ -425,9 +421,7 @@ def len(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("count"), - "len", - returns_scalar=True, + lambda _input: FunctionExpression("count"), "len", returns_scalar=True ) def std(self, ddof: int) -> Self: @@ -441,9 +435,7 @@ def std(self, ddof: int) -> Self: msg = f"std with ddof {ddof} is not supported in DuckDB" raise NotImplementedError(msg) return self._from_call( - lambda _input: FunctionExpression(func, _input), - "std", - returns_scalar=True, + lambda _input: FunctionExpression(func, _input), "std", returns_scalar=True ) def var(self, ddof: int) -> Self: @@ -457,34 +449,26 @@ def var(self, ddof: int) -> Self: msg = f"var with ddof {ddof} is not supported in DuckDB" raise NotImplementedError(msg) return self._from_call( - lambda _input: FunctionExpression(func, _input), - "var", - returns_scalar=True, + lambda _input: FunctionExpression(func, _input), "var", returns_scalar=True ) def max(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("max", _input), - "max", - returns_scalar=True, + lambda _input: FunctionExpression("max", _input), "max", returns_scalar=True ) def min(self) -> Self: from duckdb import FunctionExpression return self._from_call( - lambda _input: FunctionExpression("min", _input), - "min", - returns_scalar=True, + lambda _input: FunctionExpression("min", _input), "min", returns_scalar=True ) def is_null(self) -> Self: return self._from_call( - lambda _input: _input.isnull(), - "is_null", - returns_scalar=self._returns_scalar, + lambda _input: _input.isnull(), "is_null", returns_scalar=self._returns_scalar ) def is_in(self, other: Sequence[Any]) -> Self: @@ -590,9 +574,7 @@ def func(_input: duckdb.Expression) -> duckdb.Expression: ) return self._compliant_expr._from_call( - func, - "contains", - returns_scalar=self._compliant_expr._returns_scalar, + func, "contains", returns_scalar=self._compliant_expr._returns_scalar ) def slice(self, offset: int, length: int) -> DuckDBExpr: @@ -612,9 +594,7 @@ def func(_input: duckdb.Expression) -> duckdb.Expression: ) return self._compliant_expr._from_call( - func, - "slice", - returns_scalar=self._compliant_expr._returns_scalar, + func, "slice", returns_scalar=self._compliant_expr._returns_scalar ) def to_lowercase(self) -> DuckDBExpr: @@ -664,10 +644,7 @@ def replace_all( raise NotImplementedError(msg) return self._compliant_expr._from_call( lambda _input: FunctionExpression( - "replace", - _input, - ConstantExpression(pattern), - ConstantExpression(value), + "replace", _input, ConstantExpression(pattern), ConstantExpression(value) ), "replace_all", returns_scalar=self._compliant_expr._returns_scalar,