Skip to content

Commit

Permalink
perf: always use copy=False when doing rename for pandas
Browse files: browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Oct 24, 2024
1 parent cf07bd3 commit 62eff56
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 45 deletions.
10 changes: 7 additions & 3 deletions narwhals/_pandas_like/dataframe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -431,7 +431,9 @@ def with_columns(
return self._from_native_frame(df)

def rename(self, mapping: dict[str, str]) -> Self:
return self._from_native_frame(self._native_frame.rename(columns=mapping))
return self._from_native_frame(
self._native_frame.rename(columns=mapping, copy=False)
)

def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001
to_drop = parse_columns_to_drop(
Expand Down Expand Up @@ -539,7 +541,8 @@ def join(
other_native = (
other._native_frame.loc[:, right_on]
.rename( # rename to avoid creating extra columns in join
columns=dict(zip(right_on, left_on)) # type: ignore[arg-type]
columns=dict(zip(right_on, left_on)), # type: ignore[arg-type]
copy=False,
)
.drop_duplicates()
)
Expand All @@ -559,7 +562,8 @@ def join(
other_native = (
other._native_frame.loc[:, right_on]
.rename( # rename to avoid creating extra columns in join
columns=dict(zip(right_on, left_on)) # type: ignore[arg-type]
columns=dict(zip(right_on, left_on)), # type: ignore[arg-type]
copy=False,
)
.drop_duplicates() # avoids potential rows duplication from inner join
)
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/group_by.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -186,7 +186,7 @@ def agg_pandas( # noqa: PLR0915
f"{a}_{b}" for a, b in result_simple_aggs.columns
]
result_simple_aggs = result_simple_aggs.rename(
columns=name_mapping
columns=name_mapping, copy=False
).reset_index()
if nunique_aggs:
result_nunique_aggs = grouped[list(nunique_aggs.values())].nunique(
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/namespace.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -290,7 +290,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
(s.to_frame() for s in series), how="horizontal"
)
._native_frame.min(axis=1)
.rename(series[0].name),
.rename(series[0].name, copy=False),
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
Expand All @@ -317,7 +317,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
(s.to_frame() for s in series), how="horizontal"
)
._native_frame.max(axis=1)
.rename(series[0].name),
.rename(series[0].name, copy=False),
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
Expand Down
78 changes: 39 additions & 39 deletions narwhals/_pandas_like/series.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -126,11 +126,6 @@ def __getitem__(self, idx: int | slice | Sequence[int]) -> Any | Self:
return self._native_series.iloc[idx]
return self._from_native_series(self._native_series.iloc[idx])

def _rename(self, series: Any, name: str) -> Any:
if self._use_copy_false:
return series.rename(name, copy=False)
return series.rename(name) # pragma: no cover

def _from_native_series(self, series: Any) -> Self:
return self.__class__(
series,
Expand Down Expand Up @@ -260,127 +255,135 @@ def filter(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
if not (isinstance(other, list) and all(isinstance(x, bool) for x in other)):
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.loc[other], ser.name))
return self._from_native_series(ser.loc[other].rename(ser.name, copy=False))

def __eq__(self, other: object) -> PandasLikeSeries: # type: ignore[override]
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__eq__(other), ser.name))
return self._from_native_series(ser.__eq__(other).rename(ser.name, copy=False))

def __ne__(self, other: object) -> PandasLikeSeries: # type: ignore[override]
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__ne__(other), ser.name))
return self._from_native_series(ser.__ne__(other).rename(ser.name, copy=False))

def __ge__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__ge__(other), ser.name))
return self._from_native_series(ser.__ge__(other).rename(ser.name, copy=False))

def __gt__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__gt__(other), ser.name))
return self._from_native_series(ser.__gt__(other).rename(ser.name, copy=False))

def __le__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__le__(other), ser.name))
return self._from_native_series(ser.__le__(other).rename(ser.name, copy=False))

def __lt__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__lt__(other), ser.name))
return self._from_native_series(ser.__lt__(other).rename(ser.name, copy=False))

def __and__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__and__(other), ser.name))
return self._from_native_series(ser.__and__(other).rename(ser.name, copy=False))

def __rand__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__rand__(other), ser.name))
return self._from_native_series(ser.__rand__(other).rename(ser.name, copy=False))

def __or__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__or__(other), ser.name))
return self._from_native_series(ser.__or__(other).rename(ser.name, copy=False))

def __ror__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__ror__(other), ser.name))
return self._from_native_series(ser.__ror__(other).rename(ser.name, copy=False))

def __add__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__add__(other), ser.name))
return self._from_native_series(ser.__add__(other).rename(ser.name, copy=False))

def __radd__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__radd__(other), ser.name))
return self._from_native_series(ser.__radd__(other).rename(ser.name, copy=False))

def __sub__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__sub__(other), ser.name))
return self._from_native_series(ser.__sub__(other).rename(ser.name, copy=False))

def __rsub__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__rsub__(other), ser.name))
return self._from_native_series(ser.__rsub__(other).rename(ser.name, copy=False))

def __mul__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__mul__(other), ser.name))
return self._from_native_series(ser.__mul__(other).rename(ser.name, copy=False))

def __rmul__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__rmul__(other), ser.name))
return self._from_native_series(ser.__rmul__(other).rename(ser.name, copy=False))

def __truediv__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__truediv__(other), ser.name))
return self._from_native_series(
ser.__truediv__(other).rename(ser.name, copy=False)
)

def __rtruediv__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__rtruediv__(other), ser.name))
return self._from_native_series(
ser.__rtruediv__(other).rename(ser.name, copy=False)
)

def __floordiv__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__floordiv__(other), ser.name))
return self._from_native_series(
ser.__floordiv__(other).rename(ser.name, copy=False)
)

def __rfloordiv__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__rfloordiv__(other), ser.name))
return self._from_native_series(
ser.__rfloordiv__(other).rename(ser.name, copy=False)
)

def __pow__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__pow__(other), ser.name))
return self._from_native_series(ser.__pow__(other).rename(ser.name, copy=False))

def __rpow__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__rpow__(other), ser.name))
return self._from_native_series(ser.__rpow__(other).rename(ser.name, copy=False))

def __mod__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__mod__(other), ser.name))
return self._from_native_series(ser.__mod__(other).rename(ser.name, copy=False))

def __rmod__(self, other: Any) -> PandasLikeSeries:
ser = self._native_series
other = validate_column_comparand(self._native_series.index, other)
return self._from_native_series(self._rename(ser.__rmod__(other), ser.name))
return self._from_native_series(ser.__rmod__(other).rename(ser.name, copy=False))

# Unary

Expand Down Expand Up @@ -486,13 +489,13 @@ def sort(
na_position = "last" if nulls_last else "first"
return self._from_native_series(
ser.sort_values(ascending=not descending, na_position=na_position).rename(
self.name
self.name, copy=False
)
)

def alias(self, name: str) -> Self:
ser = self._native_series
return self._from_native_series(self._rename(ser, name))
return self._from_native_series(ser.rename(name, copy=False))

def __array__(self, dtype: Any = None, copy: bool | None = None) -> Any:
# pandas used to always return object dtype for nullable dtypes.
Expand Down Expand Up @@ -546,28 +549,25 @@ def to_pandas(self) -> Any:
# --- descriptive ---
def is_duplicated(self: Self) -> Self:
res = self._native_series.duplicated(keep=False)
res = self._rename(res, self.name)
res = res.rename(self.name, copy=False)
return self._from_native_series(res)

def is_empty(self: Self) -> bool:
return self._native_series.empty # type: ignore[no-any-return]

def is_unique(self: Self) -> Self:
res = ~self._native_series.duplicated(keep=False)
res = self._rename(res, self.name)
res = ~self._native_series.duplicated(keep=False).rename(self.name, copy=False)
return self._from_native_series(res)

def null_count(self: Self) -> int:
return self._native_series.isna().sum() # type: ignore[no-any-return]

def is_first_distinct(self: Self) -> Self:
res = ~self._native_series.duplicated(keep="first")
res = self._rename(res, self.name)
res = ~self._native_series.duplicated(keep="first").rename(self.name, copy=False)
return self._from_native_series(res)

def is_last_distinct(self: Self) -> Self:
res = ~self._native_series.duplicated(keep="last")
res = self._rename(res, self.name)
res = ~self._native_series.duplicated(keep="last").rename(self.name, copy=False)
return self._from_native_series(res)

def is_sorted(self: Self, *, descending: bool = False) -> bool:
Expand Down

0 comments on commit 62eff56

Please sign in to comment.