Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactor root_names and output_names tracking #1731

Merged
merged 3 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 2 additions & 25 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from copy import copy
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
Expand All @@ -12,6 +11,7 @@
from narwhals._dask.utils import binary_operation_returns_scalar
from narwhals._dask.utils import maybe_evaluate
from narwhals._dask.utils import narwhals_to_native_dtype
from narwhals._expression_parsing import infer_new_root_output_names
from narwhals._pandas_like.utils import calculate_timestamp_date
from narwhals._pandas_like.utils import calculate_timestamp_datetime
from narwhals._pandas_like.utils import native_to_narwhals_dtype
Expand Down Expand Up @@ -148,30 +148,7 @@ def func(df: DaskLazyFrame) -> list[dask_expr.Series]:
results.append(result)
return results

# Try tracking root and output names by combining them from all
# expressions appearing in args and kwargs. If any anonymous
# expression appears (e.g. nw.all()), then give up on tracking root names
# and just set it to None.
root_names = copy(self._root_names)
output_names = self._output_names
for arg in list(kwargs.values()):
if root_names is not None and isinstance(arg, self.__class__):
if arg._root_names is not None:
root_names.extend(arg._root_names)
else:
root_names = None
output_names = None
break
elif root_names is None:
output_names = None
break

if not (
(output_names is None and root_names is None)
or (output_names is not None and root_names is not None)
): # pragma: no cover
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)
root_names, output_names = infer_new_root_output_names(self, **kwargs)

return self.__class__(
func,
Expand Down
56 changes: 33 additions & 23 deletions narwhals/_expression_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,38 @@ def parse_into_expr(
raise InvalidIntoExprError.from_invalid_type(type(into_expr))


def infer_new_root_output_names(
    expr: CompliantExpr[Any], **kwargs: Any
) -> tuple[list[str] | None, list[str] | None]:
    """Work out the root and output names that result from chaining onto `expr`.

    The starting point is a shallow copy of `expr._root_names` (so the list held
    by `expr` is never extended in place) together with `expr._output_names`.
    Each value in `kwargs` that is an instance of `expr`'s own class contributes
    its `_root_names` to the running list of roots; other values are ignored.

    Tracking is abandoned, and `(None, None)` is the result, as soon as either
    `expr` itself or one of those same-class values has `_root_names` set to
    `None` (e.g. an anonymous expression such as `nw.all()`).

    Arguments:
        expr: Expression whose `_root_names` / `_output_names` seed the result.
        **kwargs: Keyword arguments of the chained call; only values that are
            instances of `expr.__class__` are inspected.

    Returns:
        A `(root_names, output_names)` pair. Either both are lists or both
        are `None`.

    Raises:
        AssertionError: If exactly one of the two results ends up `None`.
    """
    roots = copy(expr._root_names)
    outputs = expr._output_names
    expr_type = expr.__class__

    for value in kwargs.values():
        if roots is None:
            # Roots were already unknown: output names cannot be trusted either.
            outputs = None
            break
        if not isinstance(value, expr_type):
            # Plain (non-expression) arguments carry no names.
            continue
        extra_roots = value._root_names
        if extra_roots is None:
            # An anonymous expression showed up: give up on tracking.
            roots = None
            outputs = None
            break
        roots.extend(extra_roots)

    # Invariant: root and output names are either both known or both unknown.
    roots_known = roots is not None
    outputs_known = outputs is not None
    if roots_known != outputs_known:  # pragma: no cover
        msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
        raise AssertionError(msg)
    return roots, outputs


@overload
def reuse_series_implementation(
expr: PandasLikeExprT,
Expand Down Expand Up @@ -201,30 +233,8 @@ def func(df: CompliantDataFrame) -> Sequence[CompliantSeries]:
raise AssertionError(msg)
return out

# Try tracking root and output names by combining them from all
# expressions appearing in args and kwargs. If any anonymous
# expression appears (e.g. nw.all()), then give up on tracking root names
# and just set it to None.
root_names = copy(expr._root_names)
output_names = expr._output_names
for arg in list(kwargs.values()):
if root_names is not None and isinstance(arg, expr.__class__):
if arg._root_names is not None:
root_names.extend(arg._root_names)
else:
root_names = None
output_names = None
break
elif root_names is None:
output_names = None
break
root_names, output_names = infer_new_root_output_names(expr, **kwargs)

if not (
(output_names is None and root_names is None)
or (output_names is not None and root_names is not None)
): # pragma: no cover
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)
return plx._create_expr_from_callable( # type: ignore[return-value]
func, # type: ignore[arg-type]
depth=expr._depth + 1,
Expand Down
27 changes: 2 additions & 25 deletions narwhals/_spark_like/expr.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

from copy import copy
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Sequence

from narwhals._expression_parsing import infer_new_root_output_names
from narwhals._spark_like.utils import get_column_name
from narwhals._spark_like.utils import maybe_evaluate
from narwhals.typing import CompliantExpr
Expand Down Expand Up @@ -106,30 +106,7 @@ def func(df: SparkLikeLazyFrame) -> list[Column]:
results.append(column_result)
return results

# Try tracking root and output names by combining them from all
# expressions appearing in args and kwargs. If any anonymous
# expression appears (e.g. nw.all()), then give up on tracking root names
# and just set it to None.
root_names = copy(self._root_names)
output_names = self._output_names
for arg in list(kwargs.values()):
if root_names is not None and isinstance(arg, self.__class__):
if arg._root_names is not None:
root_names.extend(arg._root_names)
else: # pragma: no cover
root_names = None
output_names = None
break
elif root_names is None:
output_names = None
break

if not (
(output_names is None and root_names is None)
or (output_names is not None and root_names is not None)
): # pragma: no cover
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)
root_names, output_names = infer_new_root_output_names(self, **kwargs)

return self.__class__(
func,
Expand Down
Loading