Skip to content

Commit

Permalink
chore: refactor root_names and output_names tracking (#1731)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Marco Gorelli <[email protected]>
  • Loading branch information
EdAbati and MarcoGorelli authored Jan 5, 2025
1 parent 19418cf commit 03f6754
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 73 deletions.
27 changes: 2 additions & 25 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from copy import copy
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
Expand All @@ -12,6 +11,7 @@
from narwhals._dask.utils import binary_operation_returns_scalar
from narwhals._dask.utils import maybe_evaluate
from narwhals._dask.utils import narwhals_to_native_dtype
from narwhals._expression_parsing import infer_new_root_output_names
from narwhals._pandas_like.utils import calculate_timestamp_date
from narwhals._pandas_like.utils import calculate_timestamp_datetime
from narwhals._pandas_like.utils import native_to_narwhals_dtype
Expand Down Expand Up @@ -148,30 +148,7 @@ def func(df: DaskLazyFrame) -> list[dask_expr.Series]:
results.append(result)
return results

# Try tracking root and output names by combining them from all
# expressions appearing in args and kwargs. If any anonymous
# expression appears (e.g. nw.all()), then give up on tracking root names
# and just set it to None.
root_names = copy(self._root_names)
output_names = self._output_names
for arg in list(kwargs.values()):
if root_names is not None and isinstance(arg, self.__class__):
if arg._root_names is not None:
root_names.extend(arg._root_names)
else:
root_names = None
output_names = None
break
elif root_names is None:
output_names = None
break

if not (
(output_names is None and root_names is None)
or (output_names is not None and root_names is not None)
): # pragma: no cover
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)
root_names, output_names = infer_new_root_output_names(self, **kwargs)

return self.__class__(
func,
Expand Down
56 changes: 33 additions & 23 deletions narwhals/_expression_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,38 @@ def parse_into_expr(
raise InvalidIntoExprError.from_invalid_type(type(into_expr))


def infer_new_root_output_names(
    expr: CompliantExpr[Any], **kwargs: Any
) -> tuple[list[str] | None, list[str] | None]:
    """Return new root and output names after chaining expressions.

    Try tracking root and output names by combining them from all expressions
    appearing in kwargs. If any anonymous expression appears (e.g. nw.all()),
    then give up on tracking names and set both to None.

    Arguments:
        expr: Expression being chained. Its `_root_names` and `_output_names`
            are the starting point; neither attribute is mutated.
        **kwargs: Keyword arguments of the chained operation. Only values that
            are instances of `expr`'s own class contribute root names; any
            other value is ignored.

    Returns:
        A `(root_names, output_names)` tuple. Both are lists when every
        expression involved has known root names, and both are None otherwise.

    Raises:
        AssertionError: If exactly one of the two results is None, which
            means the incoming expression violated the "both or neither"
            invariant.
    """
    # Shallow-copy so that extending below never touches `expr._root_names`.
    root_names = copy(expr._root_names)
    output_names = expr._output_names

    for arg in kwargs.values():
        if root_names is None:
            # `expr` itself is anonymous: nothing can be tracked any more.
            output_names = None
            break
        if not isinstance(arg, expr.__class__):
            # Plain (non-expression) arguments carry no column names.
            continue
        if arg._root_names is None:
            # An anonymous expression among the arguments: give up tracking.
            root_names = None
            output_names = None
            break
        root_names.extend(arg._root_names)

    # Root and output names must be both known or both unknown.
    if (output_names is None) != (root_names is None):  # pragma: no cover
        msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
        raise AssertionError(msg)
    return root_names, output_names


@overload
def reuse_series_implementation(
expr: PandasLikeExprT,
Expand Down Expand Up @@ -201,30 +233,8 @@ def func(df: CompliantDataFrame) -> Sequence[CompliantSeries]:
raise AssertionError(msg)
return out

# Try tracking root and output names by combining them from all
# expressions appearing in args and kwargs. If any anonymous
# expression appears (e.g. nw.all()), then give up on tracking root names
# and just set it to None.
root_names = copy(expr._root_names)
output_names = expr._output_names
for arg in list(kwargs.values()):
if root_names is not None and isinstance(arg, expr.__class__):
if arg._root_names is not None:
root_names.extend(arg._root_names)
else:
root_names = None
output_names = None
break
elif root_names is None:
output_names = None
break
root_names, output_names = infer_new_root_output_names(expr, **kwargs)

if not (
(output_names is None and root_names is None)
or (output_names is not None and root_names is not None)
): # pragma: no cover
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)
return plx._create_expr_from_callable( # type: ignore[return-value]
func, # type: ignore[arg-type]
depth=expr._depth + 1,
Expand Down
27 changes: 2 additions & 25 deletions narwhals/_spark_like/expr.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

from copy import copy
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Sequence

from narwhals._expression_parsing import infer_new_root_output_names
from narwhals._spark_like.utils import get_column_name
from narwhals._spark_like.utils import maybe_evaluate
from narwhals.typing import CompliantExpr
Expand Down Expand Up @@ -106,30 +106,7 @@ def func(df: SparkLikeLazyFrame) -> list[Column]:
results.append(column_result)
return results

# Try tracking root and output names by combining them from all
# expressions appearing in args and kwargs. If any anonymous
# expression appears (e.g. nw.all()), then give up on tracking root names
# and just set it to None.
root_names = copy(self._root_names)
output_names = self._output_names
for arg in list(kwargs.values()):
if root_names is not None and isinstance(arg, self.__class__):
if arg._root_names is not None:
root_names.extend(arg._root_names)
else: # pragma: no cover
root_names = None
output_names = None
break
elif root_names is None:
output_names = None
break

if not (
(output_names is None and root_names is None)
or (output_names is not None and root_names is not None)
): # pragma: no cover
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)
root_names, output_names = infer_new_root_output_names(self, **kwargs)

return self.__class__(
func,
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ filterwarnings = [
'ignore:.*defaulting to pandas implementation',
'ignore:.*implementation has mismatches with pandas',
'ignore:.*You are using pyarrow version',
# This warning was temporarily raised by pandas but then reverted.
'ignore:.*Passing a BlockManager to DataFrame:DeprecationWarning',
]
xfail_strict = true
markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
Expand Down

0 comments on commit 03f6754

Please sign in to comment.