Skip to content

Commit

Permalink
Merge branch 'main' into feat/duckdb-replace_all-regex
Browse files Browse the repository at this point in the history
  • Loading branch information
camriddell authored Jan 10, 2025
2 parents 360e1e2 + 339683c commit affcf91
Show file tree
Hide file tree
Showing 24 changed files with 305 additions and 116 deletions.
2 changes: 1 addition & 1 deletion narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def join(
self: Self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"],
how: Literal["left", "inner", "cross", "anti", "semi"],
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand Down
2 changes: 1 addition & 1 deletion narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def join(
self: Self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner",
how: Literal["left", "inner", "cross", "anti", "semi"] = "inner",
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,8 @@ def concat_str(
self,
exprs: Iterable[IntoDaskExpr],
*more_exprs: IntoDaskExpr,
separator: str = "",
ignore_nulls: bool = False,
separator: str,
ignore_nulls: bool,
) -> DaskExpr:
parsed_exprs = [
*parse_into_exprs(*exprs, namespace=self),
Expand Down
6 changes: 1 addition & 5 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def join(
self: Self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner",
how: Literal["left", "inner", "cross", "anti", "semi"] = "inner",
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand All @@ -226,10 +226,6 @@ def join(
right_on = [right_on]
original_alias = self._native_frame.alias

if how not in ("inner", "left", "semi", "cross"):
msg = "Only inner and left join is implemented for DuckDB"
raise NotImplementedError(msg)

if how == "cross":
if self._backend_version < (1, 1, 4):
msg = f"DuckDB>=1.1.4 is required for cross-join, found version: {self._backend_version}"
Expand Down
25 changes: 19 additions & 6 deletions narwhals/_duckdb/expr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import functools
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
Expand Down Expand Up @@ -488,6 +487,15 @@ def min(self) -> Self:
lambda _input: FunctionExpression("min", _input), "min", returns_scalar=True
)

def null_count(self) -> Self:
    """Return a scalar expression counting null entries.

    Computed as SUM(CAST(col IS NULL AS INT)): each null contributes 1,
    each non-null contributes 0.
    """
    from duckdb import FunctionExpression

    def _null_count(_input: duckdb.Expression) -> duckdb.Expression:
        # NULL -> 1, non-NULL -> 0, then total up the indicator column.
        indicator = _input.isnull().cast("int")
        return FunctionExpression("sum", indicator)

    return self._from_call(_null_count, "null_count", returns_scalar=True)

def is_null(self) -> Self:
return self._from_call(
lambda _input: _input.isnull(), "is_null", returns_scalar=self._returns_scalar
Expand All @@ -497,11 +505,7 @@ def is_in(self, other: Sequence[Any]) -> Self:
from duckdb import ConstantExpression

return self._from_call(
lambda _input: functools.reduce(
lambda x, y: x | _input.isin(ConstantExpression(y)),
other[1:],
_input.isin(ConstantExpression(other[0])),
),
lambda _input: _input.isin(*[ConstantExpression(x) for x in other]),
"is_in",
returns_scalar=self._returns_scalar,
)
Expand Down Expand Up @@ -619,6 +623,15 @@ def func(_input: duckdb.Expression) -> duckdb.Expression:
func, "slice", returns_scalar=self._compliant_expr._returns_scalar
)

def len_chars(self) -> DuckDBExpr:
    """Return the per-value character length via DuckDB's ``length`` function."""
    from duckdb import FunctionExpression

    parent = self._compliant_expr

    def _len_chars(_input: duckdb.Expression) -> duckdb.Expression:
        return FunctionExpression("length", _input)

    return parent._from_call(
        _len_chars,
        "len_chars",
        returns_scalar=parent._returns_scalar,
    )

def to_lowercase(self) -> DuckDBExpr:
from duckdb import FunctionExpression

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def join(
self,
other: Self,
*,
how: Literal["left", "inner", "outer", "cross", "anti", "semi"] = "inner",
how: Literal["left", "inner", "cross", "anti", "semi"] = "inner",
left_on: str | list[str] | None,
right_on: str | list[str] | None,
suffix: str,
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,8 @@ def concat_str(
self,
exprs: Iterable[IntoPandasLikeExpr],
*more_exprs: IntoPandasLikeExpr,
separator: str = "",
ignore_nulls: bool = False,
separator: str,
ignore_nulls: bool,
) -> PandasLikeExpr:
parsed_exprs = [
*parse_into_exprs(*exprs, namespace=self),
Expand Down
14 changes: 14 additions & 0 deletions narwhals/_spark_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,20 @@ def skew(self) -> Self:

return self._from_call(F.skewness, "skew", returns_scalar=True)

def n_unique(self: Self) -> Self:
    """Return a scalar expression counting distinct values, nulls included.

    ``count_distinct`` ignores nulls, so a null-presence indicator is added:
    ``max`` over the 0/1 cast of ``isnull`` is 1 iff at least one null exists.
    """
    from pyspark.sql import functions as sf
    from pyspark.sql.types import IntegerType

    def _count_with_nulls(_input: Column) -> Column:
        has_null = sf.max(sf.isnull(_input).cast(IntegerType()))
        return sf.count_distinct(_input) + has_null

    return self._from_call(_count_with_nulls, "n_unique", returns_scalar=True)

def is_null(self: Self) -> Self:
    """Elementwise null test, delegating to ``pyspark.sql.functions.isnull``."""
    from pyspark.sql import functions as sf

    return self._from_call(sf.isnull, "is_null", returns_scalar=self._returns_scalar)

@property
def str(self: Self) -> SparkLikeExprStringNamespace:
    """Namespace of string-specific methods for this expression."""
    namespace = SparkLikeExprStringNamespace(self)
    return namespace
Expand Down
6 changes: 1 addition & 5 deletions narwhals/_spark_like/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,7 @@ def agg_pyspark(
if expr._output_names is None: # pragma: no cover
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)

function_name = POLARS_TO_PYSPARK_AGGREGATIONS.get(
expr._function_name, expr._function_name
)
agg_func = get_spark_function(function_name, **expr._kwargs)
agg_func = get_spark_function(expr._function_name, **expr._kwargs)
simple_aggregations.update(
{output_name: agg_func(keys[0]) for output_name in expr._output_names}
)
Expand Down
Loading

0 comments on commit affcf91

Please sign in to comment.