Skip to content

Commit

Permalink
Test dask with npartitions (#877)
Browse files: browse the repository at this point in the history
  • Loading branch information
Nikoleta-v3 authored Aug 30, 2024
1 parent 03d255a commit b7707dd
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 25 deletions.
2 changes: 1 addition & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def join(
_supported_joins = ("inner", "left", "cross", "anti", "semi")

if how not in _supported_joins:
msg = f"Only the following join stragies are supported: {_supported_joins}; found '{how}'."
msg = f"Only the following join strategies are supported: {_supported_joins}; found '{how}'."
raise NotImplementedError(msg)

if how == "cross" and (left_on or right_on):
Expand Down
12 changes: 3 additions & 9 deletions tests/expr_and_series/is_duplicated_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,13 @@
import narwhals.stable.v1 as nw
from tests.utils import compare_dicts

data = {
"a": [1, 1, 2],
"b": [1, 2, 3],
}
data = {"a": [1, 1, 2], "b": [1, 2, 3], "index": [0, 1, 2]}


def test_is_duplicated_expr(constructor: Any) -> None:
df = nw.from_native(constructor(data))
result = df.select(nw.all().is_duplicated())
expected = {
"a": [True, True, False],
"b": [False, False, False],
}
result = df.select(nw.col("a", "b").is_duplicated(), "index").sort("index")
expected = {"a": [True, True, False], "b": [False, False, False], "index": [0, 1, 2]}
compare_dicts(result, expected)


Expand Down
4 changes: 3 additions & 1 deletion tests/expr_and_series/is_unique_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@
data = {
"a": [1, 1, 2],
"b": [1, 2, 3],
"index": [0, 1, 2],
}


def test_is_unique_expr(constructor: Any) -> None:
df = nw.from_native(constructor(data))
result = df.select(nw.all().is_unique())
result = df.select(nw.col("a", "b").is_unique(), "index").sort("index")
expected = {
"a": [False, False, True],
"b": [True, True, True],
"index": [0, 1, 2],
}
compare_dicts(result, expected)

Expand Down
57 changes: 43 additions & 14 deletions tests/frame/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,35 @@


def test_inner_join_two_keys(constructor: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "index": [0, 1, 2]}
df = nw.from_native(constructor(data))
df_right = df
result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner") # type: ignore[arg-type]
result = result.sort("index")
result = result.drop("index_right")
expected = {
"a": [1, 3, 2],
"b": [4, 4, 6],
"z": [7.0, 8, 9],
"z_right": [7.0, 8, 9],
"index": [0, 1, 2],
}
compare_dicts(result, expected)


def test_inner_join_single_key(constructor: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "index": [0, 1, 2]}
df = nw.from_native(constructor(data))
df_right = df
result = df.join(df_right, left_on="a", right_on="a", how="inner") # type: ignore[arg-type]
result = df.join(df_right, left_on="a", right_on="a", how="inner").sort("index") # type: ignore[arg-type]
result = result.drop("index_right")
expected = {
"a": [1, 3, 2],
"b": [4, 4, 6],
"b_right": [4, 4, 6],
"z": [7.0, 8, 9],
"z_right": [7.0, 8, 9],
"index": [0, 1, 2],
}
compare_dicts(result, expected)

Expand Down Expand Up @@ -105,7 +110,7 @@ def test_semi_join(
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
result = df.join(other, how="semi", left_on=join_key, right_on=join_key) # type: ignore[arg-type]
result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort("a") # type: ignore[arg-type]
compare_dicts(result, expected)


Expand All @@ -117,59 +122,83 @@ def test_join_not_implemented(constructor: Any, how: str) -> None:
with pytest.raises(
NotImplementedError,
match=re.escape(
f"Only the following join stragies are supported: ('inner', 'left', 'cross', 'anti', 'semi'); found '{how}'."
f"Only the following join strategies are supported: ('inner', 'left', 'cross', 'anti', 'semi'); found '{how}'."
),
):
df.join(df, left_on="a", right_on="a", how=how) # type: ignore[arg-type]


@pytest.mark.filterwarnings("ignore:the default coalesce behavior")
def test_left_join(constructor: Any) -> None:
data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6]}
data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7]}
data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6], "index": [0.0, 1.0, 2.0]}
data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7], "index": [0.0, 1.0, 2.0]}
df_left = nw.from_native(constructor(data_left))
df_right = nw.from_native(constructor(data_right))
result = df_left.join(df_right, left_on="b", right_on="c", how="left").select( # type: ignore[arg-type]
nw.all().fill_null(float("nan"))
)
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, float("nan")]}
result = result.sort("index")
result = result.drop("index_right")
expected = {
"a": [1, 2, 3],
"b": [4, 5, 6],
"a_right": [1, 2, float("nan")],
"index": [0, 1, 2],
}
compare_dicts(result, expected)


@pytest.mark.filterwarnings("ignore: the default coalesce behavior")
def test_left_join_multiple_column(constructor: Any) -> None:
data_left = {"a": [1, 2, 3], "b": [4, 5, 6]}
data_right = {"a": [1, 2, 3], "c": [4, 5, 6]}
data_left = {"a": [1, 2, 3], "b": [4, 5, 6], "index": [0, 1, 2]}
data_right = {"a": [1, 2, 3], "c": [4, 5, 6], "index": [0, 1, 2]}
df_left = nw.from_native(constructor(data_left))
df_right = nw.from_native(constructor(data_right))
result = df_left.join(df_right, left_on=["a", "b"], right_on=["a", "c"], how="left") # type: ignore[arg-type]
expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
result = result.sort("index")
result = result.drop("index_right")
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "index": [0, 1, 2]}
compare_dicts(result, expected)


@pytest.mark.filterwarnings("ignore: the default coalesce behavior")
def test_left_join_overlapping_column(constructor: Any) -> None:
data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6], "d": [1.0, 4, 2]}
data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 6], "d": [1.0, 4, 2]}
data_left = {
"a": [1.0, 2, 3],
"b": [4.0, 5, 6],
"d": [1.0, 4, 2],
"index": [0.0, 1.0, 2.0],
}
data_right = {
"a": [1.0, 2, 3],
"c": [4.0, 5, 6],
"d": [1.0, 4, 2],
"index": [0.0, 1.0, 2.0],
}
df_left = nw.from_native(constructor(data_left))
df_right = nw.from_native(constructor(data_right))
result = df_left.join(df_right, left_on="b", right_on="c", how="left") # type: ignore[arg-type]
result = df_left.join(df_right, left_on="b", right_on="c", how="left").sort("index") # type: ignore[arg-type]
result = result.drop("index_right")
expected: dict[str, list[Any]] = {
"a": [1, 2, 3],
"b": [4, 5, 6],
"d": [1, 4, 2],
"a_right": [1, 2, 3],
"d_right": [1, 4, 2],
"index": [0, 1, 2],
}
compare_dicts(result, expected)
result = df_left.join(df_right, left_on="a", right_on="d", how="left").select( # type: ignore[arg-type]
nw.all().fill_null(float("nan"))
)
result = result.sort("index")
result = result.drop("index_right")
expected = {
"a": [1, 2, 3],
"b": [4, 5, 6],
"d": [1, 4, 2],
"a_right": [1.0, 3.0, float("nan")],
"c": [4.0, 6.0, float("nan")],
"index": [0, 1, 2],
}
compare_dicts(result, expected)

0 comments on commit b7707dd

Please sign in to comment.