Skip to content

Commit

Permalink
improve test coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Feb 25, 2025
1 parent 0131fb1 commit 9163acb
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
6 changes: 5 additions & 1 deletion python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -1666,11 +1666,15 @@ def __init__(self, schema: Schema, key: str, left: IR, right: IR):
schema,
key,
)
assert isinstance(left, Sort)
assert isinstance(right, Sort)
assert left.order == right.order
assert len(left.schema.keys()) <= len(right.schema.keys())

@classmethod
def do_evaluate(cls, schema: Schema, key: str, *dfs: DataFrame) -> DataFrame:
left, right = dfs
left, right = dfs
right = right.discard_columns(right.column_names_set - left.column_names_set)
on_col_left = left.select_columns({key})[0]
on_col_right = right.select_columns({key})[0]
return DataFrame.from_table(
Expand Down
32 changes: 28 additions & 4 deletions python/cudf_polars/tests/test_merge_sorted.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,45 @@
[
pytest.param(
True,
marks=pytest.mark.xfail(
reason="https://github.com/rapidsai/cudf/issues/18089"
),
marks=pytest.mark.xfail(reason="polars/issues/21464"),
),
False,
],
)
def test_merge_sorted(descending):
def test_merge_sorted_without_nulls(descending):
    """Check ``merge_sorted`` on two null-free frames pre-sorted by ``age``.

    Both inputs are sorted on ``age`` using the parametrized ``descending``
    flag before being merged on that key. The right-hand frame carries an
    extra ``height`` column that the left-hand frame lacks. The resulting
    lazy query is handed to ``assert_gpu_result_equal``.
    """
    left_data = {"name": ["steve", "elise", "bob"], "age": [42, 44, 18]}
    right_data = {
        "name": ["anna", "megan", "steve", "thomas"],
        "age": [21, 33, 42, 20],
        "height": [5, 5, 5, 5],
    }
    left = pl.LazyFrame(left_data).sort("age", descending=descending)
    right = pl.LazyFrame(right_data).sort("age", descending=descending)
    assert_gpu_result_equal(left.merge_sorted(right, key="age"))


@pytest.mark.parametrize(
"descending",
[
pytest.param(
True,
marks=pytest.mark.xfail(reason="polars/issues/21464 and cudf/issues/18089"),
),
False,
],
)
def test_merge_sorted_with_nulls(descending):
df0 = pl.LazyFrame(
{"name": ["steve", "elise", "bob", "john"], "age": [42, 44, 18, None]}
).sort("age", descending=descending)
df1 = pl.LazyFrame(
{
"name": ["anna", "megan", "steve", "thomas", "john"],
"age": [21, 33, 42, 20, None],
"height": [5, 5, 5, 5, 5],
}
).sort("age", descending=descending)
q = df0.merge_sorted(df1, key="age")
Expand Down

0 comments on commit 9163acb

Please sign in to comment.