Skip to content

Commit

Permalink
Merge pull request #10 from raisadz/increase-coverage
Browse files Browse the repository at this point in the history
Increase coverage for narwhals.dataframe
  • Loading branch information
MarcoGorelli authored Mar 16, 2024
2 parents 421494c + e8cf9e2 commit 2e8db11
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 20 deletions.
63 changes: 55 additions & 8 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Any

import numpy as np
import pandas as pd
import polars as pl
import pytest
Expand All @@ -11,11 +12,12 @@

df_pandas = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
df_polars = pl.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})


@pytest.mark.parametrize(
"df_raw",
[df_pandas, df_polars],
[df_pandas, df_polars, df_lazy],
)
def test_sort(df_raw: Any) -> None:
df = nw.DataFrame(df_raw)
Expand All @@ -31,7 +33,7 @@ def test_sort(df_raw: Any) -> None:

@pytest.mark.parametrize(
"df_raw",
[df_pandas, df_polars],
[df_pandas, df_polars, df_lazy],
)
def test_filter(df_raw: Any) -> None:
df = nw.DataFrame(df_raw)
Expand All @@ -43,7 +45,7 @@ def test_filter(df_raw: Any) -> None:

@pytest.mark.parametrize(
"df_raw",
[df_pandas, df_polars],
[df_pandas, df_polars, df_lazy],
)
def test_add(df_raw: Any) -> None:
df = nw.DataFrame(df_raw)
Expand All @@ -64,7 +66,7 @@ def test_add(df_raw: Any) -> None:

@pytest.mark.parametrize(
"df_raw",
[df_pandas, df_polars],
[df_pandas, df_polars, df_lazy],
)
def test_double(df_raw: Any) -> None:
df = nw.DataFrame(df_raw)
Expand All @@ -74,7 +76,7 @@ def test_double(df_raw: Any) -> None:
compare_dicts(result_native, expected)


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_sumh(df_raw: Any) -> None:
df = nw.DataFrame(df_raw)
result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b")))
Expand All @@ -88,7 +90,7 @@ def test_sumh(df_raw: Any) -> None:
compare_dicts(result_native, expected)


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_sumh_literal(df_raw: Any) -> None:
df = nw.DataFrame(df_raw)
result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b")))
Expand All @@ -102,7 +104,7 @@ def test_sumh_literal(df_raw: Any) -> None:
compare_dicts(result_native, expected)


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_sum_all(df_raw: Any) -> None:
df = nw.DataFrame(df_raw)
result = df.select(nw.all().sum())
Expand All @@ -111,10 +113,55 @@ def test_sum_all(df_raw: Any) -> None:
compare_dicts(result_native, expected)


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_double_selected(df_raw: Any) -> None:
    """Check that multiplying ``nw.col("a", "b")`` by 2 doubles both columns."""
    doubled = nw.DataFrame(df_raw).select(nw.col("a", "b") * 2)
    compare_dicts(
        nw.to_native(doubled),
        {"a": [2, 6, 4], "b": [8, 8, 12]},
    )


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_rename(df_raw: Any) -> None:
    """Check that ``rename`` maps ``a`` -> ``x`` and ``b`` -> ``y`` and keeps ``z``."""
    renamed = nw.DataFrame(df_raw).rename({"a": "x", "b": "y"})
    compare_dicts(
        nw.to_native(renamed),
        {"x": [1, 3, 2], "y": [4, 4, 6], "z": [7.0, 8, 9]},
    )


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_join(df_raw: Any) -> None:
    """Inner-join the frame with a copy of itself whose ``z`` column is renamed.

    The join keys are ``a`` and ``b`` on both sides, so the result is expected
    to carry the original ``z`` next to the renamed ``z_right``.
    """
    left = nw.DataFrame(df_raw)
    right = left.rename({"z": "z_right"})
    joined = left.join(
        right,
        left_on=["a", "b"],
        right_on=["a", "b"],
        how="inner",
    )
    compare_dicts(
        nw.to_native(joined),
        {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]},
    )


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_schema(df_raw: Any) -> None:
    """Check that ``schema`` reports Int64 for ``a``/``b`` and Float64 for ``z``."""
    actual_schema = nw.DataFrame(df_raw).schema
    expected_schema = {
        "a": nw.dtypes.Int64,
        "b": nw.dtypes.Int64,
        "z": nw.dtypes.Float64,
    }
    assert actual_schema == expected_schema


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars, df_lazy])
def test_columns(df_raw: Any) -> None:
    """Check that ``columns`` yields ``a``, ``b``, ``z`` in that order."""
    actual = nw.DataFrame(df_raw).columns
    wanted = ["a", "b", "z"]
    # Compare length first: zip() alone would ignore trailing extra names.
    assert len(actual) == len(wanted)
    for got, want in zip(actual, wanted):
        assert got == want


def test_accepted_dataframes() -> None:
    """Check that ``nw.DataFrame`` rejects a NumPy array with a ``TypeError``."""
    array = np.array([[0, 4.0], [2, 5]])
    with pytest.raises(
        TypeError,
        # ``match`` is applied as a regular expression, so the dot is escaped
        # to require a literal "." rather than any character.
        match=r"Expected pandas or Polars dataframe or lazyframe, got: <class 'numpy\.ndarray'>",
    ):
        nw.DataFrame(array)
28 changes: 16 additions & 12 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from __future__ import annotations

from typing import Any


def compare_dicts(result: dict[str, Any], expected: dict[str, Any]) -> None:
    """Assert that ``result`` matches ``expected`` for every key in ``expected``.

    Only the keys present in ``expected`` are checked; additional keys in
    ``result`` are ignored. Values are compared element by element: when the
    element taken from ``result`` is a ``float`` the comparison uses an
    absolute tolerance of ``1e-6``, otherwise exact equality.

    Raises:
        AssertionError: if a column differs in length or in any element.
    """
    for key in expected:
        actual_values = result[key]
        expected_values = expected[key]
        # zip() stops at the shorter input, so without this check a result
        # with missing or extra rows would pass unnoticed.
        assert len(actual_values) == len(expected_values)
        for lhs, rhs in zip(actual_values, expected_values):
            if isinstance(lhs, float):
                assert abs(lhs - rhs) < 1e-6
            else:
                assert lhs == rhs
from __future__ import annotations

from typing import Any

import polars as pl


def compare_dicts(result: dict[str, Any], expected: dict[str, Any]) -> None:
    """Assert that ``result`` matches ``expected`` for every key in ``expected``.

    ``result`` may also be a Polars ``LazyFrame``; it is collected first so
    its columns can be read by name. Only the keys present in ``expected``
    are checked; additional columns in ``result`` are ignored. Values are
    compared element by element: when the element taken from ``result`` is a
    ``float`` the comparison uses an absolute tolerance of ``1e-6``,
    otherwise exact equality.

    Raises:
        AssertionError: if a column differs in length or in any element.
    """
    if isinstance(result, pl.LazyFrame):
        result = result.collect()
    for key in expected:
        actual_values = result[key]
        expected_values = expected[key]
        # zip() stops at the shorter input, so without this check a result
        # with missing or extra rows would pass unnoticed.
        assert len(actual_values) == len(expected_values)
        for lhs, rhs in zip(actual_values, expected_values):
            if isinstance(lhs, float):
                assert abs(lhs - rhs) < 1e-6
            else:
                assert lhs == rhs

0 comments on commit 2e8db11

Please sign in to comment.