diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py
index 003fa4350..76ff68ae0 100644
--- a/narwhals/_duckdb/dataframe.py
+++ b/narwhals/_duckdb/dataframe.py
@@ -170,7 +170,7 @@ def columns(self) -> list[str]:
         return self._native_frame.columns  # type: ignore[no-any-return]
 
     def to_pandas(self: Self) -> pd.DataFrame:
-        # only is version if v1, keep around for backcompat
+        # only if version is v1, keep around for backcompat
         import pandas as pd  # ignore-banned-import()
 
         if parse_version(pd.__version__) >= parse_version("1.0.0"):
@@ -180,7 +180,7 @@ def to_pandas(self: Self) -> pd.DataFrame:
             raise NotImplementedError(msg)
 
     def to_arrow(self: Self) -> pa.Table:
-        # only is version if v1, keep around for backcompat
+        # only if version is v1, keep around for backcompat
         return self._native_frame.arrow()
 
     def _change_version(self: Self, version: Version) -> Self:
@@ -228,13 +228,14 @@ def join(
         if how not in ("inner", "left"):
             msg = "Only inner and left join is implemented for DuckDB"
             raise NotImplementedError(msg)
+
+        # help mypy
         assert left_on is not None  # noqa: S101
         assert right_on is not None  # noqa: S101
-        conditions = []
-        lhs = []
-        for left, right in zip(left_on, right_on):
-            conditions.append(f"lhs.{left} = rhs.{right}")
-            lhs.append(left)
+
+        conditions = [
+            f"lhs.{left} = rhs.{right}" for left, right in zip(left_on, right_on)
+        ]
         original_alias = self._native_frame.alias
         condition = " and ".join(conditions)
         rel = self._native_frame.set_alias("lhs").join(
diff --git a/tests/duckdb_test.py b/tests/duckdb_test.py
deleted file mode 100644
index 3edc90b05..000000000
--- a/tests/duckdb_test.py
+++ /dev/null
@@ -1,415 +0,0 @@
-"""PySpark support in Narwhals is still _very_ limited.
-
-Start with a simple test file whilst we develop the basics.
-Once we're a bit further along, we can integrate PySpark tests into the main test suite.
-""" - -from __future__ import annotations - -from contextlib import nullcontext as does_not_raise -from typing import TYPE_CHECKING -from typing import Any - -import pytest - -import narwhals.stable.v1 as nw -from narwhals.exceptions import ColumnNotFoundError -from tests.utils import assert_equal_data - -if TYPE_CHECKING: - from narwhals.typing import IntoFrame - -import polars as pl - -duckdb = pytest.importorskip("duckdb") - - -def duckdb_constructor(obj: dict[str, Any]) -> IntoFrame: - _df = pl.DataFrame(obj) - return duckdb.table("_df") # type: ignore[no-any-return] - - -# copied from tests/translate/from_native_test.py -def test_series_only() -> None: - obj = duckdb_constructor({"a": [1, 2, 3]}) - with pytest.raises(TypeError, match="Cannot only use `series_only"): - _ = nw.from_native(obj, series_only=True) - - -def test_eager_only_lazy() -> None: - dframe = duckdb_constructor({"a": [1, 2, 3]}) - with pytest.raises(TypeError, match="Cannot only use .*`eager_only"): - _ = nw.from_native(dframe, eager_only=True) - - -# copied from tests/frame/with_columns_test.py -def test_columns() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.columns - expected = ["a", "b", "z"] - assert result == expected - - -# copied from tests/frame/with_columns_test.py -def test_with_columns_order() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.with_columns(nw.col("a") + 1, d=nw.col("a") - 1) - assert result.collect_schema().names() == ["a", "b", "z", "d"] - expected = {"a": [2, 4, 3], "b": [4, 4, 6], "z": [7.0, 8, 9], "d": [0, 2, 1]} - assert_equal_data(result, expected) - - -@pytest.mark.filterwarnings("ignore:If `index_col` is not specified for `to_spark`") -def test_with_columns_empty() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.select().with_columns() - assert_equal_data(result, {}) - - -def test_with_columns_order_single_row() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "i": [0, 1, 2]} - df = nw.from_native(duckdb_constructor(data)).filter(nw.col("i") < 1).drop("i") - result = df.with_columns(nw.col("a") + 1, d=nw.col("a") - 1) - assert result.collect_schema().names() == ["a", "b", "z", "d"] - expected = {"a": [2], "b": [4], "z": [7.0], "d": [0]} - assert_equal_data(result, expected) - - -# copied from tests/frame/select_test.py -def test_select() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.select("a") - expected = {"a": [1, 3, 2]} - assert_equal_data(result, expected) - - -@pytest.mark.filterwarnings("ignore:If `index_col` is not specified for `to_spark`") -@pytest.mark.xfail -def test_empty_select() -> None: - result = nw.from_native(duckdb_constructor({"a": [1, 2, 3]})).lazy().select() - assert result.collect().shape == (0, 0) - - -# copied from tests/frame/filter_test.py -def test_filter() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.filter(nw.col("a") > 1) - expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]} - assert_equal_data(result, expected) - - -# copied from tests/frame/schema_test.py -@pytest.mark.filterwarnings("ignore:Determining|Resolving.*") -def test_schema() -> None: - df = nw.from_native( - duckdb_constructor({"a": [1, 3, 2], "b": [4, 
4, 6], "z": [7.1, 8, 9]}) - ) - result = df.schema - expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} - - result = df.schema - assert result == expected - result = df.lazy().collect().schema - assert result == expected - - -def test_collect_schema() -> None: - df = nw.from_native( - duckdb_constructor({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]}) - ) - expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64} - - result = df.collect_schema() - assert result == expected - result = df.lazy().collect().collect_schema() - assert result == expected - - -# copied from tests/frame/drop_test.py -@pytest.mark.parametrize( - ("to_drop", "expected"), - [ - ("abc", ["b", "z"]), - (["abc"], ["b", "z"]), - (["abc", "b"], ["z"]), - ], -) -def test_drop(to_drop: list[str], expected: list[str]) -> None: - data = {"abc": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - assert df.drop(to_drop).collect_schema().names() == expected - if not isinstance(to_drop, str): - assert df.drop(*to_drop).collect_schema().names() == expected - - -@pytest.mark.parametrize( - ("strict", "context"), - [ - (True, pytest.raises(ColumnNotFoundError, match="z")), - (False, does_not_raise()), - ], -) -def test_drop_strict(context: Any, *, strict: bool) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6]} - to_drop = ["a", "z"] - - df = nw.from_native(duckdb_constructor(data)) - - with context: - names_out = df.drop(to_drop, strict=strict).collect_schema().names() - assert names_out == ["b"] - - -# copied from tests/frame/head_test.py -def test_head() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} - - df_raw = duckdb_constructor(data) - df = nw.from_native(df_raw) - - result = df.head(2) - assert_equal_data(result, expected) - - result = df.head(2) - assert_equal_data(result, expected) - - # negative indices not allowed for lazyframes - result = df.lazy().collect().head(-1) - assert_equal_data(result, expected) - - -# copied from tests/frame/sort_test.py -def test_sort() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.sort("a", "b") - expected = { - "a": [1, 2, 3], - "b": [4, 6, 4], - "z": [7.0, 9.0, 8.0], - } - assert_equal_data(result, expected) - result = df.sort("a", "b", descending=[True, False]).lazy().collect() - expected = { - "a": [3, 2, 1], - "b": [4, 6, 4], - "z": [8.0, 9.0, 7.0], - } - assert_equal_data(result, expected) - - -@pytest.mark.parametrize( - ("nulls_last", "expected"), - [ - (True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, float("nan")]}), - (False, {"a": [-1, 0, 2, 0], "b": [float("nan"), 3, 2, 1]}), - ], -) -def test_sort_nulls(*, nulls_last: bool, expected: dict[str, float]) -> None: - data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]} - df = nw.from_native(duckdb_constructor(data)) - result = df.sort("b", descending=True, nulls_last=nulls_last).lazy().collect() - assert_equal_data(result, expected) - - -# copied from tests/frame/add_test.py -@pytest.mark.xfail -def test_add() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.with_columns( - c=nw.col("a") + nw.col("b"), - d=nw.col("a") - nw.col("a").mean(), - e=nw.col("a") - nw.col("a").std(), - ) - expected = { # pragma: no cover - "a": [1, 3, 2], - "b": [4, 4, 6], - "z": [7.0, 8.0, 9.0], - "c": [5, 7, 8], - "d": [-1.0, 1.0, 0.0], - "e": [0.0, 2.0, 1.0], - } - 
assert_equal_data(result, expected) # pragma: no cover - - -# copied from tests/expr_and_series/all_horizontal_test.py -@pytest.mark.parametrize("expr1", ["a", nw.col("a")]) -@pytest.mark.parametrize("expr2", ["b", nw.col("b")]) -def test_allh(expr1: Any, expr2: Any) -> None: - data = { - "a": [False, False, True], - "b": [False, True, True], - } - df = nw.from_native(duckdb_constructor(data)) - result = df.select(all=nw.all_horizontal(expr1, expr2)) - - expected = {"all": [False, False, True]} - assert_equal_data(result, expected) - - -def test_allh_all() -> None: - data = { - "a": [False, False, True], - "b": [False, True, True], - } - df = nw.from_native(duckdb_constructor(data)) - result = df.select(all=nw.all_horizontal(nw.all())) - expected = {"all": [False, False, True]} - assert_equal_data(result, expected) - result = df.select(nw.all_horizontal(nw.all())) - expected = {"a": [False, False, True]} - assert_equal_data(result, expected) - - -# copied from tests/expr_and_series/count_test.py -def test_count() -> None: - data = {"a": [1, 3, 2], "b": [4, None, 6], "z": [7.0, None, None]} - df = nw.from_native(duckdb_constructor(data)) - result = df.select(nw.col("a", "b", "z").count()) - expected = {"a": [3], "b": [2], "z": [1]} - assert_equal_data(result, expected) - - -# copied from tests/expr_and_series/double_test.py -def test_double() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.with_columns(nw.all() * 2) - expected = {"a": [2, 6, 4], "b": [8, 8, 12], "z": [14.0, 16.0, 18.0]} - assert_equal_data(result, expected) - - -def test_double_alias() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.with_columns(nw.col("a").alias("o"), nw.all() * 2) - expected = { - "a": [2, 6, 4], - "b": [8, 8, 12], - "z": [14.0, 16.0, 18.0], - "o": [1, 3, 2], - } - assert_equal_data(result, expected) - - -# copied from tests/expr_and_series/max_test.py -def test_expr_max_expr() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - - df = nw.from_native(duckdb_constructor(data)) - result = df.select(nw.col("a", "b", "z").max()) - expected = {"a": [3], "b": [6], "z": [9.0]} - assert_equal_data(result, expected) - - -# copied from tests/expr_and_series/min_test.py -def test_expr_min_expr() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - df = nw.from_native(duckdb_constructor(data)) - result = df.select(nw.col("a", "b", "z").min()) - expected = {"a": [1], "b": [4], "z": [7.0]} - assert_equal_data(result, expected) - - -# copied from tests/expr_and_series/std_test.py -@pytest.mark.xfail -def test_std() -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} - - df = nw.from_native(duckdb_constructor(data)) - result = df.select( - nw.col("a").std().alias("a_ddof_default"), - nw.col("a").std(ddof=1).alias("a_ddof_1"), - nw.col("a").std(ddof=0).alias("a_ddof_0"), - nw.col("b").std(ddof=2).alias("b_ddof_2"), - nw.col("z").std(ddof=0).alias("z_ddof_0"), - ) - expected = { # pragma: no cover - "a_ddof_default": [1.0], - "a_ddof_1": [1.0], - "a_ddof_0": [0.816497], - "b_ddof_2": [1.632993], - "z_ddof_0": [0.816497], - } - assert_equal_data(result, expected) # pragma: no cover - - -# copied from tests/group_by_test.py -def test_group_by_std() -> None: - data = {"a": [1, 1, 2, 2], "b": [5, 4, 3, 2]} - result = ( - nw.from_native(duckdb_constructor(data)) - .group_by("a") - .agg(nw.col("b").std()) - 
.sort("a") - ) - expected = {"a": [1, 2], "b": [0.707107] * 2} - assert_equal_data(result, expected) - - -def test_group_by_simple_named() -> None: - data = {"a": [1, 1, 2], "b": [4, 5, 6], "c": [7, 2, 1]} - df = nw.from_native(duckdb_constructor(data)).lazy() - result = ( - df.group_by("a") - .agg( - b_min=nw.col("b").min(), - b_max=nw.col("b").max(), - ) - .sort("a") - .collect() - ) - expected = { - "a": [1, 2], - "b_min": [4, 6], - "b_max": [5, 6], - } - assert_equal_data(result, expected) - - -def test_group_by_simple_unnamed() -> None: - data = {"a": [1, 1, 2], "b": [4, 5, 6], "c": [7, 2, 1]} - df = nw.from_native(duckdb_constructor(data)).lazy() - result = ( - df.group_by("a") - .agg( - nw.col("b").min(), - nw.col("c").max(), - ) - .collect() - .sort("a") - ) - expected = { - "a": [1, 2], - "b": [4, 6], - "c": [7, 1], - } - assert_equal_data(result, expected) - - -def test_group_by_multiple_keys() -> None: - data = {"a": [1, 1, 2], "b": [4, 4, 6], "c": [7, 2, 1]} - df = nw.from_native(duckdb_constructor(data)).lazy() - result = ( - df.group_by("a", "b") - .agg( - c_min=nw.col("c").min(), - c_max=nw.col("c").max(), - ) - .collect() - .sort("a") - ) - expected = { - "a": [1, 2], - "b": [4, 6], - "c_min": [2, 1], - "c_max": [7, 1], - } - assert_equal_data(result, expected)