From 3ac19abd1ad0908b0f9f5fcb18cbb819808b36f8 Mon Sep 17 00:00:00 2001 From: raisa <> Date: Sun, 17 Mar 2024 14:25:18 +0000 Subject: [PATCH] change to_pandas method, add more tests for dataframe --- narwhals/dataframe.py | 8 +++++-- narwhals/pandas_like/dataframe.py | 6 ++++- tests/test_common.py | 37 +++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 69b383b08..eb72ae782 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -169,7 +169,9 @@ def __init__( elif (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame): self._dataframe = PandasDataFrame(df, implementation="modin") self._implementation = "modin" - elif (cudf := get_cudf()) is not None and isinstance(df, cudf.DataFrame): + elif (cudf := get_cudf()) is not None and isinstance( + df, cudf.DataFrame + ): # pragma: no cover self._dataframe = PandasDataFrame(df, implementation="cudf") self._implementation = "cudf" else: @@ -217,7 +219,9 @@ def __init__( elif (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame): self._dataframe = PandasDataFrame(df, implementation="modin") self._implementation = "modin" - elif (cudf := get_cudf()) is not None and isinstance(df, cudf.DataFrame): + elif (cudf := get_cudf()) is not None and isinstance( + df, cudf.DataFrame + ): # pragma: no cover self._dataframe = PandasDataFrame(df, implementation="cudf") self._implementation = "cudf" else: diff --git a/narwhals/pandas_like/dataframe.py b/narwhals/pandas_like/dataframe.py index 689d00b41..3d60efab0 100644 --- a/narwhals/pandas_like/dataframe.py +++ b/narwhals/pandas_like/dataframe.py @@ -229,4 +229,8 @@ def to_numpy(self) -> Any: return self._dataframe.to_numpy() def to_pandas(self) -> Any: - return self._dataframe + if self._implementation == "pandas": + return self._dataframe + if self._implementation == "modin": + return self._dataframe._to_pandas() + return self._dataframe.to_pandas() diff --git a/tests/test_common.py b/tests/test_common.py index b48fed525..83ef6cef9 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -3,6 +3,7 @@ import warnings from typing import Any +import modin.pandas as mpd import numpy as np import pandas as pd import polars as pl @@ -16,7 +17,7 @@ df_lazy = pl.LazyFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - df_mpd = pd.DataFrame( + df_mpd = mpd.DataFrame( pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) ) @@ -172,12 +173,19 @@ def test_columns(df_raw: Any) -> None: df = nw.LazyFrame(df_raw) result = df.columns expected = ["a", "b", "z"] - assert len(result) == len(expected) - assert all(x == y for x, y in zip(result, expected)) + assert result == expected -@pytest.mark.parametrize("df_raw", [df_lazy]) +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd, df_lazy]) def test_lazy_instantiation(df_raw: Any) -> None: + result = nw.LazyFrame(df_raw) + result_native = nw.to_native(result) + expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]} + compare_dicts(result_native, expected) + + +@pytest.mark.parametrize("df_raw", [df_lazy]) +def test_lazy_instantiation_error(df_raw: Any) -> None: with pytest.raises( TypeError, match="Can't instantiate DataFrame from Polars LazyFrame." ): @@ -204,3 +212,24 @@ def test_accepted_dataframes() -> None: match="Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: ", ): nw.LazyFrame(array) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) +def test_convert_pandas(df_raw: Any) -> None: + result = nw.DataFrame(df_raw).to_pandas() + expected = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) + pd.testing.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) +def test_convert_numpy(df_raw: Any) -> None: + result = nw.DataFrame(df_raw).to_numpy() + expected = np.array([[1, 3, 2], [4, 4, 6], [7.0, 8, 9]]).T + np.testing.assert_array_equal(result, expected) + + +@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd]) +def test_shape(df_raw: Any) -> None: + result = nw.DataFrame(df_raw).shape + expected = (3, 3) + assert result == expected