From 54c6cd8a581d952c434d1d97f9f7e598d3e003f5 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Fri, 23 Aug 2024 17:04:44 +0100 Subject: [PATCH] feat: Add is_pandas_like_dataframe and is_pandas_like_series (#848) --- docs/api-reference/dependencies.md | 2 ++ narwhals/__init__.py | 2 ++ narwhals/dependencies.py | 20 +++++++++++++++ narwhals/stable/v1.py | 7 ++++++ narwhals/utils.py | 39 +++++++++++++++++++----------- tests/test_utils.py | 7 +++++- 6 files changed, 62 insertions(+), 15 deletions(-) diff --git a/docs/api-reference/dependencies.md b/docs/api-reference/dependencies.md index 7e44ae9ca..6c1a93d91 100644 --- a/docs/api-reference/dependencies.md +++ b/docs/api-reference/dependencies.md @@ -16,6 +16,8 @@ - is_modin_series - is_numpy_array - is_pandas_dataframe + - is_pandas_like_dataframe + - is_pandas_like_series - is_pandas_series - is_polars_dataframe - is_polars_lazyframe diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 716ffeb5f..15198fa35 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -1,3 +1,4 @@ +from narwhals import dependencies from narwhals import selectors from narwhals import stable from narwhals.dataframe import DataFrame @@ -53,6 +54,7 @@ __version__ = "1.5.3" __all__ = [ + "dependencies", "selectors", "concat", "from_dict", diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py index 7ed2f021d..66516eac9 100644 --- a/narwhals/dependencies.py +++ b/narwhals/dependencies.py @@ -134,6 +134,24 @@ def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]: return bool((np := get_numpy()) is not None and isinstance(arr, np.ndarray)) +def is_pandas_like_dataframe(df: Any) -> bool: + """ + Check whether `df` is a pandas-like DataFrame without doing any imports + + By "pandas-like", we mean: pandas, Modin, cuDF. 
+ """ + return is_pandas_dataframe(df) or is_modin_dataframe(df) or is_cudf_dataframe(df) + + +def is_pandas_like_series(arr: Any) -> bool: + """ + Check whether `arr` is a pandas-like Series without doing any imports + + By "pandas-like", we mean: pandas, Modin, cuDF. + """ + return is_pandas_series(arr) or is_modin_series(arr) or is_cudf_series(arr) + + __all__ = [ "get_polars", "get_pandas", @@ -154,4 +172,6 @@ def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]: "is_pyarrow_chunked_array", "is_numpy_array", "is_dask_dataframe", + "is_pandas_like_dataframe", + "is_pandas_like_series", ] diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py index 961fff164..1f070c9a9 100644 --- a/narwhals/stable/v1.py +++ b/narwhals/stable/v1.py @@ -10,6 +10,7 @@ from typing import overload import narwhals as nw +from narwhals import dependencies from narwhals import selectors from narwhals.dataframe import DataFrame as NwDataFrame from narwhals.dataframe import LazyFrame as NwLazyFrame @@ -1400,6 +1401,11 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: """ Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. + Arguments: + df: DataFrame or Series. + *args: Additional arguments which get passed through. + **kwargs: Additional arguments which get passed through. + Notes: For non-pandas-like inputs, this is a no-op. Also, `args` and `kwargs` just get passed down to the underlying library as-is. 
@@ -1592,6 +1598,7 @@ def from_dict( __all__ = [ "selectors", "concat", + "dependencies", "to_native", "from_native", "is_ordered_categorical", diff --git a/narwhals/utils.py b/narwhals/utils.py index cc2a482c4..4142675e2 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -22,6 +22,8 @@ from narwhals.dependencies import is_cudf_series from narwhals.dependencies import is_modin_series from narwhals.dependencies import is_pandas_dataframe +from narwhals.dependencies import is_pandas_like_dataframe +from narwhals.dependencies import is_pandas_like_series from narwhals.dependencies import is_pandas_series from narwhals.dependencies import is_polars_series from narwhals.dependencies import is_pyarrow_chunked_array @@ -261,22 +263,26 @@ def maybe_set_index(df: T, column_names: str | list[str]) -> T: 4 1 5 2 """ - from narwhals._pandas_like.dataframe import PandasLikeDataFrame - df_any = cast(Any, df) - if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame): + native_frame = to_native(df_any) + if is_pandas_like_dataframe(native_frame): return df_any._from_compliant_dataframe( # type: ignore[no-any-return] df_any._compliant_frame._from_native_frame( - df_any._compliant_frame._native_frame.set_index(column_names) + native_frame.set_index(column_names) ) ) - return df + return df_any # type: ignore[no-any-return] -def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: +def maybe_convert_dtypes(obj: T, *args: bool, **kwargs: bool | str) -> T: """ Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like. + Arguments: + obj: DataFrame or Series. + *args: Additional arguments which get passed through. + **kwargs: Additional arguments which get passed through. + Notes: For non-pandas-like inputs, this is a no-op. Also, `args` and `kwargs` just get passed down to the underlying library as-is. 
@@ -298,16 +304,21 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T: b boolean dtype: object """ - from narwhals._pandas_like.dataframe import PandasLikeDataFrame - - df_any = cast(Any, df) - if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame): - return df_any._from_compliant_dataframe( # type: ignore[no-any-return] - df_any._compliant_frame._from_native_frame( - df_any._compliant_frame._native_frame.convert_dtypes(*args, **kwargs) + obj_any = cast(Any, obj) + native_obj = to_native(obj_any) + if is_pandas_like_dataframe(native_obj): + return obj_any._from_compliant_dataframe( # type: ignore[no-any-return] + obj_any._compliant_frame._from_native_frame( + native_obj.convert_dtypes(*args, **kwargs) + ) + ) + if is_pandas_like_series(native_obj): + return obj_any._from_compliant_series( # type: ignore[no-any-return] + obj_any._compliant_series._from_native_series( + native_obj.convert_dtypes(*args, **kwargs) ) ) - return df + return obj_any # type: ignore[no-any-return] def is_ordered_categorical(series: Series) -> bool: diff --git a/tests/test_utils.py b/tests/test_utils.py index f8b9b98ec..b94c8371e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -72,10 +72,15 @@ def test_maybe_set_index_polars() -> None: def test_maybe_convert_dtypes_pandas() -> None: import numpy as np - df = nw.from_native(pd.DataFrame({"a": [1, np.nan]}, dtype=np.dtype("float64"))) + df = nw.from_native( + pd.DataFrame({"a": [1, np.nan]}, dtype=np.dtype("float64")), eager_only=True + ) result = nw.to_native(nw.maybe_convert_dtypes(df)) expected = pd.DataFrame({"a": [1, pd.NA]}, dtype="Int64") pd.testing.assert_frame_equal(result, expected) + result_s = nw.to_native(nw.maybe_convert_dtypes(df["a"])) + expected_s = pd.Series([1, pd.NA], name="a", dtype="Int64") + pd.testing.assert_series_equal(result_s, expected_s) def test_maybe_convert_dtypes_polars() -> None: