feat: Add is_pandas_like_dataframe and is_pandas_like_series (#848)

narwhals-dev · Aug 23, 2024 · 54c6cd8 · 54c6cd8
1 parent a1e90db
commit 54c6cd8
Show file tree

Hide file tree

Showing 6 changed files with 62 additions and 15 deletions.
diff --git a/docs/api-reference/dependencies.md b/docs/api-reference/dependencies.md
@@ -16,6 +16,8 @@
         - is_modin_series
         - is_numpy_array
         - is_pandas_dataframe
+        - is_pandas_like_dataframe
+        - is_pandas_like_series
         - is_pandas_series
         - is_polars_dataframe
         - is_polars_lazyframe

diff --git a/narwhals/__init__.py b/narwhals/__init__.py
@@ -1,3 +1,4 @@
+from narwhals import dependencies
 from narwhals import selectors
 from narwhals import stable
 from narwhals.dataframe import DataFrame
@@ -53,6 +54,7 @@
 __version__ = "1.5.3"
 
 __all__ = [
+    "dependencies",
     "selectors",
     "concat",
     "from_dict",

diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py
@@ -134,6 +134,24 @@ def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]:
     return bool((np := get_numpy()) is not None and isinstance(arr, np.ndarray))
 
 
+def is_pandas_like_dataframe(df: Any) -> bool:
+    """
+    Check whether `df` is a pandas-like DataFrame without doing any imports
+
+    By "pandas-like", we mean: pandas, Modin, cuDF.
+    """
+    return is_pandas_dataframe(df) or is_modin_dataframe(df) or is_cudf_dataframe(df)
+
+
+def is_pandas_like_series(arr: Any) -> bool:
+    """
+    Check whether `arr` is a pandas-like Series without doing any imports
+
+    By "pandas-like", we mean: pandas, Modin, cuDF.
+    """
+    return is_pandas_series(arr) or is_modin_series(arr) or is_cudf_series(arr)
+
+
 __all__ = [
     "get_polars",
     "get_pandas",
@@ -154,4 +172,6 @@ def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]:
     "is_pyarrow_chunked_array",
     "is_numpy_array",
     "is_dask_dataframe",
+    "is_pandas_like_dataframe",
+    "is_pandas_like_series",
 ]
diff --git a/narwhals/stable/v1.py b/narwhals/stable/v1.py
@@ -10,6 +10,7 @@
 from typing import overload
 
 import narwhals as nw
+from narwhals import dependencies
 from narwhals import selectors
 from narwhals.dataframe import DataFrame as NwDataFrame
 from narwhals.dataframe import LazyFrame as NwLazyFrame
@@ -1400,6 +1401,11 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T:
     """
     Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like.
 
+    Arguments:
+        df: DataFrame or Series.
+        *args: Additional arguments which get passed through.
+        **kwargs: Additional arguments which get passed through.
+
     Notes:
         For non-pandas-like inputs, this is a no-op.
         Also, `args` and `kwargs` just get passed down to the underlying library as-is.
@@ -1592,6 +1598,7 @@ def from_dict(
 __all__ = [
     "selectors",
     "concat",
+    "dependencies",
     "to_native",
     "from_native",
     "is_ordered_categorical",

diff --git a/narwhals/utils.py b/narwhals/utils.py
@@ -22,6 +22,8 @@
 from narwhals.dependencies import is_cudf_series
 from narwhals.dependencies import is_modin_series
 from narwhals.dependencies import is_pandas_dataframe
+from narwhals.dependencies import is_pandas_like_dataframe
+from narwhals.dependencies import is_pandas_like_series
 from narwhals.dependencies import is_pandas_series
 from narwhals.dependencies import is_polars_series
 from narwhals.dependencies import is_pyarrow_chunked_array
@@ -261,22 +263,26 @@ def maybe_set_index(df: T, column_names: str | list[str]) -> T:
         4  1
         5  2
     """
-    from narwhals._pandas_like.dataframe import PandasLikeDataFrame
-
     df_any = cast(Any, df)
-    if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame):
+    native_frame = to_native(df_any)
+    if is_pandas_like_dataframe(native_frame):
         return df_any._from_compliant_dataframe(  # type: ignore[no-any-return]
             df_any._compliant_frame._from_native_frame(
-                df_any._compliant_frame._native_frame.set_index(column_names)
+                native_frame.set_index(column_names)
             )
         )
-    return df
+    return df_any  # type: ignore[no-any-return]
 
 
-def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T:
+def maybe_convert_dtypes(obj: T, *args: bool, **kwargs: bool | str) -> T:
     """
     Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like.
 
+    Arguments:
+        obj: DataFrame or Series.
+        *args: Additional arguments which get passed through.
+        **kwargs: Additional arguments which get passed through.
+
     Notes:
         For non-pandas-like inputs, this is a no-op.
         Also, `args` and `kwargs` just get passed down to the underlying library as-is.
@@ -298,16 +304,21 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T:
         b           boolean
         dtype: object
     """
-    from narwhals._pandas_like.dataframe import PandasLikeDataFrame
-
-    df_any = cast(Any, df)
-    if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame):
-        return df_any._from_compliant_dataframe(  # type: ignore[no-any-return]
-            df_any._compliant_frame._from_native_frame(
-                df_any._compliant_frame._native_frame.convert_dtypes(*args, **kwargs)
+    obj_any = cast(Any, obj)
+    native_obj = to_native(obj_any)
+    if is_pandas_like_dataframe(native_obj):
+        return obj_any._from_compliant_dataframe(  # type: ignore[no-any-return]
+            obj_any._compliant_frame._from_native_frame(
+                native_obj.convert_dtypes(*args, **kwargs)
+            )
+        )
+    if is_pandas_like_series(native_obj):
+        return obj_any._from_compliant_series(  # type: ignore[no-any-return]
+            obj_any._compliant_series._from_native_series(
+                native_obj.convert_dtypes(*args, **kwargs)
             )
         )
-    return df
+    return obj_any  # type: ignore[no-any-return]
 
 
 def is_ordered_categorical(series: Series) -> bool:

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -72,10 +72,15 @@ def test_maybe_set_index_polars() -> None:
 def test_maybe_convert_dtypes_pandas() -> None:
     import numpy as np
 
-    df = nw.from_native(pd.DataFrame({"a": [1, np.nan]}, dtype=np.dtype("float64")))
+    df = nw.from_native(
+        pd.DataFrame({"a": [1, np.nan]}, dtype=np.dtype("float64")), eager_only=True
+    )
     result = nw.to_native(nw.maybe_convert_dtypes(df))
     expected = pd.DataFrame({"a": [1, pd.NA]}, dtype="Int64")
     pd.testing.assert_frame_equal(result, expected)
+    result_s = nw.to_native(nw.maybe_convert_dtypes(df["a"]))
+    expected_s = pd.Series([1, pd.NA], name="a", dtype="Int64")
+    pd.testing.assert_series_equal(result_s, expected_s)
 
 
 def test_maybe_convert_dtypes_polars() -> None: