Skip to content

Commit

Permalink
feat: Add is_pandas_like_dataframe and is_pandas_like_series (#848)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Aug 23, 2024
1 parent a1e90db commit 54c6cd8
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 15 deletions.
2 changes: 2 additions & 0 deletions docs/api-reference/dependencies.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
- is_modin_series
- is_numpy_array
- is_pandas_dataframe
- is_pandas_like_dataframe
- is_pandas_like_series
- is_pandas_series
- is_polars_dataframe
- is_polars_lazyframe
Expand Down
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from narwhals import dependencies
from narwhals import selectors
from narwhals import stable
from narwhals.dataframe import DataFrame
Expand Down Expand Up @@ -53,6 +54,7 @@
__version__ = "1.5.3"

__all__ = [
"dependencies",
"selectors",
"concat",
"from_dict",
Expand Down
20 changes: 20 additions & 0 deletions narwhals/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,24 @@ def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]:
return bool((np := get_numpy()) is not None and isinstance(arr, np.ndarray))


def is_pandas_like_dataframe(df: Any) -> bool:
    """
    Report whether `df` is a pandas-like DataFrame, without doing any imports.

    "pandas-like" covers pandas, Modin and cuDF. The per-backend checks run in
    that order and evaluation stops at the first one that matches.
    """
    backend_checks = (is_pandas_dataframe, is_modin_dataframe, is_cudf_dataframe)
    # Generator (not a list) so later checks are skipped once one succeeds.
    return any(check(df) for check in backend_checks)


def is_pandas_like_series(arr: Any) -> bool:
    """
    Report whether `arr` is a pandas-like Series, without doing any imports.

    "pandas-like" covers pandas, Modin and cuDF. The per-backend checks run in
    that order and evaluation stops at the first one that matches.
    """
    backend_checks = (is_pandas_series, is_modin_series, is_cudf_series)
    # Generator (not a list) so later checks are skipped once one succeeds.
    return any(check(arr) for check in backend_checks)


__all__ = [
"get_polars",
"get_pandas",
Expand All @@ -154,4 +172,6 @@ def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]:
"is_pyarrow_chunked_array",
"is_numpy_array",
"is_dask_dataframe",
"is_pandas_like_dataframe",
"is_pandas_like_series",
]
7 changes: 7 additions & 0 deletions narwhals/stable/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import overload

import narwhals as nw
from narwhals import dependencies
from narwhals import selectors
from narwhals.dataframe import DataFrame as NwDataFrame
from narwhals.dataframe import LazyFrame as NwLazyFrame
Expand Down Expand Up @@ -1400,6 +1401,11 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T:
"""
Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like.
Arguments:
df: DataFrame or Series.
*args: Additional arguments which get passed through.
**kwargs: Additional arguments which get passed through.
Notes:
For non-pandas-like inputs, this is a no-op.
Also, `args` and `kwargs` just get passed down to the underlying library as-is.
Expand Down Expand Up @@ -1592,6 +1598,7 @@ def from_dict(
__all__ = [
"selectors",
"concat",
"dependencies",
"to_native",
"from_native",
"is_ordered_categorical",
Expand Down
39 changes: 25 additions & 14 deletions narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from narwhals.dependencies import is_cudf_series
from narwhals.dependencies import is_modin_series
from narwhals.dependencies import is_pandas_dataframe
from narwhals.dependencies import is_pandas_like_dataframe
from narwhals.dependencies import is_pandas_like_series
from narwhals.dependencies import is_pandas_series
from narwhals.dependencies import is_polars_series
from narwhals.dependencies import is_pyarrow_chunked_array
Expand Down Expand Up @@ -261,22 +263,26 @@ def maybe_set_index(df: T, column_names: str | list[str]) -> T:
4 1
5 2
"""
from narwhals._pandas_like.dataframe import PandasLikeDataFrame

df_any = cast(Any, df)
if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame):
native_frame = to_native(df_any)
if is_pandas_like_dataframe(native_frame):
return df_any._from_compliant_dataframe( # type: ignore[no-any-return]
df_any._compliant_frame._from_native_frame(
df_any._compliant_frame._native_frame.set_index(column_names)
native_frame.set_index(column_names)
)
)
return df
return df_any # type: ignore[no-any-return]


def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T:
def maybe_convert_dtypes(obj: T, *args: bool, **kwargs: bool | str) -> T:
"""
Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``, if `obj` is pandas-like.
Arguments:
obj: DataFrame or Series.
*args: Additional arguments which get passed through.
**kwargs: Additional arguments which get passed through.
Notes:
For non-pandas-like inputs, this is a no-op.
Also, `args` and `kwargs` just get passed down to the underlying library as-is.
Expand All @@ -298,16 +304,21 @@ def maybe_convert_dtypes(df: T, *args: bool, **kwargs: bool | str) -> T:
b boolean
dtype: object
"""
from narwhals._pandas_like.dataframe import PandasLikeDataFrame

df_any = cast(Any, df)
if isinstance(getattr(df_any, "_compliant_frame", None), PandasLikeDataFrame):
return df_any._from_compliant_dataframe( # type: ignore[no-any-return]
df_any._compliant_frame._from_native_frame(
df_any._compliant_frame._native_frame.convert_dtypes(*args, **kwargs)
obj_any = cast(Any, obj)
native_obj = to_native(obj_any)
if is_pandas_like_dataframe(native_obj):
return obj_any._from_compliant_dataframe( # type: ignore[no-any-return]
obj_any._compliant_frame._from_native_frame(
native_obj.convert_dtypes(*args, **kwargs)
)
)
if is_pandas_like_series(native_obj):
return obj_any._from_compliant_series( # type: ignore[no-any-return]
obj_any._compliant_series._from_native_series(
native_obj.convert_dtypes(*args, **kwargs)
)
)
return df
return obj_any # type: ignore[no-any-return]


def is_ordered_categorical(series: Series) -> bool:
Expand Down
7 changes: 6 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,15 @@ def test_maybe_set_index_polars() -> None:
def test_maybe_convert_dtypes_pandas() -> None:
import numpy as np

df = nw.from_native(pd.DataFrame({"a": [1, np.nan]}, dtype=np.dtype("float64")))
df = nw.from_native(
pd.DataFrame({"a": [1, np.nan]}, dtype=np.dtype("float64")), eager_only=True
)
result = nw.to_native(nw.maybe_convert_dtypes(df))
expected = pd.DataFrame({"a": [1, pd.NA]}, dtype="Int64")
pd.testing.assert_frame_equal(result, expected)
result_s = nw.to_native(nw.maybe_convert_dtypes(df["a"]))
expected_s = pd.Series([1, pd.NA], name="a", dtype="Int64")
pd.testing.assert_series_equal(result_s, expected_s)


def test_maybe_convert_dtypes_polars() -> None:
Expand Down

0 comments on commit 54c6cd8

Please sign in to comment.