From 1b98168550bb67970dde09f16bbd653b58a78a84 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 10 May 2023 20:28:22 +0200 Subject: [PATCH 1/4] Add a `fill_nan` method to dataframe and column Addresses half of gh-142 (`fill_null` is more complex, and not included here). --- .../dataframe_api/column_object.py | 14 ++++++++++++++ .../dataframe_api/dataframe_object.py | 17 +++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 0d8a8a27..48291ce1 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -456,3 +456,17 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[int]: To get the unique values, you can do ``col.get_rows(col.unique_indices())``. """ ... + + def fill_nan(self, value: float | null, /): + """ + Fill floating point ``nan`` values with the given fill value. + + Parameters + ---------- + value : float or `null` + Value used to replace any ``nan`` in the column with. Must be + of the Python scalar type matching the dtype of the column (or + be `null`). + + """ + ... diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 70c39025..f98f2a57 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -706,3 +706,20 @@ def isnan(self) -> DataFrame: In particular, does not check for `np.timedelta64('NaT')`. """ ... + + def fill_nan(self, value: float | null, /): + """ + Fill ``nan`` values with the given fill value. + + The fill operation will apply to all columns with a floating-point + dtype. Other columns remain unchanged. + + Parameters + ---------- + value : float or `null` + Value used to replace any ``nan`` in the column with. 
Must be + of the Python scalar type matching the dtype of the column (or + be `null`. + + """ + ... From 3c7f4a5c397abdcad68ed246010db20f4ee8d880 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 18 May 2023 16:21:55 +0200 Subject: [PATCH 2/4] Resolve reference to `null` value doc build issue, add `__all__` list --- spec/API_specification/dataframe_api/__init__.py | 10 ++++++++++ spec/API_specification/dataframe_api/column_object.py | 2 +- .../dataframe_api/dataframe_object.py | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index cd6ef443..afca3cb0 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -12,6 +12,16 @@ from ._types import DType +__all__ = [ + "__dataframe_api_version__", + "column_from_sequence", + "concat", + "dataframe_from_dict", + "isnull", + "null", +] + + __dataframe_api_version__: str = "YYYY.MM" """ String representing the version of the DataFrame API specification to which diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 48291ce1..e397e614 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -457,7 +457,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[int]: """ ... - def fill_nan(self, value: float | null, /): + def fill_nan(self, value: float | 'null', /): """ Fill floating point ``nan`` values with the given fill value. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index f98f2a57..283075d5 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -707,7 +707,7 @@ def isnan(self) -> DataFrame: """ ... 
- def fill_nan(self, value: float | null, /): + def fill_nan(self, value: float | 'null', /): """ Fill ``nan`` values with the given fill value. From 8c6e694bea3de2d55d0d5941f7de9a5be1e1d9d5 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Fri, 19 May 2023 11:28:09 +0200 Subject: [PATCH 3/4] Add return type annotations to `fill_nan` methods --- spec/API_specification/dataframe_api/column_object.py | 2 +- spec/API_specification/dataframe_api/dataframe_object.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index e397e614..3fec189c 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -457,7 +457,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[int]: """ ... - def fill_nan(self, value: float | 'null', /): + def fill_nan(self, value: float | 'null', /) -> Column: """ Fill floating point ``nan`` values with the given fill value. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 283075d5..aaa03aa6 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -707,7 +707,7 @@ def isnan(self) -> DataFrame: """ ... - def fill_nan(self, value: float | 'null', /): + def fill_nan(self, value: float | 'null', /) -> DataFrame: """ Fill ``nan`` values with the given fill value. 
From 5acee9da1a21a2e0fc0510d4a6850bac24b42cca Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Fri, 19 May 2023 11:29:07 +0200 Subject: [PATCH 4/4] Update spec/API_specification/dataframe_api/dataframe_object.py Co-authored-by: Marco Edward Gorelli --- spec/API_specification/dataframe_api/dataframe_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index aaa03aa6..e348051f 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -719,7 +719,7 @@ def fill_nan(self, value: float | 'null', /) -> DataFrame: value : float or `null` Value used to replace any ``nan`` in the column with. Must be of the Python scalar type matching the dtype of the column (or - be `null`. + be `null`). """ ...