|
22 | 22 | from __future__ import annotations
|
23 | 23 |
|
24 | 24 | import warnings
|
| 25 | +from enum import Enum |
25 | 26 | from typing import (
|
26 | 27 | TYPE_CHECKING,
|
27 | 28 | Any,
|
|
33 | 34 | overload,
|
34 | 35 | )
|
35 | 36 |
|
| 37 | +import pyarrow as pa |
36 | 38 | from typing_extensions import deprecated
|
37 | 39 |
|
| 40 | +from datafusion import functions as f |
| 41 | +from datafusion._internal import DataFrame as DataFrameInternal |
| 42 | +from datafusion.expr import Expr, SortExpr, sort_or_default |
38 | 43 | from datafusion.plan import ExecutionPlan, LogicalPlan
|
39 | 44 | from datafusion.record_batch import RecordBatchStream
|
40 | 45 |
|
41 |
| -import pyarrow as pa |
42 |
| -from datafusion import functions as f |
43 |
| - |
44 | 46 | if TYPE_CHECKING:
|
45 | 47 | import pathlib
|
46 | 48 | from typing import Callable, Sequence
|
47 | 49 |
|
48 | 50 | import pandas as pd
|
49 | 51 | import polars as pl
|
50 |
| - |
51 |
| -from enum import Enum |
52 |
| - |
53 |
| -from datafusion._internal import DataFrame as DataFrameInternal |
54 |
| -from datafusion.expr import Expr, SortExpr, sort_or_default |
55 |
| - |
56 |
| - |
57 | 52 | # excerpt from deltalake
|
58 | 53 | # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163
|
59 | 54 | class Compression(Enum):
|
@@ -868,14 +863,14 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame":
|
868 | 863 |
|
869 | 864 | Examples:
|
870 | 865 | >>> df = df.fill_null(0) # Fill all nulls with 0 where possible
|
871 |
| - >>> df = df.fill_null("missing", subset=["name", "category"]) # Fill string columns |
| 866 | + >>> # Fill nulls in specific string columns |
| 867 | + >>> df = df.fill_null("missing", subset=["name", "category"]) |
872 | 868 |
|
873 | 869 | Notes:
|
874 | 870 | - Only fills nulls in columns where the value can be cast to the column type
|
875 | 871 | - For columns where casting fails, the original column is kept unchanged
|
876 | 872 | - For columns not in subset, the original column is kept unchanged
|
877 | 873 | """
|
878 |
| - |
879 | 874 | # Get columns to process
|
880 | 875 | if subset is None:
|
881 | 876 | subset = self.schema().names
|
@@ -910,29 +905,28 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame":
|
910 | 905 |
|
911 | 906 | return self.select(*exprs)
|
912 | 907 |
|
913 |
| - def fill_nan( |
914 |
| - self, value: float | int, subset: list[str] | None = None |
915 |
| - ) -> "DataFrame": |
| 908 | + def fill_nan(self, value: float | int, subset: list[str] | None = None) -> "DataFrame": |
916 | 909 | """Fill NaN values in specified numeric columns with a value.
|
917 | 910 |
|
918 | 911 | Args:
|
919 |
| - value: Numeric value to replace NaN values with |
920 |
| - subset: Optional list of column names to fill. If None, fills all numeric columns. |
| 912 | + value: Numeric value to replace NaN values with. |
| 913 | + subset: Optional list of column names to fill. If None, fills all numeric |
| 914 | + columns. |
921 | 915 |
|
922 | 916 | Returns:
|
923 |
| - DataFrame with NaN values replaced in numeric columns |
| 917 | + DataFrame with NaN values replaced in numeric columns. |
924 | 918 |
|
925 | 919 | Examples:
|
926 | 920 | >>> df = df.fill_nan(0) # Fill all NaNs with 0 in numeric columns
|
927 |
| - >>> df = df.fill_nan(99.9, subset=["price", "score"]) # Fill specific columns |
| 921 | + >>> # Fill NaNs in specific numeric columns |
| 922 | + >>> df = df.fill_nan(99.9, subset=["price", "score"]) |
928 | 923 |
|
929 | 924 | Notes:
|
930 | 925 | - Only fills NaN values in numeric columns (float32, float64)
|
931 | 926 | - Non-numeric columns are kept unchanged
|
932 | 927 | - For columns not in subset, the original column is kept unchanged
|
933 | 928 | - Value must be numeric (int or float)
|
934 | 929 | """
|
935 |
| - |
936 | 930 | if not isinstance(value, (int, float)):
|
937 | 931 | raise ValueError("Value must be numeric (int or float)")
|
938 | 932 |
|
|
0 commit comments