|
36 | 36 | from narwhals.typing import IntoDataFrame
|
37 | 37 | from narwhals.typing import IntoExpr
|
38 | 38 | from narwhals.typing import IntoFrame
|
| 39 | + from narwhals.typing import SizeUnit |
39 | 40 | from narwhals.utils import Implementation
|
40 | 41 |
|
41 | 42 | FrameT = TypeVar("FrameT", bound="IntoFrame")
|
@@ -764,6 +765,50 @@ def get_column(self, name: str) -> Series[Any]:
|
764 | 765 | level=self._level,
|
765 | 766 | )
|
766 | 767 |
|
def estimated_size(self, unit: SizeUnit = "b") -> int | float:
    """Estimate the total (heap) allocated size of the `DataFrame`.

    The figure is reported in the requested unit, which is bytes unless
    another unit is given. The computation itself is delegated to the
    underlying compliant frame.

    Arguments:
        unit: One of 'b', 'kb', 'mb', 'gb', 'tb', 'bytes', 'kilobytes',
            'megabytes', 'gigabytes', or 'terabytes'.

    Returns:
        Integer or Float.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> from narwhals.typing import IntoDataFrameT
        >>> data = {
        ...     "foo": [1, 2, 3],
        ...     "bar": [6.0, 7.0, 8.0],
        ...     "ham": ["a", "b", "c"],
        ... }
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)
        >>> df_pa = pa.table(data)

        Let's define a dataframe-agnostic function:

        >>> def agnostic_estimated_size(df_native: IntoDataFrameT) -> int | float:
        ...     df = nw.from_native(df_native)
        ...     return df.estimated_size()

        The same function accepts pandas, Polars and PyArrow inputs alike:

        >>> agnostic_estimated_size(df_pd)
        np.int64(330)
        >>> agnostic_estimated_size(df_pl)
        51
        >>> agnostic_estimated_size(df_pa)
        63
    """
    # The backend-specific frame does the actual measuring; `unit` is
    # forwarded by keyword exactly as received.
    backend_frame = self._compliant_frame
    size: int | float = backend_frame.estimated_size(unit=unit)
    return size
| 811 | + |
767 | 812 | @overload
|
768 | 813 | def __getitem__(self, item: tuple[Sequence[int], slice]) -> Self: ...
|
769 | 814 | @overload
|
|
0 commit comments