diff --git a/narwhals/functions.py b/narwhals/functions.py index e66b46bdf..618b2c81b 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -110,117 +110,62 @@ def concat( >>> import pandas as pd >>> import polars as pl + >>> import pyarrow as pa >>> import narwhals as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"a": [5, 2], "b": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Let's define a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_vertical_concat(df1, df2): - ... return nw.concat([df1, df2], how="vertical") - - >>> agnostic_vertical_concat(df_pd_1, df_pd_2) - a b - 0 1 4 - 1 2 5 - 2 3 6 - 0 5 1 - 1 2 4 - >>> agnostic_vertical_concat(df_pl_1, df_pl_2) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - - Let's look at case a for horizontal concatenation: - - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> data_2 = {"c": [5, 2], "d": [1, 4]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Defining a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_horizontal_concat(df1, df2): - ... 
return nw.concat([df1, df2], how="horizontal") - - >>> agnostic_horizontal_concat(df_pd_1, df_pd_2) - a b c d - 0 1 4 5.0 1.0 - 1 2 5 2.0 4.0 - 2 3 6 NaN NaN - - >>> agnostic_horizontal_concat(df_pl_1, df_pl_2) - shape: (3, 4) - ┌─────┬─────┬──────┬──────┐ - │ a ┆ b ┆ c ┆ d │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪══════╪══════╡ - │ 1 ┆ 4 ┆ 5 ┆ 1 │ - │ 2 ┆ 5 ┆ 2 ┆ 4 │ - │ 3 ┆ 6 ┆ null ┆ null │ - └─────┴─────┴──────┴──────┘ - - Let's look at case a for diagonal concatenation: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> data_1 = {"a": [1, 2], "b": [3.5, 4.5]} - >>> data_2 = {"a": [3, 4], "z": ["x", "y"]} - - >>> df_pd_1 = pd.DataFrame(data_1) - >>> df_pd_2 = pd.DataFrame(data_2) - >>> df_pl_1 = pl.DataFrame(data_1) - >>> df_pl_2 = pl.DataFrame(data_2) - - Defining a dataframe-agnostic function: - - >>> @nw.narwhalify - ... def agnostic_diagonal_concat(df1, df2): - ... return nw.concat([df1, df2], how="diagonal") - - >>> agnostic_diagonal_concat(df_pd_1, df_pd_2) - a b z - 0 1 3.5 NaN - 1 2 4.5 NaN - 0 3 NaN x - 1 4 NaN y - - >>> agnostic_diagonal_concat(df_pl_1, df_pl_2) - shape: (4, 3) - ┌─────┬──────┬──────┐ - │ a ┆ b ┆ z │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ str │ - ╞═════╪══════╪══════╡ - │ 1 ┆ 3.5 ┆ null │ - │ 2 ┆ 4.5 ┆ null │ - │ 3 ┆ null ┆ x │ - │ 4 ┆ null ┆ y │ - └─────┴──────┴──────┘ + Let's look at a case of vertical concatenation (pandas backed): + + >>> df_pd_1 = nw.from_native(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + >>> df_pd_2 = nw.from_native(pd.DataFrame({"a": [5, 2], "b": [1, 4]})) + >>> nw.concat([df_pd_1, df_pd_2], how="vertical") + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 1 4 | + | 1 2 5 | + | 2 3 6 | + | 0 5 1 | + | 1 2 4 | + └──────────────────┘ + + Let's look at a case of horizontal concatenation (polars backed): + + >>> df_pl_1 = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) + >>> df_pl_2 = 
nw.from_native(pl.DataFrame({"c": [5, 2], "d": [1, 4]})) + >>> nw.concat([df_pl_1, df_pl_2], how="horizontal") + ┌───────────────────────────┐ + | Narwhals DataFrame | + |---------------------------| + |shape: (3, 4) | + |┌─────┬─────┬──────┬──────┐| + |│ a ┆ b ┆ c ┆ d │| + |│ --- ┆ --- ┆ --- ┆ --- │| + |│ i64 ┆ i64 ┆ i64 ┆ i64 │| + |╞═════╪═════╪══════╪══════╡| + |│ 1 ┆ 4 ┆ 5 ┆ 1 │| + |│ 2 ┆ 5 ┆ 2 ┆ 4 │| + |│ 3 ┆ 6 ┆ null ┆ null │| + |└─────┴─────┴──────┴──────┘| + └───────────────────────────┘ + + Let's look at a case of diagonal concatenation (pyarrow backed): + + >>> df_pa_1 = nw.from_native(pa.table({"a": [1, 2], "b": [3.5, 4.5]})) + >>> df_pa_2 = nw.from_native(pa.table({"a": [3, 4], "z": ["x", "y"]})) + >>> nw.concat([df_pa_1, df_pa_2], how="diagonal") + ┌──────────────────────────┐ + | Narwhals DataFrame | + |--------------------------| + |pyarrow.Table | + |a: int64 | + |b: double | + |z: string | + |---- | + |a: [[1,2],[3,4]] | + |b: [[3.5,4.5],[null,null]]| + |z: [[null,null],["x","y"]]| + └──────────────────────────┘ """ if how not in {"horizontal", "vertical", "diagonal"}: # pragma: no cover msg = "Only vertical, horizontal and diagonal concatenations are supported." @@ -258,51 +203,19 @@ def new_series( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT, IntoSeriesT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's define a dataframe-agnostic function: - - >>> def agnostic_new_series(df_native: IntoFrameT) -> IntoSeriesT: - ... values = [4, 1, 2, 3] - ... native_namespace = nw.get_native_namespace(df_native) - ... return nw.new_series( - ... name="a", - ... values=values, - ... dtype=nw.Int32, - ... native_namespace=native_namespace, - ... 
).to_native() - - We can then pass any supported eager library, such as pandas / Polars / PyArrow: - - >>> agnostic_new_series(pd.DataFrame(data)) - 0 4 - 1 1 - 2 2 - 3 3 - Name: a, dtype: int32 - >>> agnostic_new_series(pl.DataFrame(data)) # doctest: +NORMALIZE_WHITESPACE - shape: (4,) - Series: 'a' [i32] - [ - 4 - 1 - 2 - 3 - ] - >>> agnostic_new_series(pa.table(data)) - - [ - [ - 4, - 1, - 2, - 3 - ] - ] + >>> + >>> values = [4, 1, 2, 3] + >>> nw.new_series(name="a", values=values, dtype=nw.Int32, native_namespace=pd) + ┌─────────────────────┐ + | Narwhals Series | + |---------------------| + |0 4 | + |1 1 | + |2 2 | + |3 3 | + |Name: a, dtype: int32| + └─────────────────────┘ """ return _new_series_impl( name, @@ -334,11 +247,7 @@ def _new_series_impl( dtype_pl = None native_series = native_namespace.Series(name=name, values=values, dtype=dtype_pl) - elif implementation in { - Implementation.PANDAS, - Implementation.MODIN, - Implementation.CUDF, - }: + elif implementation.is_pandas_like(): if dtype: from narwhals._pandas_like.utils import ( narwhals_to_native_dtype as pandas_like_narwhals_to_native_dtype, @@ -416,40 +325,16 @@ def from_dict( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - - Let's create a new dataframe and specify the backend argument. - - >>> def agnostic_from_dict(backend: str) -> IntoFrameT: - ... data = {"c": [5, 2], "d": [1, 4]} - ... 
return nw.from_dict(data, backend=backend).to_native() - - Let's see what happens when passing pandas, Polars or PyArrow input: - - >>> agnostic_from_dict(backend="pandas") - c d - 0 5 1 - 1 2 4 - >>> agnostic_from_dict(backend="polars") - shape: (2, 2) - ┌─────┬─────┐ - │ c ┆ d │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 5 ┆ 1 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - >>> agnostic_from_dict(backend="pyarrow") - pyarrow.Table - c: int64 - d: int64 - ---- - c: [[5,2]] - d: [[1,4]] + >>> data = {"c": [5, 2], "d": [1, 4]} + >>> nw.from_dict(data, backend="pandas") + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | c d | + | 0 5 1 | + | 1 2 4 | + └──────────────────┘ """ backend = validate_native_namespace_and_backend( backend, native_namespace, emit_deprecation_warning=True @@ -495,11 +380,7 @@ def _from_dict_impl( if eager_backend is Implementation.POLARS: schema_pl = Schema(schema).to_polars() if schema else None native_frame = native_namespace.from_dict(data, schema=schema_pl) - elif eager_backend in { - Implementation.PANDAS, - Implementation.MODIN, - Implementation.CUDF, - }: + elif eager_backend.is_pandas_like(): from narwhals._pandas_like.utils import broadcast_align_and_extract_native aligned_data = {} @@ -568,123 +449,25 @@ def from_numpy( A new DataFrame. Examples: - >>> import pandas as pd - >>> import polars as pl + >>> import numpy as np >>> import pyarrow as pa >>> import narwhals as nw - >>> import numpy as np - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2], "b": [3, 4]} - - Let's create a new dataframe of the same class as the dataframe we started with, from a NumPy ndarray of new data: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy( - ... new_data, native_namespace=native_namespace - ... 
).to_native() - - Let's see what happens when passing pandas, Polars or PyArrow input: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - column_0 column_1 column_2 - 0 5 2 1 - 1 1 4 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌──────────┬──────────┬──────────┐ - │ column_0 ┆ column_1 ┆ column_2 │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════════╪══════════╪══════════╡ - │ 5 ┆ 2 ┆ 1 │ - │ 1 ┆ 4 ┆ 3 │ - └──────────┴──────────┴──────────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - column_0: int64 - column_1: int64 - column_2: int64 - ---- - column_0: [[5,1]] - column_1: [[2,4]] - column_2: [[1,3]] - - Let's specify the column names: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... schema = ["c", "d", "e"] - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy( - ... new_data, native_namespace=native_namespace, schema=schema - ... ).to_native() - - Let's see the modified outputs: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - c d e - 0 5 2 1 - 1 1 4 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ c ┆ d ┆ e │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ 5 ┆ 2 ┆ 1 │ - │ 1 ┆ 4 ┆ 3 │ - └─────┴─────┴─────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - c: int64 - d: int64 - e: int64 - ---- - c: [[5,1]] - d: [[2,4]] - e: [[1,3]] - - Let's modify the function so that it specifies the schema: - - >>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT: - ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) - ... schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()} - ... df = nw.from_native(df_native) - ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy( - ... new_data, native_namespace=native_namespace, schema=schema - ... 
).to_native() - - Let's see the outputs: - - >>> agnostic_from_numpy(pd.DataFrame(data)) - c d e - 0 5 2.0 1 - 1 1 4.0 3 - >>> agnostic_from_numpy(pl.DataFrame(data)) - shape: (2, 3) - ┌─────┬─────┬─────┐ - │ c ┆ d ┆ e │ - │ --- ┆ --- ┆ --- │ - │ i16 ┆ f32 ┆ i8 │ - ╞═════╪═════╪═════╡ - │ 5 ┆ 2.0 ┆ 1 │ - │ 1 ┆ 4.0 ┆ 3 │ - └─────┴─────┴─────┘ - >>> agnostic_from_numpy(pa.table(data)) - pyarrow.Table - c: int16 - d: float - e: int8 - ---- - c: [[5,1]] - d: [[2,4]] - e: [[1,3]] + >>> + >>> arr = np.array([[5, 2, 1], [1, 4, 3]]) + >>> schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()} + >>> nw.from_numpy(arr, schema=schema, native_namespace=pa) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | pyarrow.Table | + | c: int16 | + | d: float | + | e: int8 | + | ---- | + | c: [[5,1]] | + | d: [[2,4]] | + | e: [[1,3]] | + └──────────────────┘ """ return _from_numpy_impl( data, @@ -729,11 +512,7 @@ def _from_numpy_impl( raise TypeError(msg) native_frame = native_namespace.from_numpy(data, schema=schema) - elif implementation in { - Implementation.PANDAS, - Implementation.MODIN, - Implementation.CUDF, - }: + elif implementation.is_pandas_like(): if isinstance(schema, (dict, Schema)): from narwhals._pandas_like.utils import get_dtype_backend from narwhals._pandas_like.utils import ( @@ -821,34 +600,23 @@ def from_arrow( Examples: >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - - Let's define a dataframe-agnostic function which creates a PyArrow - Table. - - >>> def agnostic_to_arrow(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return nw.from_arrow(df, native_namespace=pa).to_native() - - Let's see what happens when passing pandas / Polars input: - - >>> agnostic_to_arrow(pd.DataFrame(data)) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] - >>> agnostic_to_arrow(pl.DataFrame(data)) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] + >>> + >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4.2, 5.1]}) + >>> nw.from_arrow(df_native, native_namespace=pl) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (2, 2) | + | ┌─────┬─────┐ | + | │ a ┆ b │ | + | │ --- ┆ --- │ | + | │ i64 ┆ f64 │ | + | ╞═════╪═════╡ | + | │ 1 ┆ 4.2 │ | + | │ 2 ┆ 5.1 │ | + | └─────┴─────┘ | + └──────────────────┘ """ if not hasattr(native_frame, "__arrow_c_stream__"): msg = f"Given object of type {type(native_frame)} does not support PyCapsule interface" @@ -937,24 +705,14 @@ def _get_deps_info() -> dict[str, str]: Returns: Mapping from dependency to version. """ - deps = ( - "pandas", - "polars", - "cudf", - "modin", - "pyarrow", - "numpy", - ) - - from . import __version__ - - deps_info = { - "narwhals": __version__, - } - from importlib.metadata import PackageNotFoundError from importlib.metadata import version + from narwhals import __version__ + + deps = ("pandas", "polars", "cudf", "modin", "pyarrow", "numpy") + deps_info = {"narwhals": __version__} + for modname in deps: try: deps_info[modname] = version(modname) @@ -1021,44 +779,16 @@ def read_csv( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from types import ModuleType - - Let's create an agnostic function that reads a csv file with a specified native namespace: - - >>> def agnostic_read_csv(native_namespace: ModuleType) -> IntoDataFrame: - ... return nw.read_csv( - ... "file.csv", native_namespace=native_namespace - ... 
).to_native() - - Then we can read the file by passing pandas, Polars or PyArrow namespaces: - - >>> agnostic_read_csv(native_namespace=pd) # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 - >>> agnostic_read_csv(native_namespace=pl) # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_read_csv(native_namespace=pa) # doctest:+SKIP - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] + >>> + >>> nw.read_csv("file.csv", native_namespace=pd) # doctest:+SKIP + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 1 4 | + | 1 2 5 | + └──────────────────┘ """ return _read_csv_impl(source, native_namespace=native_namespace, **kwargs) @@ -1108,38 +838,18 @@ def scan_csv( LazyFrame. Examples: - >>> import dask.dataframe as dd - >>> import polars as pl - >>> import pyarrow as pa + >>> import duckdb >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> from types import ModuleType - - Let's create an agnostic function that lazily reads a csv file with a specified native namespace: - - >>> def agnostic_scan_csv(native_namespace: ModuleType) -> IntoFrame: - ... return nw.scan_csv( - ... "file.csv", native_namespace=native_namespace - ... 
).to_native() - - Then we can read the file by passing, for example, Polars or Dask namespaces: - - >>> agnostic_scan_csv(native_namespace=pl).collect() # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_scan_csv(native_namespace=dd).compute() # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 + >>> + >>> nw.scan_csv("file.csv", native_namespace=duckdb).to_native() # doctest:+SKIP + ┌─────────┬───────┐ + │ a │ b │ + │ varchar │ int32 │ + ├─────────┼───────┤ + │ x │ 1 │ + │ y │ 2 │ + │ z │ 3 │ + └─────────┴───────┘ """ return _scan_csv_impl(source, native_namespace=native_namespace, **kwargs) @@ -1192,45 +902,20 @@ def read_parquet( DataFrame. Examples: - >>> import pandas as pd - >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoDataFrame - >>> from types import ModuleType - - Let's create an agnostic function that reads a parquet file with a specified native namespace: - - >>> def agnostic_read_parquet(native_namespace: ModuleType) -> IntoDataFrame: - ... return nw.read_parquet( - ... "file.parquet", native_namespace=native_namespace - ... 
).to_native() - - Then we can read the file by passing pandas, Polars or PyArrow namespaces: - - >>> agnostic_read_parquet(native_namespace=pd) # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 - >>> agnostic_read_parquet(native_namespace=pl) # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_read_parquet(native_namespace=pa) # doctest:+SKIP - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,3]] - b: [[4,5,6]] + >>> + >>> nw.read_parquet("file.parquet", native_namespace=pa) # doctest:+SKIP + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + |pyarrow.Table | + |a: int64 | + |c: double | + |---- | + |a: [[1,2]] | + |c: [[0.2,0.1]] | + └──────────────────┘ """ return _read_parquet_impl(source, native_namespace=native_namespace, **kwargs) @@ -1282,37 +967,18 @@ def scan_parquet( Examples: >>> import dask.dataframe as dd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrame - >>> from types import ModuleType - - Let's create an agnostic function that lazily reads a parquet file with a specified native namespace: - - >>> def agnostic_scan_parquet(native_namespace: ModuleType) -> IntoFrame: - ... return nw.scan_parquet( - ... "file.parquet", native_namespace=native_namespace - ... ).to_native() - - Then we can read the file by passing, for example, Polars or Dask namespaces: - - >>> agnostic_scan_parquet(native_namespace=pl).collect() # doctest:+SKIP - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 4 │ - │ 2 ┆ 5 │ - │ 3 ┆ 6 │ - └─────┴─────┘ - >>> agnostic_scan_parquet(native_namespace=dd).compute() # doctest:+SKIP - a b - 0 1 4 - 1 2 5 - 2 3 6 + >>> + >>> nw.scan_parquet( + ... "file.parquet", native_namespace=dd + ... 
).collect() # doctest:+SKIP + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 1 4 | + | 1 2 5 | + └──────────────────┘ """ return _scan_parquet_impl(source, native_namespace=native_namespace, **kwargs) @@ -1356,47 +1022,24 @@ def col(*names: str | Iterable[str]) -> Expr: A new expression. Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_col(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a") * nw.col("b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_col`: - - >>> agnostic_col(df_pd) - a - 0 3 - 1 8 - - >>> agnostic_col(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - │ 8 │ - └─────┘ - - >>> agnostic_col(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[3,8]] + >>> df_native = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": ["x", "z"]}) + >>> nw.from_native(df_native).select(nw.col("a", "b") * nw.col("b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (2, 2) | + | ┌─────┬─────┐ | + | │ a ┆ b │ | + | │ --- ┆ --- │ | + | │ i64 ┆ i64 │ | + | ╞═════╪═════╡ | + | │ 3 ┆ 9 │ | + | │ 8 ┆ 16 │ | + | └─────┴─────┘ | + └──────────────────┘ """ def func(plx: Any) -> Any: @@ -1419,46 +1062,21 @@ def nth(*indices: int | Sequence[int]) -> Expr: A new expression. 
Examples: - >>> import pandas as pd - >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2], "b": [3, 4]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_nth(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.nth(0) * 2).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_nth`: - - >>> agnostic_nth(df_pd) - a - 0 2 - 1 4 - - >>> agnostic_nth(df_pl) - shape: (2, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 4 │ - └─────┘ - - >>> agnostic_nth(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,4]] + >>> df_native = pa.table({"a": [1, 2], "b": [3, 4], "c": [0.123, 3.14]}) + >>> nw.from_native(df_native).select(nw.nth(0, 2) * 2) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + |pyarrow.Table | + |a: int64 | + |c: double | + |---- | + |a: [[2,4]] | + |c: [[0.246,6.28]] | + └──────────────────┘ """ def func(plx: Any) -> Any: @@ -1475,51 +1093,18 @@ def all_() -> Expr: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all() * 2).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_all`: - - >>> agnostic_all(df_pd) - a b - 0 2 8 - 1 4 10 - 2 6 12 - - >>> agnostic_all(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 2 ┆ 8 │ - │ 4 ┆ 10 │ - │ 6 ┆ 12 │ - └─────┴─────┘ - - >>> agnostic_all(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[2,4,6]] - b: [[8,10,12]] + >>> df_native = pd.DataFrame({"a": [1, 2], "b": [3.14, 0.123]}) + >>> nw.from_native(df_native).select(nw.all() * 2) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 2 6.280 | + | 1 4 0.246 | + └──────────────────┘ """ return Expr( lambda plx: plx.all(), @@ -1538,42 +1123,22 @@ def len_() -> Expr: Examples: >>> import polars as pl - >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.len()).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_len`: - - >>> agnostic_len(df_pd) - len - 0 2 - >>> agnostic_len(df_pl) - shape: (1, 1) - ┌─────┐ - │ len │ - │ --- │ - │ u32 │ - ╞═════╡ - │ 2 │ - └─────┘ - >>> agnostic_len(df_pa) - pyarrow.Table - len: int64 - ---- - len: [[2]] + >>> df_native = pl.DataFrame({"a": [1, 2], "b": [5, None]}) + >>> nw.from_native(df_native).select(nw.len()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (1, 1) | + | ┌─────┐ | + | │ len │ | + | │ --- │ | + | │ u32 │ | + | ╞═════╡ | + | │ 2 │ | + | └─────┘ | + └──────────────────┘ """ def func(plx: Any) -> Any: @@ -1596,44 +1161,16 @@ def sum(*columns: str) -> Expr: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.sum("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_sum`: - - >>> agnostic_sum(df_pd) - a - 0 3 - - >>> agnostic_sum(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 3 │ - └─────┘ - - >>> agnostic_sum(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[3]] + >>> df_native = pd.DataFrame({"a": [1, 2], "b": [-1.4, 6.2]}) + >>> nw.from_native(df_native).select(nw.sum("a", "b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 3 4.8 | + └──────────────────┘ """ return Expr( lambda plx: plx.col(*columns).sum(), @@ -1656,45 +1193,21 @@ def mean(*columns: str) -> Expr: A new expression. 
Examples: - >>> import pandas as pd - >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 8, 3]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe agnostic function: - - >>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.mean("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_mean`: - - >>> agnostic_mean(df_pd) - a - 0 4.0 - - >>> agnostic_mean(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 4.0 │ - └─────┘ - - >>> agnostic_mean(df_pa) - pyarrow.Table - a: double - ---- - a: [[4]] + >>> df_native = pa.table({"a": [1, 8, 3], "b": [3.14, 6.28, 42.1]}) + >>> nw.from_native(df_native).select(nw.mean("a", "b")) + ┌─────────────────────────┐ + | Narwhals DataFrame | + |-------------------------| + |pyarrow.Table | + |a: double | + |b: double | + |---- | + |a: [[4]] | + |b: [[17.173333333333336]]| + └─────────────────────────┘ """ return Expr( lambda plx: plx.col(*columns).mean(), @@ -1719,45 +1232,23 @@ def median(*columns: str) -> Expr: A new expression. Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [4, 5, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe agnostic function: - - >>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.median("a")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_median`: - - >>> agnostic_median(df_pd) - a - 0 4.0 - - >>> agnostic_median(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 4.0 │ - └─────┘ - - >>> agnostic_median(df_pa) - pyarrow.Table - a: double - ---- - a: [[4]] + >>> df_native = pl.DataFrame({"a": [4, 5, 2]}) + >>> nw.from_native(df_native).select(nw.median("a")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (1, 1) | + | ┌─────┐ | + | │ a │ | + | │ --- │ | + | │ f64 │ | + | ╞═════╡ | + | │ 4.0 │ | + | └─────┘ | + └──────────────────┘ """ return Expr( lambda plx: plx.col(*columns).median(), @@ -1780,45 +1271,21 @@ def min(*columns: str) -> Expr: A new expression. Examples: - >>> import polars as pl - >>> import pandas as pd >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.min("b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_min`: - - >>> agnostic_min(df_pd) - b - 0 5 - - >>> agnostic_min(df_pl) - shape: (1, 1) - ┌─────┐ - │ b │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 5 │ - └─────┘ - - >>> agnostic_min(df_pa) - pyarrow.Table - b: int64 - ---- - b: [[5]] + >>> df_native = pa.table({"a": [1, 2], "b": [5, 10]}) + >>> nw.from_native(df_native).select(nw.min("a", "b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | pyarrow.Table | + | a: int64 | + | b: int64 | + | ---- | + | a: [[1]] | + | b: [[5]] | + └──────────────────┘ """ return Expr( lambda plx: plx.col(*columns).min(), @@ -1841,45 +1308,17 @@ def max(*columns: str) -> Expr: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2], "b": [5, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.max("a")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_max`: - - >>> agnostic_max(df_pd) - a - 0 2 - - >>> agnostic_max(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - └─────┘ - - >>> agnostic_max(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2]] + >>> df_native = pd.DataFrame({"a": [1, 2], "b": [5, 10]}) + >>> nw.from_native(df_native).select(nw.max("a", "b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 2 10 | + └──────────────────┘ """ return Expr( lambda plx: plx.col(*columns).max(), @@ -1903,48 +1342,25 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: A new expression. 
Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2, 3], "b": [5, 10, None]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_sum_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.sum_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_sum_horizontal`: - - >>> agnostic_sum_horizontal(df_pd) - a - 0 6.0 - 1 12.0 - 2 3.0 - - >>> agnostic_sum_horizontal(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 6 │ - │ 12 │ - │ 3 │ - └─────┘ - - >>> agnostic_sum_horizontal(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[6,12,3]] + >>> df_native = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, None]}) + >>> nw.from_native(df_native).with_columns(sum=nw.sum_horizontal("a", "b")) + ┌────────────────────┐ + | Narwhals DataFrame | + |--------------------| + |shape: (3, 3) | + |┌─────┬──────┬─────┐| + |│ a ┆ b ┆ sum │| + |│ --- ┆ --- ┆ --- │| + |│ i64 ┆ i64 ┆ i64 │| + |╞═════╪══════╪═════╡| + |│ 1 ┆ 5 ┆ 6 │| + |│ 2 ┆ 10 ┆ 12 │| + |│ 3 ┆ null ┆ 3 │| + |└─────┴──────┴─────┘| + └────────────────────┘ """ if not exprs: msg = "At least one expression must be passed to `sum_horizontal`" @@ -1977,51 +1393,23 @@ def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: A new expression. Examples: - >>> import pandas as pd - >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... 
} - - We define a dataframe-agnostic function that computes the horizontal min of "a" - and "b" columns: - - >>> def agnostic_min_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.min_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_min_horizontal`: - - >>> agnostic_min_horizontal(pd.DataFrame(data)) - a - 0 1.0 - 1 5.0 - 2 3.0 - - >>> agnostic_min_horizontal(pl.DataFrame(data)) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 5 │ - │ 3 │ - └─────┘ - - >>> agnostic_min_horizontal(pa.table(data)) - pyarrow.Table - a: int64 - ---- - a: [[1,5,3]] + >>> df_native = pa.table({"a": [1, 8, 3], "b": [4, 5, None]}) + >>> nw.from_native(df_native).with_columns(h_min=nw.min_horizontal("a", "b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | pyarrow.Table | + | a: int64 | + | b: int64 | + | h_min: int64 | + | ---- | + | a: [[1,8,3]] | + | b: [[4,5,null]] | + | h_min: [[1,5,3]] | + └──────────────────┘ """ if not exprs: msg = "At least one expression must be passed to `min_horizontal`" @@ -2054,51 +1442,25 @@ def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: A new expression. Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = { - ... "a": [1, 8, 3], - ... "b": [4, 5, None], - ... "c": ["x", "y", "z"], - ... } - - We define a dataframe-agnostic function that computes the horizontal max of "a" - and "b" columns: - - >>> def agnostic_max_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.max_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_max_horizontal`: - - >>> agnostic_max_horizontal(pd.DataFrame(data)) - a - 0 4.0 - 1 8.0 - 2 3.0 - - >>> agnostic_max_horizontal(pl.DataFrame(data)) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 4 │ - │ 8 │ - │ 3 │ - └─────┘ - - >>> agnostic_max_horizontal(pa.table(data)) - pyarrow.Table - a: int64 - ---- - a: [[4,8,3]] + >>> df_native = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, None]}) + >>> nw.from_native(df_native).with_columns(h_max=nw.max_horizontal("a", "b")) + ┌──────────────────────┐ + | Narwhals DataFrame | + |----------------------| + |shape: (3, 3) | + |┌─────┬──────┬───────┐| + |│ a ┆ b ┆ h_max │| + |│ --- ┆ --- ┆ --- │| + |│ i64 ┆ i64 ┆ i64 │| + |╞═════╪══════╪═══════╡| + |│ 1 ┆ 4 ┆ 4 │| + |│ 8 ┆ 5 ┆ 8 │| + |│ 3 ┆ null ┆ 3 │| + |└─────┴──────┴───────┘| + └──────────────────────┘ """ if not exprs: msg = "At least one expression must be passed to `max_horizontal`" @@ -2182,54 +1544,21 @@ def when(*predicates: IntoExpr | Iterable[IntoExpr]) -> When: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> >>> data = {"a": [1, 2, 3], "b": [5, 10, 15]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_when_then_otherwise(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") - ... 
).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_when_then_otherwise`: - - >>> agnostic_when_then_otherwise(df_pd) - a b a_when - 0 1 5 5 - 1 2 10 5 - 2 3 15 6 - - >>> agnostic_when_then_otherwise(df_pl) - shape: (3, 3) - ┌─────┬─────┬────────┐ - │ a ┆ b ┆ a_when │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i32 │ - ╞═════╪═════╪════════╡ - │ 1 ┆ 5 ┆ 5 │ - │ 2 ┆ 10 ┆ 5 │ - │ 3 ┆ 15 ┆ 6 │ - └─────┴─────┴────────┘ - - >>> agnostic_when_then_otherwise(df_pa) - pyarrow.Table - a: int64 - b: int64 - a_when: int64 - ---- - a: [[1,2,3]] - b: [[5,10,15]] - a_when: [[5,5,6]] + >>> df_native = pd.DataFrame(data) + >>> nw.from_native(df_native).with_columns( + ... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when") + ... ) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b a_when | + | 0 1 5 5 | + | 1 2 10 5 | + | 2 3 15 6 | + └──────────────────┘ """ return When(*predicates) @@ -2245,62 +1574,28 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: A new expression. Examples: - >>> import pandas as pd - >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> >>> data = { ... "a": [False, False, True, True, False, None], ... "b": [False, True, True, None, None, None], ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_all_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select("a", "b", all=nw.all_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_all_horizontal`: - - >>> agnostic_all_horizontal(df_pd) - a b all - 0 False False False - 1 False True False - 2 True True True - 3 True - 4 False False - 5 - - >>> agnostic_all_horizontal(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ all │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ false │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ null │ - │ false ┆ null ┆ false │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - - >>> agnostic_all_horizontal(df_pa) - pyarrow.Table - a: bool - b: bool - all: bool - ---- - a: [[false,false,true,true,false,null]] - b: [[false,true,true,null,null,null]] - all: [[false,false,true,null,false,null]] + >>> df_native = pa.table(data) + >>> nw.from_native(df_native).select("a", "b", all=nw.all_horizontal("a", "b")) + ┌─────────────────────────────────────────┐ + | Narwhals DataFrame | + |-----------------------------------------| + |pyarrow.Table | + |a: bool | + |b: bool | + |all: bool | + |---- | + |a: [[false,false,true,true,false,null]] | + |b: [[false,true,true,null,null,null]] | + |all: [[false,false,true,null,false,null]]| + └─────────────────────────────────────────┘ + """ if not exprs: msg = "At least one expression must be passed to `all_horizontal`" @@ -2332,48 +1627,17 @@ def lit(value: Any, dtype: DType | type[DType] | None = None) -> Expr: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [1, 2]} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_lit(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_columns(nw.lit(3)).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_lit`: - - >>> agnostic_lit(df_pd) - a literal - 0 1 3 - 1 2 3 - - >>> agnostic_lit(df_pl) - shape: (2, 2) - ┌─────┬─────────┐ - │ a ┆ literal │ - │ --- ┆ --- │ - │ i64 ┆ i32 │ - ╞═════╪═════════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 3 │ - └─────┴─────────┘ - - >>> agnostic_lit(df_pa) - pyarrow.Table - a: int64 - literal: int64 - ---- - a: [[1,2]] - literal: [[3,3]] + >>> df_native = pd.DataFrame({"a": [1, 2]}) + >>> nw.from_native(df_native).with_columns(nw.lit(3)) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a literal | + | 0 1 3 | + | 1 2 3 | + └──────────────────┘ """ if is_numpy_array(value): msg = ( @@ -2405,62 +1669,32 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: A new expression. Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> >>> data = { ... "a": [False, False, True, True, False, None], ... "b": [False, True, True, None, None, None], ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_any_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select("a", "b", any=nw.any_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_any_horizontal`: - - >>> agnostic_any_horizontal(df_pd) - a b any - 0 False False False - 1 False True True - 2 True True True - 3 True True - 4 False - 5 - - >>> agnostic_any_horizontal(df_pl) - shape: (6, 3) - ┌───────┬───────┬───────┐ - │ a ┆ b ┆ any │ - │ --- ┆ --- ┆ --- │ - │ bool ┆ bool ┆ bool │ - ╞═══════╪═══════╪═══════╡ - │ false ┆ false ┆ false │ - │ false ┆ true ┆ true │ - │ true ┆ true ┆ true │ - │ true ┆ null ┆ true │ - │ false ┆ null ┆ null │ - │ null ┆ null ┆ null │ - └───────┴───────┴───────┘ - - >>> agnostic_any_horizontal(df_pa) - pyarrow.Table - a: bool - b: bool - any: bool - ---- - a: [[false,false,true,true,false,null]] - b: [[false,true,true,null,null,null]] - any: [[false,true,true,true,null,null]] + >>> df_native = pl.DataFrame(data) + >>> nw.from_native(df_native).select("a", "b", any=nw.any_horizontal("a", "b")) + ┌─────────────────────────┐ + | Narwhals DataFrame | + |-------------------------| + |shape: (6, 3) | + |┌───────┬───────┬───────┐| + |│ a ┆ b ┆ any │| + |│ --- ┆ --- ┆ --- │| + |│ bool ┆ bool ┆ bool │| + |╞═══════╪═══════╪═══════╡| + |│ false ┆ false ┆ false │| + |│ false ┆ true ┆ true │| + |│ true ┆ true ┆ true │| + |│ true ┆ null ┆ true │| + |│ false ┆ null ┆ null │| + |│ null ┆ null ┆ null │| + |└───────┴───────┴───────┘| + └─────────────────────────┘ """ if not exprs: msg = "At least one expression must be passed to `any_horizontal`" @@ -2490,54 +1724,28 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: A new expression. Examples: - >>> import pandas as pd - >>> import polars as pl >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> >>> data = { ... "a": [1, 8, 3], ... "b": [4, 5, None], ... "c": ["x", "y", "z"], ... 
} - >>> df_pl = pl.DataFrame(data) - >>> df_pd = pd.DataFrame(data) - >>> df_pa = pa.table(data) + >>> df_native = pa.table(data) - We define a dataframe-agnostic function that computes the horizontal mean of "a" - and "b" columns: + We compute the horizontal mean of the "a" and "b" columns; null values are + ignored (see the last row): - >>> def agnostic_mean_horizontal(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.mean_horizontal("a", "b")).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow to - `agnostic_mean_horizontal`: - - >>> agnostic_mean_horizontal(df_pd) - a - 0 2.5 - 1 6.5 - 2 3.0 - - >>> agnostic_mean_horizontal(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2.5 │ - │ 6.5 │ - │ 3.0 │ - └─────┘ - - >>> agnostic_mean_horizontal(df_pa) - pyarrow.Table - a: double - ---- - a: [[2.5,6.5,3]] + >>> nw.from_native(df_native).select(nw.mean_horizontal("a", "b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | pyarrow.Table | + | a: double | + | ---- | + | a: [[2.5,6.5,3]] | + └──────────────────┘ """ if not exprs: msg = "At least one expression must be passed to `mean_horizontal`" @@ -2580,23 +1788,16 @@ def concat_str( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT >>> >>> data = { ... "a": [1, 2, 3], ... "b": ["dogs", "cats", None], ... "c": ["play", "swim", "walk"], ... } - - We define a dataframe-agnostic function that computes the horizontal string - concatenation of different columns - - >>> def agnostic_concat_str(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( + >>> df_native = pd.DataFrame(data) + >>> ( + ... nw.from_native(df_native).select( ... nw.concat_str( ... [ ... nw.col("a") * 2, @@ -2605,34 +1806,16 @@ def concat_str( ... ], ... separator=" ", ... ).alias("full_sentence") - ...
).to_native() - - We can pass any supported library such as Pandas, Polars, or PyArrow - to `agnostic_concat_str`: - - >>> agnostic_concat_str(pd.DataFrame(data)) - full_sentence - 0 2 dogs play - 1 4 cats swim - 2 None - - >>> agnostic_concat_str(pl.DataFrame(data)) - shape: (3, 1) - ┌───────────────┐ - │ full_sentence │ - │ --- │ - │ str │ - ╞═══════════════╡ - │ 2 dogs play │ - │ 4 cats swim │ - │ null │ - └───────────────┘ - - >>> agnostic_concat_str(pa.table(data)) - pyarrow.Table - full_sentence: string - ---- - full_sentence: [["2 dogs play","4 cats swim",null]] + ... ) + ... ) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | full_sentence | + | 0 2 dogs play | + | 1 4 cats swim | + | 2 None | + └──────────────────┘ """ exprs = flatten([*flatten([exprs]), *more_exprs]) return Expr(