diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/404.html b/404.html new file mode 100644 index 0000000000..0b21ebea91 --- /dev/null +++ b/404.html @@ -0,0 +1,1561 @@ + + + +
+ + + + + + + + + + + + + + +Class | +Method | +arrow | +pandas-like | +
---|---|---|---|
DataFrame | +clone | ++ | + |
DataFrame | +collect_schema | ++ | + |
DataFrame | +columns | ++ | + |
DataFrame | +drop | ++ | + |
DataFrame | +drop_nulls | ++ | + |
DataFrame | +estimated_size | ++ | + |
DataFrame | +explode | ++ | + |
DataFrame | +filter | ++ | + |
DataFrame | +gather_every | ++ | + |
DataFrame | +get_column | ++ | + |
DataFrame | +group_by | ++ | + |
DataFrame | +head | ++ | + |
DataFrame | +implementation | ++ | + |
DataFrame | +is_duplicated | ++ | + |
DataFrame | +is_empty | ++ | + |
DataFrame | +is_unique | ++ | + |
DataFrame | +item | ++ | + |
DataFrame | +iter_rows | ++ | + |
DataFrame | +join | ++ | + |
DataFrame | +join_asof | ++ | + |
DataFrame | +lazy | ++ | + |
DataFrame | +null_count | ++ | + |
DataFrame | +pipe | ++ | + |
DataFrame | +pivot | ++ | + |
DataFrame | +rename | ++ | + |
DataFrame | +row | ++ | + |
DataFrame | +rows | ++ | + |
DataFrame | +sample | ++ | + |
DataFrame | +schema | ++ | + |
DataFrame | +select | ++ | + |
DataFrame | +shape | ++ | + |
DataFrame | +sort | ++ | + |
DataFrame | +tail | ++ | + |
DataFrame | +to_arrow | ++ | + |
DataFrame | +to_dict | ++ | + |
DataFrame | +to_native | ++ | + |
DataFrame | +to_numpy | ++ | + |
DataFrame | +to_pandas | ++ | + |
DataFrame | +unique | ++ | + |
DataFrame | +unpivot | ++ | + |
DataFrame | +with_columns | ++ | + |
DataFrame | +with_row_index | ++ | + |
DataFrame | +write_csv | ++ | + |
DataFrame | +write_parquet | ++ | + |
Class | +Method | +arrow | +dask | +duckdb | +pandas-like | +spark-like | +
---|---|---|---|---|---|---|
Expr | +abs | ++ | + | + | + | + |
Expr | +alias | ++ | + | + | + | + |
Expr | +all | ++ | + | + | + | + |
Expr | +any | ++ | + | + | + | + |
Expr | +arg_max | ++ | + | + | + | + |
Expr | +arg_min | ++ | + | + | + | + |
Expr | +arg_true | ++ | + | + | + | + |
Expr | +cast | ++ | + | + | + | + |
Expr | +cat | ++ | + | + | + | + |
Expr | +clip | ++ | + | + | + | + |
Expr | +count | ++ | + | + | + | + |
Expr | +cum_count | ++ | + | + | + | + |
Expr | +cum_max | ++ | + | + | + | + |
Expr | +cum_min | ++ | + | + | + | + |
Expr | +cum_prod | ++ | + | + | + | + |
Expr | +cum_sum | ++ | + | + | + | + |
Expr | +diff | ++ | + | + | + | + |
Expr | +drop_nulls | ++ | + | + | + | + |
Expr | +dt | ++ | + | + | + | + |
Expr | +ewm_mean | ++ | + | + | + | + |
Expr | +fill_null | ++ | + | + | + | + |
Expr | +filter | ++ | + | + | + | + |
Expr | +gather_every | ++ | + | + | + | + |
Expr | +head | ++ | + | + | + | + |
Expr | +is_between | ++ | + | + | + | + |
Expr | +is_duplicated | ++ | + | + | + | + |
Expr | +is_finite | ++ | + | + | + | + |
Expr | +is_first_distinct | ++ | + | + | + | + |
Expr | +is_in | ++ | + | + | + | + |
Expr | +is_last_distinct | ++ | + | + | + | + |
Expr | +is_nan | ++ | + | + | + | + |
Expr | +is_null | ++ | + | + | + | + |
Expr | +is_unique | ++ | + | + | + | + |
Expr | +len | ++ | + | + | + | + |
Expr | +list | ++ | + | + | + | + |
Expr | +map_batches | ++ | + | + | + | + |
Expr | +max | ++ | + | + | + | + |
Expr | +mean | ++ | + | + | + | + |
Expr | +median | ++ | + | + | + | + |
Expr | +min | ++ | + | + | + | + |
Expr | +mode | ++ | + | + | + | + |
Expr | +n_unique | ++ | + | + | + | + |
Expr | +name | ++ | + | + | + | + |
Expr | +null_count | ++ | + | + | + | + |
Expr | +over | ++ | + | + | + | + |
Expr | +pipe | ++ | + | + | + | + |
Expr | +quantile | ++ | + | + | + | + |
Expr | +rank | ++ | + | + | + | + |
Expr | +replace_strict | ++ | + | + | + | + |
Expr | +rolling_mean | ++ | + | + | + | + |
Expr | +rolling_std | ++ | + | + | + | + |
Expr | +rolling_sum | ++ | + | + | + | + |
Expr | +rolling_var | ++ | + | + | + | + |
Expr | +round | ++ | + | + | + | + |
Expr | +sample | ++ | + | + | + | + |
Expr | +shift | ++ | + | + | + | + |
Expr | +skew | ++ | + | + | + | + |
Expr | +sort | ++ | + | + | + | + |
Expr | +std | ++ | + | + | + | + |
Expr | +str | ++ | + | + | + | + |
Expr | +sum | ++ | + | + | + | + |
Expr | +tail | ++ | + | + | + | + |
Expr | +unique | ++ | + | + | + | + |
Expr | +var | ++ | + | + | + | + |
Narwhals has two different levels of support for libraries: "full" and "interchange".
+For libraries with full support, we intend to support the whole Narwhals API; +however, this is a continuous work in progress.
+In the following section it is possible to check which method is implemented for which +class and backend.
+Info
+Class | +Method | +dask | +duckdb | +spark-like | +
---|---|---|---|---|
LazyFrame | +clone | ++ | + | + |
LazyFrame | +collect | ++ | + | + |
LazyFrame | +collect_schema | ++ | + | + |
LazyFrame | +columns | ++ | + | + |
LazyFrame | +drop | ++ | + | + |
LazyFrame | +drop_nulls | ++ | + | + |
LazyFrame | +explode | ++ | + | + |
LazyFrame | +filter | ++ | + | + |
LazyFrame | +gather_every | ++ | + | + |
LazyFrame | +group_by | ++ | + | + |
LazyFrame | +head | ++ | + | + |
LazyFrame | +implementation | ++ | + | + |
LazyFrame | +join | ++ | + | + |
LazyFrame | +join_asof | ++ | + | + |
LazyFrame | +lazy | ++ | + | + |
LazyFrame | +pipe | ++ | + | + |
LazyFrame | +rename | ++ | + | + |
LazyFrame | +schema | ++ | + | + |
LazyFrame | +select | ++ | + | + |
LazyFrame | +sort | ++ | + | + |
LazyFrame | +tail | ++ | + | + |
LazyFrame | +to_native | ++ | + | + |
LazyFrame | +unique | ++ | + | + |
LazyFrame | +unpivot | ++ | + | + |
LazyFrame | +with_columns | ++ | + | + |
LazyFrame | +with_row_index | ++ | + | + |
Class | +Method | +arrow | +pandas-like | +
---|---|---|---|
Series | +abs | ++ | + |
Series | +alias | ++ | + |
Series | +all | ++ | + |
Series | +any | ++ | + |
Series | +arg_max | ++ | + |
Series | +arg_min | ++ | + |
Series | +arg_true | ++ | + |
Series | +cast | ++ | + |
Series | +cat | ++ | + |
Series | +clip | ++ | + |
Series | +count | ++ | + |
Series | +cum_count | ++ | + |
Series | +cum_max | ++ | + |
Series | +cum_min | ++ | + |
Series | +cum_prod | ++ | + |
Series | +cum_sum | ++ | + |
Series | +diff | ++ | + |
Series | +drop_nulls | ++ | + |
Series | +dt | ++ | + |
Series | +dtype | ++ | + |
Series | +ewm_mean | ++ | + |
Series | +fill_null | ++ | + |
Series | +filter | ++ | + |
Series | +gather_every | ++ | + |
Series | +head | ++ | + |
Series | +implementation | ++ | + |
Series | +is_between | ++ | + |
Series | +is_duplicated | ++ | + |
Series | +is_empty | ++ | + |
Series | +is_finite | ++ | + |
Series | +is_first_distinct | ++ | + |
Series | +is_in | ++ | + |
Series | +is_last_distinct | ++ | + |
Series | +is_nan | ++ | + |
Series | +is_null | ++ | + |
Series | +is_sorted | ++ | + |
Series | +is_unique | ++ | + |
Series | +item | ++ | + |
Series | +len | ++ | + |
Series | +list | ++ | + |
Series | +max | ++ | + |
Series | +mean | ++ | + |
Series | +median | ++ | + |
Series | +min | ++ | + |
Series | +mode | ++ | + |
Series | +n_unique | ++ | + |
Series | +name | ++ | + |
Series | +null_count | ++ | + |
Series | +pipe | ++ | + |
Series | +quantile | ++ | + |
Series | +rank | ++ | + |
Series | +rename | ++ | + |
Series | +replace_strict | ++ | + |
Series | +rolling_mean | ++ | + |
Series | +rolling_std | ++ | + |
Series | +rolling_sum | ++ | + |
Series | +rolling_var | ++ | + |
Series | +round | ++ | + |
Series | +sample | ++ | + |
Series | +scatter | ++ | + |
Series | +shape | ++ | + |
Series | +shift | ++ | + |
Series | +skew | ++ | + |
Series | +sort | ++ | + |
Series | +std | ++ | + |
Series | +str | ++ | + |
Series | +sum | ++ | + |
Series | +tail | ++ | + |
Series | +to_arrow | ++ | + |
Series | +to_dummies | ++ | + |
Series | +to_frame | ++ | + |
Series | +to_list | ++ | + |
Series | +to_native | ++ | + |
Series | +to_numpy | ++ | + |
Series | +to_pandas | ++ | + |
Series | +unique | ++ | + |
Series | +value_counts | ++ | + |
Series | +var | ++ | + |
Series | +zip_with | ++ | + |
narwhals.DataFrame
Narwhals DataFrame, backed by a native eager dataframe.
+Warning
+This class is not meant to be instantiated directly - instead:
+If the native object is an eager dataframe from one of the supported
+ backends (e.g. pandas.DataFrame, polars.DataFrame, pyarrow.Table),
+ you can use narwhals.from_native
:
+
narwhals.from_native(native_dataframe)
+narwhals.from_native(native_dataframe, eager_only=True)
+
If the object is a dictionary mapping column names to generic sequences
+ (e.g. dict[str, list]
), you can create a DataFrame via
+ narwhals.from_dict
:
+
narwhals.from_dict(
+ data={"a": [1, 2, 3]},
+ native_namespace=narwhals.get_native_namespace(another_object),
+)
+
columns
+
+
+ property
+
+
+Get column names.
+ + +Returns:
+Type | +Description | +
---|---|
+ list[str]
+ |
+
+
+
+ The column names stored in a list. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.columns
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_columns
:
>>> agnostic_columns(df_pd)
+['foo', 'bar', 'ham']
+>>> agnostic_columns(df_pl)
+['foo', 'bar', 'ham']
+>>> agnostic_columns(df_pa)
+['foo', 'bar', 'ham']
+
implementation
+
+
+ property
+
+
+Return implementation of native frame.
+This can be useful when you need to use special-casing for features outside of +Narwhals' scope - for example, when dealing with pandas' Period Dtype.
+ + +Returns:
+Type | +Description | +
---|---|
+ Implementation
+ |
+
+
+
+ Implementation. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> df_native = pd.DataFrame({"a": [1, 2, 3]})
+>>> df = nw.from_native(df_native)
+>>> df.implementation
+<Implementation.PANDAS: 1>
+>>> df.implementation.is_pandas()
+True
+>>> df.implementation.is_pandas_like()
+True
+>>> df.implementation.is_polars()
+False
+
schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Returns:
+Type | +Description | +
---|---|
+ Schema
+ |
+
+
+
+ A Narwhals Schema object that displays the mapping of column names. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.schema import Schema
+>>> from narwhals.typing import IntoFrame
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_schema(df_native: IntoFrame) -> Schema:
+... df = nw.from_native(df_native)
+... return df.schema
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_schema
:
>>> agnostic_schema(df_pd)
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+>>> agnostic_schema(df_pl)
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+>>> agnostic_schema(df_pa)
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
shape
+
+
+ property
+
+
+Get the shape of the DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ tuple[int, int]
+ |
+
+
+
+ The shape of the dataframe as a tuple. + |
+
Examples:
+Construct pandas and Polars DataFrames and a PyArrow Table:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3, 4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
+... df = nw.from_native(df_native)
+... return df.shape
+
We can then pass either pandas, Polars or PyArrow to agnostic_shape
:
>>> agnostic_shape(df_pd)
+(5, 1)
+>>> agnostic_shape(df_pl)
+(5, 1)
+>>> agnostic_shape(df_pa)
+(5, 1)
+
__arrow_c_stream__(requested_schema=None)
+
+Export a DataFrame via the Arrow PyCapsule Interface.
+to_arrow
 and then defer to PyArrow's implementation. See PyCapsule Interface +for more.
+ +__getitem__(item)
+
+Extract column or slice of DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ item
+ |
+
+ str | slice | Sequence[int] | Sequence[str] | tuple[Sequence[int], str | int] | tuple[slice, str | int] | tuple[slice | Sequence[int], Sequence[int] | Sequence[str] | slice] | tuple[slice, slice]
+ |
+
+
+
+ How to slice dataframe. What happens depends on what is passed. It's easiest
+to explain by example. Suppose we have a Dataframe
|
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Series[Any] | Self
+ |
+
+
+
+ A Narwhals Series, backed by a native series. + |
+
In contrast with Polars, pandas allows non-string column names.
+If you don't know whether the column name you're trying to extract
+is definitely a string (e.g. df[df.columns[0]]
) then you should
+use DataFrame.get_column
instead.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_slice(df_native: IntoDataFrame) -> IntoSeries:
+... df = nw.from_native(df_native)
+... return df["a"].to_native()
+
We can then pass either pandas, Polars or PyArrow to agnostic_slice
:
>>> agnostic_slice(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> agnostic_slice(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+>>> agnostic_slice(df_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
clone()
+
+Create a copy of this DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ An identical copy of the original dataframe. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we clone the DataFrame:
+>>> def agnostic_clone(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.clone().to_native()
+
We can then pass any supported library such as Pandas or Polars
+to agnostic_clone
:
>>> agnostic_clone(df_pd)
+ a b
+0 1 3
+1 2 4
+
>>> agnostic_clone(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Returns:
+Type | +Description | +
---|---|
+ Schema
+ |
+
+
+
+ A Narwhals Schema object that displays the mapping of column names. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.schema import Schema
+>>> from narwhals.typing import IntoFrame
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_collect_schema(df_native: IntoFrame) -> Schema:
+... df = nw.from_native(df_native)
+... return df.collect_schema()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_collect_schema
:
>>> agnostic_collect_schema(df_pd)
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+>>> agnostic_collect_schema(df_pl)
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+>>> agnostic_collect_schema(df_pa)
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns, strict=True)
+
+Remove columns from the dataframe.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The dataframe with the specified columns removed. + |
+
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *columns
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
+ strict
+ |
+
+ bool
+ |
+
+
+
+ Validate that all column names exist in the schema and throw an +exception if a column name does not exist in the schema. + |
+
+ True
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).drop("ham").to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_drop
:
>>> agnostic_drop(df_pd)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+>>> agnostic_drop(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+>>> agnostic_drop(df_pa)
+pyarrow.Table
+foo: int64
+bar: double
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+
Use positional arguments to drop multiple columns.
+>>> def agnostic_drop_multi(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).drop("foo", "ham").to_native()
+
>>> agnostic_drop_multi(df_pd)
+ bar
+0 6.0
+1 7.0
+2 8.0
+>>> agnostic_drop_multi(df_pl)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+>>> agnostic_drop_multi(df_pa)
+pyarrow.Table
+bar: double
+----
+bar: [[6,7,8]]
+
drop_nulls(subset=None)
+
+Drop rows that contain null values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) for which null values are considered. If set to None +(default), use all columns. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The original object with the rows removed that contained the null values. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop_nulls().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_drop_nulls
:
>>> agnostic_drop_nulls(df_pd)
+ a ba
+0 1.0 1.0
+>>> agnostic_drop_nulls(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+>>> agnostic_drop_nulls(df_pa)
+pyarrow.Table
+a: double
+ba: double
+----
+a: [[1]]
+ba: [[1]]
+
estimated_size(unit='b')
+
+Return an estimation of the total (heap) allocated size of the DataFrame
.
Estimated size is given in the specified unit (bytes by default).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ unit
+ |
+
+ SizeUnit
+ |
+
+
+
+ 'b', 'kb', 'mb', 'gb', 'tb', 'bytes', 'kilobytes', 'megabytes', +'gigabytes', or 'terabytes'. + |
+
+ 'b'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ int | float
+ |
+
+
+
+ Integer or Float. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrameT
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_estimated_size(df_native: IntoDataFrameT) -> int | float:
+... df = nw.from_native(df_native)
+... return df.estimated_size()
+
We can then pass either pandas, Polars or PyArrow to agnostic_estimated_size
:
>>> agnostic_estimated_size(df_pd)
+np.int64(330)
+>>> agnostic_estimated_size(df_pl)
+51
+>>> agnostic_estimated_size(df_pa)
+63
+
explode(columns, *more_columns)
+
+Explode the dataframe to long format by exploding the given columns.
+ + +It is possible to explode multiple columns only if these columns have +matching element counts.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str | Sequence[str]
+ |
+
+
+
+ Column names. The underlying columns being exploded must be of the |
+ + required + | +
+ *more_columns
+ |
+
+ str
+ |
+
+
+
+ Additional names of columns to explode, specified as positional arguments. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ New DataFrame + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": ["x", "y", "z", "w"],
+... "lst1": [[1, 2], None, [None], []],
+... "lst2": [[3, None], None, [42], []],
+... }
+
We define a library agnostic function:
+>>> def agnostic_explode(df_native: IntoFrameT) -> IntoFrameT:
+... return (
+... nw.from_native(df_native)
+... .with_columns(nw.col("lst1", "lst2").cast(nw.List(nw.Int32())))
+... .explode("lst1", "lst2")
+... .to_native()
+... )
+
We can then pass any supported library such as pandas, Polars (eager),
+or PyArrow to agnostic_explode
:
>>> agnostic_explode(pd.DataFrame(data))
+ a lst1 lst2
+0 x 1 3
+0 x 2 <NA>
+1 y <NA> <NA>
+2 z <NA> 42
+3 w <NA> <NA>
+>>> agnostic_explode(pl.DataFrame(data))
+shape: (5, 3)
+┌─────┬──────┬──────┐
+│ a ┆ lst1 ┆ lst2 │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i32 ┆ i32 │
+╞═════╪══════╪══════╡
+│ x ┆ 1 ┆ 3 │
+│ x ┆ 2 ┆ null │
+│ y ┆ null ┆ null │
+│ z ┆ null ┆ 42 │
+│ w ┆ null ┆ null │
+└─────┴──────┴──────┘
+
filter(*predicates, **constraints)
+
+Filter the rows in the DataFrame based on one or more predicate expressions.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr] | list[bool]
+ |
+
+
+
+ Expression(s) that evaluates to a boolean Series. Can +also be a (single!) boolean list. + |
+
+ ()
+ |
+
+ **constraints
+ |
+
+ Any
+ |
+
+
+
+ Column filters; use |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The filtered dataframe. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(nw.col("foo") > 1).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_filter
:
>>> agnostic_filter(df_pd)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+>>> agnostic_filter(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> agnostic_filter(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: string
+----
+foo: [[2,3]]
+bar: [[7,8]]
+ham: [["b","c"]]
+
Filter on multiple conditions, combined with and/or operators:
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).to_native()
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> agnostic_filter(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: string
+----
+foo: [[1]]
+bar: [[6]]
+ham: [["a"]]
+
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... dframe = df.filter(
+... (nw.col("foo") == 1) | (nw.col("ham") == "c")
+... ).to_native()
+... return dframe
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+>>> agnostic_filter(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> agnostic_filter(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: string
+----
+foo: [[1,3]]
+bar: [[6,8]]
+ham: [["a","c"]]
+
Provide multiple filters using *args
syntax:
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... dframe = df.filter(
+... nw.col("foo") <= 2,
+... ~nw.col("ham").is_in(["b", "c"]),
+... ).to_native()
+... return dframe
+>>> agnostic_filter(df_pd)
+ foo bar ham
+0 1 6 a
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> agnostic_filter(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: string
+----
+foo: [[1]]
+bar: [[6]]
+ham: [["a"]]
+
Provide multiple filters using **kwargs
syntax:
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(foo=2, ham="b").to_native()
+>>> agnostic_filter(df_pd)
+ foo bar ham
+1 2 7 b
+>>> agnostic_filter(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+└─────┴─────┴─────┘
+>>> agnostic_filter(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: string
+----
+foo: [[2]]
+bar: [[7]]
+ham: [["b"]]
+
gather_every(n, offset=0)
+
+Take every nth row in the DataFrame and return as a new DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The dataframe containing only the selected rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we gather every 2nd row, +starting from an offset of 1:
+>>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.gather_every(n=2, offset=1).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_gather_every
:
>>> agnostic_gather_every(df_pd)
+ a b
+1 2 6
+3 4 8
+
>>> agnostic_gather_every(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 6 │
+│ 4 ┆ 8 │
+└─────┴─────┘
+>>> agnostic_gather_every(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[2,4]]
+b: [[6,8]]
+
get_column(name)
+
+Get a single column by name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The column name as a string. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A Narwhals Series, backed by a native series. + |
+
Although name
is typed as str
, pandas does allow non-string column
+names, and they will work when passed to this function if the
+narwhals.DataFrame
is backed by a pandas dataframe with non-string
+columns. This function can only be used to extract a column by name, so
+there is no risk of ambiguity.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_get_column(df_native: IntoDataFrame) -> IntoSeries:
+... df = nw.from_native(df_native)
+... name = df.columns[0]
+... return df.get_column(name).to_native()
+
We can then pass either pandas, Polars or PyArrow to agnostic_get_column
:
>>> agnostic_get_column(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> agnostic_get_column(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+>>> agnostic_get_column(df_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
group_by(*keys, drop_null_keys=False)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts multiple column names as a list. + |
+
+ ()
+ |
+
+ drop_null_keys
+ |
+
+ bool
+ |
+
+
+
+ If True, groups where any key is null won't be included +in the result. + |
+
+ False
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
GroupBy |
+ GroupBy[Self]
+ |
+
+
+
+ Object which can be used to perform aggregations. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrameT
+>>> data = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> def agnostic_group_by_agg(df_native: IntoDataFrameT) -> IntoDataFrameT:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a").to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_group_by_agg
:
>>> agnostic_group_by_agg(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> agnostic_group_by_agg(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> agnostic_group_by_agg(df_pa)
+pyarrow.Table
+a: string
+b: int64
+----
+a: [["a","b","c"]]
+b: [[2,5,3]]
+
Group by multiple columns by passing a list of column names.
+>>> def agnostic_group_by_agg(df_native: IntoDataFrameT) -> IntoDataFrameT:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b").to_native()
+
>>> agnostic_group_by_agg(df_pd)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> agnostic_group_by_agg(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+>>> agnostic_group_by_agg(df_pa)
+pyarrow.Table
+a: string
+b: int64
+c: int64
+----
+a: [["a","b","b","c"]]
+b: [[1,2,3,3]]
+c: [[5,4,2,1]]
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the last `abs(n)`. |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A subset of the dataframe of shape (n, n_columns). + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).head(3).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_head
:
>>> agnostic_head(df_pd)
+ foo bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> agnostic_head(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> agnostic_head(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: string
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
is_duplicated()
+
+Get a mask of all duplicated rows in this DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A new Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+>>> data = {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_duplicated(df_native: IntoDataFrame) -> IntoSeries:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.is_duplicated().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_is_duplicated
:
>>> agnostic_is_duplicated(df_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+
>>> agnostic_is_duplicated(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+>>> agnostic_is_duplicated(df_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ true,
+ false,
+ false,
+ true
+ ]
+]
+
is_empty()
+
+Check if the dataframe is empty.
+ + +Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+ A boolean indicating whether the dataframe is empty (True) or not (False). + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+
Let's define a dataframe-agnostic function that filters rows in which "foo" +values are greater than 10, and then checks if the result is empty or not:
+>>> def agnostic_is_empty(df_native: IntoDataFrame) -> bool:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.filter(nw.col("foo") > 10).is_empty()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_is_empty
:
>>> data = {"foo": [1, 2, 3], "bar": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+>>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa)
+(True, True, True)
+
>>> data = {"foo": [100, 2, 3], "bar": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+>>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa)
+(False, False, False)
+
is_unique()
+
+Get a mask of all unique rows in this DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A new Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+>>> data = {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_unique(df_native: IntoDataFrame) -> IntoSeries:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.is_unique().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_is_unique
:
>>> agnostic_is_unique(df_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> agnostic_is_unique(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+>>> agnostic_is_unique(df_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ true,
+ true,
+ false
+ ]
+]
+
item(row=None, column=None)
+
+Return the DataFrame as a scalar, or return the element at the given row/column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ row
+ |
+
+ int | None
+ |
+
+
+
+ The n-th row. + |
+
+ None
+ |
+
+ column
+ |
+
+ int | str | None
+ |
+
+
+
+ The column selected via an integer or a string (column name). + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ A scalar or the specified element in the dataframe. + |
+
If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1). +With row/col, this is equivalent to df[row,col].
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the item at the given row/column:
+>>> def agnostic_item(
+... df_native: IntoDataFrame, row: int | None, column: int | str | None
+... ):
+... df = nw.from_native(df_native, eager_only=True)
+... return df.item(row, column)
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_item
:
>>> agnostic_item(df_pd, 1, 1), agnostic_item(df_pd, 2, "b")
+(np.int64(5), np.int64(6))
+>>> agnostic_item(df_pl, 1, 1), agnostic_item(df_pl, 2, "b")
+(5, 6)
+>>> agnostic_item(df_pa, 1, 1), agnostic_item(df_pa, 2, "b")
+(5, 6)
+
iter_rows(*, named=False, buffer_size=512)
+
+Returns an iterator over the DataFrame of rows of python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ named
+ |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
+ buffer_size
+ |
+
+ int
+ |
+
+
+
+ Determines the number of rows that are buffered +internally while iterating over the data. +See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html + |
+
+ 512
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]
+ |
+
+
+
+ An iterator over the DataFrame of rows. + |
+
cuDF doesn't support this method.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_iter_rows(df_native: IntoDataFrame, *, named: bool):
+... return nw.from_native(df_native, eager_only=True).iter_rows(named=named)
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_iter_rows
:
>>> [row for row in agnostic_iter_rows(df_pd, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in agnostic_iter_rows(df_pd, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> [row for row in agnostic_iter_rows(df_pl, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in agnostic_iter_rows(df_pl, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> [row for row in agnostic_iter_rows(df_pa, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in agnostic_iter_rows(df_pa, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
join(other, on=None, how='inner', *, left_on=None, right_on=None, suffix='_right')
+
+Join in SQL-like fashion.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the join columns in both DataFrames. If set, `left_on` and `right_on` should be None. |
+
+ None
+ |
+
+ how
+ |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
+ left_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the left DataFrame. + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the right DataFrame. + |
+
+ None
+ |
+
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to append to columns with a duplicate name. + |
+
+ '_right'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> df_pd = pd.DataFrame(data)
+>>> other_pd = pd.DataFrame(data_other)
+
>>> df_pl = pl.DataFrame(data)
+>>> other_pl = pl.DataFrame(data_other)
+
>>> df_pa = pa.table(data)
+>>> other_pa = pa.table(data_other)
+
Let's define a dataframe-agnostic function in which we join over "ham" column:
+>>> def agnostic_join_on_ham(
+... df_native: IntoFrameT, other_native: IntoFrameT
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return df.join(other, left_on="ham", right_on="ham").to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_join_on_ham
:
>>> agnostic_join_on_ham(df_pd, other_pd)
+ foo bar ham apple
+0 1 6.0 a x
+1 2 7.0 b y
+
>>> agnostic_join_on_ham(df_pl, other_pl)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+>>> agnostic_join_on_ham(df_pa, other_pa)
+pyarrow.Table
+foo: int64
+bar: double
+ham: string
+apple: string
+----
+foo: [[1,2]]
+bar: [[6,7]]
+ham: [["a","b"]]
+apple: [["x","y"]]
+
join_asof(other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy='backward')
+
+Perform an asof join.
+This is similar to a left-join except that we match on nearest key rather than equal keys.
+Both DataFrames must be sorted by the asof_join key.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ left_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+
+ None
+ |
+
+ on
+ |
+
+ str | None
+ |
+
+
+
+ Join column of both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ by_left
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join. + |
+
+ None
+ |
+
+ by_right
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join. + |
+
+ None
+ |
+
+ by
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join. + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['backward', 'forward', 'nearest']
+ |
+
+
+
+ Join strategy. The default is "backward". +
|
+
+ 'backward'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> from datetime import datetime
+>>> from typing import Literal
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data_gdp = {
+... "datetime": [
+... datetime(2016, 1, 1),
+... datetime(2017, 1, 1),
+... datetime(2018, 1, 1),
+... datetime(2019, 1, 1),
+... datetime(2020, 1, 1),
+... ],
+... "gdp": [4164, 4411, 4566, 4696, 4827],
+... }
+>>> data_population = {
+... "datetime": [
+... datetime(2016, 3, 1),
+... datetime(2018, 8, 1),
+... datetime(2019, 1, 1),
+... ],
+... "population": [82.19, 82.66, 83.12],
+... }
+>>> gdp_pd = pd.DataFrame(data_gdp)
+>>> population_pd = pd.DataFrame(data_population)
+
>>> gdp_pl = pl.DataFrame(data_gdp).sort("datetime")
+>>> population_pl = pl.DataFrame(data_population).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" column:
+>>> def agnostic_join_asof_datetime(
+... df_native: IntoFrameT,
+... other_native: IntoFrameT,
+... strategy: Literal["backward", "forward", "nearest"],
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return df.join_asof(other, on="datetime", strategy=strategy).to_native()
+
We can then pass any supported library such as Pandas or Polars
+to agnostic_join_asof_datetime
:
>>> agnostic_join_asof_datetime(population_pd, gdp_pd, strategy="backward")
+ datetime population gdp
+0 2016-03-01 82.19 4164
+1 2018-08-01 82.66 4566
+2 2019-01-01 83.12 4696
+
>>> agnostic_join_asof_datetime(population_pl, gdp_pl, strategy="backward")
+shape: (3, 3)
+┌─────────────────────┬────────────┬──────┐
+│ datetime ┆ population ┆ gdp │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ f64 ┆ i64 │
+╞═════════════════════╪════════════╪══════╡
+│ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │
+│ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │
+│ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │
+└─────────────────────┴────────────┴──────┘
+
Here is a real-world time-series example that uses the by
argument.
>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data_quotes = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 30),
+... datetime(2016, 5, 25, 13, 30, 0, 41),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 49),
+... datetime(2016, 5, 25, 13, 30, 0, 72),
+... datetime(2016, 5, 25, 13, 30, 0, 75),
+... ],
+... "ticker": [
+... "GOOG",
+... "MSFT",
+... "MSFT",
+... "MSFT",
+... "GOOG",
+... "AAPL",
+... "GOOG",
+... "MSFT",
+... ],
+... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
+... }
+>>> data_trades = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 38),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... ],
+... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+... "quantity": [75, 155, 100, 100, 100],
+... }
+>>> quotes_pd = pd.DataFrame(data_quotes)
+>>> trades_pd = pd.DataFrame(data_trades)
+>>> quotes_pl = pl.DataFrame(data_quotes).sort("datetime")
+>>> trades_pl = pl.DataFrame(data_trades).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns:
+>>> def agnostic_join_asof_datetime_by_ticker(
+... df_native: IntoFrameT, other_native: IntoFrameT
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return df.join_asof(other, on="datetime", by="ticker").to_native()
+
We can now pass either pandas or Polars to the function:
+>>> agnostic_join_asof_datetime_by_ticker(trades_pd, quotes_pd)
+ datetime ticker price quantity bid ask
+0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96
+1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98
+2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93
+3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93
+4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN
+
>>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl)
+shape: (5, 6)
+┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐
+│ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │
+╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡
+│ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │
+│ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │
+└────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Returns:
+Type | +Description | +
---|---|
+ LazyFrame[Any]
+ |
+
+
+
+ A new LazyFrame. + |
+
Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_lazy(df_native: IntoFrame) -> IntoFrame:
+... df = nw.from_native(df_native)
+... return df.lazy().to_native()
+
Note that pandas and PyArrow dataframes stay eager, whereas a Polars DataFrame +becomes a Polars LazyFrame:
+>>> agnostic_lazy(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_lazy(df_pl)
+<LazyFrame ...>
+>>> agnostic_lazy(df_pa)
+pyarrow.Table
+foo: int64
+bar: double
+ham: string
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
null_count()
+
+Create a new DataFrame that shows the null counts per column.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A dataframe of shape (1, n_columns). + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "foo": [1, None, 3],
+... "bar": [6, 7, None],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the null count of +each column:
+>>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.null_count().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_null_count
:
>>> agnostic_null_count(df_pd)
+ foo bar ham
+0 1 1 0
+
>>> agnostic_null_count(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ u32 ┆ u32 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 1 ┆ 0 │
+└─────┴─────┴─────┘
+
>>> agnostic_null_count(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: int64
+----
+foo: [[1]]
+bar: [[1]]
+ham: [[0]]
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[Any], Self]
+ |
+
+
+
+ Function to apply. + |
+ + required + | +
+ args
+ |
+
+ Any
+ |
+
+
+
+ Positional arguments to pass to function. + |
+
+ ()
+ |
+
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Keyword arguments to pass to function. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The original object with the function applied. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.pipe(
+... lambda _df: _df.select(
+... [x for x in _df.columns if len(x) == 1]
+... ).to_native()
+... )
+
We can then pass either pandas, Polars or PyArrow to agnostic_pipe
:
>>> agnostic_pipe(df_pd)
+ a
+0 1
+1 2
+2 3
+>>> agnostic_pipe(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> agnostic_pipe(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,2,3]]
+
pivot(on, *, index=None, values=None, aggregate_function=None, maintain_order=None, sort_columns=False, separator='_')
+
+Create a spreadsheet-style pivot table as a DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str]
+ |
+
+
+
+ Name of the column(s) whose values will be used as the header of the +output DataFrame. + |
+ + required + | +
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ One or multiple keys to group by. If None, all remaining columns not
+specified in `on` and `values` will be used. At least one of `index` and `values` must be specified. |
+
+ None
+ |
+
+ values
+ |
+
+ str | list[str] | None
+ |
+
+
+
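+ One or multiple columns whose values are spread across the new columns. If None, all remaining columns not
+specified in `on` and `index` will be used. At least one of `index` and `values` must be specified. |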
+
+ None
+ |
+
+ aggregate_function
+ |
+
+ Literal['min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'] | None
+ |
+
+
+
+ Choose from: +
|
+
+ None
+ |
+
+ maintain_order
+ |
+
+ bool | None
+ |
+
+
+
+ Has no effect and is kept around only for backwards-compatibility. + |
+
+ None
+ |
+
+ sort_columns
+ |
+
+ bool
+ |
+
+
+
+ Sort the transposed columns by name. Default is by order of +discovery. + |
+
+ False
+ |
+
+ separator
+ |
+
+ str
+ |
+
+
+
+ Used as separator/delimiter in generated column names in case of
+multiple `values` columns. |
+
+ '_'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new dataframe. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrameT
+>>> data = {
+... "ix": [1, 1, 2, 2, 1, 2],
+... "col": ["a", "a", "a", "a", "b", "b"],
+... "foo": [0, 1, 2, 2, 7, 1],
+... "bar": [0, 2, 0, 0, 9, 4],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_pivot(df_native: IntoDataFrameT) -> IntoDataFrameT:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.pivot("col", index="ix", aggregate_function="sum").to_native()
+
We can then pass any supported library such as Pandas or Polars
+to agnostic_pivot
:
>>> agnostic_pivot(df_pd)
+ ix foo_a foo_b bar_a bar_b
+0 1 1 7 2 9
+1 2 4 1 0 4
+>>> agnostic_pivot(df_pl)
+shape: (2, 5)
+┌─────┬───────┬───────┬───────┬───────┐
+│ ix ┆ foo_a ┆ foo_b ┆ bar_a ┆ bar_b │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
+╞═════╪═══════╪═══════╪═══════╪═══════╡
+│ 1 ┆ 1 ┆ 7 ┆ 2 ┆ 9 │
+│ 2 ┆ 4 ┆ 1 ┆ 0 ┆ 4 │
+└─────┴───────┴───────┴───────┴───────┘
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mapping
+ |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The dataframe with the specified columns renamed. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_rename(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).rename({"foo": "apple"}).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_rename
:
>>> agnostic_rename(df_pd)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> agnostic_rename(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+>>> agnostic_rename(df_pa)
+pyarrow.Table
+apple: int64
+bar: int64
+ham: string
+----
+apple: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
row(index)
+
+Get values at given row.
+Warning
+You should NEVER use this method to iterate over a DataFrame; +if you require row-iteration you should strongly prefer use of iter_rows() +instead.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ index
+ |
+
+ int
+ |
+
+
+
+ Row number. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ tuple[Any, ...]
+ |
+
+
+
+ A tuple of the values in the selected row. + |
+
cuDF doesn't support this method.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> from narwhals.typing import IntoDataFrame
+>>> from typing import Any
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a library-agnostic function to get the second row.
+>>> def agnostic_row(df_native: IntoDataFrame) -> tuple[Any, ...]:
+... return nw.from_native(df_native).row(1)
+
We can then pass either pandas, Polars or PyArrow to agnostic_row
:
>>> agnostic_row(df_pd)
+(2, 5)
+>>> agnostic_row(df_pl)
+(2, 5)
+>>> agnostic_row(df_pa)
+(<pyarrow.Int64Scalar: 2>, <pyarrow.Int64Scalar: 5>)
+
rows(*, named=False)
+
+Returns all data in the DataFrame as a list of rows of python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ named
+ |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ list[tuple[Any, ...]] | list[dict[str, Any]]
+ |
+
+
+
+ The data as a list of rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_rows(df_native: IntoDataFrame, *, named: bool):
+... return nw.from_native(df_native, eager_only=True).rows(named=named)
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_rows
:
>>> agnostic_rows(df_pd, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> agnostic_rows(df_pd, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> agnostic_rows(df_pl, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> agnostic_rows(df_pl, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> agnostic_rows(df_pa, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> agnostic_rows(df_pa, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample from this DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new dataframe. + |
+
The results may not be consistent across libraries.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrameT
+>>> data = {"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_sample(df_native: IntoDataFrameT) -> IntoDataFrameT:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.sample(n=2, seed=123).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_sample
:
>>> agnostic_sample(df_pd)
+ a b
+3 4 y
+0 1 x
+>>> agnostic_sample(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ str │
+╞═════╪═════╡
+│ 2 ┆ y │
+│ 3 ┆ x │
+└─────┴─────┘
+>>> agnostic_sample(df_pa)
+pyarrow.Table
+a: int64
+b: string
+----
+a: [[1,3]]
+b: [["x","x"]]
+
As you can see, by using the same seed, the result will be consistent within +the same backend, but not necessarily across different backends.
+ +select(*exprs, **named_exprs)
+
+Select columns from this DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The dataframe containing only the selected columns. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> def agnostic_single_select(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).select("foo").to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_single_select
:
>>> agnostic_single_select(df_pd)
+ foo
+0 1
+1 2
+2 3
+>>> agnostic_single_select(df_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> agnostic_single_select(df_pa)
+pyarrow.Table
+foo: int64
+----
+foo: [[1,2,3]]
+
Multiple columns can be selected by passing a list of column names.
+>>> def agnostic_multi_select(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).select(["foo", "bar"]).to_native()
+
>>> agnostic_multi_select(df_pd)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+>>> agnostic_multi_select(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+>>> agnostic_multi_select(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... return (
+... nw.from_native(df_native)
+... .select(nw.col("foo"), nw.col("bar") + 1)
+... .to_native()
+... )
+
>>> agnostic_select(df_pd)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+>>> agnostic_select(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> agnostic_select(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+----
+foo: [[1,2,3]]
+bar: [[7,8,9]]
+
Use keyword arguments to easily name your expression inputs.
+>>> def agnostic_select_w_kwargs(df_native: IntoFrameT) -> IntoFrameT:
+... return (
+... nw.from_native(df_native)
+... .select(threshold=nw.col("foo") * 2)
+... .to_native()
+... )
+
>>> agnostic_select_w_kwargs(df_pd)
+ threshold
+0 2
+1 4
+2 6
+>>> agnostic_select_w_kwargs(df_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+>>> agnostic_select_w_kwargs(df_pa)
+pyarrow.Table
+threshold: int64
+----
+threshold: [[2,4,6]]
+
sort(by, *more_by, descending=False, nulls_last=False)
+
+Sort the dataframe by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ by
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column name(s) to sort by. + |
+ + required + | +
+ *more_by
+ |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional arguments. + |
+
+ ()
+ |
+
+ descending
+ |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple columns, can be +specified per column by passing a sequence of booleans. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The sorted dataframe. + |
+
Unlike Polars, it is not possible to specify a sequence of booleans for
+nulls_last
in order to control per-column behaviour. Instead a single
+boolean is applied for all by
columns.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders:
+>>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.sort("c", "a", descending=[False, True]).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_sort
:
>>> agnostic_sort(df_pd)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+>>> agnostic_sort(df_pl)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+>>> agnostic_sort(df_pa)
+pyarrow.Table
+a: int64
+b: double
+c: string
+----
+a: [[1,null,2]]
+b: [[6,4,5]]
+c: [["a","b","c"]]
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the first |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A subset of the dataframe of shape (n, n_columns). + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).tail(3).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_tail
:
>>> agnostic_tail(df_pd)
+ foo bar ham
+2 3 8 c
+3 4 9 d
+4 5 10 e
+>>> agnostic_tail(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 3 ┆ 8 ┆ c │
+│ 4 ┆ 9 ┆ d │
+│ 5 ┆ 10 ┆ e │
+└─────┴─────┴─────┘
+>>> agnostic_tail(df_pa)
+pyarrow.Table
+foo: int64
+bar: int64
+ham: string
+----
+foo: [[3,4,5]]
+bar: [[8,9,10]]
+ham: [["c","d","e"]]
+
to_arrow()
+
+Convert to arrow table.
+ + +Returns:
+Type | +Description | +
---|---|
+ Table
+ |
+
+
+
+ A new PyArrow table. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3], "bar": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that converts to arrow table:
+>>> def agnostic_to_arrow(df_native: IntoDataFrame) -> pa.Table:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.to_arrow()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_to_arrow
:
>>> agnostic_to_arrow(df_pd)
+pyarrow.Table
+foo: int64
+bar: string
+----
+foo: [[1,2,3]]
+bar: [["a","b","c"]]
+
>>> agnostic_to_arrow(df_pl)
+pyarrow.Table
+foo: int64
+bar: large_string
+----
+foo: [[1,2,3]]
+bar: [["a","b","c"]]
+
>>> agnostic_to_arrow(df_pa)
+pyarrow.Table
+foo: int64
+bar: string
+----
+foo: [[1,2,3]]
+bar: [["a","b","c"]]
+
to_dict(*, as_series=True)
+
+Convert DataFrame to a dictionary mapping column name to values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ as_series
+ |
+
+ bool
+ |
+
+
+
+ If set to true |
+
+ True
+ |
+
Returns:
+Type | +Description | +
---|---|
+ dict[str, Series[Any]] | dict[str, list[Any]]
+ |
+
+
+
+ A mapping from column name to values / Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {
+... "A": [1, 2, 3, 4, 5],
+... "fruits": ["banana", "banana", "apple", "apple", "banana"],
+... "B": [5, 4, 3, 2, 1],
+... "animals": ["beetle", "fly", "beetle", "beetle", "beetle"],
+... "optional": [28, 300, None, 2, -30],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_to_dict(
+... df_native: IntoDataFrame,
+... ) -> dict[str, list[int | str | float | None]]:
+... df = nw.from_native(df_native)
+... return df.to_dict(as_series=False)
+
We can then pass either pandas, Polars or PyArrow to agnostic_to_dict
:
>>> agnostic_to_dict(df_pd)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]}
+>>> agnostic_to_dict(df_pl)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+>>> agnostic_to_dict(df_pa)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+
to_native()
+
+Convert Narwhals DataFrame to native one.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrameT
+ |
+
+
+
+ Object of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Calling to_native
on a Narwhals DataFrame returns the native object:
>>> nw.from_native(df_pd).to_native()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> nw.from_native(df_pl).to_native()
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 7.0 ┆ b │
+│ 3 ┆ 8.0 ┆ c │
+└─────┴─────┴─────┘
+>>> nw.from_native(df_pa).to_native()
+pyarrow.Table
+foo: int64
+bar: double
+ham: string
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
to_numpy()
+
+Convert this DataFrame to a NumPy ndarray.
+ + +Returns:
+Type | +Description | +
---|---|
+ ndarray
+ |
+
+
+
+ A NumPy ndarray. + |
+
Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> import numpy as np
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.5, 7.0, 8.5], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_to_numpy(df_native: IntoDataFrame) -> np.ndarray:
+... df = nw.from_native(df_native)
+... return df.to_numpy()
+
We can then pass either pandas, Polars or PyArrow to agnostic_to_numpy
:
>>> agnostic_to_numpy(df_pd)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+>>> agnostic_to_numpy(df_pl)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+>>> agnostic_to_numpy(df_pa)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+
to_pandas()
+
+Convert this DataFrame to a pandas DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame
+ |
+
+
+
+ A pandas DataFrame. + |
+
Examples:
+Construct pandas, Polars (eager) and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_to_pandas(df_native: IntoDataFrame) -> pd.DataFrame:
+... df = nw.from_native(df_native)
+... return df.to_pandas()
+
We can then pass any supported library such as pandas, Polars (eager), or
+PyArrow to agnostic_to_pandas
:
>>> agnostic_to_pandas(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_to_pandas(df_pl)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_to_pandas(df_pa)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+
unique(subset=None, *, keep='any', maintain_order=False)
+
+Drop duplicate rows from this dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows. + |
+
+ None
+ |
+
+ keep
+ |
+
+ Literal['any', 'first', 'last', 'none']
+ |
+
+
+
+ {'first', 'last', 'any', 'none'} +Which of the duplicate rows to keep. +
|
+
+ 'any'
+ |
+
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original DataFrame. This may be more +expensive to compute. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The dataframe with the duplicate rows removed. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT:
+... return nw.from_native(df_native).unique(["bar", "ham"]).to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_unique
:
>>> agnostic_unique(df_pd)
+ foo bar ham
+0 1 a b
+>>> agnostic_unique(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+>>> agnostic_unique(df_pa)
+pyarrow.Table
+foo: int64
+bar: string
+ham: string
+----
+foo: [[1]]
+bar: [["a"]]
+ham: [["b"]]
+
unpivot(on=None, *, index=None, variable_name=None, value_name=None)
+
+Unpivot a DataFrame from wide to long format.
+Optionally leaves identifiers set.
+This function is useful to massage a DataFrame into a format where one or more +columns are identifier variables (index) while all other columns, considered +measured variables (on), are "unpivoted" to the row axis leaving just +two non-identifier columns, 'variable' and 'value'.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as values variables; if |
+
+ None
+ |
+
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as identifier variables. + |
+
+ None
+ |
+
+ variable_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
+ value_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The unpivoted dataframe. + |
+
If you're coming from pandas, this is similar to pandas.DataFrame.melt
,
+but with index
replacing id_vars
and on
replacing value_vars
.
+In other frameworks, you might know this operation as pivot_longer
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": ["x", "y", "z"],
+... "b": [1, 3, 5],
+... "c": [2, 4, 6],
+... }
+
We define a library agnostic function:
+>>> def agnostic_unpivot(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.unpivot(on=["b", "c"], index="a").to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_unpivot
:
>>> agnostic_unpivot(pl.DataFrame(data))
+shape: (6, 3)
+┌─────┬──────────┬───────┐
+│ a ┆ variable ┆ value │
+│ --- ┆ --- ┆ --- │
+│ str ┆ str ┆ i64 │
+╞═════╪══════════╪═══════╡
+│ x ┆ b ┆ 1 │
+│ y ┆ b ┆ 3 │
+│ z ┆ b ┆ 5 │
+│ x ┆ c ┆ 2 │
+│ y ┆ c ┆ 4 │
+│ z ┆ c ┆ 6 │
+└─────┴──────────┴───────┘
+
>>> agnostic_unpivot(pd.DataFrame(data))
+ a variable value
+0 x b 1
+1 y b 3
+2 z b 5
+3 x c 2
+4 y c 4
+5 z c 6
+
>>> agnostic_unpivot(pa.table(data))
+pyarrow.Table
+a: string
+variable: string
+value: int64
+----
+a: [["x","y","z"],["x","y","z"]]
+variable: [["b","b","b"],["c","c","c"]]
+value: [[1,3,5],[2,4,6]]
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this DataFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ A new DataFrame with the columns added. + |
+
Creating a new DataFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT:
+... return (
+... nw.from_native(df_native)
+... .with_columns((nw.col("a") * 2).alias("a*2"))
+... .to_native()
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_with_columns
:
>>> agnostic_with_columns(df_pd)
+ a b c a*2
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+>>> agnostic_with_columns(df_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ a*2 │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+>>> agnostic_with_columns(df_pa)
+pyarrow.Table
+a: int64
+b: double
+c: bool
+a*2: int64
+----
+a: [[1,2,3,4]]
+b: [[0.5,4,10,13]]
+c: [[true,true,false,true]]
+a*2: [[2,4,6,8]]
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The name of the column as a string. The default is "index". + |
+
+ 'index'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The original object with the column added. + |
+
Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_with_row_index(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_row_index().to_native()
+
We can then pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_with_row_index
:
>>> agnostic_with_row_index(df_pd)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+>>> agnostic_with_row_index(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+>>> agnostic_with_row_index(df_pa)
+pyarrow.Table
+index: int64
+a: int64
+b: int64
+----
+index: [[0,1,2]]
+a: [[1,2,3]]
+b: [[4,5,6]]
+
write_csv(file=None)
+
+Write dataframe to comma-separated values (CSV) file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ file
+ |
+
+ str | Path | BytesIO | None
+ |
+
+
+
+ String, path object or file-like object to which the dataframe will be +written. If None, the resulting csv format is returned as a string. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ str | None
+ |
+
+
+
+ String or None. + |
+
Examples:
+Construct pandas, Polars (eager) and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_write_csv(df_native: IntoDataFrame) -> str:
+... df = nw.from_native(df_native)
+... return df.write_csv()
+
We can pass any supported library such as pandas, Polars or PyArrow to agnostic_write_csv
:
>>> agnostic_write_csv(df_pd)
+'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
+>>> agnostic_write_csv(df_pl)
+'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
+>>> agnostic_write_csv(df_pa)
+'"foo","bar","ham"\n1,6,"a"\n2,7,"b"\n3,8,"c"\n'
+
If we had passed a file name to write_csv
, it would have been
+written to that file.
write_parquet(file)
+
+Write dataframe to parquet file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ file
+ |
+
+ str | Path | BytesIO
+ |
+
+
+
+ String, path object or file-like object to which the dataframe will be +written. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ None
+ |
+
+
+
+ None. + |
+
Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_write_parquet(df_native: IntoDataFrame):
+... df = nw.from_native(df_native)
+... df.write_parquet("foo.parquet")
+
We can then pass either pandas, Polars or PyArrow to agnostic_write_parquet
:
>>> agnostic_write_parquet(df_pd)
+>>> agnostic_write_parquet(df_pl)
+>>> agnostic_write_parquet(df_pa)
+
narwhals.dependencies
get_cudf()
+
+Get cudf module (if already imported - else return None).
+ +get_ibis()
+
+Get ibis module (if already imported - else return None).
+ +get_modin()
+
+Get modin.pandas module (if already imported - else return None).
+ +get_pandas()
+
+Get pandas module (if already imported - else return None).
+ +get_polars()
+
+Get Polars module (if already imported - else return None).
+ +get_pyarrow()
+
+Get pyarrow module (if already imported - else return None).
+ +is_cudf_dataframe(df)
+
+Check whether df
is a cudf DataFrame without importing cudf.
is_cudf_index(index)
+
+Check whether index
is a cudf Index without importing cudf.
is_cudf_series(ser)
+
+Check whether ser
is a cudf Series without importing cudf.
is_dask_dataframe(df)
+
+Check whether df
is a Dask DataFrame without importing Dask.
is_ibis_table(df)
+
+Check whether df
is an Ibis Table without importing Ibis.
is_into_dataframe(native_dataframe)
+
+Check whether native_dataframe
can be converted to a Narwhals DataFrame.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_dataframe
+ |
+
+ Any
+ |
+
+
+
+ The object to check. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+
|
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> from narwhals.dependencies import is_into_dataframe
+
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])
+
>>> is_into_dataframe(df_pd)
+True
+>>> is_into_dataframe(df_pl)
+True
+>>> is_into_dataframe(np_arr)
+False
+
is_into_series(native_series)
+
+Check whether native_series
can be converted to a Narwhals Series.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_series
+ |
+
+ IntoSeries
+ |
+
+
+
+ The object to check. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+
|
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> import narwhals as nw
+
>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+>>> np_arr = np.array([1, 2, 3])
+
>>> nw.dependencies.is_into_series(s_pd)
+True
+>>> nw.dependencies.is_into_series(s_pl)
+True
+>>> nw.dependencies.is_into_series(np_arr)
+False
+
is_modin_dataframe(df)
+
+Check whether df
is a modin DataFrame without importing modin.
is_modin_index(index)
+
+Check whether index
is a modin Index without importing modin.
is_modin_series(ser)
+
+Check whether ser
is a modin Series without importing modin.
is_numpy_array(arr)
+
+Check whether arr
is a NumPy Array without importing NumPy.
is_pandas_dataframe(df)
+
+Check whether df
is a pandas DataFrame without importing pandas.
is_pandas_index(index)
+
+Check whether index
is a pandas Index without importing pandas.
is_pandas_like_dataframe(df)
+
+Check whether df
is a pandas-like DataFrame without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_like_index(index)
+
+Check whether index
is a pandas-like Index without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_like_series(ser)
+
+Check whether ser
is a pandas-like Series without doing any imports.
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_series(ser)
+
+Check whether ser
is a pandas Series without importing pandas.
is_polars_dataframe(df)
+
+Check whether df
is a Polars DataFrame without importing Polars.
is_polars_lazyframe(df)
+
+Check whether df
is a Polars LazyFrame without importing Polars.
is_polars_series(ser)
+
+Check whether ser
is a Polars Series without importing Polars.
is_pyarrow_chunked_array(ser)
+
+Check whether ser
is a PyArrow ChunkedArray without importing PyArrow.
is_pyarrow_table(df)
+
+Check whether df
is a PyArrow Table without importing PyArrow.
narwhals.dtypes
Array
+
+
+Fixed length list type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ inner
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ The datatype of the values within each array. + |
+ + required + | +
+ width
+ |
+
+ int | None
+ |
+
+
+
+ The length of each array. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [[1, 2], [3, 4], [5, 6]]
+>>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int32(), 2)))
+>>> ser_pl = pl.Series(data, dtype=pl.Array(pl.Int32, 2))
+>>> ser_pa = pa.chunked_array([data], type=pa.list_(pa.int32(), 2))
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Array(Int32, 2)
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Array(Int32, 2)
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Array(Int32, 2)
+
Decimal
+
+
+Decimal type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = pl.Series(["1.5"], dtype=pl.Decimal)
+>>> nw.from_native(s, series_only=True).dtype
+Decimal
+
List
+
+
+Variable length list type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [["narwhal", "orca"], ["beluga", "vaquita"]]
+>>> ser_pd = pd.Series(data, dtype=pd.ArrowDtype(pa.large_list(pa.large_string())))
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+List(String)
+>>> nw.from_native(ser_pl, series_only=True).dtype
+List(String)
+>>> nw.from_native(ser_pa, series_only=True).dtype
+List(String)
+
Int128
+
+
+128-bit signed integer type.
+ + + + + + + + + +Int64
+
+
+64-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Int64
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Int64
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Int64
+
Int32
+
+
+32-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Int32).dtype
+
>>> func(ser_pd)
+Int32
+>>> func(ser_pl)
+Int32
+>>> func(ser_pa)
+Int32
+
Int16
+
+
+16-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Int16).dtype
+
>>> func(ser_pd)
+Int16
+>>> func(ser_pl)
+Int16
+>>> func(ser_pa)
+Int16
+
Int8
+
+
+8-bit signed integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Int8).dtype
+
>>> func(ser_pd)
+Int8
+>>> func(ser_pl)
+Int8
+>>> func(ser_pa)
+Int8
+
UInt128
+
+
+128-bit unsigned integer type.
+ + + + + + + + + +UInt64
+
+
+64-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt64).dtype
+
>>> func(ser_pd)
+UInt64
+>>> func(ser_pl)
+UInt64
+>>> func(ser_pa)
+UInt64
+
UInt32
+
+
+32-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt32).dtype
+
>>> func(ser_pd)
+UInt32
+>>> func(ser_pl)
+UInt32
+>>> func(ser_pa)
+UInt32
+
UInt16
+
+
+16-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt16).dtype
+
>>> func(ser_pd)
+UInt16
+>>> func(ser_pl)
+UInt16
+>>> func(ser_pa)
+UInt16
+
UInt8
+
+
+8-bit unsigned integer type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [2, 1, 3, 7]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.UInt8).dtype
+
>>> func(ser_pd)
+UInt8
+>>> func(ser_pl)
+UInt8
+>>> func(ser_pa)
+UInt8
+
Field
+
+
+Definition of a single field within a Struct
DataType.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The name of the field within its parent |
+ + required + | +
+ dtype
+ |
+
+ type[DType] | DType
+ |
+
+
+
+ The |
+ + required + | +
Float64
+
+
+64-bit floating point type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [0.001, 0.1, 0.01, 0.1]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Float64
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Float64
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Float64
+
Float32
+
+
+32-bit floating point type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [0.001, 0.1, 0.01, 0.1]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> def func(ser):
+... ser_nw = nw.from_native(ser, series_only=True)
+... return ser_nw.cast(nw.Float32).dtype
+
>>> func(ser_pd)
+Float32
+>>> func(ser_pl)
+Float32
+>>> func(ser_pa)
+Float32
+
Boolean
+
+
+Boolean type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [True, False, False, True]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Boolean
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Boolean
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Boolean
+
Categorical
+
+
+A categorical encoding of a set of strings.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = ["beluga", "narwhal", "orca", "vaquita"]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).cast(nw.Categorical).dtype
+Categorical
+>>> nw.from_native(ser_pl, series_only=True).cast(nw.Categorical).dtype
+Categorical
+>>> nw.from_native(ser_pa, series_only=True).cast(nw.Categorical).dtype
+Categorical
+
Enum
+
+
+A fixed categorical encoding of a unique set of strings.
+Polars has an Enum data type, while pandas and PyArrow do not.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["beluga", "narwhal", "orca", "vaquita"]
+>>> ser_pl = pl.Series(data, dtype=pl.Enum(data))
+
>>> nw.from_native(ser_pl, series_only=True).dtype
+Enum
+
String
+
+
+UTF-8 encoded string type.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = ["beluga", "narwhal", "orca", "vaquita"]
+>>> ser_pd = pd.Series(data)
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+String
+>>> nw.from_native(ser_pl, series_only=True).dtype
+String
+>>> nw.from_native(ser_pa, series_only=True).dtype
+String
+
Struct
+
+
+Struct composite type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ fields
+ |
+
+ Sequence[Field] | Mapping[str, DType | type[DType]]
+ |
+
+
+
+ The fields that make up the struct. Can be either a sequence of Field +objects or a mapping of column names to data types. + |
+ + required + | +
Examples:
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = [{"a": 1, "b": ["narwhal", "beluga"]}, {"a": 2, "b": ["orca"]}]
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pl, series_only=True).dtype
+Struct({'a': Int64, 'b': List(String)})
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Struct({'a': Int64, 'b': List(String)})
+
to_schema()
+
+Return Struct dtype as a schema dict.
+ + +Returns:
+Type | +Description | +
---|---|
+ OrderedDict[str, DType | type[DType]]
+ |
+
+
+
+ Mapping from column name to dtype. + |
+
Date
+
+
+Data type representing a calendar date.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from datetime import date, timedelta
+>>> data = [date(2024, 12, 1) + timedelta(days=d) for d in range(4)]
+>>> ser_pd = pd.Series(data, dtype="date32[pyarrow]")
+>>> ser_pl = pl.Series(data)
+>>> ser_pa = pa.chunked_array([data])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Date
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Date
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Date
+
Datetime
+
+
+Data type representing a calendar date and time of day.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['us', 'ns', 'ms', 's']
+ |
+
+
+
+ Unit of time. Defaults to 'us'. |
+
+ 'us'
+ |
+
+ time_zone
+ |
+
+ str | timezone | None
+ |
+
+
+
+ Time zone string, as defined in zoneinfo (to see valid strings, run `import zoneinfo; zoneinfo.available_timezones()` for a full list).
+ |
+
+ None
+ |
+
Adapted from Polars implementation
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> import narwhals as nw
+>>> from datetime import datetime, timedelta
+>>> data = [datetime(2024, 12, 9) + timedelta(days=n) for n in range(5)]
+>>> ser_pd = (
+... pd.Series(data)
+... .dt.tz_localize("Africa/Accra")
+... .astype("datetime64[ms, Africa/Accra]")
+... )
+>>> ser_pl = (
+... pl.Series(data).cast(pl.Datetime("ms")).dt.replace_time_zone("Africa/Accra")
+... )
+>>> ser_pa = pc.assume_timezone(
+... pa.chunked_array([data], type=pa.timestamp("ms")), "Africa/Accra"
+... )
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Datetime(time_unit='ms', time_zone='Africa/Accra')
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Datetime(time_unit='ms', time_zone='Africa/Accra')
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Datetime(time_unit='ms', time_zone='Africa/Accra')
+
Duration
+
+
+Data type representing a time duration.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['us', 'ns', 'ms', 's']
+ |
+
+
+
+ Unit of time. Defaults to 'us'. |
+
+ 'us'
+ |
+
Adapted from Polars implementation
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from datetime import timedelta
+>>> data = [timedelta(seconds=d) for d in range(1, 4)]
+>>> ser_pd = pd.Series(data).astype("timedelta64[ms]")
+>>> ser_pl = pl.Series(data).cast(pl.Duration("ms"))
+>>> ser_pa = pa.chunked_array([data], type=pa.duration("ms"))
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Duration(time_unit='ms')
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Duration(time_unit='ms')
+>>> nw.from_native(ser_pa, series_only=True).dtype
+Duration(time_unit='ms')
+
Object
+
+
+Data type for wrapping arbitrary Python objects.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> class Foo: ...
+>>> ser_pd = pd.Series([Foo(), Foo()])
+>>> ser_pl = pl.Series([Foo(), Foo()])
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Object
+>>> nw.from_native(ser_pl, series_only=True).dtype
+Object
+
Unknown
+
+
+Type representing DataType values that could not be determined statically.
+ + +Examples:
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = pd.period_range("2000-01", periods=4, freq="M")
+>>> ser_pd = pd.Series(data)
+
>>> nw.from_native(ser_pd, series_only=True).dtype
+Unknown
+
narwhals.exceptions
ColumnNotFoundError
+
+
+Exception raised when column name isn't present.
+ + + + + + + + + +InvalidIntoExprError
+
+
+Exception raised when object can't be converted to expression.
+ + + + + + + + + +InvalidOperationError
+
+
+Exception raised during invalid operations.
+ + + + + + + +NarwhalsUnstableWarning
+
+
+Warning issued when a method or function is considered unstable in the stable API.
+ + + + + + + +narwhals.Expr
abs()
+
+Return absolute value of each element.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, -2], "b": [-3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_abs(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").abs()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_abs
:
>>> agnostic_abs(df_pd)
+ a b
+0 1 3
+1 2 4
+
>>> agnostic_abs(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
>>> agnostic_abs(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2]]
+b: [[3,4]]
+
alias(name)
+
+Rename the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_alias(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select((nw.col("b") + 10).alias("c")).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_alias
:
>>> agnostic_alias(df_pd)
+ c
+0 14
+1 15
+
>>> agnostic_alias(df_pl)
+shape: (2, 1)
+┌─────┐
+│ c │
+│ --- │
+│ i64 │
+╞═════╡
+│ 14 │
+│ 15 │
+└─────┘
+
>>> agnostic_alias(df_pa)
+pyarrow.Table
+c: int64
+----
+c: [[14,15]]
+
all()
+
+Return whether all values in the column are True
.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [True, False], "b": [True, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").all()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_all
:
>>> agnostic_all(df_pd)
+ a b
+0 False True
+
>>> agnostic_all(df_pl)
+shape: (1, 2)
+┌───────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪══════╡
+│ false ┆ true │
+└───────┴──────┘
+
>>> agnostic_all(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false]]
+b: [[true]]
+
any()
+
+Return whether any of the values in the column are True
.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [True, False], "b": [True, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_any(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").any()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_any
:
>>> agnostic_any(df_pd)
+ a b
+0 True True
+
>>> agnostic_any(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞══════╪══════╡
+│ true ┆ true │
+└──────┴──────┘
+
>>> agnostic_any(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true]]
+b: [[true]]
+
arg_max()
+
+Returns the index of the maximum value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [10, 20], "b": [150, 100]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").arg_max().name.suffix("_arg_max")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_arg_max
:
>>> agnostic_arg_max(df_pd)
+ a_arg_max b_arg_max
+0 1 0
+
>>> agnostic_arg_max(df_pl)
+shape: (1, 2)
+┌───────────┬───────────┐
+│ a_arg_max ┆ b_arg_max │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═══════════╪═══════════╡
+│ 1 ┆ 0 │
+└───────────┴───────────┘
+
>>> agnostic_arg_max(df_pa)
+pyarrow.Table
+a_arg_max: int64
+b_arg_max: int64
+----
+a_arg_max: [[1]]
+b_arg_max: [[0]]
+
arg_min()
+
+Returns the index of the minimum value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [10, 20], "b": [150, 100]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").arg_min().name.suffix("_arg_min")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_arg_min
:
>>> agnostic_arg_min(df_pd)
+ a_arg_min b_arg_min
+0 0 1
+
>>> agnostic_arg_min(df_pl)
+shape: (1, 2)
+┌───────────┬───────────┐
+│ a_arg_min ┆ b_arg_min │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═══════════╪═══════════╡
+│ 0 ┆ 1 │
+└───────────┴───────────┘
+
>>> agnostic_arg_min(df_pa)
+pyarrow.Table
+a_arg_min: int64
+b_arg_min: int64
+----
+a_arg_min: [[0]]
+b_arg_min: [[1]]
+
arg_true()
+
+Find elements where boolean expression is True.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, None, None, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_arg_true(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").is_null().arg_true()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_arg_true
:
>>> agnostic_arg_true(df_pd)
+ a
+1 1
+2 2
+
>>> agnostic_arg_true(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ u32 │
+╞═════╡
+│ 1 │
+│ 2 │
+└─────┘
+
>>> agnostic_arg_true(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,2]]
+
cast(dtype)
+
+Redefine an object's data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtype
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_cast(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cast
:
>>> agnostic_cast(df_pd)
+ foo bar
+0 1.0 6
+1 2.0 7
+2 3.0 8
+>>> agnostic_cast(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ f32 ┆ u8 │
+╞═════╪═════╡
+│ 1.0 ┆ 6 │
+│ 2.0 ┆ 7 │
+│ 3.0 ┆ 8 │
+└─────┴─────┘
+>>> agnostic_cast(df_pa)
+pyarrow.Table
+foo: float
+bar: uint8
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+
count()
+
+Returns the number of non-null elements in the column.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [None, 4, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_count(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().count()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_count
:
>>> agnostic_count(df_pd)
+ a b
+0 3 2
+
>>> agnostic_count(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 3 ┆ 2 │
+└─────┴─────┘
+
>>> agnostic_count(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[3]]
+b: [[2]]
+
cum_count(*, reverse=False)
+
+Return the cumulative count of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ Reverse the operation. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": ["x", "k", None, "d"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_cum_count(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_count().alias("cum_count"),
+... nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_count
:
>>> agnostic_cum_count(df_pd)
+ a cum_count cum_count_reverse
+0 x 1 3
+1 k 2 2
+2 None 2 1
+3 d 3 1
+
>>> agnostic_cum_count(df_pl)
+shape: (4, 3)
+┌──────┬───────────┬───────────────────┐
+│ a ┆ cum_count ┆ cum_count_reverse │
+│ --- ┆ --- ┆ --- │
+│ str ┆ u32 ┆ u32 │
+╞══════╪═══════════╪═══════════════════╡
+│ x ┆ 1 ┆ 3 │
+│ k ┆ 2 ┆ 2 │
+│ null ┆ 2 ┆ 1 │
+│ d ┆ 3 ┆ 1 │
+└──────┴───────────┴───────────────────┘
+
>>> agnostic_cum_count(df_pa)
+pyarrow.Table
+a: string
+cum_count: uint32
+cum_count_reverse: uint32
+----
+a: [["x","k",null,"d"]]
+cum_count: [[1,2,2,3]]
+cum_count_reverse: [[3,2,1,1]]
+
cum_max(*, reverse=False)
+
+Return the cumulative max of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ Reverse the operation. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 3, None, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_cum_max(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_max().alias("cum_max"),
+... nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_max
:
>>> agnostic_cum_max(df_pd)
+ a cum_max cum_max_reverse
+0 1.0 1.0 3.0
+1 3.0 3.0 3.0
+2 NaN NaN NaN
+3 2.0 3.0 2.0
+
>>> agnostic_cum_max(df_pl)
+shape: (4, 3)
+┌──────┬─────────┬─────────────────┐
+│ a ┆ cum_max ┆ cum_max_reverse │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════╪═════════╪═════════════════╡
+│ 1 ┆ 1 ┆ 3 │
+│ 3 ┆ 3 ┆ 3 │
+│ null ┆ null ┆ null │
+│ 2 ┆ 3 ┆ 2 │
+└──────┴─────────┴─────────────────┘
+
>>> agnostic_cum_max(df_pa)
+pyarrow.Table
+a: int64
+cum_max: int64
+cum_max_reverse: int64
+----
+a: [[1,3,null,2]]
+cum_max: [[1,3,null,3]]
+cum_max_reverse: [[3,3,null,2]]
+
cum_min(*, reverse=False)
+
+Return the cumulative min of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ Reverse the operation. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [3, 1, None, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_cum_min(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_min().alias("cum_min"),
+... nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_min
:
>>> agnostic_cum_min(df_pd)
+ a cum_min cum_min_reverse
+0 3.0 3.0 1.0
+1 1.0 1.0 1.0
+2 NaN NaN NaN
+3 2.0 1.0 2.0
+
>>> agnostic_cum_min(df_pl)
+shape: (4, 3)
+┌──────┬─────────┬─────────────────┐
+│ a ┆ cum_min ┆ cum_min_reverse │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════╪═════════╪═════════════════╡
+│ 3 ┆ 3 ┆ 1 │
+│ 1 ┆ 1 ┆ 1 │
+│ null ┆ null ┆ null │
+│ 2 ┆ 1 ┆ 2 │
+└──────┴─────────┴─────────────────┘
+
>>> agnostic_cum_min(df_pa)
+pyarrow.Table
+a: int64
+cum_min: int64
+cum_min_reverse: int64
+----
+a: [[3,1,null,2]]
+cum_min: [[3,1,null,1]]
+cum_min_reverse: [[1,1,null,2]]
+
cum_prod(*, reverse=False)
+
+Return the cumulative product of the non-null values in the column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ Reverse the operation. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 3, None, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_cum_prod(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a").cum_prod().alias("cum_prod"),
+... nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_prod
:
>>> agnostic_cum_prod(df_pd)
+ a cum_prod cum_prod_reverse
+0 1.0 1.0 6.0
+1 3.0 3.0 6.0
+2 NaN NaN NaN
+3 2.0 6.0 2.0
+
>>> agnostic_cum_prod(df_pl)
+shape: (4, 3)
+┌──────┬──────────┬──────────────────┐
+│ a ┆ cum_prod ┆ cum_prod_reverse │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════╪══════════╪══════════════════╡
+│ 1 ┆ 1 ┆ 6 │
+│ 3 ┆ 3 ┆ 6 │
+│ null ┆ null ┆ null │
+│ 2 ┆ 6 ┆ 2 │
+└──────┴──────────┴──────────────────┘
+
>>> agnostic_cum_prod(df_pa)
+pyarrow.Table
+a: int64
+cum_prod: int64
+cum_prod_reverse: int64
+----
+a: [[1,3,null,2]]
+cum_prod: [[1,3,null,6]]
+cum_prod_reverse: [[6,6,null,2]]
+
cum_sum(*, reverse=False)
+
+Return cumulative sum.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ Reverse the operation. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").cum_sum()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_sum
:
>>> agnostic_cum_sum(df_pd)
+ a b
+0 1 2
+1 2 6
+2 5 10
+3 10 16
+4 15 22
+>>> agnostic_cum_sum(df_pl)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 6 │
+│ 5 ┆ 10 │
+│ 10 ┆ 16 │
+│ 15 ┆ 22 │
+└─────┴─────┘
+>>> agnostic_cum_sum(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,5,10,15]]
+b: [[2,6,10,16,22]]
+
diff()
+
+Returns the difference between each element and the previous one.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in an Int64 column, you could
+do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 1, 3, 5, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_diff(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(a_diff=nw.col("a").diff()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_diff
:
>>> agnostic_diff(df_pd)
+ a_diff
+0 NaN
+1 0.0
+2 2.0
+3 2.0
+4 0.0
+
>>> agnostic_diff(df_pl)
+shape: (5, 1)
+┌────────┐
+│ a_diff │
+│ --- │
+│ i64 │
+╞════════╡
+│ null │
+│ 0 │
+│ 2 │
+│ 2 │
+│ 0 │
+└────────┘
+
>>> agnostic_diff(df_pa)
+pyarrow.Table
+a_diff: int64
+----
+a_diff: [[null,0,2,2,0]]
+
drop_nulls()
+
+Drop null values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
+>>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").drop_nulls()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_drop_nulls
:
>>> agnostic_drop_nulls(df_pd)
+ a
+0 2.0
+1 4.0
+3 3.0
+5 5.0
+
>>> agnostic_drop_nulls(df_pl)
+shape: (4, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 2.0 │
+│ 4.0 │
+│ 3.0 │
+│ 5.0 │
+└─────┘
+
>>> agnostic_drop_nulls(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[2,4,3,5]]
+
ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)
+
+Compute exponentially-weighted moving average.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ com
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of center of mass, \(\gamma\), with \(\alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0\). |
+
+ None
+ |
+
+ span
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of span, \(\theta\), with \(\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1\). |
+
+ None
+ |
+
+ half_life
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of half-life, \(\tau\), with \(\alpha = 1 - \exp \left\{ \frac{-\ln(2)}{\tau} \right\} \; \forall \; \tau > 0\). |
+
+ None
+ |
+
+ alpha
+ |
+
+ float | None
+ |
+
+
+
+ Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). + |
+
+ None
+ |
+
+ adjust
+ |
+
+ bool
+ |
+
+
+
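+ Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings. When adjust=True (the default), the EW function is calculated using weights \(w_i = (1 - \alpha)^i\); when adjust=False, it is calculated recursively by \(y_0 = x_0\) and \(y_t = (1 - \alpha) y_{t - 1} + \alpha x_t\). +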
|
+
+ True
+ |
+
+ min_periods
+ |
+
+ int
+ |
+
+
+
+ Minimum number of observations in window required to have a value, (otherwise result is null). + |
+
+ 1
+ |
+
+ ignore_nulls
+ |
+
+ bool
+ |
+
+
+
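+ Ignore missing values when calculating weights. When ignore_nulls=False (the default), weights are based on absolute positions: for \([x_0, \text{None}, x_2]\), the weights of \(x_0\) and \(x_2\) in the final weighted average are \((1 - \alpha)^2\) and \(1\) if adjust=True, and \((1 - \alpha)^2\) and \(\alpha\) if adjust=False. When ignore_nulls=True, weights are based on relative positions: the weights of \(x_0\) and \(x_2\) are \(1 - \alpha\) and \(1\) if adjust=True, and \(1 - \alpha\) and \(\alpha\) if adjust=False. +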
|
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library-agnostic function:
+>>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
+... ).to_native()
+
We can then pass either pandas or Polars to agnostic_ewm_mean
:
>>> agnostic_ewm_mean(df_pd)
+ a
+0 1.000000
+1 1.666667
+2 2.428571
+
>>> agnostic_ewm_mean(df_pl)
+shape: (3, 1)
+┌──────────┐
+│ a │
+│ --- │
+│ f64 │
+╞══════════╡
+│ 1.0 │
+│ 1.666667 │
+│ 2.428571 │
+└──────────┘
+
fill_null(value=None, strategy=None, limit=None)
+
+Fill null values with given value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any | None
+ |
+
+
+
+ Value used to fill null values. + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['forward', 'backward'] | None
+ |
+
+
+
+ Strategy used to fill null values. + |
+
+ None
+ |
+
+ limit
+ |
+
+ int | None
+ |
+
+
+
+ Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+... }
+... )
+>>> data = {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, None, None, 3.0, 5.0],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(nw.col("a", "b").fill_null(0)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_fill_null
:
>>> agnostic_fill_null(df_pd)
+ a b
+0 2.0 2.0
+1 4.0 4.0
+2 0.0 0.0
+3 0.0 0.0
+4 3.0 3.0
+5 5.0 5.0
+
>>> agnostic_fill_null(df_pl)
+shape: (6, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 2.0 │
+│ 4 ┆ 4.0 │
+│ 0 ┆ 0.0 │
+│ 0 ┆ 0.0 │
+│ 3 ┆ 3.0 │
+│ 5 ┆ 5.0 │
+└─────┴─────┘
+
>>> agnostic_fill_null(df_pa)
+pyarrow.Table
+a: int64
+b: double
+----
+a: [[2,4,0,0,3,5]]
+b: [[2,4,0,0,3,5]]
+
Using a strategy:
+>>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("a", "b")
+... .fill_null(strategy="forward", limit=1)
+... .name.suffix("_filled")
+... ).to_native()
+
>>> agnostic_fill_null_with_strategy(df_pd)
+ a b a_filled b_filled
+0 2.0 2.0 2.0 2.0
+1 4.0 4.0 4.0 4.0
+2 NaN NaN 4.0 4.0
+3 NaN NaN NaN NaN
+4 3.0 3.0 3.0 3.0
+5 5.0 5.0 5.0 5.0
+
>>> agnostic_fill_null_with_strategy(df_pl)
+shape: (6, 4)
+┌──────┬──────┬──────────┬──────────┐
+│ a ┆ b ┆ a_filled ┆ b_filled │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ i64 ┆ f64 │
+╞══════╪══════╪══════════╪══════════╡
+│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │
+│ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │
+│ null ┆ null ┆ 4 ┆ 4.0 │
+│ null ┆ null ┆ null ┆ null │
+│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │
+│ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │
+└──────┴──────┴──────────┴──────────┘
+
>>> agnostic_fill_null_with_strategy(df_pa)
+pyarrow.Table
+a: int64
+b: double
+a_filled: int64
+b_filled: double
+----
+a: [[2,4,null,null,3,5]]
+b: [[2,4,null,null,3,5]]
+a_filled: [[2,4,4,null,3,5]]
+b_filled: [[2,4,4,null,3,5]]
+
filter(*predicates)
+
+Filters elements based on a condition, returning a new expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ predicates
+ |
+
+ Any
+ |
+
+
+
+ Conditions to filter by (which get ANDed together). + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").filter(nw.col("a") > 4),
+... nw.col("b").filter(nw.col("b") < 13),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_filter
:
>>> agnostic_filter(df_pd)
+ a b
+3 5 10
+4 6 11
+5 7 12
+
>>> agnostic_filter(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 5 ┆ 10 │
+│ 6 ┆ 11 │
+│ 7 ┆ 12 │
+└─────┴─────┘
+
>>> agnostic_filter(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[5,6,7]]
+b: [[10,11,12]]
+
gather_every(n, offset=0)
+
+Take every nth value in the Series and return them as a new Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we gather every 2 rows, +starting from an offset of 1:
+>>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_gather_every
:
>>> agnostic_gather_every(df_pd)
+ a
+1 2
+3 4
+
>>> agnostic_gather_every(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 4 │
+└─────┘
+
>>> agnostic_gather_every(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,4]]
+
head(n=10)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").head(3)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_head
:
>>> agnostic_head(df_pd)
+ a
+0 0
+1 1
+2 2
+
>>> agnostic_head(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 0 │
+│ 1 │
+│ 2 │
+└─────┘
+
>>> agnostic_head(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[0,1,2]]
+
clip(lower_bound=None, upper_bound=None)
+
+Clip values in the Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ IntoExpr | Any | None
+ |
+
+
+
+ Lower bound value. + |
+
+ None
+ |
+
+ upper_bound
+ |
+
+ IntoExpr | Any | None
+ |
+
+
+
+ Upper bound value. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_clip_lower(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").clip(2)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_clip_lower
:
>>> agnostic_clip_lower(df_pd)
+ a
+0 2
+1 2
+2 3
+
>>> agnostic_clip_lower(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 2 │
+│ 3 │
+└─────┘
+
>>> agnostic_clip_lower(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,2,3]]
+
We define another library agnostic function:
+>>> def agnostic_clip_upper(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").clip(upper_bound=2)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_clip_upper
:
>>> agnostic_clip_upper(df_pd)
+ a
+0 1
+1 2
+2 2
+
>>> agnostic_clip_upper(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 2 │
+└─────┘
+
>>> agnostic_clip_upper(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,2,2]]
+
We can have both at the same time:
+>>> data = {"a": [-1, 1, -3, 3, -5, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_clip(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").clip(-1, 3)).to_native()
+
We can pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_clip
:
>>> agnostic_clip(df_pd)
+ a
+0 -1
+1 1
+2 -1
+3 3
+4 -1
+5 3
+
>>> agnostic_clip(df_pl)
+shape: (6, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ -1 │
+│ 1 │
+│ -1 │
+│ 3 │
+│ -1 │
+│ 3 │
+└─────┘
+
>>> agnostic_clip(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[-1,1,-1,3,-1,3]]
+
is_between(lower_bound, upper_bound, closed='both')
+
+Check if this expression is between the given lower and upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any | IntoExpr
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
+ upper_bound
+ |
+
+ Any | IntoExpr
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
+ closed
+ |
+
+ Literal['left', 'right', 'none', 'both']
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_between(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").is_between(2, 4, "right")).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_between
:
>>> agnostic_is_between(df_pd)
+ a
+0 False
+1 False
+2 True
+3 True
+4 False
+
>>> agnostic_is_between(df_pl)
+shape: (5, 1)
+┌───────┐
+│ a │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ false │
+│ true │
+│ true │
+│ false │
+└───────┘
+
>>> agnostic_is_between(df_pa)
+pyarrow.Table
+a: bool
+----
+a: [[false,false,true,true,false]]
+
is_duplicated()
+
+Return a boolean mask indicating duplicated values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_duplicated(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_duplicated()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_duplicated
:
>>> agnostic_is_duplicated(df_pd)
+ a b
+0 True True
+1 False True
+2 False False
+3 True False
+
>>> agnostic_is_duplicated(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ false ┆ true │
+│ false ┆ false │
+│ true ┆ false │
+└───────┴───────┘
+
>>> agnostic_is_duplicated(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true,false,false,true]]
+b: [[true,true,false,false]]
+
is_finite()
+
+Returns boolean values indicating which original values are finite.
+ + +Different backends handle null values differently. is_finite
will return
+False for NaN and nulls in the Dask and pandas non-nullable backends, while
+for Polars, PyArrow and pandas nullable backends null values are kept as such.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ Expression of Boolean data type. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [float("nan"), float("inf"), 2.0, None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_is_finite(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").is_finite()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_finite
:
>>> agnostic_is_finite(df_pd)
+ a
+0 False
+1 False
+2 True
+3 False
+
>>> agnostic_is_finite(df_pl)
+shape: (4, 1)
+┌───────┐
+│ a │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ false │
+│ true │
+│ null │
+└───────┘
+
>>> agnostic_is_finite(df_pa)
+pyarrow.Table
+a: bool
+----
+a: [[false,false,true,null]]
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_first_distinct(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_first_distinct()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_first_distinct
:
>>> agnostic_is_first_distinct(df_pd)
+ a b
+0 True True
+1 True False
+2 True True
+3 False True
+
>>> agnostic_is_first_distinct(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+
>>> agnostic_is_first_distinct(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true,true,true,false]]
+b: [[true,false,true,true]]
+
is_in(other)
+
+Check if elements of this expression are present in the other iterable.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Any
+ |
+
+
+
+ iterable + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 9, 10]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_in(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_in
:
>>> agnostic_is_in(df_pd)
+ a b
+0 1 True
+1 2 True
+2 9 False
+3 10 False
+
>>> agnostic_is_in(df_pl)
+shape: (4, 2)
+┌─────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ bool │
+╞═════╪═══════╡
+│ 1 ┆ true │
+│ 2 ┆ true │
+│ 9 ┆ false │
+│ 10 ┆ false │
+└─────┴───────┘
+
>>> agnostic_is_in(df_pa)
+pyarrow.Table
+a: int64
+b: bool
+----
+a: [[1,2,9,10]]
+b: [[true,true,false,false]]
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_last_distinct(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_last_distinct()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_last_distinct
:
>>> agnostic_is_last_distinct(df_pd)
+ a b
+0 False False
+1 True True
+2 True True
+3 True True
+
>>> agnostic_is_last_distinct(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ true │
+│ true ┆ true │
+│ true ┆ true │
+└───────┴───────┘
+
>>> agnostic_is_last_distinct(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false,true,true,true]]
+b: [[false,true,true,true]]
+
is_nan()
+
+Indicate which values are NaN.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"orig": [0.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data).astype({"orig": "Float64"})
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_self_div_is_nan(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... divided=nw.col("orig") / nw.col("orig"),
+... divided_is_nan=(nw.col("orig") / nw.col("orig")).is_nan(),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_self_div_is_nan
:
>>> print(agnostic_self_div_is_nan(df_pd))
+ orig divided divided_is_nan
+0 0.0 NaN True
+1 <NA> <NA> <NA>
+2 2.0 1.0 False
+
>>> print(agnostic_self_div_is_nan(df_pl))
+shape: (3, 3)
+┌──────┬─────────┬────────────────┐
+│ orig ┆ divided ┆ divided_is_nan │
+│ --- ┆ --- ┆ --- │
+│ f64 ┆ f64 ┆ bool │
+╞══════╪═════════╪════════════════╡
+│ 0.0 ┆ NaN ┆ true │
+│ null ┆ null ┆ null │
+│ 2.0 ┆ 1.0 ┆ false │
+└──────┴─────────┴────────────────┘
+
>>> print(agnostic_self_div_is_nan(df_pa))
+pyarrow.Table
+orig: double
+divided: double
+divided_is_nan: bool
+----
+orig: [[0,null,2]]
+divided: [[nan,null,1]]
+divided_is_nan: [[true,null,false]]
+
is_null()
+
+Returns a boolean Series indicating which values are null.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [2, 4, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), 3.0, 5.0],
+... }
+... )
+>>> data = {
+... "a": [2, 4, None, 3, 5],
+... "b": [2.0, 4.0, None, 3.0, 5.0],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_null
:
>>> agnostic_is_null(df_pd)
+ a b a_is_null b_is_null
+0 2.0 2.0 False False
+1 4.0 4.0 False False
+2 NaN NaN True True
+3 3.0 3.0 False False
+4 5.0 5.0 False False
+
>>> agnostic_is_null(df_pl)
+shape: (5, 4)
+┌──────┬──────┬───────────┬───────────┐
+│ a ┆ b ┆ a_is_null ┆ b_is_null │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ bool │
+╞══════╪══════╪═══════════╪═══════════╡
+│ 2 ┆ 2.0 ┆ false ┆ false │
+│ 4 ┆ 4.0 ┆ false ┆ false │
+│ null ┆ null ┆ true ┆ true │
+│ 3 ┆ 3.0 ┆ false ┆ false │
+│ 5 ┆ 5.0 ┆ false ┆ false │
+└──────┴──────┴───────────┴───────────┘
+
>>> agnostic_is_null(df_pa)
+pyarrow.Table
+a: int64
+b: double
+a_is_null: bool
+b_is_null: bool
+----
+a: [[2,4,null,3,5]]
+b: [[2,4,null,3,5]]
+a_is_null: [[false,false,true,false,false]]
+b_is_null: [[false,false,true,false,false]]
+
is_unique()
+
+Return a boolean mask indicating unique values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_unique(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().is_unique()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_unique
:
>>> agnostic_is_unique(df_pd)
+ a b
+0 False False
+1 True False
+2 True True
+3 False True
+
>>> agnostic_is_unique(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+
>>> agnostic_is_unique(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false,true,true,false]]
+b: [[false,false,true,true]]
+
len()
+
+Return the number of elements in the column.
+Null values count towards the total.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that computes the len over +different values of the "b" column:
+>>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
+... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_len
:
>>> agnostic_len(df_pd)
+ a1 a2
+0 2 1
+
>>> agnostic_len(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a1 ┆ a2 │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 2 ┆ 1 │
+└─────┴─────┘
+
>>> agnostic_len(df_pa)
+pyarrow.Table
+a1: int64
+a2: int64
+----
+a1: [[2]]
+a2: [[1]]
+
map_batches(function, return_dtype=None)
+
+Apply a custom python function to a whole Series or sequence of Series.
+The output of this custom function is presumed to be either a Series, +or a NumPy array (in which case it will be automatically converted into +a Series).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[Any], Self]
+ |
+
+
+
+ Function to apply to Series. + |
+ + required + | +
+ return_dtype
+ |
+
+ DType | None
+ |
+
+
+
+ Dtype of the output Series. +If not set, the dtype will be inferred based on the first non-null value +that is returned by the function. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_map_batches(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").map_batches(
+... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64
+... )
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_map_batches
:
>>> agnostic_map_batches(df_pd)
+ a b
+0 2.0 5.0
+1 3.0 6.0
+2 4.0 7.0
+>>> agnostic_map_batches(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 2.0 ┆ 5.0 │
+│ 3.0 ┆ 6.0 │
+│ 4.0 ┆ 7.0 │
+└─────┴─────┘
+>>> agnostic_map_batches(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[2,3,4]]
+b: [[5,6,7]]
+
max()
+
+Returns the maximum value(s) from the given column(s).
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [10, 20], "b": [50, 100]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.max("a", "b")).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_max
:
>>> agnostic_max(df_pd)
+ a b
+0 20 100
+
>>> agnostic_max(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 20 ┆ 100 │
+└─────┴─────┘
+
>>> agnostic_max(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[20]]
+b: [[100]]
+
mean()
+
+Get mean value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [-1, 0, 1], "b": [2, 4, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").mean()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_mean
:
>>> agnostic_mean(df_pd)
+ a b
+0 0.0 4.0
+
>>> agnostic_mean(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 0.0 ┆ 4.0 │
+└─────┴─────┘
+
>>> agnostic_mean(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[0]]
+b: [[4]]
+
median()
+
+Get median value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 8, 3], "b": [4, 5, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").median()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_median
:
>>> agnostic_median(df_pd)
+ a b
+0 3.0 4.0
+
>>> agnostic_median(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 3.0 ┆ 4.0 │
+└─────┴─────┘
+
>>> agnostic_median(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[3]]
+b: [[4]]
+
min()
+
+Returns the minimum value(s) from the given column(s).
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [4, 3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.min("a", "b")).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_min
:
>>> agnostic_min(df_pd)
+ a b
+0 1 3
+
>>> agnostic_min(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+└─────┴─────┘
+
>>> agnostic_min(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1]]
+b: [[3]]
+
mode()
+
+Compute the most frequently occurring value(s).
+Can return multiple values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 1, 2, 3],
+... "b": [1, 1, 2, 2],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_mode(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").mode()).sort("a").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_mode
:
>>> agnostic_mode(df_pd)
+ a
+0 1
+
>>> agnostic_mode(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+└─────┘
+
>>> agnostic_mode(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1]]
+
null_count()
+
+Count null values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all().null_count()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_null_count
:
>>> agnostic_null_count(df_pd)
+ a b
+0 1 2
+
>>> agnostic_null_count(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+└─────┴─────┘
+
>>> agnostic_null_count(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1]]
+b: [[2]]
+
n_unique()
+
+Returns count of unique values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_n_unique(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").n_unique()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_n_unique
:
>>> agnostic_n_unique(df_pd)
+ a b
+0 5 3
+>>> agnostic_n_unique(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 5 ┆ 3 │
+└─────┴─────┘
+>>> agnostic_n_unique(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[5]]
+b: [[3]]
+
over(*keys)
+
+Compute expressions over the given groups.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of columns to compute window expression over.
+ Must be names of columns, as opposed to expressions -
+ so, this is a bit less flexible than Polars' Expr.over. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_min_over_b(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_min_per_group=nw.col("a").min().over("b")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_min_over_b
:
>>> agnostic_min_over_b(df_pd)
+ a b a_min_per_group
+0 1 1 1
+1 2 1 1
+2 3 2 3
+
>>> agnostic_min_over_b(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────────────────┐
+│ a ┆ b ┆ a_min_per_group │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════════════════╡
+│ 1 ┆ 1 ┆ 1 │
+│ 2 ┆ 1 ┆ 1 │
+│ 3 ┆ 2 ┆ 3 │
+└─────┴─────┴─────────────────┘
+
>>> agnostic_min_over_b(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+a_min_per_group: int64
+----
+a: [[1,2,3]]
+b: [[1,1,2]]
+a_min_per_group: [[1,1,3]]
+
Cumulative operations are also supported, but (currently) only for +pandas and Polars:
+>>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(c=nw.col("a").cum_sum().over("b")).to_native()
+
>>> agnostic_cum_sum(df_pd)
+ a b c
+0 1 1 1
+1 2 1 3
+2 3 2 3
+
>>> agnostic_cum_sum(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 1 ┆ 1 │
+│ 2 ┆ 1 ┆ 3 │
+│ 3 ┆ 2 ┆ 3 │
+└─────┴─────┴─────┘
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[Any], Self]
+ |
+
+
+
+ Function to apply. + |
+ + required + | +
+ args
+ |
+
+ Any
+ |
+
+
+
+ Positional arguments to pass to function. + |
+
+ ()
+ |
+
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Keyword arguments to pass to function. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a library-agnostic function:
+>>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_pipe
:
>>> agnostic_pipe(df_pd)
+ a
+0 2
+1 3
+2 4
+3 5
+
>>> agnostic_pipe(df_pl)
+shape: (4, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 3 │
+│ 4 │
+│ 5 │
+└─────┘
+
>>> agnostic_pipe(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,3,4,5]]
+
quantile(quantile, interpolation)
+
+Get quantile value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ quantile
+ |
+
+ float
+ |
+
+
+
+ Quantile between 0.0 and 1.0. + |
+ + required + | +
+ interpolation
+ |
+
+ Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
+ |
+
+
+
+ Interpolation method. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_quantile(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a", "b").quantile(0.5, interpolation="linear")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_quantile
:
>>> agnostic_quantile(df_pd)
+ a b
+0 24.5 74.5
+
>>> agnostic_quantile(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪══════╡
+│ 24.5 ┆ 74.5 │
+└──────┴──────┘
+
>>> agnostic_quantile(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[24.5]]
+b: [[74.5]]
+
rank(method='average', *, descending=False)
+
+Assign ranks to data, dealing with ties appropriately.
+ + +The resulting dtype may differ between backends.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ method
+ |
+
+ Literal['average', 'min', 'max', 'dense', 'ordinal']
+ |
+
+
+
+ The method used to assign ranks to tied elements. +The following methods are available (default is 'average'): +
|
+
+ 'average'
+ |
+
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Rank in descending order. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression with rank data. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [3, 6, 1, 1, 6]}
+
We define a dataframe-agnostic function that computes the dense rank for +the data:
+>>> def agnostic_dense_rank(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... result = df.with_columns(rnk=nw.col("a").rank(method="dense"))
+... return result.to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dense_rank
:
>>> agnostic_dense_rank(pd.DataFrame(data))
+ a rnk
+0 3 2.0
+1 6 3.0
+2 1 1.0
+3 1 1.0
+4 6 3.0
+
>>> agnostic_dense_rank(pl.DataFrame(data))
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ rnk │
+│ --- ┆ --- │
+│ i64 ┆ u32 │
+╞═════╪═════╡
+│ 3 ┆ 2 │
+│ 6 ┆ 3 │
+│ 1 ┆ 1 │
+│ 1 ┆ 1 │
+│ 6 ┆ 3 │
+└─────┴─────┘
+
>>> agnostic_dense_rank(pa.table(data))
+pyarrow.Table
+a: int64
+rnk: uint64
+----
+a: [[3,6,1,1,6]]
+rnk: [[2,3,1,1,3]]
+
replace_strict(old, new=None, *, return_dtype=None)
+
+Replace all values by different values.
+This function must replace all non-null input values (else it raises an error).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ old
+ |
+
+ Sequence[Any] | Mapping[Any, Any]
+ |
+
+
+
+ Sequence of values to replace. It also accepts a mapping of values to
+their replacement as syntactic sugar for
+ |
+ + required + | +
+ new
+ |
+
+ Sequence[Any] | None
+ |
+
+
+
+ Sequence of values to replace by. Length must match the length of |
+
+ None
+ |
+
+ return_dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ The data type of the resulting expression. If set to |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [3, 0, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_replace_strict(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").replace_strict(
+... [0, 1, 2, 3],
+... ["zero", "one", "two", "three"],
+... return_dtype=nw.String,
+... )
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_replace_strict
:
>>> agnostic_replace_strict(df_pd)
+ a b
+0 3 three
+1 0 zero
+2 1 one
+3 2 two
+
>>> agnostic_replace_strict(df_pl)
+shape: (4, 2)
+┌─────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ str │
+╞═════╪═══════╡
+│ 3 ┆ three │
+│ 0 ┆ zero │
+│ 1 ┆ one │
+│ 2 ┆ two │
+└─────┴───────┘
+
>>> agnostic_replace_strict(df_pa)
+pyarrow.Table
+a: int64
+b: string
+----
+a: [[3,0,1,2]]
+b: [["three","zero","one","two"]]
+
rolling_mean(window_size, *, min_periods=None, center=False)
+
+Apply a rolling mean (moving mean) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their mean.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.0, 2.0, None, 4.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").rolling_mean(window_size=3, min_periods=1)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_mean
:
>>> agnostic_rolling_mean(df_pd)
+ a b
+0 1.0 1.0
+1 2.0 1.5
+2 NaN 1.5
+3 4.0 3.0
+
>>> agnostic_rolling_mean(df_pl)
+shape: (4, 2)
+┌──────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪═════╡
+│ 1.0 ┆ 1.0 │
+│ 2.0 ┆ 1.5 │
+│ null ┆ 1.5 │
+│ 4.0 ┆ 3.0 │
+└──────┴─────┘
+
>>> agnostic_rolling_mean(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[1,2,null,4]]
+b: [[1,1.5,1.5,3]]
+
rolling_std(window_size, *, min_periods=None, center=False, ddof=1)
+
+Apply a rolling standard deviation (moving standard deviation) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their standard deviation.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
+ ddof
+ |
+
+ int
+ |
+
+
+
+ Delta Degrees of Freedom; the divisor for a length N window is N - ddof. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.0, 2.0, None, 4.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").rolling_std(window_size=3, min_periods=1)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_std
:
>>> agnostic_rolling_std(df_pd)
+ a b
+0 1.0 NaN
+1 2.0 0.707107
+2 NaN 0.707107
+3 4.0 1.414214
+
>>> agnostic_rolling_std(df_pl)
+shape: (4, 2)
+┌──────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪══════════╡
+│ 1.0 ┆ null │
+│ 2.0 ┆ 0.707107 │
+│ null ┆ 0.707107 │
+│ 4.0 ┆ 1.414214 │
+└──────┴──────────┘
+
>>> agnostic_rolling_std(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[1,2,null,4]]
+b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]]
+
rolling_sum(window_size, *, min_periods=None, center=False)
+
+Apply a rolling sum (moving sum) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their sum.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.0, 2.0, None, 4.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").rolling_sum(window_size=3, min_periods=1)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_sum
:
>>> agnostic_rolling_sum(df_pd)
+ a b
+0 1.0 1.0
+1 2.0 3.0
+2 NaN 3.0
+3 4.0 6.0
+
>>> agnostic_rolling_sum(df_pl)
+shape: (4, 2)
+┌──────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪═════╡
+│ 1.0 ┆ 1.0 │
+│ 2.0 ┆ 3.0 │
+│ null ┆ 3.0 │
+│ 4.0 ┆ 6.0 │
+└──────┴─────┘
+
>>> agnostic_rolling_sum(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[1,2,null,4]]
+b: [[1,3,3,6]]
+
rolling_var(window_size, *, min_periods=None, center=False, ddof=1)
+
+Apply a rolling variance (moving variance) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their variance.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
+ ddof
+ |
+
+ int
+ |
+
+
+
+ Delta Degrees of Freedom; the divisor for a length N window is N - ddof. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.0, 2.0, None, 4.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library-agnostic function:
+>>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... b=nw.col("a").rolling_var(window_size=3, min_periods=1)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_var
:
>>> agnostic_rolling_var(df_pd)
+ a b
+0 1.0 NaN
+1 2.0 0.5
+2 NaN 0.5
+3 4.0 2.0
+
>>> agnostic_rolling_var(df_pl)
+shape: (4, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪══════╡
+│ 1.0 ┆ null │
+│ 2.0 ┆ 0.5 │
+│ null ┆ 0.5 │
+│ 4.0 ┆ 2.0 │
+└──────┴──────┘
+
>>> agnostic_rolling_var(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[1,2,null,4]]
+b: [[nan,0.5,0.5,2]]
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ decimals
+ |
+
+ int
+ |
+
+
+
+ Number of decimals to round by. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 round to 4.0, etc.).
+Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.12345, 2.56789, 3.901234]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> def agnostic_round(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").round(1)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_round
:
>>> agnostic_round(df_pd)
+ a
+0 1.1
+1 2.6
+2 3.9
+
>>> agnostic_round(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 1.1 │
+│ 2.6 │
+│ 3.9 │
+└─────┘
+
>>> agnostic_round(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[1.1,2.6,3.9]]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample randomly from this expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_sample(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").sample(fraction=1.0, with_replacement=True)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_sample
:
>>> agnostic_sample(df_pd)
+ a
+2 3
+0 1
+2 3
+
>>> agnostic_sample(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 3 │
+│ 3 │
+└─────┘
+
>>> agnostic_sample(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,3,3]]
+
shift(n)
+
+Shift values by n
positions.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of positions to shift values by. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in an Int64 column, you could
+do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 1, 3, 5, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_shift(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(a_shift=nw.col("a").shift(n=1)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_shift
:
>>> agnostic_shift(df_pd)
+ a_shift
+0 NaN
+1 1.0
+2 1.0
+3 3.0
+4 5.0
+
>>> agnostic_shift(df_pl)
+shape: (5, 1)
+┌─────────┐
+│ a_shift │
+│ --- │
+│ i64 │
+╞═════════╡
+│ null │
+│ 1 │
+│ 1 │
+│ 3 │
+│ 5 │
+└─────────┘
+
>>> agnostic_shift(df_pa)
+pyarrow.Table
+a_shift: int64
+----
+a_shift: [[null,1,1,3,5]]
+
sort(*, descending=False, nulls_last=False)
+
+Sort this column. Place null values first.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last instead of first. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [5, None, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define dataframe-agnostic functions:
+>>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").sort()).to_native()
+
>>> def agnostic_sort_descending(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").sort(descending=True)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_sort
and agnostic_sort_descending
:
>>> agnostic_sort(df_pd)
+ a
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+
>>> agnostic_sort(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 1 │
+│ 2 │
+│ 5 │
+└──────┘
+
>>> agnostic_sort(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[null,1,2,5]]
+
>>> agnostic_sort_descending(df_pd)
+ a
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+
>>> agnostic_sort_descending(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 5 │
+│ 2 │
+│ 1 │
+└──────┘
+
>>> agnostic_sort_descending(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[null,5,2,1]]
+
skew()
+
+Calculate the sample skewness of a column.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ An expression representing the sample skewness of the column. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_skew(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").skew()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_skew
:
>>> agnostic_skew(df_pd)
+ a b
+0 0.0 1.472427
+
>>> agnostic_skew(df_pl)
+shape: (1, 2)
+┌─────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪══════════╡
+│ 0.0 ┆ 1.472427 │
+└─────┴──────────┘
+
>>> agnostic_skew(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[0]]
+b: [[1.4724267269058975]]
+
std(*, ddof=1)
+
+Get standard deviation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, +where N represents the number of elements. By default ddof is 1. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_std(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").std(ddof=0)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_std
:
>>> agnostic_std(df_pd)
+ a b
+0 17.79513 1.265789
+>>> agnostic_std(df_pl)
+shape: (1, 2)
+┌──────────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════════╪══════════╡
+│ 17.79513 ┆ 1.265789 │
+└──────────┴──────────┘
+>>> agnostic_std(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[17.795130420052185]]
+b: [[1.2657891697365016]]
+
sum()
+
+Return the sum value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [5, 10], "b": [50, 100]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").sum()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_sum
:
>>> agnostic_sum(df_pd)
+ a b
+0 15 150
+>>> agnostic_sum(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 15 ┆ 150 │
+└─────┴─────┘
+>>> agnostic_sum(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[15]]
+b: [[150]]
+
tail(n=10)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").tail(3)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_tail
:
>>> agnostic_tail(df_pd)
+ a
+7 7
+8 8
+9 9
+
>>> agnostic_tail(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 7 │
+│ 8 │
+│ 9 │
+└─────┘
+
>>> agnostic_tail(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[7,8,9]]
+
unique(*, maintain_order=False)
+
+Return unique values of this expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original expression. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").unique(maintain_order=True)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_unique
:
>>> agnostic_unique(df_pd)
+ a b
+0 1 2
+1 3 4
+2 5 6
+
>>> agnostic_unique(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 3 ┆ 4 │
+│ 5 ┆ 6 │
+└─────┴─────┘
+
>>> agnostic_unique(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,3,5]]
+b: [[2,4,6]]
+
var(*, ddof=1)
+
+Get variance.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, + where N represents the number of elements. By default ddof is 1. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_var(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a", "b").var(ddof=0)).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_var
:
>>> agnostic_var(df_pd)
+ a b
+0 316.666667 1.602222
+
>>> agnostic_var(df_pl)
+shape: (1, 2)
+┌────────────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞════════════╪══════════╡
+│ 316.666667 ┆ 1.602222 │
+└────────────┴──────────┘
+
>>> agnostic_var(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[316.6666666666667]]
+b: [[1.6022222222222222]]
+
narwhals.Expr.cat
get_categories()
+
+Get unique categories from column.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+Let's create some dataframes:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"fruits": ["apple", "mango", "mango"]}
+>>> df_pd = pd.DataFrame(data, dtype="category")
+>>> df_pl = pl.DataFrame(data, schema={"fruits": pl.Categorical})
+
We define a dataframe-agnostic function to get unique categories +from column 'fruits':
+>>> def agnostic_cat_get_categories(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("fruits").cat.get_categories()).to_native()
+
We can then pass any supported library such as pandas or Polars to
+ agnostic_cat_get_categories
:
>>> agnostic_cat_get_categories(df_pd)
+ fruits
+0 apple
+1 mango
+
+>>> agnostic_cat_get_categories(df_pl)
+shape: (2, 1)
+┌────────┐
+│ fruits │
+│ --- │
+│ str │
+╞════════╡
+│ apple │
+│ mango │
+└────────┘
+
+
+ narwhals.Expr.dt
convert_time_zone(time_zone)
+
+Convert to a new time zone.
+If converting from a time-zone-naive column, then conversion happens +as if converting from UTC.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime, timezone
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_dt_convert_time_zone(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").dt.convert_time_zone("Asia/Kathmandu")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_convert_time_zone
:
>>> agnostic_dt_convert_time_zone(df_pd)
+ a
+0 2024-01-01 05:45:00+05:45
+1 2024-01-02 05:45:00+05:45
+
>>> agnostic_dt_convert_time_zone(df_pl)
+shape: (2, 1)
+┌──────────────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs, Asia/Kathmandu] │
+╞══════════════════════════════╡
+│ 2024-01-01 05:45:00 +0545 │
+│ 2024-01-02 05:45:00 +0545 │
+└──────────────────────────────┘
+
>>> agnostic_dt_convert_time_zone(df_pa)
+pyarrow.Table
+a: timestamp[us, tz=Asia/Kathmandu]
+----
+a: [[2024-01-01 00:00:00.000000Z,2024-01-02 00:00:00.000000Z]]
+
date()
+
+Extract the date from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If the pandas default backend is being used. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]}
+>>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow")
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_date(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a").dt.date()).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_date
:
>>> agnostic_dt_date(df_pd)
+ a
+0 2012-01-07
+1 2023-03-10
+
>>> agnostic_dt_date(df_pl)
+shape: (2, 1)
+┌────────────┐
+│ a │
+│ --- │
+│ date │
+╞════════════╡
+│ 2012-01-07 │
+│ 2023-03-10 │
+└────────────┘
+
>>> agnostic_dt_date(df_pa)
+pyarrow.Table
+a: date32[day]
+----
+a: [[2012-01-07,2023-03-10]]
+
day()
+
+Extract day from underlying DateTime representation.
+Returns the day of the month, starting from 1. The return value ranges from 1 to 31. (The last day of the month varies by month.)
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_day(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.day().alias("day"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_day
:
>>> agnostic_dt_day(df_pd)
+ datetime day
+0 1978-06-01 1
+1 2024-12-13 13
+2 2065-01-01 1
+
>>> agnostic_dt_day(df_pl)
+shape: (3, 2)
+┌─────────────────────┬─────┐
+│ datetime ┆ day │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪═════╡
+│ 1978-06-01 00:00:00 ┆ 1 │
+│ 2024-12-13 00:00:00 ┆ 13 │
+│ 2065-01-01 00:00:00 ┆ 1 │
+└─────────────────────┴─────┘
+
>>> agnostic_dt_day(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+day: int64
+----
+datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]]
+day: [[1,13,1]]
+
hour()
+
+Extract hour from underlying DateTime representation.
+Returns the hour number from 0 to 23.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1),
+... datetime(2024, 10, 13, 5),
+... datetime(2065, 1, 1, 10),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_hour(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_hour
:
>>> agnostic_dt_hour(df_pd)
+ datetime hour
+0 1978-01-01 01:00:00 1
+1 2024-10-13 05:00:00 5
+2 2065-01-01 10:00:00 10
+
>>> agnostic_dt_hour(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ hour │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪══════╡
+│ 1978-01-01 01:00:00 ┆ 1 │
+│ 2024-10-13 05:00:00 ┆ 5 │
+│ 2065-01-01 10:00:00 ┆ 10 │
+└─────────────────────┴──────┘
+
>>> agnostic_dt_hour(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+hour: int64
+----
+datetime: [[1978-01-01 01:00:00.000000,2024-10-13 05:00:00.000000,2065-01-01 10:00:00.000000]]
+hour: [[1,5,10]]
+
microsecond()
+
+Extract microseconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_microsecond(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.microsecond().alias("microsecond"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_microsecond
:
>>> agnostic_dt_microsecond(df_pd)
+ datetime microsecond
+0 1978-01-01 01:01:01.000 0
+1 2024-10-13 05:30:14.505 505000
+2 2065-01-01 10:20:30.067 67000
+
>>> agnostic_dt_microsecond(df_pl)
+shape: (3, 2)
+┌─────────────────────────┬─────────────┐
+│ datetime ┆ microsecond │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i32 │
+╞═════════════════════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 505000 │
+│ 2065-01-01 10:20:30.067 ┆ 67000 │
+└─────────────────────────┴─────────────┘
+
>>> agnostic_dt_microsecond(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+microsecond: int64
+----
+datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.505000,2065-01-01 10:20:30.067000]]
+microsecond: [[0,505000,67000]]
+
millisecond()
+
+Extract milliseconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_millisecond(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.millisecond().alias("millisecond"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_millisecond
:
>>> agnostic_dt_millisecond(df_pd)
+ datetime millisecond
+0 1978-01-01 01:01:01.000 0
+1 2024-10-13 05:30:14.505 505
+2 2065-01-01 10:20:30.067 67
+
>>> agnostic_dt_millisecond(df_pl)
+shape: (3, 2)
+┌─────────────────────────┬─────────────┐
+│ datetime ┆ millisecond │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i32 │
+╞═════════════════════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 505 │
+│ 2065-01-01 10:20:30.067 ┆ 67 │
+└─────────────────────────┴─────────────┘
+
>>> agnostic_dt_millisecond(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+millisecond: int64
+----
+datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.505000,2065-01-01 10:20:30.067000]]
+millisecond: [[0,505,67]]
+
minute()
+
+Extract minutes from underlying DateTime representation.
+Returns the minute number from 0 to 59.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30),
+... datetime(2065, 1, 1, 10, 20),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_minute(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.minute().alias("minute"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_minute
:
>>> agnostic_dt_minute(df_pd)
+ datetime minute
+0 1978-01-01 01:01:00 1
+1 2024-10-13 05:30:00 30
+2 2065-01-01 10:20:00 20
+
>>> agnostic_dt_minute(df_pl)
+shape: (3, 2)
+┌─────────────────────┬────────┐
+│ datetime ┆ minute │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪════════╡
+│ 1978-01-01 01:01:00 ┆ 1 │
+│ 2024-10-13 05:30:00 ┆ 30 │
+│ 2065-01-01 10:20:00 ┆ 20 │
+└─────────────────────┴────────┘
+
>>> agnostic_dt_minute(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+minute: int64
+----
+datetime: [[1978-01-01 01:01:00.000000,2024-10-13 05:30:00.000000,2065-01-01 10:20:00.000000]]
+minute: [[1,30,20]]
+
month()
+
+Extract month from underlying DateTime representation.
+Returns the month number starting from 1. The return value ranges from 1 to 12.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_month(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.month().alias("month"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_month
:
>>> agnostic_dt_month(df_pd)
+ datetime month
+0 1978-06-01 6
+1 2024-12-13 12
+2 2065-01-01 1
+
>>> agnostic_dt_month(df_pl)
+shape: (3, 2)
+┌─────────────────────┬───────┐
+│ datetime ┆ month │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪═══════╡
+│ 1978-06-01 00:00:00 ┆ 6 │
+│ 2024-12-13 00:00:00 ┆ 12 │
+│ 2065-01-01 00:00:00 ┆ 1 │
+└─────────────────────┴───────┘
+
>>> agnostic_dt_month(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+month: int64
+----
+datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]]
+month: [[6,12,1]]
+
nanosecond()
+
+Extract nanoseconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 500000),
+... datetime(2065, 1, 1, 10, 20, 30, 60000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_nanosecond(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.nanosecond().alias("nanosecond"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_nanosecond
:
>>> agnostic_dt_nanosecond(df_pd)
+ datetime nanosecond
+0 1978-01-01 01:01:01.000 0
+1 2024-10-13 05:30:14.500 500000000
+2 2065-01-01 10:20:30.060 60000000
+
>>> agnostic_dt_nanosecond(df_pl)
+shape: (3, 2)
+┌─────────────────────────┬────────────┐
+│ datetime ┆ nanosecond │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i32 │
+╞═════════════════════════╪════════════╡
+│ 1978-01-01 01:01:01 ┆ 0 │
+│ 2024-10-13 05:30:14.500 ┆ 500000000 │
+│ 2065-01-01 10:20:30.060 ┆ 60000000 │
+└─────────────────────────┴────────────┘
+
>>> agnostic_dt_nanosecond(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+nanosecond: int64
+----
+datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.500000,2065-01-01 10:20:30.060000]]
+nanosecond: [[0,500000000,60000000]]
+
ordinal_day()
+
+Get the ordinal day, i.e. the day of the year starting from 1 (for example, 2020-08-03 is day 216).
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_ordinal_day(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_ordinal_day=nw.col("a").dt.ordinal_day()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_ordinal_day
:
>>> agnostic_dt_ordinal_day(df_pd)
+ a a_ordinal_day
+0 2020-01-01 1
+1 2020-08-03 216
+
>>> agnostic_dt_ordinal_day(df_pl)
+shape: (2, 2)
+┌─────────────────────┬───────────────┐
+│ a ┆ a_ordinal_day │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i16 │
+╞═════════════════════╪═══════════════╡
+│ 2020-01-01 00:00:00 ┆ 1 │
+│ 2020-08-03 00:00:00 ┆ 216 │
+└─────────────────────┴───────────────┘
+
>>> agnostic_dt_ordinal_day(df_pa)
+pyarrow.Table
+a: timestamp[us]
+a_ordinal_day: int64
+----
+a: [[2020-01-01 00:00:00.000000,2020-08-03 00:00:00.000000]]
+a_ordinal_day: [[1,216]]
+
replace_time_zone(time_zone)
+
+Replace time zone.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str | None
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime, timezone
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_dt_replace_time_zone(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").dt.replace_time_zone("Asia/Kathmandu")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_replace_time_zone
:
>>> agnostic_dt_replace_time_zone(df_pd)
+ a
+0 2024-01-01 00:00:00+05:45
+1 2024-01-02 00:00:00+05:45
+
>>> agnostic_dt_replace_time_zone(df_pl)
+shape: (2, 1)
+┌──────────────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs, Asia/Kathmandu] │
+╞══════════════════════════════╡
+│ 2024-01-01 00:00:00 +0545 │
+│ 2024-01-02 00:00:00 +0545 │
+└──────────────────────────────┘
+
>>> agnostic_dt_replace_time_zone(df_pa)
+pyarrow.Table
+a: timestamp[us, tz=Asia/Kathmandu]
+----
+a: [[2023-12-31 18:15:00.000000Z,2024-01-01 18:15:00.000000Z]]
+
second()
+
+Extract seconds from underlying DateTime representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30, 14),
+... datetime(2065, 1, 1, 10, 20, 30),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_second(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.second().alias("second"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_second
:
>>> agnostic_dt_second(df_pd)
+ datetime second
+0 1978-01-01 01:01:01 1
+1 2024-10-13 05:30:14 14
+2 2065-01-01 10:20:30 30
+
>>> agnostic_dt_second(df_pl)
+shape: (3, 2)
+┌─────────────────────┬────────┐
+│ datetime ┆ second │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪════════╡
+│ 1978-01-01 01:01:01 ┆ 1 │
+│ 2024-10-13 05:30:14 ┆ 14 │
+│ 2065-01-01 10:20:30 ┆ 30 │
+└─────────────────────┴────────┘
+
>>> agnostic_dt_second(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+second: int64
+----
+datetime: [[1978-01-01 01:01:01.000000,2024-10-13 05:30:14.000000,2065-01-01 10:20:30.000000]]
+second: [[1,14,30]]
+
timestamp(time_unit='us')
+
+Return a timestamp in the given time unit.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['ns', 'us', 'ms']
+ |
+
+
+
+ {'ns', 'us', 'ms'} +Time unit. + |
+
+ 'us'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import date
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"date": [date(2001, 1, 1), None, date(2001, 1, 3)]}
+>>> df_pd = pd.DataFrame(data, dtype="datetime64[ns]")
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_dt_timestamp(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("date").dt.timestamp().alias("timestamp_us"),
+... nw.col("date").dt.timestamp("ms").alias("timestamp_ms"),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_timestamp
:
>>> agnostic_dt_timestamp(df_pd)
+ date timestamp_us timestamp_ms
+0 2001-01-01 9.783072e+14 9.783072e+11
+1 NaT NaN NaN
+2 2001-01-03 9.784800e+14 9.784800e+11
+
>>> agnostic_dt_timestamp(df_pl)
+shape: (3, 3)
+┌────────────┬─────────────────┬──────────────┐
+│ date ┆ timestamp_us ┆ timestamp_ms │
+│ --- ┆ --- ┆ --- │
+│ date ┆ i64 ┆ i64 │
+╞════════════╪═════════════════╪══════════════╡
+│ 2001-01-01 ┆ 978307200000000 ┆ 978307200000 │
+│ null ┆ null ┆ null │
+│ 2001-01-03 ┆ 978480000000000 ┆ 978480000000 │
+└────────────┴─────────────────┴──────────────┘
+
>>> agnostic_dt_timestamp(df_pa)
+pyarrow.Table
+date: date32[day]
+timestamp_us: int64
+timestamp_ms: int64
+----
+date: [[2001-01-01,null,2001-01-03]]
+timestamp_us: [[978307200000000,null,978480000000000]]
+timestamp_ms: [[978307200000,null,978480000000]]
+
total_microseconds()
+
+Get total microseconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total microseconds as an int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+In that case, consider using fill_null() and cast.
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_total_microseconds(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_microseconds=nw.col("a").dt.total_microseconds()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_total_microseconds
:
>>> agnostic_dt_total_microseconds(df_pd)
+ a a_total_microseconds
+0 0 days 00:00:00.000010 10
+1 0 days 00:00:00.001200 1200
+
>>> agnostic_dt_total_microseconds(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_microseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10µs ┆ 10 │
+│ 1200µs ┆ 1200 │
+└──────────────┴──────────────────────┘
+
>>> agnostic_dt_total_microseconds(df_pa)
+pyarrow.Table
+a: duration[us]
+a_total_microseconds: int64
+----
+a: [[10,1200]]
+a_total_microseconds: [[10,1200]]
+
total_milliseconds()
+
+Get total milliseconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total milliseconds as an int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+In that case, consider using fill_null() and cast.
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_total_milliseconds(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_milliseconds=nw.col("a").dt.total_milliseconds()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_total_milliseconds
:
>>> agnostic_dt_total_milliseconds(df_pd)
+ a a_total_milliseconds
+0 0 days 00:00:00.010000 10
+1 0 days 00:00:00.020040 20
+
>>> agnostic_dt_total_milliseconds(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_milliseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10ms ┆ 10 │
+│ 20040µs ┆ 20 │
+└──────────────┴──────────────────────┘
+
>>> agnostic_dt_total_milliseconds(df_pa)
+pyarrow.Table
+a: duration[us]
+a_total_milliseconds: int64
+----
+a: [[10000,20040]]
+a_total_milliseconds: [[10,20]]
+
total_minutes()
+
+Get total minutes.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total minutes as an int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+In that case, consider using fill_null() and cast.
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_total_minutes(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_minutes=nw.col("a").dt.total_minutes()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_total_minutes
:
>>> agnostic_dt_total_minutes(df_pd)
+ a a_total_minutes
+0 0 days 00:10:00 10
+1 0 days 00:20:40 20
+
>>> agnostic_dt_total_minutes(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_minutes │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10m ┆ 10 │
+│ 20m 40s ┆ 20 │
+└──────────────┴─────────────────┘
+
>>> agnostic_dt_total_minutes(df_pa)
+pyarrow.Table
+a: duration[us]
+a_total_minutes: int64
+----
+a: [[600000000,1240000000]]
+a_total_minutes: [[10,20]]
+
total_nanoseconds()
+
+Get total nanoseconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total nanoseconds as an int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+In that case, consider using fill_null() and cast.
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> df_pd = pd.DataFrame({"a": pd.to_datetime(data)})
+>>> df_pl = pl.DataFrame({"a": data}).with_columns(
+... pl.col("a").str.to_datetime(time_unit="ns")
+... )
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_total_nanoseconds(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_diff_total_nanoseconds=nw.col("a").diff().dt.total_nanoseconds()
+... ).to_native()
+
We can then pass any supported library, such as pandas or Polars (shown below),
+to agnostic_dt_total_nanoseconds
:
>>> agnostic_dt_total_nanoseconds(df_pd)
+ a a_diff_total_nanoseconds
+0 2024-01-01 00:00:00.000000001 NaN
+1 2024-01-01 00:00:00.000000002 1.0
+
>>> agnostic_dt_total_nanoseconds(df_pl)
+shape: (2, 2)
+┌───────────────────────────────┬──────────────────────────┐
+│ a ┆ a_diff_total_nanoseconds │
+│ --- ┆ --- │
+│ datetime[ns] ┆ i64 │
+╞═══════════════════════════════╪══════════════════════════╡
+│ 2024-01-01 00:00:00.000000001 ┆ null │
+│ 2024-01-01 00:00:00.000000002 ┆ 1 │
+└───────────────────────────────┴──────────────────────────┘
+
total_seconds()
+
+Get total seconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The function outputs the total seconds as an int dtype by default;
+however, pandas may change the dtype to float when there are missing values.
+In that case, consider using fill_null() and cast.
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_total_seconds(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... a_total_seconds=nw.col("a").dt.total_seconds()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_total_seconds
:
>>> agnostic_dt_total_seconds(df_pd)
+ a a_total_seconds
+0 0 days 00:00:10 10
+1 0 days 00:00:20.040000 20
+
>>> agnostic_dt_total_seconds(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_seconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10s ┆ 10 │
+│ 20s 40ms ┆ 20 │
+└──────────────┴─────────────────┘
+
>>> agnostic_dt_total_seconds(df_pa)
+pyarrow.Table
+a: duration[us]
+a_total_seconds: int64
+----
+a: [[10000000,20040000]]
+a_total_seconds: [[10,20]]
+
to_string(format)
+
+Convert a Date/Time/Datetime column into a String column with the given format.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str
+ |
+
+
+
+ Format string used to format the temporal column. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Unfortunately, different libraries interpret format directives a bit +differently.
+- Chrono, the library used by Polars, uses "%.f" for fractional seconds,
+  whereas pandas and Python stdlib use ".%f".
+- PyArrow interprets "%S" as "seconds, including fractional seconds"
+  whereas most other tools interpret it as "just seconds, as 2 digits".
+
+Therefore, we make the following adjustments:
+- for pandas-like libraries, we replace "%S.%f" with "%S%.f".
+- for PyArrow, we replace "%S.%f" with "%S".
+
+Workarounds like these don't make us happy, and we try to avoid them as +much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime +string, and have it work as consistently as possible across libraries, +we suggest using:
+- "%Y-%m-%dT%H:%M:%S%.f" for datetimes
+- "%Y-%m-%d" for dates
+
+though note that, even then, different tools may return a different number +of trailing zeros. Nonetheless, this is probably consistent enough for +most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_to_string(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").dt.to_string("%Y/%m/%d %H:%M:%S")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_to_string
:
>>> agnostic_dt_to_string(df_pd)
+ a
+0 2020/03/01 00:00:00
+1 2020/04/01 00:00:00
+2 2020/05/01 00:00:00
+
>>> agnostic_dt_to_string(df_pl)
+shape: (3, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ str │
+╞═════════════════════╡
+│ 2020/03/01 00:00:00 │
+│ 2020/04/01 00:00:00 │
+│ 2020/05/01 00:00:00 │
+└─────────────────────┘
+
>>> agnostic_dt_to_string(df_pa)
+pyarrow.Table
+a: string
+----
+a: [["2020/03/01 00:00:00.000000","2020/04/01 00:00:00.000000","2020/05/01 00:00:00.000000"]]
+
weekday()
+
+Extract the weekday from the underlying Date representation.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ Returns the ISO weekday number, where Monday = 1 and Sunday = 7. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_weekday(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(a_weekday=nw.col("a").dt.weekday()).to_native()
+
We can then pass pandas, Polars, PyArrow, or any other supported library to
+agnostic_dt_weekday
:
>>> agnostic_dt_weekday(df_pd)
+ a a_weekday
+0 2020-01-01 3
+1 2020-08-03 1
+
>>> agnostic_dt_weekday(df_pl)
+shape: (2, 2)
+┌─────────────────────┬───────────┐
+│ a ┆ a_weekday │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪═══════════╡
+│ 2020-01-01 00:00:00 ┆ 3 │
+│ 2020-08-03 00:00:00 ┆ 1 │
+└─────────────────────┴───────────┘
+
>>> agnostic_dt_weekday(df_pa)
+pyarrow.Table
+a: timestamp[us]
+a_weekday: int64
+----
+a: [[2020-01-01 00:00:00.000000,2020-08-03 00:00:00.000000]]
+a_weekday: [[3,1]]
+
year()
+
+Extract year from underlying DateTime representation.
+Returns the year number in the calendar date.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_dt_year(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dt_year
:
>>> agnostic_dt_year(df_pd)
+ datetime year
+0 1978-06-01 1978
+1 2024-12-13 2024
+2 2065-01-01 2065
+
>>> agnostic_dt_year(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ year │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i32 │
+╞═════════════════════╪══════╡
+│ 1978-06-01 00:00:00 ┆ 1978 │
+│ 2024-12-13 00:00:00 ┆ 2024 │
+│ 2065-01-01 00:00:00 ┆ 2065 │
+└─────────────────────┴──────┘
+
>>> agnostic_dt_year(df_pa)
+pyarrow.Table
+datetime: timestamp[us]
+year: int64
+----
+datetime: [[1978-06-01 00:00:00.000000,2024-12-13 00:00:00.000000,2065-01-01 00:00:00.000000]]
+year: [[1978,2024,2065]]
+
narwhals.Expr.list
len()
+
+Return the number of elements in each list.
+Null values count towards the total.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [[1, 2], [3, 4, None], None, []]}
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_list_len(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(a_len=nw.col("a").list.len()).to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> agnostic_list_len(
+... pd.DataFrame(data).astype({"a": pd.ArrowDtype(pa.list_(pa.int64()))})
+... )
+ a a_len
+0 [1. 2.] 2
+1 [ 3. 4. nan] 3
+2 <NA> <NA>
+3 [] 0
+
>>> agnostic_list_len(pl.DataFrame(data))
+shape: (4, 2)
+┌──────────────┬───────┐
+│ a ┆ a_len │
+│ --- ┆ --- │
+│ list[i64] ┆ u32 │
+╞══════════════╪═══════╡
+│ [1, 2] ┆ 2 │
+│ [3, 4, null] ┆ 3 │
+│ null ┆ null │
+│ [] ┆ 0 │
+└──────────────┴───────┘
+
>>> agnostic_list_len(pa.table(data))
+pyarrow.Table
+a: list<item: int64>
+ child 0, item: int64
+a_len: uint32
+----
+a: [[[1,2],[3,4,null],null,[]]]
+a_len: [[2,3,null,0]]
+
narwhals.Expr.name
keep()
+
+Keep the original root name of the expression.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_name_keep(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo").alias("alias_for_foo").name.keep()).columns
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_name_keep
:
>>> agnostic_name_keep(df_pd)
+['foo']
+
>>> agnostic_name_keep(df_pl)
+['foo']
+
>>> agnostic_name_keep(df_pa)
+['foo']
+
map(function)
+
+Rename the output of an expression by mapping a function over the root name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[str], str]
+ |
+
+
+
+ Function that maps a root name to a new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> renaming_func = lambda s: s[::-1] # reverse column name
+>>> def agnostic_name_map(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.map(renaming_func)).columns
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_name_map
:
>>> agnostic_name_map(df_pd)
+['oof', 'RAB']
+
>>> agnostic_name_map(df_pl)
+['oof', 'RAB']
+
>>> agnostic_name_map(df_pa)
+['oof', 'RAB']
+
prefix(prefix)
+
+Add a prefix to the root column name of the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ Prefix to add to the root column name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_name_prefix(df_native: IntoFrame, prefix: str) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.prefix(prefix)).columns
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_name_prefix
:
>>> agnostic_name_prefix(df_pd, "with_prefix_")
+['with_prefix_foo', 'with_prefix_BAR']
+
>>> agnostic_name_prefix(df_pl, "with_prefix_")
+['with_prefix_foo', 'with_prefix_BAR']
+
>>> agnostic_name_prefix(df_pa, "with_prefix_")
+['with_prefix_foo', 'with_prefix_BAR']
+
suffix(suffix)
+
+Add a suffix to the root column name of the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to add to the root column name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_name_suffix(df_native: IntoFrame, suffix: str) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.suffix(suffix)).columns
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_name_suffix
:
>>> agnostic_name_suffix(df_pd, "_with_suffix")
+['foo_with_suffix', 'BAR_with_suffix']
+
>>> agnostic_name_suffix(df_pl, "_with_suffix")
+['foo_with_suffix', 'BAR_with_suffix']
+
>>> agnostic_name_suffix(df_pa, "_with_suffix")
+['foo_with_suffix', 'BAR_with_suffix']
+
to_lowercase()
+
+Make the root column name lowercase.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_name_to_lowercase(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.to_lowercase()).columns
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_name_to_lowercase
:
>>> agnostic_name_to_lowercase(df_pd)
+['foo', 'bar']
+
>>> agnostic_name_to_lowercase(df_pl)
+['foo', 'bar']
+
>>> agnostic_name_to_lowercase(df_pa)
+['foo', 'bar']
+
to_uppercase()
+
+Make the root column name uppercase.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_name_to_uppercase(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo", "BAR").name.to_uppercase()).columns
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_name_to_uppercase
:
>>> agnostic_name_to_uppercase(df_pd)
+['FOO', 'BAR']
+
>>> agnostic_name_to_uppercase(df_pl)
+['FOO', 'BAR']
+
>>> agnostic_name_to_uppercase(df_pa)
+['FOO', 'BAR']
+
narwhals.Expr.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A character sequence or valid regular expression pattern. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_contains(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... default_match=nw.col("pets").str.contains("parrot|Dove"),
+... case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove"),
+... literal_match=nw.col("pets").str.contains(
+... "parrot|Dove", literal=True
+... ),
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_contains
:
>>> agnostic_str_contains(df_pd)
+ pets default_match case_insensitive_match literal_match
+0 cat False False False
+1 dog False False False
+2 rabbit and parrot True True False
+3 dove False True False
+4 None None None None
+
>>> agnostic_str_contains(df_pl)
+shape: (5, 4)
+┌───────────────────┬───────────────┬────────────────────────┬───────────────┐
+│ pets ┆ default_match ┆ case_insensitive_match ┆ literal_match │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ str ┆ bool ┆ bool ┆ bool │
+╞═══════════════════╪═══════════════╪════════════════════════╪═══════════════╡
+│ cat ┆ false ┆ false ┆ false │
+│ dog ┆ false ┆ false ┆ false │
+│ rabbit and parrot ┆ true ┆ true ┆ false │
+│ dove ┆ false ┆ true ┆ false │
+│ null ┆ null ┆ null ┆ null │
+└───────────────────┴───────────────┴────────────────────────┴───────────────┘
+
>>> agnostic_str_contains(df_pa)
+pyarrow.Table
+pets: string
+default_match: bool
+case_insensitive_match: bool
+literal_match: bool
+----
+pets: [["cat","dog","rabbit and parrot","dove",null]]
+default_match: [[false,false,true,false,null]]
+case_insensitive_match: [[false,false,true,true,null]]
+literal_match: [[false,false,false,false,null]]
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix substring to look for at the end of each string. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_ends_with(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... has_suffix=nw.col("fruits").str.ends_with("ngo")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_ends_with
:
>>> agnostic_str_ends_with(df_pd)
+ fruits has_suffix
+0 apple False
+1 mango True
+2 None None
+
>>> agnostic_str_ends_with(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_suffix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ false │
+│ mango ┆ true │
+│ null ┆ null │
+└────────┴────────────┘
+
>>> agnostic_str_ends_with(df_pa)
+pyarrow.Table
+fruits: string
+has_suffix: bool
+----
+fruits: [["apple","mango",null]]
+has_suffix: [[false,true,null]]
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_head(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... lyrics_head=nw.col("lyrics").str.head()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_head
:
>>> agnostic_str_head(df_pd)
+ lyrics lyrics_head
+0 Atatata Atata
+1 taata taata
+2 taatatata taata
+3 zukkyun zukky
+
>>> agnostic_str_head(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_head │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ Atata │
+│ taata ┆ taata │
+│ taatatata ┆ taata │
+│ zukkyun ┆ zukky │
+└───────────┴─────────────┘
+
>>> agnostic_str_head(df_pa)
+pyarrow.Table
+lyrics: string
+lyrics_head: string
+----
+lyrics: [["Atatata","taata","taatatata","zukkyun"]]
+lyrics_head: [["Atata","taata","taata","zukky"]]
+
len_chars()
+
+Return the length of each string as the number of characters.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"words": ["foo", "Café", "345", "東京", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_len_chars(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... words_len=nw.col("words").str.len_chars()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_len_chars
:
>>> agnostic_str_len_chars(df_pd)
+ words words_len
+0 foo 3.0
+1 Café 4.0
+2 345 3.0
+3 東京 2.0
+4 None NaN
+
>>> agnostic_str_len_chars(df_pl)
+shape: (5, 2)
+┌───────┬───────────┐
+│ words ┆ words_len │
+│ --- ┆ --- │
+│ str ┆ u32 │
+╞═══════╪═══════════╡
+│ foo ┆ 3 │
+│ Café ┆ 4 │
+│ 345 ┆ 3 │
+│ 東京 ┆ 2 │
+│ null ┆ null │
+└───────┴───────────┘
+
>>> agnostic_str_len_chars(df_pa)
+pyarrow.Table
+words: string
+words_len: int32
+----
+words: [["foo","Café","345","東京",null]]
+words_len: [[3,4,3,2,null]]
+
replace(pattern, value, *, literal=False, n=1)
+
+Replace first matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat the pattern as a literal string instead of a regular expression. + |
+
+ False
+ |
+
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of matches to replace. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"foo": ["123abc", "abc abc123"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_replace(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... df = df.with_columns(replaced=nw.col("foo").str.replace("abc", ""))
+... return df.to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_replace
:
>>> agnostic_str_replace(df_pd)
+ foo replaced
+0 123abc 123
+1 abc abc123 abc123
+
>>> agnostic_str_replace(df_pl)
+shape: (2, 2)
+┌────────────┬──────────┐
+│ foo ┆ replaced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════════╪══════════╡
+│ 123abc ┆ 123 │
+│ abc abc123 ┆ abc123 │
+└────────────┴──────────┘
+
>>> agnostic_str_replace(df_pa)
+pyarrow.Table
+foo: string
+replaced: string
+----
+foo: [["123abc","abc abc123"]]
+replaced: [["123"," abc123"]]
+
replace_all(pattern, value, *, literal=False)
+
+Replace all matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat the pattern as a literal string instead of a regular expression. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"foo": ["123abc", "abc abc123"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_replace_all(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... df = df.with_columns(replaced=nw.col("foo").str.replace_all("abc", ""))
+... return df.to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_replace_all
:
>>> agnostic_str_replace_all(df_pd)
+ foo replaced
+0 123abc 123
+1 abc abc123 123
+
>>> agnostic_str_replace_all(df_pl)
+shape: (2, 2)
+┌────────────┬──────────┐
+│ foo ┆ replaced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════════╪══════════╡
+│ 123abc ┆ 123 │
+│ abc abc123 ┆ 123 │
+└────────────┴──────────┘
+
>>> agnostic_str_replace_all(df_pa)
+pyarrow.Table
+foo: string
+replaced: string
+----
+foo: [["123abc","abc abc123"]]
+replaced: [["123"," 123"]]
+
slice(offset, length=None)
+
+Create subslices of the string values of an expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ offset
+ |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
+ length
+ |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to None (default), the slice is taken to the end of the string. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"s": ["pear", None, "papaya", "dragonfruit"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_slice(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... s_sliced=nw.col("s").str.slice(4, length=3)
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_slice
:
>>> agnostic_str_slice(df_pd)
+ s s_sliced
+0 pear
+1 None None
+2 papaya ya
+3 dragonfruit onf
+
>>> agnostic_str_slice(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ │
+│ null ┆ null │
+│ papaya ┆ ya │
+│ dragonfruit ┆ onf │
+└─────────────┴──────────┘
+
>>> agnostic_str_slice(df_pa)
+pyarrow.Table
+s: string
+s_sliced: string
+----
+s: [["pear",null,"papaya","dragonfruit"]]
+s_sliced: [["",null,"ya","onf"]]
+
Using negative indexes:
+>>> def agnostic_str_slice_negative(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(s_sliced=nw.col("s").str.slice(-3)).to_native()
+
>>> agnostic_str_slice_negative(df_pd)
+ s s_sliced
+0 pear ear
+1 None None
+2 papaya aya
+3 dragonfruit uit
+
>>> agnostic_str_slice_negative(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ ear │
+│ null ┆ null │
+│ papaya ┆ aya │
+│ dragonfruit ┆ uit │
+└─────────────┴──────────┘
+
>>> agnostic_str_slice_negative(df_pa)
+pyarrow.Table
+s: string
+s_sliced: string
+----
+s: [["pear",null,"papaya","dragonfruit"]]
+s_sliced: [["ear",null,"aya","uit"]]
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ Prefix substring to look for at the start of each string. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_starts_with(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... has_prefix=nw.col("fruits").str.starts_with("app")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_starts_with
:
>>> agnostic_str_starts_with(df_pd)
+ fruits has_prefix
+0 apple True
+1 mango False
+2 None None
+
>>> agnostic_str_starts_with(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_prefix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ true │
+│ mango ┆ false │
+│ null ┆ null │
+└────────┴────────────┘
+
>>> agnostic_str_starts_with(df_pa)
+pyarrow.Table
+fruits: string
+has_prefix: bool
+----
+fruits: [["apple","mango",null]]
+has_prefix: [[true,false,null]]
+
strip_chars(characters=None)
+
+Remove leading and trailing characters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ characters
+ |
+
+ str | None
+ |
+
+
+
+ The set of characters to be removed. All combinations of this +set of characters will be stripped from the start and end of the string. +If set to None (default), all leading and trailing whitespace is removed +instead. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> from typing import Any
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"fruits": ["apple", "\nmango"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_strip_chars(df_native: IntoFrame) -> dict[str, Any]:
+... df = nw.from_native(df_native)
+... df = df.with_columns(stripped=nw.col("fruits").str.strip_chars())
+... return df.to_dict(as_series=False)
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_strip_chars
:
>>> agnostic_str_strip_chars(df_pd)
+{'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+
>>> agnostic_str_strip_chars(df_pl)
+{'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+
>>> agnostic_str_strip_chars(df_pa)
+{'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_tail(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... lyrics_tail=nw.col("lyrics").str.tail()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_tail
:
>>> agnostic_str_tail(df_pd)
+ lyrics lyrics_tail
+0 Atatata atata
+1 taata taata
+2 taatatata atata
+3 zukkyun kkyun
+
>>> agnostic_str_tail(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_tail │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ atata │
+│ taata ┆ taata │
+│ taatatata ┆ atata │
+│ zukkyun ┆ kkyun │
+└───────────┴─────────────┘
+
>>> agnostic_str_tail(df_pa)
+pyarrow.Table
+lyrics: string
+lyrics_tail: string
+----
+lyrics: [["Atatata","taata","taatatata","zukkyun"]]
+lyrics_tail: [["atata","taata","atata","kkyun"]]
+
to_datetime(format=None)
+
+Convert to Datetime dtype.
+ + +As different backends auto-infer format in different ways, if format=None
+there is no guarantee that the result will be equal across backends.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str | None
+ |
+
+
+
+ Format to use for conversion. If set to None (default), the format is +inferred from the data. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
pandas defaults to the nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanosecond was the only time unit supported +in pandas, with no ability to set any other one. Support for +setting the time unit in pandas, where the installed version permits, is planned for a future release.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = ["2020-01-01", "2020-01-02"]
+>>> df_pd = pd.DataFrame({"a": data})
+>>> df_pl = pl.DataFrame({"a": data})
+>>> df_pa = pa.table({"a": data})
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_to_datetime(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.col("a").str.to_datetime(format="%Y-%m-%d")
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_to_datetime
:
>>> agnostic_str_to_datetime(df_pd)
+ a
+0 2020-01-01
+1 2020-01-02
+
>>> agnostic_str_to_datetime(df_pl)
+shape: (2, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs] │
+╞═════════════════════╡
+│ 2020-01-01 00:00:00 │
+│ 2020-01-02 00:00:00 │
+└─────────────────────┘
+
>>> agnostic_str_to_datetime(df_pa)
+pyarrow.Table
+a: timestamp[us]
+----
+a: [[2020-01-01 00:00:00.000000,2020-01-02 00:00:00.000000]]
+
to_lowercase()
+
+Transform string to lowercase variant.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"fruits": ["APPLE", "MANGO", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_to_lowercase(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... lower_col=nw.col("fruits").str.to_lowercase()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_to_lowercase
:
>>> agnostic_str_to_lowercase(df_pd)
+ fruits lower_col
+0 APPLE apple
+1 MANGO mango
+2 None None
+
>>> agnostic_str_to_lowercase(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ lower_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ APPLE ┆ apple │
+│ MANGO ┆ mango │
+│ null ┆ null │
+└────────┴───────────┘
+
>>> agnostic_str_to_lowercase(df_pa)
+pyarrow.Table
+fruits: string
+lower_col: string
+----
+fruits: [["APPLE","MANGO",null]]
+lower_col: [["apple","mango",null]]
+
to_uppercase()
+
+Transform string to uppercase variant.
+ + +Returns:
+Type | +Description | +
---|---|
+ ExprT
+ |
+
+
+
+ A new expression. + |
+
The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. +For more info see the related issue. +There may be other unicode-edge-case-related variations across implementations.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_str_to_uppercase(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... upper_col=nw.col("fruits").str.to_uppercase()
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_str_to_uppercase
:
>>> agnostic_str_to_uppercase(df_pd)
+ fruits upper_col
+0 apple APPLE
+1 mango MANGO
+2 None None
+
>>> agnostic_str_to_uppercase(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ upper_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ apple ┆ APPLE │
+│ mango ┆ MANGO │
+│ null ┆ null │
+└────────┴───────────┘
+
>>> agnostic_str_to_uppercase(df_pa)
+pyarrow.Table
+fruits: string
+upper_col: string
+----
+fruits: [["apple","mango",null]]
+upper_col: [["APPLE","MANGO",null]]
+
narwhals.GroupBy
agg(*aggs, **named_aggs)
+
+Compute aggregations for each group of a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ aggs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Aggregations to compute for each group of the group by operation, +specified as positional arguments. + |
+
+ ()
+ |
+
+ named_aggs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional aggregations, specified as keyword arguments. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrameT
+ |
+
+
+
+ A new Dataframe. + |
+
Examples:
+Group by one column or by multiple columns and call agg
to compute
+the grouped sum of another column.
>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+
We define library agnostic functions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
>>> @nw.narwhalify
+... def func_mult_col(df):
+... return df.group_by("a", "b").agg(nw.sum("c")).sort("a", "b")
+
We can then pass either pandas or Polars to func
and func_mult_col
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> func_mult_col(df_pd)
+ a b c
+0 a 1 8
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func_mult_col(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 8 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
narwhals.LazyGroupBy
agg(*aggs, **named_aggs)
+
+Compute aggregations for each group of a group by operation.
+If a library does not support lazy execution, then this is a no-op.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ aggs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Aggregations to compute for each group of the group by operation, +specified as positional arguments. + |
+
+ ()
+ |
+
+ named_aggs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional aggregations, specified as keyword arguments. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrameT
+ |
+
+
+
+ A new LazyFrame. + |
+
Examples:
+Group by one column or by multiple columns and call agg
to compute
+the grouped sum of another column.
>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+
We define library agnostic functions:
+>>> def agnostic_func_one_col(lf_native: IntoFrameT) -> IntoFrameT:
+... lf = nw.from_native(lf_native)
+... return nw.to_native(lf.group_by("a").agg(nw.col("b").sum()).sort("a"))
+
>>> def agnostic_func_mult_col(lf_native: IntoFrameT) -> IntoFrameT:
+... lf = nw.from_native(lf_native)
+... return nw.to_native(lf.group_by("a", "b").agg(nw.sum("c")).sort("a", "b"))
+
We can then pass a lazy frame and materialise it with collect
:
>>> agnostic_func_one_col(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> agnostic_func_mult_col(lf_pl).collect()
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 8 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
narwhals.LazyFrame
Narwhals LazyFrame, backed by a native lazyframe.
+Warning
+This class is not meant to be instantiated directly - instead use
+narwhals.from_native
with a native
+object that is a lazy dataframe from one of the supported
+backends (e.g. polars.LazyFrame, dask_expr._collection.DataFrame):
+
narwhals.from_native(native_lazyframe)
+
columns
+
+
+ property
+
+
+Get column names.
+ + +Returns:
+Type | +Description | +
---|---|
+ list[str]
+ |
+
+
+
+ The column names stored in a list. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
We define a library agnostic function:
+>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.columns
+
We can then pass any supported library such as Polars or Dask to agnostic_columns
:
>>> agnostic_columns(lf_pl)
+['foo', 'bar', 'ham']
+>>> agnostic_columns(lf_dask)
+['foo', 'bar', 'ham']
+
implementation
+
+
+ property
+
+
+Return implementation of native frame.
+This can be useful when you need to use special-casing for features outside of +Narwhals' scope - for example, when dealing with pandas' Period Dtype.
+ + +Returns:
+Type | +Description | +
---|---|
+ Implementation
+ |
+
+
+
+ Implementation. + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> lf_pl = pl.LazyFrame({"a": [1, 2, 3]})
+>>> lf_dask = dd.from_dict({"a": [1, 2, 3]}, npartitions=2)
+
>>> lf = nw.from_native(lf_pl)
+>>> lf.implementation
+<Implementation.POLARS: 6>
+>>> lf.implementation.is_pandas()
+False
+>>> lf.implementation.is_polars()
+True
+
>>> lf = nw.from_native(lf_dask)
+>>> lf.implementation
+<Implementation.DASK: 7>
+>>> lf.implementation.is_dask()
+True
+
schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Returns:
+Type | +Description | +
---|---|
+ Schema
+ |
+
+
+
+ A Narwhals Schema object that displays the mapping of column names to their data types. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
>>> lf = nw.from_native(lf_pl)
+>>> lf.schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> lf = nw.from_native(lf_dask)
+>>> lf.schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
clone()
+
+Create a copy of this LazyFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ An identical copy of the original LazyFrame. + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we copy the DataFrame:
+>>> def agnostic_clone(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.clone().collect().to_native()
+
We can then pass any supported library such as Polars to agnostic_clone
:
>>> agnostic_clone(lf_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect()
+
+Materialize this LazyFrame into a DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> data = {
+... "a": ["a", "b", "a", "b", "b", "c"],
+... "b": [1, 2, 3, 4, 5, 6],
+... "c": [6, 5, 4, 3, 2, 1],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
>>> lf = nw.from_native(lf_pl)
+>>> lf
+┌─────────────────────────────┐
+| Narwhals LazyFrame |
+|-----------------------------|
+|<LazyFrame at ...
+└─────────────────────────────┘
+>>> df = lf.group_by("a").agg(nw.all().sum()).collect()
+>>> df.to_native().sort("a")
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 4 ┆ 10 │
+│ b ┆ 11 ┆ 10 │
+│ c ┆ 6 ┆ 1 │
+└─────┴─────┴─────┘
+
>>> lf = nw.from_native(lf_dask)
+>>> lf
+┌───────────────────────────────────┐
+| Narwhals LazyFrame |
+|-----------------------------------|
+|Dask DataFrame Structure: |
+| a b c|
+|npartitions=2 |
+|0 string int64 int64|
+|3 ... ... ...|
+|5 ... ... ...|
+|Dask Name: frompandas, 1 expression|
+|Expr=df |
+└───────────────────────────────────┘
+>>> df = lf.group_by("a").agg(nw.col("b", "c").sum()).collect()
+>>> df.to_native()
+ a b c
+0 a 4 10
+1 b 11 10
+2 c 6 1
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Returns:
+Type | +Description | +
---|---|
+ Schema
+ |
+
+
+
+ A Narwhals Schema object that displays the mapping of column names to their data types. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
>>> lf = nw.from_native(lf_pl)
+>>> lf.collect_schema()
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> lf = nw.from_native(lf_dask)
+>>> lf.collect_schema()
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns, strict=True)
+
+Remove columns from the LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *columns
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
+ strict
+ |
+
+ bool
+ |
+
+
+
+ Validate that all column names exist in the schema and throw an +exception if a column name does not exist in the schema. + |
+
+ True
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The LazyFrame with the specified columns removed. + |
+
strict
argument is ignored for polars<1.0.0
.
Please consider upgrading to a newer version or switching to eager mode.
+Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
We define a library agnostic function:
+>>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop("ham").collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_drop
:
>>> agnostic_drop(lf_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+>>> agnostic_drop(lf_dask)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+
Use positional arguments to drop multiple columns.
+>>> def agnostic_drop(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop("foo", "ham").collect().to_native()
+
>>> agnostic_drop(lf_pl)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+>>> agnostic_drop(lf_dask)
+ bar
+0 6.0
+1 7.0
+2 8.0
+
drop_nulls(subset=None)
+
+Drop rows that contain null values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) for which null values are considered. If set to None +(default), use all columns. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The original object with the rows removed that contained the null values. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.drop_nulls().collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_drop_nulls
:
>>> agnostic_drop_nulls(lf_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+>>> agnostic_drop_nulls(lf_dask)
+ a ba
+0 1.0 1.0
+
explode(columns, *more_columns)
+
+Explode the dataframe to long format by exploding the given columns.
+ + +It is possible to explode multiple columns only if these columns have +matching element counts.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str | Sequence[str]
+ |
+
+
+
+ Column names. The underlying columns being exploded must be of the List data type. + |
+ + required + | +
+ *more_columns
+ |
+
+ str
+ |
+
+
+
+ Additional names of columns to explode, specified as positional arguments. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ New LazyFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> import polars as pl
+>>> data = {
+... "a": ["x", "y", "z", "w"],
+... "lst1": [[1, 2], None, [None], []],
+... "lst2": [[3, None], None, [42], []],
+... }
+
We define a library agnostic function:
+>>> def agnostic_explode(df_native: IntoFrameT) -> IntoFrameT:
+... return (
+... nw.from_native(df_native)
+... .with_columns(nw.col("lst1", "lst2").cast(nw.List(nw.Int32())))
+... .explode("lst1", "lst2")
+... .collect()
+... .to_native()
+... )
+
We can then pass any supported library such as Polars to agnostic_explode
:
>>> agnostic_explode(pl.LazyFrame(data))
+shape: (5, 3)
+┌─────┬──────┬──────┐
+│ a ┆ lst1 ┆ lst2 │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i32 ┆ i32 │
+╞═════╪══════╪══════╡
+│ x ┆ 1 ┆ 3 │
+│ x ┆ 2 ┆ null │
+│ y ┆ null ┆ null │
+│ z ┆ null ┆ 42 │
+│ w ┆ null ┆ null │
+└─────┴──────┴──────┘
+
filter(*predicates, **constraints)
+
+Filter the rows in the LazyFrame based on a predicate expression.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr] | list[bool]
+ |
+
+
+
+ Expression that evaluates to a boolean Series. Can +also be a (single!) boolean list. + |
+
+ ()
+ |
+
+ **constraints
+ |
+
+ Any
+ |
+
+
+
+ Column filters; use name=value to filter columns by the supplied value. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The filtered LazyFrame. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(nw.col("foo") > 1).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_filter
:
>>> agnostic_filter(lf_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_dask)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+
Filter on multiple conditions:
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a"))
+... .collect()
+... .to_native()
+... )
+
>>> agnostic_filter(lf_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_dask)
+ foo bar ham
+0 1 6 a
+
Provide multiple filters using *args
syntax:
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.filter(
+... nw.col("foo") == 1,
+... nw.col("ham") == "a",
+... )
+... .collect()
+... .to_native()
+... )
+
>>> agnostic_filter(lf_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_dask)
+ foo bar ham
+0 1 6 a
+
Filter on an OR condition:
+>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c"))
+... .collect()
+... .to_native()
+... )
+
>>> agnostic_filter(lf_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_dask)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+
Provide multiple filters using **kwargs
syntax:
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.filter(foo=2, ham="b").collect().to_native()
+
>>> agnostic_filter(lf_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+└─────┴─────┴─────┘
+>>> agnostic_filter(lf_dask)
+ foo bar ham
+1 2 7 b
+
gather_every(n, offset=0)
+
+Take every nth row in the LazyFrame and return it as a new LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The LazyFrame containing only the selected rows. + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function in which we gather every 2 rows, +starting from an offset of 1:
+>>> def agnostic_gather_every(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.gather_every(n=2, offset=1).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_gather_every
:
>>> agnostic_gather_every(lf_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 6 │
+│ 4 ┆ 8 │
+└─────┴─────┘
+>>> agnostic_gather_every(lf_dask)
+ a b
+1 2 6
+3 4 8
+
group_by(*keys, drop_null_keys=False)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts expression input. Strings are +parsed as column names. + |
+
+ ()
+ |
+
+ drop_null_keys
+ |
+
+ bool
+ |
+
+
+
+ If True, then groups where any key is null won't be +included in the result. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyGroupBy[Self]
+ |
+
+
+
+ Object which can be used to perform aggregations. + |
+
Examples:
+Group by one column and call agg
to compute the grouped sum of
+another column.
>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.group_by("a")
+... .agg(nw.col("b").sum())
+... .sort("a")
+... .collect()
+... .to_native()
+... )
+
We can then pass any supported library such as Polars or Dask to agnostic_group_by_agg
:
>>> agnostic_group_by_agg(lf_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> agnostic_group_by_agg(lf_dask)
+ a b
+0 a 2
+1 b 5
+2 c 3
+
Group by multiple columns by passing a list of column names.
+>>> def agnostic_group_by_agg(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.group_by(["a", "b"])
+... .agg(nw.max("c"))
+... .sort(["a", "b"])
+... .collect()
+... .to_native()
+... )
+
>>> agnostic_group_by_agg(lf_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+>>> agnostic_group_by_agg(lf_dask)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A subset of the LazyFrame of shape (n, n_columns). + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> def agnostic_head(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.head(3).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_head
:
>>> agnostic_head(lf_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> agnostic_head(lf_dask)
+ a b
+0 1 7
+1 2 8
+2 3 9
+
join(other, on=None, how='inner', *, left_on=None, right_on=None, suffix='_right')
+
+Add a join operation to the Logical Plan.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ Lazy DataFrame to join with. + |
+ + required + | +
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the join columns in both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ how
+ |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
+ left_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the left DataFrame. + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the right DataFrame. + |
+
+ None
+ |
+
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to append to columns with a duplicate name. + |
+
+ '_right'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined LazyFrame. + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> lf_pl = pl.LazyFrame(data)
+>>> other_pl = pl.LazyFrame(data_other)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+>>> other_dask = dd.from_dict(data_other, npartitions=2)
+
Let's define a dataframe-agnostic function in which we join over "ham" column:
+>>> def agnostic_join_on_ham(
+... df_native: IntoFrameT,
+... other_native: IntoFrameT,
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return (
+... df.join(other, left_on="ham", right_on="ham")
+... .sort("ham")
+... .collect()
+... .to_native()
+... )
+
We can then pass any supported library such as Polars or Dask to agnostic_join_on_ham
:
>>> agnostic_join_on_ham(lf_pl, other_pl)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+>>> agnostic_join_on_ham(lf_dask, other_dask)
+ foo bar ham apple
+0 1 6.0 a x
+0 2 7.0 b y
+
join_asof(other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy='backward')
+
+Perform an asof join.
+This is similar to a left-join except that we match on nearest key rather than equal keys.
+Both DataFrames must be sorted by the asof_join key.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ left_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+
+ None
+ |
+
+ on
+ |
+
+ str | None
+ |
+
+
+
+ Join column of both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ by_left
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by_right
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['backward', 'forward', 'nearest']
+ |
+
+
+
+ Join strategy. The default is "backward". +
|
+
+ 'backward'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined LazyFrame. + |
+
Examples:
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from typing import Literal
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data_gdp = {
+... "datetime": [
+... datetime(2016, 1, 1),
+... datetime(2017, 1, 1),
+... datetime(2018, 1, 1),
+... datetime(2019, 1, 1),
+... datetime(2020, 1, 1),
+... ],
+... "gdp": [4164, 4411, 4566, 4696, 4827],
+... }
+>>> data_population = {
+... "datetime": [
+... datetime(2016, 3, 1),
+... datetime(2018, 8, 1),
+... datetime(2019, 1, 1),
+... ],
+... "population": [82.19, 82.66, 83.12],
+... }
+>>> gdp_pl = pl.LazyFrame(data_gdp)
+>>> population_pl = pl.LazyFrame(data_population)
+>>> gdp_dask = dd.from_dict(data_gdp, npartitions=2)
+>>> population_dask = dd.from_dict(data_population, npartitions=2)
+
Let's define a dataframe-agnostic function in which we join over "datetime" column:
+>>> def agnostic_join_asof_datetime(
+... df_native: IntoFrameT,
+... other_native: IntoFrameT,
+... strategy: Literal["backward", "forward", "nearest"],
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return (
+... df.sort("datetime")
+... .join_asof(other, on="datetime", strategy=strategy)
+... .collect()
+... .to_native()
+... )
+
We can then pass any supported library such as Polars or Dask to agnostic_join_asof_datetime
:
>>> agnostic_join_asof_datetime(population_pl, gdp_pl, strategy="backward")
+shape: (3, 3)
+┌─────────────────────┬────────────┬──────┐
+│ datetime ┆ population ┆ gdp │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ f64 ┆ i64 │
+╞═════════════════════╪════════════╪══════╡
+│ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │
+│ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │
+│ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │
+└─────────────────────┴────────────┴──────┘
+>>> agnostic_join_asof_datetime(population_dask, gdp_dask, strategy="backward")
+ datetime population gdp
+0 2016-03-01 82.19 4164
+1 2018-08-01 82.66 4566
+0 2019-01-01 83.12 4696
+
Here is a real-world time-series example that uses the by
argument.
>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data_quotes = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 30),
+... datetime(2016, 5, 25, 13, 30, 0, 41),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 49),
+... datetime(2016, 5, 25, 13, 30, 0, 72),
+... datetime(2016, 5, 25, 13, 30, 0, 75),
+... ],
+... "ticker": [
+... "GOOG",
+... "MSFT",
+... "MSFT",
+... "MSFT",
+... "GOOG",
+... "AAPL",
+... "GOOG",
+... "MSFT",
+... ],
+... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
+... }
+>>> data_trades = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 38),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 49),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... ],
+... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+... "quantity": [75, 155, 100, 100, 100],
+... }
+>>> quotes_pl = pl.LazyFrame(data_quotes)
+>>> trades_pl = pl.LazyFrame(data_trades)
+>>> quotes_dask = dd.from_dict(data_quotes, npartitions=2)
+>>> trades_dask = dd.from_dict(data_trades, npartitions=2)
+
Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns:
+>>> def agnostic_join_asof_datetime_by_ticker(
+... df_native: IntoFrameT,
+... other_native: IntoFrameT,
+... ) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... other = nw.from_native(other_native)
+... return (
+... df.sort("datetime", "ticker")
+... .join_asof(other, on="datetime", by="ticker")
+... .sort("datetime", "ticker")
+... .collect()
+... .to_native()
+... )
+
We can then pass any supported library such as Polars or Dask to agnostic_join_asof_datetime_by_ticker
:
>>> agnostic_join_asof_datetime_by_ticker(trades_pl, quotes_pl)
+shape: (5, 6)
+┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐
+│ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │
+╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡
+│ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │
+│ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │
+│ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000049 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │
+└────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘
+>>> agnostic_join_asof_datetime_by_ticker(trades_dask, quotes_dask)
+ datetime ticker price quantity bid ask
+0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96
+0 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98
+1 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN
+2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93
+3 2016-05-25 13:30:00.000049 GOOG 720.92 100 720.50 720.93
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A LazyFrame. + |
+
Examples:
+Construct pandas and Polars objects:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
We define a library agnostic function:
+>>> def agnostic_lazy(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.lazy().to_native()
+
Note that the pandas DataFrame then stays eager, while the Polars LazyFrame stays lazy:
+>>> agnostic_lazy(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> agnostic_lazy(lf_pl)
+<LazyFrame ...>
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[Any], Self]
+ |
+
+
+
+ Function to apply. + |
+ + required + | +
+ args
+ |
+
+ Any
+ |
+
+
+
+ Positional arguments to pass to function. + |
+
+ ()
+ |
+
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Keyword arguments to pass to function. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The original object with the function applied. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.pipe(lambda _df: _df.select("a")).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_pipe
:
>>> agnostic_pipe(lf_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> agnostic_pipe(lf_dask)
+ a
+0 1
+1 2
+2 3
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mapping
+ |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The LazyFrame with the specified columns renamed. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
We define a library agnostic function:
+>>> def agnostic_rename(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.rename({"foo": "apple"}).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_rename
:
>>> agnostic_rename(lf_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+>>> agnostic_rename(lf_dask)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+
select(*exprs, **named_exprs)
+
+Select columns from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. +Accepts expression input. Strings are parsed as column names. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. +The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The LazyFrame containing only the selected columns. + |
+
If you'd like to select a column whose name isn't a string (for example,
+if you're working with pandas) then you should explicitly use nw.col
instead
+of just passing the column name. For example, to select a column named
+0
use df.select(nw.col(0))
, not df.select(0)
.
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select("foo").collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_select
:
>>> agnostic_select(lf_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> agnostic_select(lf_dask)
+ foo
+0 1
+1 2
+2 3
+
Multiple columns can be selected by passing a list of column names.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(["foo", "bar"]).collect().to_native()
+
>>> agnostic_select(lf_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+>>> agnostic_select(lf_dask)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("foo"), nw.col("bar") + 1).collect().to_native()
+
>>> agnostic_select(lf_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> agnostic_select(lf_dask)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+
Use keyword arguments to easily name your expression inputs.
+>>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(threshold=nw.col("foo") * 2).collect().to_native()
+
>>> agnostic_select(lf_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+>>> agnostic_select(lf_dask)
+ threshold
+0 2
+1 4
+2 6
+
sort(by, *more_by, descending=False, nulls_last=False)
+
+Sort the LazyFrame by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ by
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) names to sort by. + |
+ + required + | +
+ *more_by
+ |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional arguments. + |
+
+ ()
+ |
+
+ descending
+ |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple columns, can be +specified per column by passing a sequence of booleans. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last. Only a single boolean is supported, and it is applied to +all columns being sorted by. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The sorted LazyFrame. + |
+
Unlike Polars, it is not possible to specify a sequence of booleans for
+nulls_last
in order to control per-column behaviour. Instead a single
+boolean is applied for all by
columns.
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders
+>>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.sort("c", "a", descending=[False, True]).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_sort
:
>>> agnostic_sort(lf_pl)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+>>> agnostic_sort(lf_dask)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A subset of the LazyFrame of shape (n, n_columns). + |
+
LazyFrame.tail
is not supported for the Dask backend with multiple
+partitions.
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=1)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> def agnostic_tail(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.tail(3).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_tail
:
>>> agnostic_tail(lf_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+>>> agnostic_tail(lf_dask)
+ a b
+3 4 10
+4 5 11
+5 6 12
+
to_native()
+
+Convert Narwhals LazyFrame to native one.
+ + +Returns:
+Type | +Description | +
---|---|
+ FrameT
+ |
+
+
+
+ Object of class that user started with. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>>
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Calling to_native
on a Narwhals LazyFrame returns the native object:
>>> nw.from_native(lf_pl).to_native().collect()
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 7.0 ┆ b │
+│ 3 ┆ 8.0 ┆ c │
+└─────┴─────┴─────┘
+>>> nw.from_native(lf_dask).to_native().compute()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+
unique(subset=None, *, keep='any', maintain_order=None)
+
+Drop duplicate rows from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows.
+ If set to |
+
+ None
+ |
+
+ keep
+ |
+
+ Literal['any', 'none']
+ |
+
+
+
+ {'any', 'none'} +Which of the duplicate rows to keep. +
|
+
+ 'any'
+ |
+
+ maintain_order
+ |
+
+ bool | None
+ |
+
+
+
+ Has no effect and is kept around only for backwards-compatibility. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The LazyFrame with unique rows. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
We define a library agnostic function:
+>>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.unique(["bar", "ham"]).collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_unique
:
>>> agnostic_unique(lf_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+>>> agnostic_unique(lf_dask)
+ foo bar ham
+0 1 a b
+
unpivot(on=None, *, index=None, variable_name=None, value_name=None)
+
+Unpivot a LazyFrame from wide to long format.
+Optionally leaves identifiers set.
+This function is useful to massage a LazyFrame into a format where one or more +columns are identifier variables (index) while all other columns, considered +measured variables (on), are "unpivoted" to the row axis leaving just +two non-identifier columns, 'variable' and 'value'.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as values variables; if |
+
+ None
+ |
+
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as identifier variables. + |
+
+ None
+ |
+
+ variable_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
+ value_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The unpivoted LazyFrame. + |
+
If you're coming from pandas, this is similar to pandas.DataFrame.melt
,
+but with index
replacing id_vars
and on
replacing value_vars
.
+In other frameworks, you might know this operation as pivot_longer
.
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": ["x", "y", "z"],
+... "b": [1, 3, 5],
+... "c": [2, 4, 6],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
We define a library agnostic function:
+>>> def agnostic_unpivot(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... (df.unpivot(on=["b", "c"], index="a").sort(["variable", "a"]))
+... .collect()
+... .to_native()
+... )
+
We can then pass any supported library such as Polars or Dask to agnostic_unpivot
:
>>> agnostic_unpivot(lf_pl)
+shape: (6, 3)
+┌─────┬──────────┬───────┐
+│ a ┆ variable ┆ value │
+│ --- ┆ --- ┆ --- │
+│ str ┆ str ┆ i64 │
+╞═════╪══════════╪═══════╡
+│ x ┆ b ┆ 1 │
+│ y ┆ b ┆ 3 │
+│ z ┆ b ┆ 5 │
+│ x ┆ c ┆ 2 │
+│ y ┆ c ┆ 4 │
+│ z ┆ c ┆ 6 │
+└─────┴──────────┴───────┘
+>>> agnostic_unpivot(lf_dask)
+ a variable value
+0 x b 1
+1 y b 3
+0 z b 5
+2 x c 2
+3 y c 4
+1 z c 6
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this LazyFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ A new LazyFrame with the columns added. + |
+
Creating a new LazyFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return (
+... df.with_columns((nw.col("a") * 2).alias("2a")).collect().to_native()
+... )
+
We can then pass any supported library such as Polars or Dask to agnostic_with_columns
:
>>> agnostic_with_columns(lf_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+>>> agnostic_with_columns(lf_dask)
+ a b c 2a
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The name of the column as a string. The default is "index". + |
+
+ 'index'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ The original object with the column added. + |
+
Examples:
+>>> import polars as pl
+>>> import dask.dataframe as dd
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf_dask = dd.from_dict(data, npartitions=2)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_with_row_index(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_row_index().collect().to_native()
+
We can then pass any supported library such as Polars or Dask to agnostic_with_row_index
:
>>> agnostic_with_row_index(lf_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+>>> agnostic_with_row_index(lf_dask)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+
Here are the top-level functions available in Narwhals.
+ + +all()
+
+Instantiate an expression representing all columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.all() * 2).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_all
:
>>> agnostic_all(df_pd)
+ a b
+0 2 8
+1 4 10
+2 6 12
+
>>> agnostic_all(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 8 │
+│ 4 ┆ 10 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+
>>> agnostic_all(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[2,4,6]]
+b: [[8,10,12]]
+
all_horizontal(*exprs)
+
+Compute the bitwise AND horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [False, False, True, True, False, None],
+... "b": [False, True, True, None, None, None],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow")
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_all_horizontal(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select("a", "b", all=nw.all_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_all_horizontal
:
>>> agnostic_all_horizontal(df_pd)
+ a b all
+0 False False False
+1 False True False
+2 True True True
+3 True <NA> <NA>
+4 False <NA> False
+5 <NA> <NA> <NA>
+
>>> agnostic_all_horizontal(df_pl)
+shape: (6, 3)
+┌───────┬───────┬───────┐
+│ a ┆ b ┆ all │
+│ --- ┆ --- ┆ --- │
+│ bool ┆ bool ┆ bool │
+╞═══════╪═══════╪═══════╡
+│ false ┆ false ┆ false │
+│ false ┆ true ┆ false │
+│ true ┆ true ┆ true │
+│ true ┆ null ┆ null │
+│ false ┆ null ┆ false │
+│ null ┆ null ┆ null │
+└───────┴───────┴───────┘
+
>>> agnostic_all_horizontal(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+all: bool
+----
+a: [[false,false,true,true,false,null]]
+b: [[false,true,true,null,null,null]]
+all: [[false,false,true,null,false,null]]
+
any_horizontal(*exprs)
+
+Compute the bitwise OR horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [False, False, True, True, False, None],
+... "b": [False, True, True, None, None, None],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow")
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_any_horizontal(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select("a", "b", any=nw.any_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_any_horizontal
:
>>> agnostic_any_horizontal(df_pd)
+ a b any
+0 False False False
+1 False True True
+2 True True True
+3 True <NA> True
+4 False <NA> <NA>
+5 <NA> <NA> <NA>
+
>>> agnostic_any_horizontal(df_pl)
+shape: (6, 3)
+┌───────┬───────┬───────┐
+│ a ┆ b ┆ any │
+│ --- ┆ --- ┆ --- │
+│ bool ┆ bool ┆ bool │
+╞═══════╪═══════╪═══════╡
+│ false ┆ false ┆ false │
+│ false ┆ true ┆ true │
+│ true ┆ true ┆ true │
+│ true ┆ null ┆ true │
+│ false ┆ null ┆ null │
+│ null ┆ null ┆ null │
+└───────┴───────┴───────┘
+
>>> agnostic_any_horizontal(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+any: bool
+----
+a: [[false,false,true,true,false,null]]
+b: [[false,true,true,null,null,null]]
+any: [[false,true,true,true,null,null]]
+
col(*names)
+
+Creates an expression that references one or more columns by their name(s).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ names
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Name(s) of the columns to use. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_col(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.col("a") * nw.col("b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_col
:
>>> agnostic_col(df_pd)
+ a
+0 3
+1 8
+
>>> agnostic_col(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+│ 8 │
+└─────┘
+
>>> agnostic_col(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[3,8]]
+
concat(items, *, how='vertical')
+
+Concatenate multiple DataFrames or LazyFrames into a single entity.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ items
+ |
+
+ Iterable[FrameT]
+ |
+
+
+
+ DataFrames or LazyFrames to concatenate. + |
+ + required + | +
+ how
+ |
+
+ Literal['horizontal', 'vertical', 'diagonal']
+ |
+
+
+
+ concatenating strategy: +
|
+
+ 'vertical'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ FrameT
+ |
+
+
+
+ A new DataFrame or LazyFrame resulting from the concatenation. + |
+
Raises:
+Type | +Description | +
---|---|
+ TypeError
+ |
+
+
+
+ The items to concatenate should either all be eager, or all lazy + |
+
Examples:
+Let's take an example of vertical concatenation:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> data_2 = {"a": [5, 2], "b": [1, 4]}
+
>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def agnostic_vertical_concat(df1, df2):
+... return nw.concat([df1, df2], how="vertical")
+
>>> agnostic_vertical_concat(df_pd_1, df_pd_2)
+ a b
+0 1 4
+1 2 5
+2 3 6
+0 5 1
+1 2 4
+>>> agnostic_vertical_concat(df_pl_1, df_pl_2)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+│ 5 ┆ 1 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
Let's look at a case of horizontal concatenation:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> data_2 = {"c": [5, 2], "d": [1, 4]}
+
>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
Defining a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def agnostic_horizontal_concat(df1, df2):
+... return nw.concat([df1, df2], how="horizontal")
+
>>> agnostic_horizontal_concat(df_pd_1, df_pd_2)
+ a b c d
+0 1 4 5.0 1.0
+1 2 5 2.0 4.0
+2 3 6 NaN NaN
+
>>> agnostic_horizontal_concat(df_pl_1, df_pl_2)
+shape: (3, 4)
+┌─────┬─────┬──────┬──────┐
+│ a ┆ b ┆ c ┆ d │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪══════╪══════╡
+│ 1 ┆ 4 ┆ 5 ┆ 1 │
+│ 2 ┆ 5 ┆ 2 ┆ 4 │
+│ 3 ┆ 6 ┆ null ┆ null │
+└─────┴─────┴──────┴──────┘
+
Let's look at a case of diagonal concatenation:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2], "b": [3.5, 4.5]}
+>>> data_2 = {"a": [3, 4], "z": ["x", "y"]}
+
>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
Defining a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def agnostic_diagonal_concat(df1, df2):
+... return nw.concat([df1, df2], how="diagonal")
+
>>> agnostic_diagonal_concat(df_pd_1, df_pd_2)
+ a b z
+0 1 3.5 NaN
+1 2 4.5 NaN
+0 3 NaN x
+1 4 NaN y
+
>>> agnostic_diagonal_concat(df_pl_1, df_pl_2)
+shape: (4, 3)
+┌─────┬──────┬──────┐
+│ a ┆ b ┆ z │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪══════╪══════╡
+│ 1 ┆ 3.5 ┆ null │
+│ 2 ┆ 4.5 ┆ null │
+│ 3 ┆ null ┆ x │
+│ 4 ┆ null ┆ y │
+└─────┴──────┴──────┘
+
concat_str(exprs, *more_exprs, separator='', ignore_nulls=False)
+
+Horizontally concatenate columns into a single string column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Columns to concatenate into a single string column. Accepts expression
+input. Strings are parsed as column names, other non-expression inputs are
+parsed as literals. Non- |
+ + required + | +
+ *more_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to concatenate into a single string column, +specified as positional arguments. + |
+
+ ()
+ |
+
+ separator
+ |
+
+ str
+ |
+
+
+
+ String that will be used to separate the values of each column. + |
+
+ ''
+ |
+
+ ignore_nulls
+ |
+
+ bool
+ |
+
+
+
+ Ignore null values (default is |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 2, 3],
+... "b": ["dogs", "cats", None],
+... "c": ["play", "swim", "walk"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal string +concatenation of different columns
+>>> def agnostic_concat_str(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(
+... nw.concat_str(
+... [
+... nw.col("a") * 2,
+... nw.col("b"),
+... nw.col("c"),
+... ],
+... separator=" ",
+... ).alias("full_sentence")
+... ).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow
+to agnostic_concat_str
:
>>> agnostic_concat_str(pd.DataFrame(data))
+ full_sentence
+0 2 dogs play
+1 4 cats swim
+2 None
+
>>> agnostic_concat_str(pl.DataFrame(data))
+shape: (3, 1)
+┌───────────────┐
+│ full_sentence │
+│ --- │
+│ str │
+╞═══════════════╡
+│ 2 dogs play │
+│ 4 cats swim │
+│ null │
+└───────────────┘
+
>>> agnostic_concat_str(pa.table(data))
+pyarrow.Table
+full_sentence: string
+----
+full_sentence: [["2 dogs play","4 cats swim",null]]
+
from_arrow(native_frame, *, native_namespace)
+
+Construct a DataFrame from an object which supports the PyCapsule Interface.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_frame
+ |
+
+ ArrowStreamExportable
+ |
+
+
+
+ Object which implements |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's define a dataframe-agnostic function which creates a PyArrow +Table.
+>>> def agnostic_to_arrow(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return nw.from_arrow(df, native_namespace=pa).to_native()
+
Let's see what happens when passing pandas / Polars input:
+>>> agnostic_to_arrow(pd.DataFrame(data))
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+>>> agnostic_to_arrow(pl.DataFrame(data))
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+
from_dict(data, schema=None, *, native_namespace=None)
+
+Instantiate DataFrame from dictionary.
+Indexes (if present, for pandas-like backends) are aligned following +the left-hand-rule.
+ + +For pandas-like dataframes, conversion to schema is applied after dataframe +creation.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ data
+ |
+
+ dict[str, Any]
+ |
+
+
+
+ Dictionary to create DataFrame from. + |
+ + required + | +
+ schema
+ |
+
+ dict[str, DType] | Schema | None
+ |
+
+
+
+ The DataFrame schema as Schema or dict of {name: type}. + |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType | None
+ |
+
+
+
+ The native library to use for DataFrame creation. Only +necessary if inputs are not Narwhals Series. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data:
+>>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = {"c": [5, 2], "d": [1, 4]}
+... native_namespace = nw.get_native_namespace(df_native)
+... return nw.from_dict(new_data, native_namespace=native_namespace).to_native()
+
Let's see what happens when passing pandas, Polars or PyArrow input:
+>>> agnostic_from_dict(pd.DataFrame(data))
+ c d
+0 5 1
+1 2 4
+>>> agnostic_from_dict(pl.DataFrame(data))
+shape: (2, 2)
+┌─────┬─────┐
+│ c ┆ d │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 5 ┆ 1 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+>>> agnostic_from_dict(pa.table(data))
+pyarrow.Table
+c: int64
+d: int64
+----
+c: [[5,2]]
+d: [[1,4]]
+
from_native(native_object, *, strict=None, pass_through=None, eager_only=False, eager_or_interchange_only=False, series_only=False, allow_series=None)
+
+Convert native_object
to Narwhals Dataframe, Lazyframe, or Series.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_object
+ |
+
+ IntoFrameT | IntoSeries | T
+ |
+
+
+
+ Raw object from user. +Depending on the other arguments, input object can be: +
|
+ + required + | +
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object can't be converted to Narwhals: +
Deprecated (v1.13.0):
+ Please use |
+
+ None
+ |
+
+ pass_through
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object can't be converted to Narwhals: +
|
+
+ None
+ |
+
+ eager_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects: +
|
+
+ False
+ |
+
+ eager_or_interchange_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects or objects which +have interchange-level support in Narwhals: +
See interchange-only support +for more details. + |
+
+ False
+ |
+
+ series_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow Series: +
|
+
+ False
+ |
+
+ allow_series
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to allow Series (default is only Dataframe / Lazyframe): +
|
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrame[IntoFrameT] | DataFrame[IntoFrameT] | Series[IntoSeriesT] | T
+ |
+
+
+
+ DataFrame, LazyFrame, Series, or original object, depending +on which combination of parameters was passed. + |
+
from_numpy(data, schema=None, *, native_namespace)
+
+Construct a DataFrame from a NumPy ndarray.
+ + +Only row orientation is currently supported.
+For pandas-like dataframes, conversion to schema is applied after dataframe +creation.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ data
+ |
+
+ ndarray
+ |
+
+
+
+ Two-dimensional data represented as a NumPy ndarray. + |
+ + required + | +
+ schema
+ |
+
+ dict[str, DType] | Schema | list[str] | None
+ |
+
+
+
+ The DataFrame schema as Schema, dict of {name: type}, or a list of str. + |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> import numpy as np
+>>> from narwhals.typing import IntoFrameT
+>>> data = {"a": [1, 2], "b": [3, 4]}
+
Let's create a new dataframe of the same class as the dataframe we started with, from a NumPy ndarray of new data:
+>>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = np.array([[5, 2, 1], [1, 4, 3]])
+... df = nw.from_native(df_native)
+... native_namespace = nw.get_native_namespace(df)
+... return nw.from_numpy(new_data, native_namespace=native_namespace).to_native()
+
Let's see what happens when passing pandas, Polars or PyArrow input:
+>>> agnostic_from_numpy(pd.DataFrame(data))
+ column_0 column_1 column_2
+0 5 2 1
+1 1 4 3
+>>> agnostic_from_numpy(pl.DataFrame(data))
+shape: (2, 3)
+┌──────────┬──────────┬──────────┐
+│ column_0 ┆ column_1 ┆ column_2 │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞══════════╪══════════╪══════════╡
+│ 5 ┆ 2 ┆ 1 │
+│ 1 ┆ 4 ┆ 3 │
+└──────────┴──────────┴──────────┘
+>>> agnostic_from_numpy(pa.table(data))
+pyarrow.Table
+column_0: int64
+column_1: int64
+column_2: int64
+----
+column_0: [[5,1]]
+column_1: [[2,4]]
+column_2: [[1,3]]
+
Let's specify the column names:
+>>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = np.array([[5, 2, 1], [1, 4, 3]])
+... schema = ["c", "d", "e"]
+... df = nw.from_native(df_native)
+... native_namespace = nw.get_native_namespace(df)
+... return nw.from_numpy(
+... new_data, native_namespace=native_namespace, schema=schema
+... ).to_native()
+
Let's see the modified outputs:
+>>> agnostic_from_numpy(pd.DataFrame(data))
+ c d e
+0 5 2 1
+1 1 4 3
+>>> agnostic_from_numpy(pl.DataFrame(data))
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ c ┆ d ┆ e │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ 5 ┆ 2 ┆ 1 │
+│ 1 ┆ 4 ┆ 3 │
+└─────┴─────┴─────┘
+>>> agnostic_from_numpy(pa.table(data))
+pyarrow.Table
+c: int64
+d: int64
+e: int64
+----
+c: [[5,1]]
+d: [[2,4]]
+e: [[1,3]]
+
Let's modify the function so that it specifies the schema:
+>>> def agnostic_from_numpy(df_native: IntoFrameT) -> IntoFrameT:
+... new_data = np.array([[5, 2, 1], [1, 4, 3]])
+... schema = {"c": nw.Int16(), "d": nw.Float32(), "e": nw.Int8()}
+... df = nw.from_native(df_native)
+... native_namespace = nw.get_native_namespace(df)
+... return nw.from_numpy(
+... new_data, native_namespace=native_namespace, schema=schema
+... ).to_native()
+
Let's see the outputs:
+>>> agnostic_from_numpy(pd.DataFrame(data))
+ c d e
+0 5 2.0 1
+1 1 4.0 3
+>>> agnostic_from_numpy(pl.DataFrame(data))
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ c ┆ d ┆ e │
+│ --- ┆ --- ┆ --- │
+│ i16 ┆ f32 ┆ i8 │
+╞═════╪═════╪═════╡
+│ 5 ┆ 2.0 ┆ 1 │
+│ 1 ┆ 4.0 ┆ 3 │
+└─────┴─────┴─────┘
+>>> agnostic_from_numpy(pa.table(data))
+pyarrow.Table
+c: int16
+d: float
+e: int8
+----
+c: [[5,1]]
+d: [[2,4]]
+e: [[1,3]]
+
generate_temporary_column_name(n_bytes, columns)
+
+Generates a unique column name that is not present in the given list of columns.
+It relies on Python's secrets.token_hex +function to return a hexadecimal string of n_bytes random bytes.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n_bytes
+ |
+
+ int
+ |
+
+
+
+ The number of bytes to generate for the token. + |
+ + required + | +
+ columns
+ |
+
+ list[str]
+ |
+
+
+
+ The list of columns to check for uniqueness. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ str
+ |
+
+
+
+ A unique token that is not present in the given list of columns. + |
+
Raises:
+Type | +Description | +
---|---|
+ AssertionError
+ |
+
+
+
+ If a unique token cannot be generated after 100 attempts. + |
+
Examples:
+>>> import narwhals as nw
+>>> columns = ["abc", "xyz"]
+>>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns
+True
+
get_level(obj)
+
+Level of support Narwhals has for current object.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ DataFrame[Any] | LazyFrame[Any] | Series[IntoSeriesT]
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Literal['full', 'lazy', 'interchange']
+ |
+
+
+
+ This can be one of: +
|
+
get_native_namespace(obj)
+
+Get native namespace from object.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ DataFrame[Any] | LazyFrame[Any] | Series[Any] | DataFrame | Series | DataFrame | LazyFrame | Series | Table | ChunkedArray
+ |
+
+
+
+ Dataframe, Lazyframe, or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ Native module. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'pandas'...>
+>>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'polars'...>
+
is_ordered_categorical(series)
+
+Return whether indices of categories are semantically meaningful.
+This is a convenience function for accessing what would otherwise be
+the is_ordered
property from the DataFrame Interchange Protocol,
+see https://data-apis.org/dataframe-protocol/latest/API.html.
dtype.ordering == "physical"
.dtype.cat.ordered == True
.dtype.type.ordered == True
.Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ series
+ |
+
+ Series[Any]
+ |
+
+
+
+ Input Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+ Whether the Series is an ordered categorical. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = ["x", "y"]
+>>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True))
+>>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical"))
+
Let's define a library-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return nw.is_ordered_categorical(s)
+
Then, we can pass any supported library to func
:
>>> func(s_pd)
+True
+>>> func(s_pl)
+True
+
len()
+
+Return the number of rows.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [5, 10]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.len()).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_len
:
>>> agnostic_len(df_pd)
+ len
+0 2
+>>> agnostic_len(df_pl)
+shape: (1, 1)
+┌─────┐
+│ len │
+│ --- │
+│ u32 │
+╞═════╡
+│ 2 │
+└─────┘
+>>> agnostic_len(df_pa)
+pyarrow.Table
+len: int64
+----
+len: [[2]]
+
lit(value, dtype=None)
+
+Return an expression representing a literal value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any
+ |
+
+
+
+ The value to use as literal. + |
+ + required + | +
+ dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ The data type of the literal value. If not provided, the data type will +be inferred. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_lit(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(nw.lit(3)).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_lit
:
>>> agnostic_lit(df_pd)
+ a literal
+0 1 3
+1 2 3
+
>>> agnostic_lit(df_pl)
+shape: (2, 2)
+┌─────┬─────────┐
+│ a ┆ literal │
+│ --- ┆ --- │
+│ i64 ┆ i32 │
+╞═════╪═════════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 3 │
+└─────┴─────────┘
+
>>> agnostic_lit(df_pa)
+pyarrow.Table
+a: int64
+literal: int64
+----
+a: [[1,2]]
+literal: [[3,3]]
+
max(*columns)
+
+Return the maximum value.
+ + +Syntactic sugar for nw.col(columns).max()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [5, 10]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.max("a")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_max
:
>>> agnostic_max(df_pd)
+ a
+0 2
+
>>> agnostic_max(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+└─────┘
+
>>> agnostic_max(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2]]
+
max_horizontal(*exprs)
+
+Get the maximum value horizontally across columns.
+ + +We support max_horizontal
over numeric columns only.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal max of "a" +and "b" columns:
+>>> def agnostic_max_horizontal(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.max_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_max_horizontal
:
>>> agnostic_max_horizontal(pd.DataFrame(data))
+ a
+0 4.0
+1 8.0
+2 3.0
+
>>> agnostic_max_horizontal(pl.DataFrame(data))
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 4 │
+│ 8 │
+│ 3 │
+└─────┘
+
>>> agnostic_max_horizontal(pa.table(data))
+pyarrow.Table
+a: int64
+----
+a: [[4,8,3]]
+
maybe_align_index(lhs, rhs)
+
+Align lhs
to the Index of rhs
, if they're both pandas-like.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lhs
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
+ rhs
+ |
+
+ Series[Any] | DataFrame[Any] | LazyFrame[Any]
+ |
+
+
+
+ Dataframe or Series to align with. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
This is only really intended for backwards-compatibility purposes,
+for example if your library already aligns indices for users.
+If you're designing a new library, we highly encourage you to not
+rely on the Index.
+For non-pandas-like inputs, this only checks that lhs
and rhs
+are the same length.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4])
+>>> s_pd = pd.Series([6, 7], index=[4, 3])
+>>> df = nw.from_native(df_pd)
+>>> s = nw.from_native(s_pd, series_only=True)
+>>> nw.to_native(nw.maybe_align_index(df, s))
+ a
+4 2
+3 1
+
maybe_convert_dtypes(obj, *args, **kwargs)
+
+Convert columns or series to the best possible dtypes using dtypes supporting pd.NA
, if the input is pandas-like.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ DataFrame or Series. + |
+ + required + | +
+ *args
+ |
+
+ bool
+ |
+
+
+
+ Additional arguments which get passed through. + |
+
+ ()
+ |
+
+ **kwargs
+ |
+
+ bool | str
+ |
+
+
+
+ Additional keyword arguments which get passed through. + |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
For non-pandas-like inputs, this is a no-op.
+Also, args
and kwargs
just get passed down to the underlying library as-is.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> import numpy as np
+>>> df_pd = pd.DataFrame(
+... {
+... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
+... }
+... )
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes
+a Int32
+b boolean
+dtype: object
+
maybe_get_index(obj)
+
+Get the index of a DataFrame or a Series, if it's pandas-like.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ DataFrame[Any] | LazyFrame[Any] | Series[Any]
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any | None
+ |
+
+
+
+ The index of the input if it is pandas-like, otherwise None. + |
+
This is only really intended for backwards-compatibility purposes,
+for example if your library already aligns indices for users.
+If you're designing a new library, we highly encourage you to not
+rely on the Index.
+For non-pandas-like inputs, this returns None
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df = nw.from_native(df_pd)
+>>> nw.maybe_get_index(df)
+RangeIndex(start=0, stop=2, step=1)
+>>> series_pd = pd.Series([1, 2])
+>>> series = nw.from_native(series_pd, series_only=True)
+>>> nw.maybe_get_index(series)
+RangeIndex(start=0, stop=2, step=1)
+
maybe_reset_index(obj)
+
+Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ Dataframe or Series. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
This is only really intended for backwards-compatibility purposes, +for example if your library already resets the index for users. +If you're designing a new library, we highly encourage you to not +rely on the Index. +For non-pandas-like inputs, this is a no-op.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7]))
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_reset_index(df))
+ a b
+0 1 4
+1 2 5
+>>> series_pd = pd.Series([1, 2])
+>>> series = nw.from_native(series_pd, series_only=True)
+>>> nw.maybe_get_index(series)
+RangeIndex(start=0, stop=2, step=1)
+
maybe_set_index(obj, column_names=None, *, index=None)
+
+Set the index of a DataFrame or a Series, if it's pandas-like.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ FrameOrSeriesT
+ |
+
+
+
+ object for which maybe set the index (can be either a Narwhals |
+ + required + | +
+ column_names
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ name or list of names of the columns to set as index.
+For dataframes, only one of |
+
+ None
+ |
+
+ index
+ |
+
+ Series[IntoSeriesT] | list[Series[IntoSeriesT]] | None
+ |
+
+
+
+ series or list of series to set as index. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ FrameOrSeriesT
+ |
+
+
+
+ Same type as input. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If one of the following conditions happens: +
|
+
This is only really intended for backwards-compatibility purposes, for example if +your library already aligns indices for users. +If you're designing a new library, we highly encourage you to not +rely on the Index.
+For non-pandas-like inputs, this is a no-op.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_set_index(df, "b"))
+ a
+b
+4 1
+5 2
+
mean(*columns)
+
+Get the mean value.
+ + +Syntactic sugar for nw.col(columns).mean()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 8, 3]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.mean("a")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_mean
:
>>> agnostic_mean(df_pd)
+ a
+0 4.0
+
>>> agnostic_mean(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 4.0 │
+└─────┘
+
>>> agnostic_mean(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[4]]
+
mean_horizontal(*exprs)
+
+Compute the mean of all values horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function that computes the horizontal mean of "a" +and "b" columns:
+>>> def agnostic_mean_horizontal(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.mean_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_mean_horizontal
:
>>> agnostic_mean_horizontal(df_pd)
+ a
+0 2.5
+1 6.5
+2 3.0
+
>>> agnostic_mean_horizontal(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 2.5 │
+│ 6.5 │
+│ 3.0 │
+└─────┘
+
>>> agnostic_mean_horizontal(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[2.5,6.5,3]]
+
median(*columns)
+
+Get the median value.
+ + +Syntactic sugar for nw.col(columns).median()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [4, 5, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.median("a")).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_median
:
>>> agnostic_median(df_pd)
+ a
+0 4.0
+
>>> agnostic_median(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 4.0 │
+└─────┘
+
>>> agnostic_median(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[4]]
+
min(*columns)
+
+Return the minimum value.
+ + +Syntactic sugar for nw.col(columns).min()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [5, 10]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.min("b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_min
:
>>> agnostic_min(df_pd)
+ b
+0 5
+
>>> agnostic_min(df_pl)
+shape: (1, 1)
+┌─────┐
+│ b │
+│ --- │
+│ i64 │
+╞═════╡
+│ 5 │
+└─────┘
+
>>> agnostic_min(df_pa)
+pyarrow.Table
+b: int64
+----
+b: [[5]]
+
min_horizontal(*exprs)
+
+Get the minimum value horizontally across columns.
+ + +We support min_horizontal
over numeric columns only.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal min of "a" +and "b" columns:
+>>> def agnostic_min_horizontal(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.min_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_min_horizontal
:
>>> agnostic_min_horizontal(pd.DataFrame(data))
+ a
+0 1.0
+1 5.0
+2 3.0
+
>>> agnostic_min_horizontal(pl.DataFrame(data))
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 5 │
+│ 3 │
+└─────┘
+
>>> agnostic_min_horizontal(pa.table(data))
+pyarrow.Table
+a: int64
+----
+a: [[1,5,3]]
+
narwhalify(func=None, *, strict=None, pass_through=None, eager_only=False, eager_or_interchange_only=False, series_only=False, allow_series=True)
+
+Decorate function so it becomes dataframe-agnostic.
+This will try to convert any dataframe/series-like object into the Narwhals
+respective DataFrame/Series, while leaving the other parameters as they are.
+Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be
+converted back to the original dataframe/series type, while if the output is another
+type it will be left as is.
+By setting pass_through=False
, then every input and every output will be required to be a
+dataframe/series-like object.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ func
+ |
+
+ Callable[..., Any] | None
+ |
+
+
+
+ Function to wrap in a |
+
+ None
+ |
+
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ Deprecated (v1.13.0):
+Please use Determine what happens if the object can't be converted to Narwhals: +
|
+
+ None
+ |
+
+ pass_through
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object can't be converted to Narwhals: +
|
+
+ None
+ |
+
+ eager_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects: +
|
+
+ False
+ |
+
+ eager_or_interchange_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow eager objects or objects which +have interchange-level support in Narwhals: +
See interchange-only support +for more details. + |
+
+ False
+ |
+
+ series_only
+ |
+
+ bool
+ |
+
+
+
+ Whether to only allow Series: +
|
+
+ False
+ |
+
+ allow_series
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to allow Series (default is only Dataframe / Lazyframe): +
|
+
+ True
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Callable[..., Any]
+ |
+
+
+
+ Decorated function. + |
+
Examples:
+Instead of writing
+>>> import narwhals as nw
+>>> def agnostic_group_by_sum(df):
+... df = nw.from_native(df, pass_through=True)
+... df = df.group_by("a").agg(nw.col("b").sum())
+... return nw.to_native(df)
+
you can just write
+>>> @nw.narwhalify
+... def agnostic_group_by_sum(df):
+... return df.group_by("a").agg(nw.col("b").sum())
+
new_series(name, values, dtype=None, *, native_namespace)
+
+Instantiate Narwhals Series from iterable (e.g. list or array).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ Name of resulting Series. + |
+ + required + | +
+ values
+ |
+
+ Any
+ |
+
+
+
+ Values to make Series from. + |
+ + required + | +
+ dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ (Narwhals) dtype. If not provided, the native library
+may auto-infer it from |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for Series creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Series[Any]
+ |
+
+
+
+ A new Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT, IntoSeriesT
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_new_series(df_native: IntoFrameT) -> IntoSeriesT:
+... values = [4, 1, 2, 3]
+... native_namespace = nw.get_native_namespace(df_native)
+... return nw.new_series(
+... name="a",
+... values=values,
+... dtype=nw.Int32,
+... native_namespace=native_namespace,
+... ).to_native()
+
We can then pass any supported eager library, such as pandas / Polars / PyArrow:
+>>> agnostic_new_series(pd.DataFrame(data))
+0 4
+1 1
+2 2
+3 3
+Name: a, dtype: int32
+>>> agnostic_new_series(pl.DataFrame(data))
+shape: (4,)
+Series: 'a' [i32]
+[
+ 4
+ 1
+ 2
+ 3
+]
+>>> agnostic_new_series(pa.table(data))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 4,
+ 1,
+ 2,
+ 3
+ ]
+]
+
nth(*indices)
+
+Creates an expression that references one or more columns by their index(es).
+ + +nth
is not supported for Polars version<1.0.0. Please use
+narwhals.col
instead.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ indices
+ |
+
+ int | Sequence[int]
+ |
+
+
+
+ One or more indices representing the columns to retrieve. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_nth(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.nth(0) * 2).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to agnostic_nth
:
>>> agnostic_nth(df_pd)
+ a
+0 2
+1 4
+
>>> agnostic_nth(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 4 │
+└─────┘
+
>>> agnostic_nth(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,4]]
+
read_csv(source, *, native_namespace, **kwargs)
+
+Read a CSV file into a DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native CSV reader.
+For example, you could use
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that reads a csv file with a specified native namespace:
+>>> def agnostic_read_csv(native_namespace: ModuleType) -> IntoDataFrame:
+... return nw.read_csv("file.csv", native_namespace=native_namespace).to_native()
+
Then we can read the file by passing pandas, Polars or PyArrow namespaces:
+>>> agnostic_read_csv(native_namespace=pd)
+ a b
+0 1 4
+1 2 5
+2 3 6
+>>> agnostic_read_csv(native_namespace=pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_read_csv(native_namespace=pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+
read_parquet(source, *, native_namespace, **kwargs)
+
+Read into a DataFrame from a parquet file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native parquet reader.
+For example, you could use `nw.read_parquet('file.parquet', native_namespace=pd, engine='pyarrow')`.
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that reads a parquet file with a specified native namespace:
+>>> def agnostic_read_parquet(native_namespace: ModuleType) -> IntoDataFrame:
+... return nw.read_parquet(
+... "file.parquet", native_namespace=native_namespace
+... ).to_native()
+
Then we can read the file by passing pandas, Polars or PyArrow namespaces:
+>>> agnostic_read_parquet(native_namespace=pd)
+ a b
+0 1 4
+1 2 5
+2 3 6
+>>> agnostic_read_parquet(native_namespace=pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_read_parquet(native_namespace=pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+
scan_csv(source, *, native_namespace, **kwargs)
+
+Lazily read from a CSV file.
+For the libraries that do not support lazy dataframes, the function reads +a csv file eagerly and then converts the resulting dataframe to a lazyframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native CSV reader.
+For example, you could use `nw.scan_csv('file.csv', native_namespace=pd, engine='pyarrow')`.
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrame[Any]
+ |
+
+
+
+ LazyFrame. + |
+
Examples:
+>>> import dask.dataframe as dd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that lazily reads a csv file with a specified native namespace:
+>>> def agnostic_scan_csv(native_namespace: ModuleType) -> IntoFrame:
+... return nw.scan_csv("file.csv", native_namespace=native_namespace).to_native()
+
Then we can read the file by passing, for example, Polars or Dask namespaces:
+>>> agnostic_scan_csv(native_namespace=pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_scan_csv(native_namespace=dd).compute()
+ a b
+0 1 4
+1 2 5
+2 3 6
+
scan_parquet(source, *, native_namespace, **kwargs)
+
+Lazily read from a parquet file.
+For the libraries that do not support lazy dataframes, the function reads +a parquet file eagerly and then converts the resulting dataframe to a lazyframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ source
+ |
+
+ str
+ |
+
+
+
+ Path to a file. + |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
+ kwargs
+ |
+
+ Any
+ |
+
+
+
+ Extra keyword arguments which are passed to the native parquet reader.
+For example, you could use `nw.scan_parquet('file.parquet', native_namespace=pd, engine='pyarrow')`.
+ |
+
+ {}
+ |
+
Returns:
+Type | +Description | +
---|---|
+ LazyFrame[Any]
+ |
+
+
+
+ LazyFrame. + |
+
Examples:
+>>> import dask.dataframe as dd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> from types import ModuleType
+
Let's create an agnostic function that lazily reads a parquet file with a specified native namespace:
+>>> def agnostic_scan_parquet(native_namespace: ModuleType) -> IntoFrame:
+... return nw.scan_parquet(
+... "file.parquet", native_namespace=native_namespace
+... ).to_native()
+
Then we can read the file by passing, for example, Polars or Dask namespaces:
+>>> agnostic_scan_parquet(native_namespace=pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+>>> agnostic_scan_parquet(native_namespace=dd).compute()
+ a b
+0 1 4
+1 2 5
+2 3 6
+
sum(*columns)
+
+Sum all values.
+ + +Syntactic sugar for nw.col(columns).sum()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.sum("a")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_sum
:
>>> agnostic_sum(df_pd)
+ a
+0 3
+
>>> agnostic_sum(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+└─────┘
+
>>> agnostic_sum(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[3]]
+
sum_horizontal(*exprs)
+
+Sum all values horizontally across columns.
+ + +Unlike Polars, we support horizontal sum over numeric columns only.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [5, 10, None]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_sum_horizontal(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.select(nw.sum_horizontal("a", "b")).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to agnostic_sum_horizontal
:
>>> agnostic_sum_horizontal(df_pd)
+ a
+0 6.0
+1 12.0
+2 3.0
+
>>> agnostic_sum_horizontal(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 6 │
+│ 12 │
+│ 3 │
+└─────┘
+
>>> agnostic_sum_horizontal(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[6,12,3]]
+
show_versions()
+
+Print useful debugging information.
+ + +Examples:
+>>> from narwhals import show_versions
+>>> show_versions()
+
to_native(narwhals_object, *, strict=None, pass_through=None)
+
+Convert Narwhals object to native one.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ narwhals_object
+ |
+
+ DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series[IntoSeriesT]
+ |
+
+
+
+ Narwhals object. + |
+ + required + | +
+ strict
+ |
+
+ bool | None
+ |
+
+
+
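+ Determine what happens if `narwhals_object` isn't a Narwhals class: when `True`, raise an error; when `False`, pass the object through as-is.
Deprecated (v1.13.0):
+ Please use `pass_through` instead. |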
+
+ None
+ |
+
+ pass_through
+ |
+
+ bool | None
+ |
+
+
+
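+ Determine what happens if `narwhals_object` isn't a Narwhals class: when `False`, raise an error; when `True`, pass the object through as-is.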
|
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ IntoFrameT | Any
+ |
+
+
+
+ Object of class that user started with. + |
+
to_py_scalar(scalar_like)
+
+If a scalar is not Python native, converts it to Python native.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ scalar_like
+ |
+
+ Any
+ |
+
+
+
+ Scalar-like value. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ Python scalar. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the object is not convertible to a scalar. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+>>> nw.to_py_scalar(df["a"].item(0))
+1
+>>> import pyarrow as pa
+>>> df = nw.from_native(pa.table({"a": [1, 2, 3]}))
+>>> nw.to_py_scalar(df["a"].item(0))
+1
+>>> nw.to_py_scalar(1)
+1
+
when(*predicates)
+
+Start a when-then-otherwise
expression.
Expression similar to an if-else
statement in Python. Always initiated by a
+nw.when(<condition>).then(<value if condition>)
, and optionally followed by
+chaining one or more .when(<condition>).then(<value>)
statements.
+Chained when-then operations should be read as Python if, elif, ... elif
+blocks, not as if, if, ... if
, i.e. the first condition that evaluates to
+True
will be picked.
+If none of the conditions are True
, an optional
+.otherwise(<value if all statements are false>)
can be appended at the end.
+If not appended, and none of the conditions are True
, None
will be returned.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Condition(s) that must be met in order to apply the subsequent
+statement. Accepts one or more boolean expressions, which are implicitly
+combined with `&`. |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ When
+ |
+
+
+
+ A "when" object, which `.then` can be called on. |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>>
+>>> data = {"a": [1, 2, 3], "b": [5, 10, 15]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> def agnostic_when_then_otherwise(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(
+... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")
+... ).to_native()
+
We can pass any supported library such as Pandas, Polars, or PyArrow to
+agnostic_when_then_otherwise
:
>>> agnostic_when_then_otherwise(df_pd)
+ a b a_when
+0 1 5 5
+1 2 10 5
+2 3 15 6
+
>>> agnostic_when_then_otherwise(df_pl)
+shape: (3, 3)
+┌─────┬─────┬────────┐
+│ a ┆ b ┆ a_when │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i32 │
+╞═════╪═════╪════════╡
+│ 1 ┆ 5 ┆ 5 │
+│ 2 ┆ 10 ┆ 5 │
+│ 3 ┆ 15 ┆ 6 │
+└─────┴─────┴────────┘
+
>>> agnostic_when_then_otherwise(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+a_when: int64
+----
+a: [[1,2,3]]
+b: [[5,10,15]]
+a_when: [[5,5,6]]
+
narwhals.Schema
Ordered mapping of column names to their data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ schema
+ |
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None
+ |
+
+
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None +The schema definition given by column names and their associated +instantiated Narwhals data type. Accepts a mapping or an iterable of tuples. + |
+
+ None
+ |
+
Examples:
+Define a schema by passing instantiated data types.
+>>> import narwhals as nw
+>>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()})
+>>> schema
+Schema({'foo': Int8, 'bar': String})
+
Access the data type associated with a specific column name.
+>>> schema["foo"]
+Int8
+
Access various schema properties using the names
, dtypes
, and len
methods.
>>> schema.names()
+['foo', 'bar']
+>>> schema.dtypes()
+[Int8, String]
+>>> schema.len()
+2
+
names()
+
+Get the column names of the schema.
+ + +Returns:
+Type | +Description | +
---|---|
+ list[str]
+ |
+
+
+
+ Column names. + |
+
dtypes()
+
+Get the data types of the schema.
+ + +Returns:
+Type | +Description | +
---|---|
+ list[DType]
+ |
+
+
+
+ Data types of schema. + |
+
len()
+
+Get the number of columns in the schema.
+ + +Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ Number of columns. + |
+
narwhals.selectors
The following selectors are all supported. In addition, just like in Polars, the following +set operations are supported:
+set intersection: &
set union: |
set difference: -
complement: ~
boolean()
+
+Select boolean columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select boolean +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.boolean())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ c
+0 False
+1 True
+>>> func(df_pl)
+shape: (2, 1)
+┌───────┐
+│ c │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ true │
+└───────┘
+
by_dtype(*dtypes)
+
+Select columns based on their dtype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtypes
+ |
+
+ Any
+ |
+
+
+
+ one or more data types to select + |
+
+ ()
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select int64 and float64 +dtypes and multiply each value by 2:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
categorical()
+
+Select categorical columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data).astype({"b": "category"})
+>>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical})
+
Let's define a dataframe-agnostic function to select categorical +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.categorical())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ cat │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
numeric()
+
+Select numeric columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select numeric +dtypes and multiply each value by 2:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.numeric() * 2)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
string()
+
+Select string columns.
+ + +Returns:
+Type | +Description | +
---|---|
+ Expr
+ |
+
+
+
+ A new expression. + |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select string +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.string())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ str │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
narwhals.Series
Narwhals Series, backed by a native series.
+Warning
+This class is not meant to be instantiated directly - instead:
+If the native object is a series from one of the supported backends (e.g.
+ pandas.Series, polars.Series, pyarrow.ChunkedArray), you can use
+ narwhals.from_native
:
+
narwhals.from_native(native_series, allow_series=True)
+narwhals.from_native(native_series, series_only=True)
+
If the object is a generic sequence (e.g. a list or a tuple of values), you can
+ create a series via narwhals.new_series
:
+
narwhals.new_series(
+ name=name,
+ values=values,
+ native_namespace=narwhals.get_native_namespace(another_object),
+)
+
dtype
+
+
+ property
+
+
+Get the data type of the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ DType
+ |
+
+
+
+ The data type of the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_dtype(s_native: IntoSeriesT) -> nw.dtypes.DType:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dtype
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dtype
:
>>> agnostic_dtype(s_pd)
+Int64
+
>>> agnostic_dtype(s_pl)
+Int64
+
>>> agnostic_dtype(s_pa)
+Int64
+
implementation
+
+
+ property
+
+
+Return implementation of native Series.
+This can be useful when you need to use special-casing for features outside of +Narwhals' scope - for example, when dealing with pandas' Period Dtype.
+ + +Returns:
+Type | +Description | +
---|---|
+ Implementation
+ |
+
+
+
+ Implementation. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+
>>> s_native = pd.Series([1, 2, 3])
+>>> s = nw.from_native(s_native, series_only=True)
+
>>> s.implementation
+<Implementation.PANDAS: 1>
+
>>> s.implementation.is_pandas()
+True
+
>>> s.implementation.is_pandas_like()
+True
+
>>> s.implementation.is_polars()
+False
+
name
+
+
+ property
+
+
+Get the name of the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ str
+ |
+
+
+
+ The name of the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="foo")
+>>> s_pl = pl.Series("foo", data)
+
We define a library agnostic function:
+>>> def agnostic_name(s_native: IntoSeries) -> str:
+... s = nw.from_native(s_native, series_only=True)
+... return s.name
+
We can then pass any supported library such as pandas or Polars
+to agnostic_name
:
>>> agnostic_name(s_pd)
+'foo'
+
>>> agnostic_name(s_pl)
+'foo'
+
shape
+
+
+ property
+
+
+Get the shape of the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ tuple[int]
+ |
+
+
+
+ A tuple containing the length of the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_shape(s_native: IntoSeries) -> tuple[int]:
+... s = nw.from_native(s_native, series_only=True)
+... return s.shape
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_shape
:
>>> agnostic_shape(s_pd)
+(3,)
+
>>> agnostic_shape(s_pl)
+(3,)
+
>>> agnostic_shape(s_pa)
+(3,)
+
__arrow_c_stream__(requested_schema=None)
+
+Export a Series via the Arrow PyCapsule Interface.
+Narwhals doesn't implement anything itself here:
+- if the underlying series implements the interface, it'll return that;
- else, it'll call `to_arrow` and then defer to PyArrow's implementation.
See PyCapsule Interface +for more.
+ +__getitem__(idx)
+
+Retrieve elements from the object using integer indexing or slicing.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ idx
+ |
+
+ int | slice | Sequence[int]
+ |
+
+
+
+ The index, slice, or sequence of indices to retrieve. +
|
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any | Self
+ |
+
+
+
+ A single element if `idx` is an integer, else a subset of the Series. |
+
Examples:
+>>> from typing import Any
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_get_first_item(s_native: IntoSeriesT) -> Any:
+... s = nw.from_native(s_native, series_only=True)
+... return s[0]
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_get_first_item
:
>>> agnostic_get_first_item(s_pd)
+np.int64(1)
+
>>> agnostic_get_first_item(s_pl)
+1
+
>>> agnostic_get_first_item(s_pa)
+1
+
We can also make a function to slice the Series:
+>>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s[:2].to_native()
+
>>> agnostic_slice(s_pd)
+0 1
+1 2
+dtype: int64
+
>>> agnostic_slice(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 1
+ 2
+]
+
>>> agnostic_slice(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
__iter__()
+
+abs()
+
+Calculate the absolute value of each element.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the absolute values of the original elements. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [2, -4, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.abs().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_abs
:
>>> agnostic_abs(s_pd)
+0 2
+1 4
+2 3
+dtype: int64
+
>>> agnostic_abs(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 3
+]
+
>>> agnostic_abs(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 4,
+ 3
+ ]
+]
+
alias(name)
+
+Rename the Series.
+ + +This method is very cheap, but does not guarantee that data +will be copied. For example:
+s1: nw.Series
+s2 = s1.alias("foo")
+arr = s2.to_numpy()
+arr[0] = 999
+
may (depending on the backend, and on the version) result in
+s1
's data being modified. We recommend:
- if you need to alias an object and don't need the original
+ one around any more, just use `alias` without worrying about it.
+- if you were expecting `alias` to copy data, then explicitly call
+ `.clone` before calling `alias`.
+
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the updated name. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="foo")
+>>> s_pl = pl.Series("foo", data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_alias(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.alias("bar").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_alias
:
>>> agnostic_alias(s_pd)
+0 1
+1 2
+2 3
+Name: bar, dtype: int64
+
>>> agnostic_alias(s_pl)
+shape: (3,)
+Series: 'bar' [i64]
+[
+ 1
+ 2
+ 3
+]
+
>>> agnostic_alias(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 1,
+ 2,
+ 3
+ ]
+]
+
all()
+
+Return whether all values in the Series are True.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ A boolean indicating if all values in the Series are True. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [False, True, False]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_all(s_native: IntoSeries) -> bool:
+... s = nw.from_native(s_native, series_only=True)
+... return s.all()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_all
:
>>> agnostic_all(s_pd)
+np.False_
+
>>> agnostic_all(s_pl)
+False
+
>>> agnostic_all(s_pa)
+False
+
any()
+
+Return whether any of the values in the Series are True.
+ + +Only works on Series of data type Boolean.
+Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ A boolean indicating if any values in the Series are True. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [False, True, False]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_any(s_native: IntoSeries) -> bool:
+... s = nw.from_native(s_native, series_only=True)
+... return s.any()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_any
:
>>> agnostic_any(s_pd)
+np.True_
+
>>> agnostic_any(s_pl)
+True
+
>>> agnostic_any(s_pa)
+True
+
arg_max()
+
+Returns the index of the maximum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_arg_max(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.arg_max()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_arg_max
:
>>> agnostic_arg_max(s_pd)
+np.int64(2)
+
>>> agnostic_arg_max(s_pl)
+2
+
>>> agnostic_arg_max(s_pa)
+2
+
arg_min()
+
+Returns the index of the minimum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_arg_min(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.arg_min()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_arg_min
:
>>> agnostic_arg_min(s_pd)
+np.int64(0)
+
>>> agnostic_arg_min(s_pl)
+0
+
>>> agnostic_arg_min(s_pa)
+0
+
arg_true()
+
+Find elements where boolean Series is True.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the indices of elements that are True. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, None, None, 2]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_arg_true(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_null().arg_true().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_arg_true
:
>>> agnostic_arg_true(s_pd)
+1 1
+2 2
+dtype: int64
+
>>> agnostic_arg_true(s_pl)
+shape: (2,)
+Series: '' [u32]
+[
+ 1
+ 2
+]
+
>>> agnostic_arg_true(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
cast(dtype)
+
+Cast between data types.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtype
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the specified data type. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [True, False, True]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_cast(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cast(nw.Int64).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cast
:
>>> agnostic_cast(s_pd)
+0 1
+1 0
+2 1
+dtype: int64
+
>>> agnostic_cast(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 0
+ 1
+]
+
>>> agnostic_cast(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 0,
+ 1
+ ]
+]
+
clip(lower_bound=None, upper_bound=None)
+
+Clip values in the Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Self | Any | None
+ |
+
+
+
+ Lower bound value. + |
+
+ None
+ |
+
+ upper_bound
+ |
+
+ Self | Any | None
+ |
+
+
+
+ Upper bound value. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with values clipped to the specified bounds. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_clip_lower(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.clip(2).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_clip_lower
:
>>> agnostic_clip_lower(s_pd)
+0 2
+1 2
+2 3
+dtype: int64
+
>>> agnostic_clip_lower(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 2
+ 3
+]
+
>>> agnostic_clip_lower(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 2,
+ 3
+ ]
+]
+
We define another library agnostic function:
+>>> def agnostic_clip_upper(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.clip(upper_bound=2).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+ PyArrow to agnostic_clip_upper
:
>>> agnostic_clip_upper(s_pd)
+0 1
+1 2
+2 2
+dtype: int64
+
+>>> agnostic_clip_upper(s_pl) # doctest: +NORMALIZE_WHITESPACE
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 2
+]
+
+>>> agnostic_clip_upper(s_pa) # doctest: +ELLIPSIS
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2,
+ 2
+ ]
+]
+
+We can have both at the same time
+
+>>> data = [-1, 1, -3, 3, -5, 5]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
+We define a library agnostic function:
+
+>>> def agnostic_clip(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.clip(-1, 3).to_native()
+
+We can then pass any supported library such as pandas, Polars, or
+PyArrow to `agnostic_clip`:
+
+>>> agnostic_clip(s_pd)
+0 -1
+1 1
+2 -1
+3 3
+4 -1
+5 3
+dtype: int64
+
+>>> agnostic_clip(s_pl) # doctest: +NORMALIZE_WHITESPACE
+shape: (6,)
+Series: '' [i64]
+[
+ -1
+ 1
+ -1
+ 3
+ -1
+ 3
+]
+
+>>> agnostic_clip(s_pa) # doctest: +ELLIPSIS
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ -1,
+ 1,
+ -1,
+ 3,
+ -1,
+ 3
+ ]
+]
+
+
+ count()
+
+Returns the number of non-null elements in the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The number of non-null elements in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_count(s_native: IntoSeries) -> int:
+... s = nw.from_native(s_native, series_only=True)
+... return s.count()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_count
:
>>> agnostic_count(s_pd)
+np.int64(3)
+
>>> agnostic_count(s_pl)
+3
+
>>> agnostic_count(s_pa)
+3
+
cum_count(*, reverse=False)
+
+Return the cumulative count of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the cumulative count of non-null values. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["x", "k", None, "d"]
+
We define a library agnostic function:
+>>> def agnostic_cum_count(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_count(reverse=True).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_count
:
>>> agnostic_cum_count(pd.Series(data))
+0 3
+1 2
+2 1
+3 1
+dtype: int64
+
>>> agnostic_cum_count(pl.Series(data))
+shape: (4,)
+Series: '' [u32]
+[
+ 3
+ 2
+ 1
+ 1
+]
+
>>> agnostic_cum_count(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 2,
+ 1,
+ 1
+ ]
+]
+
cum_max(*, reverse=False)
+
+Return the cumulative max of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the cumulative max of non-null values. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 3, None, 2]
+
We define a library agnostic function:
+>>> def agnostic_cum_max(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_max().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_max
:
>>> agnostic_cum_max(pd.Series(data))
+0 1.0
+1 3.0
+2 NaN
+3 3.0
+dtype: float64
+
>>> agnostic_cum_max(pl.Series(data))
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 3
+ null
+ 3
+]
+
>>> agnostic_cum_max(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 3,
+ null,
+ 3
+ ]
+]
+
cum_min(*, reverse=False)
+
+Return the cumulative min of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the cumulative min of non-null values. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [3, 1, None, 2]
+
We define a library agnostic function:
+>>> def agnostic_cum_min(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_min().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_min
:
>>> agnostic_cum_min(pd.Series(data))
+0 3.0
+1 1.0
+2 NaN
+3 1.0
+dtype: float64
+
>>> agnostic_cum_min(pl.Series(data))
+shape: (4,)
+Series: '' [i64]
+[
+ 3
+ 1
+ null
+ 1
+]
+
>>> agnostic_cum_min(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 1,
+ null,
+ 1
+ ]
+]
+
cum_prod(*, reverse=False)
+
+Return the cumulative product of the non-null values in the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the cumulative product of non-null values. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 3, None, 2]
+
We define a library agnostic function:
+>>> def agnostic_cum_prod(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_prod().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_prod
:
>>> agnostic_cum_prod(pd.Series(data))
+0 1.0
+1 3.0
+2 NaN
+3 6.0
+dtype: float64
+
>>> agnostic_cum_prod(pl.Series(data))
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 3
+ null
+ 6
+]
+
>>> agnostic_cum_prod(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 3,
+ null,
+ 6
+ ]
+]
+
cum_sum(*, reverse=False)
+
+Calculate the cumulative sum.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ reverse the operation + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the cumulative sum of non-null values. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [2, 4, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_cum_sum(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cum_sum().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_cum_sum
:
>>> agnostic_cum_sum(s_pd)
+0 2
+1 6
+2 9
+dtype: int64
+
>>> agnostic_cum_sum(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 6
+ 9
+]
+
>>> agnostic_cum_sum(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 6,
+ 9
+ ]
+]
+
diff()
+
+Calculate the difference with the previous element, for each element.
+ + +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in an Int64 column, you could
+do:
s.diff().fill_null(0).cast(nw.Int64)
+
+Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the difference between each element and its predecessor. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [2, 4, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_diff(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.diff().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_diff
:
>>> agnostic_diff(s_pd)
+0 NaN
+1 2.0
+2 -1.0
+dtype: float64
+
>>> agnostic_diff(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ -1
+]
+
>>> agnostic_diff(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 2,
+ -1
+ ]
+]
+
drop_nulls()
+
+Drop null values.
+ + +pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with null values removed. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [2, 4, None, 3, 5]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_drop_nulls(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.drop_nulls().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_drop_nulls
:
>>> agnostic_drop_nulls(s_pd)
+0 2.0
+1 4.0
+3 3.0
+4 5.0
+dtype: float64
+
>>> agnostic_drop_nulls(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 3
+ 5
+]
+
>>> agnostic_drop_nulls(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 4,
+ 3,
+ 5
+ ]
+]
+
ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)
+
+Compute exponentially-weighted moving average.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ com
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of center of mass, \(\gamma\), with \(\alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0\) |
+
+ None
+ |
+
+ span
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of span, \(\theta\), with \(\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1\) |
+
+ None
+ |
+
+ half_life
+ |
+
+ float | None
+ |
+
+
+
+ Specify decay in terms of half-life, \(\tau\), with \(\alpha = 1 - \exp \left\{ \frac{-\ln(2)}{\tau} \right\} \; \forall \; \tau > 0\) |
+
+ None
+ |
+
+ alpha
+ |
+
+ float | None
+ |
+
+
+
+ Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). + |
+
+ None
+ |
+
+ adjust
+ |
+
+ bool
+ |
+
+
+
+ Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings +
|
+
+ True
+ |
+
+ min_periods
+ |
+
+ int
+ |
+
+
+
+ Minimum number of observations in window required to have a value (otherwise result is null). + |
+
+ 1
+ |
+
+ ignore_nulls
+ |
+
+ bool
+ |
+
+
+
+ Ignore missing values when calculating weights. +
|
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ Series + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
We define a library agnostic function:
+>>> def agnostic_ewm_mean(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.ewm_mean(com=1, ignore_nulls=False).to_native()
+
We can then pass any supported library such as pandas or Polars
+to agnostic_ewm_mean
:
>>> agnostic_ewm_mean(s_pd)
+0 1.000000
+1 1.666667
+2 2.428571
+Name: a, dtype: float64
+
>>> agnostic_ewm_mean(s_pl)
+shape: (3,)
+Series: 'a' [f64]
+[
+ 1.0
+ 1.666667
+ 2.428571
+]
+
fill_null(value=None, strategy=None, limit=None)
+
+Fill null values using the specified value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any | None
+ |
+
+
+
+ Value used to fill null values. + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['forward', 'backward'] | None
+ |
+
+
+
+ Strategy used to fill null values. + |
+
+ None
+ |
+
+ limit
+ |
+
+ int | None
+ |
+
+
+
+ Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. + |
+
+ None
+ |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with null values filled according to the specified value or strategy. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_fill_null(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.fill_null(5).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_fill_null
:
>>> agnostic_fill_null(s_pd)
+0 1.0
+1 2.0
+2 5.0
+dtype: float64
+
>>> agnostic_fill_null(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 5
+]
+
>>> agnostic_fill_null(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2,
+ 5
+ ]
+]
+
Using a strategy:
+>>> def agnostic_fill_null_with_strategy(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.fill_null(strategy="forward", limit=1).to_native()
+
>>> agnostic_fill_null_with_strategy(s_pd)
+0 1.0
+1 2.0
+2 2.0
+dtype: float64
+
>>> agnostic_fill_null_with_strategy(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 2
+]
+
>>> agnostic_fill_null_with_strategy(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2,
+ 2
+ ]
+]
+
filter(other)
+
+Filter elements in the Series based on a condition.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with elements that satisfy the condition. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [4, 10, 15, 34, 50]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_filter(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.filter(s > 10).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_filter
:
>>> agnostic_filter(s_pd)
+2 15
+3 34
+4 50
+dtype: int64
+
>>> agnostic_filter(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 15
+ 34
+ 50
+]
+
>>> agnostic_filter(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 15,
+ 34,
+ 50
+ ]
+]
+
gather_every(n, offset=0)
+
+Take every nth value in the Series and return as new Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with every nth value starting from the offset. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3, 4]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function in which we gather every 2 rows, +starting from an offset of 1:
+>>> def agnostic_gather_every(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.gather_every(n=2, offset=1).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_gather_every
:
>>> agnostic_gather_every(s_pd)
+1 2
+3 4
+dtype: int64
+
>>> agnostic_gather_every(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 2
+ 4
+]
+
>>> agnostic_gather_every(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 4
+ ]
+]
+
head(n=10)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series containing the first n rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.head(3).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_head
:
>>> agnostic_head(s_pd)
+0 0
+1 1
+2 2
+dtype: int64
+
>>> agnostic_head(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 0
+ 1
+ 2
+]
+
>>> agnostic_head(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 0,
+ 1,
+ 2
+ ]
+]
+
is_between(lower_bound, upper_bound, closed='both')
+
+Get a boolean mask of the values that are between the given lower/upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any | Self
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
+ upper_bound
+ |
+
+ Any | Self
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
+ closed
+ |
+
+ Literal['left', 'right', 'none', 'both']
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
If the value of the lower_bound
is greater than that of the upper_bound
,
+then the values will be False, as no value can satisfy the condition.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A boolean Series indicating which values are between the given bounds. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3, 4, 5]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_is_between(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_between(2, 4, "right").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_between
:
>>> agnostic_is_between(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+
>>> agnostic_is_between(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ false
+]
+
>>> agnostic_is_between(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ false,
+ true,
+ true,
+ false
+ ]
+]
+
is_duplicated()
+
+Get a mask of all duplicated rows in the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with boolean values indicating duplicated rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3, 1]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_duplicated(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_duplicated().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_duplicated
:
>>> agnostic_is_duplicated(s_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+
>>> agnostic_is_duplicated(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+
>>> agnostic_is_duplicated(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ true,
+ false,
+ false,
+ true
+ ]
+]
+
is_empty()
+
+Check if the series is empty.
+ + +Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+ A boolean indicating if the series is empty. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
Let's define a dataframe-agnostic function that keeps only the values +greater than 10, and then checks if the result is empty or not:
+>>> def agnostic_is_empty(s_native: IntoSeries) -> bool:
+... s = nw.from_native(s_native, series_only=True)
+... return s.filter(s > 10).is_empty()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_empty
:
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+>>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa)
+(True, True, True)
+
>>> data = [100, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+>>> agnostic_is_empty(s_pd), agnostic_is_empty(s_pl), agnostic_is_empty(s_pa)
+(False, False, False)
+
is_finite()
+
+Returns a boolean Series indicating which values are finite.
+ + +Different backends handle null values differently. is_finite
will return
+False for NaN and null values in the Dask and pandas non-nullable backends, while
+for Polars, PyArrow and pandas nullable backends null values are kept as such.
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A boolean Series indicating which values are finite. |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [float("nan"), float("inf"), 2.0, None]
+
We define a library agnostic function:
+>>> def agnostic_is_finite(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_finite().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_finite
:
>>> agnostic_is_finite(pd.Series(data))
+0 False
+1 False
+2 True
+3 False
+dtype: bool
+
>>> agnostic_is_finite(pl.Series(data))
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ null
+]
+
>>> agnostic_is_finite(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ false,
+ true,
+ null
+ ]
+]
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with boolean values indicating the first occurrence of each distinct value. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 1, 2, 3, 2]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_first_distinct(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_first_distinct().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_first_distinct
:
>>> agnostic_is_first_distinct(s_pd)
+0 True
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+
>>> agnostic_is_first_distinct(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ true
+ false
+ true
+ true
+ false
+]
+
>>> agnostic_is_first_distinct(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ true,
+ false,
+ true,
+ true,
+ false
+ ]
+]
+
is_in(other)
+
+Check if the elements of this Series are in the other sequence.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Any
+ |
+
+
+
+ Sequence of primitive type. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with boolean values indicating if the elements are in the other sequence. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_is_in(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_in([3, 2, 8]).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_in
:
>>> agnostic_is_in(s_pd)
+0 False
+1 True
+2 True
+dtype: bool
+
>>> agnostic_is_in(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+]
+
>>> agnostic_is_in(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ true,
+ true
+ ]
+]
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with boolean values indicating the last occurrence of each distinct value. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 1, 2, 3, 2]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_last_distinct(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_last_distinct().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_last_distinct
:
>>> agnostic_is_last_distinct(s_pd)
+0 False
+1 True
+2 False
+3 True
+4 True
+dtype: bool
+
>>> agnostic_is_last_distinct(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ true
+ false
+ true
+ true
+]
+
>>> agnostic_is_last_distinct(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ true,
+ false,
+ true,
+ true
+ ]
+]
+
is_nan()
+
+Returns a boolean Series indicating which values are NaN.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A boolean Series indicating which values are NaN. + |
+
pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [0.0, None, 2.0]
+>>> s_pd = pd.Series(data, dtype="Float64")
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data], type=pa.float64())
+
>>> def agnostic_self_div_is_nan(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_nan().to_native()
+
>>> print(agnostic_self_div_is_nan(s_pd))
+0 False
+1 <NA>
+2 False
+dtype: boolean
+
>>> print(agnostic_self_div_is_nan(s_pl))
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ null
+ false
+]
+
>>> print(agnostic_self_div_is_nan(s_pa))
+[
+ [
+ false,
+ null,
+ false
+ ]
+]
+
is_null()
+
+Returns a boolean Series indicating which values are null.
+ + +pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A boolean Series indicating which values are null. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_null(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_null().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_null
:
>>> agnostic_is_null(s_pd)
+0 False
+1 False
+2 True
+dtype: bool
+
>>> agnostic_is_null(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+]
+
>>> agnostic_is_null(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ false,
+ true
+ ]
+]
+
is_sorted(*, descending=False)
+
+Check if the Series is sorted.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Check if the Series is sorted in descending order. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+ A boolean indicating if the Series is sorted. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> unsorted_data = [1, 3, 2]
+>>> sorted_data = [3, 2, 1]
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_sorted(s_native: IntoSeries, descending: bool = False):
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_sorted(descending=descending)
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_sorted
:
>>> agnostic_is_sorted(pd.Series(unsorted_data))
+False
+
>>> agnostic_is_sorted(pd.Series(sorted_data), descending=True)
+True
+
>>> agnostic_is_sorted(pl.Series(unsorted_data))
+False
+
>>> agnostic_is_sorted(pl.Series(sorted_data), descending=True)
+True
+
>>> agnostic_is_sorted(pa.chunked_array([unsorted_data]))
+False
+
>>> agnostic_is_sorted(pa.chunked_array([sorted_data]), descending=True)
+True
+
is_unique()
+
+Get a mask of all unique rows in the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with boolean values indicating unique rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3, 1]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_is_unique(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.is_unique().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_is_unique
:
>>> agnostic_is_unique(s_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> agnostic_is_unique(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+>>> agnostic_is_unique(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ true,
+ true,
+ false
+ ]
+]
+
item(index=None)
+
+Return the Series as a scalar, or return the element at the given index.
+If no index is provided, this is equivalent to s[0]
, with a check
+that the shape is (1,). With an index, this is equivalent to s[index]
.
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The scalar value of the Series or the element at the given index. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
Let's define a dataframe-agnostic function that returns the item at a given index:
+>>> def agnostic_item(s_native: IntoSeries, index=None):
+... s = nw.from_native(s_native, series_only=True)
+... return s.item(index)
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_item
:
>>> (
+... agnostic_item(pl.Series("a", [1]), None),
+... agnostic_item(pd.Series([1]), None),
+... agnostic_item(pa.chunked_array([[1]]), None),
+... )
+(1, np.int64(1), 1)
+
>>> (
+... agnostic_item(pl.Series("a", [9, 8, 7]), -1),
+... agnostic_item(pl.Series([9, 8, 7]), -2),
+... agnostic_item(pa.chunked_array([[9, 8, 7]]), -3),
+... )
+(7, 8, 9)
+
len()
+
+Return the number of elements in the Series.
+Null values count towards the total.
+ + +Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ The number of elements in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function that computes the len of the series:
+>>> def agnostic_len(s_native: IntoSeries) -> int:
+... s = nw.from_native(s_native, series_only=True)
+... return s.len()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_len
:
>>> agnostic_len(s_pd)
+3
+
>>> agnostic_len(s_pl)
+3
+
>>> agnostic_len(s_pa)
+3
+
max()
+
+Get the maximum value in this Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The maximum value in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_max(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.max()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_max
:
>>> agnostic_max(s_pd)
+np.int64(3)
+
>>> agnostic_max(s_pl)
+3
+
>>> agnostic_max(s_pa)
+3
+
mean()
+
+Reduce this Series to the mean value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The average of all elements in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_mean(s_native: IntoSeries) -> float:
+... s = nw.from_native(s_native, series_only=True)
+... return s.mean()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_mean
:
>>> agnostic_mean(s_pd)
+np.float64(2.0)
+
>>> agnostic_mean(s_pl)
+2.0
+
>>> agnostic_mean(s_pa)
+2.0
+
median()
+
+Reduce this Series to the median value.
+ + +Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The median value of all elements in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [5, 3, 8]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a library agnostic function:
+>>> def agnostic_median(s_native: IntoSeries) -> float:
+... s = nw.from_native(s_native, series_only=True)
+... return s.median()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_median
:
>>> agnostic_median(s_pd)
+np.float64(5.0)
+
>>> agnostic_median(s_pl)
+5.0
+
>>> agnostic_median(s_pa)
+5.0
+
min()
+
+Get the minimum value in this Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The minimum value in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_min(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.min()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_min
:
>>> agnostic_min(s_pd)
+np.int64(1)
+
>>> agnostic_min(s_pl)
+1
+
>>> agnostic_min(s_pa)
+1
+
mode()
+
+Compute the most frequently occurring value(s).
+Can return multiple values.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series containing the mode(s) (values that appear most frequently). + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 1, 2, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_mode(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.mode().sort().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_mode
:
>>> agnostic_mode(s_pd)
+0 1
+1 2
+dtype: int64
+
>>> agnostic_mode(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 1
+ 2
+]
+
>>> agnostic_mode(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
n_unique()
+
+Count the number of unique values.
+ + +Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ Number of unique values in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_n_unique(s_native: IntoSeries) -> int:
+... s = nw.from_native(s_native, series_only=True)
+... return s.n_unique()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_n_unique
:
>>> agnostic_n_unique(s_pd)
+3
+
>>> agnostic_n_unique(s_pl)
+3
+
>>> agnostic_n_unique(s_pa)
+3
+
null_count()
+
+Count the number of null values in the Series.
+ + +pandas handles null values differently from Polars and PyArrow. +See null_handling +for reference.
+Returns:
+Type | +Description | +
---|---|
+ int
+ |
+
+
+
+ The number of null values in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, None, None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function that returns the null count of +the series:
+>>> def agnostic_null_count(s_native: IntoSeries) -> int:
+... s = nw.from_native(s_native, series_only=True)
+... return s.null_count()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_null_count
:
>>> agnostic_null_count(s_pd)
+np.int64(2)
+
>>> agnostic_null_count(s_pl)
+2
+
>>> agnostic_null_count(s_pa)
+2
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the results of the piped function applied. + |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a function to pipe into:
+>>> def agnostic_pipe(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.pipe(lambda x: x + 2).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_pipe
:
>>> agnostic_pipe(s_pd)
+0 3
+1 4
+2 5
+dtype: int64
+
>>> agnostic_pipe(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 3
+ 4
+ 5
+]
+
>>> agnostic_pipe(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 4,
+ 5
+ ]
+]
+
quantile(quantile, interpolation)
+
+Get quantile value of the series.
+ + +pandas and Polars may have implementation differences for a given interpolation method.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ quantile
+ |
+
+ float
+ |
+
+
+
+ Quantile between 0.0 and 1.0. + |
+ + required + | +
+ interpolation
+ |
+
+ Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
+ |
+
+
+
+ Interpolation method. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The quantile value. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = list(range(50))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_quantile(s_native: IntoSeries) -> list[float]:
+... s = nw.from_native(s_native, series_only=True)
+... return [
+... s.quantile(quantile=q, interpolation="nearest")
+... for q in (0.1, 0.25, 0.5, 0.75, 0.9)
+... ]
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_quantile
:
>>> agnostic_quantile(s_pd)
+[np.int64(5), np.int64(12), np.int64(24), np.int64(37), np.int64(44)]
+
>>> agnostic_quantile(s_pl)
+[5.0, 12.0, 25.0, 37.0, 44.0]
+
>>> agnostic_quantile(s_pa)
+[5, 12, 24, 37, 44]
+
rank(method='average', *, descending=False)
+
+Assign ranks to data, dealing with ties appropriately.
+ + +The resulting dtype may differ between backends.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ method
+ |
+
+ Literal['average', 'min', 'max', 'dense', 'ordinal']
+ |
+
+
+
+ The method used to assign ranks to tied elements. +The following methods are available (default is 'average'): +
|
+
+ 'average'
+ |
+
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Rank in descending order. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new series with rank data as values. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>>
+>>> data = [3, 6, 1, 1, 6]
+
We define a dataframe-agnostic function that computes the dense rank for +the data:
+>>> def agnostic_dense_rank(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rank(method="dense").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_dense_rank
:
>>> agnostic_dense_rank(pd.Series(data))
+0 2.0
+1 3.0
+2 1.0
+3 1.0
+4 3.0
+dtype: float64
+
>>> agnostic_dense_rank(pl.Series(data))
+shape: (5,)
+Series: '' [u32]
+[
+ 2
+ 3
+ 1
+ 1
+ 3
+]
+
>>> agnostic_dense_rank(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 3,
+ 1,
+ 1,
+ 3
+ ]
+]
+
rename(name)
+
+Rename the Series.
+Alias for Series.alias()
.
This method is very cheap, but does not guarantee that data +will be copied. For example:
+s1: nw.Series
+s2 = s1.rename("foo")
+arr = s2.to_numpy()
+arr[0] = 999
+
may (depending on the backend, and on the version) result in
+s1
's data being modified. We recommend:
- if you need to rename an object and don't need the original
+ one around any more, just use `rename` without worrying about it.
+- if you were expecting `rename` to copy data, then explicitly call
+ `.clone` before calling `rename`.
+
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the updated name. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="foo")
+>>> s_pl = pl.Series("foo", data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_rename(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rename("bar").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rename
:
>>> agnostic_rename(s_pd)
+0 1
+1 2
+2 3
+Name: bar, dtype: int64
+
>>> agnostic_rename(s_pl)
+shape: (3,)
+Series: 'bar' [i64]
+[
+ 1
+ 2
+ 3
+]
+
>>> agnostic_rename(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 1,
+ 2,
+ 3
+ ]
+]
+
replace_strict(old, new=None, *, return_dtype=None)
+
+Replace all values by different values.
+This function must replace all non-null input values (else it raises an error).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ old
+ |
+
+ Sequence[Any] | Mapping[Any, Any]
+ |
+
+
+
+ Sequence of values to replace. It also accepts a mapping of values to
+their replacement, as syntactic sugar for passing the mapping's keys as `old` and its values as `new`.
+ |
+ + required + | +
+ new
+ |
+
+ Sequence[Any] | None
+ |
+
+
+
+ Sequence of values to replace by. Length must match the length of `old`. |
+
+ None
+ |
+
+ return_dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ The data type of the resulting expression. If set to `None` (default), the data type is determined automatically based on the other inputs. |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with values replaced according to the mapping. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = {"a": [3, 0, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_replace_strict(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.replace_strict(
+... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String
+... ).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_replace_strict
:
>>> agnostic_replace_strict(df_pd["a"])
+0 three
+1 zero
+2 one
+3 two
+Name: a, dtype: object
+
>>> agnostic_replace_strict(df_pl["a"])
+shape: (4,)
+Series: 'a' [str]
+[
+ "three"
+ "zero"
+ "one"
+ "two"
+]
+
>>> agnostic_replace_strict(df_pa["a"])
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "three",
+ "zero",
+ "one",
+ "two"
+ ]
+]
+
rolling_mean(window_size, *, min_periods=None, center=False)
+
+Apply a rolling mean (moving mean) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their mean.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to `None` (default), it is set equal to `window_size`. |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1.0, 2.0, 3.0, 4.0]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_rolling_mean(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rolling_mean(window_size=2).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_mean
:
>>> agnostic_rolling_mean(s_pd)
+0 NaN
+1 1.5
+2 2.5
+3 3.5
+dtype: float64
+
>>> agnostic_rolling_mean(s_pl)
+shape: (4,)
+Series: '' [f64]
+[
+ null
+ 1.5
+ 2.5
+ 3.5
+]
+
>>> agnostic_rolling_mean(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 1.5,
+ 2.5,
+ 3.5
+ ]
+]
+
rolling_std(window_size, *, min_periods=None, center=False, ddof=1)
+
+Apply a rolling standard deviation (moving standard deviation) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their standard deviation.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to `None` (default), it is set equal to `window_size`. |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
+ ddof
+ |
+
+ int
+ |
+
+
+
+ Delta Degrees of Freedom; the divisor for a length N window is N - ddof. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1.0, 3.0, 1.0, 4.0]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_rolling_std(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rolling_std(window_size=2, min_periods=1).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_std
:
>>> agnostic_rolling_std(s_pd)
+0 NaN
+1 1.414214
+2 1.414214
+3 2.121320
+dtype: float64
+
>>> agnostic_rolling_std(s_pl)
+shape: (4,)
+Series: '' [f64]
+[
+ null
+ 1.414214
+ 1.414214
+ 2.12132
+]
+
>>> agnostic_rolling_std(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ nan,
+ 1.4142135623730951,
+ 1.4142135623730951,
+ 2.1213203435596424
+ ]
+]
+
rolling_sum(window_size, *, min_periods=None, center=False)
+
+Apply a rolling sum (moving sum) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their sum.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to `None` (default), it is set equal to `window_size`. |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1.0, 2.0, 3.0, 4.0]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_rolling_sum(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rolling_sum(window_size=2).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_sum
:
>>> agnostic_rolling_sum(s_pd)
+0 NaN
+1 3.0
+2 5.0
+3 7.0
+dtype: float64
+
>>> agnostic_rolling_sum(s_pl)
+shape: (4,)
+Series: '' [f64]
+[
+ null
+ 3.0
+ 5.0
+ 7.0
+]
+
>>> agnostic_rolling_sum(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 3,
+ 5,
+ 7
+ ]
+]
+
rolling_var(window_size, *, min_periods=None, center=False, ddof=1)
+
+Apply a rolling variance (moving variance) over the values.
+Warning
+This functionality is considered unstable. It may be changed at any point +without it being considered a breaking change.
+A window of length window_size
will traverse the values. The resulting values
+will be aggregated to their variance.
The window at a given row will include the row itself and the window_size - 1
+elements before it.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ window_size
+ |
+
+ int
+ |
+
+
+
+ The length of the window in number of elements. It must be a +strictly positive integer. + |
+ + required + | +
+ min_periods
+ |
+
+ int | None
+ |
+
+
+
+ The number of values in the window that should be non-null before
+computing a result. If set to `None` (default), it is set equal to `window_size`. |
+
+ None
+ |
+
+ center
+ |
+
+ bool
+ |
+
+
+
+ Set the labels at the center of the window. + |
+
+ False
+ |
+
+ ddof
+ |
+
+ int
+ |
+
+
+
+ Delta Degrees of Freedom; the divisor for a length N window is N - ddof. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1.0, 3.0, 1.0, 4.0]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_rolling_var(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.rolling_var(window_size=2, min_periods=1).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_rolling_var
:
>>> agnostic_rolling_var(s_pd)
+0 NaN
+1 2.0
+2 2.0
+3 4.5
+dtype: float64
+
>>> agnostic_rolling_var(s_pl)
+shape: (4,)
+Series: '' [f64]
+[
+ null
+ 2.0
+ 2.0
+ 4.5
+]
+
>>> agnostic_rolling_var(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ nan,
+ 2,
+ 2,
+ 4.5
+ ]
+]
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ decimals
+ |
+
+ int
+ |
+
+
+
+ Number of decimals to round by. + |
+
+ 0
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with rounded values. + |
+
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 round to 4.0, etc.).
+Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1.12345, 2.56789, 3.901234]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> def agnostic_round(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.round(1).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_round
:
>>> agnostic_round(s_pd)
+0 1.1
+1 2.6
+2 3.9
+dtype: float64
+
>>> agnostic_round(s_pl)
+shape: (3,)
+Series: '' [f64]
+[
+ 1.1
+ 2.6
+ 3.9
+]
+
>>> agnostic_round(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1.1,
+ 2.6,
+ 3.9
+ ]
+]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample randomly from this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series containing randomly sampled values from the original Series. + |
+
The sample
method returns a Series with a specified number of
+randomly selected items chosen from this Series.
+The results are not consistent across libraries.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3, 4]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_sample(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.sample(fraction=1.0, with_replacement=True).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_sample
:
>>> agnostic_sample(s_pd)
+2 3
+1 2
+3 4
+3 4
+dtype: int64
+
>>> agnostic_sample(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 4
+]
+
>>> agnostic_sample(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 4,
+ 3,
+ 4
+ ]
+]
+
scatter(indices, values)
+
+Set value(s) at given position(s).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ indices
+ |
+
+ int | Sequence[int]
+ |
+
+
+
+ Position(s) to set items at. + |
+ + required + | +
+ values
+ |
+
+ Any
+ |
+
+
+
+ Values to set. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with values set at given positions. + |
+
This method always returns a new Series, without modifying the original one. +Using this function in a for-loop is an anti-pattern; we recommend building +up your positions and values beforehand and doing the update in one go.
+For example, instead of
+for i in [1, 3, 2]:
+ value = some_function(i)
+ s = s.scatter(i, value)
+
prefer
+positions = [1, 3, 2]
+values = [some_function(x) for x in positions]
+s = s.scatter(positions, values)
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> def agnostic_scatter(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(df["a"].scatter([0, 1], [999, 888])).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_scatter
:
>>> agnostic_scatter(df_pd)
+ a b
+0 999 4
+1 888 5
+2 3 6
+
>>> agnostic_scatter(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 999 ┆ 4 │
+│ 888 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+
>>> agnostic_scatter(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[999,888,3]]
+b: [[4,5,6]]
+
shift(n)
+
+Shift values by n
positions.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of indices to shift forward. If a negative value is passed, +values are shifted in the opposite direction instead. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with values shifted by n positions. + |
+
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in an Int64 column, you could
+do:
s.shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [2, 4, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_shift(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.shift(1).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_shift
:
>>> agnostic_shift(s_pd)
+0 NaN
+1 2.0
+2 4.0
+dtype: float64
+
>>> agnostic_shift(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ 4
+]
+
>>> agnostic_shift(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 2,
+ 4
+ ]
+]
+
sort(*, descending=False, nulls_last=False)
+
+Sort this Series. Null values are placed first by default.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last instead of first. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new sorted Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [5, None, 1, 2]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define library agnostic functions:
+>>> def agnostic_sort(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.sort().to_native()
+
>>> def agnostic_sort_descending(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.sort(descending=True).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_sort
and agnostic_sort_descending
:
>>> agnostic_sort(s_pd)
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+dtype: float64
+
>>> agnostic_sort(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 1
+ 2
+ 5
+]
+
>>> agnostic_sort(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 1,
+ 2,
+ 5
+ ]
+]
+
>>> agnostic_sort_descending(s_pd)
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+dtype: float64
+
>>> agnostic_sort_descending(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 5
+ 2
+ 1
+]
+
>>> agnostic_sort_descending(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ null,
+ 5,
+ 2,
+ 1
+ ]
+]
+
skew()
+
+Calculate the sample skewness of the Series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The sample skewness of the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 1, 2, 10, 100]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_skew(s_native: IntoSeries) -> float:
+... s = nw.from_native(s_native, series_only=True)
+... return s.skew()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_skew
:
>>> agnostic_skew(s_pd)
+np.float64(1.4724267269058975)
+
>>> agnostic_skew(s_pl)
+1.4724267269058975
+
>>> agnostic_skew(s_pa)
+1.4724267269058975
+
The skewness is a measure of the asymmetry of the probability distribution. +A perfectly symmetric distribution has a skewness of 0.
+std(*, ddof=1)
+
+Get the standard deviation of this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, + where N represents the number of elements. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The standard deviation of all elements in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_std(s_native: IntoSeries) -> float:
+... s = nw.from_native(s_native, series_only=True)
+... return s.std()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_std
:
>>> agnostic_std(s_pd)
+np.float64(1.0)
+
>>> agnostic_std(s_pl)
+1.0
+
>>> agnostic_std(s_pa)
+1.0
+
sum()
+
+Reduce this Series to the sum value.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ The sum of all elements in the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_sum(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.sum()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_sum
:
>>> agnostic_sum(s_pd)
+np.int64(6)
+
>>> agnostic_sum(s_pl)
+6
+
>>> agnostic_sum(s_pa)
+6
+
tail(n=10)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with the last n rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.tail(3).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_tail
:
>>> agnostic_tail(s_pd)
+7 7
+8 8
+9 9
+dtype: int64
+
>>> agnostic_tail(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 7
+ 8
+ 9
+]
+
>>> agnostic_tail(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 7,
+ 8,
+ 9
+ ]
+]
+
to_arrow()
+
+Convert to arrow.
+ + +Returns:
+Type | +Description | +
---|---|
+ Array
+ |
+
+
+
+ A PyArrow Array containing the data from the Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3, 4]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function that converts to arrow:
+>>> def agnostic_to_arrow(s_native: IntoSeries) -> pa.Array:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_arrow()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_arrow
:
>>> agnostic_to_arrow(s_pd)
+<pyarrow.lib.Int64Array object at ...>
+[
+ 1,
+ 2,
+ 3,
+ 4
+]
+
>>> agnostic_to_arrow(s_pl)
+<pyarrow.lib.Int64Array object at ...>
+[
+ 1,
+ 2,
+ 3,
+ 4
+]
+
>>> agnostic_to_arrow(s_pa)
+<pyarrow.lib.Int64Array object at ...>
+[
+ 1,
+ 2,
+ 3,
+ 4
+]
+
to_dummies(*, separator='_', drop_first=False)
+
+Get dummy/indicator variables.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ separator
+ |
+
+ str
+ |
+
+
+
+ Separator/delimiter used when generating column names. + |
+
+ '_'
+ |
+
+ drop_first
+ |
+
+ bool
+ |
+
+
+
+ Remove the first category from the variable being encoded. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame containing the dummy/indicator variables. + |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_to_dummies(
+... s_native: IntoSeries, drop_first: bool = False
+... ) -> IntoDataFrame:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_dummies(drop_first=drop_first).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_dummies
:
>>> agnostic_to_dummies(s_pd)
+ a_1 a_2 a_3
+0 1 0 0
+1 0 1 0
+2 0 0 1
+
>>> agnostic_to_dummies(s_pd, drop_first=True)
+ a_2 a_3
+0 0 0
+1 1 0
+2 0 1
+
>>> agnostic_to_dummies(s_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a_1 ┆ a_2 ┆ a_3 │
+│ --- ┆ --- ┆ --- │
+│ i8 ┆ i8 ┆ i8 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 0 ┆ 0 │
+│ 0 ┆ 1 ┆ 0 │
+│ 0 ┆ 0 ┆ 1 │
+└─────┴─────┴─────┘
+
>>> agnostic_to_dummies(s_pl, drop_first=True)
+shape: (3, 2)
+┌─────┬─────┐
+│ a_2 ┆ a_3 │
+│ --- ┆ --- │
+│ i8 ┆ i8 │
+╞═════╪═════╡
+│ 0 ┆ 0 │
+│ 1 ┆ 0 │
+│ 0 ┆ 1 │
+└─────┴─────┘
+
>>> agnostic_to_dummies(s_pa)
+pyarrow.Table
+_1: int8
+_2: int8
+_3: int8
+----
+_1: [[1,0,0]]
+_2: [[0,1,0]]
+_3: [[0,0,1]]
+>>> agnostic_to_dummies(s_pa, drop_first=True)
+pyarrow.Table
+_2: int8
+_3: int8
+----
+_2: [[0,1,0]]
+_3: [[0,0,1]]
+
to_frame()
+
+Convert to dataframe.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A DataFrame containing this Series as a single column. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_to_frame(s_native: IntoSeries) -> IntoDataFrame:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_frame().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_frame
:
>>> agnostic_to_frame(s_pd)
+ a
+0 1
+1 2
+
>>> agnostic_to_frame(s_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+└─────┘
+
>>> agnostic_to_frame(s_pa)
+pyarrow.Table
+: int64
+----
+: [[1,2]]
+
to_list()
+
+Convert to list.
+ + +This function converts to Python scalars. It's typically +more efficient to keep your data in the format native to +your original dataframe, so we recommend only calling this +when you absolutely need to.
+Returns:
+Type | +Description | +
---|---|
+ list[Any]
+ |
+
+
+
+ A list of Python objects. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_to_list(s_native: IntoSeries):
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_list()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_list
:
>>> agnostic_to_list(s_pd)
+[1, 2, 3]
+
>>> agnostic_to_list(s_pl)
+[1, 2, 3]
+
>>> agnostic_to_list(s_pa)
+[1, 2, 3]
+
to_numpy()
+
+Convert to numpy.
+ + +Returns:
+Type | +Description | +
---|---|
+ ndarray
+ |
+
+
+
+ NumPy ndarray representation of the Series. + |
+
Examples:
+>>> import numpy as np
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_to_numpy(s_native: IntoSeries) -> np.ndarray:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_numpy()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_numpy
:
>>> agnostic_to_numpy(s_pd)
+array([1, 2, 3]...)
+
>>> agnostic_to_numpy(s_pl)
+array([1, 2, 3]...)
+
>>> agnostic_to_numpy(s_pa)
+array([1, 2, 3]...)
+
to_pandas()
+
+Convert to pandas.
+ + +Returns:
+Type | +Description | +
---|---|
+ Series
+ |
+
+
+
+ A pandas Series containing the data from this Series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_to_pandas(s_native: IntoSeries) -> pd.Series:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_pandas()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_pandas
:
>>> agnostic_to_pandas(s_pd)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+
>>> agnostic_to_pandas(s_pl)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+
>>> agnostic_to_pandas(s_pa)
+0 1
+1 2
+2 3
+Name: , dtype: int64
+
to_native()
+
+Convert Narwhals series to native series.
+ + +Returns:
+Type | +Description | +
---|---|
+ IntoSeriesT
+ |
+
+
+
+ Series of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_to_native(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_native
:
>>> agnostic_to_native(s_pd)
+0 1
+1 2
+2 3
+dtype: int64
+
>>> agnostic_to_native(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 3
+]
+
>>> agnostic_to_native(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2,
+ 3
+ ]
+]
+
unique(*, maintain_order=False)
+
+Returns unique values of the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original series. This may be more
+expensive to compute. Setting this to True blocks the possibility to run on the streaming engine for Polars. |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with duplicate values removed. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [2, 4, 4, 6]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_unique(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.unique(maintain_order=True).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_unique
:
>>> agnostic_unique(s_pd)
+0 2
+1 4
+2 6
+dtype: int64
+
>>> agnostic_unique(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 6
+]
+
>>> agnostic_unique(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 4,
+ 6
+ ]
+]
+
value_counts(*, sort=False, parallel=False, name=None, normalize=False)
+
+Count the occurrences of unique values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ sort
+ |
+
+ bool
+ |
+
+
+
+ Sort the output by count in descending order. If set to False (default), +the order of the output is random. + |
+
+ False
+ |
+
+ parallel
+ |
+
+ bool
+ |
+
+
+
+ Execute the computation in parallel. Used for Polars only. + |
+
+ False
+ |
+
+ name
+ |
+
+ str | None
+ |
+
+
+
+ Give the resulting count column a specific name; if normalize is True, defaults to "proportion", otherwise defaults to "count". |
+
+ None
+ |
+
+ normalize
+ |
+
+ bool
+ |
+
+
+
+ If true, gives relative frequencies of the unique values. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
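+ A DataFrame with two columns: the original values as the first column, and either the count or the proportion as the second column, depending on the normalize parameter. + |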
+
+ DataFrame[Any]
+ |
+
+
+
+
|
+
+ DataFrame[Any]
+ |
+
+
+
+
|
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 1, 2, 3, 2]
+>>> s_pd = pd.Series(data, name="s")
+>>> s_pl = pl.Series(values=data, name="s")
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_value_counts(s_native: IntoSeries) -> IntoDataFrame:
+... s = nw.from_native(s_native, series_only=True)
+... return s.value_counts(sort=True).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_value_counts
:
>>> agnostic_value_counts(s_pd)
+ s count
+0 1 2
+1 2 2
+2 3 1
+
>>> agnostic_value_counts(s_pl)
+shape: (3, 2)
+┌─────┬───────┐
+│ s ┆ count │
+│ --- ┆ --- │
+│ i64 ┆ u32 │
+╞═════╪═══════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 2 │
+│ 3 ┆ 1 │
+└─────┴───────┘
+
>>> agnostic_value_counts(s_pa)
+pyarrow.Table
+: int64
+count: int64
+----
+: [[1,2,3]]
+count: [[2,2,1]]
+
var(*, ddof=1)
+
+Get the variance of this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, + where N represents the number of elements. + |
+
+ 1
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+
>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_var(s_native: IntoSeries) -> float:
+... s = nw.from_native(s_native, series_only=True)
+... return s.var()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_var
:
>>> agnostic_var(s_pd)
+np.float64(1.0)
+
>>> agnostic_var(s_pl)
+1.0
+
>>> agnostic_var(s_pa)
+1.0
+
zip_with(mask, other)
+
+Take values from self or other based on the given mask.
+Where mask evaluates to true, take values from self. Where mask evaluates to false, +take values from other.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mask
+ |
+
+ Self
+ |
+
+
+
+ Boolean Series + |
+ + required + | +
+ other
+ |
+
+ Self
+ |
+
+
+
+ Series of same type. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new Series with values selected from self or other based on the mask. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [1, 2, 3, 4, 5]
+>>> other = [5, 4, 3, 2, 1]
+>>> mask = [True, False, True, False, True]
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_zip_with(
+... s1_native: IntoSeriesT, mask_native: IntoSeriesT, s2_native: IntoSeriesT
+... ) -> IntoSeriesT:
+... s1 = nw.from_native(s1_native, series_only=True)
+... mask = nw.from_native(mask_native, series_only=True)
+... s2 = nw.from_native(s2_native, series_only=True)
+... return s1.zip_with(mask, s2).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_zip_with
:
>>> agnostic_zip_with(
+... s1_native=pl.Series(data),
+... mask_native=pl.Series(mask),
+... s2_native=pl.Series(other),
+... )
+shape: (5,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 2
+ 5
+]
+
>>> agnostic_zip_with(
+... s1_native=pd.Series(data),
+... mask_native=pd.Series(mask),
+... s2_native=pd.Series(other),
+... )
+0 1
+1 4
+2 3
+3 2
+4 5
+dtype: int64
+
>>> agnostic_zip_with(
+... s1_native=pa.chunked_array([data]),
+... mask_native=pa.chunked_array([mask]),
+... s2_native=pa.chunked_array([other]),
+... )
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 4,
+ 3,
+ 2,
+ 5
+ ]
+]
+
narwhals.Series.cat
get_categories()
+
+Get unique categories from column.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the unique categories. + |
+
Examples:
+Let's create some series:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["apple", "mango", "mango"]
+>>> s_pd = pd.Series(data, dtype="category")
+>>> s_pl = pl.Series(data, dtype=pl.Categorical)
+>>> s_pa = pa.chunked_array([data]).dictionary_encode()
+
We define a library-agnostic function to get the unique categories +of a Series:
+>>> def agnostic_get_categories(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.cat.get_categories().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_get_categories
:
>>> agnostic_get_categories(s_pd)
+0 apple
+1 mango
+dtype: object
+
>>> agnostic_get_categories(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "apple"
+ "mango"
+]
+
>>> agnostic_get_categories(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "apple",
+ "mango"
+ ]
+]
+
narwhals.Series.dt
convert_time_zone(time_zone)
+
+Convert time zone.
+If converting from a time-zone-naive column, then conversion happens +as if converting from UTC.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with the specified time zone. + |
+
Examples:
+>>> from datetime import datetime, timezone
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_convert_time_zone(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.convert_time_zone("Asia/Kathmandu").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_convert_time_zone
:
>>> agnostic_convert_time_zone(s_pd)
+0 2024-01-01 05:45:00+05:45
+1 2024-01-02 05:45:00+05:45
+dtype: datetime64[ns, Asia/Kathmandu]
+
>>> agnostic_convert_time_zone(s_pl)
+shape: (2,)
+Series: '' [datetime[μs, Asia/Kathmandu]]
+[
+ 2024-01-01 05:45:00 +0545
+ 2024-01-02 05:45:00 +0545
+]
+
>>> agnostic_convert_time_zone(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2024-01-01 00:00:00.000000Z,
+ 2024-01-02 00:00:00.000000Z
+ ]
+]
+
date()
+
+Get the date in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with the date portion of the datetime values. + |
+
Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If the default pandas backend is being used. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]
+>>> s_pd = pd.Series(dates).convert_dtypes(dtype_backend="pyarrow")
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_date(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.date().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_date
:
>>> agnostic_date(s_pd)
+0 2012-01-07
+1 2023-03-10
+dtype: date32[day][pyarrow]
+
>>> agnostic_date(s_pl)
+shape: (2,)
+Series: '' [date]
+[
+ 2012-01-07
+ 2023-03-10
+]
+
>>> agnostic_date(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2012-01-07,
+ 2023-03-10
+ ]
+]
+
day()
+
+Extracts the day in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the day component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [datetime(2022, 1, 1), datetime(2022, 1, 5)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_day(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.day().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_day
:
>>> agnostic_day(s_pd)
+0 1
+1 5
+dtype: int...
+
>>> agnostic_day(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 1
+ 5
+]
+
>>> agnostic_day(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 5
+ ]
+]
+
hour()
+
+Extracts the hour in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the hour component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_hour(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.hour().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_hour
:
>>> agnostic_hour(s_pd)
+0 5
+1 9
+dtype: int...
+
>>> agnostic_hour(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 5
+ 9
+]
+
>>> agnostic_hour(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 5,
+ 9
+ ]
+]
+
microsecond()
+
+Extracts the microseconds in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the microsecond component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_microsecond(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.microsecond().alias("datetime").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_microsecond
:
>>> agnostic_microsecond(s_pd)
+0 400000
+1 600000
+2 800000
+3 0
+4 200000
+Name: datetime, dtype: int...
+
>>> agnostic_microsecond(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400000
+ 600000
+ 800000
+ 0
+ 200000
+]
+
>>> agnostic_microsecond(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 400000,
+ 600000,
+ 800000,
+ 0,
+ 200000
+ ]
+]
+
millisecond()
+
+Extracts the milliseconds in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the millisecond component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_millisecond(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.millisecond().alias("datetime").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_millisecond
:
>>> agnostic_millisecond(s_pd)
+0 400
+1 600
+2 800
+3 0
+4 200
+Name: datetime, dtype: int...
+
>>> agnostic_millisecond(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400
+ 600
+ 800
+ 0
+ 200
+]
+
>>> agnostic_millisecond(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 400,
+ 600,
+ 800,
+ 0,
+ 200
+ ]
+]
+
minute()
+
+Extracts the minute in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the minute component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_minute(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.minute().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_minute
:
>>> agnostic_minute(s_pd)
+0 3
+1 12
+dtype: int...
+
>>> agnostic_minute(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 3
+ 12
+]
+
>>> agnostic_minute(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 12
+ ]
+]
+
month()
+
+Gets the month in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the month component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [datetime(2023, 2, 1), datetime(2023, 8, 3)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_month(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.month().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_month
:
>>> agnostic_month(s_pd)
+0 2
+1 8
+dtype: int...
+>>> agnostic_month(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 2
+ 8
+]
+
>>> agnostic_month(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 8
+ ]
+]
+
nanosecond()
+
+Extract the nanoseconds in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the nanosecond component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [
+... datetime(2022, 1, 1, 5, 3, 10, 500000),
+... datetime(2022, 1, 5, 9, 12, 4, 60000),
+... ]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_nanosecond(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.nanosecond().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_nanosecond
:
>>> agnostic_nanosecond(s_pd)
+0 500000000
+1 60000000
+dtype: int...
+
>>> agnostic_nanosecond(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 500000000
+ 60000000
+]
+
>>> agnostic_nanosecond(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 500000000,
+ 60000000
+ ]
+]
+
ordinal_day()
+
+Get ordinal day.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the ordinal day (day of year) for each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_ordinal_day(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.ordinal_day().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_ordinal_day
:
>>> agnostic_ordinal_day(s_pd)
+0 1
+1 216
+dtype: int32
+
>>> agnostic_ordinal_day(s_pl)
+shape: (2,)
+Series: '' [i16]
+[
+ 1
+ 216
+]
+
>>> agnostic_ordinal_day(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 216
+ ]
+]
+
replace_time_zone(time_zone)
+
+Replace time zone.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str | None
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with the specified time zone. + |
+
Examples:
+>>> from datetime import datetime, timezone
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_replace_time_zone(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.replace_time_zone("Asia/Kathmandu").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_replace_time_zone
:
>>> agnostic_replace_time_zone(s_pd)
+0 2024-01-01 00:00:00+05:45
+1 2024-01-02 00:00:00+05:45
+dtype: datetime64[ns, Asia/Kathmandu]
+
>>> agnostic_replace_time_zone(s_pl)
+shape: (2,)
+Series: '' [datetime[μs, Asia/Kathmandu]]
+[
+ 2024-01-01 00:00:00 +0545
+ 2024-01-02 00:00:00 +0545
+]
+
>>> agnostic_replace_time_zone(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2023-12-31 18:15:00.000000Z,
+ 2024-01-01 18:15:00.000000Z
+ ]
+]
+
second()
+
+Extracts the seconds in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the second component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [datetime(2022, 1, 1, 5, 3, 10), datetime(2022, 1, 5, 9, 12, 4)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_second(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.second().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_second
:
>>> agnostic_second(s_pd)
+0 10
+1 4
+dtype: int...
+
>>> agnostic_second(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 10
+ 4
+]
+
>>> agnostic_second(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 10,
+ 4
+ ]
+]
+
timestamp(time_unit='us')
+
+Return a timestamp in the given time unit.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['ns', 'us', 'ms']
+ |
+
+
+
+ {'ns', 'us', 'ms'} +Time unit. + |
+
+ 'us'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with timestamps in the specified time unit. + |
+
Examples:
+>>> from datetime import date
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [date(2001, 1, 1), None, date(2001, 1, 3)]
+>>> s_pd = pd.Series(data, dtype="datetime64[ns]")
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_timestamp(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.timestamp("ms").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_timestamp
:
>>> agnostic_timestamp(s_pd)
+0 9.783072e+11
+1 NaN
+2 9.784800e+11
+dtype: float64
+
>>> agnostic_timestamp(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 978307200000
+ null
+ 978480000000
+]
+
>>> agnostic_timestamp(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 978307200000,
+ null,
+ 978480000000
+ ]
+]
+
total_microseconds()
+
+Get total microseconds.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the total number of microseconds for each timedelta value. + |
+
The function outputs the total microseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_total_microseconds(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_microseconds().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_total_microseconds
:
>>> agnostic_total_microseconds(s_pd)
+0 10
+1 1200
+dtype: int...
+
>>> agnostic_total_microseconds(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 1200
+]
+
>>> agnostic_total_microseconds(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 10,
+ 1200
+ ]
+]
+
total_milliseconds()
+
+Get total milliseconds.
+ + +The function outputs the total milliseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the total number of milliseconds for each timedelta value. + |
+
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_total_milliseconds(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_milliseconds().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_total_milliseconds
:
>>> agnostic_total_milliseconds(s_pd)
+0 10
+1 20
+dtype: int...
+
>>> agnostic_total_milliseconds(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
>>> agnostic_total_milliseconds(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 10,
+ 20
+ ]
+]
+
total_minutes()
+
+Get total minutes.
+ + +The function outputs the total minutes in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the total number of minutes for each timedelta value. + |
+
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_total_minutes(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_minutes().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_total_minutes
:
>>> agnostic_total_minutes(s_pd)
+0 10
+1 20
+dtype: int...
+
>>> agnostic_total_minutes(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
>>> agnostic_total_minutes(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 10,
+ 20
+ ]
+]
+
total_nanoseconds()
+
+Get total nanoseconds.
+ + +The function outputs the total nanoseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the total number of nanoseconds for each timedelta value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> s_pd = pd.to_datetime(pd.Series(data))
+>>> s_pl = pl.Series(data).str.to_datetime(time_unit="ns")
+
We define a library agnostic function:
+>>> def agnostic_total_nanoseconds(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.diff().dt.total_nanoseconds().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_total_nanoseconds
:
>>> agnostic_total_nanoseconds(s_pd)
+0 NaN
+1 1.0
+dtype: float64
+
>>> agnostic_total_nanoseconds(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ null
+ 1
+]
+
total_seconds()
+
+Get total seconds.
+ + +The function outputs the total seconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the total number of seconds for each timedelta value. + |
+
Examples:
+>>> from datetime import timedelta
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_total_seconds(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.total_seconds().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_total_seconds
:
>>> agnostic_total_seconds(s_pd)
+0 10
+1 20
+dtype: int...
+
>>> agnostic_total_seconds(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
>>> agnostic_total_seconds(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 10,
+ 20
+ ]
+]
+
to_string(format)
+
+Convert a Date/Time/Datetime series into a String series with the given format.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str
+ |
+
+
+
+ Format string for converting the datetime to string. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with the datetime values formatted as strings according to the specified format. + |
+
Unfortunately, different libraries interpret format directives a bit differently.
+- Chrono, the library used by Polars, uses "%.f" for fractional seconds,
+  whereas pandas and Python stdlib use ".%f".
+- PyArrow interprets "%S" as "seconds, including fractional seconds"
+  whereas most other tools interpret it as "just seconds, as 2 digits".
+Therefore, we make the following adjustments:
+- for pandas-like libraries, we replace "%S.%f" with "%S%.f".
+- for PyArrow, we replace "%S.%f" with "%S".
+Workarounds like these don't make us happy, and we try to avoid them as much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime string, and have it work as consistently as possible across libraries, we suggest using:
+- "%Y-%m-%dT%H:%M:%S%.f" for datetimes
+- "%Y-%m-%d" for dates
+though note that, even then, different tools may return a different number of trailing zeros. Nonetheless, this is probably consistent enough for most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_to_string(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.to_string("%Y/%m/%d").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_string
:
>>> agnostic_to_string(s_pd)
+0 2020/03/01
+1 2020/04/01
+2 2020/05/01
+dtype: object
+
>>> agnostic_to_string(s_pl)
+shape: (3,)
+Series: '' [str]
+[
+ "2020/03/01"
+ "2020/04/01"
+ "2020/05/01"
+]
+
>>> agnostic_to_string(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "2020/03/01",
+ "2020/04/01",
+ "2020/05/01"
+ ]
+]
+
weekday()
+
+Extract the week day in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the week day for each datetime value. + |
+
+ SeriesT
+ |
+
+
+
+ Returns the ISO weekday number, where Monday = 1 and Sunday = 7. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a library agnostic function:
+>>> def agnostic_weekday(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.weekday().to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_weekday
:
>>> agnostic_weekday(s_pd)
+0 3
+1 1
+dtype: int32
+>>> agnostic_weekday(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 3
+ 1
+]
+>>> agnostic_weekday(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 1
+ ]
+]
+
year()
+
+Get the year in a datetime series.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the year component of each datetime value. + |
+
Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> dates = [datetime(2012, 1, 7), datetime(2023, 3, 10)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+>>> s_pa = pa.chunked_array([dates])
+
We define a library agnostic function:
+>>> def agnostic_year(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.dt.year().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_year
:
>>> agnostic_year(s_pd)
+0 2012
+1 2023
+dtype: int...
+
>>> agnostic_year(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 2012
+ 2023
+]
+
>>> agnostic_year(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2012,
+ 2023
+ ]
+]
+
narwhals.Series.list
len()
+
+Return the number of elements in each list.
+Null values count towards the total.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new series. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = [[1, 2], [3, 4, None], None, []]
+
Let's define a dataframe-agnostic function:
+>>> def agnostic_list_len(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.list.len().to_native()
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> agnostic_list_len(
+... pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
+... )
+0 2
+1 3
+2 <NA>
+3 0
+dtype: int32[pyarrow]
+
>>> agnostic_list_len(pl.Series(data))
+shape: (4,)
+Series: '' [u32]
+[
+ 2
+ 3
+ null
+ 0
+]
+
>>> agnostic_list_len(pa.chunked_array([data]))
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2,
+ 3,
+ null,
+ 0
+ ]
+]
+
narwhals.Series.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A character sequence or valid regular expression pattern. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with boolean values indicating if each string contains the pattern. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["cat", "dog", "rabbit and parrot", "dove", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_contains(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.contains("parrot|dove").to_native()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_contains
:
>>> agnostic_contains(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 None
+dtype: object
+
>>> agnostic_contains(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ null
+]
+
>>> agnostic_contains(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ false,
+ true,
+ true,
+ null
+ ]
+]
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ suffix substring + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with boolean values indicating if each string ends with the suffix. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_ends_with(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.ends_with("ngo").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_ends_with
:
>>> agnostic_ends_with(s_pd)
+0 False
+1 True
+2 None
+dtype: object
+
>>> agnostic_ends_with(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ null
+]
+
>>> agnostic_ends_with(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ false,
+ true,
+ null
+ ]
+]
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the first n characters of each string. + |
+
Notes:
1. When the n input is negative, head returns characters up to the n-th from the end of the string.
+ For example, if n = -3, then all characters except the last three are returned.
2. If the string has fewer than n characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_head(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.head().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_head
:
>>> agnostic_head(s_pd)
+0 Atata
+1 taata
+2 taata
+3 zukky
+dtype: object
+
>>> agnostic_head(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "Atata"
+ "taata"
+ "taata"
+ "zukky"
+]
+
>>> agnostic_head(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "Atata",
+ "taata",
+ "taata",
+ "zukky"
+ ]
+]
+
len_chars()
+
+Return the length of each string as the number of characters.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the length of each string in characters. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["foo", "Café", "345", "東京", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_len_chars(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.len_chars().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_len_chars
:
>>> agnostic_len_chars(s_pd)
+0 3.0
+1 4.0
+2 3.0
+3 2.0
+4 NaN
+dtype: float64
+
>>> agnostic_len_chars(s_pl)
+shape: (5,)
+Series: '' [u32]
+[
+ 3
+ 4
+ 3
+ 2
+ null
+]
+
>>> agnostic_len_chars(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 3,
+ 4,
+ 3,
+ 2,
+ null
+ ]
+]
+
replace(pattern, value, *, literal=False, n=1)
+
+Replace first matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string. + |
+
+ False
+ |
+
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of matches to replace. + |
+
+ 1
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with the regex/literal pattern replaced with the specified value. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["123abc", "abc abc123"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_replace(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... s = s.str.replace("abc", "")
+... return s.to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_replace
:
>>> agnostic_replace(s_pd)
+0 123
+1 abc123
+dtype: object
+
>>> agnostic_replace(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "123"
+ " abc123"
+]
+
>>> agnostic_replace(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "123",
+ " abc123"
+ ]
+]
+
replace_all(pattern, value, *, literal=False)
+
+Replace all matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with all occurrences of pattern replaced with the specified value. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["123abc", "abc abc123"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_replace_all(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... s = s.str.replace_all("abc", "")
+... return s.to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_replace_all
:
>>> agnostic_replace_all(s_pd)
+0 123
+1 123
+dtype: object
+
>>> agnostic_replace_all(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "123"
+ " 123"
+]
+
>>> agnostic_replace_all(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "123",
+ " 123"
+ ]
+]
+
slice(offset, length=None)
+
+Create subslices of the string values of a Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ offset
+ |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
+ length
+ |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to None (default), the slice is taken to the end of the string. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing subslices of each string. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["pear", None, "papaya", "dragonfruit"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.slice(4, length=3).to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_slice
:
>>> agnostic_slice(s_pd)
+0
+1 None
+2 ya
+3 onf
+dtype: object
+
>>> agnostic_slice(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ ""
+ null
+ "ya"
+ "onf"
+]
+
>>> agnostic_slice(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "",
+ null,
+ "ya",
+ "onf"
+ ]
+]
+
Using negative indexes:
+>>> def agnostic_slice(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.slice(-3).to_native()
+
>>> agnostic_slice(s_pd)
+0 ear
+1 None
+2 aya
+3 uit
+dtype: object
+
>>> agnostic_slice(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "ear"
+ null
+ "aya"
+ "uit"
+]
+
>>> agnostic_slice(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "ear",
+ null,
+ "aya",
+ "uit"
+ ]
+]
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ prefix substring + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with boolean values indicating if each string starts with the prefix. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_starts_with(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.starts_with("app").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_starts_with
:
>>> agnostic_starts_with(s_pd)
+0 True
+1 False
+2 None
+dtype: object
+
>>> agnostic_starts_with(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ true
+ false
+ null
+]
+
>>> agnostic_starts_with(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ true,
+ false,
+ null
+ ]
+]
+
strip_chars(characters=None)
+
+Remove leading and trailing characters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ characters
+ |
+
+ str | None
+ |
+
+
+
+ The set of characters to be removed. All combinations of this set of characters will be stripped from the start and end of the string. If set to None (default), all leading and trailing whitespace is removed instead. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with leading and trailing characters removed. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["apple", "\nmango"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_strip_chars(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... s = s.str.strip_chars()
+... return s.to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_strip_chars
:
>>> agnostic_strip_chars(s_pd)
+0 apple
+1 mango
+dtype: object
+
>>> agnostic_strip_chars(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "apple"
+ "mango"
+]
+
>>> agnostic_strip_chars(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "apple",
+ "mango"
+ ]
+]
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series containing the last n characters of each string. + |
+
Notes:
1. When the n input is negative, tail returns characters starting from the n-th from the beginning of
+ the string. For example, if n = -3, then all characters except the first three are returned.
2. If the string has fewer than n characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_tail(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.tail().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_tail
:
>>> agnostic_tail(s_pd)
+0 atata
+1 taata
+2 atata
+3 kkyun
+dtype: object
+
>>> agnostic_tail(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "atata"
+ "taata"
+ "atata"
+ "kkyun"
+]
+
>>> agnostic_tail(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "atata",
+ "taata",
+ "atata",
+ "kkyun"
+ ]
+]
+
to_datetime(format=None)
+
+Parse Series with strings to a Series with Datetime dtype.
+ + +pandas defaults to nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanoseconds were the only time unit supported +in pandas, with no ability to set any other one. The ability to +set the time unit in pandas, if the version permits, will arrive.
+As different backends auto-infer format in different ways, if format=None
+there is no guarantee that the result will be equal.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str | None
+ |
+
+
+
+ Format to use for conversion. If set to None (default), the format is +inferred from the data. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with datetime dtype. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["2020-01-01", "2020-01-02"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_to_datetime(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.to_datetime(format="%Y-%m-%d").to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_datetime
:
>>> agnostic_to_datetime(s_pd)
+0 2020-01-01
+1 2020-01-02
+dtype: datetime64[ns]
+
>>> agnostic_to_datetime(s_pl)
+shape: (2,)
+Series: '' [datetime[μs]]
+[
+ 2020-01-01 00:00:00
+ 2020-01-02 00:00:00
+]
+
>>> agnostic_to_datetime(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 2020-01-01 00:00:00.000000,
+ 2020-01-02 00:00:00.000000
+ ]
+]
+
to_lowercase()
+
+Transform string to lowercase variant.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with values converted to lowercase. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["APPLE", "MANGO", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_to_lowercase(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.to_lowercase().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_lowercase
:
>>> agnostic_to_lowercase(s_pd)
+0 apple
+1 mango
+2 None
+dtype: object
+
>>> agnostic_to_lowercase(s_pl)
+shape: (3,)
+Series: '' [str]
+[
+ "apple"
+ "mango"
+ null
+]
+
>>> agnostic_to_lowercase(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "apple",
+ "mango",
+ null
+ ]
+]
+
to_uppercase()
+
+Transform string to uppercase variant.
+ + +Returns:
+Type | +Description | +
---|---|
+ SeriesT
+ |
+
+
+
+ A new Series with values converted to uppercase. + |
+
The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. +For more info see: https://github.com/apache/arrow/issues/34599 +There may be other unicode-edge-case-related variations across implementations.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+
>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> def agnostic_to_uppercase(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.str.to_uppercase().to_native()
+
We can then pass any supported library such as pandas, Polars, or
+PyArrow to agnostic_to_uppercase
:
>>> agnostic_to_uppercase(s_pd)
+0 APPLE
+1 MANGO
+2 None
+dtype: object
+
>>> agnostic_to_uppercase(s_pl)
+shape: (3,)
+Series: '' [str]
+[
+ "APPLE"
+ "MANGO"
+ null
+]
+
>>> agnostic_to_uppercase(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "APPLE",
+ "MANGO",
+ null
+ ]
+]
+
narwhals.typing
Narwhals comes fully statically typed. In addition to nw.DataFrame
, nw.Expr
,
+nw.Series
, nw.LazyFrame
, we also provide the following type hints:
DataFrameT = TypeVar('DataFrameT', bound='DataFrame[Any]')
+
+
+ module-attribute
+
+
+TypeVar bound to Narwhals DataFrame.
+Use this if your function accepts a Narwhals DataFrame and returns a Narwhals +DataFrame backed by the same backend.

+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import DataFrameT
+>>> @nw.narwhalify
+... def func(df: DataFrameT) -> DataFrameT:
+... return df.with_columns(c=df["a"] + 1)
+
Frame = Union['DataFrame[Any]', 'LazyFrame[Any]']
+
+
+ module-attribute
+
+
+Narwhals DataFrame or Narwhals LazyFrame.
+Use this if your function can work with either and your function doesn't care +about its backend.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import Frame
+>>> @nw.narwhalify
+... def agnostic_columns(df: Frame) -> list[str]:
+... return df.columns
+
FrameT = TypeVar('FrameT', bound='Frame')
+
+
+ module-attribute
+
+
+TypeVar bound to Narwhals DataFrame or Narwhals LazyFrame.
+Use this if your function accepts either nw.DataFrame
or nw.LazyFrame
and returns
+an object of the same kind.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import FrameT
+>>> @nw.narwhalify
+... def agnostic_func(df: FrameT) -> FrameT:
+... return df.with_columns(c=nw.col("a") + 1)
+
IntoDataFrame = Union['NativeFrame', 'DataFrame[Any]', 'DataFrameLike']
+
+
+ module-attribute
+
+
+Anything which can be converted to a Narwhals DataFrame.
+Use this if your function accepts a narwhalifiable object but doesn't care about its backend.
+ + +Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrame
+>>> def agnostic_shape(df_native: IntoDataFrame) -> tuple[int, int]:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.shape
+
IntoDataFrameT = TypeVar('IntoDataFrameT', bound='IntoDataFrame')
+
+
+ module-attribute
+
+
+TypeVar bound to object convertible to Narwhals DataFrame.
+Use this if your function accepts an object which can be converted to nw.DataFrame
+and returns an object of the same class.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoDataFrameT
+>>> def agnostic_func(df_native: IntoDataFrameT) -> IntoDataFrameT:
+... df = nw.from_native(df_native, eager_only=True)
+... return df.with_columns(c=df["a"] + 1).to_native()
+
IntoExpr = Union['Expr', str, 'Series[Any]']
+
+
+ module-attribute
+
+
+Anything which can be converted to an expression.
+Use this to mean "either a Narwhals expression, or something which can be converted
+into one". For example, exprs
in DataFrame.select
is typed to accept IntoExpr
,
+as it can either accept a nw.Expr
(e.g. df.select(nw.col('a'))
) or a string
+which will be interpreted as a nw.Expr
, e.g. df.select('a')
.
IntoFrame = Union['NativeFrame', 'DataFrame[Any]', 'LazyFrame[Any]', 'DataFrameLike']
+
+
+ module-attribute
+
+
+Anything which can be converted to a Narwhals DataFrame or LazyFrame.
+Use this if your function can accept an object which can be converted to either
+nw.DataFrame
or nw.LazyFrame
and it doesn't care about its backend.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrame
+>>> def agnostic_columns(df_native: IntoFrame) -> list[str]:
+... df = nw.from_native(df_native)
+... return df.collect_schema().names()
+
IntoFrameT = TypeVar('IntoFrameT', bound='IntoFrame')
+
+
+ module-attribute
+
+
+TypeVar bound to object convertible to Narwhals DataFrame or Narwhals LazyFrame.
+Use this if your function accepts an object which is convertible to nw.DataFrame
+or nw.LazyFrame
and returns an object of the same type.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoFrameT
+>>> def agnostic_func(df_native: IntoFrameT) -> IntoFrameT:
+... df = nw.from_native(df_native)
+... return df.with_columns(c=nw.col("a") + 1).to_native()
+
IntoSeries = Union['Series[Any]', 'NativeSeries']
+
+
+ module-attribute
+
+
+Anything which can be converted to a Narwhals Series.
+Use this if your function can accept an object which can be converted to nw.Series
+and it doesn't care about its backend.
Examples:
+>>> from typing import Any
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeries
+>>> def agnostic_to_list(s_native: IntoSeries) -> list[Any]:
+... s = nw.from_native(s_native, series_only=True)
+... return s.to_list()
+
IntoSeriesT = TypeVar('IntoSeriesT', bound='IntoSeries')
+
+
+ module-attribute
+
+
+TypeVar bound to object convertible to Narwhals Series.
+Use this if your function accepts an object which can be converted to nw.Series
+and returns an object of the same class.
Examples:
+>>> import narwhals as nw
+>>> from narwhals.typing import IntoSeriesT
+>>> def agnostic_abs(s_native: IntoSeriesT) -> IntoSeriesT:
+... s = nw.from_native(s_native, series_only=True)
+... return s.abs().to_native()
+
nw.narwhalify
, or nw.from_native
?Although some people find the former more readable, the latter is better at preserving type hints.
+Here's an example: +
import polars as pl
+import narwhals as nw
+from narwhals.typing import IntoDataFrameT, DataFrameT
+
+df = pl.DataFrame({"a": [1, 2, 3]})
+
+
+def func(df_native: IntoDataFrameT) -> IntoDataFrameT:
+ df = nw.from_native(df_native, eager_only=True)
+ return df.select(b=nw.col("a")).to_native()
+
+
+reveal_type(func(df))
+
+
+@nw.narwhalify(strict=True)
+def func_2(df: DataFrameT) -> DataFrameT:
+ return df.select(b=nw.col("a"))
+
+
+reveal_type(func_2(df))
+
Running mypy
on it gives:
+
$ mypy t.py
+t.py:13: note: Revealed type is "polars.dataframe.frame.DataFrame"
+t.py:21: note: Revealed type is "Any"
+Success: no issues found in 1 source file
+
In the first case, mypy can infer that df
is a polars.DataFrame
. In the second case, it can't.
If you want to make the most out of type hints and preserve them as much as possible, we recommend
+nw.from_native
and nw.to_native
. Type hints will still be respected
+inside the function body if you type the arguments.