Skip to content

Commit

Permalink
feat: Implements arr.n_unique (#15296)
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa authored Mar 28, 2024
1 parent c9d94e1 commit e1bba07
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 0 deletions.
10 changes: 10 additions & 0 deletions crates/polars-core/src/chunked_array/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,16 @@ impl ArrayChunked {
self.amortized_iter().map(f).collect_ca(self.name())
}

/// Apply the fallible closure `f` to every element of this array column.
///
/// `f` is called once per element with `Some(series)` or `None`, as yielded by
/// `amortized_iter`, and returns `PolarsResult<Option<K>>`. The per-element
/// results are gathered with `try_collect_ca` into a `ChunkedArray<V>` that
/// carries the same name as `self`, so the call as a whole yields a
/// `PolarsResult`.
pub fn try_apply_amortized_generic<'a, F, K, V>(&'a self, f: F) -> PolarsResult<ChunkedArray<V>>
where
    V: PolarsDataType,
    F: FnMut(Option<UnstableSeries<'a>>) -> PolarsResult<Option<K>> + Copy,
    V::Array: ArrayFromIter<Option<K>>,
{
    // The output keeps the name of the input column.
    let name = self.name();
    let mapped = self.amortized_iter().map(f);
    mapped.try_collect_ca(name)
}

pub fn for_each_amortized<'a, F>(&'a self, f: F)
where
F: FnMut(Option<UnstableSeries<'a>>),
Expand Down
8 changes: 8 additions & 0 deletions crates/polars-ops/src/chunked_array/array/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@ pub trait ArrayNameSpace: AsArray {
ca.try_apply_amortized_to_list(|s| s.as_ref().unique_stable())
}

/// Count the number of unique values in every sub-array.
///
/// Each present sub-array is passed to `n_unique` and the count is cast to
/// `IdxSize`; a missing sub-array maps to a missing count. An error returned
/// by `n_unique` is returned from the closure unchanged.
fn array_n_unique(&self) -> PolarsResult<IdxCa> {
    self.as_array()
        .try_apply_amortized_generic(|opt_s| match opt_s {
            Some(s) => s.as_ref().n_unique().map(|count| Some(count as IdxSize)),
            None => Ok(None),
        })
}

#[cfg(feature = "array_any_all")]
fn array_any(&self) -> PolarsResult<Series> {
let ca = self.as_array();
Expand Down
5 changes: 5 additions & 0 deletions crates/polars-plan/src/dsl/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ impl ArrayNameSpace {
.map_private(FunctionExpr::ArrayExpr(ArrayFunction::Unique(true)))
}

/// Count the number of unique values in every sub-array.
pub fn n_unique(self) -> Expr {
    let function = FunctionExpr::ArrayExpr(ArrayFunction::NUnique);
    self.0.map_private(function)
}

/// Cast the Array column to List column with the same inner data type.
pub fn to_list(self) -> Expr {
self.0
Expand Down
8 changes: 8 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub enum ArrayFunction {
Sum,
ToList,
Unique(bool),
NUnique,
Std(u8),
Var(u8),
Median,
Expand Down Expand Up @@ -39,6 +40,7 @@ impl ArrayFunction {
Sum => mapper.nested_sum_type(),
ToList => mapper.try_map_dtype(map_array_dtype_to_list_dtype),
Unique(_) => mapper.try_map_dtype(map_array_dtype_to_list_dtype),
NUnique => mapper.with_dtype(IDX_DTYPE),
Std(_) => mapper.map_to_float_dtype(),
Var(_) => mapper.map_to_float_dtype(),
Median => mapper.map_to_float_dtype(),
Expand Down Expand Up @@ -75,6 +77,7 @@ impl Display for ArrayFunction {
Sum => "sum",
ToList => "to_list",
Unique(_) => "unique",
NUnique => "n_unique",
Std(_) => "std",
Var(_) => "var",
Median => "median",
Expand Down Expand Up @@ -107,6 +110,7 @@ impl From<ArrayFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
Sum => map!(sum),
ToList => map!(to_list),
Unique(stable) => map!(unique, stable),
NUnique => map!(n_unique),
Std(ddof) => map!(std, ddof),
Var(ddof) => map!(var, ddof),
Median => map!(median),
Expand Down Expand Up @@ -162,6 +166,10 @@ pub(super) fn unique(s: &Series, stable: bool) -> PolarsResult<Series> {
out.map(|ca| ca.into_series())
}

/// Series-level entry point for `ArrayFunction::NUnique`.
///
/// Views `s` as an array column, counts the unique values of every sub-array
/// and wraps the resulting counts back into a `Series`. Fails if `s.array()`
/// or `array_n_unique` fails.
pub(super) fn n_unique(s: &Series) -> PolarsResult<Series> {
    let array_ca = s.array()?;
    array_ca.array_n_unique().map(|counts| counts.into_series())
}

pub(super) fn to_list(s: &Series) -> PolarsResult<Series> {
let list_dtype = map_array_dtype_to_list_dtype(s.dtype())?;
s.cast(&list_dtype)
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The following methods are available under the `expr.arr` attribute.
Expr.arr.std
Expr.arr.to_list
Expr.arr.unique
Expr.arr.n_unique
Expr.arr.var
Expr.arr.all
Expr.arr.any
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The following methods are available under the `Series.arr` attribute.
Series.arr.std
Series.arr.to_list
Series.arr.unique
Series.arr.n_unique
Series.arr.var
Series.arr.all
Series.arr.any
Expand Down
25 changes: 25 additions & 0 deletions py-polars/polars/expr/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,31 @@ def unique(self, *, maintain_order: bool = False) -> Expr:
"""
return wrap_expr(self._pyexpr.arr_unique(maintain_order))

def n_unique(self) -> Expr:
    """
    Count the number of unique values in every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 1, 2], [2, 3, 4]],
    ...     },
    ...     schema={"a": pl.Array(pl.Int64, 3)},
    ... )
    >>> df.with_columns(n_unique=pl.col("a").arr.n_unique())
    shape: (2, 2)
    ┌───────────────┬──────────┐
    │ a             ┆ n_unique │
    │ ---           ┆ ---      │
    │ array[i64, 3] ┆ u32      │
    ╞═══════════════╪══════════╡
    │ [1, 1, 2]     ┆ 2        │
    │ [2, 3, 4]     ┆ 3        │
    └───────────────┴──────────┘
    """
    counted = self._pyexpr.arr_n_unique()
    return wrap_expr(counted)

def to_list(self) -> Expr:
"""
Convert an Array column into a List column with the same inner data type.
Expand Down
16 changes: 16 additions & 0 deletions py-polars/polars/series/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,22 @@ def unique(self, *, maintain_order: bool = False) -> Series:
└───────────┘
"""

def n_unique(self) -> Series:
    """
    Count the number of unique values in every sub-array.

    Examples
    --------
    >>> s = pl.Series("a", [[1, 2], [4, 4]], dtype=pl.Array(pl.Int64, 2))
    >>> s.arr.n_unique()
    shape: (2,)
    Series: 'a' [u32]
    [
        2
        1
    ]
    """

def to_list(self) -> Series:
"""
Convert an Array column into a List column with the same inner data type.
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/expr/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ impl PyExpr {
}
}

// Python binding for `arr.n_unique`: clones the wrapped expression, applies
// the array-namespace `n_unique` and converts the result back into `Self`.
fn arr_n_unique(&self) -> Self {
    let expr = self.inner.clone();
    expr.arr().n_unique().into()
}

// Python binding for `arr.to_list`: clones the wrapped expression, applies
// the array-namespace `to_list` and converts the result back into `Self`.
fn arr_to_list(&self) -> Self {
    let expr = self.inner.clone();
    expr.arr().to_list().into()
}
Expand Down
15 changes: 15 additions & 0 deletions py-polars/tests/unit/namespaces/array/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,3 +382,18 @@ def test_array_shift() -> None:
schema={"lit": pl.Array(pl.Int64, 3), "expr": pl.Array(pl.Int64, 3)},
)
assert_frame_equal(out, expected)


def test_array_n_unique() -> None:
    # Cases covered, in row order: a sub-array with duplicates, one with all
    # values equal, one holding only nulls, and a null row.
    rows = [[1, 1, 2], [3, 3, 3], [None, None, None], None]
    df = pl.DataFrame({"a": rows}, schema={"a": pl.Array(pl.Int64, 3)})

    expected = pl.DataFrame(
        {"n_unique": [2, 1, 1, None]}, schema={"n_unique": pl.UInt32}
    )
    out = df.select(n_unique=pl.col("a").arr.n_unique())

    assert_frame_equal(out, expected)

0 comments on commit e1bba07

Please sign in to comment.