Skip to content

Commit

Permalink
Continued work on sketch of expr-based index_of().
Browse files Browse the repository at this point in the history
  • Loading branch information
pythonspeed committed Oct 23, 2024
1 parent 7f02952 commit f3bfe8e
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 93 deletions.
3 changes: 2 additions & 1 deletion crates/polars-ops/src/series/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ mod rolling;
#[cfg(feature = "round_series")]
mod round;
// TODO add a feature?
mod search;
mod index_of;
#[cfg(feature = "search_sorted")]
mod search_sorted;
#[cfg(feature = "to_dummies")]
Expand Down Expand Up @@ -124,6 +124,7 @@ pub use rle::*;
pub use rolling::*;
#[cfg(feature = "round_series")]
pub use round::*;
pub use index_of::*;
#[cfg(feature = "search_sorted")]
pub use search_sorted::*;
#[cfg(feature = "to_dummies")]
Expand Down
77 changes: 0 additions & 77 deletions crates/polars-ops/src/series/ops/search.rs

This file was deleted.

16 changes: 16 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/index_of.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use polars_ops::series::index_of as index_of_op;
use super::*;

/// Finds the index of a single value inside a column.
///
/// `s[0]` is the column that is searched and `s[1]` holds the value to look for.
///
/// # Errors
///
/// Returns a `ComputeError` when `s[1]` does not contain exactly one element, and
/// propagates any error produced by the underlying `index_of` operation.
///
/// # Returns
///
/// `Ok(None)` when the underlying search yields no index; otherwise a one-element
/// column holding the index, named after the searched column.
pub(super) fn index_of(s: &mut [Column]) -> PolarsResult<Option<Column>> {
    let series = s[0].as_materialized_series();
    let value = s[1].as_materialized_series();
    if value.len() != 1 {
        polars_bail!(
            ComputeError:
            "there can only be a single value searched for in `index_of` expressions, but {} values were given",
            value.len(),
        );
    }
    let result = index_of_op(series, value)?;
    // NOTE(review): `as` truncates silently if the index does not fit in `IdxSize`;
    // presumably impossible for a valid series length — confirm.
    Ok(result.map(|r| Column::new(series.name().clone(), [r as IdxSize])))
}
7 changes: 7 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ mod round;
#[cfg(feature = "row_hash")]
mod row_hash;
pub(super) mod schema;
mod index_of;
#[cfg(feature = "search_sorted")]
mod search_sorted;
mod shift_and_fill;
Expand Down Expand Up @@ -154,6 +155,7 @@ pub enum FunctionExpr {
Hash(u64, u64, u64, u64),
#[cfg(feature = "arg_where")]
ArgWhere,
IndexOf,
#[cfg(feature = "search_sorted")]
SearchSorted(SearchSortedSide),
#[cfg(feature = "range")]
Expand Down Expand Up @@ -392,6 +394,7 @@ impl Hash for FunctionExpr {
#[cfg(feature = "business")]
Business(f) => f.hash(state),
Pow(f) => f.hash(state),
IndexOf => {},
#[cfg(feature = "search_sorted")]
SearchSorted(f) => f.hash(state),
#[cfg(feature = "random")]
Expand Down Expand Up @@ -629,6 +632,7 @@ impl Display for FunctionExpr {
Hash(_, _, _, _) => "hash",
#[cfg(feature = "arg_where")]
ArgWhere => "arg_where",
IndexOf => "index_of",
#[cfg(feature = "search_sorted")]
SearchSorted(_) => "search_sorted",
#[cfg(feature = "range")]
Expand Down Expand Up @@ -918,6 +922,9 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn ColumnsUdf>> {
ArgWhere => {
wrap!(arg_where::arg_where)
},
IndexOf => {
wrap!(index_of::index_of)
}
#[cfg(feature = "search_sorted")]
SearchSorted(side) => {
map_as_slice!(search_sorted::search_sorted_impl, side)
Expand Down
1 change: 1 addition & 0 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ impl FunctionExpr {
Hash(..) => mapper.with_dtype(DataType::UInt64),
#[cfg(feature = "arg_where")]
ArgWhere => mapper.with_dtype(IDX_DTYPE),
IndexOf => mapper.with_dtype(IDX_DTYPE),
#[cfg(feature = "search_sorted")]
SearchSorted(_) => mapper.with_dtype(IDX_DTYPE),
#[cfg(feature = "range")]
Expand Down
18 changes: 18 additions & 0 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,24 @@ impl Expr {
)
}

/// Find the index of a value.
///
/// Builds an `IndexOf` function expression whose inputs are `self` followed by
/// `element`, flagged as returning a scalar.
pub fn index_of<E: Into<Expr>>(self, element: E) -> Expr {
    let needle: Expr = element.into();
    // TODO which ApplyOptions, if any?
    //collect_groups: ApplyOptions::GroupWise,
    // TODO can we rely on casting here instead of doing it in the
    // function?
    let options = FunctionOptions {
        flags: FunctionFlags::default() | FunctionFlags::RETURNS_SCALAR,
        fmt_str: "index_of",
        ..Default::default()
    };
    Expr::Function {
        input: vec![self, needle],
        function: FunctionExpr::IndexOf,
        options,
    }
}

#[cfg(feature = "search_sorted")]
/// Find indices where elements should be inserted to maintain order.
pub fn search_sorted<E: Into<Expr>>(self, element: E, side: SearchSortedSide) -> Expr {
Expand Down
5 changes: 5 additions & 0 deletions crates/polars-python/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,13 +315,18 @@ impl PyExpr {
self.inner.clone().arg_min().into()
}

fn index_of(&self, element: Self) -> Self {
    // Work on a clone of the wrapped expression because `self` is only borrowed,
    // then convert the resulting expression back into the wrapper type.
    let expr = self.inner.clone();
    expr.index_of(element.inner).into()
}

#[cfg(feature = "search_sorted")]
fn search_sorted(&self, element: Self, side: Wrap<SearchSortedSide>) -> Self {
    // Unwrap the side argument and clone the borrowed expression before
    // delegating to the underlying `search_sorted`.
    let side = side.0;
    let expr = self.inner.clone();
    expr.search_sorted(element.inner, side).into()
}

fn gather(&self, idx: Self) -> Self {
    // Clone the borrowed expression, gather with the unwrapped index expression,
    // and convert the result back into the wrapper type.
    let expr = self.inner.clone();
    expr.gather(idx.inner).into()
}
Expand Down
1 change: 1 addition & 0 deletions crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,7 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> {
("hash", seed, seed_1, seed_2, seed_3).to_object(py)
},
FunctionExpr::ArgWhere => ("argwhere",).to_object(py),
FunctionExpr::IndexOf => ("index_of",).to_object(py),
#[cfg(feature = "search_sorted")]
FunctionExpr::SearchSorted(side) => (
"search_sorted",
Expand Down
1 change: 0 additions & 1 deletion crates/polars-python/src/series/scatter.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use polars::export::arrow::array::Array;
use polars::prelude::*;
use polars_core::downcast_as_macro_arg_physical;
use pyo3::prelude::*;

use super::PySeries;
Expand Down
7 changes: 7 additions & 0 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2303,6 +2303,13 @@ def arg_min(self) -> Expr:
"""
return self._from_pyexpr(self._pyexpr.arg_min())

def index_of(self, element: IntoExpr | np.ndarray[Any, Any]) -> Expr:
    """
    Build an expression that finds the index of a value.

    Parameters
    ----------
    element
        Value to search for. It is parsed into an expression with
        `str_as_lit=True` and `list_as_series=True`.
    """
    element_pyexpr = parse_into_expression(
        element,  # type: ignore[arg-type]
        str_as_lit=True,
        list_as_series=True,
    )
    return self._from_pyexpr(self._pyexpr.index_of(element_pyexpr))

def search_sorted(
self, element: IntoExpr | np.ndarray[Any, Any], side: SearchSortedSide = "any"
) -> Expr:
Expand Down
20 changes: 6 additions & 14 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4732,32 +4732,24 @@ def scatter(
self._s.scatter(indices._s, values._s)
return self

def index_of(
self,
value: Series | Iterable[PythonLiteral] | PythonLiteral | None,
) -> int | None:
def index_of(self, element) -> int | None:
"""
Get the first index of a value, or ``None`` if it's not found.
Parameters
----------
value
element
Value to find.
Examples
--------
TODO
"""
if isinstance(value, Series):
# Searching for lists or arrays:
value = value.implode()
df = F.select(F.lit(self).index_of(element))
if isinstance(element, (list, Series, pl.Expr, np.ndarray)):
return df.to_series()
else:
value = Series(values=[value])

if isinstance(self.dtype, Array):
value = value.cast(Array(self.dtype.inner, len(value[0])))

return self._s.index_of(value._s)
return df.item()

def clear(self, n: int = 0) -> Series:
"""
Expand Down

0 comments on commit f3bfe8e

Please sign in to comment.