Skip to content

Commit

Permalink
fix: sort for series with unsupported dtype should raise instead of…
Browse files Browse the repository at this point in the history
… panic (#15385)
  • Loading branch information
reswqa authored Mar 29, 2024
1 parent eba3e76 commit f61594a
Show file tree
Hide file tree
Showing 25 changed files with 83 additions and 56 deletions.
23 changes: 23 additions & 0 deletions crates/polars-core/src/chunked_array/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,29 @@ impl ArrayChunked {
.collect_ca_with_dtype(self.name(), self.dtype().clone())
}

/// Try apply a closure `F` to each array.
///
/// # Safety
/// Return series of `F` must has the same dtype and number of elements as input if it is Ok.
pub unsafe fn try_apply_amortized_same_type<'a, F>(&'a self, mut f: F) -> PolarsResult<Self>
where
F: FnMut(UnstableSeries<'a>) -> PolarsResult<Series>,
{
if self.is_empty() {
return Ok(self.clone());
}
self.amortized_iter()
.map(|opt_v| {
opt_v
.map(|v| {
let out = f(v)?;
Ok(to_arr(&out))
})
.transpose()
})
.try_collect_ca_with_dtype(self.name(), self.dtype().clone())
}

/// Zip with a `ChunkedArray` then apply a binary function `F` elementwise.
///
/// # Safety
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1843,7 +1843,7 @@ impl DataFrame {
// no need to compute the sort indices and then take by these indices
// simply sort and return as frame
if df.width() == 1 && df.check_name_to_idx(s.name()).is_ok() {
let mut out = s.sort_with(options);
let mut out = s.sort_with(options)?;
if let Some((offset, len)) = slice {
out = out.slice(offset, len);
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ impl SeriesTrait for SeriesWrap<BinaryChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
ChunkSort::sort_with(&self.0, options).into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(ChunkSort::sort_with(&self.0, options).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ impl SeriesTrait for SeriesWrap<BinaryOffsetChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
ChunkSort::sort_with(&self.0, options).into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(ChunkSort::sort_with(&self.0, options).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,8 @@ impl SeriesTrait for SeriesWrap<BooleanChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
ChunkSort::sort_with(&self.0, options).into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(ChunkSort::sort_with(&self.0, options).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/categorical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ impl SeriesTrait for SeriesWrap<CategoricalChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
self.0.sort_with(options).into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(self.0.sort_with(options).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/dates_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,8 @@ macro_rules! impl_dyn_series {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
self.0.sort_with(options).$into_logical().into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(self.0.sort_with(options).$into_logical().into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
7 changes: 4 additions & 3 deletions crates/polars-core/src/series/implementations/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,12 @@ impl SeriesTrait for SeriesWrap<DatetimeChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
self.0
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(self
.0
.sort_with(options)
.into_datetime(self.0.time_unit(), self.0.time_zone().clone())
.into_series()
.into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
7 changes: 4 additions & 3 deletions crates/polars-core/src/series/implementations/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,11 +261,12 @@ impl SeriesTrait for SeriesWrap<DecimalChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
self.0
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(self
.0
.sort_with(options)
.into_decimal_unchecked(self.0.precision(), self.0.scale())
.into_series()
.into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
7 changes: 4 additions & 3 deletions crates/polars-core/src/series/implementations/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,12 @@ impl SeriesTrait for SeriesWrap<DurationChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
self.0
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(self
.0
.sort_with(options)
.into_duration(self.0.time_unit())
.into_series()
.into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/floats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ macro_rules! impl_dyn_series {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
ChunkSort::sort_with(&self.0, options).into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(ChunkSort::sort_with(&self.0, options).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,8 @@ macro_rules! impl_dyn_series {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
ChunkSort::sort_with(&self.0, options).into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(ChunkSort::sort_with(&self.0, options).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ impl SeriesTrait for SeriesWrap<StringChunked> {
self.0.get_any_value_unchecked(index)
}

fn sort_with(&self, options: SortOptions) -> Series {
ChunkSort::sort_with(&self.0, options).into_series()
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
Ok(ChunkSort::sort_with(&self.0, options).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
22 changes: 10 additions & 12 deletions crates/polars-core/src/series/implementations/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,25 +313,23 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
&self.0
}

fn sort_with(&self, options: SortOptions) -> Series {
fn sort_with(&self, options: SortOptions) -> PolarsResult<Series> {
let df = self.0.clone().unnest();

let desc = if options.descending {
vec![true; df.width()]
} else {
vec![false; df.width()]
};
let out = df
.sort_impl(
df.columns.clone(),
desc,
options.nulls_last,
options.maintain_order,
None,
options.multithreaded,
)
.unwrap();
StructChunked::new_unchecked(self.name(), &out.columns).into_series()
let out = df.sort_impl(
df.columns.clone(),
desc,
options.nulls_last,
options.maintain_order,
None,
options.multithreaded,
)?;
Ok(StructChunked::new_unchecked(self.name(), &out.columns).into_series())
}

fn arg_sort(&self, options: SortOptions) -> IdxCa {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ impl Series {
Ok(self)
}

pub fn sort(&self, descending: bool, nulls_last: bool) -> Self {
pub fn sort(&self, descending: bool, nulls_last: bool) -> PolarsResult<Self> {
self.sort_with(SortOptions {
descending,
nulls_last,
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/series_trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@ pub trait SeriesTrait:
invalid_operation_panic!(get_unchecked, self)
}

fn sort_with(&self, _options: SortOptions) -> Series {
invalid_operation_panic!(sort_with, self)
fn sort_with(&self, _options: SortOptions) -> PolarsResult<Series> {
polars_bail!(opq = sort_with, self._dtype());
}

/// Retrieve the indexes needed for a sort.
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-lazy/src/physical_plan/expressions/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl PhysicalExpr for SortExpr {
}
fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult<Series> {
let series = self.physical_expr.evaluate(df, state)?;
Ok(series.sort_with(self.options))
series.sort_with(self.options)
}

#[allow(clippy::ptr_arg)]
Expand All @@ -65,7 +65,7 @@ impl PhysicalExpr for SortExpr {
match ac.agg_state() {
AggState::AggregatedList(s) => {
let ca = s.list().unwrap();
let out = ca.lst_sort(self.options);
let out = ca.lst_sort(self.options)?;
ac.with_series(out.into_series(), true, Some(&self.expr))?;
},
_ => {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-ops/src/chunked_array/array/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ pub trait ArrayNameSpace: AsArray {
array_all(ca)
}

fn array_sort(&self, options: SortOptions) -> ArrayChunked {
fn array_sort(&self, options: SortOptions) -> PolarsResult<ArrayChunked> {
let ca = self.as_array();
// SAFETY: Sort only changes the order of the elements in each subarray.
unsafe { ca.apply_amortized_same_type(|s| s.as_ref().sort_with(options)) }
unsafe { ca.try_apply_amortized_same_type(|s| s.as_ref().sort_with(options)) }
}

fn array_reverse(&self) -> ArrayChunked {
Expand Down
7 changes: 3 additions & 4 deletions crates/polars-ops/src/chunked_array/list/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,10 @@ pub trait ListNameSpaceImpl: AsList {
}
}

#[must_use]
fn lst_sort(&self, options: SortOptions) -> ListChunked {
fn lst_sort(&self, options: SortOptions) -> PolarsResult<ListChunked> {
let ca = self.as_list();
let out = ca.apply_amortized(|s| s.as_ref().sort_with(options));
self.same_type(out)
let out = ca.try_apply_amortized(|s| s.as_ref().sort_with(options))?;
Ok(self.same_type(out))
}

#[must_use]
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/series/ops/cut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ pub fn qcut(
include_breaks: bool,
) -> PolarsResult<Series> {
let s = s.cast(&DataType::Float64)?;
let s2 = s.sort(false, false);
let s2 = s.sort(false, false)?;
let ca = s2.f64()?;

if ca.null_count() == ca.len() {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/executors/sinks/sort/sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ impl Sink for SortSink {
nulls_last: self.sort_args.nulls_last,
multithreaded: true,
maintain_order: self.sort_args.maintain_order,
});
})?;

let instant = self.ooc_start.unwrap();
if context.verbose {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ pub(super) fn all(s: &Series) -> PolarsResult<Series> {
}

pub(super) fn sort(s: &Series, options: SortOptions) -> PolarsResult<Series> {
Ok(s.array()?.array_sort(options).into_series())
Ok(s.array()?.array_sort(options)?.into_series())
}

pub(super) fn reverse(s: &Series) -> PolarsResult<Series> {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ pub(super) fn diff(s: &Series, n: i64, null_behavior: NullBehavior) -> PolarsRes
}

pub(super) fn sort(s: &Series, options: SortOptions) -> PolarsResult<Series> {
Ok(s.list()?.lst_sort(options).into_series())
Ok(s.list()?.lst_sort(options)?.into_series())
}

pub(super) fn reverse(s: &Series) -> PolarsResult<Series> {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-sql/tests/iss_8395.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn iss_8395() -> PolarsResult<()> {
let df = res.collect()?;

// assert that the df only contains [vegetables, seafood]
let s = df.column("category")?.unique()?.sort(false, false);
let s = df.column("category")?.unique()?.sort(false, false)?;
let expected = Series::new("category", &["seafood", "vegetables"]);
assert!(s.equals(&expected));
Ok(())
Expand Down
8 changes: 6 additions & 2 deletions py-polars/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,12 @@ impl PySeries {
}
}

fn sort(&mut self, descending: bool, nulls_last: bool) -> Self {
self.series.sort(descending, nulls_last).into()
fn sort(&mut self, descending: bool, nulls_last: bool) -> PyResult<Self> {
Ok(self
.series
.sort(descending, nulls_last)
.map_err(PyPolarsErr::from)?
.into())
}

fn take_with_series(&self, indices: &PySeries) -> PyResult<Self> {
Expand Down

0 comments on commit f61594a

Please sign in to comment.