Skip to content

Commit beedbd7

Browse files
authored
feat: is_sorted vtable (#2591)
Introduces a new `IsSorted` vtable. Also fixes some issues where certain stats were not set when their computation was short-circuited based on other facts.
1 parent 5959012 commit beedbd7

File tree

31 files changed

+564
-139
lines changed

31 files changed

+564
-139
lines changed
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
use vortex_array::compute::{IsSortedFn, is_sorted, is_strict_sorted};
2+
use vortex_error::VortexResult;
3+
4+
use crate::{DictArray, DictEncoding};
5+
6+
impl IsSortedFn<&DictArray> for DictEncoding {
7+
fn is_sorted(&self, array: &DictArray) -> VortexResult<bool> {
8+
let is_sorted = is_sorted(array.values())? && is_sorted(array.codes())?;
9+
Ok(is_sorted)
10+
}
11+
12+
fn is_strict_sorted(&self, array: &DictArray) -> VortexResult<bool> {
13+
let is_sorted = is_strict_sorted(array.values())? && is_strict_sorted(array.codes())?;
14+
Ok(is_sorted)
15+
}
16+
}

encodings/dict/src/compute/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
mod binary_numeric;
22
mod compare;
33
mod is_constant;
4+
mod is_sorted;
45
mod like;
56

67
use vortex_array::compute::{
7-
BinaryNumericFn, CompareFn, FilterFn, IsConstantFn, LikeFn, ScalarAtFn, SliceFn, TakeFn,
8-
filter, scalar_at, slice, take,
8+
BinaryNumericFn, CompareFn, FilterFn, IsConstantFn, IsSortedFn, LikeFn, ScalarAtFn, SliceFn,
9+
TakeFn, filter, scalar_at, slice, take,
910
};
1011
use vortex_array::vtable::ComputeVTable;
1112
use vortex_array::{Array, ArrayRef};
@@ -28,6 +29,10 @@ impl ComputeVTable for DictEncoding {
2829
Some(self)
2930
}
3031

32+
fn is_sorted_fn(&self) -> Option<&dyn IsSortedFn<&dyn Array>> {
33+
Some(self)
34+
}
35+
3136
fn filter_fn(&self) -> Option<&dyn FilterFn<&dyn Array>> {
3237
Some(self)
3338
}

encodings/dict/src/stats.rs

-31
Original file line numberDiff line numberDiff line change
@@ -20,44 +20,13 @@ impl StatisticsVTable<&DictArray> for DictEncoding {
2020
stats.set(Stat::Max, Precision::exact(max));
2121
}
2222
}
23-
Stat::IsConstant => {
24-
if let Some(is_constant) = array.codes().statistics().compute_is_constant() {
25-
stats.set(Stat::IsConstant, Precision::exact(is_constant));
26-
}
27-
}
2823
Stat::NullCount => {
2924
if let Some(null_count) =
3025
array.codes().statistics().compute_stat(Stat::NullCount)?
3126
{
3227
stats.set(Stat::NullCount, Precision::exact(null_count));
3328
}
3429
}
35-
Stat::IsSorted | Stat::IsStrictSorted => {
36-
// if dictionary is sorted
37-
if array
38-
.values()
39-
.statistics()
40-
.compute_is_sorted()
41-
.unwrap_or(false)
42-
{
43-
if let Some(codes_are_sorted) =
44-
array.codes().statistics().compute_stat(Stat::IsSorted)?
45-
{
46-
stats.set(Stat::IsSorted, Precision::exact(codes_are_sorted));
47-
}
48-
49-
if let Some(codes_are_strict_sorted) = array
50-
.codes()
51-
.statistics()
52-
.compute_stat(Stat::IsStrictSorted)?
53-
{
54-
stats.set(
55-
Stat::IsStrictSorted,
56-
Precision::exact(codes_are_strict_sorted),
57-
);
58-
}
59-
}
60-
}
6130
_ => {}
6231
}
6332

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use vortex_array::Array;
2+
use vortex_array::compute::{IsSortedFn, is_sorted, is_strict_sorted};
3+
4+
use crate::{RunEndArray, RunEndEncoding};
5+
6+
impl IsSortedFn<&RunEndArray> for RunEndEncoding {
7+
fn is_sorted(&self, array: &RunEndArray) -> vortex_error::VortexResult<bool> {
8+
is_sorted(array.values())
9+
}
10+
11+
fn is_strict_sorted(&self, array: &RunEndArray) -> vortex_error::VortexResult<bool> {
12+
is_strict_sorted(array.to_canonical()?.as_ref())
13+
}
14+
}

encodings/runend/src/compute/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@ mod compare;
33
mod fill_null;
44
pub(crate) mod filter;
55
mod invert;
6+
mod is_sorted;
67
mod scalar_at;
78
mod slice;
89
pub(crate) mod take;
910
mod take_from;
1011

1112
use vortex_array::Array;
1213
use vortex_array::compute::{
13-
BinaryNumericFn, CompareFn, FillNullFn, FilterFn, InvertFn, ScalarAtFn, SliceFn, TakeFn,
14-
TakeFromFn,
14+
BinaryNumericFn, CompareFn, FillNullFn, FilterFn, InvertFn, IsSortedFn, ScalarAtFn, SliceFn,
15+
TakeFn, TakeFromFn,
1516
};
1617
use vortex_array::vtable::ComputeVTable;
1718

@@ -38,6 +39,10 @@ impl ComputeVTable for RunEndEncoding {
3839
Some(self)
3940
}
4041

42+
fn is_sorted_fn(&self) -> Option<&dyn IsSortedFn<&dyn Array>> {
43+
Some(self)
44+
}
45+
4146
fn scalar_at_fn(&self) -> Option<&dyn ScalarAtFn<&dyn Array>> {
4247
Some(self)
4348
}

encodings/runend/src/statistics.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ mod tests {
193193
);
194194
assert!(!arr.statistics().compute_as::<bool>(Stat::IsSorted).unwrap());
195195

196+
// Slicing down to [null, false, false]
196197
let sliced = slice(&arr, 4, 7).unwrap();
197198

198199
assert!(!sliced.statistics().compute_as::<bool>(Stat::Min).unwrap());
@@ -204,9 +205,9 @@ mod tests {
204205
.unwrap(),
205206
1
206207
);
207-
// Not sorted because null must come last
208+
208209
assert!(
209-
!sliced
210+
sliced
210211
.statistics()
211212
.compute_as::<bool>(Stat::IsSorted)
212213
.unwrap()

vortex-array/src/array/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,11 @@ pub trait ArrayExt: Array {
249249
fn as_opt<A: Array + 'static>(&self) -> Option<&A> {
250250
self.as_any().downcast_ref::<A>()
251251
}
252+
253+
/// Is self an array with encoding `A`.
254+
fn is<A: Array + 'static>(&self) -> bool {
255+
self.as_opt::<A>().is_some()
256+
}
252257
}
253258

254259
impl<A: Array + ?Sized> ArrayExt for A {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
use vortex_error::VortexResult;
2+
use vortex_mask::Mask;
3+
4+
use crate::Array;
5+
use crate::arrays::{BoolArray, BoolEncoding};
6+
use crate::compute::{IsSortedFn, IsSortedIteratorExt};
7+
8+
impl IsSortedFn<&BoolArray> for BoolEncoding {
9+
fn is_sorted(&self, array: &BoolArray) -> VortexResult<bool> {
10+
match array.validity_mask()? {
11+
Mask::AllFalse(_) => Ok(true),
12+
Mask::AllTrue(_) => Ok(array.boolean_buffer().iter().is_sorted()),
13+
Mask::Values(mask_values) => {
14+
let set_indices = mask_values.boolean_buffer().set_indices();
15+
let values = array.boolean_buffer();
16+
let values_iter = set_indices.map(|idx|
17+
// Safety:
18+
// All idxs are in-bounds for the array.
19+
unsafe {
20+
values.value_unchecked(idx)
21+
});
22+
23+
Ok(values_iter.is_sorted())
24+
}
25+
}
26+
}
27+
28+
fn is_strict_sorted(&self, array: &BoolArray) -> VortexResult<bool> {
29+
match array.validity_mask()? {
30+
Mask::AllFalse(_) => Ok(false),
31+
Mask::AllTrue(_) => Ok(array.boolean_buffer().iter().is_strict_sorted()),
32+
Mask::Values(mask_values) => {
33+
let validity_buffer = mask_values.boolean_buffer();
34+
let values = array.boolean_buffer();
35+
36+
Ok(validity_buffer
37+
.iter()
38+
.zip(values.iter())
39+
.map(|(is_valid, value)| is_valid.then_some(value))
40+
.is_strict_sorted())
41+
}
42+
}
43+
}
44+
}

vortex-array/src/arrays/bool/compute/mod.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use crate::Array;
22
use crate::arrays::BoolEncoding;
33
use crate::compute::{
4-
BinaryBooleanFn, CastFn, FillForwardFn, FillNullFn, FilterFn, InvertFn, IsConstantFn, MaskFn,
5-
MinMaxFn, ScalarAtFn, SliceFn, SumFn, TakeFn, ToArrowFn, UncompressedSizeFn,
4+
BinaryBooleanFn, CastFn, FillForwardFn, FillNullFn, FilterFn, InvertFn, IsConstantFn,
5+
IsSortedFn, MaskFn, MinMaxFn, ScalarAtFn, SliceFn, SumFn, TakeFn, ToArrowFn,
6+
UncompressedSizeFn,
67
};
78
use crate::vtable::ComputeVTable;
89

@@ -13,6 +14,7 @@ pub mod filter;
1314
mod flatten;
1415
mod invert;
1516
mod is_constant;
17+
mod is_sorted;
1618
mod mask;
1719
mod min_max;
1820
mod scalar_at;
@@ -55,6 +57,10 @@ impl ComputeVTable for BoolEncoding {
5557
Some(self)
5658
}
5759

60+
fn is_sorted_fn(&self) -> Option<&dyn IsSortedFn<&dyn Array>> {
61+
Some(self)
62+
}
63+
5864
fn mask_fn(&self) -> Option<&dyn MaskFn<&dyn Array>> {
5965
Some(self)
6066
}

vortex-array/src/arrays/bool/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ impl FromIterator<bool> for BoolArray {
4343
impl FromIterator<Option<bool>> for BoolArray {
4444
fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
4545
let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
46+
4647
Self::new(
4748
buffer,
4849
nulls.map(Validity::from).unwrap_or(Validity::AllValid),

vortex-array/src/arrays/bool/stats.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,13 @@ mod test {
236236
#[test]
237237
fn empty_array() {
238238
let bool_arr = BoolArray::new(BooleanBuffer::new_set(0), Validity::NonNullable);
239-
assert!(bool_arr.statistics().compute_is_strict_sorted().is_none());
240-
assert!(bool_arr.statistics().compute_is_sorted().is_none());
239+
assert!(
240+
bool_arr
241+
.statistics()
242+
.compute_is_strict_sorted()
243+
.is_some_and(|v| v)
244+
);
245+
assert!(bool_arr.statistics().compute_is_sorted().is_some_and(|v| v));
241246
assert!(bool_arr.statistics().compute_is_constant().is_none());
242247
assert!(bool_arr.statistics().compute_min::<bool>().is_none());
243248
assert!(bool_arr.statistics().compute_max::<bool>().is_none());
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
use vortex_error::VortexResult;
2+
3+
use crate::Array;
4+
use crate::arrays::{ChunkedArray, ChunkedEncoding};
5+
use crate::compute::{IsSortedFn, is_sorted, is_strict_sorted, scalar_at};
6+
7+
impl IsSortedFn<&ChunkedArray> for ChunkedEncoding {
8+
fn is_sorted(&self, array: &ChunkedArray) -> VortexResult<bool> {
9+
is_sorted_impl(array, false, is_sorted)
10+
}
11+
12+
fn is_strict_sorted(&self, array: &ChunkedArray) -> VortexResult<bool> {
13+
is_sorted_impl(array, true, is_strict_sorted)
14+
}
15+
}
16+
17+
fn is_sorted_impl(
18+
array: &ChunkedArray,
19+
strict: bool,
20+
reentry_fn: impl Fn(&dyn Array) -> VortexResult<bool>,
21+
) -> VortexResult<bool> {
22+
let mut first_last = Vec::default();
23+
24+
for chunk in array.chunks() {
25+
if chunk.is_empty() {
26+
continue;
27+
}
28+
29+
let first = scalar_at(chunk, 0)?;
30+
let last = scalar_at(chunk, chunk.len() - 1)?;
31+
32+
first_last.push((first, last));
33+
}
34+
35+
let chunk_sorted = first_last
36+
.iter()
37+
.is_sorted_by(|a, b| if strict { a.1 < b.0 } else { a.1 <= b.0 });
38+
39+
if !chunk_sorted {
40+
return Ok(false);
41+
}
42+
43+
for chunk in array.chunks() {
44+
if !reentry_fn(chunk)? {
45+
return Ok(false);
46+
}
47+
}
48+
49+
Ok(true)
50+
}

vortex-array/src/arrays/chunked/compute/mod.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ use crate::arrays::ChunkedEncoding;
55
use crate::arrays::chunked::ChunkedArray;
66
use crate::compute::{
77
BinaryBooleanFn, BinaryNumericFn, CastFn, CompareFn, FillNullFn, FilterFn, InvertFn,
8-
IsConstantFn, MaskFn, MinMaxFn, ScalarAtFn, SliceFn, TakeFn, UncompressedSizeFn, try_cast,
8+
IsConstantFn, IsSortedFn, MaskFn, MinMaxFn, ScalarAtFn, SliceFn, TakeFn, UncompressedSizeFn,
9+
try_cast,
910
};
1011
use crate::vtable::ComputeVTable;
1112
use crate::{Array, ArrayRef};
@@ -17,6 +18,7 @@ mod fill_null;
1718
mod filter;
1819
mod invert;
1920
mod is_constant;
21+
mod is_sorted;
2022
mod mask;
2123
mod min_max;
2224
mod scalar_at;
@@ -58,6 +60,10 @@ impl ComputeVTable for ChunkedEncoding {
5860
Some(self)
5961
}
6062

63+
fn is_sorted_fn(&self) -> Option<&dyn IsSortedFn<&dyn Array>> {
64+
Some(self)
65+
}
66+
6167
fn mask_fn(&self) -> Option<&dyn MaskFn<&dyn Array>> {
6268
Some(self)
6369
}

vortex-array/src/arrays/extension/compute/mod.rs

+17-3
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ use vortex_scalar::Scalar;
88
use crate::arrays::ExtensionEncoding;
99
use crate::arrays::extension::ExtensionArray;
1010
use crate::compute::{
11-
CastFn, CompareFn, FilterFn, IsConstantFn, IsConstantOpts, MinMaxFn, MinMaxResult, ScalarAtFn,
12-
SliceFn, SumFn, TakeFn, ToArrowFn, UncompressedSizeFn, filter, is_constant_opts, min_max,
13-
scalar_at, slice, sum, take, uncompressed_size,
11+
CastFn, CompareFn, FilterFn, IsConstantFn, IsConstantOpts, IsSortedFn, MinMaxFn, MinMaxResult,
12+
ScalarAtFn, SliceFn, SumFn, TakeFn, ToArrowFn, UncompressedSizeFn, filter, is_constant_opts,
13+
is_sorted, is_strict_sorted, min_max, scalar_at, slice, sum, take, uncompressed_size,
1414
};
1515
use crate::variants::ExtensionArrayTrait;
1616
use crate::vtable::ComputeVTable;
@@ -36,6 +36,10 @@ impl ComputeVTable for ExtensionEncoding {
3636
Some(self)
3737
}
3838

39+
fn is_sorted_fn(&self) -> Option<&dyn IsSortedFn<&dyn Array>> {
40+
Some(self)
41+
}
42+
3943
fn scalar_at_fn(&self) -> Option<&dyn ScalarAtFn<&dyn Array>> {
4044
Some(self)
4145
}
@@ -134,3 +138,13 @@ impl UncompressedSizeFn<&ExtensionArray> for ExtensionEncoding {
134138
uncompressed_size(array.storage())
135139
}
136140
}
141+
142+
impl IsSortedFn<&ExtensionArray> for ExtensionEncoding {
143+
fn is_sorted(&self, array: &ExtensionArray) -> VortexResult<bool> {
144+
is_sorted(array.storage())
145+
}
146+
147+
fn is_strict_sorted(&self, array: &ExtensionArray) -> VortexResult<bool> {
148+
is_strict_sorted(array.storage())
149+
}
150+
}

0 commit comments

Comments
 (0)