Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Handle search from left if we are searching for a value between fill and last patch in SparseArray #2609

Merged
merged 5 commits into from
Mar 10, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 87 additions & 22 deletions encodings/sparse/src/compute/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ impl SearchSortedFn<&SparseArray> for SparseEncoding {
}
}

// Find the fill position relative to patches, in case of fill being in between patches we want to find the right most
// index of the fill relative to patches.
fn fill_position(array: &SparseArray, side: SearchSortedSide) -> VortexResult<usize> {
// In not found case we need to find the relative position of fill value to the patches
let fill_result = if array.fill_scalar().is_null() {
// For null fill the patches can only ever be after the fill
SearchResult::NotFound(array.patches().min_index()?)
Expand All @@ -72,27 +73,19 @@ fn fill_position(array: &SparseArray, side: SearchSortedSide) -> VortexResult<us
array.len()
} else {
// [patch, fill, ..., fill, patch]
match side {
SearchSortedSide::Left => fill_result_index,
SearchSortedSide::Right => {
// When searching from right we need to find the right most occurrence of our fill value. If fill value
// is present in patches this would be the index of the next value after the fill value
let fill_index = array.patches().search_index(fill_result_index)?.to_index();
if fill_index < array.patches().num_patches() {
// Since we are searching from right the fill_index is the index one after the found one
let next_index =
usize::try_from(&scalar_at(array.patches().indices(), fill_index)?)?;
// fill value is dense with a next patch value we want to return the original fill_index,
// i.e. the fill value cannot exist between fill_index and next_index
if fill_index + 1 == next_index {
fill_index
} else {
next_index
}
} else {
fill_index
}
// If fill value is present in patches this would be the index of the next value after the fill value
let fill_index = array.patches().search_index(fill_result_index)?.to_index();
if fill_index < array.patches().num_patches() {
let next_index = usize::try_from(&scalar_at(array.patches().indices(), fill_index)?)?;
// fill value search result is dense with a next patch value we want to return the original fill_index,
// i.e. the fill value cannot exist between fill_index and next_index
if fill_index + 1 == next_index {
fill_index
} else {
next_index
}
} else {
fill_index
}
})
}
Expand All @@ -114,7 +107,7 @@ impl SearchSortedUsizeFn<&SparseArray> for SparseEncoding {

#[cfg(test)]
mod tests {
use rstest::rstest;
use rstest::{fixture, rstest};
use vortex_array::arrays::PrimitiveArray;
use vortex_array::compute::{SearchResult, SearchSortedSide, search_sorted};
use vortex_array::validity::Validity;
Expand Down Expand Up @@ -213,6 +206,30 @@ mod tests {
.into_array()
}

#[fixture]
fn sparse_edge_patch_high() -> ArrayRef {
SparseArray::try_new(
buffer![0u64, 1, 2, 19].into_array(),
buffer![11i32, 22, 23, 55].into_array(),
20,
Scalar::primitive(33, Nullability::NonNullable),
)
.unwrap()
.into_array()
}

#[fixture]
fn sparse_edge_patch_low() -> ArrayRef {
SparseArray::try_new(
buffer![0u64, 17, 18, 19].into_array(),
buffer![11i32, 33, 44, 55].into_array(),
20,
Scalar::primitive(22, Nullability::NonNullable),
)
.unwrap()
.into_array()
}

#[rstest]
#[case(sparse_high_null_fill(), SearchResult::NotFound(20))]
#[case(sparse_high_non_null_fill(), SearchResult::NotFound(20))]
Expand Down Expand Up @@ -386,4 +403,52 @@ mod tests {
let res = search_sorted(&array, search, SearchSortedSide::Right).unwrap();
assert_eq!(res, expected);
}

#[rstest]
fn search_between_fill_and_patch_high_left(#[from(sparse_edge_patch_high)] array: ArrayRef) {
assert_eq!(
search_sorted(&array, 25, SearchSortedSide::Left).unwrap(),
SearchResult::NotFound(3)
);
assert_eq!(
search_sorted(&array, 44, SearchSortedSide::Left).unwrap(),
SearchResult::NotFound(19)
);
}

#[rstest]
fn search_between_fill_and_patch_high_right(#[from(sparse_edge_patch_high)] array: ArrayRef) {
assert_eq!(
search_sorted(&array, 25, SearchSortedSide::Right).unwrap(),
SearchResult::NotFound(3)
);
assert_eq!(
search_sorted(&array, 44, SearchSortedSide::Right).unwrap(),
SearchResult::NotFound(19)
);
}

#[rstest]
fn search_between_fill_and_patch_low_left(#[from(sparse_edge_patch_low)] array: ArrayRef) {
assert_eq!(
search_sorted(&array, 20, SearchSortedSide::Left).unwrap(),
SearchResult::NotFound(1)
);
assert_eq!(
search_sorted(&array, 28, SearchSortedSide::Left).unwrap(),
SearchResult::NotFound(17)
);
}

#[rstest]
fn search_between_fill_and_patch_low_right(#[from(sparse_edge_patch_low)] array: ArrayRef) {
assert_eq!(
search_sorted(&array, 20, SearchSortedSide::Right).unwrap(),
SearchResult::NotFound(1)
);
assert_eq!(
search_sorted(&array, 28, SearchSortedSide::Right).unwrap(),
SearchResult::NotFound(17)
);
}
}
Loading