From 571e85e7acd44daf2ad243b35606f04f27dc9d4f Mon Sep 17 00:00:00 2001 From: Li0k Date: Mon, 16 Dec 2024 19:55:53 +0800 Subject: [PATCH 1/8] feat(batch): Supports cut or condition and push down to storage --- src/common/src/util/scan_range.rs | 520 ++++++++++++++++++ .../tests/testdata/output/range_scan.yaml | 2 +- src/frontend/src/utils/condition.rs | 122 +++- 3 files changed, 634 insertions(+), 10 deletions(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index b45db071be3cc..464a754dd09f8 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::ops::{Bound, RangeBounds}; use paste::paste; use risingwave_pb::batch_plan::scan_range::Bound as PbBound; use risingwave_pb::batch_plan::ScanRange as PbScanRange; +use super::sort_util::{cmp_rows, OrderType}; use crate::hash::table_distribution::TableDistribution; use crate::hash::VirtualNode; use crate::types::{Datum, ScalarImpl}; @@ -98,6 +100,126 @@ impl ScanRange { range: full_range(), } } + + pub fn covert_to_range(&self) -> (Bound>, Bound>) { + fn handle_bound(eq_conds: &Vec, bound: &Bound>) -> Bound> { + match bound { + Bound::Included(literal) => { + let mut prefix = eq_conds.clone(); + prefix.extend_from_slice(literal); + Bound::Included(prefix) + } + Bound::Excluded(literal) => { + let mut prefix = eq_conds.clone(); + prefix.extend_from_slice(literal); + Bound::Excluded(prefix) + } + Bound::Unbounded => { + if eq_conds.is_empty() { + Bound::Unbounded + } else { + Bound::Included(eq_conds.clone()) + } + } + } + } + + let new_left = handle_bound(&self.eq_conds, &self.range.0); + let new_right = handle_bound(&self.eq_conds, &self.range.1); + (new_left, new_right) + } + + pub fn is_overlap(left: &ScanRange, right: &ScanRange, order_types: &[OrderType]) -> bool { + let range_left = left.covert_to_range(); + let range_right = right.covert_to_range(); + + if range_left.0 == Bound::Unbounded + || range_right.0 == Bound::Unbounded + || range_left.1 == Bound::Unbounded + || range_right.1 == Bound::Unbounded + { + return true; + } + + Self::range_overlap_check(range_left, range_right, order_types) + } + + fn range_overlap_check( + left: (Bound>, Bound>), + right: (Bound>, Bound>), + order_types: &[OrderType], + ) -> bool { + let (left_start, left_end) = &left; + let (right_start, right_end) = &right; + + let left_start_vec = match &left_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_start_vec = match &right_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + assert!(!left_start_vec.is_empty()); + assert!(!right_start_vec.is_empty()); + + let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + + let cmp_start = cmp_rows( + &left_start_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); + + let right_start_before_left_start = cmp_start.is_gt(); + + if right_start_before_left_start { + return Self::range_overlap_check(right, left, order_types); + } + + if cmp_start == Ordering::Equal + && let (Bound::Included(_), Bound::Included(_)) = (left_start, right_start) + { + return true; + } + + let left_end_vec = match &left_end { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_end_vec = match &right_end { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + assert!(!left_end_vec.is_empty()); + assert!(!right_end_vec.is_empty()); + + let cmp_end = cmp_rows( + &left_end_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); + + match cmp_end { + Ordering::Equal => { + if let (Bound::Included(_), Bound::Included(_)) = (left_end, right_start) { + return true; + } + } + + Ordering::Greater => { + return true; + } + + Ordering::Less => { + return false; + } + } + + false + } } pub const fn full_range() -> (Bound, Bound) { @@ -221,4 +343,402 @@ mod tests { assert_eq!(scan_range.try_compute_vnode(&dist), Some(vnode)); } + + #[test] + fn test_covert_to_range() { + { + // test empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(1))]), + Bound::Included(vec![Some(ScalarImpl::from(2))]), + ), + }; + + let (left, right) = scan_range.covert_to_range(); + assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Included(vec![Some(ScalarImpl::from(2))])); + } + + { + // test exclude bound with empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Excluded(vec![Some(ScalarImpl::from(1))]), + Bound::Excluded(vec![Some(ScalarImpl::from(2))]), + ), + }; + + let (left, right) = scan_range.covert_to_range(); + assert_eq!(left, Bound::Excluded(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Excluded(vec![Some(ScalarImpl::from(2))])); + } + + { + // test include bound with empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(1))]), + Bound::Unbounded, + ), + }; + + let (left, right) = scan_range.covert_to_range(); + assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Unbounded); + } + + { + // test exclude bound with non-empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Excluded(vec![Some(ScalarImpl::from(2))]), + Bound::Excluded(vec![Some(ScalarImpl::from(3))]), + ), + }; + + let (left, right) = scan_range.covert_to_range(); + assert_eq!( + left, + Bound::Excluded(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!( + right, + Bound::Excluded(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(3))]) + ); + } + + { + // test include bound with non-empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(2))]), + Bound::Included(vec![Some(ScalarImpl::from(3))]), + ), + }; + + let (left, right) = scan_range.covert_to_range(); + assert_eq!( + left, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!( + right, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(3))]) + ); + } + + { + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(2))]), + Bound::Unbounded, + ), + }; + + let (left, right) = scan_range.covert_to_range(); + assert_eq!( + left, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!(right, Bound::Included(vec![Some(ScalarImpl::from(1))])); + } + } + + #[test] + fn test_range_overlap_check() { + let order_types = vec![OrderType::ascending()]; + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3)), Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7)), Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(2))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Encluded) + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Encluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + } } diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index ed8b4b863fe64..69b84d1bb9ff6 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -418,7 +418,7 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (((orders_count_by_user.user_id = 1:Int32) OR ((orders_count_by_user.user_id = 2:Int32) AND In(orders_count_by_user.date, 1111:Int32, 2222:Int32))) OR (orders_count_by_user.user_id <> 3:Int32)) } - └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id = Int64(1), orders_count_by_user.user_id = Int64(2) AND orders_count_by_user.date = Int32(1111), orders_count_by_user.user_id = Int64(2) AND orders_count_by_user.date = Int32(2222)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } - name: When any arm of or clause is not equal type, we can't convert it to scan range yet. before: - create_table_and_mv diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index e497ba821864a..888488c90f029 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -24,6 +24,7 @@ use risingwave_common::catalog::{Schema, TableDesc}; use risingwave_common::types::{DataType, DefaultOrd, ScalarImpl}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::scan_range::{is_full_range, ScanRange}; +use risingwave_common::util::sort_util::{cmp_rows, OrderType}; use crate::error::Result; use crate::expr::{ @@ -299,7 +300,7 @@ impl Condition { table_desc: Rc, max_split_range_gap: u64, disjunctions: Vec, - ) -> Result, Self)>> { + ) -> Result, bool)>> { let disjunctions_result: Result, Self)>> = disjunctions .into_iter() .map(|x| { @@ -352,9 +353,106 @@ impl Condition { } } - Ok(Some((non_overlap_scan_ranges, Condition::true_cond()))) + Ok(Some((non_overlap_scan_ranges, false))) } else { - Ok(None) + let scan_ranges = disjunctions_result + .into_iter() + .flat_map(|(scan_ranges, _)| scan_ranges) + // sort, small one first + .sorted_by(|a, b| a.eq_conds.len().cmp(&b.eq_conds.len())) + .collect_vec(); + + if scan_ranges.is_empty() { + return Ok(None); + } + + if scan_ranges.len() == 1 { + return Ok(Some((scan_ranges, true))); + } + + let mut output_scan_ranges: Vec = vec![]; + output_scan_ranges.push(scan_ranges[0].clone()); + let order_types = table_desc.pk.iter().map(|x| x.order_type).collect_vec(); + let mut idx = 1; + loop { + if idx >= scan_ranges.len() { + break; + } + + let scan_range_left = output_scan_ranges.last_mut().unwrap(); + let scan_range_right = &scan_ranges[idx]; + + if scan_range_left.eq_conds == scan_range_right.eq_conds { + // range merge + + if !ScanRange::is_overlap(scan_range_left, scan_range_right, &order_types) { + // not merge + output_scan_ranges.push(scan_range_right.clone()); + idx += 1; + continue; + } + + // merge range + fn merge_bound( + left_scan_range: &Bound>>, + right_scan_range: &Bound>>, + order_types: &[OrderType], + left_bound: bool, + ) -> Bound>> { + let left_scan_range = match left_scan_range { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + Bound::Unbounded => return Bound::Unbounded, + }; + + let right_scan_range = match right_scan_range { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + Bound::Unbounded => return Bound::Unbounded, + }; + + let cmp_len = left_scan_range.len().min(right_scan_range.len()); + + let cmp = cmp_rows( + &left_scan_range[..cmp_len], + &right_scan_range[..cmp_len], + &order_types[..cmp_len], + ); + + let bound = { + if (cmp.is_le() && left_bound) || (cmp.is_ge() && !left_bound) { + left_scan_range.to_vec() + } else { + right_scan_range.to_vec() + } + }; + + Bound::Included(bound) + } + + scan_range_left.range.0 = merge_bound( + &scan_range_left.range.0, + &scan_range_right.range.0, + &order_types, + true, + ); + + scan_range_left.range.1 = merge_bound( + &scan_range_left.range.1, + &scan_range_right.range.1, + &order_types, + false, + ); + + if scan_range_left.is_full_table_scan() { + return Ok(None); + } + } else { + output_scan_ranges.push(scan_range_right.clone()); + } + + idx += 1; + } + + Ok(Some((output_scan_ranges, true))) } } @@ -470,12 +568,18 @@ impl Condition { // It's an OR. if self.conjunctions.len() == 1 { if let Some(disjunctions) = self.conjunctions[0].as_or_disjunctions() { - if let Some((scan_ranges, other_condition)) = Self::disjunctions_to_scan_ranges( - table_desc, - max_split_range_gap, - disjunctions, - )? { - return Ok((scan_ranges, other_condition)); + if let Some((scan_ranges, maintaining_condition)) = + Self::disjunctions_to_scan_ranges( + table_desc, + max_split_range_gap, + disjunctions, + )? + { + if maintaining_condition { + return Ok((scan_ranges, self)); + } else { + return Ok((scan_ranges, Condition::true_cond())); + } } else { return Ok((vec![], self)); } From ae6736b16335a4704f76841e11bc642c763eca94 Mon Sep 17 00:00:00 2001 From: Li0k Date: Wed, 18 Dec 2024 15:35:48 +0800 Subject: [PATCH 2/8] fix(frontend): fix buggy pushdown --- .../tests/testdata/output/range_scan.yaml | 2 +- src/frontend/src/utils/condition.rs | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index 69b84d1bb9ff6..ed8b4b863fe64 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -418,7 +418,7 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (((orders_count_by_user.user_id = 1:Int32) OR ((orders_count_by_user.user_id = 2:Int32) AND In(orders_count_by_user.date, 1111:Int32, 2222:Int32))) OR (orders_count_by_user.user_id <> 3:Int32)) } - └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id = Int64(1), orders_count_by_user.user_id = Int64(2) AND orders_count_by_user.date = Int32(1111), orders_count_by_user.user_id = Int64(2) AND orders_count_by_user.date = Int32(2222)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } - name: When any arm of or clause is not equal type, we can't convert it to scan range yet. before: - create_table_and_mv diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index 888488c90f029..a98b2dd5f471d 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -355,12 +355,16 @@ impl Condition { Ok(Some((non_overlap_scan_ranges, false))) } else { - let scan_ranges = disjunctions_result - .into_iter() - .flat_map(|(scan_ranges, _)| scan_ranges) - // sort, small one first - .sorted_by(|a, b| a.eq_conds.len().cmp(&b.eq_conds.len())) - .collect_vec(); + let mut scan_ranges = vec![]; + for (scan_ranges_chunk, _) in disjunctions_result { + if scan_ranges_chunk.is_empty() { + // full scan range + return Ok(None); + } + + scan_ranges.extend(scan_ranges_chunk); + } + scan_ranges.sort_by(|a, b| a.eq_conds.len().cmp(&b.eq_conds.len())); if scan_ranges.is_empty() { return Ok(None); @@ -425,6 +429,7 @@ impl Condition { } }; + // Included Bound just for convenience, the correctness will be guaranteed by the upper level filter. Bound::Included(bound) } From fe5c2b9d4717556d2ec673b961fb56985493071e Mon Sep 17 00:00:00 2001 From: Li0k Date: Wed, 18 Dec 2024 17:05:46 +0800 Subject: [PATCH 3/8] fix(frontend): overlap bug fix and add more test --- src/common/src/util/scan_range.rs | 163 +++++++++++++----- .../tests/testdata/input/range_scan.yaml | 45 +++++ .../tests/testdata/output/range_scan.yaml | 59 +++++++ 3 files changed, 223 insertions(+), 44 deletions(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index 464a754dd09f8..41c23e5ef6872 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -132,15 +132,6 @@ impl ScanRange { pub fn is_overlap(left: &ScanRange, right: &ScanRange, order_types: &[OrderType]) -> bool { let range_left = left.covert_to_range(); let range_right = right.covert_to_range(); - - if range_left.0 == Bound::Unbounded - || range_right.0 == Bound::Unbounded - || range_left.1 == Bound::Unbounded - || range_right.1 == Bound::Unbounded - { - return true; - } - Self::range_overlap_check(range_left, range_right, order_types) } @@ -161,27 +152,37 @@ impl ScanRange { _ => &vec![], }; - assert!(!left_start_vec.is_empty()); - assert!(!right_start_vec.is_empty()); - - let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + if left_start_vec.is_empty() && right_start_vec.is_empty() { + return true; + } - let cmp_start = cmp_rows( - &left_start_vec[0..cmp_column_len], - &right_start_vec[0..cmp_column_len], - &order_types[0..cmp_column_len], - ); + // Unbounded is always less than any other bound + if left_start_vec.is_empty() { + // pass + } else if right_start_vec.is_empty() { + return Self::range_overlap_check(right, left, order_types); + } else { + assert!(!left_start_vec.is_empty()); + assert!(!right_start_vec.is_empty()); + let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + + let cmp_start = cmp_rows( + &left_start_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); - let right_start_before_left_start = cmp_start.is_gt(); + let right_start_before_left_start = cmp_start.is_gt(); - if right_start_before_left_start { - return Self::range_overlap_check(right, left, order_types); - } + if right_start_before_left_start { + return Self::range_overlap_check(right, left, order_types); + } - if cmp_start == Ordering::Equal - && let (Bound::Included(_), Bound::Included(_)) = (left_start, right_start) - { - return true; + if cmp_start == Ordering::Equal + && let (Bound::Included(_), Bound::Included(_)) = (left_start, right_start) + { + return true; + } } let left_end_vec = match &left_end { @@ -193,32 +194,43 @@ impl ScanRange { _ => &vec![], }; - assert!(!left_end_vec.is_empty()); - assert!(!right_end_vec.is_empty()); + if left_end_vec.is_empty() && right_end_vec.is_empty() { + return true; + } - let cmp_end = cmp_rows( - &left_end_vec[0..cmp_column_len], - &right_start_vec[0..cmp_column_len], - &order_types[0..cmp_column_len], - ); + if left_end_vec.is_empty() { + return true; + } else { + // cmp left_end and right_start + assert!(!left_end_vec.is_empty()); + assert!(!right_start_vec.is_empty()); - match cmp_end { - Ordering::Equal => { - if let (Bound::Included(_), Bound::Included(_)) = (left_end, right_start) { + let cmp_column_len = left_end_vec.len().min(right_start_vec.len()); + + let cmp_end = cmp_rows( + &left_end_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); + + match cmp_end { + Ordering::Equal => { + if let (Bound::Included(_), Bound::Included(_)) = (left_end, right_start) { + return true; + } + } + + Ordering::Greater => { return true; } - } - Ordering::Greater => { - return true; + Ordering::Less => { + return false; + } } - Ordering::Less => { - return false; - } + false } - - false } } @@ -740,5 +752,68 @@ mod tests { ), &order_types )); + + assert!(!ScanRange::range_overlap_check( + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(10))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + (Bound::Unbounded, Bound::Unbounded,), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + (Bound::Unbounded, Bound::Unbounded), + (Bound::Unbounded, Bound::Unbounded), + &order_types + )); + + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + ), + &order_types + )); } } diff --git a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml index 52863fa5aaf77..58185bcf07f96 100644 --- a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml @@ -428,3 +428,48 @@ expected_outputs: - logical_plan - batch_plan +- name: When OR clauses contain non-overlapping conditions,, we can pushdown serveral scan_range. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id >= 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + expected_outputs: + - batch_plan +- name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + expected_outputs: + - batch_plan +- name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id > 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id != 10); + expected_outputs: + - batch_plan diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index ed8b4b863fe64..2960494883bb6 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -503,3 +503,62 @@ └─LogicalFilter { predicate: (t.k = 2147483648:Int64) } └─LogicalScan { table: t, columns: [t.k, t.v, t._rw_timestamp] } batch_plan: 'BatchValues { rows: [] }' +- name: When OR clauses contain non-overlapping conditions,, we can pushdown serveral scan_range. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id >= 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id >= 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id >= Int64(10)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) OR (orders_count_by_user.user_id = 15:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20), orders_count_by_user.user_id = Int64(15)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id < 30:Int32))) OR ((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20) AND orders_count_by_user.user_id < Int64(30), orders_count_by_user.user_id >= Int64(30) AND orders_count_by_user.user_id < Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id <= 30:Int32))) OR (((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32)) OR (orders_count_by_user.user_id = 15:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id >= Int64(20) AND orders_count_by_user.user_id <= Int64(40), orders_count_by_user.user_id = Int64(15)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id > 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id > 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id != 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id <> 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } From a851fbc368aec0a19bcb408924c023b8c6e81746 Mon Sep 17 00:00:00 2001 From: Li0k Date: Mon, 23 Dec 2024 16:40:10 +0800 Subject: [PATCH 4/8] fix(frontend): fix ordering of scan range --- .../tests/testdata/input/range_scan.yaml | 6 +++ .../tests/testdata/output/range_scan.yaml | 12 +++++- src/frontend/src/utils/condition.rs | 37 ++++++++++++++++++- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml index 58185bcf07f96..0fd4ea458e55b 100644 --- a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml @@ -460,6 +460,12 @@ SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); expected_outputs: - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + expected_outputs: + - batch_plan - name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. before: - create_table_and_mv diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index 2960494883bb6..6e364866a0a7b 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -527,7 +527,7 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) OR (orders_count_by_user.user_id = 15:Int32)) } - └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20), orders_count_by_user.user_id = Int64(15)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } - before: - create_table_and_mv sql: | @@ -544,7 +544,15 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id <= 30:Int32))) OR (((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32)) OR (orders_count_by_user.user_id = 15:Int32))) } - └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id >= Int64(20) AND orders_count_by_user.user_id <= Int64(40), orders_count_by_user.user_id = Int64(15)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id >= Int64(20) AND orders_count_by_user.user_id <= Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 30:Int32)) OR ((orders_count_by_user.user_id > 5:Int32) AND (orders_count_by_user.user_id < 15:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id <= Int64(15), orders_count_by_user.user_id > Int64(30)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } - name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. before: - create_table_and_mv diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index a98b2dd5f471d..1352d96535d4a 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::collections::{BTreeMap, HashSet}; use std::fmt::{self, Debug}; use std::ops::Bound; @@ -364,7 +365,40 @@ impl Condition { scan_ranges.extend(scan_ranges_chunk); } - scan_ranges.sort_by(|a, b| a.eq_conds.len().cmp(&b.eq_conds.len())); + + let order_types = table_desc.pk.iter().map(|x| x.order_type).collect_vec(); + scan_ranges.sort_by(|left, right| { + let (left_start, _left_end) = &left.covert_to_range(); + let (right_start, _right_end) = &right.covert_to_range(); + + let left_start_vec = match &left_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_start_vec = match &right_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + if left_start_vec.is_empty() && right_start_vec.is_empty() { + return Ordering::Less; + } + + if left_start_vec.is_empty() { + return Ordering::Less; + } + + if right_start_vec.is_empty() { + return Ordering::Greater; + } + + let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + cmp_rows( + &left_start_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ) + }); if scan_ranges.is_empty() { return Ok(None); @@ -376,7 +410,6 @@ impl Condition { let mut output_scan_ranges: Vec = vec![]; output_scan_ranges.push(scan_ranges[0].clone()); - let order_types = table_desc.pk.iter().map(|x| x.order_type).collect_vec(); let mut idx = 1; loop { if idx >= scan_ranges.len() { From 7c62b5329ef9449f797bd83aa4ca3d188eda938c Mon Sep 17 00:00:00 2001 From: Li0k Date: Mon, 23 Dec 2024 16:48:19 +0800 Subject: [PATCH 5/8] fix check --- src/common/src/util/scan_range.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index 41c23e5ef6872..25c6da82ca7bb 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -199,7 +199,7 @@ impl ScanRange { } if left_end_vec.is_empty() { - return true; + true } else { // cmp left_end and right_start assert!(!left_end_vec.is_empty()); From 41105a421a7e27863cb867ab593f6be1b6974d22 Mon Sep 17 00:00:00 2001 From: Li0k Date: Mon, 23 Dec 2024 16:50:48 +0800 Subject: [PATCH 6/8] fix check --- src/common/src/util/scan_range.rs | 20 ++++++++++---------- src/frontend/src/utils/condition.rs | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index 25c6da82ca7bb..2ba39a58c73b1 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -101,7 +101,7 @@ impl ScanRange { } } - pub fn covert_to_range(&self) -> (Bound>, Bound>) { + pub fn convert_to_range(&self) -> (Bound>, Bound>) { fn handle_bound(eq_conds: &Vec, bound: &Bound>) -> Bound> { match bound { Bound::Included(literal) => { @@ -130,8 +130,8 @@ impl ScanRange { } pub fn is_overlap(left: &ScanRange, right: &ScanRange, order_types: &[OrderType]) -> bool { - let range_left = left.covert_to_range(); - let range_right = right.covert_to_range(); + let range_left = left.convert_to_range(); + let range_right = right.convert_to_range(); Self::range_overlap_check(range_left, range_right, order_types) } @@ -357,7 +357,7 @@ mod tests { } #[test] - fn test_covert_to_range() { + fn test_convert_to_range() { { // test empty eq_conds let scan_range = ScanRange { @@ -368,7 +368,7 @@ mod tests { ), }; - let (left, right) = scan_range.covert_to_range(); + let (left, right) = scan_range.convert_to_range(); assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); assert_eq!(right, Bound::Included(vec![Some(ScalarImpl::from(2))])); } @@ -383,7 +383,7 @@ mod tests { ), }; - let (left, right) = scan_range.covert_to_range(); + let (left, right) = scan_range.convert_to_range(); assert_eq!(left, Bound::Excluded(vec![Some(ScalarImpl::from(1))])); assert_eq!(right, Bound::Excluded(vec![Some(ScalarImpl::from(2))])); } @@ -398,7 +398,7 @@ mod tests { ), }; - let (left, right) = scan_range.covert_to_range(); + let (left, right) = scan_range.convert_to_range(); assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); assert_eq!(right, Bound::Unbounded); } @@ -413,7 +413,7 @@ mod tests { ), }; - let (left, right) = scan_range.covert_to_range(); + let (left, right) = scan_range.convert_to_range(); assert_eq!( left, Bound::Excluded(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) @@ -434,7 +434,7 @@ mod tests { ), }; - let (left, right) = scan_range.covert_to_range(); + let (left, right) = scan_range.convert_to_range(); assert_eq!( left, Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) @@ -454,7 +454,7 @@ mod tests { ), }; - let (left, right) = scan_range.covert_to_range(); + let (left, right) = scan_range.convert_to_range(); assert_eq!( left, Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index 1352d96535d4a..f96c27b8385fe 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -368,8 +368,8 @@ impl Condition { let order_types = table_desc.pk.iter().map(|x| x.order_type).collect_vec(); scan_ranges.sort_by(|left, right| { - let (left_start, _left_end) = &left.covert_to_range(); - let (right_start, _right_end) = &right.covert_to_range(); + let (left_start, _left_end) = &left.convert_to_range(); + let (right_start, _right_end) = &right.convert_to_range(); let left_start_vec = match &left_start { Bound::Included(vec) | Bound::Excluded(vec) => vec, From ce9fbc2df7baeead0e7f3b4e1e985f8a16c22fc5 Mon Sep 17 00:00:00 2001 From: Li0k Date: Tue, 24 Dec 2024 15:25:06 +0800 Subject: [PATCH 7/8] fix(frontend): fix desc ordering --- src/common/src/util/scan_range.rs | 14 +- .../tests/testdata/input/range_scan.yaml | 328 ++++++++++-------- .../tests/testdata/output/range_scan.yaml | 66 ++++ src/frontend/src/utils/condition.rs | 13 +- 4 files changed, 279 insertions(+), 142 deletions(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index 2ba39a58c73b1..075af89df2d03 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -15,6 +15,7 @@ use std::cmp::Ordering; use std::ops::{Bound, RangeBounds}; +use itertools::Itertools; use paste::paste; use risingwave_pb::batch_plan::scan_range::Bound as PbBound; use risingwave_pb::batch_plan::ScanRange as PbScanRange; @@ -156,6 +157,17 @@ impl ScanRange { return true; } + let order_types = if order_types.iter().all(|o| o.is_ascending()) { + order_types + } else { + // reverse order types to ascending + &order_types + .iter() + .cloned() + .map(|o| o.reverse()) + .collect_vec() + }; + // Unbounded is always less than any other bound if left_start_vec.is_empty() { // pass @@ -165,7 +177,6 @@ impl ScanRange { assert!(!left_start_vec.is_empty()); assert!(!right_start_vec.is_empty()); let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); - let cmp_start = cmp_rows( &left_start_vec[0..cmp_column_len], &right_start_vec[0..cmp_column_len], @@ -206,7 +217,6 @@ impl ScanRange { assert!(!right_start_vec.is_empty()); let cmp_column_len = left_end_vec.len().min(right_start_vec.len()); - let cmp_end = cmp_rows( &left_end_vec[0..cmp_column_len], &right_start_vec[0..cmp_column_len], diff --git a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml index 0fd4ea458e55b..374da681d0987 100644 --- a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml @@ -6,182 +6,184 @@ date INTEGER); CREATE MATERIALIZED VIEW orders_count_by_user AS SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date; + CREATE MATERIALIZED VIEW orders_count_by_user_desc AS + SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date ORDER BY user_id DESC; expected_outputs: [] - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id < 43 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 + 1 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1/0 expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 2147483647 + 1 expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 'a' expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 'a' expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = '42' expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id IS NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date = 1111 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 42 AND date = 1111 AND 2>1 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE date > 1111 AND user_id = 42 AND 5<6 AND date <= 6666 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42, 43) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42+1, 44-1) expected_outputs: - - batch_plan + - batch_plan - name: If the IN list has a larger type than the column, the InputRef is casted. Currently this case is not converted to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42.0, 43.0) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in ('42', '43') expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in ('42', '43.0') expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (2147483648, 2147483649) AND date = 6666 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) expected_outputs: - - batch_plan + - batch_plan - name: test duplicate value in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 2222) expected_outputs: - - batch_plan + - batch_plan - name: test NULL in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, NULL) expected_outputs: - - batch_plan + - batch_plan - name: test NULL in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (NULL) expected_outputs: - - batch_plan + - batch_plan - name: test multiple in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date in (4444, 3333) expected_outputs: - - batch_plan + - batch_plan - name: test eq & in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date = 3333 expected_outputs: - - batch_plan + - batch_plan - name: test eq & in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date = 4444 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (2147483648, 2147483649) AND date in (2222, 3333) expected_outputs: - - batch_plan + - batch_plan - id: create_table_and_mv_ordered sql: | CREATE TABLE orders ( @@ -194,93 +196,93 @@ ORDER BY orders_count; expected_outputs: [] - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE user_id = 42 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE user_id > 42 AND orders_count = 10 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count = 10 expected_outputs: - - batch_plan + - batch_plan - name: merge mutiple upper bound before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count < 10 and orders_count < 30 expected_outputs: - - batch_plan + - batch_plan - name: merge include and exclude upper bound of same value before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count < 10 and orders_count <= 10 expected_outputs: - - batch_plan + - batch_plan - name: merge mutiple lower bound before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count > 30 expected_outputs: - - batch_plan + - batch_plan - name: merge include and exclude lower bound of same value before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count >= 10 expected_outputs: - - batch_plan + - batch_plan - name: invalid range before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count < 5 expected_outputs: - - batch_plan + - batch_plan - name: merge cmp and eq condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 20 and orders_count < 30 and orders_count = 25 expected_outputs: - - batch_plan + - batch_plan - name: invalid range of merging cmp and eq condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 20 and orders_count < 30 and orders_count = 35 expected_outputs: - - batch_plan + - batch_plan - name: merge cmp and const-in condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count in (10,20,30,40) and orders_count <30 expected_outputs: - - batch_plan + - batch_plan - name: invalid range of merging cmp and const-in condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count in (10,20,30,40) and orders_count > 50 expected_outputs: - - batch_plan + - batch_plan - name: merge null and cmp condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count is null and orders_count < 30 expected_outputs: - - batch_plan + - batch_plan - id: create_small sql: | CREATE TABLE t(x smallint); @@ -289,193 +291,241 @@ expected_outputs: [] - name: When the constant with larger type is out of the range of the column's type, we can convert it as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x = 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the upper bound of the column's type, we can convert < as true condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the upper bound of the column's type, we can convert > as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x > 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the lower bound of the column's type, we can convert < as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < -60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the lower bound of the column's type, we can convert > as true condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x > -60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is in range of the column's type, we can convert it. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < 3::bigint and x > 1::bigint; expected_outputs: - - batch_plan + - batch_plan - name: Can't push down the in-compatitble numeric type before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x = 3.4; expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or user_id = 2; expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date = 2222); expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)); expected_outputs: - - batch_plan + - batch_plan - name: When one arm of or clause contains other conditions, we can't convert it to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)) or (user_id != 3); expected_outputs: - - batch_plan + - batch_plan - name: When any arm of or clause is not equal type, we can't convert it to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 1 or user_id < 10 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or user_id is null expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 and user_id is null expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id is null and date = 1111) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id = 2 and date is null) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id is null and date is null) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id is null or (user_id is null and date is null) expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR); create index k1 on sbtest1(k); select count(k) from sbtest1 where k between 0 and 5; expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR); create index k1 on sbtest1(k); select count(k) from sbtest1 where k between 0 and 500; expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR, primary key(id)); create index k1 on sbtest1(k); select count(k) from sbtest1 where id between 0 and 5; expected_outputs: - - batch_plan + - batch_plan - sql: | create table t (k int primary key, v int); select v from t where k = 2147483648; -- out of range of int32 expected_outputs: - - logical_plan - - batch_plan + - logical_plan + - batch_plan - name: When OR clauses contain non-overlapping conditions,, we can pushdown serveral scan_range. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20); expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id >= 10); expected_outputs: - - batch_plan + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id >= 10); + expected_outputs: + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); expected_outputs: - - batch_plan + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + expected_outputs: + - batch_plan - name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); expected_outputs: - - batch_plan + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + expected_outputs: + - batch_plan - name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id > 10); expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id > 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id != 10); expected_outputs: - - batch_plan + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id != 10); + expected_outputs: + - batch_plan diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index 6e364866a0a7b..e175558038c81 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -7,6 +7,8 @@ date INTEGER); CREATE MATERIALIZED VIEW orders_count_by_user AS SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date; + CREATE MATERIALIZED VIEW orders_count_by_user_desc AS + SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date ORDER BY user_id DESC; - before: - create_table_and_mv sql: | @@ -512,6 +514,14 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 20:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } - before: - create_table_and_mv sql: | @@ -520,6 +530,14 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id >= 10:Int32)) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id >= Int64(10)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id >= 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id >= 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id >= Int64(10)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } - before: - create_table_and_mv sql: | @@ -528,6 +546,14 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) OR (orders_count_by_user.user_id = 15:Int32)) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 20:Int32)) OR (orders_count_by_user_desc.user_id = 15:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id = Int64(15), orders_count_by_user_desc.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } - before: - create_table_and_mv sql: | @@ -536,6 +562,14 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id < 30:Int32))) OR ((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32))) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20) AND orders_count_by_user.user_id < Int64(30), orders_count_by_user.user_id >= Int64(30) AND orders_count_by_user.user_id < Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user_desc.user_id < 10:Int32) AND (orders_count_by_user_desc.user_id > 1:Int32)) OR ((orders_count_by_user_desc.user_id > 20:Int32) AND (orders_count_by_user_desc.user_id < 30:Int32))) OR ((orders_count_by_user_desc.user_id >= 30:Int32) AND (orders_count_by_user_desc.user_id < 40:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id > Int64(1) AND orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id > Int64(20) AND orders_count_by_user_desc.user_id < Int64(30), orders_count_by_user_desc.user_id >= Int64(30) AND orders_count_by_user_desc.user_id < Int64(40)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } - name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. before: - create_table_and_mv @@ -545,6 +579,14 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id <= 30:Int32))) OR (((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32)) OR (orders_count_by_user.user_id = 15:Int32))) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id >= Int64(20) AND orders_count_by_user.user_id <= Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user_desc.user_id < 10:Int32) AND (orders_count_by_user_desc.user_id > 1:Int32)) OR ((orders_count_by_user_desc.user_id > 20:Int32) AND (orders_count_by_user_desc.user_id <= 30:Int32))) OR (((orders_count_by_user_desc.user_id >= 30:Int32) AND (orders_count_by_user_desc.user_id < 40:Int32)) OR (orders_count_by_user_desc.user_id = 15:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id > Int64(1) AND orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id = Int64(15), orders_count_by_user_desc.user_id >= Int64(20) AND orders_count_by_user_desc.user_id <= Int64(40)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } - before: - create_table_and_mv sql: | @@ -553,6 +595,14 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 30:Int32)) OR ((orders_count_by_user.user_id > 5:Int32) AND (orders_count_by_user.user_id < 15:Int32))) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id <= Int64(15), orders_count_by_user.user_id > Int64(30)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 30:Int32)) OR ((orders_count_by_user_desc.user_id > 5:Int32) AND (orders_count_by_user_desc.user_id < 15:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id <= Int64(15), orders_count_by_user_desc.user_id > Int64(30)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } - name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. before: - create_table_and_mv @@ -562,6 +612,14 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id > 10:Int32)) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id > 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 20:Int32) OR (orders_count_by_user_desc.user_id > 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } - before: - create_table_and_mv sql: | @@ -570,3 +628,11 @@ BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id <> 10:Int32)) } └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id != 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 20:Int32) OR (orders_count_by_user_desc.user_id <> 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index f96c27b8385fe..f79cdb04f1c5b 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -366,7 +366,18 @@ impl Condition { scan_ranges.extend(scan_ranges_chunk); } - let order_types = table_desc.pk.iter().map(|x| x.order_type).collect_vec(); + let order_types = table_desc + .pk + .iter() + .cloned() + .map(|x| { + if x.order_type.is_descending() { + x.order_type.reverse() + } else { + x.order_type + } + }) + .collect_vec(); scan_ranges.sort_by(|left, right| { let (left_start, _left_end) = &left.convert_to_range(); let (right_start, _right_end) = &right.convert_to_range(); From f8d0a24a8cf298625114956b2a22509f627eb845 Mon Sep 17 00:00:00 2001 From: Li0k Date: Tue, 24 Dec 2024 15:38:37 +0800 Subject: [PATCH 8/8] typo --- src/common/src/util/scan_range.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index 075af89df2d03..5c56550eed279 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -164,7 +164,7 @@ impl ScanRange { &order_types .iter() .cloned() - .map(|o| o.reverse()) + .map(|o| if o.is_descending() { o.reverse() } else { o }) .collect_vec() };