From a175600c5f1501dbe2282f57fac23840e4f2ab69 Mon Sep 17 00:00:00 2001 From: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Date: Mon, 23 Dec 2024 19:32:18 +0800 Subject: [PATCH 1/9] fix(frontend): fix row filter with pk diff order type (#19797) --- e2e_test/batch/basic/row_filter.slt.part | 65 +++++++++++++++++++ .../executors/src/executor/row_seq_scan.rs | 1 + .../tests/testdata/input/row_filter.yaml | 6 ++ .../tests/testdata/output/row_filter.yaml | 12 +++- src/frontend/src/utils/condition.rs | 47 +++++++++++--- 5 files changed, 119 insertions(+), 12 deletions(-) create mode 100644 e2e_test/batch/basic/row_filter.slt.part diff --git a/e2e_test/batch/basic/row_filter.slt.part b/e2e_test/batch/basic/row_filter.slt.part new file mode 100644 index 0000000000000..fdec809d92379 --- /dev/null +++ b/e2e_test/batch/basic/row_filter.slt.part @@ -0,0 +1,65 @@ +statement ok +create table t1(v1 int, v2 int, v3 int); + +statement ok +insert into t1 values(1,1,1),(1,2,1),(1,2,2),(1,3,1),(1,3,2),(1,3,3); + +statement ok +create materialized view mv1 as select * from t1 order by v1 asc, v2 desc, v3 asc; + +statement ok +create materialized view mv2 as select * from t1 order by v1 desc, v2 desc, v3 desc; + +statement ok +create materialized view mv3 as select * from t1 order by v1 asc, v2 asc, v3 asc; + +query III +select * from mv1 where (v1,v2,v3) > (1,3,1) order by v3; +---- +1 3 2 +1 3 3 + +query III +select * from mv2 where (v1,v2,v3) > (1,3,1) order by v3; +---- +1 3 2 +1 3 3 + +query III +select * from mv3 where (v1,v2,v3) > (1,3,1) order by v3; +---- +1 3 2 +1 3 3 + +query III +select * from mv1 where (v1,v2,v3) < (1,3,1) order by v1,v2,v3; +---- +1 1 1 +1 2 1 +1 2 2 + +query III +select * from mv2 where (v1,v2,v3) < (1,3,1) order by v1,v2,v3; +---- +1 1 1 +1 2 1 +1 2 2 + +query III +select * from mv3 where (v1,v2,v3) < (1,3,1) order by v1,v2,v3; +---- +1 1 1 +1 2 1 +1 2 2 + +statement ok +drop materialized view mv3; + +statement ok +drop materialized view mv2; + +statement ok +drop materialized view mv1; + +statement ok +drop table t1; diff --git a/src/batch/executors/src/executor/row_seq_scan.rs b/src/batch/executors/src/executor/row_seq_scan.rs index 445b1fa8038d7..a9efb465df187 100644 --- a/src/batch/executors/src/executor/row_seq_scan.rs +++ b/src/batch/executors/src/executor/row_seq_scan.rs @@ -434,6 +434,7 @@ impl RowSeqScanExecutor { next_col_bounds, } = scan_range; + // The len of a valid pk_prefix should be less than or equal pk's num. let order_type = table.pk_serializer().get_order_types()[pk_prefix.len()]; let (start_bound, end_bound) = if order_type.is_ascending() { (next_col_bounds.0, next_col_bounds.1) diff --git a/src/frontend/planner_test/tests/testdata/input/row_filter.yaml b/src/frontend/planner_test/tests/testdata/input/row_filter.yaml index 2b1966f4316fa..7293cadf7512b 100644 --- a/src/frontend/planner_test/tests/testdata/input/row_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/input/row_filter.yaml @@ -22,4 +22,10 @@ create table t(v1 int, v2 int, v3 int, primary key(v1,v2,v3)); select * from t where (v1,v2,v1) > (1,2,3); expected_outputs: + - batch_plan +- sql: | + create table t1(v1 int, v2 int, v3 int); + create materialized view mv1 as select * from t1 order by v1 asc, v2 asc, v3 desc; + select * from mv1 where (v1,v2,v3) > (1,3,1); + expected_outputs: - batch_plan \ No newline at end of file diff --git a/src/frontend/planner_test/tests/testdata/output/row_filter.yaml b/src/frontend/planner_test/tests/testdata/output/row_filter.yaml index 1ef80b8e025bd..5de2e79815476 100644 --- a/src/frontend/planner_test/tests/testdata/output/row_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/row_filter.yaml @@ -17,7 +17,7 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (Row(t.v1, t.v3) > '(2,3)':Struct(StructType { field_names: [], field_types: [Int32, Int32] })) } - └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], scan_ranges: [t.v1 > Int32(2)], distribution: UpstreamHashShard(t.v1, t.v2, t.v3) } + └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], scan_ranges: [t.v1 >= Int32(2)], distribution: UpstreamHashShard(t.v1, t.v2, t.v3) } - sql: | create table t(v1 int, v2 int, v3 int, primary key(v1,v2,v3)); select * from t where (v3,v2,v1) > (1,2,3); @@ -31,4 +31,12 @@ batch_plan: |- BatchExchange { order: [], dist: Single } └─BatchFilter { predicate: (Row(t.v1, t.v2, t.v1) > '(1,2,3)':Struct(StructType { field_names: [], field_types: [Int32, Int32, Int32] })) } - └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], scan_ranges: [(t.v1, t.v2) > (Int32(1), Int32(2))], distribution: UpstreamHashShard(t.v1, t.v2, t.v3) } + └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], scan_ranges: [(t.v1, t.v2) >= (Int32(1), Int32(2))], distribution: UpstreamHashShard(t.v1, t.v2, t.v3) } +- sql: | + create table t1(v1 int, v2 int, v3 int); + create materialized view mv1 as select * from t1 order by v1 asc, v2 asc, v3 desc; + select * from mv1 where (v1,v2,v3) > (1,3,1); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (Row(mv1.v1, mv1.v2, mv1.v3) > '(1,3,1)':Struct(StructType { field_names: [], field_types: [Int32, Int32, Int32] })) } + └─BatchScan { table: mv1, columns: [mv1.v1, mv1.v2, mv1.v3], scan_ranges: [(mv1.v1, mv1.v2) >= (Int32(1), Int32(3))], distribution: SomeShard } diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index e497ba821864a..c3f8327f39c2e 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -415,28 +415,43 @@ impl Condition { || matches!(func_type, ExprType::GreaterThan)) { let mut pk_struct = vec![]; + let mut order_type = None; let mut all_added = true; let mut iter = row_left_inputs.iter().zip_eq_fast(right_iter); - for i in 0..table_desc.order_column_indices().len() { + for column_order in &table_desc.pk { if let Some((left_expr, right_expr)) = iter.next() { - if left_expr.as_input_ref().unwrap().index != i { + if left_expr.as_input_ref().unwrap().index != column_order.column_index { all_added = false; break; } + match order_type { + Some(o) => { + if o != column_order.order_type { + all_added = false; + break; + } + } + None => order_type = Some(column_order.order_type), + } pk_struct.push(right_expr.clone()); } } + // Here it is necessary to determine whether all of row is included in the `ScanRanges`, if so, the data for eq is not needed if !pk_struct.is_empty() { - let scan_range = ScanRange { - eq_conds: vec![], - range: match func_type { - ExprType::GreaterThan => (Bound::Excluded(pk_struct), Bound::Unbounded), - ExprType::LessThan => (Bound::Unbounded, Bound::Excluded(pk_struct)), - _ => unreachable!(), - }, - }; if !all_added { + let scan_range = ScanRange { + eq_conds: vec![], + range: match func_type { + ExprType::GreaterThan => { + (Bound::Included(pk_struct), Bound::Unbounded) + } + ExprType::LessThan => { + (Bound::Unbounded, Bound::Included(pk_struct)) + } + _ => unreachable!(), + }, + }; return Ok(Some(( vec![scan_range], Condition { @@ -444,6 +459,18 @@ impl Condition { }, ))); } else { + let scan_range = ScanRange { + eq_conds: vec![], + range: match func_type { + ExprType::GreaterThan => { + (Bound::Excluded(pk_struct), Bound::Unbounded) + } + ExprType::LessThan => { + (Bound::Unbounded, Bound::Excluded(pk_struct)) + } + _ => unreachable!(), + }, + }; return Ok(Some(( vec![scan_range], Condition { From e70eb591f6f505bf99a18d116b3b4373f406f424 Mon Sep 17 00:00:00 2001 From: xxchan Date: Mon, 23 Dec 2024 21:11:42 +0800 Subject: [PATCH 2/9] build: prune unused dependencies (#19885) Signed-off-by: xxchan --- Cargo.lock | 166 +----------------- Cargo.toml | 19 +- Makefile.toml | 2 +- ci/scripts/common.sh | 4 +- .../feature-store/server/Cargo.toml | 4 - .../feature-store/simulator/Cargo.toml | 2 - src/batch/Cargo.toml | 8 - src/batch/executors/Cargo.toml | 6 - src/cmd/Cargo.toml | 1 - src/cmd_all/Cargo.toml | 6 +- src/common/Cargo.toml | 9 +- src/common/common_service/Cargo.toml | 6 - src/common/estimate_size/Cargo.toml | 7 - src/common/heap_profiling/Cargo.toml | 7 - src/common/metrics/Cargo.toml | 6 - src/common/proc_macro/Cargo.toml | 6 +- src/common/secret/Cargo.toml | 5 - src/common/telemetry_event/Cargo.toml | 8 - src/compute/Cargo.toml | 8 - src/connector/Cargo.toml | 16 +- src/connector/codec/Cargo.toml | 6 - src/connector/with_options/Cargo.toml | 7 - src/ctl/Cargo.toml | 8 - src/dml/Cargo.toml | 12 -- src/error/Cargo.toml | 1 - src/expr/core/Cargo.toml | 7 - src/expr/impl/Cargo.toml | 9 +- src/frontend/Cargo.toml | 8 - src/frontend/planner_test/Cargo.toml | 6 - src/java_binding/Cargo.toml | 6 - src/jni_core/Cargo.toml | 13 -- src/license/Cargo.toml | 6 - src/meta/Cargo.toml | 15 -- src/meta/model/Cargo.toml | 9 +- src/meta/model/migration/Cargo.toml | 6 - src/meta/node/Cargo.toml | 12 -- src/meta/service/Cargo.toml | 9 - src/object_store/Cargo.toml | 9 +- src/prost/Cargo.toml | 6 - src/prost/helpers/Cargo.toml | 6 - src/risedevtool/Cargo.toml | 7 - src/rpc_client/Cargo.toml | 10 -- src/sqlparser/Cargo.toml | 6 - src/sqlparser/fuzz/Cargo.toml | 6 +- src/sqlparser/sqlparser_bench/Cargo.toml | 6 +- src/storage/Cargo.toml | 9 - src/storage/backup/Cargo.toml | 7 - src/storage/compactor/Cargo.toml | 7 - src/storage/hummock_sdk/Cargo.toml | 9 - src/storage/hummock_test/Cargo.toml | 11 +- src/stream/Cargo.toml | 17 +- src/stream/spill_test/Cargo.toml | 6 - src/test_runner/Cargo.toml | 6 - src/tests/compaction_test/Cargo.toml | 13 -- src/tests/e2e_extended_mode/Cargo.toml | 6 - src/tests/libpq_test/Cargo.toml | 1 - src/tests/regress/Cargo.toml | 6 - src/tests/simulation/Cargo.toml | 9 +- src/tests/sqlsmith/Cargo.toml | 7 - src/tests/state_cleaning_test/Cargo.toml | 7 - src/utils/delta_btree_map/Cargo.toml | 6 - src/utils/futures_util/Cargo.toml | 6 - src/utils/iter_util/Cargo.toml | 6 - src/utils/local_stats_alloc/Cargo.toml | 6 - src/utils/pgwire/Cargo.toml | 6 - src/utils/resource_util/Cargo.toml | 6 - src/utils/runtime/Cargo.toml | 6 - src/utils/sync-point/Cargo.toml | 6 - src/utils/variables/Cargo.toml | 6 - 69 files changed, 44 insertions(+), 623 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2cb5a29b3284..7e770880971b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1331,22 +1331,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-http" -version = "0.60.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e4199d5d62ab09be6a64650c06cc5c4aa45806fed4c74bc4a5c8eaf039a6fa" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "http 0.2.9", - "http-body 0.4.5", - "pin-project-lite", - "tracing", -] - [[package]] name = "aws-lc-rs" version = "1.6.2" @@ -3506,19 +3490,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core 0.9.8", -] - [[package]] name = "dashmap" version = "6.1.0" @@ -3561,7 +3532,7 @@ dependencies = [ "bytes", "bzip2", "chrono", - "dashmap 6.1.0", + "dashmap", "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", @@ -3654,7 +3625,7 @@ checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1" dependencies = [ "arrow 52.2.0", "chrono", - "dashmap 6.1.0", + "dashmap", "datafusion-common", "datafusion-expr", "futures", @@ -4050,7 +4021,7 @@ dependencies = [ "bytes", "cfg-if", "chrono", - "dashmap 6.1.0", + "dashmap", "datafusion", "datafusion-common", "datafusion-expr", @@ -5346,21 +5317,6 @@ dependencies = [ "libc", ] -[[package]] -name = "function_name" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1ab577a896d09940b5fe12ec5ae71f9d8211fff62c919c03a3750a9901e98a7" -dependencies = [ - "function_name-proc-macro", -] - -[[package]] -name = "function_name-proc-macro" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "673464e1e314dd67a0fd9544abc99e8eb28d0c7e3b69b033bcff9b2d00b87333" - [[package]] name = "funty" version = "2.0.0" @@ -5795,14 +5751,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68a7f542ee6b35af73b06abc0dad1c1bae89964e4e253bc4b587b91c9637867b" dependencies = [ "cfg-if", - "dashmap 5.5.3", "futures", "futures-timer", "no-std-compat", "nonzero_ext", "parking_lot 0.12.1", "portable-atomic", - "rand", "smallvec", "spinning_top", ] @@ -10455,7 +10409,6 @@ dependencies = [ "regex", "reqwest 0.12.4", "serde", - "serde_json", "serde_with 3.8.1", "serde_yaml", "sqlx", @@ -10499,7 +10452,6 @@ version = "2.3.0-alpha" dependencies = [ "anyhow", "async-trait", - "bincode 1.3.3", "bytes", "itertools 0.13.0", "parking_lot 0.12.1", @@ -10522,7 +10474,6 @@ dependencies = [ "anyhow", "async-recursion", "async-trait", - "criterion", "either", "futures", "futures-async-stream", @@ -10552,7 +10503,6 @@ dependencies = [ "tempfile", "thiserror 1.0.63", "thiserror-ext", - "tikv-jemallocator", "tokio-postgres", "tokio-stream 0.1.15", "tracing", @@ -10654,7 +10604,6 @@ version = "2.3.0-alpha" dependencies = [ "clap", "madsim-tokio", - "prometheus", "risingwave_batch_executors", "risingwave_common", "risingwave_compactor", @@ -10673,14 +10622,12 @@ dependencies = [ name = "risingwave_cmd_all" version = "2.3.0-alpha" dependencies = [ - "anyhow", "clap", "console", "const-str", "expect-test", "home", "madsim-tokio", - "prometheus", "risingwave_batch_executors", "risingwave_cmd", "risingwave_common", @@ -10748,7 +10695,6 @@ dependencies = [ "http 1.1.0", "http-body 0.4.5", "humantime", - "hytra", "itertools 0.13.0", "itoa", "jsonbb", @@ -10829,7 +10775,6 @@ dependencies = [ "ethnum", "fixedbitset 0.5.0", "jsonbb", - "lru 0.7.6", "risingwave_common_proc_macro", "rust_decimal", "serde_json", @@ -10839,7 +10784,6 @@ dependencies = [ name = "risingwave_common_heap_profiling" version = "2.3.0-alpha" dependencies = [ - "anyhow", "chrono", "madsim-tokio", "parking_lot 0.12.1", @@ -10945,21 +10889,14 @@ name = "risingwave_compaction_test" version = "2.3.0-alpha" dependencies = [ "anyhow", - "async-trait", "bytes", "clap", "foyer", - "futures", "madsim-tokio", - "prometheus", - "rand", "risingwave_common", "risingwave_compactor", "risingwave_hummock_sdk", - "risingwave_hummock_test", - "risingwave_meta", "risingwave_meta_node", - "risingwave_object_store", "risingwave_pb", "risingwave_rpc_client", "risingwave_rt", @@ -10978,7 +10915,6 @@ dependencies = [ "jsonbb", "madsim-tokio", "madsim-tonic", - "parking_lot 0.12.1", "prost 0.13.1", "risingwave_common", "risingwave_common_heap_profiling", @@ -11006,7 +10942,6 @@ dependencies = [ "futures", "futures-async-stream", "http 1.1.0", - "hyper 1.4.1", "itertools 0.13.0", "madsim-tokio", "madsim-tonic", @@ -11036,7 +10971,6 @@ dependencies = [ "tokio-stream 0.1.15", "tower 0.5.0", "tracing", - "uuid", "workspace-hack", ] @@ -11090,7 +11024,6 @@ dependencies = [ "google-cloud-googleapis", "google-cloud-pubsub", "governor", - "http 0.2.9", "iceberg", "iceberg-catalog-glue", "iceberg-catalog-rest", @@ -11098,8 +11031,6 @@ dependencies = [ "indexmap 2.7.0", "itertools 0.13.0", "jni", - "jsonbb", - "jsonwebtoken", "madsim-rdkafka", "madsim-tokio", "madsim-tonic", @@ -11134,7 +11065,6 @@ dependencies = [ "risingwave_common_estimate_size", "risingwave_connector_codec", "risingwave_jni_core", - "risingwave_object_store", "risingwave_pb", "risingwave_rpc_client", "rumqttc", @@ -11225,7 +11155,6 @@ dependencies = [ "itertools 0.13.0", "madsim-tokio", "madsim-tonic", - "memcomparable", "prost 0.13.1", "regex", "risingwave_common", @@ -11234,7 +11163,6 @@ dependencies = [ "risingwave_hummock_sdk", "risingwave_meta", "risingwave_meta_model", - "risingwave_meta_model_migration", "risingwave_object_store", "risingwave_pb", "risingwave_rpc_client", @@ -11256,21 +11184,15 @@ name = "risingwave_dml" version = "2.3.0-alpha" dependencies = [ "assert_matches", - "criterion", "futures", "futures-async-stream", "itertools 0.13.0", "madsim-tokio", "parking_lot 0.12.1", "paste", - "rand", "risingwave_common", - "risingwave_connector", - "risingwave_pb", - "rw_futures_util", "tempfile", "thiserror 1.0.63", - "thiserror-ext", "tracing", "workspace-hack", ] @@ -11296,7 +11218,6 @@ version = "2.3.0-alpha" dependencies = [ "anyhow", "bincode 1.3.3", - "bytes", "easy-ext", "madsim-tonic", "serde", @@ -11314,7 +11235,6 @@ dependencies = [ "async-trait", "auto_impl", "await-tree", - "cfg-or-panic", "chrono", "const-currying", "downcast-rs", @@ -11476,7 +11396,6 @@ dependencies = [ "risingwave_expr_impl", "risingwave_frontend_macro", "risingwave_hummock_sdk", - "risingwave_object_store", "risingwave_pb", "risingwave_rpc_client", "risingwave_sqlparser", @@ -11500,7 +11419,6 @@ dependencies = [ "url", "uuid", "workspace-hack", - "zstd 0.13.0", ] [[package]] @@ -11517,16 +11435,13 @@ name = "risingwave_hummock_sdk" version = "2.3.0-alpha" dependencies = [ "bytes", - "easy-ext", "hex", "itertools 0.13.0", "parse-display", - "prost 0.13.1", "risingwave_common", "risingwave_common_estimate_size", "risingwave_pb", "serde", - "serde_bytes", "tracing", "workspace-hack", ] @@ -11558,7 +11473,6 @@ dependencies = [ "risingwave_rpc_client", "risingwave_storage", "risingwave_test_runner", - "serde", "serial_test", "sync-point", "workspace-hack", @@ -11621,21 +11535,14 @@ dependencies = [ "cfg-or-panic", "chrono", "expect-test", - "foyer", "fs-err", "futures", - "itertools 0.13.0", "jni", "madsim-tokio", "paste", "prost 0.13.1", "risingwave_common", - "risingwave_expr", - "risingwave_hummock_sdk", "risingwave_pb", - "rw_futures_util", - "serde", - "serde_json", "thiserror 1.0.63", "thiserror-ext", "tracing", @@ -11660,15 +11567,10 @@ dependencies = [ name = "risingwave_mem_table_spill_test" version = "2.3.0-alpha" dependencies = [ - "async-trait", - "bytes", - "futures", - "futures-async-stream", "madsim-tokio", "risingwave_common", "risingwave_hummock_sdk", "risingwave_hummock_test", - "risingwave_storage", "risingwave_stream", ] @@ -11680,10 +11582,8 @@ dependencies = [ "arc-swap", "assert_matches", "async-trait", - "aws-config", "axum", "base64-url", - "bincode 1.3.3", "bytes", "chrono", "clap", @@ -11695,8 +11595,6 @@ dependencies = [ "enum-as-inner 0.6.0", "expect-test", "fail", - "flate2", - "function_name", "futures", "hex", "http 1.1.0", @@ -11705,9 +11603,6 @@ dependencies = [ "madsim-tokio", "madsim-tonic", "maplit", - "memcomparable", - "mime_guess", - "moka", "notify", "num-integer", "num-traits", @@ -11731,7 +11626,6 @@ dependencies = [ "risingwave_rpc_client", "risingwave_sqlparser", "risingwave_test_runner", - "rw-aws-sdk-ec2", "rw_futures_util", "scopeguard", "sea-orm", @@ -11748,7 +11642,6 @@ dependencies = [ "tower-http", "tracing", "tracing-subscriber", - "url", "uuid", "workspace-hack", ] @@ -11780,7 +11673,6 @@ version = "2.3.0-alpha" dependencies = [ "prost 0.13.1", "risingwave_common", - "risingwave_hummock_sdk", "risingwave_pb", "sea-orm", "serde", @@ -11804,13 +11696,9 @@ dependencies = [ name = "risingwave_meta_node" version = "2.3.0-alpha" dependencies = [ - "anyhow", "clap", "educe", - "either", - "futures", "hex", - "itertools 0.13.0", "madsim-tokio", "madsim-tonic", "otlp-embedded", @@ -11821,12 +11709,10 @@ dependencies = [ "risingwave_common_heap_profiling", "risingwave_common_service", "risingwave_meta", - "risingwave_meta_model_migration", "risingwave_meta_service", "risingwave_pb", "risingwave_rpc_client", "sea-orm", - "serde", "serde_json", "thiserror-ext", "tracing", @@ -11839,12 +11725,10 @@ version = "2.3.0-alpha" dependencies = [ "anyhow", "async-trait", - "either", "futures", "itertools 0.13.0", "madsim-tokio", "madsim-tonic", - "prost 0.13.1", "rand", "regex", "risingwave_common", @@ -11855,7 +11739,6 @@ dependencies = [ "risingwave_pb", "sea-orm", "serde_json", - "sync-point", "thiserror-ext", "tokio-stream 0.1.15", "tracing", @@ -11876,12 +11759,10 @@ dependencies = [ "aws-smithy-types", "bytes", "crc32fast", - "either", "fail", "futures", "hyper 0.14.27", "hyper-rustls 0.24.2", - "hyper-tls 0.5.0", "itertools 0.13.0", "madsim", "madsim-aws-sdk-s3", @@ -11964,10 +11845,7 @@ dependencies = [ "easy-ext", "either", "futures", - "h2 0.4.7", "http 1.1.0", - "hyper 1.4.1", - "itertools 0.13.0", "lru 0.7.6", "madsim-tokio", "madsim-tonic", @@ -11986,7 +11864,6 @@ dependencies = [ "tokio-stream 0.1.15", "tower 0.5.0", "tracing", - "url", "workspace-hack", ] @@ -12038,14 +11915,12 @@ dependencies = [ "itertools 0.13.0", "lru 0.7.6", "madsim", - "madsim-aws-sdk-s3", "madsim-rdkafka", "madsim-tokio", "maplit", "paste", "pin-project", "pretty_assertions", - "prometheus", "rand", "rand_chacha", "risingwave_batch_executors", @@ -12061,7 +11936,6 @@ dependencies = [ "risingwave_meta_node", "risingwave_object_store", "risingwave_pb", - "risingwave_rpc_client", "risingwave_sqlparser", "risingwave_sqlsmith", "serde", @@ -12071,7 +11945,6 @@ dependencies = [ "tempfile", "tikv-jemallocator", "tokio-postgres", - "tokio-stream 0.1.15", "tracing", "tracing-subscriber", "uuid", @@ -12116,7 +11989,6 @@ dependencies = [ "risingwave_expr", "risingwave_expr_impl", "risingwave_frontend", - "risingwave_pb", "risingwave_sqlparser", "similar", "thiserror-ext", @@ -12134,7 +12006,6 @@ dependencies = [ "clap", "futures", "madsim-tokio", - "prometheus", "regex", "risingwave_rt", "serde", @@ -12150,7 +12021,6 @@ dependencies = [ name = "risingwave_storage" version = "2.3.0-alpha" dependencies = [ - "ahash 0.8.11", "anyhow", "arc-swap", "async-trait", @@ -12159,9 +12029,7 @@ dependencies = [ "bincode 1.3.3", "bytes", "criterion", - "crossbeam", "darwin-libproc", - "dashmap 6.1.0", "dyn-clone", "either", "enum-as-inner 0.6.0", @@ -12276,7 +12144,6 @@ dependencies = [ "serde_yaml", "smallvec", "static_assertions", - "strum 0.26.3", "strum_macros 0.26.4", "thiserror 1.0.63", "thiserror-ext", @@ -12298,10 +12165,8 @@ dependencies = [ "prost 0.13.1", "reqwest 0.12.4", "risingwave_pb", - "thiserror 1.0.63", "thiserror-ext", "tracing", - "uuid", ] [[package]] @@ -12725,30 +12590,6 @@ dependencies = [ "wait-timeout", ] -[[package]] -name = "rw-aws-sdk-ec2" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80dba3602b267a7f9dcc546ccbf1d05752447773146253c7e344e2a320630b7f" -dependencies = [ - "aws-credential-types", - "aws-http", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", - "aws-smithy-query", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "fastrand", - "http 0.2.9", - "regex", - "tracing", -] - [[package]] name = "rw_futures_util" version = "0.0.0" @@ -16563,7 +16404,6 @@ dependencies = [ name = "with_options" version = "2.3.0-alpha" dependencies = [ - "proc-macro2", "quote", "syn 2.0.87", ] diff --git a/Cargo.toml b/Cargo.toml index 4a86290bbc128..4e7cc205e15ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,19 @@ keywords = ["sql", "database", "streaming"] license = "Apache-2.0" repository = "https://github.com/risingwavelabs/risingwave" +# some dependencies are not worth deleting. +[workspace.metadata.cargo-machete] +ignored = [ + "workspace-hack", + "expect-test", + "pretty_assertions", + "serde", + "serde_json", +] +[workspace.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] +development = ["expect-test", "pretty_assertions"] + [workspace.dependencies] foyer = { version = "0.13.1", features = ["tracing", "nightly", "prometheus"] } apache-avro = { git = "https://github.com/risingwavelabs/avro", rev = "25113ba88234a9ae23296e981d8302c290fdaa4b", features = [ @@ -107,11 +120,6 @@ aws-sdk-s3 = { version = "1", default-features = false, features = [ "rt-tokio", "rustls", ] } -# To bump the version of aws-sdk-ec2, check the README of https://github.com/risingwavelabs/rw-aws-sdk-ec2 -aws-sdk-ec2 = { package = "rw-aws-sdk-ec2", version = "1", default-features = false, features = [ - "rt-tokio", - "rustls", -] } aws-sdk-sqs = { version = "1", default-features = false, features = [ "rt-tokio", "rustls", @@ -204,6 +212,7 @@ tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "0dd1055" tokio-util = "0.7" tracing-opentelemetry = "0.25" rand = { version = "0.8", features = ["small_rng"] } +governor = { version = "0.6", default-features = false, features = ["std"] } risingwave_backup = { path = "./src/storage/backup" } risingwave_batch = { path = "./src/batch" } risingwave_batch_executors = { path = "./src/batch/executors" } diff --git a/Makefile.toml b/Makefile.toml index 89814dbe36ae5..6c15c231e2c73 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -1208,7 +1208,7 @@ script = """ echo "Running $(tput setaf 4)cargo udeps$(tput sgr0) checks" -cargo udeps --workspace --all-targets ${RISINGWAVE_FEATURE_FLAGS} --exclude workspace-hack --exclude risingwave_bench --exclude risingwave_udf --exclude risingwave_simulation +cargo udeps --workspace --all-targets ${RISINGWAVE_FEATURE_FLAGS} --exclude workspace-hack --exclude risingwave_bench --exclude risingwave_simulation """ [tasks.check-trailing-spaces] diff --git a/ci/scripts/common.sh b/ci/scripts/common.sh index b3e09bd607b28..4c43da1fcaeaa 100755 --- a/ci/scripts/common.sh +++ b/ci/scripts/common.sh @@ -21,7 +21,9 @@ export RUST_MIN_STACK=4194304 unset LANG function dump_diagnose_info() { - ./risedev diagnose || true + if [ -f .risingwave/config/risedev-env ]; then + ./risedev diagnose || true + fi } trap dump_diagnose_info EXIT diff --git a/integration_tests/feature-store/server/Cargo.toml b/integration_tests/feature-store/server/Cargo.toml index 26e300cdf2f6d..8e13d5374d7bf 100644 --- a/integration_tests/feature-store/server/Cargo.toml +++ b/integration_tests/feature-store/server/Cargo.toml @@ -9,16 +9,12 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -sqlx = { version = "0.8", features = ["runtime-tokio-native-tls", "postgres"] } tokio = { version = "1", features = ["full"] } tonic = "0.11.0" -reqwest = { version = "0.11", features = ["blocking"] } rdkafka = { version = "0.34", features = ["cmake-build"] } -serde_json = "1.0" prost = "0.12" clap = "4.4.6" tokio-postgres = "0.7.10" -tonic-build = "0.11.0" [build-dependencies] tonic-build = "0.11.0" diff --git a/integration_tests/feature-store/simulator/Cargo.toml b/integration_tests/feature-store/simulator/Cargo.toml index e7b53c9047de2..0591ddf5b3137 100644 --- a/integration_tests/feature-store/simulator/Cargo.toml +++ b/integration_tests/feature-store/simulator/Cargo.toml @@ -11,12 +11,10 @@ edition = "2021" [dependencies] tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread"] } tonic = "0.11.0" -reqwest = { version = "0.11" } serde_json = "1.0" serde_derive = "1.0" rand = { workspace = true } clap = "4.4.6" prost = "0.12" -serde = { version = "1", features = ["derive"] } futures = "0.3.28" csv = "1.3.0" diff --git a/src/batch/Cargo.toml b/src/batch/Cargo.toml index c501f2df4962e..56491055cfacd 100644 --- a/src/batch/Cargo.toml +++ b/src/batch/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" async-recursion = "1" @@ -62,11 +56,9 @@ twox-hash = "2" workspace-hack = { path = "../workspace-hack" } [dev-dependencies] -criterion = { workspace = true, features = ["async_tokio", "async"] } rand = { workspace = true } risingwave_hummock_sdk = { workspace = true } tempfile = "3" -tikv-jemallocator = { workspace = true } [lints] workspace = true diff --git a/src/batch/executors/Cargo.toml b/src/batch/executors/Cargo.toml index 927c026e83d8e..736c54f8a5b0e 100644 --- a/src/batch/executors/Cargo.toml +++ b/src/batch/executors/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" assert_matches = "1" diff --git a/src/cmd/Cargo.toml b/src/cmd/Cargo.toml index 5ddc6112bd4ab..097f9fd881a48 100644 --- a/src/cmd/Cargo.toml +++ b/src/cmd/Cargo.toml @@ -21,7 +21,6 @@ normal = ["workspace-hack", "workspace-config", "task_stats_alloc"] [dependencies] clap = { workspace = true } -prometheus = { version = "0.13" } risingwave_batch_executors = { workspace = true } risingwave_common = { workspace = true } risingwave_compactor = { workspace = true } diff --git a/src/cmd_all/Cargo.toml b/src/cmd_all/Cargo.toml index 2269ca1010481..0e74a20a4a10e 100644 --- a/src/cmd_all/Cargo.toml +++ b/src/cmd_all/Cargo.toml @@ -19,18 +19,16 @@ js-udf = ["risingwave_expr_impl/js-udf"] python-udf = ["risingwave_expr_impl/python-udf"] [package.metadata.cargo-machete] -ignored = ["workspace-hack", "workspace-config", "task_stats_alloc"] +ignored = ["workspace-hack", "workspace-config", "task_stats_alloc", "tikv-jemallocator"] [package.metadata.cargo-udeps.ignore] -ignored = ["workspace-hack", "workspace-config", "task_stats_alloc"] +ignored = ["workspace-hack", "workspace-config", "task_stats_alloc", "tikv-jemallocator"] [dependencies] -anyhow = "1" clap = { workspace = true } console = "0.15" const-str = "0.5" home = "0.5" -prometheus = { version = "0.13" } risingwave_batch_executors = { workspace = true } risingwave_cmd = { workspace = true } risingwave_common = { workspace = true } diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index 8a554137dccdd..1fbf8e312185d 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] ahash = "0.8" anyhow = "1" @@ -48,12 +42,11 @@ ethnum = { version = "1", features = ["serde"] } fixedbitset = "0.5" foyer = { workspace = true } futures = { version = "0.3", default-features = false, features = ["alloc"] } -governor = { version = "0.6", default-features = false, features = ["std"] } +governor = { workspace = true } hashbrown = "0.14" hex = "0.4.3" http = "1" humantime = "2.1" -hytra = { workspace = true } itertools = { workspace = true } itoa = "1.0" jsonbb = { workspace = true } diff --git a/src/common/common_service/Cargo.toml b/src/common/common_service/Cargo.toml index 37775ff04a82b..38dd8f4534477 100644 --- a/src/common/common_service/Cargo.toml +++ b/src/common/common_service/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] async-trait = "0.1" axum = { workspace = true } diff --git a/src/common/estimate_size/Cargo.toml b/src/common/estimate_size/Cargo.toml index 77e4203f9c7cb..c6447ff974216 100644 --- a/src/common/estimate_size/Cargo.toml +++ b/src/common/estimate_size/Cargo.toml @@ -8,19 +8,12 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] bytes = "1" educe = "0.6" ethnum = { version = "1", features = ["serde"] } fixedbitset = "0.5" jsonbb = { workspace = true } -lru = { workspace = true } risingwave_common_proc_macro = { workspace = true } rust_decimal = "1" serde_json = "1" diff --git a/src/common/heap_profiling/Cargo.toml b/src/common/heap_profiling/Cargo.toml index 706f801fe4762..edb0d11e2fd0b 100644 --- a/src/common/heap_profiling/Cargo.toml +++ b/src/common/heap_profiling/Cargo.toml @@ -8,14 +8,7 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] -anyhow = "1" chrono = { version = "0.4", default-features = false, features = [ "clock", "std", diff --git a/src/common/metrics/Cargo.toml b/src/common/metrics/Cargo.toml index 78562624c6935..b499508f177d9 100644 --- a/src/common/metrics/Cargo.toml +++ b/src/common/metrics/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] auto_impl = "1" bytes = "1" diff --git a/src/common/proc_macro/Cargo.toml b/src/common/proc_macro/Cargo.toml index 2219dc2efaba4..d44951a4aec56 100644 --- a/src/common/proc_macro/Cargo.toml +++ b/src/common/proc_macro/Cargo.toml @@ -10,11 +10,9 @@ repository = { workspace = true } [lib] proc-macro = true -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] + + [dependencies] proc-macro-error = "1.0" diff --git a/src/common/secret/Cargo.toml b/src/common/secret/Cargo.toml index 6d501594acbde..ebd042b23a9b0 100644 --- a/src/common/secret/Cargo.toml +++ b/src/common/secret/Cargo.toml @@ -7,11 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] [dependencies] aes-gcm = "0.10" diff --git a/src/common/telemetry_event/Cargo.toml b/src/common/telemetry_event/Cargo.toml index 6007f1cdc3314..ab3826b9504b0 100644 --- a/src/common/telemetry_event/Cargo.toml +++ b/src/common/telemetry_event/Cargo.toml @@ -7,18 +7,11 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] jsonbb = { workspace = true } prost = { workspace = true } reqwest = { version = "0.12.2", features = ["json"] } risingwave_pb = { workspace = true } -thiserror = "1" thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", @@ -29,4 +22,3 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "signal", ] } tracing = "0.1" -uuid = { version = "1", features = ["v4"] } diff --git a/src/compute/Cargo.toml b/src/compute/Cargo.toml index ef66755e3e5be..d6a14aa04cd9e 100644 --- a/src/compute/Cargo.toml +++ b/src/compute/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" async-trait = "0.1" @@ -24,7 +18,6 @@ foyer = { workspace = true } futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = { workspace = true } http = "1" -hyper = "1" itertools = { workspace = true } maplit = "1.0.2" pprof = { version = "0.14", features = ["flamegraph"] } @@ -60,7 +53,6 @@ tokio-stream = { workspace = true } tonic = { workspace = true } tower = { version = "0.5", features = ["util", "load-shed"] } tracing = "0.1" -uuid = { version = "1.11.0", features = ["v4"] } [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index 3f12926c9edac..b1ab8b9a8a72c 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" apache-avro = { workspace = true } @@ -61,12 +55,7 @@ google-cloud-bigquery = { version = "0.13.0", features = ["auth"] } google-cloud-gax = "0.19.0" google-cloud-googleapis = { version = "0.15", features = ["pubsub", "bigquery"] } google-cloud-pubsub = "0.29" -governor = { version = "0.6", default-features = false, features = [ - "std", - "dashmap", - "jitter", -] } -http = "0.2" +governor = { workspace = true } iceberg = { workspace = true } iceberg-catalog-glue = { workspace = true } iceberg-catalog-rest = { workspace = true } @@ -74,8 +63,6 @@ icelake = { workspace = true } indexmap = { version = "2.7.0", features = ["serde"] } itertools = { workspace = true } jni = { version = "0.21.1", features = ["invocation"] } -jsonbb = { workspace = true } -jsonwebtoken = "9.2.0" maplit = "1.0.2" moka = { version = "0.12.8", features = ["future"] } mongodb = { version = "2.8.2", features = ["tokio-runtime"] } @@ -125,7 +112,6 @@ risingwave_common = { workspace = true } risingwave_common_estimate_size = { workspace = true } risingwave_connector_codec = { workspace = true } risingwave_jni_core = { workspace = true } -risingwave_object_store = { workspace = true } risingwave_pb = { workspace = true } risingwave_rpc_client = { workspace = true } rumqttc = { version = "0.24.0", features = ["url"] } diff --git a/src/connector/codec/Cargo.toml b/src/connector/codec/Cargo.toml index 9038c928dc3da..50b3a53887404 100644 --- a/src/connector/codec/Cargo.toml +++ b/src/connector/codec/Cargo.toml @@ -8,12 +8,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" apache-avro = { workspace = true } diff --git a/src/connector/with_options/Cargo.toml b/src/connector/with_options/Cargo.toml index 59dd4291e12b7..b8d1ecf510379 100644 --- a/src/connector/with_options/Cargo.toml +++ b/src/connector/with_options/Cargo.toml @@ -9,7 +9,6 @@ repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -proc-macro2 = "1" quote = "1" syn = "2" @@ -18,11 +17,5 @@ proc-macro = true [dev-dependencies] -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [lints] workspace = true diff --git a/src/ctl/Cargo.toml b/src/ctl/Cargo.toml index 7b0fc940de946..cdb8f5377c75e 100644 --- a/src/ctl/Cargo.toml +++ b/src/ctl/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" bytes = "1" @@ -24,7 +18,6 @@ futures = { version = "0.3", default-features = false, features = ["alloc"] } hex = "0.4" inquire = "0.7.0" itertools = { workspace = true } -memcomparable = "0.2" prost = { workspace = true } regex = "1.10.0" risingwave_common = { workspace = true } @@ -33,7 +26,6 @@ risingwave_frontend = { workspace = true } risingwave_hummock_sdk = { workspace = true } risingwave_meta = { workspace = true } risingwave_meta_model = { workspace = true } -risingwave_meta_model_migration = { workspace = true } risingwave_object_store = { workspace = true } risingwave_pb = { workspace = true } risingwave_rpc_client = { workspace = true } diff --git a/src/dml/Cargo.toml b/src/dml/Cargo.toml index df0ce4ebc19d6..1ad438db96259 100644 --- a/src/dml/Cargo.toml +++ b/src/dml/Cargo.toml @@ -7,24 +7,13 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = { workspace = true } itertools = { workspace = true } parking_lot = { workspace = true } -rand = { workspace = true } risingwave_common = { workspace = true } -risingwave_connector = { workspace = true } -risingwave_pb = { workspace = true } -rw_futures_util = { workspace = true } thiserror = "1" -thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", @@ -41,7 +30,6 @@ workspace-hack = { path = "../workspace-hack" } [dev-dependencies] assert_matches = "1" -criterion = { workspace = true, features = ["async_tokio"] } paste = "1" tempfile = "3" diff --git a/src/error/Cargo.toml b/src/error/Cargo.toml index 4a99711db6c41..8407197a88769 100644 --- a/src/error/Cargo.toml +++ b/src/error/Cargo.toml @@ -10,7 +10,6 @@ repository = { workspace = true } [dependencies] anyhow = "1" bincode = "1" -bytes = "1" easy-ext = "1" serde = "1" serde-error = "0.1" diff --git a/src/expr/core/Cargo.toml b/src/expr/core/Cargo.toml index 9851186260e5a..16db4ea8f2691 100644 --- a/src/expr/core/Cargo.toml +++ b/src/expr/core/Cargo.toml @@ -9,18 +9,11 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack", "ctor"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack", "ctor"] - [dependencies] anyhow = "1" async-trait = "0.1" auto_impl = "1" await-tree = { workspace = true } -cfg-or-panic = "0.2" chrono = { version = "0.4", default-features = false, features = [ "clock", "std", diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml index ec6fa8afb9e99..30bd6f61d6b95 100644 --- a/src/expr/impl/Cargo.toml +++ b/src/expr/impl/Cargo.toml @@ -10,13 +10,10 @@ repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [package.metadata.cargo-machete] -ignored = ["workspace-hack", "ctor"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack", "ctor"] +ignored = ["chrono-tz", "futures-async-stream"] [features] -external-udf = ["arrow-udf-flight", "arrow-flight", "tonic"] +external-udf = ["arrow-udf-flight", "arrow-flight", "tonic", "ginepro"] js-udf = ["arrow-udf-js"] python-udf = ["arrow-udf-python"] wasm-udf = ["arrow-udf-wasm", "zstd"] @@ -42,7 +39,7 @@ educe = "0.6" fancy-regex = "0.14" futures-async-stream = { workspace = true } futures-util = "0.3" -ginepro = "0.8" +ginepro = { version = "0.8", optional = true } hex = "0.4" hmac = "0.12" iceberg = { workspace = true } diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 3041a4320dd42..74fafdd6d7d97 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" arc-swap = "1" @@ -71,7 +65,6 @@ risingwave_dml = { workspace = true } risingwave_expr = { workspace = true } risingwave_frontend_macro = { path = "macro" } risingwave_hummock_sdk = { workspace = true } -risingwave_object_store = { workspace = true } risingwave_pb = { workspace = true } risingwave_rpc_client = { workspace = true } risingwave_sqlparser = { workspace = true } @@ -108,7 +101,6 @@ tower-http = { version = "0.6", features = [ tracing = "0.1" url = "2.5.0" uuid = "1" -zstd = { version = "0.13", default-features = false } [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../workspace-hack" } diff --git a/src/frontend/planner_test/Cargo.toml b/src/frontend/planner_test/Cargo.toml index 49ad1876afff0..ee8d4af18655d 100644 --- a/src/frontend/planner_test/Cargo.toml +++ b/src/frontend/planner_test/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" expect-test = "1" diff --git a/src/java_binding/Cargo.toml b/src/java_binding/Cargo.toml index 0966b700a713f..36facf1790abd 100644 --- a/src/java_binding/Cargo.toml +++ b/src/java_binding/Cargo.toml @@ -3,12 +3,6 @@ name = "risingwave_java_binding" version = "0.1.0" edition = "2021" -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" bytes = "1" diff --git a/src/jni_core/Cargo.toml b/src/jni_core/Cargo.toml index 868c79e7427ec..d182e19045997 100644 --- a/src/jni_core/Cargo.toml +++ b/src/jni_core/Cargo.toml @@ -3,29 +3,18 @@ name = "risingwave_jni_core" version = "0.1.0" edition = "2021" -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" bytes = "1" cfg-or-panic = "0.2" chrono = { version = "0.4", default-features = false } -foyer ={ workspace = true } fs-err = "3" futures = { version = "0.3", default-features = false, features = ["alloc"] } -itertools = { workspace = true } jni = { version = "0.21.1", features = ["invocation"] } paste = "1" prost = { workspace = true } risingwave_common = { workspace = true } risingwave_pb = { workspace = true } -rw_futures_util = { workspace = true } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" thiserror = "1" thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ @@ -41,8 +30,6 @@ tracing = "0.1" [dev-dependencies] expect-test = "1" -risingwave_expr = { workspace = true } -risingwave_hummock_sdk = { workspace = true } [lints] workspace = true diff --git a/src/license/Cargo.toml b/src/license/Cargo.toml index b435747467e21..3c0c2e8f612ff 100644 --- a/src/license/Cargo.toml +++ b/src/license/Cargo.toml @@ -8,12 +8,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] jsonbb = { workspace = true } jsonwebtoken = "9" diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml index 16a303609699b..582903c99a00a 100644 --- a/src/meta/Cargo.toml +++ b/src/meta/Cargo.toml @@ -7,21 +7,12 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" arc-swap = "1" assert_matches = "1" async-trait = "0.1" -aws-config = { workspace = true } -aws-sdk-ec2 = { workspace = true } base64-url = { version = "3.0.0" } -bincode = "1.3" bytes = { version = "1", features = ["serde"] } chrono = "0.4" clap = { workspace = true } @@ -32,17 +23,12 @@ educe = "0.6" either = "1" enum-as-inner = "0.6" fail = "0.5" -flate2 = "1" -function_name = "0.3.0" futures = { version = "0.3", default-features = false, features = ["alloc"] } hex = "0.4" http = "1" itertools = { workspace = true } jsonbb = { workspace = true } maplit = "1.0.2" -memcomparable = { version = "0.2" } -mime_guess = "2" -moka = { version = "0.12.3", features = ["future"] } notify = { version = "7", default-features = false, features = [ "macos_fsevent", ] } @@ -89,7 +75,6 @@ tokio-stream = { workspace = true } tonic = { workspace = true } tower = { version = "0.5", features = ["util", "load-shed"] } tracing = "0.1" -url = "2" uuid = { version = "1", features = ["v4"] } [target.'cfg(not(madsim))'.dependencies] diff --git a/src/meta/model/Cargo.toml b/src/meta/model/Cargo.toml index 991becc820642..530a67da3b9f5 100644 --- a/src/meta/model/Cargo.toml +++ b/src/meta/model/Cargo.toml @@ -7,17 +7,10 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] prost = { workspace = true } risingwave_common = { workspace = true } -risingwave_hummock_sdk = { workspace = true } risingwave_pb = { workspace = true } sea-orm = { workspace = true } serde = { version = "1.0.196", features = ["derive"] } -serde_json = "1.0.113" +serde_json = "1" diff --git a/src/meta/model/migration/Cargo.toml b/src/meta/model/migration/Cargo.toml index 9d9fbd5292d25..725e028cabdc7 100644 --- a/src/meta/model/migration/Cargo.toml +++ b/src/meta/model/migration/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] async-std = { version = "1", features = ["attributes", "tokio1"] } easy-ext = "1" diff --git a/src/meta/node/Cargo.toml b/src/meta/node/Cargo.toml index dc011842c3cd3..f3f13d546ec06 100644 --- a/src/meta/node/Cargo.toml +++ b/src/meta/node/Cargo.toml @@ -7,20 +7,10 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] -anyhow = "1" clap = { workspace = true } educe = "0.6" -either = "1" -futures = { version = "0.3", default-features = false, features = ["alloc"] } hex = "0.4" -itertools = { workspace = true } otlp-embedded = { workspace = true } prometheus-http-query = "0.8" redact = "0.1.5" @@ -29,12 +19,10 @@ risingwave_common = { workspace = true } risingwave_common_heap_profiling = { workspace = true } risingwave_common_service = { workspace = true } risingwave_meta = { workspace = true } -risingwave_meta_model_migration = { workspace = true } risingwave_meta_service = { workspace = true } risingwave_pb = { workspace = true } risingwave_rpc_client = { workspace = true } sea-orm = { workspace = true } -serde = { version = "1", features = ["derive"] } serde_json = "1" thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ diff --git a/src/meta/service/Cargo.toml b/src/meta/service/Cargo.toml index 53c3708da0e12..b3fbe5e8bcb78 100644 --- a/src/meta/service/Cargo.toml +++ b/src/meta/service/Cargo.toml @@ -7,19 +7,11 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" async-trait = "0.1" -either = "1" futures = { version = "0.3", default-features = false, features = ["alloc"] } itertools = { workspace = true } -prost = { workspace = true } rand = { workspace = true } regex = "1" risingwave_common = { workspace = true } @@ -30,7 +22,6 @@ risingwave_meta_model = { workspace = true } risingwave_pb = { workspace = true } sea-orm = { workspace = true } serde_json = "1" -sync-point = { path = "../../utils/sync-point" } thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", diff --git a/src/object_store/Cargo.toml b/src/object_store/Cargo.toml index 54bc6b4524f66..0e7d213a72321 100644 --- a/src/object_store/Cargo.toml +++ b/src/object_store/Cargo.toml @@ -15,6 +15,7 @@ workspace = true async-trait = "0.1" await-tree = { workspace = true } aws-config = { workspace = true } +# add the dependency explicitly to enable hardcoded-credentials feature aws-credential-types = { workspace = true } aws-sdk-s3 = { version = "0.5", package = "madsim-aws-sdk-s3" } aws-smithy-http = { workspace = true } @@ -23,12 +24,10 @@ aws-smithy-runtime-api = { workspace = true } aws-smithy-types = { workspace = true } bytes = { version = "1", features = ["serde"] } crc32fast = "1" -either = "1" fail = "0.5" futures = { version = "0.3", default-features = false, features = ["alloc"] } hyper = { version = "0.14", features = ["tcp", "client"] } # TODO(http-bump): required by aws sdk hyper-rustls = { version = "0.24.2", features = ["webpki-roots"] } -hyper-tls = "0.5.0" itertools = { workspace = true } madsim = "0.2.31" opendal = { workspace = true, features = [ @@ -56,11 +55,9 @@ tracing = "0.1" # This crate is excluded from hakari (see hakari.toml) after hdfs is introduced...## [target.'cfg(not(madsim))'.dependencies] # workspace-hack = { path = "../workspace-hack" } # -# [package.metadata.cargo-machete] -# ignored = ["workspace-hack"] # -# [package.metadata.cargo-udeps.ignore] -# normal = ["workspace-hack"] +# +# # [features] # hdfs-backend = ["opendal/services-hdfs", "dep:risingwave_jni_core"] diff --git a/src/prost/Cargo.toml b/src/prost/Cargo.toml index 230a654e0fcee..2918c41fcb89a 100644 --- a/src/prost/Cargo.toml +++ b/src/prost/Cargo.toml @@ -28,11 +28,5 @@ prost-build = { workspace = true } tonic-build = { workspace = true } walkdir = "2" -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [lints] workspace = true diff --git a/src/prost/helpers/Cargo.toml b/src/prost/helpers/Cargo.toml index c78ac3f2a8ece..177b1b97dadaa 100644 --- a/src/prost/helpers/Cargo.toml +++ b/src/prost/helpers/Cargo.toml @@ -11,11 +11,5 @@ proc-macro2 = { version = "1", default-features = false } quote = "1" syn = "2" -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [lints] workspace = true diff --git a/src/risedevtool/Cargo.toml b/src/risedevtool/Cargo.toml index 2c415d9f5da78..3334ea6bd4162 100644 --- a/src/risedevtool/Cargo.toml +++ b/src/risedevtool/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = { version = "1", features = ["backtrace"] } chrono = { version = "0.4", default-features = false, features = [ @@ -32,7 +26,6 @@ redis = "0.25" regex = "1" reqwest = { version = "0.12.2", features = ["blocking"] } serde = { version = "1", features = ["derive"] } -serde_json = "1" serde_with = "3" serde_yaml = "0.9" sqlx = { workspace = true, features = ["any"] } diff --git a/src/rpc_client/Cargo.toml b/src/rpc_client/Cargo.toml index 1da4027c5110c..d10a7d8d766d7 100644 --- a/src/rpc_client/Cargo.toml +++ b/src/rpc_client/Cargo.toml @@ -7,22 +7,13 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" async-trait = "0.1" easy-ext = "1" either = "1.13.0" futures = { version = "0.3", default-features = false, features = ["alloc"] } -h2 = "0.4.6" # https://github.com/risingwavelabs/risingwave/issues/18039 http = "1" -hyper = "1" -itertools = { workspace = true } lru = { workspace = true } moka = { version = "0.12.0", features = ["future"] } paste = "1" @@ -48,7 +39,6 @@ tokio-stream = { workspace = true } tonic = { workspace = true } tower = "0.5" tracing = "0.1" -url = "2.5.0" [dev-dependencies] risingwave_hummock_sdk = { workspace = true } diff --git a/src/sqlparser/Cargo.toml b/src/sqlparser/Cargo.toml index 7638d27208beb..18c1068122d34 100644 --- a/src/sqlparser/Cargo.toml +++ b/src/sqlparser/Cargo.toml @@ -15,12 +15,6 @@ path = "src/lib.rs" default = ["std"] std = [] -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] itertools = { workspace = true } serde = { version = "1.0", features = ["derive"], optional = true } diff --git a/src/sqlparser/fuzz/Cargo.toml b/src/sqlparser/fuzz/Cargo.toml index 24ebb8e6ba7fd..c031295f78fec 100644 --- a/src/sqlparser/fuzz/Cargo.toml +++ b/src/sqlparser/fuzz/Cargo.toml @@ -4,11 +4,9 @@ version = "0.1.0" edition = "2018" publish = false -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] + + [dependencies] honggfuzz = "0.5.54" diff --git a/src/sqlparser/sqlparser_bench/Cargo.toml b/src/sqlparser/sqlparser_bench/Cargo.toml index f28d7ef75e2a2..a654ad5a6fb94 100644 --- a/src/sqlparser/sqlparser_bench/Cargo.toml +++ b/src/sqlparser/sqlparser_bench/Cargo.toml @@ -4,11 +4,9 @@ version = "0.1.0" authors = ["Dandandan "] edition = "2018" -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] + + [dependencies] risingwave_sqlparser = { workspace = true } diff --git a/src/storage/Cargo.toml b/src/storage/Cargo.toml index a5ccd017bdd75..39dc955964105 100644 --- a/src/storage/Cargo.toml +++ b/src/storage/Cargo.toml @@ -7,22 +7,13 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] -ahash = "0.8.7" anyhow = "1" arc-swap = "1" async-trait = "0.1" auto_enums = { workspace = true } await-tree = { workspace = true } bytes = { version = "1", features = ["serde"] } -crossbeam = "0.8.2" -dashmap = { version = "6", default-features = false } dyn-clone = "1.0.14" either = "1" enum-as-inner = "0.6" diff --git a/src/storage/backup/Cargo.toml b/src/storage/backup/Cargo.toml index 23a2c99567c48..f9b88044eb69e 100644 --- a/src/storage/backup/Cargo.toml +++ b/src/storage/backup/Cargo.toml @@ -7,16 +7,9 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" async-trait = "0.1" -bincode = "1.3" bytes = { version = "1", features = ["serde"] } itertools = { workspace = true } parking_lot = { workspace = true } diff --git a/src/storage/compactor/Cargo.toml b/src/storage/compactor/Cargo.toml index ed8f421ee6384..d94da125ca3ec 100644 --- a/src/storage/compactor/Cargo.toml +++ b/src/storage/compactor/Cargo.toml @@ -8,18 +8,11 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] async-trait = "0.1" await-tree = { workspace = true } clap = { workspace = true } jsonbb = { workspace = true } -parking_lot = { workspace = true } prost = { workspace = true } risingwave_common = { workspace = true } risingwave_common_heap_profiling = { workspace = true } diff --git a/src/storage/hummock_sdk/Cargo.toml b/src/storage/hummock_sdk/Cargo.toml index 47042e659a35c..ec9f06fbdcd29 100644 --- a/src/storage/hummock_sdk/Cargo.toml +++ b/src/storage/hummock_sdk/Cargo.toml @@ -7,24 +7,15 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack", "num-traits"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] bytes = "1" -easy-ext = "1" hex = "0.4" itertools = { workspace = true } parse-display = "0.10" -prost = { workspace = true } risingwave_common = { workspace = true } risingwave_common_estimate_size = { workspace = true } risingwave_pb = { workspace = true } serde = { version = "1", features = ["derive"] } -serde_bytes = "0.11" tracing = "0.1" [target.'cfg(not(madsim))'.dependencies] diff --git a/src/storage/hummock_test/Cargo.toml b/src/storage/hummock_test/Cargo.toml index da96b5883462a..45a5ce62c5ba1 100644 --- a/src/storage/hummock_test/Cargo.toml +++ b/src/storage/hummock_test/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] async-trait = "0.1" bytes = { version = "1" } @@ -34,7 +28,7 @@ risingwave_object_store = { workspace = true } risingwave_pb = { workspace = true } risingwave_rpc_client = { workspace = true } risingwave_storage = { workspace = true, features = ["test"] } -serde = { version = "1", features = ["derive"] } +serial_test = { version = "3.2", optional = true } tokio = { version = "0.2", package = "madsim-tokio" } [target.'cfg(not(madsim))'.dependencies] @@ -51,12 +45,11 @@ futures = { version = "0.3", default-features = false, features = [ futures-async-stream = "0.2.9" risingwave_hummock_sdk = { workspace = true } risingwave_test_runner = { workspace = true } -serial_test = "3.2" sync-point = { path = "../../utils/sync-point" } [features] failpoints = ["risingwave_storage/failpoints"] -sync_point = ["sync-point/sync_point"] +sync_point = ["sync-point/sync_point", "serial_test"] test = [] [[bench]] diff --git a/src/stream/Cargo.toml b/src/stream/Cargo.toml index 8a0061ab99ea9..0061174fbbd7b 100644 --- a/src/stream/Cargo.toml +++ b/src/stream/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" arc-swap = "1" @@ -25,6 +19,7 @@ await-tree = { workspace = true } bytes = "1" cfg-if = "1" delta_btree_map = { path = "../utils/delta_btree_map" } +dhat = { version = "0.3", optional = true } educe = "0.6" either = "1" enum-as-inner = "0.6" @@ -32,11 +27,7 @@ fail = "0.5" foyer = { workspace = true } futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = { workspace = true } -governor = { version = "0.6", default-features = false, features = [ - "std", - "dashmap", - "jitter", -] } +governor = { workspace = true } hytra = "0.1.2" itertools = { workspace = true } jsonbb = { workspace = true } @@ -65,7 +56,6 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1" smallvec = "1" static_assertions = "1" -strum = "0.26" strum_macros = "0.26" thiserror = "1" thiserror-ext = { workspace = true } @@ -91,7 +81,6 @@ workspace-hack = { path = "../workspace-hack" } [dev-dependencies] assert_matches = "1" criterion = { workspace = true, features = ["async_tokio", "async"] } -dhat = "0.3" expect-test = "1" risingwave_expr_impl = { workspace = true } risingwave_hummock_sdk = { workspace = true } @@ -102,7 +91,7 @@ serde_yaml = "0.9" tracing-test = "0.2" [features] -dhat-heap = [] +dhat-heap = ["dhat"] [[bench]] name = "stream_hash_agg" diff --git a/src/stream/spill_test/Cargo.toml b/src/stream/spill_test/Cargo.toml index fe4fddfe2b1c3..b979fdfa30b80 100644 --- a/src/stream/spill_test/Cargo.toml +++ b/src/stream/spill_test/Cargo.toml @@ -8,14 +8,8 @@ license = { workspace = true } repository = { workspace = true } [dependencies] -async-trait = "0.1" -bytes = { version = "1" } -futures = { version = "0.3", default-features = false, features = ["alloc"] } -futures-async-stream = "0.2.9" risingwave_common = { workspace = true } -risingwave_hummock_sdk = { workspace = true } risingwave_hummock_test = { workspace = true, features = ["test"] } -risingwave_storage = { workspace = true, features = ["test"] } risingwave_stream = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio" } diff --git a/src/test_runner/Cargo.toml b/src/test_runner/Cargo.toml index 3b9819bd45dad..23ecd648aa57e 100644 --- a/src/test_runner/Cargo.toml +++ b/src/test_runner/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] fail = "0.5" sync-point = { path = "../utils/sync-point" } diff --git a/src/tests/compaction_test/Cargo.toml b/src/tests/compaction_test/Cargo.toml index 3bc86649ea3b3..2351e750c2a95 100644 --- a/src/tests/compaction_test/Cargo.toml +++ b/src/tests/compaction_test/Cargo.toml @@ -8,28 +8,15 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" -async-trait = "0.1" bytes = "1" clap = { workspace = true } foyer = { workspace = true } -futures = { version = "0.3", default-features = false, features = ["alloc"] } -prometheus = { version = "0.13" } -rand = { workspace = true } risingwave_common = { workspace = true } risingwave_compactor = { workspace = true } risingwave_hummock_sdk = { workspace = true } -risingwave_hummock_test = { workspace = true } -risingwave_meta = { workspace = true } risingwave_meta_node = { workspace = true } -risingwave_object_store = { workspace = true } risingwave_pb = { workspace = true } risingwave_rpc_client = { workspace = true } risingwave_rt = { workspace = true } diff --git a/src/tests/e2e_extended_mode/Cargo.toml b/src/tests/e2e_extended_mode/Cargo.toml index 2e88f009742a7..8e4e16f2c41cd 100644 --- a/src/tests/e2e_extended_mode/Cargo.toml +++ b/src/tests/e2e_extended_mode/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = { version = "1", features = ["backtrace"] } chrono = { version = "0.4", features = ['serde'] } diff --git a/src/tests/libpq_test/Cargo.toml b/src/tests/libpq_test/Cargo.toml index 84cefac33ef6e..8e618be7928ee 100644 --- a/src/tests/libpq_test/Cargo.toml +++ b/src/tests/libpq_test/Cargo.toml @@ -7,6 +7,5 @@ version = "0.1.0" edition = "2021" [dependencies] -anyhow = "1" libpq = "3.0" clap = { workspace = true } diff --git a/src/tests/regress/Cargo.toml b/src/tests/regress/Cargo.toml index 65248877adf60..a5095d40a7f80 100644 --- a/src/tests/regress/Cargo.toml +++ b/src/tests/regress/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = { version = "1", features = ["backtrace"] } clap = { workspace = true } diff --git a/src/tests/simulation/Cargo.toml b/src/tests/simulation/Cargo.toml index 76f2e263d7290..0c402a551b8b8 100644 --- a/src/tests/simulation/Cargo.toml +++ b/src/tests/simulation/Cargo.toml @@ -5,15 +5,11 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [package.metadata.cargo-machete] -ignored = ["serde"] - -[package.metadata.cargo-udeps.ignore] -normal = ["serde"] +ignored = ["tikv-jemallocator"] [dependencies] anyhow = "1.0" async-trait = "0.1" -aws-sdk-s3 = { version = "0.5", package = "madsim-aws-sdk-s3" } cfg-or-panic = "0.2" clap = { workspace = true } console = "0.15" @@ -28,7 +24,6 @@ maplit = "1" paste = "1" pin-project = "1.1" pretty_assertions = "1" -prometheus = { version = "0.13" } rand = { workspace = true } rand_chacha = { version = "0.3.1" } rdkafka = { workspace = true } @@ -45,7 +40,6 @@ risingwave_hummock_sdk = { workspace = true } risingwave_meta_node = { workspace = true } risingwave_object_store = { workspace = true } risingwave_pb = { workspace = true } -risingwave_rpc_client = { workspace = true } risingwave_sqlparser = { workspace = true } risingwave_sqlsmith = { workspace = true } serde = "1.0.188" @@ -56,7 +50,6 @@ tempfile = "3" tikv-jemallocator = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio" } tokio-postgres = "0.7" -tokio-stream = { workspace = true } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } uuid = "*" diff --git a/src/tests/sqlsmith/Cargo.toml b/src/tests/sqlsmith/Cargo.toml index c7cf9da9ce299..740749e58d572 100644 --- a/src/tests/sqlsmith/Cargo.toml +++ b/src/tests/sqlsmith/Cargo.toml @@ -7,12 +7,6 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" chrono = "0.4" @@ -25,7 +19,6 @@ risingwave_common = { workspace = true } risingwave_expr = { workspace = true } risingwave_expr_impl = { workspace = true } risingwave_frontend = { workspace = true } -risingwave_pb = { workspace = true } risingwave_sqlparser = { workspace = true } similar = "2.6.0" thiserror-ext = { workspace = true } diff --git a/src/tests/state_cleaning_test/Cargo.toml b/src/tests/state_cleaning_test/Cargo.toml index 6c12898343951..5a5fa90d3a85a 100644 --- a/src/tests/state_cleaning_test/Cargo.toml +++ b/src/tests/state_cleaning_test/Cargo.toml @@ -7,17 +7,10 @@ keywords = { workspace = true } license = { workspace = true } repository = { workspace = true } -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = "1" clap = { workspace = true } futures = { version = "0.3", default-features = false, features = ["alloc"] } -prometheus = { version = "0.13" } regex = "1" risingwave_rt = { workspace = true } serde = { version = "1", features = ["derive"] } diff --git a/src/utils/delta_btree_map/Cargo.toml b/src/utils/delta_btree_map/Cargo.toml index 274a028489395..879f740e89425 100644 --- a/src/utils/delta_btree_map/Cargo.toml +++ b/src/utils/delta_btree_map/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] educe = "0.6" enum-as-inner = "0.6" diff --git a/src/utils/futures_util/Cargo.toml b/src/utils/futures_util/Cargo.toml index 97bd794daaf8d..d8e763938e3e8 100644 --- a/src/utils/futures_util/Cargo.toml +++ b/src/utils/futures_util/Cargo.toml @@ -5,12 +5,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] futures = "0.3" pin-project-lite = "0.2" diff --git a/src/utils/iter_util/Cargo.toml b/src/utils/iter_util/Cargo.toml index d730c1cbc908e..2bfec2cedf147 100644 --- a/src/utils/iter_util/Cargo.toml +++ b/src/utils/iter_util/Cargo.toml @@ -5,12 +5,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] itertools = { workspace = true } diff --git a/src/utils/local_stats_alloc/Cargo.toml b/src/utils/local_stats_alloc/Cargo.toml index d80d3db38109c..b92b0b0e1359a 100644 --- a/src/utils/local_stats_alloc/Cargo.toml +++ b/src/utils/local_stats_alloc/Cargo.toml @@ -14,11 +14,5 @@ workspace-hack = { path = "../../workspace-hack" } [dev-dependencies] -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [lints] workspace = true diff --git a/src/utils/pgwire/Cargo.toml b/src/utils/pgwire/Cargo.toml index 8f0e55ff8446b..2275ed5a20571 100644 --- a/src/utils/pgwire/Cargo.toml +++ b/src/utils/pgwire/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] anyhow = { version = "1.0", default-features = false } auto_enums = { workspace = true } diff --git a/src/utils/resource_util/Cargo.toml b/src/utils/resource_util/Cargo.toml index 9680c5054bc31..175052cdfbd16 100644 --- a/src/utils/resource_util/Cargo.toml +++ b/src/utils/resource_util/Cargo.toml @@ -5,12 +5,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] fs-err = "3" sysinfo = { version = "0.33", default-features = false, features = ["system"] } diff --git a/src/utils/runtime/Cargo.toml b/src/utils/runtime/Cargo.toml index b34cd499f9ddd..0ecadea5aeb9a 100644 --- a/src/utils/runtime/Cargo.toml +++ b/src/utils/runtime/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] await-tree = { workspace = true } console = "0.15" diff --git a/src/utils/sync-point/Cargo.toml b/src/utils/sync-point/Cargo.toml index d228bfbe79a35..27b908bc517b1 100644 --- a/src/utils/sync-point/Cargo.toml +++ b/src/utils/sync-point/Cargo.toml @@ -4,12 +4,6 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] futures-util = "0.3" spin = "0.9" diff --git a/src/utils/variables/Cargo.toml b/src/utils/variables/Cargo.toml index 7bcc1b2d963c0..c897c42986f2a 100644 --- a/src/utils/variables/Cargo.toml +++ b/src/utils/variables/Cargo.toml @@ -8,12 +8,6 @@ license = { workspace = true } repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[package.metadata.cargo-machete] -ignored = ["workspace-hack"] - -[package.metadata.cargo-udeps.ignore] -normal = ["workspace-hack"] - [dependencies] chrono = { version = "0.4", default-features = false, features = [ "clock", From 6098f65dd3d628204c1ac59714ea657f7e3a19b6 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Tue, 24 Dec 2024 01:51:25 +0800 Subject: [PATCH 3/9] refactor(frontend): extract arguments for create table handler (#19898) Signed-off-by: Bugen Zhao --- src/frontend/src/handler/create_table.rs | 269 +++++++++--------- src/frontend/src/handler/create_table_as.rs | 22 +- src/frontend/src/optimizer/mod.rs | 43 +-- .../optimizer/plan_node/stream_materialize.rs | 10 +- src/sqlparser/src/ast/mod.rs | 2 +- 5 files changed, 168 insertions(+), 178 deletions(-) diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 29af1e8c62749..1ea32fa941e6d 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -502,13 +502,10 @@ pub(crate) async fn gen_create_table_plan_with_source( format_encode: FormatEncodeOptions, source_watermarks: Vec, mut col_id_gen: ColumnIdGenerator, - append_only: bool, - on_conflict: Option, - with_version_column: Option, include_column_options: IncludeOption, - engine: Engine, + props: CreateTableProps, ) -> Result<(PlanRef, Option, PbTable)> { - if append_only + if props.append_only && format_encode.format != Format::Plain && format_encode.format != Format::Native { @@ -522,6 +519,9 @@ pub(crate) async fn gen_create_table_plan_with_source( let session = &handler_args.session; let with_properties = bind_connector_props(&handler_args, &format_encode, false)?; + let db_name: &str = session.database(); + let (schema_name, _) = Binder::resolve_schema_qualified_name(db_name, table_name.clone())?; + let (columns_from_resolve_source, source_info) = bind_columns_from_source( session, &format_encode, @@ -556,14 +556,10 @@ pub(crate) async fn gen_create_table_plan_with_source( let (plan, table) = gen_table_plan_with_source( context.into(), + schema_name, source_catalog, - append_only, - on_conflict, - with_version_column, - Some(col_id_gen.into_version()), - database_id, - schema_id, - engine, + col_id_gen.into_version(), + props, )?; Ok((plan, Some(pb_source), table)) @@ -579,13 +575,8 @@ pub(crate) fn gen_create_table_plan( constraints: Vec, mut col_id_gen: ColumnIdGenerator, source_watermarks: Vec, - append_only: bool, - on_conflict: Option, - with_version_column: Option, - webhook_info: Option, - engine: Engine, + props: CreateTableProps, ) -> Result<(PlanRef, PbTable)> { - let definition = context.normalized_sql().to_owned(); let mut columns = bind_sql_columns(&column_defs)?; for c in &mut columns { c.column_desc.column_id = col_id_gen.generate(&*c)?; @@ -602,14 +593,9 @@ pub(crate) fn gen_create_table_plan( columns, column_defs, constraints, - definition, source_watermarks, - append_only, - on_conflict, - with_version_column, - Some(col_id_gen.into_version()), - webhook_info, - engine, + col_id_gen.into_version(), + props, ) } @@ -620,15 +606,11 @@ pub(crate) fn gen_create_table_plan_without_source( columns: Vec, column_defs: Vec, constraints: Vec, - definition: String, source_watermarks: Vec, - append_only: bool, - on_conflict: Option, - with_version_column: Option, - version: Option, - webhook_info: Option, - engine: Engine, + version: TableVersion, + props: CreateTableProps, ) -> Result<(PlanRef, PbTable)> { + // XXX: Why not bind outside? let pk_names = bind_sql_pk_names(&column_defs, bind_table_constraints(&constraints)?)?; let (mut columns, pk_column_ids, row_id_index) = bind_pk_and_row_id_on_relation(columns, pk_names, true)?; @@ -650,89 +632,97 @@ pub(crate) fn gen_create_table_plan_without_source( let session = context.session_ctx().clone(); let db_name = session.database(); - let (schema_name, name) = Binder::resolve_schema_qualified_name(db_name, table_name)?; - let (database_id, schema_id) = - session.get_database_and_schema_id_for_create(schema_name.clone())?; + let (schema_name, table_name) = Binder::resolve_schema_qualified_name(db_name, table_name)?; - gen_table_plan_inner( - context.into(), - name, + let info = CreateTableInfo { columns, pk_column_ids, row_id_index, - definition, watermark_descs, - append_only, - on_conflict, - with_version_column, + source_catalog: None, version, - None, - database_id, - schema_id, - webhook_info, - engine, - ) + }; + + gen_table_plan_inner(context.into(), schema_name, table_name, info, props) } fn gen_table_plan_with_source( context: OptimizerContextRef, + schema_name: Option, source_catalog: SourceCatalog, - append_only: bool, - on_conflict: Option, - with_version_column: Option, - version: Option, /* TODO: this should always be `Some` if we support `ALTER - * TABLE` for `CREATE TABLE AS`. */ - database_id: DatabaseId, - schema_id: SchemaId, - engine: Engine, + version: TableVersion, + props: CreateTableProps, ) -> Result<(PlanRef, PbTable)> { - let cloned_source_catalog = source_catalog.clone(); - gen_table_plan_inner( - context, - source_catalog.name, - source_catalog.columns, - source_catalog.pk_col_ids, - source_catalog.row_id_index, - source_catalog.definition, - source_catalog.watermark_descs, - append_only, - on_conflict, - with_version_column, + let table_name = source_catalog.name.clone(); + + let info = CreateTableInfo { + columns: source_catalog.columns.clone(), + pk_column_ids: source_catalog.pk_col_ids.clone(), + row_id_index: source_catalog.row_id_index, + watermark_descs: source_catalog.watermark_descs.clone(), + source_catalog: Some(source_catalog), version, - Some(cloned_source_catalog), - database_id, - schema_id, - None, - engine, - ) + }; + + gen_table_plan_inner(context, schema_name, table_name, info, props) +} + +/// Arguments of the functions that generate a table plan, part 1. +/// +/// Compared to [`CreateTableProps`], this struct contains fields that need some work of binding +/// or resolving based on the user input. +pub struct CreateTableInfo { + pub columns: Vec, + pub pk_column_ids: Vec, + pub row_id_index: Option, + pub watermark_descs: Vec, + pub source_catalog: Option, + pub version: TableVersion, +} + +/// Arguments of the functions that generate a table plan, part 2. +/// +/// Compared to [`CreateTableInfo`], this struct contains fields that can be (relatively) simply +/// obtained from the input or the context. +pub struct CreateTableProps { + pub definition: String, + pub append_only: bool, + pub on_conflict: Option, + pub with_version_column: Option, + pub webhook_info: Option, + pub engine: Engine, } #[allow(clippy::too_many_arguments)] fn gen_table_plan_inner( context: OptimizerContextRef, + schema_name: Option, table_name: String, - columns: Vec, - pk_column_ids: Vec, - row_id_index: Option, - definition: String, - watermark_descs: Vec, - append_only: bool, - on_conflict: Option, - with_version_column: Option, - version: Option, /* TODO: this should always be `Some` if we support `ALTER - * TABLE` for `CREATE TABLE AS`. */ - source_catalog: Option, - database_id: DatabaseId, - schema_id: SchemaId, - webhook_info: Option, - engine: Engine, + info: CreateTableInfo, + props: CreateTableProps, ) -> Result<(PlanRef, PbTable)> { + let CreateTableInfo { + ref columns, + row_id_index, + ref watermark_descs, + ref source_catalog, + .. + } = info; + let CreateTableProps { + append_only, + on_conflict, + .. + } = props; + + let (database_id, schema_id) = context + .session_ctx() + .get_database_and_schema_id_for_create(schema_name)?; + let session = context.session_ctx().clone(); let retention_seconds = context.with_options().retention_seconds(); - let is_external_source = source_catalog.is_some(); let source_node: PlanRef = LogicalSource::new( - source_catalog.map(|source| Rc::new(source.clone())), + source_catalog.clone().map(Rc::new), columns.clone(), row_id_index, SourceNodeKind::CreateTable, @@ -784,20 +774,11 @@ fn gen_table_plan_inner( let materialize = plan_root.gen_table_plan( context, table_name, - columns, - definition, - pk_column_ids, - row_id_index, - append_only, - on_conflict, - with_version_column, - watermark_descs, - version, - is_external_source, - retention_seconds, - None, - webhook_info, - engine, + info, + CreateTableProps { + on_conflict, + ..props + }, )?; let mut table = materialize.table().to_prost(schema_id, database_id); @@ -919,24 +900,27 @@ pub(crate) fn gen_create_table_plan_for_cdc_table( let materialize = plan_root.gen_table_plan( context, resolved_table_name, - columns, - definition, - pk_column_ids, - None, - false, - on_conflict, - with_version_column, - vec![], - Some(col_id_gen.into_version()), - true, - None, - Some(cdc_table_id), - None, - engine, + CreateTableInfo { + columns, + pk_column_ids, + row_id_index: None, + watermark_descs: vec![], + source_catalog: Some((*source).clone()), + version: col_id_gen.into_version(), + }, + CreateTableProps { + definition, + append_only: false, + on_conflict, + with_version_column, + webhook_info: None, + engine, + }, )?; let mut table = materialize.table().to_prost(schema_id, database_id); table.owner = session.user_id(); + table.cdc_table_id = Some(cdc_table_id); table.dependent_relations = vec![source.id]; Ok((materialize.into(), table)) @@ -1024,6 +1008,18 @@ pub(super) async fn handle_create_table_plan( &include_column_options, &cdc_table_info, )?; + let webhook_info = webhook_info + .map(|info| bind_webhook_info(&handler_args.session, &column_defs, info)) + .transpose()?; + + let props = CreateTableProps { + definition: handler_args.normalized_sql.clone(), + append_only, + on_conflict, + with_version_column: with_version_column.clone(), + webhook_info, + engine, + }; let ((plan, source, table), job_type) = match (format_encode, cdc_table_info.as_ref()) { (Some(format_encode), None) => ( @@ -1037,20 +1033,13 @@ pub(super) async fn handle_create_table_plan( format_encode, source_watermarks, col_id_gen, - append_only, - on_conflict, - with_version_column, include_column_options, - engine, + props, ) .await?, TableJobType::General, ), (None, None) => { - let webhook_info = webhook_info - .map(|info| bind_webhook_info(&handler_args.session, &column_defs, info)) - .transpose()?; - let context = OptimizerContext::new(handler_args, explain_options); let (plan, table) = gen_create_table_plan( context, @@ -1059,11 +1048,7 @@ pub(super) async fn handle_create_table_plan( constraints, col_id_gen, source_watermarks, - append_only, - on_conflict, - with_version_column, - webhook_info, - engine, + props, )?; ((plan, None, table), TableJobType::General) @@ -1784,6 +1769,15 @@ pub async fn generate_stream_graph_for_replace_table( ) -> Result<(StreamFragmentGraph, Table, Option, TableJobType)> { use risingwave_pb::catalog::table::OptionalAssociatedSourceId; + let props = CreateTableProps { + definition: handler_args.normalized_sql.clone(), + append_only, + on_conflict, + with_version_column: with_version_column.clone(), + webhook_info: original_catalog.webhook_info.clone(), + engine, + }; + let ((plan, mut source, table), job_type) = match (format_encode, cdc_table_info.as_ref()) { (Some(format_encode), None) => ( gen_create_table_plan_with_source( @@ -1796,11 +1790,8 @@ pub async fn generate_stream_graph_for_replace_table( format_encode, source_watermarks, col_id_gen, - append_only, - on_conflict, - with_version_column, include_column_options, - engine, + props, ) .await?, TableJobType::General, @@ -1814,11 +1805,7 @@ pub async fn generate_stream_graph_for_replace_table( constraints, col_id_gen, source_watermarks, - append_only, - on_conflict, - with_version_column, - original_catalog.webhook_info.clone(), - engine, + props, )?; ((plan, None, table), TableJobType::General) } diff --git a/src/frontend/src/handler/create_table_as.rs b/src/frontend/src/handler/create_table_as.rs index d90ad6afe9a73..d9019656e4e98 100644 --- a/src/frontend/src/handler/create_table_as.rs +++ b/src/frontend/src/handler/create_table_as.rs @@ -21,7 +21,9 @@ use risingwave_sqlparser::ast::{ColumnDef, ObjectName, OnConflict, Query, Statem use super::{HandlerArgs, RwPgResponse}; use crate::binder::BoundStatement; use crate::error::{ErrorCode, Result}; -use crate::handler::create_table::{gen_create_table_plan_without_source, ColumnIdGenerator}; +use crate::handler::create_table::{ + gen_create_table_plan_without_source, ColumnIdGenerator, CreateTableProps, +}; use crate::handler::query::handle_query; use crate::{build_graph, Binder, OptimizerContext}; pub async fn handle_create_as( @@ -107,14 +109,16 @@ pub async fn handle_create_as( columns, vec![], vec![], - "".to_owned(), // TODO: support `SHOW CREATE TABLE` for `CREATE TABLE AS` - vec![], // No watermark should be defined in for `CREATE TABLE AS` - append_only, - on_conflict, - with_version_column, - Some(col_id_gen.into_version()), - None, - engine, + vec![], // No watermark should be defined in for `CREATE TABLE AS` + col_id_gen.into_version(), + CreateTableProps { + definition: "".to_owned(), // TODO: empty definition means no schema change support + append_only, + on_conflict, + with_version_column, + webhook_info: None, + engine, + }, )?; let graph = build_graph(plan)?; diff --git a/src/frontend/src/optimizer/mod.rs b/src/frontend/src/optimizer/mod.rs index 14e45f02517f4..230224557b63b 100644 --- a/src/frontend/src/optimizer/mod.rs +++ b/src/frontend/src/optimizer/mod.rs @@ -51,14 +51,11 @@ pub use optimizer_context::*; use plan_expr_rewriter::ConstEvalRewriter; use property::Order; use risingwave_common::bail; -use risingwave_common::catalog::{ - ColumnCatalog, ColumnDesc, ColumnId, ConflictBehavior, Engine, Field, Schema, -}; +use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, ConflictBehavior, Field, Schema}; use risingwave_common::types::DataType; use risingwave_common::util::column_index_mapping::ColIndexMapping; use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_connector::sink::catalog::SinkFormatDesc; -use risingwave_pb::catalog::{PbWebhookSourceInfo, WatermarkDesc}; use risingwave_pb::stream_plan::StreamScanType; use self::heuristic_optimizer::ApplyOrder; @@ -73,9 +70,10 @@ use self::plan_visitor::InputRefValidator; use self::plan_visitor::{has_batch_exchange, CardinalityVisitor, StreamKeyChecker}; use self::property::{Cardinality, RequiredDist}; use self::rule::*; -use crate::catalog::table_catalog::{TableType, TableVersion}; +use crate::catalog::table_catalog::TableType; use crate::error::{ErrorCode, Result}; use crate::expr::TimestamptzExprFinder; +use crate::handler::create_table::{CreateTableInfo, CreateTableProps}; use crate::optimizer::plan_node::generic::{SourceNodeKind, Union}; use crate::optimizer::plan_node::{ BatchExchange, PlanNodeType, PlanTreeNode, RewriteExprsRecursive, StreamExchange, StreamUnion, @@ -639,25 +637,26 @@ impl PlanRoot { } /// Optimize and generate a create table plan. - #[allow(clippy::too_many_arguments)] pub fn gen_table_plan( mut self, context: OptimizerContextRef, table_name: String, - columns: Vec, - definition: String, - pk_column_ids: Vec, - row_id_index: Option, - append_only: bool, - on_conflict: Option, - with_version_column: Option, - watermark_descs: Vec, - version: Option, - with_external_source: bool, - retention_seconds: Option, - cdc_table_id: Option, - webhook_info: Option, - engine: Engine, + CreateTableInfo { + columns, + pk_column_ids, + row_id_index, + watermark_descs, + source_catalog, + version, + }: CreateTableInfo, + CreateTableProps { + definition, + append_only, + on_conflict, + with_version_column, + webhook_info, + engine, + }: CreateTableProps, ) -> Result { assert_eq!(self.phase, PlanPhase::Logical); assert_eq!(self.plan.convention(), Convention::Logical); @@ -751,6 +750,7 @@ impl PlanRoot { None }; + let with_external_source = source_catalog.is_some(); let union_inputs = if with_external_source { let mut external_source_node = stream_plan; external_source_node = @@ -868,6 +868,8 @@ impl PlanRoot { ))? } + let retention_seconds = context.with_options().retention_seconds(); + let table_required_dist = { let mut bitset = FixedBitSet::with_capacity(columns.len()); for idx in &pk_column_indices { @@ -891,7 +893,6 @@ impl PlanRoot { row_id_index, version, retention_seconds, - cdc_table_id, webhook_info, engine, ) diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs index 2793c84b1d555..6e4339658966b 100644 --- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs +++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs @@ -97,6 +97,7 @@ impl StreamMaterialize { } else { CreateType::Foreground }; + let table = Self::derive_table_catalog( input.clone(), name, @@ -136,15 +137,14 @@ impl StreamMaterialize { version_column_index: Option, pk_column_indices: Vec, row_id_index: Option, - version: Option, + version: TableVersion, retention_seconds: Option, - cdc_table_id: Option, webhook_info: Option, engine: Engine, ) -> Result { let input = Self::rewrite_input(input, user_distributed_by, TableType::Table)?; - let mut table = Self::derive_table_catalog( + let table = Self::derive_table_catalog( input.clone(), name, user_order_by, @@ -155,7 +155,7 @@ impl StreamMaterialize { Some(pk_column_indices), row_id_index, TableType::Table, - version, + Some(version), Cardinality::unknown(), // unknown cardinality for tables retention_seconds, CreateType::Foreground, @@ -163,8 +163,6 @@ impl StreamMaterialize { engine, )?; - table.cdc_table_id = cdc_table_id; - Ok(Self::new(input, table)) } diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index c0fce4a2d4780..ec525ddcc5951 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -2839,7 +2839,7 @@ impl fmt::Display for EmitMode { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum OnConflict { UpdateFull, From 16c7fcd2e483cb6b154ba3ab78fde37f060ad5b4 Mon Sep 17 00:00:00 2001 From: zwang28 <70626450+zwang28@users.noreply.github.com> Date: Tue, 24 Dec 2024 12:14:23 +0800 Subject: [PATCH 4/9] fix(meta): skip unnecessary delta persistence (#19878) --- src/meta/src/hummock/manager/time_travel.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/meta/src/hummock/manager/time_travel.rs b/src/meta/src/hummock/manager/time_travel.rs index 3e333ecafb94c..acd851b3b2781 100644 --- a/src/meta/src/hummock/manager/time_travel.rs +++ b/src/meta/src/hummock/manager/time_travel.rs @@ -484,6 +484,8 @@ impl HummockManager { .on_conflict_do_nothing() .exec(txn) .await?; + // Return early to skip persisting delta. + return Ok(version_sst_ids); } let written = write_sstable_infos( delta.newly_added_sst_infos().filter(|s| { From af666704444506b48f3ae6e6e0869ee99bd53bed Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Dec 2024 04:37:01 +0000 Subject: [PATCH 5/9] chore(deps): Bump sqllogictest from 0.23.0 to 0.24.0 (#19903) Signed-off-by: dependabot[bot] Signed-off-by: xxchan Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: xxchan --- .github/dependabot.yml | 3 +++ Cargo.lock | 4 ++-- Makefile.toml | 2 +- ci/Dockerfile | 2 +- ci/build-ci-image.sh | 2 +- ci/docker-compose.yml | 14 +++++++------- src/tests/simulation/Cargo.toml | 2 +- 7 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c43a610fa2c24..da99e7a0a0d23 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -22,6 +22,9 @@ updates: update-types: ["version-update:semver-minor", "version-update:semver-major"] - dependency-name: "parquet" update-types: ["version-update:semver-minor", "version-update:semver-major"] + # bump sqllogictest manually together with sqllogictest-bin in CI docker image + - dependency-name: "sqllogictest" + update-types: ["version-update:semver-minor", "version-update:semver-major"] # Create a group of dependencies to be updated together in one pull request groups: aws: diff --git a/Cargo.lock b/Cargo.lock index 7e770880971b0..aef4a966ebb30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13615,9 +13615,9 @@ dependencies = [ [[package]] name = "sqllogictest" -version = "0.23.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec31dce96f489e2247a165837f49bbce4912b0cbcf127b79b4fdd87503022ae9" +checksum = "48c03edcabfda1ab894cc63a115b9f014bfc6875916b850ab7498d3cb92daed9" dependencies = [ "async-trait", "educe", diff --git a/Makefile.toml b/Makefile.toml index 6c15c231e2c73..b3793adfad283 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -1340,7 +1340,7 @@ echo "All processes has exited." [tasks.slt] category = "RiseDev - Test - SQLLogicTest" -install_crate = { min_version = "0.23.1", crate_name = "sqllogictest-bin", binary = "sqllogictest", test_arg = [ +install_crate = { min_version = "0.24.0", crate_name = "sqllogictest-bin", binary = "sqllogictest", test_arg = [ "--help", ], install_command = "binstall" } dependencies = ["check-and-load-risedev-env-file"] diff --git a/ci/Dockerfile b/ci/Dockerfile index 21b9c30c678b0..7478d24b66edc 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -70,7 +70,7 @@ ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash RUN cargo binstall -y --locked --no-symlinks cargo-llvm-cov cargo-nextest cargo-hakari cargo-sort cargo-cache cargo-audit \ cargo-make@0.37.9 \ - sqllogictest-bin@0.23.1 \ + sqllogictest-bin@0.24.0 \ sccache@0.7.4 \ && cargo cache -a \ && rm -rf "/root/.cargo/registry/index" \ diff --git a/ci/build-ci-image.sh b/ci/build-ci-image.sh index 86392d2ecd7d8..969239dea82c2 100755 --- a/ci/build-ci-image.sh +++ b/ci/build-ci-image.sh @@ -10,7 +10,7 @@ cat ../rust-toolchain # shellcheck disable=SC2155 # REMEMBER TO ALSO UPDATE ci/docker-compose.yml -export BUILD_ENV_VERSION=v20241213 +export BUILD_ENV_VERSION=v20241224 export BUILD_TAG="public.ecr.aws/w1p7b4n3/rw-build-env:${BUILD_ENV_VERSION}" diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 7ef17777b390c..4b8d795c142f5 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -90,7 +90,7 @@ services: retries: 5 source-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241213 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241224 depends_on: - mysql - mysql-meta @@ -106,7 +106,7 @@ services: - ..:/risingwave sink-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241213 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241224 depends_on: - mysql - mysql-meta @@ -129,13 +129,13 @@ services: - ..:/risingwave rw-build-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241213 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241224 volumes: - ..:/risingwave # Standard environment for CI, including MySQL and Postgres for metadata. ci-standard-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241213 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241224 depends_on: - mysql-meta - db @@ -143,14 +143,14 @@ services: - ..:/risingwave iceberg-engine-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241213 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241224 depends_on: - db volumes: - ..:/risingwave ci-flamegraph-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241213 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241224 # NOTE(kwannoel): This is used in order to permit # syscalls for `nperf` (perf_event_open), # so it can do CPU profiling. @@ -161,7 +161,7 @@ services: - ..:/risingwave regress-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241213 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20241224 depends_on: db: condition: service_healthy diff --git a/src/tests/simulation/Cargo.toml b/src/tests/simulation/Cargo.toml index 0c402a551b8b8..9fc7c348f6acd 100644 --- a/src/tests/simulation/Cargo.toml +++ b/src/tests/simulation/Cargo.toml @@ -45,7 +45,7 @@ risingwave_sqlsmith = { workspace = true } serde = "1.0.188" serde_derive = "1.0.188" serde_json = "1.0.107" -sqllogictest = "0.23.0" +sqllogictest = "0.24.0" tempfile = "3" tikv-jemallocator = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio" } From 1b9b5a797396ce19e08b55c0f111e6e30310fb89 Mon Sep 17 00:00:00 2001 From: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Date: Tue, 24 Dec 2024 12:59:52 +0800 Subject: [PATCH 6/9] fix(sink): fix mogodb write error handling (#19869) --- src/connector/src/sink/mongodb.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/connector/src/sink/mongodb.rs b/src/connector/src/sink/mongodb.rs index 244a1bb70db8d..44d116cedf093 100644 --- a/src/connector/src/sink/mongodb.rs +++ b/src/connector/src/sink/mongodb.rs @@ -81,6 +81,12 @@ mod send_bulk_write_command_future { ))) })?; + if let Ok(ok) = result.get_i32("ok") + && ok != 1 + { + return Err(SinkError::Mongodb(anyhow!("bulk write write errors"))); + } + if let Ok(write_errors) = result.get_array("writeErrors") { return Err(SinkError::Mongodb(anyhow!( "bulk write respond with write errors: {:?}", @@ -88,15 +94,10 @@ mod send_bulk_write_command_future { ))); } - let n = result.get_i32("n").map_err(|err| { - SinkError::Mongodb( - anyhow!(err).context("can't extract field n from bulk write response"), - ) - })?; - if n < 1 { + if let Ok(write_concern_error) = result.get_array("writeConcernError") { return Err(SinkError::Mongodb(anyhow!( - "bulk write respond with an abnormal state, n = {}", - n + "bulk write respond with write errors: {:?}", + write_concern_error, ))); } From 81f651f871db375313e1611558269e3e6f757b57 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Dec 2024 13:28:23 +0800 Subject: [PATCH 7/9] chore(deps): Bump http from 1.1.0 to 1.2.0 (#19904) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 68 +++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aef4a966ebb30..601c2966b37d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1581,7 +1581,7 @@ dependencies = [ "hex", "hmac", "http 0.2.9", - "http 1.1.0", + "http 1.2.0", "once_cell", "p256 0.11.1", "percent-encoding", @@ -1713,7 +1713,7 @@ dependencies = [ "aws-smithy-types", "bytes", "http 0.2.9", - "http 1.1.0", + "http 1.2.0", "pin-project-lite", "tokio", "tracing", @@ -1731,7 +1731,7 @@ dependencies = [ "bytes-utils", "futures-core", "http 0.2.9", - "http 1.1.0", + "http 1.2.0", "http-body 0.4.5", "http-body 1.0.0", "http-body-util", @@ -1790,7 +1790,7 @@ dependencies = [ "axum-core", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "http-body-util", "hyper 1.4.1", @@ -1823,7 +1823,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "http-body-util", "mime", @@ -1843,7 +1843,7 @@ checksum = "077959a7f8cf438676af90b483304528eb7e16eadadb7f44e9ada4f9dceb9e62" dependencies = [ "axum-core", "chrono", - "http 1.1.0", + "http 1.2.0", "mime_guess", "rust-embed", "tower-service", @@ -1859,7 +1859,7 @@ dependencies = [ "axum-core", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "http-body-util", "mime", @@ -5602,7 +5602,7 @@ checksum = "cae77099e2399aea466bba05f0d23a150b4f34ed7ce535835e71d91399e65b58" dependencies = [ "anyhow", "async-trait", - "http 1.1.0", + "http 1.2.0", "thiserror 1.0.63", "tokio", "tonic", @@ -5685,7 +5685,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3eaaad103912825594d674a4b1e556ccbb05a13a6cac17dcfd871997fb760a" dependencies = [ "google-cloud-token", - "http 1.1.0", + "http 1.2.0", "thiserror 1.0.63", "tokio", "tokio-retry", @@ -5813,7 +5813,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.1.0", + "http 1.2.0", "indexmap 2.7.0", "slab", "tokio", @@ -6012,9 +6012,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -6039,7 +6039,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ "bytes", - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -6050,7 +6050,7 @@ checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" dependencies = [ "bytes", "futures-core", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "pin-project-lite", ] @@ -6113,7 +6113,7 @@ dependencies = [ "futures-channel", "futures-util", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "httparse", "httpdate", @@ -6148,7 +6148,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", - "http 1.1.0", + "http 1.2.0", "hyper 1.4.1", "hyper-util", "rustls 0.22.4", @@ -6209,7 +6209,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "hyper 1.4.1", "pin-project-lite", @@ -6327,7 +6327,7 @@ source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=53f786fb214 dependencies = [ "async-trait", "chrono", - "http 1.1.0", + "http 1.2.0", "iceberg", "itertools 0.13.0", "log", @@ -7319,7 +7319,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "http 1.1.0", + "http 1.2.0", "madsim", "spin 0.9.8", "tracing", @@ -8266,7 +8266,7 @@ dependencies = [ "flagset", "futures", "getrandom", - "http 1.1.0", + "http 1.2.0", "log", "md-5", "once_cell", @@ -8298,7 +8298,7 @@ dependencies = [ "flagset", "futures", "getrandom", - "http 1.1.0", + "http 1.2.0", "log", "md-5", "once_cell", @@ -8440,7 +8440,7 @@ checksum = "6b925a602ffb916fb7421276b86756027b37ee708f9dce2dbdcc51739f07e727" dependencies = [ "async-trait", "futures-core", - "http 1.1.0", + "http 1.2.0", "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", @@ -10188,7 +10188,7 @@ dependencies = [ "hex", "hmac", "home", - "http 1.1.0", + "http 1.2.0", "jsonwebtoken", "log", "once_cell", @@ -10260,7 +10260,7 @@ dependencies = [ "futures-core", "futures-util", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "http-body-util", "hyper 1.4.1", @@ -10307,7 +10307,7 @@ checksum = "a45d100244a467870f6cb763c4484d010a6bed6bd610b3676e3825d93fb4cfbd" dependencies = [ "anyhow", "async-trait", - "http 1.1.0", + "http 1.2.0", "reqwest 0.12.4", "serde", "thiserror 1.0.63", @@ -10692,7 +10692,7 @@ dependencies = [ "governor", "hashbrown 0.14.5", "hex", - "http 1.1.0", + "http 1.2.0", "http-body 0.4.5", "humantime", "itertools 0.13.0", @@ -10806,7 +10806,7 @@ dependencies = [ "easy-ext", "futures", "http 0.2.9", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "hyper 0.14.27", "hyper 1.4.1", @@ -10868,7 +10868,7 @@ dependencies = [ "axum", "axum-extra", "futures", - "http 1.1.0", + "http 1.2.0", "madsim-tokio", "madsim-tonic", "prometheus", @@ -10941,7 +10941,7 @@ dependencies = [ "foyer", "futures", "futures-async-stream", - "http 1.1.0", + "http 1.2.0", "itertools 0.13.0", "madsim-tokio", "madsim-tonic", @@ -11597,7 +11597,7 @@ dependencies = [ "fail", "futures", "hex", - "http 1.1.0", + "http 1.2.0", "itertools 0.13.0", "jsonbb", "madsim-tokio", @@ -11845,7 +11845,7 @@ dependencies = [ "easy-ext", "either", "futures", - "http 1.1.0", + "http 1.2.0", "lru 0.7.6", "madsim-tokio", "madsim-tonic", @@ -14608,7 +14608,7 @@ dependencies = [ "bytes", "futures-core", "futures-sink", - "http 1.1.0", + "http 1.2.0", "httparse", "rand", "ring 0.17.5", @@ -14702,7 +14702,7 @@ dependencies = [ "bytes", "flate2", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "http-body-util", "hyper 1.4.1", @@ -14781,7 +14781,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.0", "http-body-util", "http-range-header", From 86c3933ff3f14ef5bf226a004d7ac6a9fd72c60e Mon Sep 17 00:00:00 2001 From: zwang28 <70626450+zwang28@users.noreply.github.com> Date: Tue, 24 Dec 2024 19:52:53 +0800 Subject: [PATCH 8/9] fix(metric): include table change log when calculating Hummock version size (#19920) --- src/storage/hummock_sdk/src/version.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/storage/hummock_sdk/src/version.rs b/src/storage/hummock_sdk/src/version.rs index 7f4b4dafb1bf7..f0bd4582752be 100644 --- a/src/storage/hummock_sdk/src/version.rs +++ b/src/storage/hummock_sdk/src/version.rs @@ -271,6 +271,21 @@ impl HummockVersion { .values() .map(|table_watermark| table_watermark.estimated_encode_len()) .sum::() + + self + .table_change_log + .values() + .map(|c| { + c.0.iter() + .map(|l| { + l.old_value + .iter() + .chain(l.new_value.iter()) + .map(|s| s.estimated_encode_len()) + .sum::() + }) + .sum::() + }) + .sum::() } } From 3431eabc3ef3c4f6431ba39546707c94a1d5b375 Mon Sep 17 00:00:00 2001 From: Li0k Date: Tue, 24 Dec 2024 21:03:53 +0800 Subject: [PATCH 9/9] feat(frontend): Supports cut OR condition and push down to storage (#19812) --- src/common/src/util/scan_range.rs | 605 ++++++++++++++++++ .../tests/testdata/input/range_scan.yaml | 347 ++++++---- .../tests/testdata/output/range_scan.yaml | 133 ++++ src/frontend/src/utils/condition.rs | 171 ++++- 4 files changed, 1124 insertions(+), 132 deletions(-) diff --git a/src/common/src/util/scan_range.rs b/src/common/src/util/scan_range.rs index b45db071be3cc..5c56550eed279 100644 --- a/src/common/src/util/scan_range.rs +++ b/src/common/src/util/scan_range.rs @@ -12,12 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::ops::{Bound, RangeBounds}; +use itertools::Itertools; use paste::paste; use risingwave_pb::batch_plan::scan_range::Bound as PbBound; use risingwave_pb::batch_plan::ScanRange as PbScanRange; +use super::sort_util::{cmp_rows, OrderType}; use crate::hash::table_distribution::TableDistribution; use crate::hash::VirtualNode; use crate::types::{Datum, ScalarImpl}; @@ -98,6 +101,147 @@ impl ScanRange { range: full_range(), } } + + pub fn convert_to_range(&self) -> (Bound>, Bound>) { + fn handle_bound(eq_conds: &Vec, bound: &Bound>) -> Bound> { + match bound { + Bound::Included(literal) => { + let mut prefix = eq_conds.clone(); + prefix.extend_from_slice(literal); + Bound::Included(prefix) + } + Bound::Excluded(literal) => { + let mut prefix = eq_conds.clone(); + prefix.extend_from_slice(literal); + Bound::Excluded(prefix) + } + Bound::Unbounded => { + if eq_conds.is_empty() { + Bound::Unbounded + } else { + Bound::Included(eq_conds.clone()) + } + } + } + } + + let new_left = handle_bound(&self.eq_conds, &self.range.0); + let new_right = handle_bound(&self.eq_conds, &self.range.1); + (new_left, new_right) + } + + pub fn is_overlap(left: &ScanRange, right: &ScanRange, order_types: &[OrderType]) -> bool { + let range_left = left.convert_to_range(); + let range_right = right.convert_to_range(); + Self::range_overlap_check(range_left, range_right, order_types) + } + + fn range_overlap_check( + left: (Bound>, Bound>), + right: (Bound>, Bound>), + order_types: &[OrderType], + ) -> bool { + let (left_start, left_end) = &left; + let (right_start, right_end) = &right; + + let left_start_vec = match &left_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_start_vec = match &right_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + if left_start_vec.is_empty() && right_start_vec.is_empty() { + return true; + } + + let order_types = if order_types.iter().all(|o| o.is_ascending()) { + order_types + } else { + // reverse order types to ascending + &order_types + .iter() + .cloned() + .map(|o| if o.is_descending() { o.reverse() } else { o }) + .collect_vec() + }; + + // Unbounded is always less than any other bound + if left_start_vec.is_empty() { + // pass + } else if right_start_vec.is_empty() { + return Self::range_overlap_check(right, left, order_types); + } else { + assert!(!left_start_vec.is_empty()); + assert!(!right_start_vec.is_empty()); + let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + let cmp_start = cmp_rows( + &left_start_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); + + let right_start_before_left_start = cmp_start.is_gt(); + + if right_start_before_left_start { + return Self::range_overlap_check(right, left, order_types); + } + + if cmp_start == Ordering::Equal + && let (Bound::Included(_), Bound::Included(_)) = (left_start, right_start) + { + return true; + } + } + + let left_end_vec = match &left_end { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_end_vec = match &right_end { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + if left_end_vec.is_empty() && right_end_vec.is_empty() { + return true; + } + + if left_end_vec.is_empty() { + true + } else { + // cmp left_end and right_start + assert!(!left_end_vec.is_empty()); + assert!(!right_start_vec.is_empty()); + + let cmp_column_len = left_end_vec.len().min(right_start_vec.len()); + let cmp_end = cmp_rows( + &left_end_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ); + + match cmp_end { + Ordering::Equal => { + if let (Bound::Included(_), Bound::Included(_)) = (left_end, right_start) { + return true; + } + } + + Ordering::Greater => { + return true; + } + + Ordering::Less => { + return false; + } + } + + false + } + } } pub const fn full_range() -> (Bound, Bound) { @@ -221,4 +365,465 @@ mod tests { assert_eq!(scan_range.try_compute_vnode(&dist), Some(vnode)); } + + #[test] + fn test_convert_to_range() { + { + // test empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(1))]), + Bound::Included(vec![Some(ScalarImpl::from(2))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Included(vec![Some(ScalarImpl::from(2))])); + } + + { + // test exclude bound with empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Excluded(vec![Some(ScalarImpl::from(1))]), + Bound::Excluded(vec![Some(ScalarImpl::from(2))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!(left, Bound::Excluded(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Excluded(vec![Some(ScalarImpl::from(2))])); + } + + { + // test include bound with empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(1))]), + Bound::Unbounded, + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!(left, Bound::Included(vec![Some(ScalarImpl::from(1))])); + assert_eq!(right, Bound::Unbounded); + } + + { + // test exclude bound with non-empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Excluded(vec![Some(ScalarImpl::from(2))]), + Bound::Excluded(vec![Some(ScalarImpl::from(3))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!( + left, + Bound::Excluded(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!( + right, + Bound::Excluded(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(3))]) + ); + } + + { + // test include bound with non-empty eq_conds + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(2))]), + Bound::Included(vec![Some(ScalarImpl::from(3))]), + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!( + left, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!( + right, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(3))]) + ); + } + + { + let scan_range = ScanRange { + eq_conds: vec![Some(ScalarImpl::from(1))], + range: ( + Bound::Included(vec![Some(ScalarImpl::from(2))]), + Bound::Unbounded, + ), + }; + + let (left, right) = scan_range.convert_to_range(); + assert_eq!( + left, + Bound::Included(vec![Some(ScalarImpl::from(1)), Some(ScalarImpl::from(2))]) + ); + assert_eq!(right, Bound::Included(vec![Some(ScalarImpl::from(1))])); + } + } + + #[test] + fn test_range_overlap_check() { + let order_types = vec![OrderType::ascending()]; + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Excluded) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Included) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Included, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Excluded, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Excluded, Excluded) vs (Excluded, Excluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(5))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3)), Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7)), Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(5))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(2))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Included) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(3))]), + Bound::Included(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Excluded, Encluded) + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Excluded(vec![Some(ScalarImpl::Int32(3))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + // (Included, Included) vs (Included, Encluded) + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Excluded(vec![Some(ScalarImpl::Int32(7))]) + ), + &order_types + )); + + assert!(!ScanRange::range_overlap_check( + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(10))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + (Bound::Unbounded, Bound::Unbounded,), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + (Bound::Unbounded, Bound::Unbounded), + (Bound::Unbounded, Bound::Unbounded), + &order_types + )); + + assert!(!ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + Bound::Unbounded, + ), + &order_types + )); + + assert!(ScanRange::range_overlap_check( + ( + Bound::Included(vec![Some(ScalarImpl::Int32(1))]), + Bound::Included(vec![Some(ScalarImpl::Int32(3))]) + ), + ( + Bound::Unbounded, + Bound::Included(vec![Some(ScalarImpl::Int32(5))]), + ), + &order_types + )); + } } diff --git a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml index 52863fa5aaf77..374da681d0987 100644 --- a/src/frontend/planner_test/tests/testdata/input/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/input/range_scan.yaml @@ -6,182 +6,184 @@ date INTEGER); CREATE MATERIALIZED VIEW orders_count_by_user AS SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date; + CREATE MATERIALIZED VIEW orders_count_by_user_desc AS + SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date ORDER BY user_id DESC; expected_outputs: [] - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id < 43 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 + 1 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1/0 expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 2147483647 + 1 expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 'a' expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 'a' expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = '42' expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id IS NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > NULL expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date = 1111 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 42 AND date = 1111 AND 2>1 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE date > 1111 AND user_id = 42 AND 5<6 AND date <= 6666 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42, 43) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42+1, 44-1) expected_outputs: - - batch_plan + - batch_plan - name: If the IN list has a larger type than the column, the InputRef is casted. Currently this case is not converted to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (42.0, 43.0) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in ('42', '43') expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in ('42', '43.0') expected_outputs: - - batch_error + - batch_error - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (2147483648, 2147483649) AND date = 6666 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) expected_outputs: - - batch_plan + - batch_plan - name: test duplicate value in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 2222) expected_outputs: - - batch_plan + - batch_plan - name: test NULL in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, NULL) expected_outputs: - - batch_plan + - batch_plan - name: test NULL in in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (NULL) expected_outputs: - - batch_plan + - batch_plan - name: test multiple in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date in (4444, 3333) expected_outputs: - - batch_plan + - batch_plan - name: test eq & in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date = 3333 expected_outputs: - - batch_plan + - batch_plan - name: test eq & in-list before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 42 AND date in (2222, 3333) AND date = 4444 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id in (2147483648, 2147483649) AND date in (2222, 3333) expected_outputs: - - batch_plan + - batch_plan - id: create_table_and_mv_ordered sql: | CREATE TABLE orders ( @@ -194,93 +196,93 @@ ORDER BY orders_count; expected_outputs: [] - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE user_id = 42 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE user_id > 42 AND orders_count = 10 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count = 10 expected_outputs: - - batch_plan + - batch_plan - name: merge mutiple upper bound before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count < 10 and orders_count < 30 expected_outputs: - - batch_plan + - batch_plan - name: merge include and exclude upper bound of same value before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count < 10 and orders_count <= 10 expected_outputs: - - batch_plan + - batch_plan - name: merge mutiple lower bound before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count > 30 expected_outputs: - - batch_plan + - batch_plan - name: merge include and exclude lower bound of same value before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count >= 10 expected_outputs: - - batch_plan + - batch_plan - name: invalid range before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 10 and orders_count < 5 expected_outputs: - - batch_plan + - batch_plan - name: merge cmp and eq condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 20 and orders_count < 30 and orders_count = 25 expected_outputs: - - batch_plan + - batch_plan - name: invalid range of merging cmp and eq condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count > 20 and orders_count < 30 and orders_count = 35 expected_outputs: - - batch_plan + - batch_plan - name: merge cmp and const-in condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count in (10,20,30,40) and orders_count <30 expected_outputs: - - batch_plan + - batch_plan - name: invalid range of merging cmp and const-in condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count in (10,20,30,40) and orders_count > 50 expected_outputs: - - batch_plan + - batch_plan - name: merge null and cmp condition before: - - create_table_and_mv_ordered + - create_table_and_mv_ordered sql: | SELECT * FROM orders_count_by_user_ordered WHERE orders_count is null and orders_count < 30 expected_outputs: - - batch_plan + - batch_plan - id: create_small sql: | CREATE TABLE t(x smallint); @@ -289,142 +291,241 @@ expected_outputs: [] - name: When the constant with larger type is out of the range of the column's type, we can convert it as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x = 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the upper bound of the column's type, we can convert < as true condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the upper bound of the column's type, we can convert > as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x > 60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the lower bound of the column's type, we can convert < as false condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < -60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is out of the lower bound of the column's type, we can convert > as true condition. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x > -60000; expected_outputs: - - batch_plan + - batch_plan - name: When the constant with larger type is in range of the column's type, we can convert it. before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x < 3::bigint and x > 1::bigint; expected_outputs: - - batch_plan + - batch_plan - name: Can't push down the in-compatitble numeric type before: - - create_small + - create_small sql: | SELECT * FROM mv WHERE x = 3.4; expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or user_id = 2; expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date = 2222); expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)); expected_outputs: - - batch_plan + - batch_plan - name: When one arm of or clause contains other conditions, we can't convert it to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE (user_id = 1) or (user_id = 2 and date in (1111, 2222)) or (user_id != 3); expected_outputs: - - batch_plan + - batch_plan - name: When any arm of or clause is not equal type, we can't convert it to scan range yet. before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id > 1 or user_id < 10 expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or user_id is null expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 and user_id is null expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id is null and date = 1111) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id = 2 and date is null) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id = 1 or (user_id is null and date is null) expected_outputs: - - batch_plan + - batch_plan - before: - - create_table_and_mv + - create_table_and_mv sql: | SELECT * FROM orders_count_by_user WHERE user_id is null or (user_id is null and date is null) expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR); create index k1 on sbtest1(k); select count(k) from sbtest1 where k between 0 and 5; expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR); create index k1 on sbtest1(k); select count(k) from sbtest1 where k between 0 and 500; expected_outputs: - - batch_plan + - batch_plan - sql: | create table sbtest1(id INT, k INT, c VARCHAR, pad VARCHAR, primary key(id)); create index k1 on sbtest1(k); select count(k) from sbtest1 where id between 0 and 5; expected_outputs: - - batch_plan + - batch_plan - sql: | create table t (k int primary key, v int); select v from t where k = 2147483648; -- out of range of int32 expected_outputs: - - logical_plan - - batch_plan + - logical_plan + - batch_plan +- name: When OR clauses contain non-overlapping conditions,, we can pushdown serveral scan_range. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id >= 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id >= 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + expected_outputs: + - batch_plan +- name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + expected_outputs: + - batch_plan +- name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id > 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id > 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id != 10); + expected_outputs: + - batch_plan +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id != 10); + expected_outputs: + - batch_plan diff --git a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml index ed8b4b863fe64..e175558038c81 100644 --- a/src/frontend/planner_test/tests/testdata/output/range_scan.yaml +++ b/src/frontend/planner_test/tests/testdata/output/range_scan.yaml @@ -7,6 +7,8 @@ date INTEGER); CREATE MATERIALIZED VIEW orders_count_by_user AS SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date; + CREATE MATERIALIZED VIEW orders_count_by_user_desc AS + SELECT user_id, date, count(*) AS orders_count FROM orders GROUP BY user_id, date ORDER BY user_id DESC; - before: - create_table_and_mv sql: | @@ -503,3 +505,134 @@ └─LogicalFilter { predicate: (t.k = 2147483648:Int64) } └─LogicalScan { table: t, columns: [t.k, t.v, t._rw_timestamp] } batch_plan: 'BatchValues { rows: [] }' +- name: When OR clauses contain non-overlapping conditions,, we can pushdown serveral scan_range. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 20:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id >= 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id >= 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id >= Int64(10)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id >= 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id >= 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id >= Int64(10)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 20:Int32)) OR (orders_count_by_user.user_id = 15:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 20) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 20:Int32)) OR (orders_count_by_user_desc.user_id = 15:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id = Int64(15), orders_count_by_user_desc.user_id > Int64(20)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id < 30:Int32))) OR ((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id > Int64(20) AND orders_count_by_user.user_id < Int64(30), orders_count_by_user.user_id >= Int64(30) AND orders_count_by_user.user_id < Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id < 30) or (user_id >= 30 and user_id < 40); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user_desc.user_id < 10:Int32) AND (orders_count_by_user_desc.user_id > 1:Int32)) OR ((orders_count_by_user_desc.user_id > 20:Int32) AND (orders_count_by_user_desc.user_id < 30:Int32))) OR ((orders_count_by_user_desc.user_id >= 30:Int32) AND (orders_count_by_user_desc.user_id < 40:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id > Int64(1) AND orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id > Int64(20) AND orders_count_by_user_desc.user_id < Int64(30), orders_count_by_user_desc.user_id >= Int64(30) AND orders_count_by_user_desc.user_id < Int64(40)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- name: When OR clauses contain overlapping conditions, we can merge serveral scan_range and pushdown. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user.user_id < 10:Int32) AND (orders_count_by_user.user_id > 1:Int32)) OR ((orders_count_by_user.user_id > 20:Int32) AND (orders_count_by_user.user_id <= 30:Int32))) OR (((orders_count_by_user.user_id >= 30:Int32) AND (orders_count_by_user.user_id < 40:Int32)) OR (orders_count_by_user.user_id = 15:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id > Int64(1) AND orders_count_by_user.user_id < Int64(10), orders_count_by_user.user_id = Int64(15), orders_count_by_user.user_id >= Int64(20) AND orders_count_by_user.user_id <= Int64(40)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10 and user_id > 1) or (user_id > 20 and user_id <= 30) or (user_id >= 30 and user_id < 40) or (user_id = 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((((orders_count_by_user_desc.user_id < 10:Int32) AND (orders_count_by_user_desc.user_id > 1:Int32)) OR ((orders_count_by_user_desc.user_id > 20:Int32) AND (orders_count_by_user_desc.user_id <= 30:Int32))) OR (((orders_count_by_user_desc.user_id >= 30:Int32) AND (orders_count_by_user_desc.user_id < 40:Int32)) OR (orders_count_by_user_desc.user_id = 15:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id > Int64(1) AND orders_count_by_user_desc.user_id < Int64(10), orders_count_by_user_desc.user_id = Int64(15), orders_count_by_user_desc.user_id >= Int64(20) AND orders_count_by_user_desc.user_id <= Int64(40)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user.user_id < 10:Int32) OR (orders_count_by_user.user_id > 30:Int32)) OR ((orders_count_by_user.user_id > 5:Int32) AND (orders_count_by_user.user_id < 15:Int32))) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], scan_ranges: [orders_count_by_user.user_id <= Int64(15), orders_count_by_user.user_id > Int64(30)], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 10) or (user_id > 30) or (user_id > 5 and user_id < 15); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: (((orders_count_by_user_desc.user_id < 10:Int32) OR (orders_count_by_user_desc.user_id > 30:Int32)) OR ((orders_count_by_user_desc.user_id > 5:Int32) AND (orders_count_by_user_desc.user_id < 15:Int32))) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], scan_ranges: [orders_count_by_user_desc.user_id <= Int64(15), orders_count_by_user_desc.user_id > Int64(30)], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- name: When OR clauses contain overlapping conditions, we cannot push down if it results in a full table scan. + before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id > 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id > 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id > 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 20:Int32) OR (orders_count_by_user_desc.user_id > 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user WHERE (user_id < 20) or (user_id != 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user.user_id < 20:Int32) OR (orders_count_by_user.user_id <> 10:Int32)) } + └─BatchScan { table: orders_count_by_user, columns: [orders_count_by_user.user_id, orders_count_by_user.date, orders_count_by_user.orders_count], distribution: UpstreamHashShard(orders_count_by_user.user_id, orders_count_by_user.date) } +- before: + - create_table_and_mv + sql: | + SELECT * FROM orders_count_by_user_desc WHERE (user_id < 20) or (user_id != 10); + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchFilter { predicate: ((orders_count_by_user_desc.user_id < 20:Int32) OR (orders_count_by_user_desc.user_id <> 10:Int32)) } + └─BatchScan { table: orders_count_by_user_desc, columns: [orders_count_by_user_desc.user_id, orders_count_by_user_desc.date, orders_count_by_user_desc.orders_count], distribution: UpstreamHashShard(orders_count_by_user_desc.user_id, orders_count_by_user_desc.date) } diff --git a/src/frontend/src/utils/condition.rs b/src/frontend/src/utils/condition.rs index c3f8327f39c2e..fb5fd07c1fc07 100644 --- a/src/frontend/src/utils/condition.rs +++ b/src/frontend/src/utils/condition.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Ordering; use std::collections::{BTreeMap, HashSet}; use std::fmt::{self, Debug}; use std::ops::Bound; @@ -24,6 +25,7 @@ use risingwave_common::catalog::{Schema, TableDesc}; use risingwave_common::types::{DataType, DefaultOrd, ScalarImpl}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_common::util::scan_range::{is_full_range, ScanRange}; +use risingwave_common::util::sort_util::{cmp_rows, OrderType}; use crate::error::Result; use crate::expr::{ @@ -299,7 +301,7 @@ impl Condition { table_desc: Rc, max_split_range_gap: u64, disjunctions: Vec, - ) -> Result, Self)>> { + ) -> Result, bool)>> { let disjunctions_result: Result, Self)>> = disjunctions .into_iter() .map(|x| { @@ -352,9 +354,154 @@ impl Condition { } } - Ok(Some((non_overlap_scan_ranges, Condition::true_cond()))) + Ok(Some((non_overlap_scan_ranges, false))) } else { - Ok(None) + let mut scan_ranges = vec![]; + for (scan_ranges_chunk, _) in disjunctions_result { + if scan_ranges_chunk.is_empty() { + // full scan range + return Ok(None); + } + + scan_ranges.extend(scan_ranges_chunk); + } + + let order_types = table_desc + .pk + .iter() + .cloned() + .map(|x| { + if x.order_type.is_descending() { + x.order_type.reverse() + } else { + x.order_type + } + }) + .collect_vec(); + scan_ranges.sort_by(|left, right| { + let (left_start, _left_end) = &left.convert_to_range(); + let (right_start, _right_end) = &right.convert_to_range(); + + let left_start_vec = match &left_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + let right_start_vec = match &right_start { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + _ => &vec![], + }; + + if left_start_vec.is_empty() && right_start_vec.is_empty() { + return Ordering::Less; + } + + if left_start_vec.is_empty() { + return Ordering::Less; + } + + if right_start_vec.is_empty() { + return Ordering::Greater; + } + + let cmp_column_len = left_start_vec.len().min(right_start_vec.len()); + cmp_rows( + &left_start_vec[0..cmp_column_len], + &right_start_vec[0..cmp_column_len], + &order_types[0..cmp_column_len], + ) + }); + + if scan_ranges.is_empty() { + return Ok(None); + } + + if scan_ranges.len() == 1 { + return Ok(Some((scan_ranges, true))); + } + + let mut output_scan_ranges: Vec = vec![]; + output_scan_ranges.push(scan_ranges[0].clone()); + let mut idx = 1; + loop { + if idx >= scan_ranges.len() { + break; + } + + let scan_range_left = output_scan_ranges.last_mut().unwrap(); + let scan_range_right = &scan_ranges[idx]; + + if scan_range_left.eq_conds == scan_range_right.eq_conds { + // range merge + + if !ScanRange::is_overlap(scan_range_left, scan_range_right, &order_types) { + // not merge + output_scan_ranges.push(scan_range_right.clone()); + idx += 1; + continue; + } + + // merge range + fn merge_bound( + left_scan_range: &Bound>>, + right_scan_range: &Bound>>, + order_types: &[OrderType], + left_bound: bool, + ) -> Bound>> { + let left_scan_range = match left_scan_range { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + Bound::Unbounded => return Bound::Unbounded, + }; + + let right_scan_range = match right_scan_range { + Bound::Included(vec) | Bound::Excluded(vec) => vec, + Bound::Unbounded => return Bound::Unbounded, + }; + + let cmp_len = left_scan_range.len().min(right_scan_range.len()); + + let cmp = cmp_rows( + &left_scan_range[..cmp_len], + &right_scan_range[..cmp_len], + &order_types[..cmp_len], + ); + + let bound = { + if (cmp.is_le() && left_bound) || (cmp.is_ge() && !left_bound) { + left_scan_range.to_vec() + } else { + right_scan_range.to_vec() + } + }; + + // Included Bound just for convenience, the correctness will be guaranteed by the upper level filter. + Bound::Included(bound) + } + + scan_range_left.range.0 = merge_bound( + &scan_range_left.range.0, + &scan_range_right.range.0, + &order_types, + true, + ); + + scan_range_left.range.1 = merge_bound( + &scan_range_left.range.1, + &scan_range_right.range.1, + &order_types, + false, + ); + + if scan_range_left.is_full_table_scan() { + return Ok(None); + } + } else { + output_scan_ranges.push(scan_range_right.clone()); + } + + idx += 1; + } + + Ok(Some((output_scan_ranges, true))) } } @@ -497,12 +644,18 @@ impl Condition { // It's an OR. if self.conjunctions.len() == 1 { if let Some(disjunctions) = self.conjunctions[0].as_or_disjunctions() { - if let Some((scan_ranges, other_condition)) = Self::disjunctions_to_scan_ranges( - table_desc, - max_split_range_gap, - disjunctions, - )? { - return Ok((scan_ranges, other_condition)); + if let Some((scan_ranges, maintaining_condition)) = + Self::disjunctions_to_scan_ranges( + table_desc, + max_split_range_gap, + disjunctions, + )? + { + if maintaining_condition { + return Ok((scan_ranges, self)); + } else { + return Ok((scan_ranges, Condition::true_cond())); + } } else { return Ok((vec![], self)); }