From b124c560f78320ba8982698c06a07e5af6c50cd4 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Thu, 19 Dec 2024 15:11:02 +0800 Subject: [PATCH 01/55] [fix](nereids) fix ExtractAndNormalizeWindowExpression bug (#45553) Problem Summary: In ExtractAndNormalizeWindowExpression, some expressions are pushed down and output by the bottom LogicalProject. If a window expression depends on these pushed-down expressions, those parts should be replaced accordingly. However, when a Literal is pushed down, it should not be used as a replacement. Example: For the window expression: last_value(c1, false) over() If the expression false is pushed down as alias1, the window expression would incorrectly be replaced as: last_value(c1, alias1) over() This PR fixes the issue by ensuring Literals are not replaced when pushed down. --- .../ExtractAndNormalizeWindowExpression.java | 3 +- .../normalize_window_nullable_agg_test.out | 4 ++ .../normalize_window_nullable_agg_test.groovy | 57 +++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java index 2cfe45230038797..a74ebe4b76b9cf3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExtractAndNormalizeWindowExpression.java @@ -27,6 +27,7 @@ import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.WindowExpression; import org.apache.doris.nereids.trees.expressions.functions.agg.NullableAggregateFunction; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; @@ -117,7 +118,7 @@ private Plan normalize(LogicalProject project) { // we need replace alias's child expr with corresponding alias's slot in output // so create a customNormalizeMap alias's child -> alias.toSlot to do it Map customNormalizeMap = toBePushedDown.stream() - .filter(expr -> expr instanceof Alias) + .filter(expr -> expr instanceof Alias && !(expr.child(0) instanceof Literal)) .collect(Collectors.toMap(expr -> ((Alias) expr).child(), expr -> ((Alias) expr).toSlot(), (oldExpr, newExpr) -> oldExpr)); diff --git a/regression-test/data/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.out b/regression-test/data/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.out index 2df25bb0d3bed27..f910333a57e1761 100644 --- a/regression-test/data/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.out +++ b/regression-test/data/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.out @@ -291,3 +291,7 @@ false false false +-- !fold_window -- +0 false 0 be +9999-12-31 23:59:59 false 9999-12-31 23:59:59 b + diff --git a/regression-test/suites/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.groovy b/regression-test/suites/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.groovy index 9617e8bb19cd957..df3fd63d7507423 100644 --- a/regression-test/suites/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.groovy +++ 
b/regression-test/suites/nereids_rules_p0/normalize_window/normalize_window_nullable_agg_test.groovy @@ -72,4 +72,61 @@ suite("normalize_window_nullable_agg") { sql "select group_concat(xwho order by xwhat) over(partition by xwhen) from windowfunnel_test_normalize_window;" exception "order by is not supported" } + + sql "set enable_fold_constant_by_be = 1;" + sql "drop table if exists fold_window1" + sql """create table fold_window1 ( + pk int, + col_char_255__undef_signed char(255) null , + col_char_100__undef_signed char(100) null , + col_varchar_255__undef_signed varchar(255) null , + col_char_255__undef_signed_not_null char(255) not null , + col_char_100__undef_signed_not_null char(100) not null , + col_varchar_255__undef_signed_not_null varchar(255) not null , + col_varchar_1000__undef_signed varchar(1000) null , + col_varchar_1000__undef_signed_not_null varchar(1000) not null , + col_varchar_1001__undef_signed varchar(1001) null , + col_varchar_1001__undef_signed_not_null varchar(1001) not null , + col_string_undef_signed string null , + col_string_undef_signed_not_null string not null + ) engine=olap + DUPLICATE KEY(pk, col_char_255__undef_signed, col_char_100__undef_signed, col_varchar_255__undef_signed) + distributed by hash(pk) buckets 10 + properties("bloom_filter_columns" = "col_char_255__undef_signed, col_char_100__undef_signed, col_varchar_255__undef_signed ", "replication_num" = "1");""" + sql """insert into fold_window1(pk,col_char_255__undef_signed,col_char_255__undef_signed_not_null,col_char_100__undef_signed + ,col_char_100__undef_signed_not_null,col_varchar_255__undef_signed,col_varchar_255__undef_signed_not_null,col_varchar_1000__undef_signed,col_varchar_1000__undef_signed_not_null + ,col_varchar_1001__undef_signed,col_varchar_1001__undef_signed_not_null,col_string_undef_signed,col_string_undef_signed_not_null) + values (0,'like','9999-12-31 23:59:59','9999-12-31 23:59:59','c','20240803','2024-08-03 13:08:30','300.343','2024-07-01','that''s','9999-12-31 23:59:59','s','b'), + (1,'be','g','f','not','20240803','20240803','2024-08-03 13:08:30','g','20240803','0','2024-07-01','be')""" + + sql "drop table if exists fold_window2" + sql """create table fold_window2 ( + pk int, + col_char_255__undef_signed char(255) null , + col_char_255__undef_signed_not_null char(255) not null , + col_char_100__undef_signed char(100) null , + col_char_100__undef_signed_not_null char(100) not null , + col_varchar_255__undef_signed varchar(255) null , + col_varchar_255__undef_signed_not_null varchar(255) not null , + col_varchar_1000__undef_signed varchar(1000) null , + col_varchar_1000__undef_signed_not_null varchar(1000) not null , + col_varchar_1001__undef_signed varchar(1001) null , + col_varchar_1001__undef_signed_not_null varchar(1001) not null , + col_string_undef_signed string null , + col_string_undef_signed_not_null string not null + ) engine=olap + DUPLICATE KEY(pk) + distributed by hash(pk) buckets 10 + properties ("bloom_filter_columns" = "col_char_255__undef_signed, col_char_100__undef_signed, col_varchar_255__undef_signed ", "replication_num" = "1");""" + sql """insert into fold_window2(pk,col_char_255__undef_signed,col_char_255__undef_signed_not_null,col_char_100__undef_signed + ,col_char_100__undef_signed_not_null,col_varchar_255__undef_signed,col_varchar_255__undef_signed_not_null,col_varchar_1000__undef_signed + 
,col_varchar_1000__undef_signed_not_null,col_varchar_1001__undef_signed,col_varchar_1001__undef_signed_not_null,col_string_undef_signed,col_string_undef_signed_not_null) + values (0,'some','2024-07-01','9999-12-31 23:59:59','9999-12-31 23:59:59','9999-12-31 23:59:59','300.343','2024-07-01','1','1','2024-07-01','2024-08-03 13:08:30','2024-08-03 13:08:30');""" + + qt_fold_window """ + select initcap(col_varchar_1001__undef_signed_not_null) col_alias97650 , starts_with('ourBZbRijD', "e") AS col_alias97651 , + col_varchar_1001__undef_signed_not_null AS col_alias97652 , LAST_VALUE(col_string_undef_signed_not_null , false) over ( order by pk ) + AS col_alias97653 from fold_window1 where 'DCOFMrybqf' <> (select min ( col_char_255__undef_signed ) + from fold_window2) ORDER BY col_alias97650,col_alias97651,col_alias97652,col_alias97653 ; + """ } \ No newline at end of file From 2a7c2e3f410a715ff9c22890de06d809fa411c52 Mon Sep 17 00:00:00 2001 From: yujun Date: Thu, 19 Dec 2024 15:47:50 +0800 Subject: [PATCH 02/55] [fix](create table) fix create table fail msg (#45623) --- .../org/apache/doris/clone/TabletScheduler.java | 2 +- .../java/org/apache/doris/system/Backend.java | 3 +++ .../apache/doris/system/SystemInfoService.java | 2 +- .../apache/doris/catalog/CreateTableTest.java | 17 +++++++++++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index 504e1d36a65af34..1545236aa59cd0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -1545,7 +1545,7 @@ private RootPathLoadStatistic doChooseAvailableDestPath(TabletSchedCtx tabletCtx !allFitPathsSameMedium.isEmpty() ? 
allFitPathsSameMedium : allFitPathsDiffMedium; if (allFitPaths.isEmpty()) { List backendsInfo = Env.getCurrentSystemInfo().getAllClusterBackendsNoException().values().stream() - .filter(be -> be.getLocationTag() == tag) + .filter(be -> be.getLocationTag().equals(tag)) .map(Backend::getDetailsForCreateReplica) .collect(Collectors.toList()); throw new SchedException(Status.UNRECOVERABLE, String.format("unable to find dest path for new replica" diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java index 974c0e0cae13a7c..c864e1ba2ae0ba1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java @@ -28,6 +28,7 @@ import org.apache.doris.common.util.PrintableMap; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.qe.SimpleScheduler; import org.apache.doris.resource.Tag; import org.apache.doris.system.HeartbeatResponse.HbStatus; import org.apache.doris.thrift.TDisk; @@ -340,6 +341,8 @@ public String getDetailsForCreateReplica() { sb.append(", isDecommissioned=true, exclude it"); } else if (isComputeNode()) { sb.append(", isComputeNode=true, exclude it"); + } else if (!Config.disable_backend_black_list && !SimpleScheduler.isAvailable(this)) { + sb.append(", is in black list, exclude it"); } else { sb.append(", hdd disks count={"); if (hddOk > 0) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java index 447dc3457aefe1d..f7359293621e3db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java @@ -567,7 +567,7 @@ public String getDetailsForCreateReplica(ReplicaAllocation replicaAlloc) { StringBuilder sb = new StringBuilder(" Backends details: "); for (Tag tag : replicaAlloc.getAllocMap().keySet()) { sb.append("backends with tag ").append(tag).append(" is "); - sb.append(idToBackendRef.values().stream().filter(be -> be.getLocationTag() == tag) + sb.append(idToBackendRef.values().stream().filter(be -> be.getLocationTag().equals(tag)) .map(Backend::getDetailsForCreateReplica) .collect(Collectors.toList())); sb.append(", "); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java index 50e9ac40bc7d939..76acb4ad76e196c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java @@ -25,12 +25,15 @@ import org.apache.doris.common.ExceptionChecker; import org.apache.doris.common.FeConstants; import org.apache.doris.common.UserException; +import org.apache.doris.resource.Tag; import org.apache.doris.utframe.TestWithFeService; +import com.google.common.collect.Maps; import org.junit.Assert; import org.junit.jupiter.api.Test; import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.UUID; @@ -789,6 +792,20 @@ public void testCreateTableWithForceReplica() throws DdlException { } } + @Test + public void testCreateTableDetailMsg() throws Exception { + Map allocMap = Maps.newHashMap(); + allocMap.put(Tag.create(Tag.TYPE_LOCATION, "group_a"), (short) 6); + Assert.assertEquals(" Backends details: backends with tag {\"location\" : \"group_a\"} is 
[], ", + Env.getCurrentSystemInfo().getDetailsForCreateReplica(new ReplicaAllocation(allocMap))); + + allocMap.clear(); + allocMap.put(Tag.create(Tag.TYPE_LOCATION, new String(Tag.VALUE_DEFAULT_TAG)), (short) 6); + String msg = Env.getCurrentSystemInfo().getDetailsForCreateReplica(new ReplicaAllocation(allocMap)); + Assert.assertTrue("msg: " + msg, msg.contains("Backends details: backends with tag {\"location\" : \"default\"} is [[backendId=") + && msg.contains("hdd disks count={ok=1,}, ssd disk count={}], [backendId=")); + } + @Test public void testCreateTableWithMinLoadReplicaNum() throws Exception { ExceptionChecker.expectThrowsNoException( From a3279a260441692cd0494c0eb8a93e5f97aa2945 Mon Sep 17 00:00:00 2001 From: Mryange Date: Thu, 19 Dec 2024 17:04:03 +0800 Subject: [PATCH 03/55] [refine](exchange) Use is_merge from FE for judgment instead of relying on the operator in BE. (#45592) ### What problem does this PR solve? Previously, determining whether the receiver is a merge exchange relied on checking if the specific operator was a sort node. However, this approach is incorrect because there are many types of sort operators: regular sort, partitioned sort, and spill sort. --- be/src/pipeline/exec/exchange_sink_operator.cpp | 10 ++-------- be/src/pipeline/exec/exchange_sink_operator.h | 4 +++- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index e7fed76be8fa165..cc789f6e25b20bc 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -32,7 +32,6 @@ #include "pipeline/exec/operator.h" #include "pipeline/exec/sort_source_operator.h" #include "pipeline/local_exchange/local_exchange_sink_operator.h" -#include "pipeline/local_exchange/local_exchange_source_operator.h" #include "pipeline/pipeline_fragment_context.h" #include "util/runtime_profile.h" #include "util/uid_util.h" @@ -279,6 +278,7 @@ ExchangeSinkOperatorX::ExchangeSinkOperatorX( _tablet_sink_txn_id(sink.tablet_sink_txn_id), _t_tablet_sink_exprs(&sink.tablet_sink_exprs), _enable_local_merge_sort(state->enable_local_merge_sort()), + _dest_is_merge(sink.__isset.is_merge && sink.is_merge), _fragment_instance_ids(fragment_instance_ids) { DCHECK_GT(destinations.size(), 0); DCHECK(sink.output_partition.type == TPartitionType::UNPARTITIONED || @@ -571,19 +571,13 @@ std::shared_ptr ExchangeSinkOperatorX::_create_buffer( // Therefore, a shared sink buffer is used here to limit the number of concurrent RPCs. // (Note: This does not reduce the total number of RPCs.) // In a merge sort scenario, there are only n RPCs, so a shared sink buffer is not needed. -/// TODO: Modify this to let FE handle the judgment instead of BE. std::shared_ptr ExchangeSinkOperatorX::get_sink_buffer( InstanceLoId sender_ins_id) { - if (!_child) { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "ExchangeSinkOperatorX did not correctly set the child."); - } // When the child is SortSourceOperatorX or LocalExchangeSourceOperatorX, // it is an order-by scenario. // In this case, there is only one target instance, and no n * n RPC concurrency will occur. // Therefore, sharing a sink buffer is not necessary. 
- if (std::dynamic_pointer_cast(_child) || - std::dynamic_pointer_cast(_child)) { + if (_dest_is_merge) { return _create_buffer({sender_ins_id}); } if (_state->enable_shared_exchange_sink_buffer()) { diff --git a/be/src/pipeline/exec/exchange_sink_operator.h b/be/src/pipeline/exec/exchange_sink_operator.h index 85575beb9f7e475..3d6eeb4b39e94fc 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.h +++ b/be/src/pipeline/exec/exchange_sink_operator.h @@ -205,7 +205,6 @@ class ExchangeSinkOperatorX final : public DataSinkOperatorX get_sink_buffer(InstanceLoId sender_ins_id); vectorized::VExprContextSPtrs& tablet_sink_expr_ctxs() { return _tablet_sink_expr_ctxs; } @@ -260,6 +259,9 @@ class ExchangeSinkOperatorX final : public DataSinkOperatorX& _fragment_instance_ids; }; From a3de177da0799b93c02a173c659800fde7f5278d Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Thu, 19 Dec 2024 17:17:28 +0800 Subject: [PATCH 04/55] [fix](cloud) Adjust rowset state check in `CloudTablet::create_transient_rowset_writer` (#45496) https://github.com/apache/doris/pull/32257 checks if the current rowset state is `BEGIN_PARTIAL_UPDATE` in `CloudTablet::create_transient_rowset_writer`. But if this is a retry calculate task, the rowset's state may have been changed to `COMMITTED` in the first try. This PR adjust this check to avoid DCHECK fails. --- be/src/cloud/cloud_tablet.cpp | 23 ++-- .../test_cloud_mow_partial_update_retry.out | 16 +++ ...test_cloud_mow_partial_update_retry.groovy | 100 ++++++++++++++++++ 3 files changed, 131 insertions(+), 8 deletions(-) create mode 100644 regression-test/data/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.out create mode 100644 regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 93c7128756738c7..c7d3170726b2d5f 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -54,6 +54,7 @@ namespace doris { using namespace ErrorCode; static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1; +static constexpr int LOAD_INITIATOR_ID = -1; CloudTablet::CloudTablet(CloudStorageEngine& engine, TabletMetaSharedPtr tablet_meta) : BaseTablet(std::move(tablet_meta)), _engine(engine) {} @@ -504,13 +505,19 @@ Result> CloudTablet::create_rowset_writer( Result> CloudTablet::create_transient_rowset_writer( const Rowset& rowset, std::shared_ptr partial_update_info, int64_t txn_expiration) { - if (rowset.rowset_meta()->rowset_state() != RowsetStatePB::BEGIN_PARTIAL_UPDATE) [[unlikely]] { - // May cause the segment files generated by the transient rowset writer unable to be - // recycled, see `CloudRowsetWriter::build` for detail. - LOG(WARNING) << "Wrong rowset state: " << rowset.rowset_meta()->rowset_state(); - DCHECK(false) << rowset.rowset_meta()->rowset_state(); + if (rowset.rowset_meta_state() != RowsetStatePB::BEGIN_PARTIAL_UPDATE && + rowset.rowset_meta_state() != RowsetStatePB::COMMITTED) [[unlikely]] { + auto msg = fmt::format( + "wrong rowset state when create_transient_rowset_writer, rowset state should be " + "BEGIN_PARTIAL_UPDATE or COMMITTED, but found {}, rowset_id={}, tablet_id={}", + RowsetStatePB_Name(rowset.rowset_meta_state()), rowset.rowset_id().to_string(), + tablet_id()); + // see `CloudRowsetWriter::build` for detail. + // if this is in a retry task, the rowset state may have been changed to RowsetStatePB::COMMITTED + // in `RowsetMeta::merge_rowset_meta()` in previous trials. 
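+        // so COMMITTED is accepted by the condition above and only other states reach here.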
+ LOG(WARNING) << msg; + DCHECK(false) << msg; } - RowsetWriterContext context; context.rowset_state = PREPARED; context.segments_overlap = OVERLAPPING; @@ -719,8 +726,8 @@ Status CloudTablet::save_delete_bitmap(const TabletTxnInfo* txn_info, int64_t tx } auto ms_lock_id = lock_id == -1 ? txn_id : lock_id; - RETURN_IF_ERROR(_engine.meta_mgr().update_delete_bitmap( - *this, ms_lock_id, COMPACTION_DELETE_BITMAP_LOCK_ID, new_delete_bitmap.get())); + RETURN_IF_ERROR(_engine.meta_mgr().update_delete_bitmap(*this, ms_lock_id, LOAD_INITIATOR_ID, + new_delete_bitmap.get())); // store the delete bitmap with sentinel marks in txn_delete_bitmap_cache because if the txn is retried for some reason, // it will use the delete bitmap from txn_delete_bitmap_cache when re-calculating the delete bitmap, during which it will do diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.out new file mode 100644 index 000000000000000..3b24419bdc6fc1c --- /dev/null +++ b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.out @@ -0,0 +1,16 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 1 1 1 +2 2 2 2 +3 3 3 2 + +-- !sql -- +1 1 888 1 +2 2 777 2 +3 3 3 2 + +-- !sql -- +1 999 888 1 +2 666 777 2 +3 3 3 2 + diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy new file mode 100644 index 000000000000000..13abaf1ffcabd3d --- /dev/null +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
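+// Scenario (timings follow the FE configs set below):
+//   1. the first partial-update load is blocked during delete bitmap calculation
+//      via a BE debug point;
+//   2. after delete_bitmap_lock_expiration_seconds (10s) its lock expires, so a
+//      second partial-update load can take the delete bitmap update lock and commit;
+//   3. after calculate_delete_bitmap_task_timeout_seconds (15s) the first load
+//      retries the calculation and, once unblocked, both loads' updates are visible.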
+ +suite("test_cloud_mow_partial_update_retry", "nonConcurrent") { + if (!isCloudMode()) { + return + } + + GetDebugPoint().clearDebugPointsForAllFEs() + GetDebugPoint().clearDebugPointsForAllBEs() + + def customFeConfig = [ + delete_bitmap_lock_expiration_seconds : 10, + calculate_delete_bitmap_task_timeout_seconds : 15, + ] + + setFeConfigTemporary(customFeConfig) { + + def table1 = "test_cloud_mow_partial_update_retry" + sql "DROP TABLE IF EXISTS ${table1} FORCE;" + sql """ CREATE TABLE IF NOT EXISTS ${table1} ( + `k1` int NOT NULL, + `c1` int, + `c2` int, + `c3` int + )UNIQUE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 1 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "disable_auto_compaction" = "true", + "replication_num" = "1"); """ + + sql "insert into ${table1} values(1,1,1,1);" + sql "insert into ${table1} values(2,2,2,2);" + sql "insert into ${table1} values(3,3,3,2);" + sql "sync;" + qt_sql "select * from ${table1} order by k1;" + + try { + // block the first load + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.enable_spin_wait", [token: "token1"]) + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block", [wait_token: "token1"]) + + // the first load + t1 = Thread.start { + sql "set enable_unique_key_partial_update=true;" + sql "sync;" + sql "insert into ${table1}(k1,c1) values(1,999),(2,666);" + } + + // wait util the first partial update load's delete bitmap update lock expired + // to ensure that the second load can take the delete bitmap update lock + // Config.delete_bitmap_lock_expiration_seconds = 10s + Thread.sleep(11 * 1000) + + // the second load + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.enable_spin_wait", [token: "token2"]) + Thread.sleep(200) + + sql "set enable_unique_key_partial_update=true;" + sql "sync;" + sql "insert into ${table1}(k1,c2) values(1,888),(2,777);" + + qt_sql "select * from ${table1} order by k1;" + + + // keep waiting util the delete bitmap calculation timeout(Config.calculate_delete_bitmap_task_timeout_seconds = 15s) + // and the first load will retry the calculation of delete bitmap + Thread.sleep(15 * 1000) + + // let the first partial update load finish + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block") + t1.join() + + Thread.sleep(1000) + + qt_sql "select * from ${table1} order by k1;" + + } catch(Exception e) { + logger.info(e.getMessage()) + throw e + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } + } +} From 0e2de34bdc8e55382526660cdc9d1e8eed9b8033 Mon Sep 17 00:00:00 2001 From: Sridhar R Manikarnike Date: Thu, 19 Dec 2024 15:00:55 +0530 Subject: [PATCH 05/55] [Enhancement] (nereids)implement DropCatalogCommand in nereids (#45372) Issue Number: close #42613 --- .../org/apache/doris/nereids/DorisParser.g4 | 2 +- .../apache/doris/datasource/CatalogMgr.java | 22 ++++-- .../nereids/parser/LogicalPlanBuilder.java | 9 +++ .../doris/nereids/trees/plans/PlanType.java | 1 + .../plans/commands/DropCatalogCommand.java | 77 +++++++++++++++++++ .../trees/plans/visitor/CommandVisitor.java | 5 ++ .../nereids_p0/test_drop_catalog_command.out | 4 + .../test_drop_catalog_command.groovy | 43 +++++++++++ 8 files changed, 155 insertions(+), 8 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropCatalogCommand.java create mode 100644 regression-test/data/nereids_p0/test_drop_catalog_command.out create mode 100644 
regression-test/suites/nereids_p0/test_drop_catalog_command.groovy diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 0cd32f3820fda47..b2e3eca37e006b6 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -220,6 +220,7 @@ supportedDropStatement | DROP SQL_BLOCK_RULE (IF EXISTS)? identifierSeq #dropSqlBlockRule | DROP USER (IF EXISTS)? userIdentify #dropUser | DROP WORKLOAD GROUP (IF EXISTS)? name=identifierOrText #dropWorkloadGroup + | DROP CATALOG (IF EXISTS)? name=identifier #dropCatalog | DROP FILE name=STRING_LITERAL ((FROM | IN) database=identifier)? properties=propertyClause #dropFile | DROP WORKLOAD POLICY (IF EXISTS)? name=identifierOrText #dropWorkloadPolicy @@ -689,7 +690,6 @@ fromRollup unsupportedDropStatement : DROP (DATABASE | SCHEMA) (IF EXISTS)? name=multipartIdentifier FORCE? #dropDatabase - | DROP CATALOG (IF EXISTS)? name=identifier #dropCatalog | DROP (GLOBAL | SESSION | LOCAL)? FUNCTION (IF EXISTS)? functionIdentifier LEFT_PAREN functionArguments? RIGHT_PAREN #dropFunction | DROP TABLE (IF EXISTS)? name=multipartIdentifier FORCE? #dropTable diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java index 0203aa7020b090e..f90a2a32fdc3ec6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogMgr.java @@ -275,23 +275,24 @@ public void createCatalog(CreateCatalogStmt stmt) throws UserException { /** * Remove the catalog instance by name and write the meta log. */ - public void dropCatalog(DropCatalogStmt stmt) throws UserException { + public void dropCatalog(String catalogName, boolean ifExists) throws UserException { writeLock(); try { - if (stmt.isSetIfExists() && !nameToCatalog.containsKey(stmt.getCatalogName())) { - LOG.warn("Non catalog {} is found.", stmt.getCatalogName()); + if (ifExists && !nameToCatalog.containsKey(catalogName)) { + LOG.warn("Non catalog {} is found.", catalogName); return; } - CatalogIf> catalog = nameToCatalog.get(stmt.getCatalogName()); + CatalogIf> catalog = nameToCatalog.get(catalogName); if (catalog == null) { - throw new DdlException("No catalog found with name: " + stmt.getCatalogName()); + throw new DdlException("No catalog found with name: " + catalogName); } - CatalogLog log = CatalogFactory.createCatalogLog(catalog.getId(), stmt); + CatalogLog log = new CatalogLog(); + log.setCatalogId(catalog.getId()); replayDropCatalog(log); Env.getCurrentEnv().getEditLog().logCatalogLog(OperationType.OP_DROP_CATALOG, log); if (ConnectContext.get() != null) { - ConnectContext.get().removeLastDBOfCatalog(stmt.getCatalogName()); + ConnectContext.get().removeLastDBOfCatalog(catalogName); } Env.getCurrentEnv().getQueryStats().clear(catalog.getId()); } finally { @@ -299,6 +300,13 @@ public void dropCatalog(DropCatalogStmt stmt) throws UserException { } } + /** + * Remove the catalog instance by name and write the meta log. + */ + public void dropCatalog(DropCatalogStmt stmt) throws UserException { + dropCatalog(stmt.getCatalogName(), stmt.isSetIfExists()); + } + /** * Modify the catalog name into a new one and write the meta log. 
*/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 68226f156c7f255..3b570fff8e75841 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -120,6 +120,7 @@ import org.apache.doris.nereids.DorisParser.DecimalLiteralContext; import org.apache.doris.nereids.DorisParser.DeleteContext; import org.apache.doris.nereids.DorisParser.DereferenceContext; +import org.apache.doris.nereids.DorisParser.DropCatalogContext; import org.apache.doris.nereids.DorisParser.DropCatalogRecycleBinContext; import org.apache.doris.nereids.DorisParser.DropConstraintContext; import org.apache.doris.nereids.DorisParser.DropEncryptkeyContext; @@ -526,6 +527,7 @@ import org.apache.doris.nereids.trees.plans.commands.CreateWorkloadGroupCommand; import org.apache.doris.nereids.trees.plans.commands.DeleteFromCommand; import org.apache.doris.nereids.trees.plans.commands.DeleteFromUsingCommand; +import org.apache.doris.nereids.trees.plans.commands.DropCatalogCommand; import org.apache.doris.nereids.trees.plans.commands.DropCatalogRecycleBinCommand; import org.apache.doris.nereids.trees.plans.commands.DropCatalogRecycleBinCommand.IdType; import org.apache.doris.nereids.trees.plans.commands.DropConstraintCommand; @@ -4969,6 +4971,13 @@ public LogicalPlan visitDropRole(DropRoleContext ctx) { return new DropRoleCommand(ctx.name.getText(), ctx.EXISTS() != null); } + @Override + public LogicalPlan visitDropCatalog(DropCatalogContext ctx) { + String catalogName = stripQuotes(ctx.name.getText()); + boolean ifExists = ctx.EXISTS() != null; + return new DropCatalogCommand(catalogName, ifExists); + } + @Override public LogicalPlan visitCreateEncryptkey(CreateEncryptkeyContext ctx) { List nameParts = visitMultipartIdentifier(ctx.multipartIdentifier()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 6395f429db29de0..f58a6bf139d2fee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -149,6 +149,7 @@ public enum PlanType { CREATE_JOB_COMMAND, PAUSE_JOB_COMMAND, CANCEL_JOB_COMMAND, + DROP_CATALOG_COMMAND, DROP_JOB_COMMAND, RESUME_JOB_COMMAND, ALTER_MTMV_COMMAND, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropCatalogCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropCatalogCommand.java new file mode 100644 index 000000000000000..034ecb1053a5f60 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropCatalogCommand.java @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.util.Util; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.StmtExecutor; + +import java.util.Objects; + +/** + * Command for DROP CATALOG. + */ +public class DropCatalogCommand extends DropCommand { + private final String catalogName; + private final boolean ifExists; + + public DropCatalogCommand(String catalogName, boolean ifExists) { + super(PlanType.DROP_CATALOG_COMMAND); + this.catalogName = Objects.requireNonNull(catalogName, "Catalog name cannot be null"); + this.ifExists = ifExists; + } + + @Override + public void doRun(ConnectContext ctx, StmtExecutor executor) throws Exception { + // Validate the catalog name + Util.checkCatalogAllRules(catalogName); + + if (catalogName.equals(InternalCatalog.INTERNAL_CATALOG_NAME)) { + throw new AnalysisException("Internal catalog can't be drop."); + } + + if (!Env.getCurrentEnv().getAccessManager().checkCtlPriv( + ConnectContext.get(), catalogName, PrivPredicate.DROP)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_CATALOG_ACCESS_DENIED, + ConnectContext.get().getQualifiedUser(), catalogName); + } + + Env.getCurrentEnv().getCatalogMgr().dropCatalog(catalogName, ifExists); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitDropCatalogCommand(this, context); + } + + public String getCatalogName() { + return catalogName; + } + + public boolean isIfExists() { + return ifExists; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java index a9340894c33590f..9c2839b3784093c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java @@ -56,6 +56,7 @@ import org.apache.doris.nereids.trees.plans.commands.CreateWorkloadGroupCommand; import org.apache.doris.nereids.trees.plans.commands.DeleteFromCommand; import org.apache.doris.nereids.trees.plans.commands.DeleteFromUsingCommand; +import org.apache.doris.nereids.trees.plans.commands.DropCatalogCommand; import org.apache.doris.nereids.trees.plans.commands.DropCatalogRecycleBinCommand; import org.apache.doris.nereids.trees.plans.commands.DropConstraintCommand; import org.apache.doris.nereids.trees.plans.commands.DropEncryptkeyCommand; @@ -345,6 +346,10 @@ default R visitAlterViewCommand(AlterViewCommand alterViewCommand, C context) { return visitCommand(alterViewCommand, context); } + default R visitDropCatalogCommand(DropCatalogCommand 
dropCatalogCommand, C context) { + return visitCommand(dropCatalogCommand, context); + } + default R visitAlterCatalogCommentCommand(AlterCatalogCommentCommand alterCatalogCommentCommand, C context) { return visitCommand(alterCatalogCommentCommand, context); } diff --git a/regression-test/data/nereids_p0/test_drop_catalog_command.out b/regression-test/data/nereids_p0/test_drop_catalog_command.out new file mode 100644 index 000000000000000..ddb84e0eb356f7f --- /dev/null +++ b/regression-test/data/nereids_p0/test_drop_catalog_command.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !cmd -- +test_drop_catalog \nCREATE CATALOG `test_drop_catalog` PROPERTIES (\n"type" = "es",\n"hosts" = "http://127.0.0.1:9200"\n); + diff --git a/regression-test/suites/nereids_p0/test_drop_catalog_command.groovy b/regression-test/suites/nereids_p0/test_drop_catalog_command.groovy new file mode 100644 index 000000000000000..a936e52208ea44f --- /dev/null +++ b/regression-test/suites/nereids_p0/test_drop_catalog_command.groovy @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
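+// Exercises DropCatalogCommand end to end: create an ES catalog, verify it via
+// SHOW CREATE CATALOG, drop it, and keep the cleanup idempotent with IF EXISTS.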
+ +suite("test_drop_catalog_command", "nereids_p0") { + def catalogName = "test_drop_catalog" + def catalogProperties = "\"type\"=\"es\", \"hosts\"=\"http://127.0.0.1:9200\"" + + try { + // Drop catalog if it already exists + checkNereidsExecute("DROP CATALOG IF EXISTS ${catalogName}") + + // Create a new catalog + sql(""" + CREATE CATALOG ${catalogName} + PROPERTIES (${catalogProperties}) + """) + + // Verify the catalog was created + checkNereidsExecute("""SHOW CREATE CATALOG ${catalogName}""") + qt_cmd("""SHOW CREATE CATALOG ${catalogName}""") + + // Drop the catalog + checkNereidsExecute("DROP CATALOG ${catalogName}") + } finally { + // Ensure cleanup + checkNereidsExecute("DROP CATALOG IF EXISTS ${catalogName}") + } +} + From a9de07b9ab2226b578821190e846ca4212fc7879 Mon Sep 17 00:00:00 2001 From: Sridhar R Manikarnike Date: Thu, 19 Dec 2024 15:01:22 +0530 Subject: [PATCH 06/55] [Enhancement] (nereids)implement showDataTypesCommand in nereids (#44299) Issue Number: close #42743 --- .../org/apache/doris/nereids/DorisParser.g4 | 2 +- .../nereids/parser/LogicalPlanBuilder.java | 7 ++ .../doris/nereids/trees/plans/PlanType.java | 1 + .../plans/commands/ShowDataTypesCommand.java | 102 ++++++++++++++++++ .../trees/plans/visitor/CommandVisitor.java | 5 + .../nereids_p0/show/test_show_data_types.out | 31 ++++++ .../show/test_show_data_types.groovy | 29 +++++ 7 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowDataTypesCommand.java create mode 100644 regression-test/data/nereids_p0/show/test_show_data_types.out create mode 100644 regression-test/suites/nereids_p0/show/test_show_data_types.groovy diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index b2e3eca37e006b6..93bf6050970930b 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -262,6 +262,7 @@ supportedShowStatement | SHOW COLLATION wildWhere? #showCollation | SHOW SQL_BLOCK_RULE (FOR ruleName=identifier)? #showSqlBlockRule | SHOW CREATE VIEW name=multipartIdentifier #showCreateView + | SHOW DATA TYPES #showDataTypes | SHOW CREATE MATERIALIZED VIEW mvName=identifier ON tableName=multipartIdentifier #showCreateMaterializedView | SHOW (WARNINGS | ERRORS) limitClause? #showWarningErrors @@ -330,7 +331,6 @@ unsupportedShowStatement LEFT_PAREN functionArguments? RIGHT_PAREN ((FROM | IN) database=multipartIdentifier)? #showCreateFunction | SHOW (DATABASES | SCHEMAS) (FROM catalog=identifier)? wildWhere? #showDatabases - | SHOW DATA TYPES #showDataTypes | SHOW CATALOGS wildWhere? #showCatalogs | SHOW CATALOG name=identifier #showCatalog | SHOW FULL? 
(COLUMNS | FIELDS) (FROM | IN) tableName=multipartIdentifier diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 3b570fff8e75841..34f760ff4f524e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -257,6 +257,7 @@ import org.apache.doris.nereids.DorisParser.ShowCreateTableContext; import org.apache.doris.nereids.DorisParser.ShowCreateViewContext; import org.apache.doris.nereids.DorisParser.ShowDataSkewContext; +import org.apache.doris.nereids.DorisParser.ShowDataTypesContext; import org.apache.doris.nereids.DorisParser.ShowDatabaseIdContext; import org.apache.doris.nereids.DorisParser.ShowDeleteContext; import org.apache.doris.nereids.DorisParser.ShowDiagnoseTabletContext; @@ -575,6 +576,7 @@ import org.apache.doris.nereids.trees.plans.commands.ShowCreateTableCommand; import org.apache.doris.nereids.trees.plans.commands.ShowCreateViewCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDataSkewCommand; +import org.apache.doris.nereids.trees.plans.commands.ShowDataTypesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDatabaseIdCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDeleteCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDiagnoseTabletCommand; @@ -4498,6 +4500,11 @@ public LogicalPlan visitShowLoadProfile(ShowLoadProfileContext ctx) { return new ShowLoadProfileCommand(ctx.loadIdPath.getText()); } + @Override + public LogicalPlan visitShowDataTypes(ShowDataTypesContext ctx) { + return new ShowDataTypesCommand(); + } + @Override public LogicalPlan visitShowGrants(ShowGrantsContext ctx) { boolean all = (ctx.ALL() != null) ? true : false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index f58a6bf139d2fee..8eeac54a853e0f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -224,6 +224,7 @@ public enum PlanType { SHOW_DYNAMIC_PARTITION_COMMAND, SHOW_ENCRYPT_KEYS_COMMAND, SHOW_EVENTS_COMMAND, + SHOW_DATA_TYPES_COMMAND, SHOW_FRONTENDS_COMMAND, SHOW_GRANTS_COMMAND, SHOW_LAST_INSERT_COMMAND, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowDataTypesCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowDataTypesCommand.java new file mode 100644 index 000000000000000..6ce9b781bd37f1b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowDataTypesCommand.java @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.ShowResultSet; +import org.apache.doris.qe.ShowResultSetMetaData; +import org.apache.doris.qe.StmtExecutor; + +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * Represents the command for SHOW DATA TYPES. + */ +public class ShowDataTypesCommand extends ShowCommand { + private static final ShowResultSetMetaData META_DATA = + ShowResultSetMetaData.builder() + .addColumn(new Column("TypeName", ScalarType.createVarchar(20))) + .addColumn(new Column("Size", ScalarType.createVarchar(100))) + .build(); + + public ShowDataTypesCommand() { + super(PlanType.SHOW_DATA_TYPES_COMMAND); + } + + /** + * getTypes(). + */ + public static ArrayList getTypes() { + return PrimitiveType.getSupportedTypes(); + } + + /** + * getTypesAvailableInDdl(). + */ + public static List> getTypesAvailableInDdl() { + ArrayList supportedTypes = getTypes(); + List> rows = Lists.newArrayList(); + for (PrimitiveType type : supportedTypes) { + List row = new ArrayList<>(); + if (type.isAvailableInDdl()) { + row.add(type.toString()); + row.add(Integer.toString(type.getSlotSize())); + rows.add(row); + } + } + return rows; + } + + /** + * sortMetaData(). 
+ */ + public void sortMetaData(List> rows) { + Collections.sort(rows, new Comparator>() { + @Override + public int compare(List row1, List row2) { + return row1.get(0).compareTo(row2.get(0)); + } + }); + } + + @Override + public ShowResultSet doRun(ConnectContext ctx, StmtExecutor executor) throws Exception { + List> rows = getTypesAvailableInDdl(); + sortMetaData(rows); + return new ShowResultSet(getMetaData(), rows); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitShowDataTypesCommand(this, context); + } + + public ShowResultSetMetaData getMetaData() { + return META_DATA; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java index 9c2839b3784093c..cce1f41e071531b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java @@ -102,6 +102,7 @@ import org.apache.doris.nereids.trees.plans.commands.ShowCreateTableCommand; import org.apache.doris.nereids.trees.plans.commands.ShowCreateViewCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDataSkewCommand; +import org.apache.doris.nereids.trees.plans.commands.ShowDataTypesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDatabaseIdCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDeleteCommand; import org.apache.doris.nereids.trees.plans.commands.ShowDiagnoseTabletCommand; @@ -521,6 +522,10 @@ default R visitCleanAllProfileCommand(CleanAllProfileCommand cleanAllProfileComm return visitCommand(cleanAllProfileCommand, context); } + default R visitShowDataTypesCommand(ShowDataTypesCommand showDataTypesCommand, C context) { + return visitCommand(showDataTypesCommand, context); + } + default R visitShowFrontendsCommand(ShowFrontendsCommand showFrontendsCommand, C context) { return visitCommand(showFrontendsCommand, context); } diff --git a/regression-test/data/nereids_p0/show/test_show_data_types.out b/regression-test/data/nereids_p0/show/test_show_data_types.out new file mode 100644 index 000000000000000..de1d757cbf80cdd --- /dev/null +++ b/regression-test/data/nereids_p0/show/test_show_data_types.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !cmd -- +AGG_STATE 16 +ARRAY 32 +BIGINT 8 +BITMAP 16 +BOOLEAN 1 +CHAR 16 +DATE 16 +DATETIME 16 +DATETIMEV2 8 +DATEV2 4 +DECIMAL128 16 +DECIMAL32 4 +DECIMAL64 8 +DECIMALV2 16 +DOUBLE 8 +FLOAT 4 +HLL 16 +INT 4 +IPV4 4 +IPV6 16 +JSON 16 +LARGEINT 16 +MAP 24 +QUANTILE_STATE 16 +SMALLINT 2 +STRING 16 +TINYINT 1 +VARCHAR 16 + diff --git a/regression-test/suites/nereids_p0/show/test_show_data_types.groovy b/regression-test/suites/nereids_p0/show/test_show_data_types.groovy new file mode 100644 index 000000000000000..4316fd5545f47fc --- /dev/null +++ b/regression-test/suites/nereids_p0/show/test_show_data_types.groovy @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_show_data_types_nereids", "query,datatype") { + try { + // Execute the SHOW DATA TYPES command and verify the output + checkNereidsExecute("SHOW DATA TYPES") + qt_cmd("SHOW DATA TYPES") + } catch (Exception e) { + // Log any exceptions that occur during testing + log.error("Failed to execute SHOW DATA TYPES command", e) + throw e + } +} + From ea6958cb40e225dd3704327964436432696497eb Mon Sep 17 00:00:00 2001 From: Sridhar R Manikarnike Date: Thu, 19 Dec 2024 15:01:40 +0530 Subject: [PATCH 07/55] [Enhancement](nereids)implement showStatusCommand in nereids (#45427) Issue Number: close #42730 --- .../org/apache/doris/nereids/DorisParser.g4 | 2 +- .../nereids/parser/LogicalPlanBuilder.java | 16 +++++ .../doris/nereids/trees/plans/PlanType.java | 1 + .../plans/commands/ShowStatusCommand.java | 61 +++++++++++++++++++ .../trees/plans/visitor/CommandVisitor.java | 5 ++ .../show/test_show_status_command.out | 7 +++ .../show/test_show_status_command.groovy | 31 ++++++++++ 7 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowStatusCommand.java create mode 100644 regression-test/data/nereids_p0/show/test_show_status_command.out create mode 100644 regression-test/suites/nereids_p0/show/test_show_status_command.groovy diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 93bf6050970930b..97876c231fec69f 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -274,6 +274,7 @@ supportedShowStatement | SHOW DATABASE databaseId=INTEGER_VALUE #showDatabaseId | SHOW TABLE tableId=INTEGER_VALUE #showTableId | SHOW TRASH (ON backend=STRING_LITERAL)? #showTrash + | SHOW (GLOBAL | SESSION | LOCAL)? STATUS #showStatus | SHOW WHITELIST #showWhitelist | SHOW TABLETS BELONG tabletIds+=INTEGER_VALUE (COMMA tabletIds+=INTEGER_VALUE)* #showTabletsBelong @@ -325,7 +326,6 @@ unsupportedShowStatement | SHOW TABLE STATUS ((FROM | IN) database=multipartIdentifier)? wildWhere? #showTableStatus | SHOW FULL? TABLES ((FROM | IN) database=multipartIdentifier)? wildWhere? #showTables | SHOW FULL? VIEWS ((FROM | IN) database=multipartIdentifier)? wildWhere? #showViews - | SHOW (GLOBAL | SESSION | LOCAL)? STATUS wildWhere? #showStatus | SHOW CREATE MATERIALIZED VIEW name=multipartIdentifier #showMaterializedView | SHOW CREATE (GLOBAL | SESSION | LOCAL)? FUNCTION functionIdentifier LEFT_PAREN functionArguments? 
RIGHT_PAREN diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 34f760ff4f524e8..0332123f9ff5843 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -281,6 +281,7 @@ import org.apache.doris.nereids.DorisParser.ShowRolesContext; import org.apache.doris.nereids.DorisParser.ShowSmallFilesContext; import org.apache.doris.nereids.DorisParser.ShowSqlBlockRuleContext; +import org.apache.doris.nereids.DorisParser.ShowStatusContext; import org.apache.doris.nereids.DorisParser.ShowStorageEnginesContext; import org.apache.doris.nereids.DorisParser.ShowSyncJobContext; import org.apache.doris.nereids.DorisParser.ShowTableCreationContext; @@ -599,6 +600,7 @@ import org.apache.doris.nereids.trees.plans.commands.ShowRolesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowSmallFilesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowSqlBlockRuleCommand; +import org.apache.doris.nereids.trees.plans.commands.ShowStatusCommand; import org.apache.doris.nereids.trees.plans.commands.ShowStorageEnginesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowSyncJobCommand; import org.apache.doris.nereids.trees.plans.commands.ShowTableCreationCommand; @@ -5124,6 +5126,20 @@ public LogicalPlan visitAdminCheckTablets(AdminCheckTabletsContext ctx) { return new AdminCheckTabletsCommand(tabletIdLists, properties); } + @Override + public LogicalPlan visitShowStatus(ShowStatusContext ctx) { + String scope = null; + if (ctx.GLOBAL() != null) { + scope = "GLOBAL"; + } else if (ctx.SESSION() != null) { + scope = "SESSION"; + } else if (ctx.LOCAL() != null) { + scope = "LOCAL"; + } + + return new ShowStatusCommand(scope); + } + @Override public LogicalPlan visitShowDataSkew(ShowDataSkewContext ctx) { TableRefInfo tableRefInfo = visitBaseTableRefContext(ctx.baseTableRef()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 8eeac54a853e0f8..dfc129f10b0fd6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -239,6 +239,7 @@ public enum PlanType { SHOW_REPOSITORIES_COMMAND, SHOW_ROLE_COMMAND, SHOW_SMALL_FILES_COMMAND, + SHOW_STATUS_COMMAND, SHOW_STORAGE_ENGINES_COMMAND, SHOW_SYNC_JOB_COMMAND, SHOW_TABLE_ID_COMMAND, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowStatusCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowStatusCommand.java new file mode 100644 index 000000000000000..3ae5643e068c9af --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowStatusCommand.java @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.ShowResultSet; +import org.apache.doris.qe.ShowResultSetMetaData; +import org.apache.doris.qe.StmtExecutor; + +import com.google.common.collect.Lists; + +import java.util.List; + +/** + * Command for SHOW STATUS. + */ +public class ShowStatusCommand extends ShowCommand { + private static final ShowResultSetMetaData META_DATA = + ShowResultSetMetaData.builder() + .addColumn(new Column("Variable_name", ScalarType.createVarchar(64))) + .addColumn(new Column("Value", ScalarType.createVarchar(64))) + .build(); + + private final String scope; + + public ShowStatusCommand(String scope) { + super(PlanType.SHOW_STATUS_COMMAND); + this.scope = scope; + } + + @Override + public ShowResultSet doRun(ConnectContext ctx, StmtExecutor executor) throws Exception { + List> rows = Lists.newArrayList(); + return new ShowResultSet(META_DATA, rows); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitShowStatusCommand(this, context); + } +} + diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java index cce1f41e071531b..d3749e94d57d0f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java @@ -125,6 +125,7 @@ import org.apache.doris.nereids.trees.plans.commands.ShowRolesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowSmallFilesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowSqlBlockRuleCommand; +import org.apache.doris.nereids.trees.plans.commands.ShowStatusCommand; import org.apache.doris.nereids.trees.plans.commands.ShowStorageEnginesCommand; import org.apache.doris.nereids.trees.plans.commands.ShowSyncJobCommand; import org.apache.doris.nereids.trees.plans.commands.ShowTableCreationCommand; @@ -416,6 +417,10 @@ default R visitShowGrantsCommand(ShowGrantsCommand showGrantsCommand, C context) return visitCommand(showGrantsCommand, context); } + default R visitShowStatusCommand(ShowStatusCommand showStatusCommand, C context) { + return visitCommand(showStatusCommand, context); + } + default R visitShowPartitionIdCommand(ShowPartitionIdCommand showPartitionIdCommand, C context) { return visitCommand(showPartitionIdCommand, context); } diff --git a/regression-test/data/nereids_p0/show/test_show_status_command.out b/regression-test/data/nereids_p0/show/test_show_status_command.out new file mode 100644 index 000000000000000..0fbf8d052eda80f --- /dev/null +++ b/regression-test/data/nereids_p0/show/test_show_status_command.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !cmd -- + +-- !cmd -- + +-- !cmd -- + diff --git a/regression-test/suites/nereids_p0/show/test_show_status_command.groovy b/regression-test/suites/nereids_p0/show/test_show_status_command.groovy new file mode 100644 index 000000000000000..1b6112680571118 --- /dev/null +++ b/regression-test/suites/nereids_p0/show/test_show_status_command.groovy @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_show_status_command", "nereids_p0") { + // Verify SESSION status + checkNereidsExecute("SHOW SESSION STATUS") + qt_cmd("SHOW SESSION STATUS") + + // Verify GLOBAL status + checkNereidsExecute("SHOW GLOBAL STATUS") + qt_cmd("SHOW GLOBAL STATUS") + + // Verify default STATUS (SESSION) + checkNereidsExecute("SHOW STATUS") + qt_cmd("SHOW STATUS") +} + From 549abf4a56e047bbba8dd9c75fa5cc46e67d9ea0 Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Thu, 19 Dec 2024 17:37:33 +0800 Subject: [PATCH 08/55] =?UTF-8?q?[Fix](Job)Fix=20redundant=20job=20schedul?= =?UTF-8?q?ing=20by=20preventing=20same=20state=20transitions=20(e.g.,=20R?= =?UTF-8?q?UNNING=20=E2=86=92=20RUNNING)=20(#45495)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? In the current job scheduling logic, invalid state transitions (e.g., RUNNING to RUNNING) are not filtered, which causes redundant scheduling during resume operations. This PR adds a check to ensure that jobs cannot transition to the same state, preventing duplicate scheduling triggers and improving state consistency. 
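For reference, a minimal sketch of the guard in isolation; the enum values and the exception type here are simplified stand-ins for Doris's `JobStatus` and `JobException`, not the real classes:

```java
// Illustration only: simplified stand-ins for Doris's job types.
enum JobStatus { PENDING, RUNNING, PAUSED, STOPPED }

class SameStateTransitionDemo {
    private JobStatus status = JobStatus.RUNNING;

    // Mirrors the check this patch adds in JobManager.alterJobStatus: reject a
    // transition to the state the job is already in, so RESUME on a RUNNING job
    // fails fast instead of re-triggering scheduling.
    void alterJobStatus(JobStatus target) {
        if (target == status) {
            throw new IllegalStateException("Can't change job status to the same status");
        }
        status = target;
        // ... trigger scheduling for the new state here ...
    }

    public static void main(String[] args) {
        SameStateTransitionDemo job = new SameStateTransitionDemo();
        job.alterJobStatus(JobStatus.PAUSED);  // ok: RUNNING -> PAUSED
        job.alterJobStatus(JobStatus.PAUSED);  // throws: PAUSED -> PAUSED
    }
}
```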
--- .../java/org/apache/doris/job/manager/JobManager.java | 6 +++++- .../suites/job_p0/test_base_insert_job.groovy | 9 ++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/manager/JobManager.java b/fe/fe-core/src/main/java/org/apache/doris/job/manager/JobManager.java index 2a957775e113b8f..ac9f15b9d67fd3c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/manager/JobManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/manager/JobManager.java @@ -201,9 +201,13 @@ public void alterJobStatus(String jobName, JobStatus jobStatus) throws JobExcept for (T a : jobMap.values()) { if (a.getJobName().equals(jobName)) { try { + if (jobStatus.equals(a.getJobStatus())) { + throw new JobException("Can't change job status to the same status"); + } alterJobStatus(a.getJobId(), jobStatus); } catch (JobException e) { - throw new JobException("unregister job error, jobName:" + jobName); + throw new JobException("Alter job status error, jobName is %s, errorMsg is %s", + jobName, e.getMessage()); } } } diff --git a/regression-test/suites/job_p0/test_base_insert_job.groovy b/regression-test/suites/job_p0/test_base_insert_job.groovy index 33ae28443b290ac..1703b355c950192 100644 --- a/regression-test/suites/job_p0/test_base_insert_job.groovy +++ b/regression-test/suites/job_p0/test_base_insert_job.groovy @@ -190,6 +190,11 @@ suite("test_base_insert_job") { // check job status and succeed task count is 1 pressJob.size() == 1 && '1' == onceJob.get(0).get(0) }) + assertThrows(Exception) { + sql """ + RESUME JOB where jobName='press' + """ + } sql """ DROP JOB IF EXISTS where jobname = 'past_start_time' @@ -299,12 +304,10 @@ suite("test_base_insert_job") { assert e.getMessage().contains("Invalid interval time unit: years") } // assert interval time unit is -1 - try { + assertThrows(Exception) { sql """ CREATE JOB test_error_starts ON SCHEDULE every -1 second comment 'test' DO insert into ${tableName} (timestamp, type, user_id) values ('2023-03-18','1','12213'); """ - } catch (Exception e) { - assert e.getMessage().contains("expecting INTEGER_VALUE") } // test keyword as job name From 40c6c61dbbe67b3c11095427b3c5e471dfee0042 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Thu, 19 Dec 2024 17:39:25 +0800 Subject: [PATCH 09/55] [fix](sql cache) fix prepare statement with sql cache throw NullPointerException (#45640) fix prepare statement with sql cache throw NullPointerException: ```shell java.lang.NullPointerException: Cannot read field "originStmt" because the return value of "org.apache.doris.analysis.StatementBase.getOrigStmt()" is null ``` --- .../org/apache/doris/qe/StmtExecutor.java | 3 +- .../cache/prepare_stmt_with_sql_cache.groovy | 57 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 regression-test/suites/nereids_p0/cache/prepare_stmt_with_sql_cache.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 05df53ed6796cba..5c2566225fe50a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -1894,7 +1894,8 @@ private void handleQueryStmt() throws Exception { // TODO support arrow flight sql // NOTE: If you want to add another condition about SessionVariable, please consider whether // add to CacheAnalyzer.commonCacheCondition - if (channel != null && !isOutfileQuery && 
CacheAnalyzer.canUseCache(context.getSessionVariable())) { + if (channel != null && !isOutfileQuery && CacheAnalyzer.canUseCache(context.getSessionVariable()) + && parsedStmt.getOrigStmt() != null && parsedStmt.getOrigStmt().originStmt != null) { if (queryStmt instanceof QueryStmt || queryStmt instanceof LogicalPlanAdapter) { handleCacheStmt(cacheAnalyzer, channel); LOG.info("Query {} finished", DebugUtil.printId(context.queryId)); diff --git a/regression-test/suites/nereids_p0/cache/prepare_stmt_with_sql_cache.groovy b/regression-test/suites/nereids_p0/cache/prepare_stmt_with_sql_cache.groovy new file mode 100644 index 000000000000000..7819a6ca09d7198 --- /dev/null +++ b/regression-test/suites/nereids_p0/cache/prepare_stmt_with_sql_cache.groovy @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import com.mysql.cj.ServerPreparedQuery +import com.mysql.cj.jdbc.ConnectionImpl +import com.mysql.cj.jdbc.JdbcStatement +import com.mysql.cj.jdbc.ServerPreparedStatement +import com.mysql.cj.jdbc.StatementImpl +import org.apache.doris.regression.util.JdbcUtils + +import java.lang.reflect.Field +import java.sql.PreparedStatement +import java.sql.ResultSet +import java.util.concurrent.CopyOnWriteArrayList + +suite("prepare_stmt_with_sql_cache") { + + multi_sql """ + drop table if exists test_prepare_stmt_with_sql_cache; + create table test_prepare_stmt_with_sql_cache(id int) + distributed by hash(id) + properties('replication_num'='1'); + + insert into test_prepare_stmt_with_sql_cache select * from numbers('number'='100'); + """ + + def db = (sql "select database()")[0][0].toString() + + def url = getServerPrepareJdbcUrl(context.config.jdbcUrl, db) + + connect(context.config.jdbcUser, context.config.jdbcPassword, url) { + sql "set enable_sql_cache=true" + for (def i in 0..<10) { + try (PreparedStatement pstmt = prepareStatement("select * from test_prepare_stmt_with_sql_cache where id=?")) { + pstmt.setInt(1, i) + try (ResultSet rs = pstmt.executeQuery()) { + def result = JdbcUtils.toList(rs).v1 + logger.info("result: {}", result) + } + } + } + } +} From 24328d1cc2401b62a62f4d89c944a03866e4a252 Mon Sep 17 00:00:00 2001 From: morrySnow Date: Thu, 19 Dec 2024 17:48:37 +0800 Subject: [PATCH 10/55] [opt](Nereids) lock table in ascending order of table IDs (#45045) ### What problem does this PR solve? Problem Summary: Doris's table locks are fair read-write locks. If two threads acquire read locks on tables in different orders and simultaneously a third thread attempts to acquire a write lock on one of these tables, a deadlock can form between the two threads trying to acquire read locks. 
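To make the failure mode concrete, a minimal, self-contained sketch (illustration only, not Doris code): with a fair read-write lock, a new read request queues behind an already-waiting writer instead of sharing the lock with the current reader, so a reader can block on a table that is only read-locked; two threads that take read locks in opposite orders can then each end up waiting on a table the other still holds.

```java
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Illustration only: shows the fair-lock behaviour that makes the deadlock
// possible. Once a writer is queued, a fair read-write lock parks new readers
// behind it instead of letting them share the lock.
public class FairLockDemo {
    public static void main(String[] args) throws Exception {
        ReentrantReadWriteLock tableLock = new ReentrantReadWriteLock(true); // fair, like Doris table locks

        tableLock.readLock().lock(); // main thread plays the first reader

        Thread writer = new Thread(() -> {
            tableLock.writeLock().lock(); // queues behind the reader above
            tableLock.writeLock().unlock();
        });
        writer.start();
        Thread.sleep(200); // give the writer time to enqueue (timing-dependent demo)

        Thread secondReader = new Thread(() -> {
            try {
                // Fairness parks this read request behind the waiting writer, so it
                // times out even though the lock is only read-held at this moment.
                boolean acquired = tableLock.readLock().tryLock(500, TimeUnit.MILLISECONDS);
                System.out.println("second reader acquired: " + acquired); // prints false
                if (acquired) {
                    tableLock.readLock().unlock();
                }
            } catch (InterruptedException ignored) {
                // not expected in this demo
            }
        });
        secondReader.start();
        secondReader.join();

        tableLock.readLock().unlock(); // release the first read lock so the writer can finish
        writer.join();
    }
}
```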
This PR changes the lock acquisition order for queries to follow the order of table IDs, ensuring that the lock acquisition order for tables is consistent among different threads. ### Release note Execute table locking operations in ascending order of table IDs --- .../java/org/apache/doris/catalog/Column.java | 5 +- .../java/org/apache/doris/catalog/MTMV.java | 8 +- .../org/apache/doris/catalog/OlapTable.java | 30 +-- .../org/apache/doris/catalog/TableIf.java | 134 ++++------ .../doris/common/NereidsSqlCacheManager.java | 32 ++- .../lock/MonitoredReentrantReadWriteLock.java | 13 + .../doris/common/proc/PartitionsProcDir.java | 37 ++- .../doris/common/profile/SummaryProfile.java | 15 +- .../httpv2/rest/StmtExecutionAction.java | 34 +-- .../doris/job/extensions/mtmv/MTMVTask.java | 38 ++- .../org/apache/doris/mtmv/BaseTableInfo.java | 7 + .../java/org/apache/doris/mtmv/MTMVCache.java | 30 ++- .../org/apache/doris/mtmv/MTMVJobManager.java | 11 +- .../apache/doris/mtmv/MTMVPartitionUtil.java | 5 +- .../org/apache/doris/mtmv/MTMVPlanUtil.java | 68 ++--- .../doris/mtmv/MTMVRelationManager.java | 21 +- .../apache/doris/mtmv/MTMVRewriteUtil.java | 4 +- .../apache/doris/nereids/CascadesContext.java | 239 +----------------- .../apache/doris/nereids/NereidsPlanner.java | 126 +++++---- .../apache/doris/nereids/SqlCacheContext.java | 4 + .../doris/nereids/StatementContext.java | 156 ++++++++---- .../UnboundBaseExternalTableSink.java | 6 - .../analyzer/UnboundOneRowRelation.java | 2 +- .../nereids/analyzer/UnboundRelation.java | 6 - .../nereids/analyzer/UnboundResultSink.java | 6 - .../nereids/analyzer/UnboundTVFRelation.java | 6 - .../nereids/analyzer/UnboundTableSink.java | 6 - .../doris/nereids/jobs/executor/Analyzer.java | 35 +-- .../nereids/jobs/executor/TableCollector.java | 71 ++++++ .../doris/nereids/minidump/MinidumpUtils.java | 11 +- .../nereids/parser/LogicalPlanBuilder.java | 3 +- .../apache/doris/nereids/rules/RuleType.java | 9 +- .../nereids/rules/analysis/BindRelation.java | 64 ++--- .../rules/analysis/CollectRelation.java | 228 +++++++++++++++++ .../mv/AsyncMaterializationContext.java | 5 - .../mv/InitMaterializationContextHook.java | 24 +- .../exploration/mv/MaterializedViewUtils.java | 8 +- .../plans/commands/AddConstraintCommand.java | 31 ++- .../trees/plans/commands/CommandUtils.java | 49 ---- .../plans/commands/DropConstraintCommand.java | 28 +- .../commands/ShowConstraintsCommand.java | 15 +- .../plans/commands/info/CreateMTMVInfo.java | 75 +++--- .../info/MTMVPartitionDefinition.java | 20 +- .../insert/InsertIntoTableCommand.java | 131 ++++++---- .../plans/commands/insert/InsertUtils.java | 11 +- .../trees/plans/visitor/TableCollector.java | 122 --------- .../org/apache/doris/qe/SessionVariable.java | 12 + .../org/apache/doris/qe/StmtExecutor.java | 3 + .../tablefunction/MetadataGenerator.java | 37 ++- .../rules/analysis/BindRelationTest.java | 70 +---- .../nereids/trees/plans/PlanVisitorTest.java | 163 ------------ .../doris/nereids/util/PlanChecker.java | 14 +- .../doris/nereids/util/ReadLockTest.java | 11 +- .../apache/doris/qe/OlapQueryCacheTest.java | 2 +- 54 files changed, 1083 insertions(+), 1218 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/TableCollector.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CommandUtils.java delete mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 0ae6a4f8bdb5eb1..3ef5f680e94d15b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -1054,10 +1054,7 @@ public boolean equals(Object obj) { && isKey == other.isKey && isAllowNull == other.isAllowNull && isAutoInc == other.isAutoInc - && getDataType().equals(other.getDataType()) - && getStrLen() == other.getStrLen() - && getPrecision() == other.getPrecision() - && getScale() == other.getScale() + && Objects.equals(type, other.type) && Objects.equals(comment, other.comment) && visible == other.visible && Objects.equals(children, other.children) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java index daf1aac333d653b..19058df1eb904ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java @@ -201,7 +201,7 @@ public void addTaskResult(MTMVTask task, MTMVRelation relation, // to connection issues such as S3, so it is directly set to null if (!isReplay) { // shouldn't do this while holding mvWriteLock - mtmvCache = MTMVCache.from(this, MTMVPlanUtil.createMTMVContext(this), true); + mtmvCache = MTMVCache.from(this, MTMVPlanUtil.createMTMVContext(this), true, true); } } catch (Throwable e) { mtmvCache = null; @@ -323,7 +323,7 @@ public MTMVCache getOrGenerateCache(ConnectContext connectionContext) throws Ana MTMVCache mtmvCache; try { // Should new context with ADMIN user - mtmvCache = MTMVCache.from(this, MTMVPlanUtil.createMTMVContext(this), true); + mtmvCache = MTMVCache.from(this, MTMVPlanUtil.createMTMVContext(this), true, false); } finally { connectionContext.setThreadLocalInfo(); } @@ -362,7 +362,7 @@ public MTMVRefreshSnapshot getRefreshSnapshot() { * * @return mvPartitionName ==> mvPartitionKeyDesc */ - public Map generateMvPartitionDescs() throws AnalysisException { + public Map generateMvPartitionDescs() { Map mtmvItems = getAndCopyPartitionItems(); Map result = Maps.newHashMap(); for (Entry entry : mtmvItems.entrySet()) { @@ -392,7 +392,7 @@ public Pair>, Map> calculateDoublyPartit Map baseToMv = Maps.newHashMap(); Map> relatedPartitionDescs = MTMVPartitionUtil .generateRelatedPartitionDescs(mvPartitionInfo, mvProperties); - Map mvPartitionItems = getAndCopyPartitionItemsWithoutLock(); + Map mvPartitionItems = getAndCopyPartitionItems(); for (Entry entry : mvPartitionItems.entrySet()) { Set basePartitionNames = relatedPartitionDescs.getOrDefault(entry.getValue().toPartitionKeyDesc(), Sets.newHashSet()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 9f1f455ab354953..477f76301120d2d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -114,7 +114,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; /** @@ -3325,33 +3324,26 @@ public PartitionType getPartitionType() { } @Override - public Map getAndCopyPartitionItems(Optional snapshot) - throws AnalysisException { + public Map 
getAndCopyPartitionItems(Optional snapshot) { return getAndCopyPartitionItems(); } - public Map getAndCopyPartitionItems() throws AnalysisException { - if (!tryReadLock(1, TimeUnit.MINUTES)) { - throw new AnalysisException("get table read lock timeout, database=" + getDBName() + ",table=" + getName()); - } + public Map getAndCopyPartitionItems() { + readLock(); try { - return getAndCopyPartitionItemsWithoutLock(); + Map res = Maps.newHashMap(); + for (Entry entry : getPartitionInfo().getIdToItem(false).entrySet()) { + Partition partition = idToPartition.get(entry.getKey()); + if (partition != null) { + res.put(partition.getName(), entry.getValue()); + } + } + return res; } finally { readUnlock(); } } - public Map getAndCopyPartitionItemsWithoutLock() throws AnalysisException { - Map res = Maps.newHashMap(); - for (Entry entry : getPartitionInfo().getIdToItem(false).entrySet()) { - Partition partition = idToPartition.get(entry.getKey()); - if (partition != null) { - res.put(partition.getName(), entry.getValue()); - } - } - return res; - } - @Override public List getPartitionColumns(Optional snapshot) { return getPartitionColumns(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 4761ac9d86db2aa..a93c0818d5c91fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -213,7 +213,6 @@ default Map getConstraintsMapUnsafe() { } default Set getForeignKeyConstraints() { - readLock(); try { return getConstraintsMapUnsafe().values().stream() .filter(ForeignKeyConstraint.class::isInstance) @@ -221,24 +220,18 @@ default Set getForeignKeyConstraints() { .collect(ImmutableSet.toImmutableSet()); } catch (Exception ignored) { return ImmutableSet.of(); - } finally { - readUnlock(); } } default Map getConstraintsMap() { - readLock(); try { return ImmutableMap.copyOf(getConstraintsMapUnsafe()); } catch (Exception ignored) { return ImmutableMap.of(); - } finally { - readUnlock(); } } default Set getPrimaryKeyConstraints() { - readLock(); try { return getConstraintsMapUnsafe().values().stream() .filter(PrimaryKeyConstraint.class::isInstance) @@ -246,13 +239,10 @@ default Set getPrimaryKeyConstraints() { .collect(ImmutableSet.toImmutableSet()); } catch (Exception ignored) { return ImmutableSet.of(); - } finally { - readUnlock(); } } default Set getUniqueConstraints() { - readLock(); try { return getConstraintsMapUnsafe().values().stream() .filter(UniqueConstraint.class::isInstance) @@ -260,8 +250,6 @@ default Set getUniqueConstraints() { .collect(ImmutableSet.toImmutableSet()); } catch (Exception ignored) { return ImmutableSet.of(); - } finally { - readUnlock(); } } @@ -280,34 +268,24 @@ default void checkConstraintNotExistenceUnsafe(String name, Constraint primaryKe } default void addUniqueConstraint(String name, ImmutableList columns, boolean replay) { - writeLock(); - try { - Map constraintMap = getConstraintsMapUnsafe(); - UniqueConstraint uniqueConstraint = new UniqueConstraint(name, ImmutableSet.copyOf(columns)); - checkConstraintNotExistenceUnsafe(name, uniqueConstraint, constraintMap); - constraintMap.put(name, uniqueConstraint); - if (!replay) { - Env.getCurrentEnv().getEditLog().logAddConstraint( - new AlterConstraintLog(uniqueConstraint, this)); - } - } finally { - writeUnlock(); + Map constraintMap = getConstraintsMapUnsafe(); + UniqueConstraint uniqueConstraint = new UniqueConstraint(name, 
ImmutableSet.copyOf(columns)); + checkConstraintNotExistenceUnsafe(name, uniqueConstraint, constraintMap); + constraintMap.put(name, uniqueConstraint); + if (!replay) { + Env.getCurrentEnv().getEditLog().logAddConstraint( + new AlterConstraintLog(uniqueConstraint, this)); } } default void addPrimaryKeyConstraint(String name, ImmutableList columns, boolean replay) { - writeLock(); - try { - Map constraintMap = getConstraintsMapUnsafe(); - PrimaryKeyConstraint primaryKeyConstraint = new PrimaryKeyConstraint(name, ImmutableSet.copyOf(columns)); - checkConstraintNotExistenceUnsafe(name, primaryKeyConstraint, constraintMap); - constraintMap.put(name, primaryKeyConstraint); - if (!replay) { - Env.getCurrentEnv().getEditLog().logAddConstraint( - new AlterConstraintLog(primaryKeyConstraint, this)); - } - } finally { - writeUnlock(); + Map constraintMap = getConstraintsMapUnsafe(); + PrimaryKeyConstraint primaryKeyConstraint = new PrimaryKeyConstraint(name, ImmutableSet.copyOf(columns)); + checkConstraintNotExistenceUnsafe(name, primaryKeyConstraint, constraintMap); + constraintMap.put(name, primaryKeyConstraint); + if (!replay) { + Env.getCurrentEnv().getEditLog().logAddConstraint( + new AlterConstraintLog(primaryKeyConstraint, this)); } } @@ -326,26 +304,19 @@ default PrimaryKeyConstraint tryGetPrimaryKeyForForeignKeyUnsafe( default void addForeignConstraint(String name, ImmutableList columns, TableIf referencedTable, ImmutableList referencedColumns, boolean replay) { - writeLock(); - referencedTable.writeLock(); - try { - Map constraintMap = getConstraintsMapUnsafe(); - ForeignKeyConstraint foreignKeyConstraint = - new ForeignKeyConstraint(name, columns, referencedTable, referencedColumns); - checkConstraintNotExistenceUnsafe(name, foreignKeyConstraint, constraintMap); - PrimaryKeyConstraint requirePrimaryKeyName = new PrimaryKeyConstraint(name, - foreignKeyConstraint.getReferencedColumnNames()); - PrimaryKeyConstraint primaryKeyConstraint = - tryGetPrimaryKeyForForeignKeyUnsafe(requirePrimaryKeyName, referencedTable); - primaryKeyConstraint.addForeignTable(this); - constraintMap.put(name, foreignKeyConstraint); - if (!replay) { - Env.getCurrentEnv().getEditLog().logAddConstraint( - new AlterConstraintLog(foreignKeyConstraint, this)); - } - } finally { - referencedTable.writeUnlock(); - writeUnlock(); + Map constraintMap = getConstraintsMapUnsafe(); + ForeignKeyConstraint foreignKeyConstraint = + new ForeignKeyConstraint(name, columns, referencedTable, referencedColumns); + checkConstraintNotExistenceUnsafe(name, foreignKeyConstraint, constraintMap); + PrimaryKeyConstraint requirePrimaryKeyName = new PrimaryKeyConstraint(name, + foreignKeyConstraint.getReferencedColumnNames()); + PrimaryKeyConstraint primaryKeyConstraint = + tryGetPrimaryKeyForForeignKeyUnsafe(requirePrimaryKeyName, referencedTable); + primaryKeyConstraint.addForeignTable(this); + constraintMap.put(name, foreignKeyConstraint); + if (!replay) { + Env.getCurrentEnv().getEditLog().logAddConstraint( + new AlterConstraintLog(foreignKeyConstraint, this)); } } @@ -381,40 +352,31 @@ default void replayDropConstraint(String name) { } default void dropConstraint(String name, boolean replay) { - writeLock(); - try { - Map constraintMap = getConstraintsMapUnsafe(); - if (!constraintMap.containsKey(name)) { - throw new AnalysisException( - String.format("Unknown constraint %s on table %s.", name, this.getName())); - } - Constraint constraint = constraintMap.get(name); - constraintMap.remove(name); - if (constraint instanceof 
PrimaryKeyConstraint) { - ((PrimaryKeyConstraint) constraint).getForeignTables() - .forEach(t -> t.dropFKReferringPK(this, (PrimaryKeyConstraint) constraint)); - } - if (!replay) { - Env.getCurrentEnv().getEditLog().logDropConstraint(new AlterConstraintLog(constraint, this)); - } - } finally { - writeUnlock(); + Map constraintMap = getConstraintsMapUnsafe(); + if (!constraintMap.containsKey(name)) { + throw new AnalysisException( + String.format("Unknown constraint %s on table %s.", name, this.getName())); + } + Constraint constraint = constraintMap.get(name); + constraintMap.remove(name); + if (constraint instanceof PrimaryKeyConstraint) { + ((PrimaryKeyConstraint) constraint).getForeignTables() + .forEach(t -> t.dropFKReferringPK(this, (PrimaryKeyConstraint) constraint)); + } + if (!replay) { + Env.getCurrentEnv().getEditLog().logDropConstraint(new AlterConstraintLog(constraint, this)); } } default void dropFKReferringPK(TableIf table, PrimaryKeyConstraint constraint) { - writeLock(); - try { - Map constraintMap = getConstraintsMapUnsafe(); - Set fkName = constraintMap.entrySet().stream() - .filter(e -> e.getValue() instanceof ForeignKeyConstraint - && ((ForeignKeyConstraint) e.getValue()).isReferringPK(table, constraint)) - .map(Entry::getKey) - .collect(Collectors.toSet()); - fkName.forEach(constraintMap::remove); - } finally { - writeUnlock(); - } + Map constraintMap = getConstraintsMapUnsafe(); + Set fkName = constraintMap.entrySet().stream() + .filter(e -> e.getValue() instanceof ForeignKeyConstraint + && ((ForeignKeyConstraint) e.getValue()).isReferringPK(table, constraint)) + .map(Entry::getKey) + .collect(Collectors.toSet()); + fkName.forEach(constraintMap::remove); + } } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java b/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java index cd32b52034a5d45..86a2b875a93d685 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java @@ -225,6 +225,10 @@ private Optional tryParseSqlWithoutCheckVariable( SqlCacheContext sqlCacheContext, UserIdentity currentUserIdentity) { Env env = connectContext.getEnv(); + if (!tryLockTables(connectContext, env, sqlCacheContext)) { + return invalidateCache(key); + } + // check table and view and their columns authority if (privilegeChanged(connectContext, env, sqlCacheContext)) { return invalidateCache(key); @@ -378,16 +382,38 @@ private boolean dataMaskPoliciesChanged( return false; } - private boolean privilegeChanged(ConnectContext connectContext, Env env, SqlCacheContext sqlCacheContext) { + /** + * Execute table locking operations in ascending order of table IDs. + * + * @return true if all table locks are obtained.
+ */ + private boolean tryLockTables(ConnectContext connectContext, Env env, SqlCacheContext sqlCacheContext) { StatementContext currentStatementContext = connectContext.getStatementContext(); + for (FullTableName fullTableName : sqlCacheContext.getUsedTables()) { + TableIf tableIf = findTableIf(env, fullTableName); + if (tableIf == null) { + return false; + } + currentStatementContext.getTables().put(fullTableName.toList(), tableIf); + } + for (FullTableName fullTableName : sqlCacheContext.getUsedViews().keySet()) { + TableIf tableIf = findTableIf(env, fullTableName); + if (tableIf == null) { + return false; + } + currentStatementContext.getTables().put(fullTableName.toList(), tableIf); + } + currentStatementContext.lock(); + return true; + } + + private boolean privilegeChanged(ConnectContext connectContext, Env env, SqlCacheContext sqlCacheContext) { for (Entry> kv : sqlCacheContext.getCheckPrivilegeTablesOrViews().entrySet()) { Set usedColumns = kv.getValue(); TableIf tableIf = findTableIf(env, kv.getKey()); if (tableIf == null) { return true; } - // release when close statementContext - currentStatementContext.addTableReadLock(tableIf); try { UserAuthentication.checkPermission(tableIf, connectContext, usedColumns); } catch (Throwable t) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/lock/MonitoredReentrantReadWriteLock.java b/fe/fe-core/src/main/java/org/apache/doris/common/lock/MonitoredReentrantReadWriteLock.java index 7a6f0db5938b23c..de825fbdb3ac237 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/lock/MonitoredReentrantReadWriteLock.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/lock/MonitoredReentrantReadWriteLock.java @@ -17,6 +17,12 @@ package org.apache.doris.common.lock; +import org.apache.doris.common.util.DebugUtil; +import org.apache.doris.qe.ConnectContext; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.util.concurrent.locks.ReentrantReadWriteLock; /** @@ -24,6 +30,8 @@ * monitoring capabilities for read and write locks. */ public class MonitoredReentrantReadWriteLock extends ReentrantReadWriteLock { + + private static final Logger LOG = LogManager.getLogger(MonitoredReentrantReadWriteLock.class); // Monitored read and write lock instances private final ReadLock readLock = new ReadLock(this); private final WriteLock writeLock = new WriteLock(this); @@ -97,6 +105,11 @@ protected WriteLock(ReentrantReadWriteLock lock) { public void lock() { super.lock(); monitor.afterLock(); + if (isFair() && getReadHoldCount() > 0) { + LOG.warn(" read lock count is {}, write lock count is {}, stack is {}, query id is {}", + getReadHoldCount(), getWriteHoldCount(), Thread.currentThread().getStackTrace(), + ConnectContext.get() == null ? 
"" : DebugUtil.printId(ConnectContext.get().queryId())); + } } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java index 3fd945c013cd5d6..3c44874cb7deffa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java @@ -35,6 +35,7 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.PartitionType; +import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.ErrorCode; @@ -44,9 +45,12 @@ import org.apache.doris.common.Pair; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.common.util.ListComparator; +import org.apache.doris.common.util.MetaLockUtils; import org.apache.doris.common.util.OrderByPair; import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.mtmv.BaseTableInfo; import org.apache.doris.mtmv.MTMVPartitionUtil; +import org.apache.doris.mtmv.MTMVUtil; import org.apache.doris.thrift.TCell; import org.apache.doris.thrift.TRow; @@ -59,6 +63,7 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -250,22 +255,38 @@ private List, TRow>> getPartitionInfosInrernal() throws An List, TRow>> partitionInfos = new ArrayList, TRow>>(); Map> partitionsUnSyncTables = null; String mtmvPartitionSyncErrorMsg = null; + + List needLocked = Lists.newArrayList(); + needLocked.add(olapTable); if (olapTable instanceof MTMV) { - try { - partitionsUnSyncTables = MTMVPartitionUtil - .getPartitionsUnSyncTables((MTMV) olapTable); - } catch (AnalysisException e) { - mtmvPartitionSyncErrorMsg = e.getMessage(); + MTMV mtmv = (MTMV) olapTable; + for (BaseTableInfo baseTableInfo : mtmv.getRelation().getBaseTables()) { + try { + TableIf baseTable = MTMVUtil.getTable(baseTableInfo); + needLocked.add(baseTable); + } catch (Exception e) { + // do nothing, ignore not existed table + } } + needLocked.sort(Comparator.comparing(TableIf::getId)); } - olapTable.readLock(); + MetaLockUtils.readLockTables(needLocked); try { + if (olapTable instanceof MTMV) { + try { + partitionsUnSyncTables = MTMVPartitionUtil + .getPartitionsUnSyncTables((MTMV) olapTable); + } catch (AnalysisException e) { + mtmvPartitionSyncErrorMsg = e.getMessage(); + } + } List partitionIds; PartitionInfo tblPartitionInfo = olapTable.getPartitionInfo(); // for range partitions, we return partitions in ascending range order by default. 
// this is to be consistent with the behaviour before 0.12 - if (tblPartitionInfo.getType() == PartitionType.RANGE || tblPartitionInfo.getType() == PartitionType.LIST) { + if (tblPartitionInfo.getType() == PartitionType.RANGE + || tblPartitionInfo.getType() == PartitionType.LIST) { partitionIds = tblPartitionInfo.getPartitionItemEntryList(isTempPartition, true).stream() .map(Map.Entry::getKey).collect(Collectors.toList()); } else { @@ -402,7 +423,7 @@ private List, TRow>> getPartitionInfosInrernal() throws An partitionInfos.add(Pair.of(partitionInfo, trow)); } } finally { - olapTable.readUnlock(); + MetaLockUtils.readUnlockTables(needLocked); } return partitionInfos; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java index ecc4c9088091619..6a92e043b6eb20a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java @@ -97,6 +97,7 @@ public class SummaryProfile { public static final String GET_TABLE_VERSION_COUNT = "Get Table Version Count"; public static final String PARSE_SQL_TIME = "Parse SQL Time"; + public static final String NEREIDS_LOCK_TABLE_TIME = "Nereids Lock Table Time"; public static final String NEREIDS_ANALYSIS_TIME = "Nereids Analysis Time"; public static final String NEREIDS_REWRITE_TIME = "Nereids Rewrite Time"; public static final String NEREIDS_OPTIMIZE_TIME = "Nereids Optimize Time"; @@ -136,6 +137,7 @@ public class SummaryProfile { // The display order of execution summary items. public static final ImmutableList EXECUTION_SUMMARY_KEYS = ImmutableList.of( PARSE_SQL_TIME, + NEREIDS_LOCK_TABLE_TIME, NEREIDS_ANALYSIS_TIME, NEREIDS_REWRITE_TIME, NEREIDS_OPTIMIZE_TIME, @@ -224,6 +226,8 @@ public class SummaryProfile { private long parseSqlStartTime = -1; @SerializedName(value = "parseSqlFinishTime") private long parseSqlFinishTime = -1; + @SerializedName(value = "nereidsLockTableFinishTime") + private long nereidsLockTableFinishTime = -1; @SerializedName(value = "nereidsAnalysisFinishTime") private long nereidsAnalysisFinishTime = -1; @SerializedName(value = "nereidsRewriteFinishTime") @@ -410,6 +414,7 @@ private void updateSummaryProfile(Map infos) { private void updateExecutionSummaryProfile() { executionSummaryProfile.addInfoString(PARSE_SQL_TIME, getPrettyParseSqlTime()); + executionSummaryProfile.addInfoString(NEREIDS_LOCK_TABLE_TIME, getPrettyNereidsLockTableTime()); executionSummaryProfile.addInfoString(NEREIDS_ANALYSIS_TIME, getPrettyNereidsAnalysisTime()); executionSummaryProfile.addInfoString(NEREIDS_REWRITE_TIME, getPrettyNereidsRewriteTime()); executionSummaryProfile.addInfoString(NEREIDS_OPTIMIZE_TIME, getPrettyNereidsOptimizeTime()); @@ -506,6 +511,10 @@ public void setParseSqlFinishTime(long parseSqlFinishTime) { this.parseSqlFinishTime = parseSqlFinishTime; } + public void setNereidsLockTableFinishTime() { + this.nereidsLockTableFinishTime = TimeUtils.getStartTimeMs(); + } + public void setNereidsAnalysisTime() { this.nereidsAnalysisFinishTime = TimeUtils.getStartTimeMs(); } @@ -766,8 +775,12 @@ public String getPrettyParseSqlTime() { return getPrettyTime(parseSqlFinishTime, parseSqlStartTime, TUnit.TIME_MS); } + public String getPrettyNereidsLockTableTime() { + return getPrettyTime(nereidsLockTableFinishTime, parseSqlStartTime, TUnit.TIME_MS); + } + public String getPrettyNereidsAnalysisTime() { - return 
getPrettyTime(nereidsAnalysisFinishTime, queryBeginTime, TUnit.TIME_MS); + return getPrettyTime(nereidsAnalysisFinishTime, nereidsLockTableFinishTime, TUnit.TIME_MS); } public String getPrettyNereidsRewriteTime() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/StmtExecutionAction.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/StmtExecutionAction.java index a37d3a11f84c9cb..524c228467a194e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/StmtExecutionAction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/StmtExecutionAction.java @@ -186,22 +186,26 @@ private ResponseEntity executeQuery(ActionAuthorizationInfo authInfo, boolean is @NotNull private String getSchema(String sql) { LogicalPlan unboundMvPlan = new NereidsParser().parseSingle(sql); - StatementContext statementContext = new StatementContext(ConnectContext.get(), - new OriginStatement(sql, 0)); - NereidsPlanner planner = new NereidsPlanner(statementContext); - if (statementContext.getConnectContext().getStatementContext() == null) { - statementContext.getConnectContext().setStatementContext(statementContext); + try (StatementContext statementContext = new StatementContext(ConnectContext.get(), + new OriginStatement(sql, 0))) { + StatementContext originalContext = ConnectContext.get().getStatementContext(); + try { + ConnectContext.get().setStatementContext(statementContext); + NereidsPlanner planner = new NereidsPlanner(statementContext); + planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainCommand.ExplainLevel.ANALYZED_PLAN); + LogicalPlan logicalPlan = (LogicalPlan) planner.getCascadesContext().getRewritePlan(); + + List createStmts = PlanUtils.getLogicalScanFromRootPlan(logicalPlan).stream().map(plan -> { + TableIf tbl = plan.getTable(); + List createTableStmts = Lists.newArrayList(); + Env.getDdlStmt(tbl, createTableStmts, null, null, false, true, -1L); + return createTableStmts.get(0); + }).collect(Collectors.toList()); + return Joiner.on("\n\n").join(createStmts); + } finally { + ConnectContext.get().setStatementContext(originalContext); + } } - planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainCommand.ExplainLevel.ANALYZED_PLAN); - LogicalPlan logicalPlan = (LogicalPlan) planner.getCascadesContext().getRewritePlan(); - - List createStmts = PlanUtils.getLogicalScanFromRootPlan(logicalPlan).stream().map(plan -> { - TableIf tbl = plan.getTable(); - List createTableStmts = Lists.newArrayList(); - Env.getDdlStmt(tbl, createTableStmts, null, null, false, true, -1L); - return createTableStmts.get(0); - }).collect(Collectors.toList()); - return Joiner.on("\n\n").join(createStmts); } private static class StmtRequestBody { diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java index c1002faf4078b39..31e6c8353e24b35 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java @@ -28,6 +28,7 @@ import org.apache.doris.common.Status; import org.apache.doris.common.UserException; import org.apache.doris.common.util.DebugUtil; +import org.apache.doris.common.util.MetaLockUtils; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.mvcc.MvccSnapshot; import org.apache.doris.datasource.mvcc.MvccTable; @@ -72,6 +73,7 @@ import java.math.BigDecimal; import java.math.RoundingMode; +import 
java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -180,19 +182,31 @@ public void run() throws JobException { } // Every time a task is run, the relation is regenerated because baseTables and baseViews may change, // such as deleting a table and creating a view with the same name - this.relation = MTMVPlanUtil.generateMTMVRelation(mtmv, ctx); + Set tablesInPlan = MTMVPlanUtil.getBaseTableFromQuery(mtmv.getQuerySql(), ctx); + this.relation = MTMVPlanUtil.generateMTMVRelation(tablesInPlan, ctx); beforeMTMVRefresh(); - if (mtmv.getMvPartitionInfo().getPartitionType() != MTMVPartitionType.SELF_MANAGE) { - MTMVRelatedTableIf relatedTable = mtmv.getMvPartitionInfo().getRelatedTable(); - if (!relatedTable.isValidRelatedTable()) { - throw new JobException("MTMV " + mtmv.getName() + "'s related table " + relatedTable.getName() - + " is not a valid related table anymore, stop refreshing." - + " e.g. Table has multiple partition columns or including not supported transform functions."); + List tableIfs = Lists.newArrayList(tablesInPlan); + tableIfs.sort(Comparator.comparing(TableIf::getId)); + + MTMVRefreshContext context; + // lock table order by id to avoid deadlock + MetaLockUtils.readLockTables(tableIfs); + try { + if (mtmv.getMvPartitionInfo().getPartitionType() != MTMVPartitionType.SELF_MANAGE) { + MTMVRelatedTableIf relatedTable = mtmv.getMvPartitionInfo().getRelatedTable(); + if (!relatedTable.isValidRelatedTable()) { + throw new JobException("MTMV " + mtmv.getName() + "'s related table " + relatedTable.getName() + + " is not a valid related table anymore, stop refreshing." + + " e.g. Table has multiple partition columns" + + " or including not supported transform functions."); + } + MTMVPartitionUtil.alignMvPartition(mtmv); } - MTMVPartitionUtil.alignMvPartition(mtmv); + context = MTMVRefreshContext.buildContext(mtmv); + this.needRefreshPartitions = calculateNeedRefreshPartitions(context); + } finally { + MetaLockUtils.readUnlockTables(tableIfs); } - MTMVRefreshContext context = MTMVRefreshContext.buildContext(mtmv); - this.needRefreshPartitions = calculateNeedRefreshPartitions(context); this.refreshMode = generateRefreshMode(needRefreshPartitions); if (refreshMode == MTMVTaskRefreshMode.NOT_REFRESH) { return; @@ -207,7 +221,7 @@ public void run() throws JobException { int start = i * refreshPartitionNum; int end = start + refreshPartitionNum; Set execPartitionNames = Sets.newHashSet(needRefreshPartitions - .subList(start, end > needRefreshPartitions.size() ? 
needRefreshPartitions.size() : end)); + .subList(start, Math.min(end, needRefreshPartitions.size()))); // need get names before exec Map execPartitionSnapshots = MTMVPartitionUtil .generatePartitionSnapshots(context, relation.getBaseTablesOneLevel(), execPartitionNames); @@ -217,7 +231,7 @@ public void run() throws JobException { } } catch (Throwable e) { if (getStatus() == TaskStatus.RUNNING) { - LOG.warn("run task failed: ", e.getMessage()); + LOG.warn("run task failed: {}", e.getMessage()); throw new JobException(e.getMessage(), e); } else { // if status is not `RUNNING`,maybe the task was canceled, therefore, it is a normal situation diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java index fcf18d73a269bb5..076a4f4e8bb0502 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java @@ -26,11 +26,14 @@ import org.apache.doris.datasource.InternalCatalog; import com.google.common.base.Objects; +import com.google.common.collect.Lists; import com.google.gson.annotations.SerializedName; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.List; + public class BaseTableInfo { private static final Logger LOG = LogManager.getLogger(BaseTableInfo.class); @@ -167,4 +170,8 @@ public void compatible(CatalogMgr catalogMgr) { LOG.warn("MTMV compatible failed, ctlId: {}, dbId: {}, tableId: {}", ctlId, dbId, tableId, e); } } + + public List<String> toList() { + return Lists.newArrayList(getCtlName(), getDbName(), getTableName()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java index d3d7f1ad6ebbf5d..b185000c14897ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java @@ -87,23 +87,31 @@ public StructInfo getStructInfo() { return structInfo; } - public static MTMVCache from(MTMV mtmv, ConnectContext connectContext, boolean needCost) { + public static MTMVCache from(MTMV mtmv, ConnectContext connectContext, boolean needCost, boolean needLock) { StatementContext mvSqlStatementContext = new StatementContext(connectContext, new OriginStatement(mtmv.getQuerySql(), 0)); + if (!needLock) { + mvSqlStatementContext.setNeedLockTables(false); + } if (mvSqlStatementContext.getConnectContext().getStatementContext() == null) { mvSqlStatementContext.getConnectContext().setStatementContext(mvSqlStatementContext); } LogicalPlan unboundMvPlan = new NereidsParser().parseSingle(mtmv.getQuerySql()); NereidsPlanner planner = new NereidsPlanner(mvSqlStatementContext); - - // Can not convert to table sink, because use the same column from different table when self join - // the out slot is wrong - if (needCost) { - // Only in mv rewrite, we need plan with eliminated cost which is used for mv chosen - planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainLevel.ALL_PLAN); - } else { - // No need cost for performance - planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainLevel.REWRITTEN_PLAN); + boolean originalRewriteFlag = connectContext.getSessionVariable().enableMaterializedViewRewrite; + connectContext.getSessionVariable().enableMaterializedViewRewrite = false; + try { + // Can not convert to table sink, because use the same column from different
table when self join + // the out slot is wrong + if (needCost) { + // Only in mv rewrite, we need plan with eliminated cost which is used for mv chosen + planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainLevel.ALL_PLAN); + } else { + // No need cost for performance + planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainLevel.REWRITTEN_PLAN); + } + } finally { + connectContext.getSessionVariable().enableMaterializedViewRewrite = originalRewriteFlag; } Plan originPlan = planner.getCascadesContext().getRewritePlan(); // Eliminate result sink because sink operator is useless in query rewrite by materialized view @@ -128,6 +136,6 @@ public Plan visitLogicalResultSink(LogicalResultSink logicalResu new BitSet()); return new MTMVCache(mvPlan, originPlan, planner.getAnalyzedPlan(), needCost ? planner.getCascadesContext().getMemo().getRoot().getStatistics() : null, - structInfoOptional.orElseGet(() -> null)); + structInfoOptional.orElse(null)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java index 2c03ad16176feab..a9dee132f64b122 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java @@ -104,17 +104,18 @@ private void setManualJobConfig(JobExecutionConfiguration jobExecutionConfigurat private void setScheduleJobConfig(JobExecutionConfiguration jobExecutionConfiguration, MTMV mtmv) { jobExecutionConfiguration.setExecuteType(JobExecuteType.RECURRING); + MTMVRefreshInfo refreshMTMVInfo = mtmv.getRefreshInfo(); TimerDefinition timerDefinition = new TimerDefinition(); timerDefinition - .setInterval(mtmv.getRefreshInfo().getRefreshTriggerInfo().getIntervalTrigger().getInterval()); + .setInterval(refreshMTMVInfo.getRefreshTriggerInfo().getIntervalTrigger().getInterval()); timerDefinition - .setIntervalUnit(mtmv.getRefreshInfo().getRefreshTriggerInfo().getIntervalTrigger().getTimeUnit()); + .setIntervalUnit(refreshMTMVInfo.getRefreshTriggerInfo().getIntervalTrigger().getTimeUnit()); if (!StringUtils - .isEmpty(mtmv.getRefreshInfo().getRefreshTriggerInfo().getIntervalTrigger().getStartTime())) { + .isEmpty(refreshMTMVInfo.getRefreshTriggerInfo().getIntervalTrigger().getStartTime())) { timerDefinition.setStartTimeMs(TimeUtils.timeStringToLong( - mtmv.getRefreshInfo().getRefreshTriggerInfo().getIntervalTrigger().getStartTime())); + refreshMTMVInfo.getRefreshTriggerInfo().getIntervalTrigger().getStartTime())); } - if (mtmv.getRefreshInfo().getBuildMode().equals(BuildMode.IMMEDIATE)) { + if (refreshMTMVInfo.getBuildMode().equals(BuildMode.IMMEDIATE)) { jobExecutionConfiguration.setImmediate(true); } jobExecutionConfiguration.setTimerDefinition(timerDefinition); diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java index 9597378c488cfc7..8f715b1b0fb3a0a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java @@ -536,11 +536,12 @@ private static Map getPartitionVersions(MTMV mtmv) throws Analysis private static Map getTableVersions(MTMV mtmv) { Map res = Maps.newHashMap(); - if (mtmv.getRelation() == null || mtmv.getRelation().getBaseTablesOneLevel() == null) { + MTMVRelation relation = mtmv.getRelation(); + if (relation == null || relation.getBaseTablesOneLevel() == null) { return 
res; } List olapTables = Lists.newArrayList(); - for (BaseTableInfo baseTableInfo : mtmv.getRelation().getBaseTablesOneLevel()) { + for (BaseTableInfo baseTableInfo : relation.getBaseTablesOneLevel()) { TableIf table = null; try { table = MTMVUtil.getTable(baseTableInfo); diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java index 35c06e74d3cc806..3264d6627ead5d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java @@ -33,11 +33,8 @@ import org.apache.doris.nereids.parser.NereidsParser; import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.commands.ExplainCommand.ExplainLevel; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; -import org.apache.doris.nereids.trees.plans.visitor.TableCollector; -import org.apache.doris.nereids.trees.plans.visitor.TableCollector.TableCollectorContext; import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableSet; @@ -99,41 +96,27 @@ private static void setCatalogAndDb(ConnectContext ctx, MTMV mtmv) { ctx.setDatabase(databaseIf.get().getFullName()); } - public static MTMVRelation generateMTMVRelation(MTMV mtmv, ConnectContext ctx) { - // Should not make table without data to empty relation when analyze the related table, - // so add disable rules - Plan plan = getAnalyzePlanBySql(mtmv.getQuerySql(), ctx); - return generateMTMVRelation(plan, ctx); - } - - public static MTMVRelation generateMTMVRelation(Plan plan, ConnectContext connectContext) { - return new MTMVRelation(getBaseTables(plan, true, connectContext), - getBaseTables(plan, false, connectContext), getBaseViews(plan)); - } - - private static Set getBaseTables(Plan plan, boolean expand, ConnectContext connectContext) { - TableCollectorContext collectorContext = - new TableCollector.TableCollectorContext( - com.google.common.collect.Sets - .newHashSet(TableType.values()), expand, connectContext); - plan.accept(TableCollector.INSTANCE, collectorContext); - Set collectedTables = collectorContext.getCollectedTables(); - return transferTableIfToInfo(collectedTables); - } - - private static Set getBaseViews(Plan plan) { - return Sets.newHashSet(); - } - - private static Set transferTableIfToInfo(Set tables) { - Set result = com.google.common.collect.Sets.newHashSet(); - for (TableIf table : tables) { - result.add(new BaseTableInfo(table)); + public static MTMVRelation generateMTMVRelation(Set tablesInPlan, ConnectContext ctx) { + Set oneLevelTables = Sets.newHashSet(); + Set allLevelTables = Sets.newHashSet(); + Set oneLevelViews = Sets.newHashSet(); + for (TableIf table : tablesInPlan) { + BaseTableInfo baseTableInfo = new BaseTableInfo(table); + if (table.getType() == TableType.VIEW) { + // TODO reopen it after we support mv on view + // oneLevelViews.add(baseTableInfo); + } else { + oneLevelTables.add(baseTableInfo); + allLevelTables.add(baseTableInfo); + if (table instanceof MTMV) { + allLevelTables.addAll(((MTMV) table).getRelation().getBaseTables()); + } + } } - return result; + return new MTMVRelation(allLevelTables, oneLevelTables, oneLevelViews); } - private static Plan getAnalyzePlanBySql(String querySql, ConnectContext ctx) { + public static Set getBaseTableFromQuery(String querySql, ConnectContext ctx) { List statements; 
try { statements = new NereidsParser().parseSQL(querySql); @@ -143,12 +126,15 @@ private static Plan getAnalyzePlanBySql(String querySql, ConnectContext ctx) { StatementBase parsedStmt = statements.get(0); LogicalPlan logicalPlan = ((LogicalPlanAdapter) parsedStmt).getLogicalPlan(); StatementContext original = ctx.getStatementContext(); - ctx.setStatementContext(new StatementContext()); - try { - NereidsPlanner planner = new NereidsPlanner(ctx.getStatementContext()); - return planner.planWithLock(logicalPlan, PhysicalProperties.ANY, ExplainLevel.ANALYZED_PLAN); - } finally { - ctx.setStatementContext(original); + try (StatementContext tempCtx = new StatementContext()) { + ctx.setStatementContext(tempCtx); + try { + NereidsPlanner planner = new NereidsPlanner(ctx.getStatementContext()); + planner.planWithLock(logicalPlan, PhysicalProperties.ANY, ExplainLevel.ANALYZED_PLAN); + return Sets.newHashSet(ctx.getStatementContext().getTables().values()); + } finally { + ctx.setStatementContext(original); + } } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java index 436427526ba08b0..f8f92e25d38d654 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java @@ -59,8 +59,8 @@ public class MTMVRelationManager implements MTMVHookService { // create mv2 as select * from mv1; // `tableMTMVs` will have 3 pair: table1 ==> mv1,mv1==>mv2, table1 ==> mv2 // `tableMTMVsOneLevel` will have 2 pair: table1 ==> mv1,mv1==>mv2 - private Map> tableMTMVs = Maps.newConcurrentMap(); - private Map> tableMTMVsOneLevel = Maps.newConcurrentMap(); + private final Map> tableMTMVs = Maps.newConcurrentMap(); + private final Map> tableMTMVsOneLevel = Maps.newConcurrentMap(); public Set getMtmvsByBaseTable(BaseTableInfo table) { return tableMTMVs.getOrDefault(table, ImmutableSet.of()); @@ -98,6 +98,23 @@ public Set getAvailableMTMVs(List tableInfos, ConnectContex return res; } + /** + * get all mtmv related to tableInfos. 
+ */ + public Set getAllMTMVs(List tableInfos) { + Set mtmvs = Sets.newLinkedHashSet(); + Set mvInfos = getMTMVInfos(tableInfos); + for (BaseTableInfo tableInfo : mvInfos) { + try { + mtmvs.add((MTMV) MTMVUtil.getTable(tableInfo)); + } catch (AnalysisException e) { + // not throw exception to client, just ignore it + LOG.warn("getTable failed: {}", tableInfo.toString(), e); + } + } + return mtmvs; + } + @VisibleForTesting public boolean isMVPartitionValid(MTMV mtmv, ConnectContext ctx, boolean forceConsistent) { long currentTimeMillis = System.currentTimeMillis(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRewriteUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRewriteUtil.java index 7b7d743a36bc8ce..ff1b3263d3409ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRewriteUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRewriteUtil.java @@ -51,8 +51,8 @@ public static Collection getMTMVCanRewritePartitions(MTMV mtmv, Conne return res; } // check mv is normal - if (mtmv.getStatus().getState() != MTMVState.NORMAL - || mtmv.getStatus().getRefreshState() == MTMVRefreshState.INIT) { + MTMVStatus mtmvStatus = mtmv.getStatus(); + if (mtmvStatus.getState() != MTMVState.NORMAL || mtmvStatus.getRefreshState() == MTMVRefreshState.INIT) { return res; } MTMVRefreshContext refreshContext = null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java index bb10996a11bf6ae..258704763909f1d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java @@ -17,20 +17,13 @@ package org.apache.doris.nereids; -import org.apache.doris.catalog.DatabaseIf; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Pair; -import org.apache.doris.datasource.CatalogIf; import org.apache.doris.nereids.analyzer.Scope; -import org.apache.doris.nereids.analyzer.UnboundOneRowRelation; -import org.apache.doris.nereids.analyzer.UnboundRelation; -import org.apache.doris.nereids.analyzer.UnboundTableSink; -import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.hint.Hint; import org.apache.doris.nereids.jobs.Job; import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.jobs.executor.Analyzer; +import org.apache.doris.nereids.jobs.executor.TableCollector; import org.apache.doris.nereids.jobs.rewrite.RewriteBottomUpJob; import org.apache.doris.nereids.jobs.rewrite.RewriteTopDownJob; import org.apache.doris.nereids.jobs.rewrite.RootPlanTreeRewriteJob.RootRewriteJobContext; @@ -46,7 +39,6 @@ import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.rules.RuleFactory; import org.apache.doris.nereids.rules.RuleSet; -import org.apache.doris.nereids.rules.analysis.BindRelation.CustomTableResolver; import org.apache.doris.nereids.rules.exploration.mv.MaterializationContext; import org.apache.doris.nereids.trees.expressions.CTEId; import org.apache.doris.nereids.trees.expressions.Expression; @@ -54,13 +46,7 @@ import org.apache.doris.nereids.trees.expressions.SubqueryExpr; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.RelationId; -import org.apache.doris.nereids.trees.plans.logical.LogicalCTE; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; -import 
org.apache.doris.nereids.trees.plans.logical.LogicalFilter; -import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; -import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; -import org.apache.doris.nereids.trees.plans.logical.LogicalProject; -import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.ColumnStatistic; @@ -70,7 +56,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; -import org.apache.commons.collections.MapUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -84,8 +69,6 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; -import java.util.Stack; -import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -103,7 +86,7 @@ public class CascadesContext implements ScheduleContext { private Memo memo; private final StatementContext statementContext; - private final CTEContext cteContext; + private CTEContext cteContext; private final RuleSet ruleSet; private final JobPool jobPool; private final JobScheduler jobScheduler; @@ -113,7 +96,6 @@ public class CascadesContext implements ScheduleContext { private final RuntimeFilterContext runtimeFilterContext; private final TopnFilterContext topnFilterContext = new TopnFilterContext(); private Optional outerScope = Optional.empty(); - private Map, TableIf> tables = null; private boolean isRewriteRoot; private volatile boolean isTimeout = false; @@ -169,9 +151,6 @@ private CascadesContext(Optional parent, Optional curren } else { this.isEnableExprTrace = false; } - if (parent.isPresent()) { - this.tables = parent.get().tables; - } this.isLeadingDisableJoinReorder = isLeadingDisableJoinReorder; } @@ -245,12 +224,12 @@ public void toMemo() { this.memo = new Memo(getConnectContext(), plan); } - public Analyzer newAnalyzer() { - return newAnalyzer(Optional.empty()); + public TableCollector newTableCollector() { + return new TableCollector(this); } - public Analyzer newAnalyzer(Optional customTableResolver) { - return new Analyzer(this, customTableResolver); + public Analyzer newAnalyzer() { + return new Analyzer(this); } @Override @@ -266,10 +245,6 @@ public void releaseMemo() { this.memo = null; } - public void setTables(Map, TableIf> tables) { - this.tables = tables; - } - public final ConnectContext getConnectContext() { return statementContext.getConnectContext(); } @@ -344,6 +319,10 @@ public CTEContext getCteContext() { return cteContext; } + public void setCteContext(CTEContext cteContext) { + this.cteContext = cteContext; + } + public void setIsRewriteRoot(boolean isRewriteRoot) { this.isRewriteRoot = isRewriteRoot; } @@ -408,204 +387,6 @@ private CascadesContext execute(Job job) { return this; } - /** - * Extract tables. 
- */ - public void extractTables(LogicalPlan logicalPlan) { - Set> tableNames = getTables(logicalPlan); - tables = Maps.newHashMap(); - for (List tableName : tableNames) { - try { - TableIf table = getTable(tableName); - tables.put(table.getFullQualifiers(), table); - } catch (Throwable e) { - // IGNORE - } - } - - } - - public Map, TableIf> getTables() { - if (tables == null) { - return null; - } else { - return tables; - } - } - - private Set> getTables(LogicalPlan logicalPlan) { - final Set> tableNames = new HashSet<>(); - logicalPlan.foreach(p -> { - if (p instanceof LogicalFilter) { - tableNames.addAll(extractTableNamesFromFilter((LogicalFilter) p)); - } else if (p instanceof LogicalCTE) { - tableNames.addAll(extractTableNamesFromCTE((LogicalCTE) p)); - } else if (p instanceof LogicalProject) { - tableNames.addAll(extractTableNamesFromProject((LogicalProject) p)); - } else if (p instanceof LogicalHaving) { - tableNames.addAll(extractTableNamesFromHaving((LogicalHaving) p)); - } else if (p instanceof UnboundOneRowRelation) { - tableNames.addAll(extractTableNamesFromOneRowRelation((UnboundOneRowRelation) p)); - } else { - Set logicalPlans = p.collect( - n -> (n instanceof UnboundRelation || n instanceof UnboundTableSink)); - for (LogicalPlan plan : logicalPlans) { - if (plan instanceof UnboundRelation) { - tableNames.add(((UnboundRelation) plan).getNameParts()); - } else if (plan instanceof UnboundTableSink) { - tableNames.add(((UnboundTableSink) plan).getNameParts()); - } else { - throw new AnalysisException("get tables from plan failed. meet unknown type node " + plan); - } - } - } - }); - return tableNames; - } - - public Map, TableIf> getOrExtractTables(LogicalPlan logicalPlan) { - if (MapUtils.isEmpty(tables)) { - extractTables(logicalPlan); - } - return tables; - } - - private Set> extractTableNamesFromHaving(LogicalHaving having) { - Set subqueryExprs = having.getPredicate() - .collect(SubqueryExpr.class::isInstance); - Set> tableNames = new HashSet<>(); - for (SubqueryExpr expr : subqueryExprs) { - LogicalPlan plan = expr.getQueryPlan(); - tableNames.addAll(getTables(plan)); - } - return tableNames; - } - - private Set> extractTableNamesFromOneRowRelation(UnboundOneRowRelation oneRowRelation) { - Set subqueryExprs = oneRowRelation.getProjects().stream() - .>map(p -> p.collect(SubqueryExpr.class::isInstance)) - .flatMap(Set::stream) - .collect(Collectors.toSet()); - Set> tableNames = new HashSet<>(); - for (SubqueryExpr expr : subqueryExprs) { - LogicalPlan plan = expr.getQueryPlan(); - tableNames.addAll(getTables(plan)); - } - return tableNames; - } - - private Set> extractTableNamesFromProject(LogicalProject project) { - Set subqueryExprs = project.getProjects().stream() - .>map(p -> p.collect(SubqueryExpr.class::isInstance)) - .flatMap(Set::stream) - .collect(Collectors.toSet()); - Set> tableNames = new HashSet<>(); - for (SubqueryExpr expr : subqueryExprs) { - LogicalPlan plan = expr.getQueryPlan(); - tableNames.addAll(getTables(plan)); - } - return tableNames; - } - - private Set> extractTableNamesFromFilter(LogicalFilter filter) { - Set subqueryExprs = filter.getPredicate() - .collect(SubqueryExpr.class::isInstance); - Set> tableNames = new HashSet<>(); - for (SubqueryExpr expr : subqueryExprs) { - LogicalPlan plan = expr.getQueryPlan(); - tableNames.addAll(getTables(plan)); - } - return tableNames; - } - - private Set> extractTableNamesFromCTE(LogicalCTE cte) { - List> subQueryAliases = cte.getAliasQueries(); - Set> tableNames = new HashSet<>(); - for (LogicalSubQueryAlias 
subQueryAlias : subQueryAliases) { - tableNames.addAll(getTables(subQueryAlias)); - } - return tableNames; - } - - private TableIf getTable(List nameParts) { - switch (nameParts.size()) { - case 1: { // table - String ctlName = getConnectContext().getEnv().getCurrentCatalog().getName(); - String dbName = getConnectContext().getDatabase(); - return getTable(ctlName, dbName, nameParts.get(0), getConnectContext().getEnv()); - } - case 2: { // db.table - String ctlName = getConnectContext().getEnv().getCurrentCatalog().getName(); - String dbName = nameParts.get(0); - return getTable(ctlName, dbName, nameParts.get(1), getConnectContext().getEnv()); - } - case 3: { // catalog.db.table - return getTable(nameParts.get(0), nameParts.get(1), nameParts.get(2), getConnectContext().getEnv()); - } - default: - throw new IllegalStateException("Table name [" + String.join(".", nameParts) + "] is invalid."); - } - } - - /** - * Find table from catalog. - */ - public TableIf getTable(String ctlName, String dbName, String tableName, Env env) { - CatalogIf catalog = env.getCatalogMgr().getCatalog(ctlName); - if (catalog == null) { - throw new RuntimeException("Catalog [" + ctlName + "] does not exist."); - } - DatabaseIf db = catalog.getDbNullable(dbName); - if (db == null) { - throw new RuntimeException("Database [" + dbName + "] does not exist in catalog [" + ctlName + "]."); - } - - TableIf table = db.getTableNullable(tableName); - if (table == null) { - throw new RuntimeException("Table [" + tableName + "] does not exist in database [" + dbName + "]."); - } - return table; - - } - - /** - * Used to lock table - */ - public static class Lock implements AutoCloseable { - - CascadesContext cascadesContext; - private final Stack locked = new Stack<>(); - - /** - * Try to acquire read locks on tables, throw runtime exception once the acquiring for read lock failed. 
-         */
-        public Lock(LogicalPlan plan, CascadesContext cascadesContext) {
-            this.cascadesContext = cascadesContext;
-            // tables can also be loaded from a dump file
-            if (cascadesContext.getTables() == null || cascadesContext.getTables().isEmpty()) {
-                cascadesContext.extractTables(plan);
-                cascadesContext.getStatementContext().setTables(cascadesContext.getTables());
-            }
-            for (TableIf table : cascadesContext.tables.values()) {
-                if (!table.needReadLockWhenPlan()) {
-                    continue;
-                }
-                if (!table.tryReadLock(1, TimeUnit.MINUTES)) {
-                    close();
-                    throw new RuntimeException(String.format("Failed to get read lock on table: %s", table.getName()));
-                }
-                locked.push(table);
-            }
-        }
-
-        @Override
-        public void close() {
-            while (!locked.empty()) {
-                locked.pop().readUnlock();
-            }
-        }
-    }
-
     public void putCTEIdToConsumer(LogicalCTEConsumer cteConsumer) {
         Set<LogicalCTEConsumer> consumers = this.statementContext.getCteIdToConsumers()
                 .computeIfAbsent(cteConsumer.getCteId(), k -> new HashSet<>());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 16fe1353facfb6e..4eafa0e2172f966 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -30,7 +30,6 @@
 import org.apache.doris.common.profile.SummaryProfile;
 import org.apache.doris.common.util.DebugUtil;
 import org.apache.doris.mysql.FieldInfo;
-import org.apache.doris.nereids.CascadesContext.Lock;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.glue.LogicalPlanAdapter;
 import org.apache.doris.nereids.glue.translator.PhysicalPlanTranslator;
@@ -156,7 +155,7 @@ public void plan(StatementBase queryStmt, org.apache.doris.thrift.TQueryOptions
     }
 
     @VisibleForTesting
-    public void planWithLock(StatementBase queryStmt) {
+    public void plan(StatementBase queryStmt) {
         try {
             plan(queryStmt, statementContext.getConnectContext().getSessionVariable().toThrift());
         } catch (Exception e) {
@@ -164,14 +163,17 @@ public void planWithLock(StatementBase queryStmt) {
         }
     }
 
+    @VisibleForTesting
     public PhysicalPlan planWithLock(LogicalPlan plan, PhysicalProperties outputProperties) {
         return (PhysicalPlan) planWithLock(plan, outputProperties, ExplainLevel.NONE, false);
     }
 
+    // TODO check all callers
     public Plan planWithLock(LogicalPlan plan, PhysicalProperties requireProperties, ExplainLevel explainLevel) {
         return planWithLock(plan, requireProperties, explainLevel, false);
     }
 
+    @VisibleForTesting
     public Plan planWithLock(LogicalPlan plan, PhysicalProperties requireProperties,
             ExplainLevel explainLevel, boolean showPlanProcess) {
         Consumer<Plan> noCallback = p -> {};
@@ -188,9 +190,8 @@ public Plan planWithLock(LogicalPlan plan, PhysicalProperties requireProperties,
      * @return plan generated by this planner
      * @throws AnalysisException throw exception if failed in any stage
      */
-    public Plan planWithLock(LogicalPlan plan, PhysicalProperties requireProperties,
-            ExplainLevel explainLevel, boolean showPlanProcess,
-            Consumer<Plan> lockCallback) {
+    private Plan planWithLock(LogicalPlan plan, PhysicalProperties requireProperties,
+            ExplainLevel explainLevel, boolean showPlanProcess, Consumer<Plan> lockCallback) {
         try {
             long beforePlanGcTime = getGarbageCollectionTime();
             if (plan instanceof LogicalSqlCache) {
@@ -216,39 +217,37 @@ public Plan planWithLock(LogicalPlan plan, PhysicalProperties requireProperties,
             plan = preprocess(plan);
             initCascadesContext(plan, requireProperties);
-            statementContext.loadSnapshots(cascadesContext.getOrExtractTables(plan));
-            try (Lock lock = new Lock(plan, cascadesContext)) {
-                Plan resultPlan = planWithoutLock(plan, explainLevel, showPlanProcess, requireProperties);
-                lockCallback.accept(resultPlan);
-                if (statementContext.getConnectContext().getExecutor() != null) {
-                    statementContext.getConnectContext().getExecutor().getSummaryProfile()
-                            .setNereidsGarbageCollectionTime(getGarbageCollectionTime() - beforePlanGcTime);
-                }
-                return resultPlan;
+            // collect tables and lock them in the order of table id
+            collectAndLockTable(showAnalyzeProcess(explainLevel, showPlanProcess));
+            // after the table collector, we should use a new context.
+            statementContext.loadSnapshots();
+            Plan resultPlan = planWithoutLock(plan, requireProperties, explainLevel, showPlanProcess);
+            lockCallback.accept(resultPlan);
+            if (statementContext.getConnectContext().getExecutor() != null) {
+                statementContext.getConnectContext().getExecutor().getSummaryProfile()
+                        .setNereidsGarbageCollectionTime(getGarbageCollectionTime() - beforePlanGcTime);
             }
+            return resultPlan;
         } finally {
             statementContext.releasePlannerResources();
         }
     }
 
-    protected Plan planWithoutLock(
-            LogicalPlan plan, ExplainLevel explainLevel,
-            boolean showPlanProcess, PhysicalProperties requireProperties) {
-        // resolve column, table and function
-        // analyze this query
-        analyze(showAnalyzeProcess(explainLevel, showPlanProcess));
+    /**
+     * do the planning, but do not lock any tables
+     */
+    private Plan planWithoutLock(
+            LogicalPlan plan, PhysicalProperties requireProperties, ExplainLevel explainLevel,
+            boolean showPlanProcess) {
         // minidump of the input must be serialized first; this process ensures the minidump string is not null
         try {
-            MinidumpUtils.serializeInputsToDumpFile(plan, cascadesContext.getTables());
+
+            MinidumpUtils.serializeInputsToDumpFile(plan, statementContext);
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
-
-        if (statementContext.getConnectContext().getExecutor() != null) {
-            statementContext.getConnectContext().getExecutor().getSummaryProfile().setQueryAnalysisFinishTime();
-            statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsAnalysisTime();
-        }
-
+        // analyze this query: resolve columns, tables and functions
+        analyze(showAnalyzeProcess(explainLevel, showPlanProcess));
         if (explainLevel == ExplainLevel.ANALYZED_PLAN || explainLevel == ExplainLevel.ALL_PLAN) {
             analyzedPlan = cascadesContext.getRewritePlan();
             if (explainLevel == ExplainLevel.ANALYZED_PLAN) {
@@ -258,10 +257,6 @@ protected Plan planWithoutLock(
         // rule-based optimize
         rewrite(showRewriteProcess(explainLevel, showPlanProcess));
-        if (statementContext.getConnectContext().getExecutor() != null) {
-            statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsRewriteTime();
-        }
-
         if (explainLevel == ExplainLevel.REWRITTEN_PLAN || explainLevel == ExplainLevel.ALL_PLAN) {
             rewrittenPlan = cascadesContext.getRewritePlan();
             if (explainLevel == ExplainLevel.REWRITTEN_PLAN) {
@@ -269,40 +264,20 @@ protected Plan planWithoutLock(
             }
         }
 
-        // if we cannot get table row count, skip join reorder
-        // except:
-        // 1. user set leading hint
-        // 2. ut test.
In ut test, FeConstants.enableInternalSchemaDb is false or FeConstants.runningUnitTest is true - if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest - && !cascadesContext.isLeadingDisableJoinReorder()) { - List scans = cascadesContext.getRewritePlan() - .collectToList(CatalogRelation.class::isInstance); - Optional disableJoinReorderReason = StatsCalculator - .disableJoinReorderIfStatsInvalid(scans, cascadesContext); - disableJoinReorderReason.ifPresent(statementContext::setDisableJoinReorderReason); - } - - setRuntimeFilterWaitTimeByTableRowCountAndType(); - optimize(); - if (statementContext.getConnectContext().getExecutor() != null) { - statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsOptimizeTime(); - } - // print memo before choose plan. // if chooseNthPlan failed, we could get memo to debug if (cascadesContext.getConnectContext().getSessionVariable().dumpNereidsMemo) { String memo = cascadesContext.getMemo().toString(); - LOG.info(ConnectContext.get().getQueryIdentifier() + "\n" + memo); + LOG.info("{}\n{}", ConnectContext.get().getQueryIdentifier(), memo); } - int nth = cascadesContext.getConnectContext().getSessionVariable().getNthOptimizedPlan(); PhysicalPlan physicalPlan = chooseNthPlan(getRoot(), requireProperties, nth); physicalPlan = postProcess(physicalPlan); if (cascadesContext.getConnectContext().getSessionVariable().dumpNereidsMemo) { String tree = physicalPlan.treeString(); - LOG.info(ConnectContext.get().getQueryIdentifier() + "\n" + tree); + LOG.info("{}\n{}", ConnectContext.get().getQueryIdentifier(), tree); } if (explainLevel == ExplainLevel.OPTIMIZED_PLAN || explainLevel == ExplainLevel.ALL_PLAN @@ -361,8 +336,21 @@ private void setRuntimeFilterWaitTimeByTableRowCountAndType() { private void initCascadesContext(LogicalPlan plan, PhysicalProperties requireProperties) { cascadesContext = CascadesContext.initContext(statementContext, plan, requireProperties); - if (statementContext.getTables() != null) { - cascadesContext.setTables(statementContext.getTables()); + } + + protected void collectAndLockTable(boolean showPlanProcess) { + if (LOG.isDebugEnabled()) { + LOG.debug("Start collect and lock table"); + } + keepOrShowPlanProcess(showPlanProcess, () -> cascadesContext.newTableCollector().collect()); + statementContext.lock(); + cascadesContext.setCteContext(new CTEContext()); + NereidsTracer.logImportantTime("EndCollectAndLockTables"); + if (LOG.isDebugEnabled()) { + LOG.debug("End collect and lock table"); + } + if (statementContext.getConnectContext().getExecutor() != null) { + statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsLockTableFinishTime(); } } @@ -376,6 +364,11 @@ protected void analyze(boolean showPlanProcess) { if (LOG.isDebugEnabled()) { LOG.debug("End analyze plan"); } + + if (statementContext.getConnectContext().getExecutor() != null) { + statementContext.getConnectContext().getExecutor().getSummaryProfile().setQueryAnalysisFinishTime(); + statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsAnalysisTime(); + } } /** @@ -390,10 +383,26 @@ protected void rewrite(boolean showPlanProcess) { if (LOG.isDebugEnabled()) { LOG.debug("End rewrite plan"); } + if (statementContext.getConnectContext().getExecutor() != null) { + statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsRewriteTime(); + } } // DependsRules: EnsureProjectOnTopJoin.class protected void optimize() { + // if we cannot get table row count, skip join reorder 
+ // except: + // 1. user set leading hint + // 2. ut test. In ut test, FeConstants.enableInternalSchemaDb is false or FeConstants.runningUnitTest is true + if (FeConstants.enableInternalSchemaDb && !FeConstants.runningUnitTest + && !cascadesContext.isLeadingDisableJoinReorder()) { + List scans = cascadesContext.getRewritePlan() + .collectToList(CatalogRelation.class::isInstance); + Optional disableJoinReorderReason = StatsCalculator + .disableJoinReorderIfStatsInvalid(scans, cascadesContext); + disableJoinReorderReason.ifPresent(statementContext::setDisableJoinReorderReason); + } + setRuntimeFilterWaitTimeByTableRowCountAndType(); if (LOG.isDebugEnabled()) { LOG.debug("Start optimize plan"); } @@ -402,6 +411,9 @@ protected void optimize() { if (LOG.isDebugEnabled()) { LOG.debug("End optimize plan"); } + if (statementContext.getConnectContext().getExecutor() != null) { + statementContext.getConnectContext().getExecutor().getSummaryProfile().setNereidsOptimizeTime(); + } } protected void splitFragments(PhysicalPlan resultPlan) { @@ -685,6 +697,8 @@ public String getExplainString(ExplainOptions explainOptions) { plan = "========== PARSED PLAN " + getTimeMetricString(SummaryProfile::getPrettyParseSqlTime) + " ==========\n" + parsedPlan.treeString() + "\n\n" + + "========== LOCK TABLE " + + getTimeMetricString(SummaryProfile::getPrettyNereidsLockTableTime) + " ==========\n" + "========== ANALYZED PLAN " + getTimeMetricString(SummaryProfile::getPrettyNereidsAnalysisTime) + " ==========\n" + analyzedPlan.treeString() + "\n\n" @@ -864,7 +878,7 @@ private boolean showRewriteProcess(ExplainLevel explainLevel, boolean showPlanPr } private boolean showPlanProcess(ExplainOptions explainOptions) { - return explainOptions == null ? false : explainOptions.showPlanProcess(); + return explainOptions != null && explainOptions.showPlanProcess(); } private void keepOrShowPlanProcess(boolean showPlanProcess, Runnable task) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java index ea3e80877db329f..29be4af41a76753 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/SqlCacheContext.java @@ -424,6 +424,10 @@ public static class FullTableName { public String toString() { return catalog + "." + db + "." 
+ table;
     }
+
+    public List<String> toList() {
+        return Lists.newArrayList(catalog, db, table);
+    }
 }
 
 /** FullColumnName */
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index 4d60b06e7c5cc1c..7717c1034bb6b4f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -19,6 +19,7 @@
 import org.apache.doris.analysis.StatementBase;
 import org.apache.doris.catalog.TableIf;
+import org.apache.doris.catalog.View;
 import org.apache.doris.catalog.constraint.TableIdentifier;
 import org.apache.doris.common.FormatOptions;
 import org.apache.doris.common.Id;
@@ -42,9 +43,9 @@
 import org.apache.doris.nereids.trees.plans.PlaceholderId;
 import org.apache.doris.nereids.trees.plans.RelationId;
 import org.apache.doris.nereids.trees.plans.TableId;
-import org.apache.doris.nereids.trees.plans.algebra.Relation;
 import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
+import org.apache.doris.nereids.util.RelationUtil;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.OriginStatement;
 import org.apache.doris.qe.SessionVariable;
@@ -54,7 +55,6 @@
 import org.apache.doris.system.Backend;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
 import com.google.common.base.Stopwatch;
 import com.google.common.base.Supplier;
 import com.google.common.base.Suppliers;
@@ -70,11 +70,13 @@
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.PriorityQueue;
 import java.util.Set;
 import java.util.Stack;
 import java.util.TreeMap;
@@ -87,6 +89,18 @@
 public class StatementContext implements Closeable {
     private static final Logger LOG = LogManager.getLogger(StatementContext.class);
 
+    /**
+     * Indicates where a table comes from.
+     * QUERY: referenced directly in the query sql
+     * INSERT_TARGET: the insert target table
+     * MTMV: the mtmv itself and its related tables, which do not belong to this sql but may be used in rewrite by mtmv.
+     */
+    public enum TableFrom {
+        QUERY,
+        INSERT_TARGET,
+        MTMV
+    }
+
     private ConnectContext connectContext;
 
     private final Stopwatch stopwatch = Stopwatch.createUnstarted();
@@ -140,10 +154,6 @@ public class StatementContext implements Closeable {
 
     private final List<Hint> hints = new ArrayList<>();
 
-    // Map slot to its relation, currently used in SlotReference to find its original
-    // Relation, for example LogicalOlapScan
-    private final Map<Slot, Relation> slotToRelation = Maps.newHashMap();
-
     // the columns in Plan.getExpressions(), such as columns in join condition or filter condition, group by expression
     private final Set<Slot> keySlots = Sets.newHashSet();
     private BitSet disableRules;
@@ -154,8 +164,17 @@ public class StatementContext implements Closeable {
     // placeholder params for prepared statement
     private List<Placeholder> placeholders;
 
-    // tables used for plan replayer
-    private Map<List<String>, TableIf> tables = null;
+    // whether this statement needs to lock the tables it uses
+    private boolean needLockTables = true;
+
+    // tables used in this query directly
+    private final Map<List<String>, TableIf> tables = Maps.newHashMap();
+    // tables that may be used by mtmv rewrite in this query
+    private final Map<List<String>, TableIf> mtmvRelatedTables = Maps.newHashMap();
+    // insert into target tables
+    private final Map<List<String>, TableIf> insertTargetTables = Maps.newHashMap();
+    // save each view's def and sql mode to avoid them changing before the view is locked
+    private final Map<List<String>, Pair<String, Long>> viewInfos = Maps.newHashMap();
 
     // for create view support in nereids
     // key is the start and end position of the sql substring that needs to be replaced,
@@ -178,7 +197,7 @@ public class StatementContext implements Closeable {
 
     private FormatOptions formatOptions = FormatOptions.getDefault();
 
-    private List<PlannerHook> plannerHooks = new ArrayList<>();
+    private final List<PlannerHook> plannerHooks = new ArrayList<>();
 
     private String disableJoinReorderReason;
 
@@ -220,28 +239,67 @@ public StatementContext(ConnectContext connectContext, OriginStatement originSta
         }
     }
 
+    public void setNeedLockTables(boolean needLockTables) {
+        this.needLockTables = needLockTables;
+    }
+
+    /**
+     * Cache view info to avoid the view's def and sql mode changing before the view is locked.
+     *
+     * @param qualifiedViewName full qualified name of the view
+     * @param view the view whose info needs to be cached
+     *
+     * @return view info: the first element is the view's def sql, the second is its sql mode
+     */
+    public Pair<String, Long> getAndCacheViewInfo(List<String> qualifiedViewName, View view) {
+        return viewInfos.computeIfAbsent(qualifiedViewName, k -> {
+            String viewDef;
+            long sqlMode;
+            view.readLock();
+            try {
+                viewDef = view.getInlineViewDef();
+                sqlMode = view.getSqlMode();
+            } finally {
+                view.readUnlock();
+            }
+            return Pair.of(viewDef, sqlMode);
+        });
+    }
+
+    public Map<List<String>, TableIf> getInsertTargetTables() {
+        return insertTargetTables;
+    }
+
+    public Map<List<String>, TableIf> getMtmvRelatedTables() {
+        return mtmvRelatedTables;
+    }
+
     public Map<List<String>, TableIf> getTables() {
-        if (tables == null) {
-            tables = Maps.newHashMap();
-        }
         return tables;
     }
 
     public void setTables(Map<List<String>, TableIf> tables) {
-        this.tables = tables;
+        this.tables.clear();
+        this.tables.putAll(tables);
    }
 
     /** get table by table name, try to get the information from the dump file first */
-    public TableIf getTableInMinidumpCache(List<String> tableQualifier) {
-        if (!getConnectContext().getSessionVariable().isPlayNereidsDump()) {
-            return null;
-        }
-        Preconditions.checkState(tables != null, "tables should not be null");
-        TableIf table = tables.getOrDefault(tableQualifier, null);
-        if (getConnectContext().getSessionVariable().isPlayNereidsDump() && table == null) {
-            throw new AnalysisException("Minidump cache can not find table:" + tableQualifier);
+    public TableIf getAndCacheTable(List<String> tableQualifier, TableFrom tableFrom) {
+        Map<List<String>, TableIf> tables;
+        switch (tableFrom) {
+            case QUERY:
+                tables = this.tables;
+                break;
+            case INSERT_TARGET:
+                tables = this.insertTargetTables;
+                break;
+            case MTMV:
+                tables = this.mtmvRelatedTables;
+                break;
+            default:
+                throw new AnalysisException("Unknown table from " + tableFrom);
         }
-        return table;
+        return tables.computeIfAbsent(tableQualifier, k -> RelationUtil.getTable(k, connectContext.getEnv()));
     }
 
     public void setConnectContext(ConnectContext connectContext) {
@@ -303,10 +361,6 @@ public Optional<SqlCacheContext> getSqlCacheContext() {
         return Optional.ofNullable(sqlCacheContext);
     }
 
-    public void addSlotToRelation(Slot slot, Relation relation) {
-        slotToRelation.put(slot, relation);
-    }
-
     public boolean isDpHyp() {
         return isDpHyp;
     }
@@ -475,21 +529,36 @@ public Map<RelationId, Statistics> getRelationIdToStatisticsMap() {
         return relationIdToStatisticsMap;
     }
 
-    /** addTableReadLock */
-    public synchronized void addTableReadLock(TableIf tableIf) {
-        if (!tableIf.needReadLockWhenPlan()) {
+    /**
+     * Lock all tables collected by TableCollector, in ascending order of table id.
+     */
+    public synchronized void lock() {
+        if (!needLockTables
+                || (tables.isEmpty() && mtmvRelatedTables.isEmpty() && insertTargetTables.isEmpty())
+                || !plannerResources.isEmpty()) {
             return;
         }
-        if (!tableIf.tryReadLock(1, TimeUnit.MINUTES)) {
-            close();
-            throw new RuntimeException(String.format("Failed to get read lock on table: %s", tableIf.getName()));
+        PriorityQueue<TableIf> tableIfs = new PriorityQueue<>(
+                tables.size() + mtmvRelatedTables.size() + insertTargetTables.size(),
+                Comparator.comparing(TableIf::getId));
+        tableIfs.addAll(tables.values());
+        tableIfs.addAll(mtmvRelatedTables.values());
+        tableIfs.addAll(insertTargetTables.values());
+        while (!tableIfs.isEmpty()) {
+            TableIf tableIf = tableIfs.poll();
+            if (!tableIf.needReadLockWhenPlan()) {
+                continue;
+            }
+            if (!tableIf.tryReadLock(1, TimeUnit.MINUTES)) {
+                close();
+                throw new RuntimeException("Failed to get read lock on table: " + tableIf.getName());
+            }
+            String fullTableName = 
tableIf.getNameWithFullQualifiers(); + String resourceName = "tableReadLock(" + fullTableName + ")"; + plannerResources.push(new CloseableResource( + resourceName, Thread.currentThread().getName(), + originStatement == null ? null : originStatement.originStmt, tableIf::readUnlock)); } - - String fullTableName = tableIf.getNameWithFullQualifiers(); - String resourceName = "tableReadLock(" + fullTableName + ")"; - plannerResources.push(new CloseableResource( - resourceName, Thread.currentThread().getName(), - originStatement == null ? null : originStatement.originStmt, tableIf::readUnlock)); } /** releasePlannerResources */ @@ -505,7 +574,7 @@ public synchronized void releasePlannerResources() { } } if (throwable != null) { - Throwables.propagateIfInstanceOf(throwable, RuntimeException.class); + Throwables.throwIfInstanceOf(throwable, RuntimeException.class); throw new IllegalStateException("Release resource failed", throwable); } } @@ -552,13 +621,8 @@ public void addPlannerHook(PlannerHook plannerHook) { /** * Load snapshot information of mvcc - * - * @param tables Tables used in queries */ - public void loadSnapshots(Map, TableIf> tables) { - if (tables == null) { - return; - } + public void loadSnapshots() { for (TableIf tableIf : tables.values()) { if (tableIf instanceof MvccTable) { MvccTableInfo mvccTableInfo = new MvccTableInfo(tableIf); @@ -616,7 +680,7 @@ public void close() { try { resource.close(); } catch (Throwable t) { - Throwables.propagateIfInstanceOf(t, RuntimeException.class); + Throwables.throwIfInstanceOf(t, RuntimeException.class); throw new IllegalStateException("Close resource failed: " + t.getMessage(), t); } closed = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundBaseExternalTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundBaseExternalTableSink.java index cfdefc59872d4e7..2c88e2f4a46ff74 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundBaseExternalTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundBaseExternalTableSink.java @@ -21,7 +21,6 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.UnboundLogicalProperties; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; @@ -80,11 +79,6 @@ public UnboundBaseExternalTableSink withOutputExprs(List getExpressions() { - throw new UnsupportedOperationException(this.getClass().getSimpleName() + " don't support getExpression()"); - } - @Override public boolean equals(Object o) { if (this == o) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundOneRowRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundOneRowRelation.java index 9bc368c8ad6b18a..bb61bc93574208e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundOneRowRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundOneRowRelation.java @@ -71,7 +71,7 @@ public List getProjects() { @Override public List getExpressions() { - throw new UnsupportedOperationException(this.getClass().getSimpleName() + " don't support getExpression()"); + return projects; } @Override diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java index b8d821e1548be7f..12d4a7c74be58f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java @@ -25,7 +25,6 @@ import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.UnboundLogicalProperties; import org.apache.doris.nereids.trees.TableSample; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; import org.apache.doris.nereids.trees.plans.Plan; @@ -186,11 +185,6 @@ public R accept(PlanVisitor visitor, C context) { return visitor.visitUnboundRelation(this, context); } - @Override - public List getExpressions() { - throw new UnsupportedOperationException(this.getClass().getSimpleName() + " don't support getExpression()"); - } - public List getPartNames() { return partNames; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundResultSink.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundResultSink.java index 5fd5c18a365d040..d57e518824d3aa8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundResultSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundResultSink.java @@ -21,7 +21,6 @@ import org.apache.doris.nereids.exceptions.UnboundException; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; @@ -64,11 +63,6 @@ public R accept(PlanVisitor visitor, C context) { return visitor.visitUnboundResultSink(this, context); } - @Override - public List getExpressions() { - throw new UnsupportedOperationException(this.getClass().getSimpleName() + " don't support getExpression()"); - } - @Override public Plan withGroupExpression(Optional groupExpression) { return new UnboundResultSink<>(groupExpression, Optional.of(getLogicalProperties()), child()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTVFRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTVFRelation.java index e876825af6569a6..3024058edc7a5d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTVFRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTVFRelation.java @@ -21,7 +21,6 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.UnboundLogicalProperties; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Properties; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.functions.table.TableValuedFunction; @@ -79,11 +78,6 @@ public R accept(PlanVisitor visitor, C context) { return visitor.visitUnboundTVFRelation(this, context); } - @Override - public List getExpressions() { - throw new 
UnsupportedOperationException(this.getClass().getSimpleName() + " don't support getExpression()"); - } - @Override public List computeOutput() { throw new UnboundException("output"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTableSink.java index 23c58ba42fb17eb..0e528227dc97428 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundTableSink.java @@ -21,7 +21,6 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.UnboundLogicalProperties; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; @@ -135,11 +134,6 @@ public R accept(PlanVisitor visitor, C context) { return visitor.visitUnboundTableSink(this, context); } - @Override - public List getExpressions() { - throw new UnsupportedOperationException(this.getClass().getSimpleName() + " don't support getExpression()"); - } - @Override public boolean equals(Object o) { if (this == o) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 03dbb6c7110a7ce..e05fead59015091 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.rules.analysis.AnalyzeCTE; import org.apache.doris.nereids.rules.analysis.BindExpression; import org.apache.doris.nereids.rules.analysis.BindRelation; -import org.apache.doris.nereids.rules.analysis.BindRelation.CustomTableResolver; import org.apache.doris.nereids.rules.analysis.BindSink; import org.apache.doris.nereids.rules.analysis.CheckAfterBind; import org.apache.doris.nereids.rules.analysis.CheckAnalysis; @@ -58,8 +57,6 @@ import com.google.common.collect.ImmutableSet; import java.util.List; -import java.util.Objects; -import java.util.Optional; /** * Bind symbols according to metadata in the catalog, perform semantic analysis, etc. @@ -67,38 +64,20 @@ */ public class Analyzer extends AbstractBatchJobExecutor { - public static final List ANALYZE_JOBS = buildAnalyzeJobs(Optional.empty()); - - private final List jobs; - - /** - * Execute the analysis job with scope. - * @param cascadesContext planner context for execute job - */ - public Analyzer(CascadesContext cascadesContext) { - this(cascadesContext, Optional.empty()); - } + public static final List ANALYZE_JOBS = buildAnalyzeJobs(); /** * constructor of Analyzer. For view, we only do bind relation since other analyze step will do by outer Analyzer. * * @param cascadesContext current context for analyzer - * @param customTableResolver custom resolver for outer catalog. 
     */
-    public Analyzer(CascadesContext cascadesContext, Optional<CustomTableResolver> customTableResolver) {
+    public Analyzer(CascadesContext cascadesContext) {
         super(cascadesContext);
-        Objects.requireNonNull(customTableResolver, "customTableResolver cannot be null");
-
-        if (customTableResolver.isPresent()) {
-            this.jobs = buildAnalyzeJobs(customTableResolver);
-        } else {
-            this.jobs = ANALYZE_JOBS;
-        }
     }
 
     @Override
     public List<RewriteJob> getJobs() {
-        return jobs;
+        return ANALYZE_JOBS;
     }
 
     /**
@@ -108,20 +87,20 @@ public void analyze() {
         execute();
     }
 
-    private static List<RewriteJob> buildAnalyzeJobs(Optional<CustomTableResolver> customTableResolver) {
+    private static List<RewriteJob> buildAnalyzeJobs() {
         return notTraverseChildrenOf(
                 ImmutableSet.of(LogicalView.class, LogicalCTEAnchor.class),
-                () -> buildAnalyzerJobs(customTableResolver)
+                Analyzer::buildAnalyzerJobs
         );
     }
 
-    private static List<RewriteJob> buildAnalyzerJobs(Optional<CustomTableResolver> customTableResolver) {
+    private static List<RewriteJob> buildAnalyzerJobs() {
         return jobs(
                 // we should eliminate hint before "Subquery unnesting".
                 topDown(new AnalyzeCTE()),
                 topDown(new EliminateLogicalSelectHint()),
                 bottomUp(
-                        new BindRelation(customTableResolver),
+                        new BindRelation(),
                         new CheckPolicy()
                 ),
                 bottomUp(new BindExpression()),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/TableCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/TableCollector.java
new file mode 100644
index 000000000000000..0ae433262efeb92
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/TableCollector.java
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.jobs.executor;
+
+import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.jobs.rewrite.RewriteJob;
+import org.apache.doris.nereids.rules.analysis.CollectRelation;
+import org.apache.doris.nereids.trees.plans.logical.LogicalView;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.List;
+
+/**
+ * Collect the tables used by a plan according to metadata in the catalog,
+ * so that they can be locked before analysis.
+ */
+public class TableCollector extends AbstractBatchJobExecutor {
+
+    public static final List<RewriteJob> COLLECT_JOBS = buildCollectTableJobs();
+
+    /**
+     * constructor of TableCollector.
+     *
+     * @param cascadesContext current context for the collector
+     */
+    public TableCollector(CascadesContext cascadesContext) {
+        super(cascadesContext);
+    }
+
+    @Override
+    public List<RewriteJob> getJobs() {
+        return COLLECT_JOBS;
+    }
+
+    /**
+     * collect all tables used by the plan.
+     */
+    public void collect() {
+        execute();
+    }
+
+    private static List<RewriteJob> buildCollectTableJobs() {
+        return notTraverseChildrenOf(
+                ImmutableSet.of(LogicalView.class),
+                TableCollector::buildCollectorJobs
+        );
+    }
+
+    private static List<RewriteJob> buildCollectorJobs() {
+        return jobs(
+                topDown(new CollectRelation())
+        );
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/minidump/MinidumpUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/minidump/MinidumpUtils.java
index c0f88b25341cdec..a369772f404b2c9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/minidump/MinidumpUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/minidump/MinidumpUtils.java
@@ -268,7 +268,7 @@ public static JSONObject executeSql(String sql) {
         }
         NereidsPlanner nereidsPlanner = new NereidsPlanner(
                 new StatementContext(ConnectContext.get(), new OriginStatement(sql, 0)));
-        nereidsPlanner.planWithLock(LogicalPlanAdapter.of(parsed));
+        nereidsPlanner.plan(LogicalPlanAdapter.of(parsed));
         return ((AbstractPlan) nereidsPlanner.getOptimizedPlan()).toJson();
     }
 
@@ -554,10 +554,10 @@ private static JSONObject serializeInputs(Plan parsedPlan, Map, Tab
     /**
     * This function is used to serialize the inputs of one query
     * @param parsedPlan input plan
-    * @param tables all tables related to this query
+    * @param statementContext context for this query
    * @throws IOException this will write to disk, so the io exception should be dealt with
    */
-    public static void serializeInputsToDumpFile(Plan parsedPlan, Map<List<String>, TableIf> tables)
+    public static void serializeInputsToDumpFile(Plan parsedPlan, StatementContext statementContext)
             throws IOException {
         ConnectContext connectContext = ConnectContext.get();
         // when playing a minidump file, we do not save the input again.
@@ -566,7 +566,10 @@ public static void serializeInputsToDumpFile(Plan parsedPlan, Map,
         }
         MinidumpUtils.init();
-        connectContext.setMinidump(serializeInputs(parsedPlan, tables));
+        Map<List<String>, TableIf> allTablesUsedInQuery = Maps.newHashMap();
+        allTablesUsedInQuery.putAll(statementContext.getTables());
+        allTablesUsedInQuery.putAll(statementContext.getMtmvRelatedTables());
+        connectContext.setMinidump(serializeInputs(parsedPlan, allTablesUsedInQuery));
     }
 
     /**
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 0332123f9ff5843..bb344e1b376deb0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -916,7 +916,8 @@ public LogicalPlan visitInsertTable(InsertTableContext ctx) {
             command = new InsertOverwriteTableCommand(sink, labelName, cte);
         } else {
             if (ConnectContext.get() != null && ConnectContext.get().isTxnModel()
-                    && sink.child() instanceof LogicalInlineTable) {
+                    && sink.child() instanceof LogicalInlineTable
+                    && sink.child().getExpressions().stream().allMatch(Expression::isConstant)) {
                 // FIXME: In legacy, `insert into select 1` is handled as `insert into values`.
                 // In nereids, the original way was to throw an AnalysisException and fall back to legacy.
                 // Now it is handled as `insert into select` (a separate load job); should fix it to match the legacy behavior.
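The TableCollector introduced above is deliberately tiny: its whole job list is topDown(new CollectRelation()), a pre-binding pass that walks the still-unbound plan (including view bodies, CTEs, and subqueries) and caches every relation it meets into the StatementContext. A minimal sketch of that collect phase under simplified assumptions; Node, Relation, and CollectSketch are hypothetical stand-ins for the Nereids plan classes, not Doris APIs:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Hypothetical stand-ins for Nereids plan nodes; only the traversal matters.
    class Node {
        final List<Node> children;

        Node(Node... children) {
            this.children = Arrays.asList(children);
        }
    }

    final class Relation extends Node {
        final String name;

        Relation(String name) {
            this.name = name;
        }
    }

    final class CollectSketch {
        // In the spirit of the COLLECT_TABLE_FROM_* rules: visit every node
        // before any binding happens, remember each relation name, and
        // recurse into children.
        static void collect(Node node, List<String> out) {
            if (node instanceof Relation) {
                out.add(((Relation) node).name);
            }
            for (Node child : node.children) {
                collect(child, out);
            }
        }

        public static void main(String[] args) {
            Node plan = new Node(new Relation("db.t1"), new Node(new Relation("db.t2")));
            List<String> names = new ArrayList<>();
            collect(plan, names);
            System.out.println(names); // prints [db.t1, db.t2]
        }
    }

Only after such a pass does NereidsPlanner.collectAndLockTable() call statementContext.lock() and hand the plan to the Analyzer, so binding always runs against locked, stable metadata.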
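StatementContext.lock(), shown earlier in this patch, drains a PriorityQueue ordered by TableIf::getId, so every statement acquires its read locks in ascending table-id order. A self-contained sketch of why a fixed global acquisition order matters once writers (for example, schema changes) can queue on the same locks; TableLike and OrderedLocking are illustrative stand-ins, not Doris classes:

    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;
    import java.util.concurrent.locks.ReadWriteLock;
    import java.util.concurrent.locks.ReentrantReadWriteLock;

    // Illustrative stand-in for TableIf: just a table id plus a read-write lock.
    final class TableLike {
        final long id;
        final ReadWriteLock lock = new ReentrantReadWriteLock();

        TableLike(long id) {
            this.id = id;
        }
    }

    final class OrderedLocking {
        // Lock every table in ascending id order. With writer-preferring or
        // fair locks, a reader can block behind a queued writer; if statement A
        // held t1 and waited on t2 while statement B held t2 and waited on t1,
        // they would deadlock. A single global order makes that circular wait
        // impossible.
        static void lockAll(List<TableLike> tables) {
            tables.sort(Comparator.comparingLong(t -> t.id));
            for (TableLike t : tables) {
                t.lock.readLock().lock();
            }
        }

        // Unlock in any order; order only matters for acquisition.
        static void unlockAll(List<TableLike> tables) {
            for (TableLike t : tables) {
                t.lock.readLock().unlock();
            }
        }

        public static void main(String[] args) {
            List<TableLike> tables = Arrays.asList(new TableLike(3), new TableLike(1), new TableLike(2));
            lockAll(tables);   // acquires ids 1, 2, 3 in that order
            unlockAll(tables);
        }
    }

The patch additionally wraps each acquired lock in a CloseableResource pushed onto plannerResources, so releasePlannerResources() can unwind every lock even when planning fails midway.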
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index d348889818a5dd5..4cf3c75b68dc43b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -26,9 +26,14 @@ public enum RuleType { // just for UT TEST_REWRITE(RuleTypeClass.REWRITE), - // binding rules - // **** make sure BINDING_UNBOUND_LOGICAL_PLAN is the lowest priority in the rewrite rules. **** + // collect relation rules + COLLECT_TABLE_FROM_CTE(RuleTypeClass.REWRITE), + COLLECT_TABLE_FROM_RELATION(RuleTypeClass.REWRITE), + COLLECT_TABLE_FROM_SINK(RuleTypeClass.REWRITE), + COLLECT_TABLE_FROM_OTHER(RuleTypeClass.REWRITE), + + // binding rules BINDING_RESULT_SINK(RuleTypeClass.REWRITE), BINDING_INSERT_HIVE_TABLE(RuleTypeClass.REWRITE), BINDING_INSERT_ICEBERG_TABLE(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index c7d4e9f975e50a2..583244f09028969 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -39,6 +39,7 @@ import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.SqlCacheContext; import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.StatementContext.TableFrom; import org.apache.doris.nereids.analyzer.Unbound; import org.apache.doris.nereids.analyzer.UnboundRelation; import org.apache.doris.nereids.analyzer.UnboundResultSink; @@ -98,25 +99,14 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; -import java.util.function.Function; /** * Rule to bind relations in query plan. */ public class BindRelation extends OneAnalysisRuleFactory { - private final Optional customTableResolver; + public BindRelation() {} - public BindRelation() { - this(Optional.empty()); - } - - public BindRelation(Optional customTableResolver) { - this.customTableResolver = customTableResolver; - } - - // TODO: cte will be copied to a sub-query with different names but the id of the unbound relation in them - // are the same, so we use new relation id when binding relation, and will fix this bug later. @Override public Rule build() { return unboundRelation().thenApply(ctx -> { @@ -168,23 +158,10 @@ private LogicalPlan bindWithCurrentDb(CascadesContext cascadesContext, UnboundRe return consumer; } } - List tableQualifier = RelationUtil.getQualifierName(cascadesContext.getConnectContext(), - unboundRelation.getNameParts()); - TableIf table = null; - table = ConnectContext.get().getStatementContext().getTableInMinidumpCache(tableQualifier); - if (table == null) { - if (customTableResolver.isPresent()) { - table = customTableResolver.get().apply(tableQualifier); - } - } - // In some cases even if we have already called the "cascadesContext.getTableByName", - // it also gets the null. So, we just check it in the catalog again for safety. 
- if (table == null) { - table = RelationUtil.getTable(tableQualifier, cascadesContext.getConnectContext().getEnv()); - } - ConnectContext.get().getStatementContext().getTables().put(tableQualifier, table); + List tableQualifier = RelationUtil.getQualifierName( + cascadesContext.getConnectContext(), unboundRelation.getNameParts()); + TableIf table = cascadesContext.getStatementContext().getAndCacheTable(tableQualifier, TableFrom.QUERY); - // TODO: should generate different Scan sub class according to table's type LogicalPlan scan = getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext); if (cascadesContext.isLeadingJoin()) { LeadingHint leading = (LeadingHint) cascadesContext.getHintMap().get("Leading"); @@ -197,17 +174,7 @@ private LogicalPlan bindWithCurrentDb(CascadesContext cascadesContext, UnboundRe private LogicalPlan bind(CascadesContext cascadesContext, UnboundRelation unboundRelation) { List tableQualifier = RelationUtil.getQualifierName(cascadesContext.getConnectContext(), unboundRelation.getNameParts()); - TableIf table = null; - if (customTableResolver.isPresent()) { - table = customTableResolver.get().apply(tableQualifier); - } - table = ConnectContext.get().getStatementContext().getTableInMinidumpCache(tableQualifier); - // In some cases even if we have already called the "cascadesContext.getTableByName", - // it also gets the null. So, we just check it in the catalog again for safety. - if (table == null) { - table = RelationUtil.getTable(tableQualifier, cascadesContext.getConnectContext().getEnv()); - } - ConnectContext.get().getStatementContext().getTables().put(tableQualifier, table); + TableIf table = cascadesContext.getStatementContext().getAndCacheTable(tableQualifier, TableFrom.QUERY); return getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext); } @@ -415,8 +382,7 @@ private LogicalPlan getLogicalPlan(TableIf table, UnboundRelation unboundRelatio case VIEW: View view = (View) table; isView = true; - String inlineViewDef = view.getInlineViewDef(); - Plan viewBody = parseAndAnalyzeView(view, inlineViewDef, cascadesContext); + Plan viewBody = parseAndAnalyzeDorisView(view, qualifiedTableName, cascadesContext); LogicalView logicalView = new LogicalView<>(view, viewBody); return new LogicalSubQueryAlias<>(qualifiedTableName, logicalView); case HMS_EXTERNAL_TABLE: @@ -496,6 +462,17 @@ private Plan parseAndAnalyzeHiveView( } } + private Plan parseAndAnalyzeDorisView(View view, List tableQualifier, CascadesContext parentContext) { + Pair viewInfo = parentContext.getStatementContext().getAndCacheViewInfo(tableQualifier, view); + long originalSqlMode = parentContext.getConnectContext().getSessionVariable().getSqlMode(); + parentContext.getConnectContext().getSessionVariable().setSqlMode(viewInfo.second); + try { + return parseAndAnalyzeView(view, viewInfo.first, parentContext); + } finally { + parentContext.getConnectContext().getSessionVariable().setSqlMode(originalSqlMode); + } + } + private Plan parseAndAnalyzeView(TableIf view, String ddlSql, CascadesContext parentContext) { parentContext.getStatementContext().addViewDdlSql(ddlSql); Optional sqlCacheContext = parentContext.getStatementContext().getSqlCacheContext(); @@ -510,7 +487,7 @@ private Plan parseAndAnalyzeView(TableIf view, String ddlSql, CascadesContext pa CascadesContext viewContext = CascadesContext.initContext( parentContext.getStatementContext(), parsedViewPlan, PhysicalProperties.ANY); viewContext.keepOrShowPlanProcess(parentContext.showPlanProcess(), () -> { - 
viewContext.newAnalyzer(customTableResolver).analyze(); + viewContext.newAnalyzer().analyze(); }); parentContext.addPlanProcesses(viewContext.getPlanProcesses()); // we should remove all group expression of the plan which in other memo, so the groupId would not conflict @@ -543,7 +520,4 @@ private List getPartitionIds(TableIf t, UnboundRelation unboundRelation, L return part.getId(); }).collect(ImmutableList.toImmutableList()); } - - /** CustomTableResolver */ - public interface CustomTableResolver extends Function, TableIf> {} } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java new file mode 100644 index 000000000000000..9c6e3adbe74e1b7 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CollectRelation.java @@ -0,0 +1,228 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MTMV; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.catalog.View; +import org.apache.doris.common.Pair; +import org.apache.doris.mtmv.BaseTableInfo; +import org.apache.doris.nereids.CTEContext; +import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.StatementContext.TableFrom; +import org.apache.doris.nereids.analyzer.UnboundRelation; +import org.apache.doris.nereids.analyzer.UnboundResultSink; +import org.apache.doris.nereids.analyzer.UnboundTableSink; +import org.apache.doris.nereids.parser.NereidsParser; +import org.apache.doris.nereids.pattern.MatchingContext; +import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.CTEId; +import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalCTE; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.util.RelationUtil; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Rule to bind relations in query plan. 
+ */
+public class CollectRelation implements AnalysisRuleFactory {
+
+    private static final Logger LOG = LogManager.getLogger(CollectRelation.class);
+
+    public CollectRelation() {}
+
+    @Override
+    public List<Rule> buildRules() {
+        return ImmutableList.of(
+                // we should collect tables from cte first, to register all cte names and avoid collecting the wrong table.
+                logicalCTE()
+                        .thenApply(ctx -> {
+                            ctx.cascadesContext.setCteContext(collectFromCte(ctx.root, ctx.cascadesContext));
+                            return null;
+                        })
+                        .toRule(RuleType.COLLECT_TABLE_FROM_CTE),
+                unboundRelation()
+                        .thenApply(this::collectFromUnboundRelation)
+                        .toRule(RuleType.COLLECT_TABLE_FROM_RELATION),
+                unboundTableSink()
+                        .thenApply(this::collectFromUnboundTableSink)
+                        .toRule(RuleType.COLLECT_TABLE_FROM_SINK),
+                any().whenNot(UnboundRelation.class::isInstance)
+                        .whenNot(UnboundTableSink.class::isInstance)
+                        .thenApply(this::collectFromAny)
+                        .toRule(RuleType.COLLECT_TABLE_FROM_OTHER)
+        );
+    }
+
+    /**
+     * register and store CTEs in the CTEContext
+     */
+    private CTEContext collectFromCte(
+            LogicalCTE<Plan> logicalCTE, CascadesContext cascadesContext) {
+        CTEContext outerCteCtx = cascadesContext.getCteContext();
+        List<LogicalSubQueryAlias<Plan>> aliasQueries = logicalCTE.getAliasQueries();
+        for (LogicalSubQueryAlias<Plan> aliasQuery : aliasQueries) {
+            // we should use a chain to ensure the visibility of cte
+            LogicalPlan parsedCtePlan = (LogicalPlan) aliasQuery.child();
+            CascadesContext innerCascadesCtx = CascadesContext.newContextWithCteContext(
+                    cascadesContext, parsedCtePlan, outerCteCtx);
+            innerCascadesCtx.newTableCollector().collect();
+            LogicalPlan analyzedCtePlan = (LogicalPlan) innerCascadesCtx.getRewritePlan();
+            // cteId is not used in the CollectTable stage
+            CTEId cteId = new CTEId(0);
+            LogicalSubQueryAlias<Plan> logicalSubQueryAlias =
+                    aliasQuery.withChildren(ImmutableList.of(analyzedCtePlan));
+            outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias, outerCteCtx);
+            outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias);
+        }
+        return outerCteCtx;
+    }
+
+    private Plan collectFromAny(MatchingContext<Plan> ctx) {
+        Set<SubqueryExpr> subqueryExprs = ctx.root.getExpressions().stream()
+                .<Set<SubqueryExpr>>map(p -> p.collect(SubqueryExpr.class::isInstance))
+                .flatMap(Set::stream)
+                .collect(Collectors.toSet());
+        for (SubqueryExpr subqueryExpr : subqueryExprs) {
+            CascadesContext subqueryContext = CascadesContext.newContextWithCteContext(
+                    ctx.cascadesContext, subqueryExpr.getQueryPlan(), ctx.cteContext);
+            subqueryContext.keepOrShowPlanProcess(ctx.cascadesContext.showPlanProcess(),
+                    () -> subqueryContext.newTableCollector().collect());
+            ctx.cascadesContext.addPlanProcesses(subqueryContext.getPlanProcesses());
+        }
+        return null;
+    }
+
+    private Plan collectFromUnboundTableSink(MatchingContext<UnboundTableSink<Plan>> ctx) {
+        List<String> nameParts = ctx.root.getNameParts();
+        switch (nameParts.size()) {
+            case 1:
+                // table
+                // Use the current database name from the catalog.
+            case 2:
+                // db.table
+                // Use the database name from the table name parts.
+            case 3:
+                // catalog.db.table
+                // Use the catalog and database name from the name parts.
+                collectFromUnboundRelation(ctx.cascadesContext, nameParts, TableFrom.INSERT_TARGET);
+                return null;
+            default:
+                throw new IllegalStateException("Insert target name is invalid.");
+        }
+    }
+
+    private Plan collectFromUnboundRelation(MatchingContext<UnboundRelation> ctx) {
+        List<String> nameParts = ctx.root.getNameParts();
+        switch (nameParts.size()) {
+            case 1:
+                // table
+                // Use the current database name from the catalog.
+            case 2:
+                // db.table
+                // Use the database name from the table name parts.
+            case 3:
+                // catalog.db.table
+                // Use catalog and database name from name parts.
+                collectFromUnboundRelation(ctx.cascadesContext, nameParts, TableFrom.QUERY);
+                return null;
+            default:
+                throw new IllegalStateException("Table name [" + ctx.root.getTableName() + "] is invalid.");
+        }
+    }
+
+    private void collectFromUnboundRelation(CascadesContext cascadesContext,
+            List<String> nameParts, TableFrom tableFrom) {
+        if (nameParts.size() == 1) {
+            String tableName = nameParts.get(0);
+            // check if it is a CTE's name
+            CTEContext cteContext = cascadesContext.getCteContext().findCTEContext(tableName).orElse(null);
+            if (cteContext != null) {
+                Optional<LogicalPlan> analyzedCte = cteContext.getAnalyzedCTEPlan(tableName);
+                if (analyzedCte.isPresent()) {
+                    return;
+                }
+            }
+        }
+        List<String> tableQualifier = RelationUtil.getQualifierName(cascadesContext.getConnectContext(), nameParts);
+        TableIf table = cascadesContext.getConnectContext().getStatementContext()
+                .getAndCacheTable(tableQualifier, tableFrom);
+        LOG.info("collect table {} from {}", nameParts, tableFrom);
+        if (tableFrom == TableFrom.QUERY) {
+            collectMTMVCandidates(table, cascadesContext);
+        }
+        if (table instanceof View) {
+            parseAndCollectFromView(tableQualifier, (View) table, cascadesContext);
+        }
+    }
+
+    private void collectMTMVCandidates(TableIf table, CascadesContext cascadesContext) {
+        if (cascadesContext.getConnectContext().getSessionVariable().enableMaterializedViewRewrite) {
+            Set<MTMV> mtmvSet = Env.getCurrentEnv().getMtmvService().getRelationManager()
+                    .getAllMTMVs(Lists.newArrayList(new BaseTableInfo(table)));
+            LOG.info("table {} related mv set is {}", new BaseTableInfo(table), mtmvSet);
+            for (MTMV mtmv : mtmvSet) {
+                cascadesContext.getStatementContext().getMtmvRelatedTables().put(mtmv.getFullQualifiers(), mtmv);
+                mtmv.readMvLock();
+                try {
+                    for (BaseTableInfo baseTableInfo : mtmv.getRelation().getBaseTables()) {
+                        LOG.info("mtmv {} related base table include {}", new BaseTableInfo(mtmv), baseTableInfo);
+                        cascadesContext.getStatementContext().getAndCacheTable(baseTableInfo.toList(), TableFrom.MTMV);
+                    }
+                } finally {
+                    mtmv.readMvUnlock();
+                }
+            }
+        }
+    }
+
+    private void parseAndCollectFromView(List<String> tableQualifier, View view, CascadesContext parentContext) {
+        Pair<String, Long> viewInfo = parentContext.getStatementContext().getAndCacheViewInfo(tableQualifier, view);
+        long originalSqlMode = parentContext.getConnectContext().getSessionVariable().getSqlMode();
+        parentContext.getConnectContext().getSessionVariable().setSqlMode(viewInfo.second);
+        LogicalPlan parsedViewPlan;
+        try {
+            parsedViewPlan = new NereidsParser().parseSingle(viewInfo.first);
+        } finally {
+            parentContext.getConnectContext().getSessionVariable().setSqlMode(originalSqlMode);
+        }
+        if (parsedViewPlan instanceof UnboundResultSink) {
+            parsedViewPlan = (LogicalPlan) ((UnboundResultSink<?>) parsedViewPlan).child();
+        }
+        CascadesContext viewContext = CascadesContext.initContext(
+                parentContext.getStatementContext(), parsedViewPlan, PhysicalProperties.ANY);
+        viewContext.keepOrShowPlanProcess(parentContext.showPlanProcess(),
+                () -> viewContext.newTableCollector().collect());
+        parentContext.addPlanProcesses(viewContext.getPlanProcesses());
+    }
+}
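The chained CTEContext construction in collectFromCte above is the crux of CTE visibility: each alias is collected against a context that already contains every earlier alias. A minimal sketch of the same scoping idea, using hypothetical CteScope/register/resolve names rather than the Doris API:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Optional;

    // Minimal sketch of scope chaining; CteScope is a stand-in, not Doris code.
    final class CteScope {
        private final Map<String, String> localCtes = new HashMap<>(); // name -> plan (as text here)
        private final CteScope outer;

        CteScope(CteScope outer) {
            this.outer = outer;
        }

        void register(String name, String plan) {
            localCtes.put(name, plan);
        }

        // Walk the chain: the innermost scope wins, mirroring how a later
        // CTE sees all earlier ones through the outer context chain.
        Optional<String> resolve(String name) {
            if (localCtes.containsKey(name)) {
                return Optional.of(localCtes.get(name));
            }
            return outer == null ? Optional.empty() : outer.resolve(name);
        }
    }

Each loop iteration in collectFromCte effectively creates a new scope whose outer pointer is the previous one, which is why a CTE can reference any CTE defined before it but none defined after it.

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AsyncMaterializationContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AsyncMaterializationContext.java index 96d37ad546a7b4b..593ad986ca797c2 100644 ---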
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AsyncMaterializationContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AsyncMaterializationContext.java @@ -131,11 +131,6 @@ boolean isFinalChosen(Relation relation) { @Override public Plan getScanPlan(StructInfo queryInfo, CascadesContext cascadesContext) { - // If try to get scan plan or rewrite successfully, try to get mv read lock to avoid meta data inconsistent, - // try to get lock which should added before RBO - if (!this.isSuccess()) { - cascadesContext.getStatementContext().addTableReadLock(this.getMtmv()); - } super.getScanPlan(queryInfo, cascadesContext); return scanPlan; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/InitMaterializationContextHook.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/InitMaterializationContextHook.java index 4f8198e0b3c0bd0..db270390f9bdd37 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/InitMaterializationContextHook.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/InitMaterializationContextHook.java @@ -33,9 +33,6 @@ import org.apache.doris.nereids.NereidsPlanner; import org.apache.doris.nereids.PlannerHook; import org.apache.doris.nereids.parser.NereidsParser; -import org.apache.doris.nereids.trees.plans.Plan; -import org.apache.doris.nereids.trees.plans.visitor.TableCollector; -import org.apache.doris.nereids.trees.plans.visitor.TableCollector.TableCollectorContext; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -80,24 +77,11 @@ public void initMaterializationContext(CascadesContext cascadesContext) { */ protected void doInitMaterializationContext(CascadesContext cascadesContext) { if (cascadesContext.getConnectContext().getSessionVariable().isInDebugMode()) { - LOG.info(String.format("MaterializationContext init return because is in debug mode, current queryId is %s", - cascadesContext.getConnectContext().getQueryIdentifier())); + LOG.info("MaterializationContext init return because is in debug mode, current queryId is {}", + cascadesContext.getConnectContext().getQueryIdentifier()); return; } - // Only collect the table or mv which query use directly, to avoid useless mv partition in rewrite - // Keep use one connection context when in query, if new connect context, - // the ConnectionContext.get() will change - TableCollectorContext collectorContext = new TableCollectorContext(Sets.newHashSet(), false, - cascadesContext.getConnectContext()); - try { - Plan rewritePlan = cascadesContext.getRewritePlan(); - rewritePlan.accept(TableCollector.INSTANCE, collectorContext); - } catch (Exception e) { - LOG.warn(String.format("MaterializationContext init table collect fail, current queryId is %s", - cascadesContext.getConnectContext().getQueryIdentifier()), e); - return; - } - Set collectedTables = collectorContext.getCollectedTables(); + Set collectedTables = Sets.newHashSet(cascadesContext.getStatementContext().getTables().values()); if (collectedTables.isEmpty()) { return; } @@ -115,7 +99,7 @@ protected void doInitMaterializationContext(CascadesContext cascadesContext) { } // Create async materialization context for (MaterializationContext context : createAsyncMaterializationContext(cascadesContext, - collectorContext.getCollectedTables())) { + collectedTables)) { cascadesContext.addMaterializationContext(context); } } diff --git 
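The logging cleanup in the InitMaterializationContextHook hunk above replaces String.format with Log4j's parameterized messages. The difference, shown on a generic logger (class and message below are illustrative only): String.format always builds the string, while {} substitution is skipped entirely when the log level is disabled.

    import org.apache.logging.log4j.LogManager;
    import org.apache.logging.log4j.Logger;

    class LoggingStyle {
        private static final Logger LOG = LogManager.getLogger(LoggingStyle.class);

        void report(String queryId) {
            // Eager: the message String is built even when INFO is disabled.
            LOG.info(String.format("init return, current queryId is %s", queryId));
            // Lazy: Log4j substitutes {} only if INFO is actually logged.
            LOG.info("init return, current queryId is {}", queryId);
        }
    }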
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java index 4ddb93409379e95..20aad9ecdb25c6e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java @@ -128,11 +128,10 @@ public static RelatedTableInfo getRelatedTableInfo(String column, String timeUni materializedViewPlan = new LogicalProject<>(ImmutableList.of(columnExpr), materializedViewPlan); } // Collect table relation map which is used to identify self join - List catalogRelationObjs = - materializedViewPlan.collectToList(CatalogRelation.class::isInstance); + List catalogRelations = materializedViewPlan.collectToList(CatalogRelation.class::isInstance); ImmutableMultimap.Builder tableCatalogRelationMultimapBuilder = ImmutableMultimap.builder(); - for (CatalogRelation catalogRelation : catalogRelationObjs) { + for (CatalogRelation catalogRelation : catalogRelations) { tableCatalogRelationMultimapBuilder.put(new TableIdentifier(catalogRelation.getTable()), catalogRelation); } // Check sql pattern @@ -320,6 +319,7 @@ public static MTMVCache createMTMVCache(String querySql, ConnectContext connectC LogicalPlan unboundMvPlan = new NereidsParser().parseSingle(querySql); StatementContext mvSqlStatementContext = new StatementContext(connectContext, new OriginStatement(querySql, 0)); + mvSqlStatementContext.setNeedLockTables(false); NereidsPlanner planner = new NereidsPlanner(mvSqlStatementContext); if (mvSqlStatementContext.getConnectContext().getStatementContext() == null) { mvSqlStatementContext.getConnectContext().setStatementContext(mvSqlStatementContext); @@ -771,7 +771,7 @@ public static final class RelatedTableInfo { private final String column; private final Set failReasons = new HashSet<>(); // This records the partition expression if exist - private Optional partitionExpression; + private final Optional partitionExpression; public RelatedTableInfo(BaseTableInfo tableInfo, boolean pctPossible, String column, String failReason, Expression partitionExpression) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AddConstraintCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AddConstraintCommand.java index 08954741c806aa3..f92a3b6103b3457 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AddConstraintCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AddConstraintCommand.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Pair; +import org.apache.doris.common.util.MetaLockUtils; import org.apache.doris.nereids.NereidsPlanner; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.properties.PhysicalProperties; @@ -34,9 +35,12 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.Comparator; +import java.util.List; import java.util.Set; /** @@ -61,15 +65,26 @@ public AddConstraintCommand(String name, Constraint constraint) { @Override public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { Pair, TableIf> 
columnsAndTable = extractColumnsAndTable(ctx, constraint.toProject()); + List tables = Lists.newArrayList(columnsAndTable.second); + Pair, TableIf> referencedColumnsAndTable = null; if (constraint.isForeignKey()) { - Pair, TableIf> referencedColumnsAndTable - = extractColumnsAndTable(ctx, constraint.toReferenceProject()); - columnsAndTable.second.addForeignConstraint(name, columnsAndTable.first, - referencedColumnsAndTable.second, referencedColumnsAndTable.first, false); - } else if (constraint.isPrimaryKey()) { - columnsAndTable.second.addPrimaryKeyConstraint(name, columnsAndTable.first, false); - } else if (constraint.isUnique()) { - columnsAndTable.second.addUniqueConstraint(name, columnsAndTable.first, false); + referencedColumnsAndTable = extractColumnsAndTable(ctx, constraint.toReferenceProject()); + tables.add(referencedColumnsAndTable.second); + } + tables.sort((Comparator.comparing(TableIf::getId))); + MetaLockUtils.writeLockTables(tables); + try { + if (constraint.isForeignKey()) { + Preconditions.checkState(referencedColumnsAndTable != null); + columnsAndTable.second.addForeignConstraint(name, columnsAndTable.first, + referencedColumnsAndTable.second, referencedColumnsAndTable.first, false); + } else if (constraint.isPrimaryKey()) { + columnsAndTable.second.addPrimaryKeyConstraint(name, columnsAndTable.first, false); + } else if (constraint.isUnique()) { + columnsAndTable.second.addUniqueConstraint(name, columnsAndTable.first, false); + } + } finally { + MetaLockUtils.writeUnlockTables(tables); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CommandUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CommandUtils.java deleted file mode 100644 index f9b0c3e18d1b2fc..000000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CommandUtils.java +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.trees.plans.commands; - -import org.apache.doris.catalog.KeysType; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.TableIf; -import org.apache.doris.nereids.exceptions.AnalysisException; -import org.apache.doris.nereids.util.RelationUtil; -import org.apache.doris.qe.ConnectContext; - -import java.util.List; - -/** - * delete from unique key table. - */ -public class CommandUtils { - - /** - * check delete target table should unique key olap table. If ok, return it. 
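The AddConstraintCommand rewrite above, like the DropConstraintCommand, StmtExecutor, and MetadataGenerator hunks later in this patch, follows one deadlock-avoidance protocol: sort all involved tables by id, take every lock in that global order, and release in a finally block. A self-contained sketch of the protocol, where Table and withWriteLocks are stand-ins rather than Doris classes:

    import java.util.Comparator;
    import java.util.List;
    import java.util.concurrent.locks.ReentrantReadWriteLock;

    final class LockOrdering {
        static final class Table {
            final long id;
            final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
            Table(long id) { this.id = id; }
        }

        // Lock every table in ascending id order, run the action, then unlock
        // in reverse. Two threads locking {A, B} and {B, A} can no longer
        // deadlock, because both acquire in the same global order.
        static void withWriteLocks(List<Table> tables, Runnable action) {
            tables.sort(Comparator.comparingLong((Table t) -> t.id));
            for (Table t : tables) {
                t.lock.writeLock().lock();
            }
            try {
                action.run();
            } finally {
                for (int i = tables.size() - 1; i >= 0; i--) {
                    tables.get(i).lock.writeLock().unlock();
                }
            }
        }
    }

AddConstraintCommand acquires write locks this way around both the constrained table and the referenced table, so a concurrent command naming the same pair in the opposite order still locks them in identical id order.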
- */ - public static OlapTable checkAndGetDeleteTargetTable(ConnectContext ctx, List nameParts) { - List qualifiedTableName = RelationUtil.getQualifierName(ctx, nameParts); - TableIf table = RelationUtil.getTable(qualifiedTableName, ctx.getEnv()); - if (!(table instanceof OlapTable)) { - throw new AnalysisException("table must be olapTable in delete command"); - } - OlapTable targetTable = ((OlapTable) table); - if (targetTable.getKeysType() != KeysType.UNIQUE_KEYS) { - throw new AnalysisException("Nereids only support delete command on unique key table now"); - } - return targetTable; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropConstraintCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropConstraintCommand.java index fe777ea82972fc5..63d432121599e0e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropConstraintCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropConstraintCommand.java @@ -18,6 +18,9 @@ package org.apache.doris.nereids.trees.plans.commands; import org.apache.doris.catalog.TableIf; +import org.apache.doris.catalog.constraint.Constraint; +import org.apache.doris.catalog.constraint.PrimaryKeyConstraint; +import org.apache.doris.common.util.MetaLockUtils; import org.apache.doris.nereids.NereidsPlanner; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.properties.PhysicalProperties; @@ -30,9 +33,12 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.StmtExecutor; +import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.Comparator; +import java.util.List; import java.util.Set; /** @@ -56,7 +62,27 @@ public DropConstraintCommand(String name, LogicalPlan plan) { @Override public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { TableIf table = extractTable(ctx, plan); - table.dropConstraint(name, false); + List tables = Lists.newArrayList(table); + table.readLock(); + try { + Constraint constraint = table.getConstraintsMapUnsafe().get(name); + if (constraint == null) { + throw new AnalysisException( + String.format("Unknown constraint %s on table %s.", name, table.getName())); + } + if (constraint instanceof PrimaryKeyConstraint) { + tables.addAll(((PrimaryKeyConstraint) constraint).getForeignTables()); + } + } finally { + table.readUnlock(); + } + tables.sort((Comparator.comparing(TableIf::getId))); + MetaLockUtils.writeLockTables(tables); + try { + table.dropConstraint(name, false); + } finally { + MetaLockUtils.writeUnlockTables(tables); + } } private TableIf extractTable(ConnectContext ctx, LogicalPlan plan) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowConstraintsCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowConstraintsCommand.java index 5c3c16137bfe80b..0e5c332058d4810 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowConstraintsCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowConstraintsCommand.java @@ -52,12 +52,19 @@ public ShowConstraintsCommand(List nameParts) { public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { TableIf tableIf = RelationUtil.getDbAndTable( RelationUtil.getQualifierName(ctx, nameParts), ctx.getEnv()).value(); - List> res = 
tableIf.getConstraintsMap().entrySet().stream() - .map(e -> Lists.newArrayList(e.getKey(), - e.getValue().getType().getName(), - e.getValue().toString())) + tableIf.readLock(); + List> res; + try { + res = tableIf.getConstraintsMap().entrySet().stream() + .map(e -> Lists.newArrayList(e.getKey(), + e.getValue().getType().getName(), + e.getValue().toString())) .collect(Collectors.toList()); + } finally { + tableIf.readUnlock(); + } executor.handleShowConstraintStmt(res); + } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java index 417ab807cfe4bb6..349379285790fa2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java @@ -250,42 +250,43 @@ private void analyzeProperties() { /** * analyzeQuery */ - public void analyzeQuery(ConnectContext ctx, Map mvProperties) throws Exception { - // create table as select - StatementContext statementContext = ctx.getStatementContext(); - NereidsPlanner planner = new NereidsPlanner(statementContext); - // this is for expression column name infer when not use alias - LogicalSink logicalSink = new UnboundResultSink<>(logicalQuery); - // Should not make table without data to empty relation when analyze the related table, - // so add disable rules - Set tempDisableRules = ctx.getSessionVariable().getDisableNereidsRuleNames(); - ctx.getSessionVariable().setDisableNereidsRules(CreateMTMVInfo.MTMV_PLANER_DISABLE_RULES); - ctx.getStatementContext().invalidCache(SessionVariable.DISABLE_NEREIDS_RULES); - Plan plan; - try { - // must disable constant folding by be, because be constant folding may return wrong type - ctx.getSessionVariable().setVarOnce(SessionVariable.ENABLE_FOLD_CONSTANT_BY_BE, "false"); - plan = planner.planWithLock(logicalSink, PhysicalProperties.ANY, ExplainLevel.ALL_PLAN); - } finally { - // after operate, roll back the disable rules - ctx.getSessionVariable().setDisableNereidsRules(String.join(",", tempDisableRules)); - ctx.getStatementContext().invalidCache(SessionVariable.DISABLE_NEREIDS_RULES); - } - // can not contain VIEW or MTMV - analyzeBaseTables(planner.getAnalyzedPlan()); - // can not contain Random function - analyzeExpressions(planner.getAnalyzedPlan(), mvProperties); - // can not contain partition or tablets - boolean containTableQueryOperator = MaterializedViewUtils.containTableQueryOperator(planner.getAnalyzedPlan()); - if (containTableQueryOperator) { - throw new AnalysisException("can not contain invalid expression"); - } - getRelation(planner); - this.mvPartitionInfo = mvPartitionDefinition.analyzeAndTransferToMTMVPartitionInfo(planner, ctx); - this.partitionDesc = generatePartitionDesc(ctx); - getColumns(plan, ctx, mvPartitionInfo.getPartitionCol(), distribution); - analyzeKeys(); + public void analyzeQuery(ConnectContext ctx, Map mvProperties) { + try (StatementContext statementContext = ctx.getStatementContext()) { + NereidsPlanner planner = new NereidsPlanner(statementContext); + // this is for expression column name infer when not use alias + LogicalSink logicalSink = new UnboundResultSink<>(logicalQuery); + // Should not make table without data to empty relation when analyze the related table, + // so add disable rules + Set tempDisableRules = ctx.getSessionVariable().getDisableNereidsRuleNames(); + 
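Two idioms carry the analyzeQuery rewrite in this hunk: the StatementContext is opened with try-with-resources so it is closed even when planning throws, and the session's disabled-rule set is saved, mutated, and restored in try/finally. A generic sketch of the save/mutate/restore idiom, with a hypothetical SessionGuard type standing in for the session variable holder:

    import java.util.HashSet;
    import java.util.Set;
    import java.util.function.Supplier;

    final class SessionGuard {
        private Set<String> disabledRules = new HashSet<>();

        Set<String> getDisabledRules() { return new HashSet<>(disabledRules); }
        void setDisabledRules(Set<String> rules) { this.disabledRules = rules; }

        // Temporarily disable extra rules, always restoring the previous set,
        // even when planning throws.
        <T> T withExtraDisabledRules(Set<String> extra, Supplier<T> planning) {
            Set<String> saved = getDisabledRules();
            Set<String> merged = new HashSet<>(saved);
            merged.addAll(extra);
            setDisabledRules(merged);
            try {
                return planning.get();
            } finally {
                setDisabledRules(saved); // roll back regardless of outcome
            }
        }
    }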
ctx.getSessionVariable().setDisableNereidsRules(CreateMTMVInfo.MTMV_PLANER_DISABLE_RULES); + statementContext.invalidCache(SessionVariable.DISABLE_NEREIDS_RULES); + Plan plan; + try { + // must disable constant folding by be, because be constant folding may return wrong type + ctx.getSessionVariable().setVarOnce(SessionVariable.ENABLE_FOLD_CONSTANT_BY_BE, "false"); + plan = planner.planWithLock(logicalSink, PhysicalProperties.ANY, ExplainLevel.ALL_PLAN); + } finally { + // after operate, roll back the disable rules + ctx.getSessionVariable().setDisableNereidsRules(String.join(",", tempDisableRules)); + statementContext.invalidCache(SessionVariable.DISABLE_NEREIDS_RULES); + } + // can not contain VIEW or MTMV + analyzeBaseTables(planner.getAnalyzedPlan()); + // can not contain Random function + analyzeExpressions(planner.getAnalyzedPlan(), mvProperties); + // can not contain partition or tablets + boolean containTableQueryOperator = MaterializedViewUtils.containTableQueryOperator( + planner.getAnalyzedPlan()); + if (containTableQueryOperator) { + throw new AnalysisException("can not contain invalid expression"); + } + getRelation(Sets.newHashSet(statementContext.getTables().values()), ctx); + this.mvPartitionInfo = mvPartitionDefinition.analyzeAndTransferToMTMVPartitionInfo(planner); + this.partitionDesc = generatePartitionDesc(ctx); + getColumns(plan, ctx, mvPartitionInfo.getPartitionCol(), distribution); + analyzeKeys(); + } } private void analyzeKeys() { @@ -327,8 +328,8 @@ private void analyzeKeys() { } // Should use analyzed plan for collect views and tables - private void getRelation(NereidsPlanner planner) { - this.relation = MTMVPlanUtil.generateMTMVRelation(planner.getAnalyzedPlan(), planner.getConnectContext()); + private void getRelation(Set tables, ConnectContext ctx) { + this.relation = MTMVPlanUtil.generateMTMVRelation(tables, ctx); } private PartitionDesc generatePartitionDesc(ConnectContext ctx) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java index a26a97f7240793c..c2e9abd2f0f97cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/MTMVPartitionDefinition.java @@ -44,7 +44,6 @@ import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.functions.scalar.DateTrunc; import org.apache.doris.nereids.trees.expressions.literal.Literal; -import org.apache.doris.qe.ConnectContext; import com.google.common.collect.Sets; @@ -66,10 +65,9 @@ public class MTMVPartitionDefinition { * analyzeAndTransferToMTMVPartitionInfo * * @param planner planner - * @param ctx ctx * @return MTMVPartitionInfo */ - public MTMVPartitionInfo analyzeAndTransferToMTMVPartitionInfo(NereidsPlanner planner, ConnectContext ctx) { + public MTMVPartitionInfo analyzeAndTransferToMTMVPartitionInfo(NereidsPlanner planner) { MTMVPartitionInfo mtmvPartitionInfo = new MTMVPartitionInfo(partitionType); if (this.partitionType == MTMVPartitionType.SELF_MANAGE) { return mtmvPartitionInfo; @@ -77,9 +75,8 @@ public MTMVPartitionInfo analyzeAndTransferToMTMVPartitionInfo(NereidsPlanner pl String partitionColName; String timeUnit; if (this.partitionType == MTMVPartitionType.EXPR) { - String functionName = ((UnboundFunction) functionCallExpression).getName(); - if 
(functionCallExpression instanceof UnboundFunction - && functionName.equalsIgnoreCase(PARTITION_BY_FUNCTION_NAME)) { + if (functionCallExpression instanceof UnboundFunction && PARTITION_BY_FUNCTION_NAME + .equalsIgnoreCase(((UnboundFunction) functionCallExpression).getName())) { partitionColName = functionCallExpression.getArgument(0) instanceof UnboundSlot ? ((UnboundSlot) functionCallExpression.getArgument(0)).getName() : null; timeUnit = functionCallExpression.getArguments().get(1).isLiteral() @@ -93,7 +90,7 @@ public MTMVPartitionInfo analyzeAndTransferToMTMVPartitionInfo(NereidsPlanner pl timeUnit = null; } mtmvPartitionInfo.setPartitionCol(partitionColName); - RelatedTableInfo relatedTableInfo = getRelatedTableInfo(planner, ctx, partitionColName, timeUnit); + RelatedTableInfo relatedTableInfo = getRelatedTableInfo(planner, partitionColName, timeUnit); mtmvPartitionInfo.setRelatedCol(relatedTableInfo.getColumn()); mtmvPartitionInfo.setRelatedTable(relatedTableInfo.getTableInfo()); if (relatedTableInfo.getPartitionExpression().isPresent()) { @@ -119,8 +116,7 @@ public MTMVPartitionInfo analyzeAndTransferToMTMVPartitionInfo(NereidsPlanner pl } // Should use rewritten plan without view and subQuery to get related partition table - private RelatedTableInfo getRelatedTableInfo(NereidsPlanner planner, ConnectContext ctx, - String partitionColName, String timeUnit) { + private RelatedTableInfo getRelatedTableInfo(NereidsPlanner planner, String partitionColName, String timeUnit) { CascadesContext cascadesContext = planner.getCascadesContext(); RelatedTableInfo relatedTableInfo = MaterializedViewUtils @@ -129,10 +125,10 @@ private RelatedTableInfo getRelatedTableInfo(NereidsPlanner planner, ConnectCont throw new AnalysisException(String.format("Unable to find a suitable base table for partitioning," + " the fail reason is %s", relatedTableInfo.getFailReason())); } - MTMVRelatedTableIf mtmvBaseRealtedTable = MTMVUtil.getRelatedTable(relatedTableInfo.getTableInfo()); + MTMVRelatedTableIf mtmvBaseRelatedTable = MTMVUtil.getRelatedTable(relatedTableInfo.getTableInfo()); Set partitionColumnNames = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); try { - partitionColumnNames.addAll(mtmvBaseRealtedTable.getPartitionColumnNames(Optional.empty())); + partitionColumnNames.addAll(mtmvBaseRelatedTable.getPartitionColumnNames(Optional.empty())); } catch (DdlException e) { throw new AnalysisException(e.getMessage(), e); } @@ -140,7 +136,7 @@ private RelatedTableInfo getRelatedTableInfo(NereidsPlanner planner, ConnectCont if (!partitionColumnNames.contains(relatedTableInfo.getColumn())) { throw new AnalysisException("error related column: " + relatedTableInfo.getColumn()); } - if (!(mtmvBaseRealtedTable instanceof HMSExternalTable) + if (!(mtmvBaseRelatedTable instanceof HMSExternalTable) && partitionColumnNames.size() != 1) { throw new AnalysisException("only hms table support multi column partition."); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java index 0999c4baa79e3b1..10f9947974cdb07 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java @@ -25,6 +25,7 @@ import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import 
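The MTMVPartitionDefinition fix above also repairs an instanceof-after-cast bug: the old code read the function name via a cast before checking the expression's type, so a non-function partition expression failed with a ClassCastException rather than the intended validation error. Distilled to its shape, with Expr and UnboundFn as stand-in types:

    final class CastOrder {
        interface Expr {}
        static final class UnboundFn implements Expr {
            String name() { return "date_trunc"; }
        }

        // Buggy: the cast happens unconditionally, so any Expr that is not
        // an UnboundFn throws ClassCastException before the check runs.
        static boolean buggy(Expr e) {
            String name = ((UnboundFn) e).name();
            return e instanceof UnboundFn && "date_trunc".equalsIgnoreCase(name);
        }

        // Fixed: test first, cast only inside the guarded branch.
        static boolean fixed(Expr e) {
            return e instanceof UnboundFn
                    && "date_trunc".equalsIgnoreCase(((UnboundFn) e).name());
        }
    }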
org.apache.doris.common.profile.ProfileManager.ProfileType; +import org.apache.doris.common.util.DebugUtil; import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.datasource.jdbc.JdbcExternalTable; @@ -51,6 +52,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalSink; import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.nereids.util.RelationUtil; import org.apache.doris.planner.DataSink; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ConnectContext.ConnectType; @@ -83,13 +85,14 @@ public class InsertIntoTableCommand extends Command implements ForwardWithSync, public static final Logger LOG = LogManager.getLogger(InsertIntoTableCommand.class); + private LogicalPlan originalLogicalQuery; private LogicalPlan logicalQuery; private Optional labelName; /** * When source it's from job scheduler,it will be set. */ private long jobId; - private Optional insertCtx; + private final Optional insertCtx; private final Optional cte; /** @@ -98,7 +101,8 @@ public class InsertIntoTableCommand extends Command implements ForwardWithSync, public InsertIntoTableCommand(LogicalPlan logicalQuery, Optional labelName, Optional insertCtx, Optional cte) { super(PlanType.INSERT_INTO_TABLE_COMMAND); - this.logicalQuery = Objects.requireNonNull(logicalQuery, "logicalQuery should not be null"); + this.originalLogicalQuery = Objects.requireNonNull(logicalQuery, "logicalQuery should not be null"); + this.logicalQuery = originalLogicalQuery; this.labelName = Objects.requireNonNull(labelName, "labelName should not be null"); this.insertCtx = insertCtx; this.cte = cte; @@ -145,62 +149,95 @@ public AbstractInsertExecutor initPlan(ConnectContext ctx, StmtExecutor executor */ public AbstractInsertExecutor initPlan(ConnectContext ctx, StmtExecutor stmtExecutor, boolean needBeginTransaction) throws Exception { - TableIf targetTableIf = InsertUtils.getTargetTable(logicalQuery, ctx); - // check auth - if (!Env.getCurrentEnv().getAccessManager() - .checkTblPriv(ConnectContext.get(), targetTableIf.getDatabase().getCatalog().getName(), - targetTableIf.getDatabase().getFullName(), targetTableIf.getName(), - PrivPredicate.LOAD)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "LOAD", - ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(), - targetTableIf.getDatabase().getFullName() + "." + targetTableIf.getName()); + List qualifiedTargetTableName = InsertUtils.getTargetTableQualified(logicalQuery, ctx); + + AbstractInsertExecutor insertExecutor; + int retryTimes = 0; + while (++retryTimes < Math.max(ctx.getSessionVariable().dmlPlanRetryTimes, 3)) { + TableIf targetTableIf = RelationUtil.getTable(qualifiedTargetTableName, ctx.getEnv()); + // check auth + if (!Env.getCurrentEnv().getAccessManager() + .checkTblPriv(ConnectContext.get(), targetTableIf.getDatabase().getCatalog().getName(), + targetTableIf.getDatabase().getFullName(), targetTableIf.getName(), + PrivPredicate.LOAD)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, "LOAD", + ConnectContext.get().getQualifiedUser(), ConnectContext.get().getRemoteIP(), + targetTableIf.getDatabase().getFullName() + "." 
+ targetTableIf.getName()); + } + BuildInsertExecutorResult buildResult; + try { + buildResult = initPlanOnce(ctx, stmtExecutor, targetTableIf); + } catch (Throwable e) { + Throwables.throwIfInstanceOf(e, RuntimeException.class); + throw new IllegalStateException(e.getMessage(), e); + } + insertExecutor = buildResult.executor; + if (!needBeginTransaction) { + return insertExecutor; + } + + // lock after plan and check does table's schema changed to ensure we lock table order by id. + TableIf newestTargetTableIf = RelationUtil.getTable(qualifiedTargetTableName, ctx.getEnv()); + newestTargetTableIf.readLock(); + try { + if (targetTableIf.getId() != newestTargetTableIf.getId()) { + LOG.warn("insert plan failed {} times. query id is {}. table id changed from {} to {}", + retryTimes, DebugUtil.printId(ctx.queryId()), + targetTableIf.getId(), newestTargetTableIf.getId()); + continue; + } + if (!targetTableIf.getFullSchema().equals(newestTargetTableIf.getFullSchema())) { + LOG.warn("insert plan failed {} times. query id is {}. table schema changed from {} to {}", + retryTimes, DebugUtil.printId(ctx.queryId()), + targetTableIf.getFullSchema(), newestTargetTableIf.getFullSchema()); + continue; + } + if (!insertExecutor.isEmptyInsert()) { + insertExecutor.beginTransaction(); + insertExecutor.finalizeSink( + buildResult.planner.getFragments().get(0), buildResult.dataSink, + buildResult.physicalSink + ); + } + newestTargetTableIf.readUnlock(); + } catch (Throwable e) { + newestTargetTableIf.readUnlock(); + // the abortTxn in onFail need to acquire table write lock + if (insertExecutor != null) { + insertExecutor.onFail(e); + } + Throwables.throwIfInstanceOf(e, RuntimeException.class); + throw new IllegalStateException(e.getMessage(), e); + } + stmtExecutor.setProfileType(ProfileType.LOAD); + // We exposed @StmtExecutor#cancel as a unified entry point for statement interruption, + // so we need to set this here + insertExecutor.getCoordinator().setTxnId(insertExecutor.getTxnId()); + stmtExecutor.setCoord(insertExecutor.getCoordinator()); + // for prepare and execute, avoiding normalization for every execute command + this.originalLogicalQuery = this.logicalQuery; + return insertExecutor; } + LOG.warn("insert plan failed {} times. query id is {}.", retryTimes, DebugUtil.printId(ctx.queryId())); + throw new AnalysisException("Insert plan failed. Could not get target table lock."); + } - AbstractInsertExecutor insertExecutor = null; - // should lock target table until we begin transaction. + private BuildInsertExecutorResult initPlanOnce(ConnectContext ctx, + StmtExecutor stmtExecutor, TableIf targetTableIf) throws Throwable { targetTableIf.readLock(); try { - // 1. 
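The rewritten initPlan above is optimistic, phased locking: plan with no table lock held, then re-resolve the table, take its read lock, and verify that the table id and full schema are unchanged before committing to the plan; any mismatch discards the work and retries, bounded by the dmlPlanRetryTimes session variable with a floor of three via Math.max. A compact sketch of that control flow, where Catalog, Table, Plan, and Planner are stand-in interfaces:

    import java.util.List;

    final class OptimisticPlanRetry {
        interface Catalog { Table lookup(String name); }
        interface Table {
            long id();
            List<String> schema();
            void readLock();
            void readUnlock();
        }
        interface Plan {}
        interface Planner { Plan plan(Table snapshot); }

        static Plan planWithRetry(Catalog catalog, Planner planner, String name, int retryTimes) {
            for (int attempt = 1; attempt <= Math.max(retryTimes, 3); attempt++) {
                Table snapshot = catalog.lookup(name);
                Plan plan = planner.plan(snapshot);      // no lock held while planning
                Table newest = catalog.lookup(name);     // re-resolve after planning
                newest.readLock();
                try {
                    // Retry when the table was dropped and recreated or its schema
                    // changed between planning and locking; otherwise the plan is safe.
                    if (newest.id() == snapshot.id() && newest.schema().equals(snapshot.schema())) {
                        return plan;
                    }
                } finally {
                    newest.readUnlock();
                }
            }
            throw new IllegalStateException("plan failed: table kept changing");
        }
    }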
process inline table (default values, empty values) - this.logicalQuery = (LogicalPlan) InsertUtils.normalizePlan(logicalQuery, targetTableIf, insertCtx); + // process inline table (default values, empty values) + this.logicalQuery = (LogicalPlan) InsertUtils.normalizePlan(originalLogicalQuery, targetTableIf, insertCtx); if (cte.isPresent()) { this.logicalQuery = ((LogicalPlan) cte.get().withChildren(logicalQuery)); } OlapGroupCommitInsertExecutor.analyzeGroupCommit(ctx, targetTableIf, this.logicalQuery, this.insertCtx); - LogicalPlanAdapter logicalPlanAdapter = new LogicalPlanAdapter(logicalQuery, ctx.getStatementContext()); - - BuildInsertExecutorResult buildResult = planInsertExecutor( - ctx, stmtExecutor, logicalPlanAdapter, targetTableIf - ); - - insertExecutor = buildResult.executor; - - if (!needBeginTransaction) { - targetTableIf.readUnlock(); - return insertExecutor; - } - if (!insertExecutor.isEmptyInsert()) { - insertExecutor.beginTransaction(); - insertExecutor.finalizeSink( - buildResult.planner.getFragments().get(0), buildResult.dataSink, buildResult.physicalSink - ); - } - targetTableIf.readUnlock(); - } catch (Throwable e) { + } finally { targetTableIf.readUnlock(); - // the abortTxn in onFail need to acquire table write lock - if (insertExecutor != null) { - insertExecutor.onFail(e); - } - Throwables.propagateIfInstanceOf(e, RuntimeException.class); - throw new IllegalStateException(e.getMessage(), e); } - stmtExecutor.setProfileType(ProfileType.LOAD); - // We exposed @StmtExecutor#cancel as a unified entry point for statement interruption, - // so we need to set this here - insertExecutor.getCoordinator().setTxnId(insertExecutor.getTxnId()); - stmtExecutor.setCoord(insertExecutor.getCoordinator()); - return insertExecutor; + LogicalPlanAdapter logicalPlanAdapter = new LogicalPlanAdapter(logicalQuery, ctx.getStatementContext()); + return planInsertExecutor(ctx, stmtExecutor, logicalPlanAdapter, targetTableIf); } // we should select the factory type first, but we can not initial InsertExecutor at this time, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java index 60e7e5bf805a64d..459ffcd04f894ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java @@ -425,6 +425,14 @@ private static Expression castValue(Expression value, DataType targetType) { * get target table from names. */ public static TableIf getTargetTable(Plan plan, ConnectContext ctx) { + List tableQualifier = getTargetTableQualified(plan, ctx); + return RelationUtil.getTable(tableQualifier, ctx.getEnv()); + } + + /** + * get target table from names. 
+ */ + public static List getTargetTableQualified(Plan plan, ConnectContext ctx) { UnboundLogicalSink unboundTableSink; if (plan instanceof UnboundTableSink) { unboundTableSink = (UnboundTableSink) plan; @@ -439,8 +447,7 @@ public static TableIf getTargetTable(Plan plan, ConnectContext ctx) { + " [UnboundTableSink, UnboundHiveTableSink, UnboundIcebergTableSink]," + " but it is " + plan.getType()); } - List tableQualifier = RelationUtil.getQualifierName(ctx, unboundTableSink.getNameParts()); - return RelationUtil.getDbAndTable(tableQualifier, ctx.getEnv()).second; + return RelationUtil.getQualifierName(ctx, unboundTableSink.getNameParts()); } private static NamedExpression generateDefaultExpression(Column column) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java deleted file mode 100644 index 27ff1e4b68c0754..000000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java +++ /dev/null @@ -1,122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.trees.plans.visitor; - -import org.apache.doris.catalog.MTMV; -import org.apache.doris.catalog.TableIf; -import org.apache.doris.catalog.TableIf.TableType; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.mtmv.MTMVCache; -import org.apache.doris.nereids.trees.plans.Plan; -import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; -import org.apache.doris.nereids.trees.plans.physical.PhysicalCatalogRelation; -import org.apache.doris.nereids.trees.plans.visitor.TableCollector.TableCollectorContext; -import org.apache.doris.qe.ConnectContext; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.HashSet; -import java.util.Set; - -/** - * Collect the table in plan - * Note: will not get table if table is eliminated by EmptyRelation in rewrite. - * View expand is in RBO, if call this method with the plan after RBO, this will get base tables in view, or will not. 
- * Materialized view is extended or not can be controlled by the field expand - */ -public class TableCollector extends DefaultPlanVisitor { - - public static final TableCollector INSTANCE = new TableCollector(); - private static final Logger LOG = LogManager.getLogger(TableCollector.class); - - @Override - public Plan visitLogicalCatalogRelation(LogicalCatalogRelation catalogRelation, TableCollectorContext context) { - TableIf table = catalogRelation.getTable(); - if (context.getTargetTableTypes().isEmpty() || context.getTargetTableTypes().contains(table.getType())) { - context.getCollectedTables().add(table); - } - if (table instanceof MTMV) { - expandMvAndCollect((MTMV) table, context); - } - return catalogRelation; - } - - @Override - public Plan visitPhysicalCatalogRelation(PhysicalCatalogRelation catalogRelation, TableCollectorContext context) { - TableIf table = catalogRelation.getTable(); - if (context.getTargetTableTypes().isEmpty() || context.getTargetTableTypes().contains(table.getType())) { - context.getCollectedTables().add(table); - } - if (table instanceof MTMV) { - expandMvAndCollect((MTMV) table, context); - } - return catalogRelation; - } - - private void expandMvAndCollect(MTMV mtmv, TableCollectorContext context) { - if (!context.isExpandMaterializedView()) { - return; - } - // Make sure use only one connection context when in query to avoid ConnectionContext.get() wrong - MTMVCache expandedMvCache; - try { - expandedMvCache = mtmv.getOrGenerateCache(context.getConnectContext()); - } catch (AnalysisException exception) { - LOG.warn(String.format("expandMvAndCollect getOrGenerateCache fail, mtmv name is %s", mtmv.getName()), - exception); - expandedMvCache = MTMVCache.from(mtmv, context.getConnectContext(), false); - } - expandedMvCache.getAnalyzedPlan().accept(this, context); - } - - /** - * The context for table collecting, it contains the target collect table types - * and the result of collect. 
- */ - public static final class TableCollectorContext { - private final Set collectedTables = new HashSet<>(); - private final Set targetTableTypes; - // if expand the mv or not - private final boolean expandMaterializedView; - private final ConnectContext connectContext; - - public TableCollectorContext(Set targetTableTypes, boolean expandMaterializedView, - ConnectContext connectContext) { - this.targetTableTypes = targetTableTypes; - this.expandMaterializedView = expandMaterializedView; - this.connectContext = connectContext; - } - - public Set getCollectedTables() { - return collectedTables; - } - - public Set getTargetTableTypes() { - return targetTableTypes; - } - - public boolean isExpandMaterializedView() { - return expandMaterializedView; - } - - public ConnectContext getConnectContext() { - return connectContext; - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 126ed1135e9dc06..75f21c786b8c379 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -141,6 +141,7 @@ public class SessionVariable implements Serializable, Writable { public static final String PARALLEL_PIPELINE_TASK_NUM = "parallel_pipeline_task_num"; public static final String PROFILE_LEVEL = "profile_level"; public static final String MAX_INSTANCE_NUM = "max_instance_num"; + public static final String DML_PLAN_RETRY_TIMES = "DML_PLAN_RETRY_TIMES"; public static final String ENABLE_INSERT_STRICT = "enable_insert_strict"; public static final String INSERT_MAX_FILTER_RATIO = "insert_max_filter_ratio"; @@ -1008,6 +1009,17 @@ public enum IgnoreSplitType { @VariableMgr.VarAttr(name = MAX_INSTANCE_NUM) public int maxInstanceNum = 64; + @VariableMgr.VarAttr(name = DML_PLAN_RETRY_TIMES, needForward = true, description = { + "写入规划的最大重试次数。为了避免死锁,写入规划时采用了分阶段加锁。当在两次加锁中间,表结构发生变更时,会尝试重新规划。" + + "此变量限制重新规划的最大尝试次数。", + "Maximum retry attempts for write planning. To avoid deadlocks, " + + "phased locking is adopted during write planning. " + + "When changes occur to the table structure between two locking phases, " + + "re-planning will be attempted. " + + "This variable limits the maximum number of retry attempts for re-planning." 
+ }) + public int dmlPlanRetryTimes = 3; + @VariableMgr.VarAttr(name = ENABLE_INSERT_STRICT, needForward = true) public boolean enableInsertStrict = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 5c2566225fe50a5..e757f3153db0383 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -237,6 +237,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -1357,6 +1358,7 @@ public void analyze(TQueryOptions tQueryOptions) throws UserException, Interrupt } // table id in tableList is in ascending order because that table map is a sorted map List tables = Lists.newArrayList(tableMap.values()); + tables.sort((Comparator.comparing(TableIf::getId))); int analyzeTimes = 2; if (Config.isCloudMode()) { // be core and be restarted, need retry more times @@ -2387,6 +2389,7 @@ private void handleInsertStmt() throws Exception { response.getStatus(), i); if (i < maxRetry) { List tables = Lists.newArrayList(insertStmt.getTargetTable()); + tables.sort((Comparator.comparing(TableIf::getId))); MetaLockUtils.readLockTables(tables); try { insertStmt.reset(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java index 01eb92b9be3f40c..5f6c12d8eeb4772 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java @@ -43,6 +43,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.proc.FrontendsProcNode; import org.apache.doris.common.proc.PartitionsProcDir; +import org.apache.doris.common.util.MetaLockUtils; import org.apache.doris.common.util.NetUtils; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.common.util.Util; @@ -62,7 +63,10 @@ import org.apache.doris.job.common.JobType; import org.apache.doris.job.extensions.mtmv.MTMVJob; import org.apache.doris.job.task.AbstractTask; +import org.apache.doris.mtmv.BaseTableInfo; import org.apache.doris.mtmv.MTMVPartitionUtil; +import org.apache.doris.mtmv.MTMVStatus; +import org.apache.doris.mtmv.MTMVUtil; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.plsql.metastore.PlsqlManager; import org.apache.doris.plsql.metastore.PlsqlProcedureKey; @@ -111,6 +115,7 @@ import java.time.LocalDateTime; import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.Date; import java.util.List; import java.util.Map; @@ -844,22 +849,42 @@ private static TFetchSchemaTableDataResult mtmvMetadataResult(TMetadataTableRequ } MTMV mv = (MTMV) table; if (LOG.isDebugEnabled()) { - LOG.debug("mv: " + mv.toInfoString()); + LOG.debug("mv: {}", mv.toInfoString()); } + List needLocked = Lists.newArrayList(); + needLocked.add(mv); + boolean alwaysNotSync = false; + try { + for (BaseTableInfo baseTableInfo : mv.getRelation().getBaseTables()) { + TableIf baseTable = MTMVUtil.getTable(baseTableInfo); + needLocked.add(baseTable); + } + } catch (Exception e) { + alwaysNotSync = true; + } + needLocked.sort(Comparator.comparing(TableIf::getId)); + MetaLockUtils.readLockTables(needLocked); + boolean isSync; + try { + isSync = 
!alwaysNotSync && MTMVPartitionUtil.isMTMVSync(mv); + } finally { + MetaLockUtils.readUnlockTables(needLocked); + } + MTMVStatus mtmvStatus = mv.getStatus(); TRow trow = new TRow(); trow.addToColumnValue(new TCell().setLongVal(mv.getId())); trow.addToColumnValue(new TCell().setStringVal(mv.getName())); trow.addToColumnValue(new TCell().setStringVal(mv.getJobInfo().getJobName())); - trow.addToColumnValue(new TCell().setStringVal(mv.getStatus().getState().name())); - trow.addToColumnValue(new TCell().setStringVal(mv.getStatus().getSchemaChangeDetail())); - trow.addToColumnValue(new TCell().setStringVal(mv.getStatus().getRefreshState().name())); + trow.addToColumnValue(new TCell().setStringVal(mtmvStatus.getState().name())); + trow.addToColumnValue(new TCell().setStringVal(mtmvStatus.getSchemaChangeDetail())); + trow.addToColumnValue(new TCell().setStringVal(mtmvStatus.getRefreshState().name())); trow.addToColumnValue(new TCell().setStringVal(mv.getRefreshInfo().toString())); trow.addToColumnValue(new TCell().setStringVal(mv.getQuerySql())); trow.addToColumnValue(new TCell().setStringVal(mv.getMvProperties().toString())); trow.addToColumnValue(new TCell().setStringVal(mv.getMvPartitionInfo().toNameString())); - trow.addToColumnValue(new TCell().setBoolVal(MTMVPartitionUtil.isMTMVSync(mv))); + trow.addToColumnValue(new TCell().setBoolVal(isSync)); if (LOG.isDebugEnabled()) { - LOG.debug("mvend: " + mv.getName()); + LOG.debug("mv end: {}", mv.getName()); } dataBatch.add(trow); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/BindRelationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/BindRelationTest.java index 369a57017cba28f..eaeaa3b2edda8b2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/BindRelationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/BindRelationTest.java @@ -17,23 +17,13 @@ package org.apache.doris.nereids.rules.analysis; -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.DatabaseIf; -import org.apache.doris.catalog.KeysType; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.PartitionInfo; -import org.apache.doris.catalog.RandomDistributionInfo; -import org.apache.doris.catalog.Type; import org.apache.doris.nereids.analyzer.UnboundRelation; import org.apache.doris.nereids.pattern.GeneratedPlanPatterns; import org.apache.doris.nereids.rules.RulePromise; -import org.apache.doris.nereids.rules.analysis.BindRelation.CustomTableResolver; import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; -import org.apache.doris.nereids.util.PlanChecker; import org.apache.doris.nereids.util.PlanRewriter; import org.apache.doris.utframe.TestWithFeService; @@ -41,9 +31,6 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.List; -import java.util.Optional; - class BindRelationTest extends TestWithFeService implements GeneratedPlanPatterns { private static final String DB1 = "db1"; private static final String DB2 = "db2"; @@ -72,7 +59,7 @@ void bindInCurrentDb() { Plan plan = PlanRewriter.bottomUpRewrite(new UnboundRelation(StatementScopeIdGenerator.newRelationId(), ImmutableList.of("t")), connectContext, new BindRelation()); - 
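The mtmvMetadataResult change above applies the same ordered-locking discipline on the read side: collect the MV plus all of its base tables, sort by id, read-lock the whole set, compute the sync flag, and release in finally; if any base table can no longer be resolved, a flag forces the result to not-sync. Reduced to its shape, with Table and Mv as stand-in interfaces:

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.List;

    final class SyncSnapshot {
        interface Table { long id(); void readLock(); void readUnlock(); }
        interface Mv extends Table { List<Table> baseTables() throws Exception; boolean computeSync(); }

        static boolean isSync(Mv mv) {
            List<Table> needLocked = new ArrayList<>();
            needLocked.add(mv);
            boolean alwaysNotSync = false;
            try {
                needLocked.addAll(mv.baseTables());
            } catch (Exception e) {
                alwaysNotSync = true; // a base table vanished: report not-sync
            }
            needLocked.sort(Comparator.comparingLong(Table::id));
            for (Table t : needLocked) {
                t.readLock();
            }
            try {
                return !alwaysNotSync && mv.computeSync();
            } finally {
                for (int i = needLocked.size() - 1; i >= 0; i--) {
                    needLocked.get(i).readUnlock();
                }
            }
        }
    }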
Assertions.assertTrue(plan instanceof LogicalOlapScan); + Assertions.assertInstanceOf(LogicalOlapScan.class, plan); Assertions.assertEquals( ImmutableList.of("internal", DEFAULT_CLUSTER_PREFIX + DB1, "t"), ((LogicalOlapScan) plan).qualified()); @@ -84,63 +71,12 @@ void bindByDbQualifier() { Plan plan = PlanRewriter.bottomUpRewrite(new UnboundRelation(StatementScopeIdGenerator.newRelationId(), ImmutableList.of("db1", "t")), connectContext, new BindRelation()); - Assertions.assertTrue(plan instanceof LogicalOlapScan); + Assertions.assertInstanceOf(LogicalOlapScan.class, plan); Assertions.assertEquals( ImmutableList.of("internal", DEFAULT_CLUSTER_PREFIX + DB1, "t"), ((LogicalOlapScan) plan).qualified()); } - @Test - public void bindExternalRelation() { - connectContext.setDatabase(DEFAULT_CLUSTER_PREFIX + DB1); - String tableName = "external_table"; - - List externalTableColumns = ImmutableList.of( - new Column("id", Type.INT), - new Column("name", Type.VARCHAR) - ); - - Database externalDatabase = new Database(10000, DEFAULT_CLUSTER_PREFIX + DB1); - - OlapTable externalOlapTable = new OlapTable(1, tableName, externalTableColumns, KeysType.DUP_KEYS, - new PartitionInfo(), new RandomDistributionInfo(10)) { - @Override - public List getBaseSchema(boolean full) { - return externalTableColumns; - } - - @Override - public boolean hasDeleteSign() { - return false; - } - - @Override - public DatabaseIf getDatabase() { - return externalDatabase; - } - }; - - CustomTableResolver customTableResolver = qualifiedTable -> { - if (qualifiedTable.get(2).equals(tableName)) { - return externalOlapTable; - } else { - return null; - } - }; - - PlanChecker.from(connectContext) - .parse("select * from " + tableName + " as et join db1.t on et.id = t.a") - .customAnalyzer(Optional.of(customTableResolver)) // analyze internal relation - .matches( - logicalJoin( - logicalSubQueryAlias( - logicalOlapScan().when(r -> r.getTable() == externalOlapTable) - ), - logicalOlapScan().when(r -> r.getTable().getName().equals("t")) - ) - ); - } - @Test void bindRandomAggTable() { connectContext.setDatabase(DEFAULT_CLUSTER_PREFIX + DB1); @@ -148,7 +84,7 @@ void bindRandomAggTable() { Plan plan = PlanRewriter.bottomUpRewrite(new UnboundRelation(StatementScopeIdGenerator.newRelationId(), ImmutableList.of("tagg")), connectContext, new BindRelation()); - Assertions.assertTrue(plan instanceof LogicalAggregate); + Assertions.assertInstanceOf(LogicalAggregate.class, plan); Assertions.assertEquals( ImmutableList.of("internal", DEFAULT_CLUSTER_PREFIX + DB1, "tagg"), plan.getOutput().get(0).getQualifier()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanVisitorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanVisitorTest.java index 0c54f8fad5a1077..82c8122a18d72f8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanVisitorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/PlanVisitorTest.java @@ -17,34 +17,19 @@ package org.apache.doris.nereids.trees.plans; -import org.apache.doris.catalog.TableIf; -import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewUtils; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentDate; import org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentTime; import org.apache.doris.nereids.trees.expressions.functions.scalar.Now; -import 
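The test cleanups from here on swap Assertions.assertTrue(x instanceof T) for JUnit 5's assertInstanceOf (available since JUnit 5.8), which reports the actual type on failure and returns the value already cast. A before/after in miniature, with String standing in for the plan type:

    import static org.junit.jupiter.api.Assertions.assertInstanceOf;
    import static org.junit.jupiter.api.Assertions.assertTrue;

    class AssertStyle {
        void check(Object plan) {
            // Before: on failure this only reports "expected true".
            assertTrue(plan instanceof String);
            // After: reports expected vs. actual type, and hands back the cast value.
            String s = assertInstanceOf(String.class, plan);
            System.out.println(s.length());
        }
    }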
org.apache.doris.nereids.trees.expressions.functions.scalar.Random; import org.apache.doris.nereids.trees.expressions.functions.scalar.UnixTimestamp; -import org.apache.doris.nereids.trees.expressions.functions.scalar.Uuid; -import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; -import org.apache.doris.nereids.trees.plans.visitor.TableCollector; -import org.apache.doris.nereids.trees.plans.visitor.TableCollector.TableCollectorContext; import org.apache.doris.nereids.util.PlanChecker; -import org.apache.doris.qe.SessionVariable; import org.apache.doris.utframe.TestWithFeService; -import com.google.common.collect.Sets; -import mockit.Mock; -import mockit.MockUp; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.BitSet; -import java.util.HashSet; import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; /** * Tests for plan visitors to make sure the result meets expectation. @@ -115,154 +100,6 @@ protected void runBeforeAll() throws Exception { + "inner join table3 t3 on t1.c1= t3.c2;"); } - @Test - public void test1() { - PlanChecker.from(connectContext) - .checkPlannerResult("SELECT *, random() FROM table1 " - + "LEFT SEMI JOIN table2 ON table1.c1 = table2.c1 " - + "WHERE table1.c1 IN (SELECT c1 FROM table2) OR table1.c1 < 10", - nereidsPlanner -> { - PhysicalPlan physicalPlan = nereidsPlanner.getPhysicalPlan(); - // Check nondeterministic collect - List nondeterministicFunctionSet = - MaterializedViewUtils.extractNondeterministicFunction(physicalPlan); - Assertions.assertEquals(1, nondeterministicFunctionSet.size()); - Assertions.assertTrue(nondeterministicFunctionSet.get(0) instanceof Random); - // Check get tables - TableCollectorContext collectorContext = new TableCollector.TableCollectorContext( - Sets.newHashSet(TableType.OLAP), true, connectContext); - physicalPlan.accept(TableCollector.INSTANCE, collectorContext); - Set expectedTables = new HashSet<>(); - expectedTables.add("table1"); - expectedTables.add("table2"); - Assertions.assertEquals( - collectorContext.getCollectedTables().stream() - .map(TableIf::getName) - .collect(Collectors.toSet()), - expectedTables); - }); - } - - @Test - public void test2() { - PlanChecker.from(connectContext) - .checkPlannerResult("SELECT view1.*, uuid() FROM view1 " - + "LEFT SEMI JOIN table2 ON view1.c1 = table2.c1 " - + "WHERE view1.c1 IN (SELECT c1 FROM table2) OR view1.c1 < 10", - nereidsPlanner -> { - PhysicalPlan physicalPlan = nereidsPlanner.getPhysicalPlan(); - // Check nondeterministic collect - List nondeterministicFunctionSet = - MaterializedViewUtils.extractNondeterministicFunction(physicalPlan); - Assertions.assertEquals(2, nondeterministicFunctionSet.size()); - Assertions.assertTrue(nondeterministicFunctionSet.get(0) instanceof Uuid); - Assertions.assertTrue(nondeterministicFunctionSet.get(1) instanceof Random); - // Check get tables - TableCollectorContext collectorContext = new TableCollector.TableCollectorContext( - Sets.newHashSet(TableType.OLAP), true, connectContext); - physicalPlan.accept(TableCollector.INSTANCE, collectorContext); - Set expectedTables = new HashSet<>(); - expectedTables.add("table1"); - expectedTables.add("table2"); - Assertions.assertEquals( - collectorContext.getCollectedTables().stream() - .map(TableIf::getName) - .collect(Collectors.toSet()), - expectedTables); - }); - } - - @Test - public void test3() throws Exception { - connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION"); - BitSet 
disableNereidsRules = connectContext.getSessionVariable().getDisableNereidsRules(); - new MockUp() { - @Mock - public BitSet getDisableNereidsRules() { - return disableNereidsRules; - } - }; - PlanChecker.from(connectContext) - .checkPlannerResult("SELECT mv1.*, uuid() FROM mv1 " - + "INNER JOIN view1 on mv1.c1 = view1.c2 " - + "LEFT SEMI JOIN table2 ON mv1.c1 = table2.c1 " - + "WHERE mv1.c1 IN (SELECT c1 FROM table2) OR mv1.c1 < 10", - nereidsPlanner -> { - PhysicalPlan physicalPlan = nereidsPlanner.getPhysicalPlan(); - // Check nondeterministic collect - List nondeterministicFunctionSet = - MaterializedViewUtils.extractNondeterministicFunction(physicalPlan); - Assertions.assertEquals(1, nondeterministicFunctionSet.size()); - Assertions.assertTrue(nondeterministicFunctionSet.get(0) instanceof Uuid); - // Check get tables - TableCollectorContext collectorContext = new TableCollector.TableCollectorContext( - Sets.newHashSet(TableType.OLAP), true, connectContext); - physicalPlan.accept(TableCollector.INSTANCE, collectorContext); - Set expectedTables = new HashSet<>(); - expectedTables.add("table1"); - expectedTables.add("table2"); - expectedTables.add("table3"); - Assertions.assertEquals( - collectorContext.getCollectedTables().stream() - .map(TableIf::getName) - .collect(Collectors.toSet()), - expectedTables); - - TableCollectorContext collectorContextWithNoExpand = - new TableCollector.TableCollectorContext(Sets.newHashSet(TableType.OLAP), - false, connectContext); - physicalPlan.accept(TableCollector.INSTANCE, collectorContextWithNoExpand); - Set expectedTablesWithNoExpand = new HashSet<>(); - expectedTablesWithNoExpand.add("table1"); - expectedTablesWithNoExpand.add("table2"); - Assertions.assertEquals( - collectorContextWithNoExpand.getCollectedTables().stream() - .map(TableIf::getName) - .collect(Collectors.toSet()), - expectedTablesWithNoExpand); - - TableCollectorContext mvCollectorContext = new TableCollector.TableCollectorContext( - Sets.newHashSet(TableType.MATERIALIZED_VIEW), true, connectContext); - physicalPlan.accept(TableCollector.INSTANCE, mvCollectorContext); - Set expectedMvs = new HashSet<>(); - expectedMvs.add("mv1"); - Assertions.assertEquals( - mvCollectorContext.getCollectedTables().stream() - .map(TableIf::getName) - .collect(Collectors.toSet()), - expectedMvs); - - TableCollectorContext mvCollectorContextWithNoExpand = - new TableCollector.TableCollectorContext( - Sets.newHashSet(TableType.MATERIALIZED_VIEW), false, connectContext); - physicalPlan.accept(TableCollector.INSTANCE, mvCollectorContextWithNoExpand); - Set expectedMvsWithNoExpand = new HashSet<>(); - expectedMvsWithNoExpand.add("mv1"); - Assertions.assertEquals( - mvCollectorContextWithNoExpand.getCollectedTables().stream() - .map(TableIf::getName) - .collect(Collectors.toSet()), - expectedMvsWithNoExpand); - - TableCollectorContext allTableTypeWithExpand = - new TableCollector.TableCollectorContext( - Sets.newHashSet(TableType.values()), true, connectContext); - physicalPlan.accept(TableCollector.INSTANCE, allTableTypeWithExpand); - // when collect in plan with expand, should collect table which is expended - Set expectedTablesWithExpand = new HashSet<>(); - expectedTablesWithExpand.add("mv1"); - expectedTablesWithExpand.add("table1"); - expectedTablesWithExpand.add("table2"); - expectedTablesWithExpand.add("table3"); - Assertions.assertEquals( - allTableTypeWithExpand.getCollectedTables().stream() - .map(TableIf::getName) - .collect(Collectors.toSet()), - expectedTablesWithExpand); - }); - 
dropMvByNereids("drop materialized view mv1"); - } - @Test public void testTimeFunction() { PlanChecker.from(connectContext) diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java index f0a45d1e7bc8520..77ecbd5dc7c4dda 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java @@ -49,7 +49,6 @@ import org.apache.doris.nereids.rules.RuleFactory; import org.apache.doris.nereids.rules.RuleSet; import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.rules.analysis.BindRelation.CustomTableResolver; import org.apache.doris.nereids.rules.exploration.mv.InitMaterializationContextHook; import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory; import org.apache.doris.nereids.trees.plans.GroupPlan; @@ -71,7 +70,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Optional; import java.util.Set; import java.util.function.Consumer; import java.util.function.Supplier; @@ -147,12 +145,6 @@ public PlanChecker analyze(String sql) { return this; } - public PlanChecker customAnalyzer(Optional customTableResolver) { - this.cascadesContext.newAnalyzer(customTableResolver).analyze(); - this.cascadesContext.toMemo(); - return this; - } - public PlanChecker customRewrite(CustomRewriter customRewriter) { Rewriter.getWholeTreeRewriterWithCustomJobs(cascadesContext, ImmutableList.of(Rewriter.custom(RuleType.TEST_REWRITE, () -> customRewriter))) @@ -277,7 +269,7 @@ public NereidsPlanner plan(String sql) { LogicalPlan parsedPlan = new NereidsParser().parseSingle(sql); LogicalPlanAdapter parsedPlanAdaptor = new LogicalPlanAdapter(parsedPlan, statementContext); statementContext.setParsedStatement(parsedPlanAdaptor); - planner.planWithLock(parsedPlanAdaptor); + planner.plan(parsedPlanAdaptor); return planner; } @@ -576,7 +568,7 @@ public PlanChecker checkExplain(String sql, Consumer consumer) { new StatementContext(connectContext, new OriginStatement(sql, 0))); LogicalPlanAdapter adapter = LogicalPlanAdapter.of(parsed); adapter.setIsExplain(new ExplainOptions(ExplainLevel.ALL_PLAN, false)); - nereidsPlanner.planWithLock(adapter); + nereidsPlanner.plan(adapter); consumer.accept(nereidsPlanner); return this; } @@ -585,7 +577,7 @@ public PlanChecker checkPlannerResult(String sql, Consumer consu LogicalPlan parsed = new NereidsParser().parseSingle(sql); NereidsPlanner nereidsPlanner = new NereidsPlanner( new StatementContext(connectContext, new OriginStatement(sql, 0))); - nereidsPlanner.planWithLock(LogicalPlanAdapter.of(parsed)); + nereidsPlanner.plan(LogicalPlanAdapter.of(parsed)); consumer.accept(nereidsPlanner); return this; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java index 1e1535a573610b5..6cd85183b3df796 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/ReadLockTest.java @@ -118,7 +118,7 @@ public void testScalarSubQuery() { } @Test - public void testInserInto() { + public void testInsertInto() { String sql = "INSERT INTO supplier(s_suppkey, s_name, s_address, s_city, s_nation, s_region, s_phone) " + "SELECT lo_orderkey, '', '', '', '', '', '' FROM lineorder"; StatementContext statementContext = 
MemoTestUtils.createStatementContext(connectContext, sql); @@ -129,7 +129,6 @@ public void testInserInto() { PhysicalProperties.ANY ); Map, TableIf> f = statementContext.getTables(); - // when table in insert would not be added to statement context, but be lock when insert Assertions.assertEquals(1, f.size()); Set tableNames = new HashSet<>(); for (Map.Entry, TableIf> entry : f.entrySet()) { @@ -137,5 +136,13 @@ public void testInserInto() { tableNames.add(table.getName()); } Assertions.assertTrue(tableNames.contains("lineorder")); + f = statementContext.getInsertTargetTables(); + Assertions.assertEquals(1, f.size()); + tableNames = new HashSet<>(); + for (Map.Entry, TableIf> entry : f.entrySet()) { + TableIf table = entry.getValue(); + tableNames.add(table.getName()); + } + Assertions.assertTrue(tableNames.contains("supplier")); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java index 3c793cfc72090dd..e8b545f3ffe2c11 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java @@ -502,7 +502,7 @@ private StatementBase parseSqlByNereids(String sql) { ctx.setStatementContext(statementContext); NereidsPlanner nereidsPlanner = new NereidsPlanner(statementContext); LogicalPlanAdapter adapter = new LogicalPlanAdapter(plan, statementContext); - nereidsPlanner.planWithLock(adapter); + nereidsPlanner.plan(adapter); statementContext.setParsedStatement(adapter); stmt = adapter; } catch (Throwable throwable) { From b1ccd3696a906c2d295b5af5e21665674d56a96a Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Thu, 19 Dec 2024 19:56:19 +0800 Subject: [PATCH 11/55] [fix](memory) Fix adjust cache capacity (#45603) ### What problem does this PR solve? If the cache capacity adjustment is not completed within 500ms (conf::memory_gc_sleep_time_ms), the next adjustment will be skipped. In some scenarios, after Memory GC adjusts the cache capacity to 0, the next adjustment to restore the cache capacity is skipped, the cache capacity will remain at 0 for a long time. --- be/src/common/daemon.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 12bf1749a5694de..2aaa58f4feb5976 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -230,6 +230,11 @@ void refresh_memory_state_after_memory_change() { } void refresh_cache_capacity() { + if (doris::GlobalMemoryArbitrator::cache_adjust_capacity_notify.load( + std::memory_order_relaxed)) { + // the last cache capacity adjustment has not been completed. + return; + } if (refresh_cache_capacity_sleep_time_ms <= 0) { auto cache_capacity_reduce_mem_limit = int64_t( doris::MemInfo::soft_mem_limit() * config::cache_capacity_reduce_mem_limit_frac); @@ -247,6 +252,8 @@ void refresh_cache_capacity() { new_cache_capacity_adjust_weighted; doris::GlobalMemoryArbitrator::notify_cache_adjust_capacity(); refresh_cache_capacity_sleep_time_ms = config::memory_gc_sleep_time_ms; + } else { + refresh_cache_capacity_sleep_time_ms = 0; } } refresh_cache_capacity_sleep_time_ms -= config::memory_maintenance_sleep_time_ms; From 6cce4087f94b5ef91b7ffcba294b4c5da79f1f14 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Thu, 19 Dec 2024 19:56:38 +0800 Subject: [PATCH 12/55] [fix](memory) Process available memory to increase the Jemalloc cache (#45621) ### What problem does this PR solve? 
Currently, when the Doris BE process exceeds its memory limit, the Jemalloc
cache is released manually. Adding the Jemalloc cache to the available memory
of the BE process is expected to have little impact on the risk of the process
being OOM-killed, because the process memory used has already subtracted the
Jemalloc cache.

Not merged to 2.1 because 2.1 is stable now.
---
 be/src/runtime/memory/global_memory_arbitrator.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/be/src/runtime/memory/global_memory_arbitrator.h b/be/src/runtime/memory/global_memory_arbitrator.h
index 075113088fbc5b9..a7a85725ab10c98 100644
--- a/be/src/runtime/memory/global_memory_arbitrator.h
+++ b/be/src/runtime/memory/global_memory_arbitrator.h
@@ -76,7 +76,7 @@ class GlobalMemoryArbitrator {
     static inline int64_t sys_mem_available() {
         return MemInfo::_s_sys_mem_available.load(std::memory_order_relaxed) -
                refresh_interval_memory_growth.load(std::memory_order_relaxed) -
-               process_reserved_memory();
+               process_reserved_memory() + static_cast<int64_t>(MemInfo::allocator_cache_mem());
     }

     static inline std::string sys_mem_available_str() {
@@ -91,12 +91,14 @@ static inline std::string sys_mem_available_details_str() {
         auto msg = fmt::format(
                 "sys available memory {}(= {}[proc/available] - {}[reserved] - "
-                "{}B[waiting_refresh])",
+                "{}B[waiting_refresh] + {}[tc/jemalloc_cache])",
                 PrettyPrinter::print(sys_mem_available(), TUnit::BYTES),
                 PrettyPrinter::print(MemInfo::_s_sys_mem_available.load(std::memory_order_relaxed),
                                      TUnit::BYTES),
                 PrettyPrinter::print(process_reserved_memory(), TUnit::BYTES),
-                refresh_interval_memory_growth);
+                refresh_interval_memory_growth,
+                PrettyPrinter::print(static_cast<int64_t>(MemInfo::allocator_cache_mem()),
+                                     TUnit::BYTES));
 #ifdef ADDRESS_SANITIZER
     msg = "[ASAN]" + msg;
 #endif

From 55c26e03e5f76fe80e6bedd2bc31760e55cd6707 Mon Sep 17 00:00:00 2001
From: linrrarity <142187136+linrrzqqq@users.noreply.github.com>
Date: Thu, 19 Dec 2024 20:04:15 +0800
Subject: [PATCH 13/55] [Enhancement](Log) Reduce usage of log fatal(PART I) (#42344)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Proposed changes

Issue Number: close #40835

Use `throw Exception` to replace the `LOG(FATAL)` calls that are not inside
`if constexpr`, and change part of the `INTERNAL_ERROR` usages from this
[pr](https://github.com/apache/doris/pull/38144/files) (files
`aggregate_function_reader_first_last.h` and `aggregate_function_window.h`) to
`FatalError`. For the calls that sit in an `if constexpr ... else {...}` branch,
the plan is to use a `static_assert` on the template argument used in that
condition, so the check moves to compile time; however, there seem to be some
bugs with the template parameter instantiation in the files
`comparison_predicate.h`, `set_probe_sink_operator.cpp`, `set_sink_operator.cpp`,
`in_list_predicate.h` and `set_source_operator.cpp` that I haven't modified yet.
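As an editorial illustration of the pattern this patch applies, here is a
minimal sketch of a converted call site (not part of the diff; the function
name and message are hypothetical, and only `doris::Exception` plus the
`Status::FatalError` helper added in `be/src/common/status.h` come from this
patch, so it builds only inside the Doris BE source tree):

```cpp
#include "common/exception.h" // doris::Exception
#include "common/status.h"    // doris::Status::FatalError, added by this patch

#include <string>

// Hypothetical helper used only for illustration.
std::string cache_type_name(int type) {
    switch (type) {
    case 0:
        return "DataPageCache";
    default:
        // Before: LOG(FATAL) << "unknown cache type: " << type;
        // which aborted the process unconditionally, even in release builds.
        // After: in debug builds Status::FatalError still logs fatally, while
        // in release builds it produces an error Status that is thrown as an
        // exception the caller can catch and convert back into a Status.
        throw doris::Exception(
                doris::Status::FatalError("unknown cache type: {}", type));
    }
}
```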
--------- Co-authored-by: wyxxxcat <1520358997@qq.com> --- be/src/common/status.h | 11 +++- be/src/gutil/strings/escaping.cc | 5 +- be/src/gutil/strings/numbers.cc | 6 +- be/src/gutil/strings/util.cc | 5 +- .../threading/thread_collision_warner.cc | 8 ++- be/src/io/file_factory.h | 5 +- be/src/olap/block_column_predicate.h | 12 ++-- be/src/olap/data_dir.cpp | 16 +++--- be/src/olap/key_coder.h | 13 ++--- be/src/olap/like_column_predicate.h | 8 +-- be/src/olap/match_predicate.h | 3 +- be/src/olap/null_predicate.h | 4 +- be/src/olap/olap_common.h | 6 +- be/src/olap/page_cache.h | 6 +- be/src/olap/rowset/beta_rowset_writer_v2.h | 3 +- be/src/olap/rowset/rowset_writer.h | 4 +- .../segment_v2/hierarchical_data_reader.cpp | 6 +- be/src/olap/storage_policy.cpp | 6 +- be/src/olap/tablet_reader.cpp | 2 +- be/src/pipeline/dependency.h | 3 +- be/src/pipeline/exec/exchange_sink_buffer.cpp | 3 +- be/src/pipeline/exec/exchange_sink_buffer.h | 5 +- be/src/pipeline/exec/hashjoin_build_sink.cpp | 4 +- be/src/pipeline/exec/operator.cpp | 3 +- be/src/pipeline/exec/operator.h | 6 +- be/src/runtime/exec_env_init.cpp | 10 ++-- be/src/runtime/jsonb_value.h | 33 ++++------- be/src/runtime/memory/cache_manager.h | 3 +- be/src/runtime/memory/cache_policy.h | 6 +- be/src/runtime/memory/lru_cache_policy.h | 3 +- .../runtime/memory/thread_mem_tracker_mgr.h | 4 +- be/src/runtime/snapshot_loader.cpp | 5 +- .../stream_load/stream_load_executor.cpp | 3 +- be/src/runtime/thread_context.h | 7 +-- be/src/util/binary_cast.hpp | 3 +- be/src/util/bit_util.h | 4 +- be/src/util/bitmap_value.h | 3 +- be/src/util/block_compression.cpp | 3 +- be/src/util/easy_json.cc | 5 +- be/src/util/jsonb_utils.h | 4 +- be/src/util/rle_encoding.h | 2 +- be/src/util/threadpool.cpp | 11 ++-- be/src/util/timezone_utils.cpp | 4 +- .../aggregate_function_map.h | 5 +- .../aggregate_function_reader_first_last.h | 17 ++---- .../aggregate_function_window.h | 19 +++---- be/src/vec/columns/column_string.cpp | 10 ++-- be/src/vec/common/assert_cast.h | 19 +++---- .../vec/common/hash_table/string_hash_table.h | 3 +- be/src/vec/common/schema_util.cpp | 5 +- be/src/vec/core/block.cpp | 4 +- be/src/vec/core/decimal_comparison.h | 23 +++----- be/src/vec/core/field.h | 56 ++++++++----------- be/src/vec/core/types.h | 3 +- .../vec/data_types/data_type_number_base.cpp | 3 +- be/src/vec/data_types/data_type_number_base.h | 3 +- be/src/vec/data_types/serde/data_type_serde.h | 5 +- .../exec/format/parquet/bool_rle_decoder.cpp | 9 +-- be/src/vec/exec/format/parquet/decoder.h | 4 +- .../format/parquet/delta_bit_pack_decoder.h | 10 ++-- .../format/parquet/parquet_column_convert.h | 9 +-- .../format/parquet/vparquet_column_reader.h | 4 +- be/src/vec/exec/jni_connector.cpp | 4 +- be/src/vec/exec/scan/split_source_connector.h | 6 +- be/src/vec/exprs/vexpr.h | 12 ++-- .../functions/array/function_array_apply.cpp | 4 +- be/src/vec/functions/function_cast.h | 12 +++- be/src/vec/json/simd_json_parser.h | 4 +- be/src/vec/olap/olap_data_convertor.h | 6 +- be/src/vec/runtime/vdatetime_value.cpp | 6 +- be/test/util/threadpool_test.cpp | 1 + 71 files changed, 250 insertions(+), 282 deletions(-) diff --git a/be/src/common/status.h b/be/src/common/status.h index d059f289402ceab..0252ec8564feebc 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -293,7 +293,8 @@ namespace ErrorCode { E(ENTRY_NOT_FOUND, -7002, false); \ E(INVALID_TABLET_STATE, -7211, false); \ E(ROWSETS_EXPIRED, -7311, false); \ - E(CGROUP_ERROR, -7411, false); + E(CGROUP_ERROR, -7411, false); \ + 
E(FATAL_ERROR, -7412, false); // Define constexpr int error_code_name = error_code_value #define M(NAME, ERRORCODE, ENABLESTACKTRACE) constexpr int NAME = ERRORCODE; @@ -446,6 +447,14 @@ class [[nodiscard]] Status { static Status OK() { return {}; } + template + static Status FatalError(std::string_view msg, Args&&... args) { +#ifndef NDEBUG + LOG(FATAL) << fmt::format(msg, std::forward(args)...); +#endif + return Error(msg, std::forward(args)...); + } + // default have stacktrace. could disable manually. #define ERROR_CTOR(name, code) \ template \ diff --git a/be/src/gutil/strings/escaping.cc b/be/src/gutil/strings/escaping.cc index 2ff59104f6d5cec..c6ba8e2f9c375e9 100644 --- a/be/src/gutil/strings/escaping.cc +++ b/be/src/gutil/strings/escaping.cc @@ -10,6 +10,8 @@ #include #include +#include "common/exception.h" + using std::numeric_limits; #include @@ -1084,7 +1086,8 @@ int Base64UnescapeInternal(const char* src, int szsrc, char* dest, int szdest, default: // state should have no other values at this point. - LOG(FATAL) << "This can't happen; base64 decoder state = " << state; + throw doris::Exception( + doris::Status::FatalError("This can't happen; base64 decoder state = {}", state)); } // The remainder of the string should be all whitespace, mixed with diff --git a/be/src/gutil/strings/numbers.cc b/be/src/gutil/strings/numbers.cc index f471bf31bd08bb1..f044ea08d315511 100644 --- a/be/src/gutil/strings/numbers.cc +++ b/be/src/gutil/strings/numbers.cc @@ -19,6 +19,8 @@ #include #include +#include "common/exception.h" + using std::numeric_limits; #include @@ -772,8 +774,8 @@ uint64 atoi_kmgt(const char* s) { scale = GG_ULONGLONG(1) << 40; break; default: - LOG(FATAL) << "Invalid mnemonic: `" << c << "';" - << " should be one of `K', `M', `G', and `T'."; + throw doris::Exception(doris::Status::FatalError( + "Invalid mnemonic: `{}'; should be one of `K', `M', `G', and `T'.", c)); } } return n * scale; diff --git a/be/src/gutil/strings/util.cc b/be/src/gutil/strings/util.cc index 80d5d463430c775..37c09d63b24fffe 100644 --- a/be/src/gutil/strings/util.cc +++ b/be/src/gutil/strings/util.cc @@ -19,6 +19,8 @@ #include #include +#include "common/exception.h" + using std::copy; using std::max; using std::min; @@ -489,8 +491,7 @@ const char* strstr_delimited(const char* haystack, const char* needle, char deli ++haystack; } } - LOG(FATAL) << "Unreachable statement"; - return nullptr; + throw doris::Exception(doris::Status::FatalError("Unreachable statement")); } // ---------------------------------------------------------------------- diff --git a/be/src/gutil/threading/thread_collision_warner.cc b/be/src/gutil/threading/thread_collision_warner.cc index d2f1e47f8e02d94..fd51a9195d629ee 100644 --- a/be/src/gutil/threading/thread_collision_warner.cc +++ b/be/src/gutil/threading/thread_collision_warner.cc @@ -4,6 +4,9 @@ #include "gutil/threading/thread_collision_warner.h" +#include "common/exception.h" +#include "common/status.h" + #ifdef __linux__ #include #else @@ -19,8 +22,9 @@ namespace base { void DCheckAsserter::warn(int64_t previous_thread_id, int64_t current_thread_id) { - LOG(FATAL) << "Thread Collision! Previous thread id: " << previous_thread_id - << ", current thread id: " << current_thread_id; + throw doris::Exception(doris::Status::FatalError( + "Thread Collision! 
Previous thread id: {}, current thread id: {}", previous_thread_id, + current_thread_id)); } static subtle::Atomic64 CurrentThread() { diff --git a/be/src/io/file_factory.h b/be/src/io/file_factory.h index 9d9d714812ffe90..afa54e221664c94 100644 --- a/be/src/io/file_factory.h +++ b/be/src/io/file_factory.h @@ -118,10 +118,9 @@ class FileFactory { case TStorageBackendType::HDFS: return TFileType::FILE_HDFS; default: - LOG(FATAL) << "not match type to convert, from type:" << type; + throw Exception(Status::FatalError("not match type to convert, from type:{}", type)); } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } }; diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h index eed5e18329acf75..b6ff115c34c72d1 100644 --- a/be/src/olap/block_column_predicate.h +++ b/be/src/olap/block_column_predicate.h @@ -74,25 +74,21 @@ class BlockColumnPredicate { } virtual bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const { - LOG(FATAL) << "should not reach here"; - return true; + throw Exception(Status::FatalError("should not reach here")); } virtual bool support_zonemap() const { return true; } virtual bool evaluate_and(const std::pair& statistic) const { - LOG(FATAL) << "should not reach here"; - return true; + throw Exception(Status::FatalError("should not reach here")); } virtual bool evaluate_and(const segment_v2::BloomFilter* bf) const { - LOG(FATAL) << "should not reach here"; - return true; + throw Exception(Status::FatalError("should not reach here")); } virtual bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const { - LOG(FATAL) << "should not reach here"; - return true; + throw Exception(Status::FatalError("should not reach here")); } virtual bool can_do_bloom_filter(bool ngram) const { return false; } diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 4070bd1dd4340e3..4aa215e0c2eb16a 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -316,10 +316,10 @@ Status DataDir::_check_incompatible_old_format_tablet() { std::string_view value) -> bool { // if strict check incompatible old format, then log fatal if (config::storage_strict_check_incompatible_old_format) { - LOG(FATAL) - << "There are incompatible old format metas, current version does not support " - << "and it may lead to data missing!!! " - << "tablet_id = " << tablet_id << " schema_hash = " << schema_hash; + throw Exception(Status::FatalError( + "There are incompatible old format metas, current version does not support and " + "it may lead to data missing!!! tablet_id = {} schema_hash = {}", + tablet_id, schema_hash)); } else { LOG(WARNING) << "There are incompatible old format metas, current version does not support " @@ -451,7 +451,8 @@ Status DataDir::load() { << ", loaded tablet: " << tablet_ids.size() << ", error tablet: " << failed_tablet_ids.size() << ", path: " << _path; if (!config::ignore_load_tablet_failure) { - LOG(FATAL) << "load tablets encounter failure. stop BE process. path: " << _path; + throw Exception(Status::FatalError( + "load tablets encounter failure. stop BE process. 
path: {}", _path)); } } if (!load_tablet_status) { @@ -495,10 +496,9 @@ Status DataDir::load() { } } if (rowset_partition_id_eq_0_num > config::ignore_invalid_partition_id_rowset_num) { - LOG(FATAL) << fmt::format( + throw Exception(Status::FatalError( "roswet partition id eq 0 is {} bigger than config {}, be exit, plz check be.INFO", - rowset_partition_id_eq_0_num, config::ignore_invalid_partition_id_rowset_num); - exit(-1); + rowset_partition_id_eq_0_num, config::ignore_invalid_partition_id_rowset_num)); } // traverse rowset diff --git a/be/src/olap/key_coder.h b/be/src/olap/key_coder.h index 6885a0d96f251bc..549ac53656b647d 100644 --- a/be/src/olap/key_coder.h +++ b/be/src/olap/key_coder.h @@ -109,8 +109,8 @@ class KeyCoderTraits< case 16: return BigEndian::FromHost128(val); default: - LOG(FATAL) << "Invalid type to big endian, type=" << int(field_type) - << ", size=" << sizeof(UnsignedCppType); + throw Exception(Status::FatalError("Invalid type to big endian, type={}, size={}", + int(field_type), sizeof(UnsignedCppType))); } } } @@ -300,8 +300,7 @@ class KeyCoderTraits { } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { - LOG(FATAL) << "decode_ascending is not implemented"; - return Status::OK(); + throw Exception(Status::FatalError("decode_ascending is not implemented")); } }; @@ -320,8 +319,7 @@ class KeyCoderTraits { } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { - LOG(FATAL) << "decode_ascending is not implemented"; - return Status::OK(); + throw Exception(Status::FatalError("decode_ascending is not implemented")); } }; @@ -340,8 +338,7 @@ class KeyCoderTraits { } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { - LOG(FATAL) << "decode_ascending is not implemented"; - return Status::OK(); + throw Exception(Status::FatalError("decode_ascending is not implemented")); } }; diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h index 31763d45f7edc72..e0d185c7bd3e986 100644 --- a/be/src/olap/like_column_predicate.h +++ b/be/src/olap/like_column_predicate.h @@ -128,8 +128,8 @@ class LikeColumnPredicate : public ColumnPredicate { } } } else { - LOG(FATAL) << "vectorized (not) like predicates should be dict column"; - __builtin_unreachable(); + throw Exception(Status::FatalError( + "vectorized (not) like predicates should be dict column")); } } else { if (column.is_column_dictionary()) { @@ -153,8 +153,8 @@ class LikeColumnPredicate : public ColumnPredicate { } } } else { - LOG(FATAL) << "vectorized (not) like predicates should be dict column"; - __builtin_unreachable(); + throw Exception(Status::FatalError( + "vectorized (not) like predicates should be dict column")); } } } diff --git a/be/src/olap/match_predicate.h b/be/src/olap/match_predicate.h index ad202b7b2427cf4..3ff1775fd8882a3 100644 --- a/be/src/olap/match_predicate.h +++ b/be/src/olap/match_predicate.h @@ -55,8 +55,7 @@ class MatchPredicate : public ColumnPredicate { //evaluate predicate on Bitmap Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* roaring) const override { - LOG(FATAL) << "Not Implemented MatchPredicate::evaluate"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Not Implemented MatchPredicate::evaluate")); } //evaluate predicate on inverted diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h index 59480264b461037..8e3fef1ff276956 100644 --- a/be/src/olap/null_predicate.h +++ 
b/be/src/olap/null_predicate.h
@@ -87,8 +87,8 @@ class NullPredicate : public ColumnPredicate {
         if (_is_null) {
             return bf->test_bytes(nullptr, 0);
         } else {
-            LOG(FATAL) << "Bloom filter is not supported by predicate type: is_null=" << _is_null;
-            return true;
+            throw Exception(Status::FatalError(
+                    "Bloom filter is not supported by predicate type: is_null={}", _is_null));
         }
     }

diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 11249bafb1e3c07..3b892e5d360e541 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -36,6 +36,7 @@
 #include

 #include "common/config.h"
+#include "common/exception.h"
 #include "io/io_common.h"
 #include "olap/olap_define.h"
 #include "olap/rowset/rowset_fwd.h"
@@ -419,7 +420,8 @@ struct RowsetId {
                 LOG(WARNING) << "failed to init rowset id: " << rowset_id_str;
                 high = next_rowset_id().hi;
             } else {
-                LOG(FATAL) << "failed to init rowset id: " << rowset_id_str;
+                throw Exception(
+                        Status::FatalError("failed to init rowset id: {}", rowset_id_str));
             }
         }
         init(1, high, 0, 0);
@@ -440,7 +442,7 @@
     void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) {
         version = id_version;
         if (UNLIKELY(high >= MAX_ROWSET_ID)) {
-            LOG(FATAL) << "inc rowsetid is too large:" << high;
+            throw Exception(Status::FatalError("inc rowsetid is too large:{}", high));
         }
         hi = (id_version << 56) + (high & LOW_56_BITS);
         mi = middle;
diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h
index 32b6683e7823b04..db1a6808345525d 100644
--- a/be/src/olap/page_cache.h
+++ b/be/src/olap/page_cache.h
@@ -176,11 +176,9 @@ class StoragePageCache {
             return _pk_index_page_cache.get();
         }
         default:
-            LOG(FATAL) << "get error type page cache";
-            __builtin_unreachable();
+            throw Exception(Status::FatalError("get error type page cache"));
         }
-        LOG(FATAL) << "__builtin_unreachable";
-        __builtin_unreachable();
+        throw Exception(Status::FatalError("__builtin_unreachable"));
     }
 };

diff --git a/be/src/olap/rowset/beta_rowset_writer_v2.h b/be/src/olap/rowset/beta_rowset_writer_v2.h
index 78ec4a7dce703c4..9040003a68d0d8c 100644
--- a/be/src/olap/rowset/beta_rowset_writer_v2.h
+++ b/be/src/olap/rowset/beta_rowset_writer_v2.h
@@ -99,8 +99,7 @@ class BetaRowsetWriterV2 : public RowsetWriter {
     };

     RowsetSharedPtr manual_build(const RowsetMetaSharedPtr& rowset_meta) override {
-        LOG(FATAL) << "not implemeted";
-        return nullptr;
+        throw Exception(Status::FatalError("not implemented"));
     }

     PUniqueId load_id() override { return _context.load_id; }
diff --git a/be/src/olap/rowset/rowset_writer.h b/be/src/olap/rowset/rowset_writer.h
index f84ff964ea30516..0a0d36ea04a6612 100644
--- a/be/src/olap/rowset/rowset_writer.h
+++ b/be/src/olap/rowset/rowset_writer.h
@@ -170,7 +170,9 @@ class RowsetWriter {
     virtual int32_t allocate_segment_id() = 0;

-    virtual void set_segment_start_id(int num_segment) { LOG(FATAL) << "not supported!"; }
+    virtual void set_segment_start_id(int num_segment) {
+        throw Exception(Status::FatalError("not supported!"));
+    }

     virtual int64_t delete_bitmap_ns() { return 0; }

diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index db6bac6b8b4c096..fe7167e9444a76a 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -80,8 +80,7 @@ Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
 }

 Status HierarchicalDataReader::seek_to_first() {
-    LOG(FATAL) << "Not implemented";
-
__builtin_unreachable(); + throw Exception(Status::FatalError("Not implemented")); } Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) { @@ -159,8 +158,7 @@ Status ExtractReader::init(const ColumnIteratorOptions& opts) { } Status ExtractReader::seek_to_first() { - LOG(FATAL) << "Not implemented"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Not implemented")); } Status ExtractReader::seek_to_ordinal(ordinal_t ord) { diff --git a/be/src/olap/storage_policy.cpp b/be/src/olap/storage_policy.cpp index 837e9bed178e3a9..3b4a1f1a185678c 100644 --- a/be/src/olap/storage_policy.cpp +++ b/be/src/olap/storage_policy.cpp @@ -141,8 +141,10 @@ std::vector> get_storage_resource_ids() { namespace { [[noreturn]] void exit_at_unknown_path_version(std::string_view resource_id, int64_t path_version) { - LOG(FATAL) << "unknown path version, please upgrade BE or drop this storage vault. resource_id=" - << resource_id << " path_version=" << path_version; + throw Exception( + Status::FatalError("unknown path version, please upgrade BE or drop this storage " + "vault. resource_id={} path_version={}", + resource_id, path_version)); } } // namespace diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index a83e0bfdbf4c30d..17cab2a3c0c8345 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -61,7 +61,7 @@ using namespace ErrorCode; void TabletReader::ReaderParams::check_validation() const { if (UNLIKELY(version.first == -1 && is_segcompaction == false)) { - LOG(FATAL) << "version is not set. tablet=" << tablet->tablet_id(); + throw Exception(Status::FatalError("version is not set. tablet={}", tablet->tablet_id())); } } diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index f1cfe2b02977e12..ecbd49a5647c2e4 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -723,8 +723,7 @@ inline std::string get_exchange_type_name(ExchangeType idx) { case ExchangeType::LOCAL_MERGE_SORT: return "LOCAL_MERGE_SORT"; } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } struct DataDistribution { diff --git a/be/src/pipeline/exec/exchange_sink_buffer.cpp b/be/src/pipeline/exec/exchange_sink_buffer.cpp index e3f895444d4168c..800ef6150738d61 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.cpp +++ b/be/src/pipeline/exec/exchange_sink_buffer.cpp @@ -422,8 +422,7 @@ void ExchangeSinkBuffer::_ended(InstanceLoId id) { } LOG(INFO) << ss.str(); - LOG(FATAL) << "not find the instance id"; - __builtin_unreachable(); + throw Exception(Status::FatalError("not find the instance id")); } else { std::unique_lock lock(*_instance_to_package_queue_mutex[id]); _running_sink_count[id]--; diff --git a/be/src/pipeline/exec/exchange_sink_buffer.h b/be/src/pipeline/exec/exchange_sink_buffer.h index 458c7c3f66e3eec..a381c5aff144f3e 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.h +++ b/be/src/pipeline/exec/exchange_sink_buffer.h @@ -155,10 +155,9 @@ class ExchangeSendCallback : public ::doris::DummyBrpcCallback { start_rpc_time); } } catch (const std::exception& exp) { - LOG(FATAL) << "brpc callback error: " << exp.what(); + throw Exception(Status::FatalError("brpc callback error: {}", exp.what())); } catch (...) 
{ - LOG(FATAL) << "brpc callback error."; - __builtin_unreachable(); + throw Exception(Status::FatalError("brpc callback error.")); } } int64_t start_rpc_time; diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index 19e8493e596a7e8..47560875b51252b 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -303,9 +303,7 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, [&](std::monostate& arg, auto join_op, auto short_circuit_for_null_in_build_side, auto with_other_conjuncts) -> Status { - LOG(FATAL) << "FATAL: uninited hash table"; - __builtin_unreachable(); - return Status::OK(); + throw Exception(Status::FatalError("FATAL: uninited hash table")); }, [&](auto&& arg, auto&& join_op, auto short_circuit_for_null_in_build_side, auto with_other_conjuncts) -> Status { diff --git a/be/src/pipeline/exec/operator.cpp b/be/src/pipeline/exec/operator.cpp index f6664e147a3dab5..bb254aae72b8a74 100644 --- a/be/src/pipeline/exec/operator.cpp +++ b/be/src/pipeline/exec/operator.cpp @@ -414,8 +414,7 @@ std::shared_ptr DataSinkOperatorX::create_shar return nullptr; } else if constexpr (std::is_same_v) { - LOG(FATAL) << "should not reach here!"; - return nullptr; + throw Exception(Status::FatalError("should not reach here!")); } else { auto ss = LocalStateType::SharedStateType::create_shared(); ss->id = operator_id(); diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h index a2c8e110cedac3c..df6e9c913b6b4c3 100644 --- a/be/src/pipeline/exec/operator.h +++ b/be/src/pipeline/exec/operator.h @@ -632,12 +632,10 @@ class OperatorXBase : public OperatorBase { _limit(-1) {} virtual Status init(const TPlanNode& tnode, RuntimeState* state); Status init(const TDataSink& tsink) override { - LOG(FATAL) << "should not reach here!"; - return Status::OK(); + throw Exception(Status::FatalError("should not reach here!")); } virtual Status init(ExchangeType type) { - LOG(FATAL) << "should not reach here!"; - return Status::OK(); + throw Exception(Status::FatalError("should not reach here!")); } [[noreturn]] virtual const std::vector& runtime_filter_descs() { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, _op_name); diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index a371cdb947ff56d..2d7554e702969f6 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -421,9 +421,9 @@ void ExecEnv::init_file_cache_factory(std::vector& cache_paths std::unordered_set cache_path_set; Status rest = doris::parse_conf_cache_paths(doris::config::file_cache_path, cache_paths); if (!rest) { - LOG(FATAL) << "parse config file cache path failed, path=" << doris::config::file_cache_path - << ", reason=" << rest.msg(); - exit(-1); + throw Exception( + Status::FatalError("parse config file cache path failed, path={}, reason={}", + doris::config::file_cache_path, rest.msg())); } doris::Status cache_status; @@ -437,8 +437,8 @@ void ExecEnv::init_file_cache_factory(std::vector& cache_paths cache_path.path, cache_path.init_settings()); if (!cache_status.ok()) { if (!doris::config::ignore_broken_disk) { - LOG(FATAL) << "failed to init file cache, err: " << cache_status; - exit(-1); + throw Exception( + Status::FatalError("failed to init file cache, err: {}", cache_status)); } LOG(WARNING) << "failed to init file cache, err: " << cache_status; } diff --git a/be/src/runtime/jsonb_value.h b/be/src/runtime/jsonb_value.h 
index 65f4927759c3047..5f530db1ac81170 100644 --- a/be/src/runtime/jsonb_value.h +++ b/be/src/runtime/jsonb_value.h @@ -61,58 +61,47 @@ struct JsonBinaryValue { } bool operator==(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } // != bool ne(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } // <= bool le(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } // >= bool ge(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } // < bool lt(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } // > bool gt(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } bool operator!=(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } bool operator<=(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } bool operator>=(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } bool operator<(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } bool operator>(const JsonBinaryValue& other) const { - LOG(FATAL) << "comparing between JsonBinaryValue is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonBinaryValue is not supported")); } Status from_json_string(const char* s, size_t len); diff --git a/be/src/runtime/memory/cache_manager.h b/be/src/runtime/memory/cache_manager.h index a2a089b929dbdf9..1e89e957ba1ce6a 100644 --- a/be/src/runtime/memory/cache_manager.h +++ b/be/src/runtime/memory/cache_manager.h @@ -40,7 +40,8 @@ class CacheManager { #ifdef BE_TEST _caches.erase(it); #else - LOG(FATAL) << "Repeat register cache " << CachePolicy::type_string(cache->type()); + throw Exception(Status::FatalError("Repeat register cache {}", + CachePolicy::type_string(cache->type()))); #endif // BE_TEST } _caches.insert({cache->type(), cache}); diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h 
index 8f077a4eb45bb1f..72e61fed2e00131 100644
--- a/be/src/runtime/memory/cache_policy.h
+++ b/be/src/runtime/memory/cache_policy.h
@@ -99,10 +99,10 @@ class CachePolicy {
         case CacheType::TABLET_COLUMN_OBJECT_POOL:
             return "TabletColumnObjectPool";
         default:
-            LOG(FATAL) << "not match type of cache policy :" << static_cast<int>(type);
+            throw Exception(Status::FatalError("not match type of cache policy :{}",
+                                               static_cast<int>(type)));
         }
-        LOG(FATAL) << "__builtin_unreachable";
-        __builtin_unreachable();
+        throw Exception(Status::FatalError("__builtin_unreachable"));
     }

     inline static std::unordered_map StringToType = {
diff --git a/be/src/runtime/memory/lru_cache_policy.h b/be/src/runtime/memory/lru_cache_policy.h
index 3fdb43facd77159..d4c282dab8274e5 100644
--- a/be/src/runtime/memory/lru_cache_policy.h
+++ b/be/src/runtime/memory/lru_cache_policy.h
@@ -90,7 +90,8 @@ class LRUCachePolicy : public CachePolicy {
         case LRUCacheType::NUMBER:
             return "number";
         default:
-            LOG(FATAL) << "not match type of lru cache:" << static_cast<int>(type);
+            throw Exception(
+                    Status::FatalError("not match type of lru cache:{}", static_cast<int>(type)));
         }
     }

diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.h b/be/src/runtime/memory/thread_mem_tracker_mgr.h
index db3b32a6298820d..9dbf4399492d02c 100644
--- a/be/src/runtime/memory/thread_mem_tracker_mgr.h
+++ b/be/src/runtime/memory/thread_mem_tracker_mgr.h
@@ -246,13 +246,13 @@ inline void ThreadMemTrackerMgr::consume(int64_t size, int skip_large_memory_che
     }
     if (doris::config::crash_in_alloc_large_memory_bytes > 0 &&
         size > doris::config::crash_in_alloc_large_memory_bytes) {
-        LOG(FATAL) << fmt::format(
+        throw Exception(Status::FatalError(
                 "alloc large memory: {}, {}, crash and generate core dumps to help analyze, "
                 "stacktrace:\n{}",
                 size, is_attach_query() ?
"in query or load: " + print_id(_query_id) : "not in query or load", - get_stack_trace()); + get_stack_trace())); } } } diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index 784904c78a3fb17..b492a929fca3bff 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -74,7 +74,7 @@ Status upload_with_checksum(io::RemoteFileSystem& fs, std::string_view local_pat RETURN_IF_ERROR(fs.upload(local_path, full_remote_path)); break; default: - LOG(FATAL) << "unknown fs type: " << static_cast(fs.type()); + throw Exception(Status::FatalError("unknown fs type: {}", static_cast(fs.type()))); } return Status::OK(); } @@ -807,8 +807,7 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta } } else { - LOG(FATAL) << "only support overwrite now"; - __builtin_unreachable(); + throw Exception(Status::FatalError("only support overwrite now")); } // snapshot loader not need to change tablet uid diff --git a/be/src/runtime/stream_load/stream_load_executor.cpp b/be/src/runtime/stream_load/stream_load_executor.cpp index ad4d22946f1b837..054de96a8814256 100644 --- a/be/src/runtime/stream_load/stream_load_executor.cpp +++ b/be/src/runtime/stream_load/stream_load_executor.cpp @@ -390,8 +390,7 @@ bool StreamLoadExecutor::collect_load_stat(StreamLoadContext* ctx, TTxnCommitAtt } switch (ctx->load_type) { case TLoadType::MINI_LOAD: { - LOG(FATAL) << "mini load is not supported any more"; - break; + throw Exception(Status::FatalError("mini load is not supported any more")); } case TLoadType::ROUTINE_LOAD: { attach->loadType = TLoadType::ROUTINE_LOAD; diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h index e0a44af69c1d663..9ba7949ec5afad2 100644 --- a/be/src/runtime/thread_context.h +++ b/be/src/runtime/thread_context.h @@ -354,8 +354,7 @@ class ThreadLocalHandle { DCHECK(bthread_context != nullptr); bthread_context->thread_local_handle_count--; } else { - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } } }; @@ -379,8 +378,8 @@ static ThreadContext* thread_context(bool allow_return_null = false) { return nullptr; } // It means that use thread_context() but this thread not attached a query/load using SCOPED_ATTACH_TASK macro. - LOG(FATAL) << "__builtin_unreachable, " << doris::memory_orphan_check_msg; - __builtin_unreachable(); + throw Exception( + Status::FatalError("__builtin_unreachable, {}", doris::memory_orphan_check_msg)); } // belong to one query object member, not be shared by multiple queries. 
diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp index 8a91ab3a5791520..e7c62ad45ac091a 100644 --- a/be/src/util/binary_cast.hpp +++ b/be/src/util/binary_cast.hpp @@ -137,8 +137,7 @@ To binary_cast(From from) { conv.decimal = from; return conv.i128; } else { - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } } diff --git a/be/src/util/bit_util.h b/be/src/util/bit_util.h index 504b0b274281906..5ec5a8bf8e1aa41 100644 --- a/be/src/util/bit_util.h +++ b/be/src/util/bit_util.h @@ -237,9 +237,7 @@ class BitUtil { } else if constexpr (std::is_same_v) { return value; } else { - __builtin_unreachable(); - LOG(FATAL) << "__builtin_unreachable"; - return value; + throw Exception(Status::FatalError("__builtin_unreachable")); } } diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 2d15ac99611274f..528dbe407882295 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -2519,8 +2519,7 @@ class BitmapValueIterator { } break; case BitmapValue::BitmapDataType::SET: { - LOG(FATAL) << "BitmapValue with set do not support move"; - break; + throw Exception(Status::FatalError("BitmapValue with set do not support move")); } default: break; diff --git a/be/src/util/block_compression.cpp b/be/src/util/block_compression.cpp index d1788b0948a6f2c..7a0aacd4252dec4 100644 --- a/be/src/util/block_compression.cpp +++ b/be/src/util/block_compression.cpp @@ -233,7 +233,8 @@ class HadoopLz4BlockCompression : public Lz4BlockCompression { HadoopLz4BlockCompression() { Status st = Decompressor::create_decompressor(CompressType::LZ4BLOCK, &_decompressor); if (!st.ok()) { - LOG(FATAL) << "HadoopLz4BlockCompression construction failed. status = " << st << "\n"; + throw Exception(Status::FatalError( + "HadoopLz4BlockCompression construction failed. 
status = {}", st)); } } diff --git a/be/src/util/easy_json.cc b/be/src/util/easy_json.cc index 46c3a1867f7b421..fcb8021e3836b2b 100644 --- a/be/src/util/easy_json.cc +++ b/be/src/util/easy_json.cc @@ -27,6 +27,8 @@ #include #include #include + +#include "common/exception.h" // IWYU pragma: no_include using rapidjson::SizeType; @@ -200,8 +202,7 @@ EasyJson EasyJson::PushBack(EasyJson::ComplexTypeInitializer val) { } else if (val == kArray) { push_val.SetArray(); } else { - LOG(FATAL) << "Unknown initializer type"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Unknown initializer type")); } value_->PushBack(push_val, alloc_->allocator()); return EasyJson(&(*value_)[value_->Size() - 1], alloc_); diff --git a/be/src/util/jsonb_utils.h b/be/src/util/jsonb_utils.h index 7dba0dca3af1eb2..8ec842ef227dd5b 100644 --- a/be/src/util/jsonb_utils.h +++ b/be/src/util/jsonb_utils.h @@ -23,6 +23,7 @@ #include +#include "common/exception.h" #include "jsonb_document.h" #include "jsonb_stream.h" #include "jsonb_writer.h" @@ -42,7 +43,8 @@ class JsonbToJson { const std::string to_json_string(const char* data, size_t size) { JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size); if (!pdoc) { - LOG(FATAL) << "invalid json binary value: " << std::string_view(data, size); + throw Exception(Status::FatalError("invalid json binary value: {}", + std::string_view(data, size))); } return to_json_string(pdoc->getValue()); } diff --git a/be/src/util/rle_encoding.h b/be/src/util/rle_encoding.h index 206349b472815d6..5369ace9eed6ce5 100644 --- a/be/src/util/rle_encoding.h +++ b/be/src/util/rle_encoding.h @@ -283,7 +283,7 @@ void RleDecoder::RewindOne() { switch (rewind_state_) { case CANT_REWIND: - LOG(FATAL) << "Can't rewind more than once after each read!"; + throw Exception(Status::FatalError("Can't rewind more than once after each read!")); break; case REWIND_RUN: ++repeat_count_; diff --git a/be/src/util/threadpool.cpp b/be/src/util/threadpool.cpp index f5ea38515def363..e9af13f556e1436 100644 --- a/be/src/util/threadpool.cpp +++ b/be/src/util/threadpool.cpp @@ -27,6 +27,7 @@ #include #include +#include "common/exception.h" #include "common/logging.h" #include "gutil/map-util.h" #include "gutil/port.h" @@ -194,7 +195,7 @@ void ThreadPoolToken::transition(State new_state) { CHECK(false); // QUIESCED is a terminal state break; default: - LOG(FATAL) << "Unknown token state: " << _state; + throw Exception(Status::FatalError("Unknown token state: {}", _state)); } #endif @@ -616,10 +617,10 @@ Status ThreadPool::create_thread() { void ThreadPool::check_not_pool_thread_unlocked() { Thread* current = Thread::current_thread(); if (ContainsKey(_threads, current)) { - LOG(FATAL) << strings::Substitute( - "Thread belonging to thread pool '$0' with " - "name '$1' called pool function that would result in deadlock", - _name, current->name()); + throw Exception( + Status::FatalError("Thread belonging to thread pool {} with " + "name {} called pool function that would result in deadlock", + _name, current->name())); } } diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp index 6bb71ac46471c99..a26ad3703b79b9e 100644 --- a/be/src/util/timezone_utils.cpp +++ b/be/src/util/timezone_utils.cpp @@ -35,6 +35,7 @@ #include #include +#include "common/exception.h" #include "common/logging.h" #include "common/status.h" @@ -83,8 +84,7 @@ void TimezoneUtils::load_timezones_to_cache() { const auto root_path = fs::path {base_str}; if (!exists(root_path)) { - LOG(FATAL) << "Cannot find 
system tzfile. Doris exiting!"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Cannot find system tzfile. Doris exiting!")); } std::set ignore_paths = {"posix", "right"}; // duplications. ignore them. diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h b/be/src/vec/aggregate_functions/aggregate_function_map.h index 17bc54f7499adb6..7273390e7c53427 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map.h +++ b/be/src/vec/aggregate_functions/aggregate_function_map.h @@ -40,10 +40,7 @@ struct AggregateFunctionMapAggData { using KeyType = std::conditional_t, StringRef, K>; using Map = phmap::flat_hash_map; - AggregateFunctionMapAggData() { - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); - } + AggregateFunctionMapAggData() { throw Exception(Status::FatalError("__builtin_unreachable")); } AggregateFunctionMapAggData(const DataTypes& argument_types) { _key_type = remove_nullable(argument_types[0]); diff --git a/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h b/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h index 8efea2dc6fc8e4e..6f5d680d3eb0fc6 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h +++ b/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h @@ -238,24 +238,17 @@ class ReaderFunctionData final void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena*) const override { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "ReaderFunctionData do not support add_range_single_place"); - __builtin_unreachable(); + throw doris::Exception( + Status::FatalError("ReaderFunctionData do not support add_range_single_place")); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "ReaderFunctionData do not support merge"); - __builtin_unreachable(); + throw doris::Exception(Status::FatalError("ReaderFunctionData do not support merge")); } void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "ReaderFunctionData do not support serialize"); - __builtin_unreachable(); + throw doris::Exception(Status::FatalError("ReaderFunctionData do not support serialize")); } void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "ReaderFunctionData do not support deserialize"); - __builtin_unreachable(); + throw doris::Exception(Status::FatalError("ReaderFunctionData do not support deserialize")); } private: diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index 13fa8e74751df64..0cef4c82d3dbfeb 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -563,24 +563,19 @@ class WindowFunctionData final void add(AggregateDataPtr place, const IColumn** columns, ssize_t row_num, Arena*) const override { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "WindowFunctionLeadLagData do not support add"); - __builtin_unreachable(); + throw doris::Exception(Status::FatalError("WindowFunctionLeadLagData do not support add")); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override { - throw 
doris::Exception(ErrorCode::INTERNAL_ERROR,
-                               "WindowFunctionLeadLagData do not support merge");
-        __builtin_unreachable();
+        throw doris::Exception(
+                Status::FatalError("WindowFunctionLeadLagData do not support merge"));
     }
     void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {
-        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
-                               "WindowFunctionLeadLagData do not support serialize");
-        __builtin_unreachable();
+        throw doris::Exception(
+                Status::FatalError("WindowFunctionLeadLagData do not support serialize"));
     }
     void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {
-        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
-                               "WindowFunctionLeadLagData do not support deserialize");
-        __builtin_unreachable();
+        throw doris::Exception(
+                Status::FatalError("WindowFunctionLeadLagData do not support deserialize"));
     }

 private:
diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp
index cb83a29bbada2cc..db0088e67c27b60 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -40,16 +40,16 @@ template
 void ColumnStr::sanity_check() const {
     auto count = offsets.size();
     if (chars.size() != offsets[count - 1]) {
-        LOG(FATAL) << "row count: " << count << ", chars.size(): " << chars.size() << ", offset["
-                   << count - 1 << "]: " << offsets[count - 1];
+        throw Exception(Status::FatalError("row count: {}, chars.size(): {}, offset[{}]: {}", count,
+                                           chars.size(), count - 1, offsets[count - 1]));
     }
     if (offsets[-1] != 0) {
-        LOG(FATAL) << "wrong offsets[-1]: " << offsets[-1];
+        throw Exception(Status::FatalError("wrong offsets[-1]: {}", offsets[-1]));
     }
     for (size_t i = 0; i < count; ++i) {
         if (offsets[i] < offsets[i - 1]) {
-            LOG(FATAL) << "row count: " << count << ", offsets[" << i << "]: " << offsets[i]
-                       << ", offsets[" << i - 1 << "]: " << offsets[i - 1];
+            throw Exception(Status::FatalError("row count: {}, offsets[{}]: {}, offsets[{}]: {}",
+                                               count, i, offsets[i], i - 1, offsets[i - 1]));
         }
     }
 }
diff --git a/be/src/vec/common/assert_cast.h b/be/src/vec/common/assert_cast.h
index 02dce99e967bdbc..1905983a58cc292 100644
--- a/be/src/vec/common/assert_cast.h
+++ b/be/src/vec/common/assert_cast.h
@@ -23,6 +23,7 @@
 #include
 #include

+#include "common/exception.h"
 #include "common/logging.h"
 #include "vec/common/demangle.h"

@@ -45,35 +46,33 @@ PURE To assert_cast(From&& from) {
         if (auto ptr = dynamic_cast(from); ptr != nullptr) {
             return ptr;
         }
-        LOG(FATAL) << fmt::format("Bad cast from type:{}* to {}",
-                                  demangle(typeid(*from).name()),
-                                  demangle(typeid(To).name()));
+        throw doris::Exception(doris::Status::FatalError("Bad cast from type:{}* to {}",
+                                                         demangle(typeid(*from).name()),
+                                                         demangle(typeid(To).name())));
         }
     } else {
         if (typeid(from) == typeid(To)) {
             return static_cast(from);
         }
     }
-    LOG(FATAL) << fmt::format("Bad cast from type:{} to {}", demangle(typeid(from).name()),
-                              demangle(typeid(To).name()));
-    __builtin_unreachable();
+    throw doris::Exception(doris::Status::FatalError("Bad cast from type:{} to {}",
+                                                     demangle(typeid(from).name()),
+                                                     demangle(typeid(To).name())));
 };

 #ifndef NDEBUG
     try {
         return perform_cast(std::forward(from));
     } catch (const std::exception& e) {
-
LOG(FATAL) << "assert cast err:" << e.what(); + throw doris::Exception(doris::Status::FatalError("assert cast err:{}", e.what())); } - __builtin_unreachable(); } else { return static_cast(from); } diff --git a/be/src/vec/common/hash_table/string_hash_table.h b/be/src/vec/common/hash_table/string_hash_table.h index 74be1e85e1efe88..892598a83263b96 100644 --- a/be/src/vec/common/hash_table/string_hash_table.h +++ b/be/src/vec/common/hash_table/string_hash_table.h @@ -327,8 +327,7 @@ class StringHashTable : private boost::noncopyable { return iterator5 == rhs.iterator5; } } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw doris::Exception(doris::Status::FatalError("__builtin_unreachable")); } bool operator!=(const iterator_base& rhs) const { return !(*this == rhs); } diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index fd50af3e1fcd88e..2b1c71c643d6135 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -133,7 +133,7 @@ size_t get_size_of_interger(TypeIndex type) { case TypeIndex::UInt128: return sizeof(uint128_t); default: - LOG(FATAL) << "Unknown integer type: " << getTypeName(type); + throw Exception(Status::FatalError("Unknown integer type: {}", getTypeName(type))); return 0; } } @@ -231,8 +231,7 @@ void get_column_by_type(const vectorized::DataTypePtr& data_type, const std::str return; } // TODO handle more types like struct/date/datetime/decimal... - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } TabletColumn get_column_by_type(const vectorized::DataTypePtr& data_type, const std::string& name, diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 4dc553b1a5790f6..951c2661faf1727 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -644,10 +644,10 @@ Block Block::clone_with_columns(const Columns& columns) const { size_t num_columns = data.size(); if (num_columns != columns.size()) { - LOG(FATAL) << fmt::format( + throw Exception(Status::FatalError( "Cannot clone block with columns because block has {} columns, but {} columns " "given.", - num_columns, columns.size()); + num_columns, columns.size())); } for (size_t i = 0; i < num_columns; ++i) { diff --git a/be/src/vec/core/decimal_comparison.h b/be/src/vec/core/decimal_comparison.h index 9e9d9ad399ae04f..4503a264c28014b 100644 --- a/be/src/vec/core/decimal_comparison.h +++ b/be/src/vec/core/decimal_comparison.h @@ -82,8 +82,9 @@ class DecimalComparison { DecimalComparison(Block& block, uint32_t result, const ColumnWithTypeAndName& col_left, const ColumnWithTypeAndName& col_right) { if (!apply(block, result, col_left, col_right)) { - LOG(FATAL) << fmt::format("Wrong decimal comparison with {} and {}", - col_left.type->get_name(), col_right.type->get_name()); + throw Exception(Status::FatalError("Wrong decimal comparison with {} and {}", + col_left.type->get_name(), + col_right.type->get_name())); } } @@ -106,8 +107,7 @@ class DecimalComparison { static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b) { static const UInt32 max_scale = max_decimal_precision(); if (scale_a > max_scale || scale_b > max_scale) { - LOG(FATAL) << "Bad scale of decimal field"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Bad scale of decimal field")); } Shift shift; @@ -213,8 +213,7 @@ class DecimalComparison { if (const ColVecB* c1_vec = check_and_get_column(c1.get())) constant_vector(a, c1_vec->get_data(), vec_res, 
scale); else { - LOG(FATAL) << "Wrong column in Decimal comparison"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Wrong column in Decimal comparison")); } } else if (c1_is_const) { const ColumnConst* c1_const = check_and_get_column_const(c1.get()); @@ -222,8 +221,7 @@ class DecimalComparison { if (const ColVecA* c0_vec = check_and_get_column(c0.get())) vector_constant(c0_vec->get_data(), b, vec_res, scale); else { - LOG(FATAL) << "Wrong column in Decimal comparison"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Wrong column in Decimal comparison")); } } else { if (const ColVecA* c0_vec = check_and_get_column(c0.get())) { @@ -231,12 +229,10 @@ class DecimalComparison { vector_vector(c0_vec->get_data(), c1_vec->get_data(), vec_res, scale); else { - LOG(FATAL) << "Wrong column in Decimal comparison"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Wrong column in Decimal comparison")); } } else { - LOG(FATAL) << "Wrong column in Decimal comparison"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Wrong column in Decimal comparison")); } } return c_res; @@ -262,8 +258,7 @@ class DecimalComparison { if constexpr (scale_right) overflow |= common::mul_overflow(y, scale, y); if (overflow) { - LOG(FATAL) << "Can't compare"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Can't compare")); } } else { if constexpr (scale_left) x *= scale; diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 341f65e075ed111..1176840738a289c 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -38,6 +38,7 @@ #include #include "common/compiler_util.h" // IWYU pragma: keep +#include "common/exception.h" #include "olap/hll.h" #include "util/bitmap_value.h" #include "util/quantile_state.h" @@ -168,7 +169,7 @@ class JsonbField { JsonbField(const char* ptr, size_t len) : size(len) { data = new char[size]; if (!data) { - LOG(FATAL) << "new data buffer failed, size: " << size; + throw Exception(Status::FatalError("new data buffer failed, size: {}", size)); } memcpy(data, ptr, size); } @@ -176,7 +177,7 @@ class JsonbField { JsonbField(const JsonbField& x) : size(x.size) { data = new char[size]; if (!data) { - LOG(FATAL) << "new data buffer failed, size: " << size; + throw Exception(Status::FatalError("new data buffer failed, size: {}", size)); } memcpy(data, x.data, size); } @@ -189,7 +190,7 @@ class JsonbField { JsonbField& operator=(const JsonbField& x) { data = new char[size]; if (!data) { - LOG(FATAL) << "new data buffer failed, size: " << size; + throw Exception(Status::FatalError("new data buffer failed, size: {}", size)); } memcpy(data, x.data, size); return *this; @@ -216,38 +217,30 @@ class JsonbField { size_t get_size() const { return size; } bool operator<(const JsonbField& r) const { - LOG(FATAL) << "comparing between JsonbField is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonbField is not supported")); } bool operator<=(const JsonbField& r) const { - LOG(FATAL) << "comparing between JsonbField is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonbField is not supported")); } bool operator==(const JsonbField& r) const { - LOG(FATAL) << "comparing between JsonbField is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonbField is not supported")); } bool operator>(const JsonbField& r) const { - LOG(FATAL) << "comparing between 
JsonbField is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonbField is not supported")); } bool operator>=(const JsonbField& r) const { - LOG(FATAL) << "comparing between JsonbField is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonbField is not supported")); } bool operator!=(const JsonbField& r) const { - LOG(FATAL) << "comparing between JsonbField is not supported"; - __builtin_unreachable(); + throw Exception(Status::FatalError("comparing between JsonbField is not supported")); } const JsonbField& operator+=(const JsonbField& r) { - LOG(FATAL) << "Not support plus opration on JsonbField"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Not support plus operation on JsonbField")); } const JsonbField& operator-=(const JsonbField& r) { - LOG(FATAL) << "Not support minus opration on JsonbField"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Not support minus operation on JsonbField")); } private: @@ -305,8 +298,7 @@ class DecimalField { const DecimalField& operator+=(const DecimalField& r) { if (scale != r.get_scale()) { - LOG(FATAL) << "Add different decimal fields"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Add different decimal fields")); } dec += r.get_value(); return *this; @@ -314,8 +306,7 @@ const DecimalField& operator-=(const DecimalField& r) { if (scale != r.get_scale()) { - LOG(FATAL) << "Sub different decimal fields"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Sub different decimal fields")); } dec -= r.get_value(); return *this; @@ -422,8 +413,8 @@ class Field { case IPv6: return "IPv6"; default: - LOG(FATAL) << "type not supported, type=" << Types::to_string(which); - break; + throw Exception( + Status::FatalError("type not supported, type={}", Types::to_string(which))); } __builtin_unreachable(); } @@ -558,8 +549,9 @@ class Field { return which <=> rhs.which; } if (which != rhs.which) { - LOG(FATAL) << "lhs type not equal with rhs, lhs=" << Types::to_string(which) - << ", rhs=" << Types::to_string(rhs.which); + throw Exception(Status::FatalError("lhs type not equal with rhs, lhs={}, rhs={}", + Types::to_string(which), + Types::to_string(rhs.which))); } switch (which) { @@ -601,9 +593,9 @@ case Types::Decimal256: return get() <=> rhs.get(); default: - LOG(FATAL) << "lhs type not equal with rhs, lhs=" << Types::to_string(which) - << ", rhs=" << Types::to_string(rhs.which); - break; + throw Exception(Status::FatalError("lhs type not equal with rhs, lhs={}, rhs={}", + Types::to_string(which), + Types::to_string(rhs.which))); } } @@ -675,8 +667,8 @@ class Field { f(field.template get()); return; default: - LOG(FATAL) << "type not supported, type=" << Types::to_string(field.which); - break; + throw Exception(Status::FatalError("type not supported, type={}", + Types::to_string(field.which))); } } diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index c817c6ab273f428..223dc13c8182bd6 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -942,8 +942,7 @@ inline const char* getTypeName(TypeIndex idx) { return "Time"; } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } // NOLINTEND(readability-function-size) } // namespace vectorized diff --git a/be/src/vec/data_types/data_type_number_base.cpp b/be/src/vec/data_types/data_type_number_base.cpp index
1afed3d7d1a394e..55330bd2797772d 100644 --- a/be/src/vec/data_types/data_type_number_base.cpp +++ b/be/src/vec/data_types/data_type_number_base.cpp @@ -158,8 +158,7 @@ Field DataTypeNumberBase::get_field(const TExprNode& node) const { if constexpr (std::is_same_v, TypeId>) { return Float64(node.float_literal.value); } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } template diff --git a/be/src/vec/data_types/data_type_number_base.h b/be/src/vec/data_types/data_type_number_base.h index a73bd9951891a32..c560fdd01adac3c 100644 --- a/be/src/vec/data_types/data_type_number_base.h +++ b/be/src/vec/data_types/data_type_number_base.h @@ -125,8 +125,7 @@ class DataTypeNumberBase : public IDataType { if constexpr (std::is_same_v, TypeId>) { return doris::FieldType::OLAP_FIELD_TYPE_DOUBLE; } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } Field get_default() const override; diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 1a089bb73fe99cf..122a700cf9b20b7 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -395,8 +395,9 @@ inline static NullMap revert_null_map(const NullMap* null_bytemap, size_t start, inline void checkArrowStatus(const arrow::Status& status, const std::string& column, const std::string& format_name) { if (!status.ok()) { - LOG(FATAL) << "arrow serde with arrow: " << format_name << " with column : " << column - << " with error msg: " << status.ToString(); + throw Exception( + Status::FatalError("arrow serde with arrow: {} with column : {} with error msg: {}", + format_name, column, status.ToString())); } } diff --git a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp index 17ce68e604e9b8d..3f46a9c00735685 100644 --- a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp +++ b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp @@ -36,15 +36,16 @@ void BoolRLEDecoder::set_data(Slice* slice) { _offset = 0; _current_value_idx = 0; if (_num_bytes < 4) { - LOG(FATAL) << "Received invalid length : " + std::to_string(_num_bytes) + - " (corrupt data page?)"; + throw Exception(Status::FatalError("Received invalid length : {} (corrupt data page?)", + std::to_string(_num_bytes))); } // Load the first 4 bytes in little-endian, which indicates the length const uint8_t* data = reinterpret_cast(_data->data); uint32_t num_bytes = decode_fixed32_le(data); if (num_bytes > static_cast(_num_bytes - 4)) { - LOG(FATAL) << ("Received invalid number of bytes : " + std::to_string(num_bytes) + - " (corrupt data page?)"); + throw Exception( + Status::FatalError("Received invalid number of bytes : {} (corrupt data page?)", + std::to_string(num_bytes))); } _num_bytes = num_bytes; auto decoder_data = data + 4; diff --git a/be/src/vec/exec/format/parquet/decoder.h b/be/src/vec/exec/format/parquet/decoder.h index 1654878af80a29f..06e131b5b560491 100644 --- a/be/src/vec/exec/format/parquet/decoder.h +++ b/be/src/vec/exec/format/parquet/decoder.h @@ -79,8 +79,8 @@ class Decoder { } virtual MutableColumnPtr convert_dict_column_to_string_column(const ColumnInt32* dict_column) { - LOG(FATAL) << "Method convert_dict_column_to_string_column is not supported"; - __builtin_unreachable(); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Method
convert_dict_column_to_string_column is not supported"); } protected: diff --git a/be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h b/be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h index 9497aa1cb1cdb58..dbe90acc985a4d1 100644 --- a/be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h +++ b/be/src/vec/exec/format/parquet/delta_bit_pack_decoder.h @@ -177,7 +177,8 @@ class DeltaBitPackDecoder final : public DeltaDecoder { _bit_reader.reset(new BitReader((const uint8_t*)slice->data, slice->size)); Status st = _init_header(); if (!st.ok()) { - LOG(FATAL) << "Fail to init delta encoding header for " << st.to_string(); + throw Exception(Status::FatalError("Fail to init delta encoding header for {}", + st.to_string())); } _data = slice; _offset = 0; @@ -189,7 +190,8 @@ class DeltaBitPackDecoder final : public DeltaDecoder { _bit_reader = std::move(bit_reader); Status st = _init_header(); if (!st.ok()) { - LOG(FATAL) << "Fail to init delta encoding header for " << st.to_string(); + throw Exception(Status::FatalError("Fail to init delta encoding header for {}", + st.to_string())); } } @@ -345,7 +347,7 @@ class DeltaByteArrayDecoder : public DeltaDecoder { int ret; Status st = _prefix_len_decoder.decode(_buffered_prefix_length.data(), num_prefix, &ret); if (!st.ok()) { - LOG(FATAL) << "Fail to decode delta prefix, status: " << st; + throw Exception(Status::FatalError("Fail to decode delta prefix, status: {}", st)); } DCHECK_EQ(ret, num_prefix); _prefix_len_offset = 0; @@ -527,7 +529,7 @@ void DeltaLengthByteArrayDecoder::_decode_lengths() { int ret; Status st = _len_decoder.decode(_buffered_length.data(), num_length, &ret); if (!st.ok()) { - LOG(FATAL) << "Fail to decode delta length, status: " << st; + throw Exception(Status::FatalError("Fail to decode delta length, status: {}", st)); } DCHECK_EQ(ret, num_length); _length_idx = 0; diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index cf6f8aa13fa1d1d..d35a69ff59c625e 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -423,8 +423,7 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { switch (_type_length) { APPLY_FOR_DECIMALS() default: - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } return Status::OK(); #undef APPLY_FOR_DECIMALS @@ -456,8 +455,7 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { } else if constexpr (ScaleType == DecimalScaleParams::NO_SCALE) { // do nothing } else { - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } auto& v = reinterpret_cast(data[start_idx + i]); v = (DecimalType)value; @@ -501,8 +499,7 @@ class StringToDecimal : public PhysicalToLogicalConverter { } else if constexpr (ScaleType == DecimalScaleParams::NO_SCALE) { // do nothing } else { - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } } auto& v = reinterpret_cast(data[start_idx + i]); diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h index 4c6e5b1eac9f60a..a8062d2d9f9b7c6 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h @@ -129,8 +129,8 @@ class 
ParquetColumnReader { } virtual MutableColumnPtr convert_dict_column_to_string_column(const ColumnInt32* dict_column) { - LOG(FATAL) << "Method convert_dict_column_to_string_column is not supported"; - __builtin_unreachable(); + throw Exception( + Status::FatalError("Method convert_dict_column_to_string_column is not supported")); } static Status create(io::FileReaderSPtr file, FieldSchema* field, diff --git a/be/src/vec/exec/jni_connector.cpp b/be/src/vec/exec/jni_connector.cpp index a87ccf987ac7afa..11a58e81c98d892 100644 --- a/be/src/vec/exec/jni_connector.cpp +++ b/be/src/vec/exec/jni_connector.cpp @@ -185,8 +185,8 @@ Status JniConnector::close() { jthrowable exc = (env)->ExceptionOccurred(); if (exc != nullptr) { // Ensure successful resource release - LOG(FATAL) << "Failed to release jni resource: " - << JniUtil::GetJniExceptionMsg(env).to_string(); + throw Exception(Status::FatalError("Failed to release jni resource: {}", + JniUtil::GetJniExceptionMsg(env).to_string())); } } return Status::OK(); diff --git a/be/src/vec/exec/scan/split_source_connector.h b/be/src/vec/exec/scan/split_source_connector.h index 8f38cd4f17a18fe..abe59562578aaf5 100644 --- a/be/src/vec/exec/scan/split_source_connector.h +++ b/be/src/vec/exec/scan/split_source_connector.h @@ -117,7 +117,8 @@ class LocalSplitSourceConnector : public SplitSourceConnector { // for compatibility. return &_scan_ranges[0].scan_range.ext_scan_range.file_scan_range.params; } - LOG(FATAL) << "Unreachable, params is got by file_scan_range_params_map"; + throw Exception( + Status::FatalError("Unreachable, params is got by file_scan_range_params_map")); } }; @@ -160,7 +161,8 @@ class RemoteSplitSourceConnector : public SplitSourceConnector { int num_scan_ranges() override { return _num_splits; } TFileScanRangeParams* get_params() override { - LOG(FATAL) << "Unreachable, params is got by file_scan_range_params_map"; + throw Exception( + Status::FatalError("Unreachable, params is got by file_scan_range_params_map")); } }; diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 953fbaa9c38c8d0..91786337244013a 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -237,18 +237,18 @@ class VExpr { // If this expr is a BloomPredicate, this method will return a BloomFilterFunc virtual std::shared_ptr get_bloom_filter_func() const { - LOG(FATAL) << "Method 'get_bloom_filter_func()' is not supported in expression: " - << this->debug_string(); - return nullptr; + throw Exception(Status::FatalError( + "Method 'get_bloom_filter_func()' is not supported in expression: {}", + this->debug_string())); } virtual std::shared_ptr get_set_func() const { return nullptr; } // If this expr is a BitmapPredicate, this method will return a BitmapFilterFunc virtual std::shared_ptr get_bitmap_filter_func() const { - LOG(FATAL) << "Method 'get_bitmap_filter_func()' is not supported in expression: " - << this->debug_string(); - return nullptr; + throw Exception(Status::FatalError( + "Method 'get_bitmap_filter_func()' is not supported in expression: {}", + this->debug_string())); } // fast_execute can direct copy expr filter result which build by apply index in segment_iterator diff --git a/be/src/vec/functions/array/function_array_apply.cpp b/be/src/vec/functions/array/function_array_apply.cpp index 75425389dd975cc..4161441080aac09 100644 --- a/be/src/vec/functions/array/function_array_apply.cpp +++ b/be/src/vec/functions/array/function_array_apply.cpp @@ -24,6 +24,7 @@ #include #include +#include "common/exception.h" #include 
"common/status.h" #include "runtime/thread_context.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -130,8 +131,7 @@ class FunctionArrayApply : public IFunction { if constexpr (op == ApplyOp::GE) { return data >= comp; } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + throw Exception(Status::FatalError("__builtin_unreachable")); } // need exception safety diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 48619ff85f83c84..af9e9d19267073a 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -665,7 +665,14 @@ struct ConvertImplNumberToJsonb { } else if constexpr (std::is_same_v) { writer.writeDouble(data[i]); } else { - LOG(FATAL) << "unsupported type "; + static_assert(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v, + "unsupported type"); __builtin_unreachable(); } column_string->insert_data(writer.getOutput()->getBuffer(), @@ -950,8 +957,7 @@ struct ConvertImplFromJsonb { res[i] = 0; } } else { - LOG(FATAL) << "unsupported type "; - __builtin_unreachable(); + throw Exception(Status::FatalError("unsupported type")); } } diff --git a/be/src/vec/json/simd_json_parser.h b/be/src/vec/json/simd_json_parser.h index 5189e93563cc528..79924a12a3a4ff9 100644 --- a/be/src/vec/json/simd_json_parser.h +++ b/be/src/vec/json/simd_json_parser.h @@ -208,8 +208,8 @@ class SimdJSONParser { /// Optional: Allocates memory to parse JSON documents faster. void reserve(size_t max_size) { if (parser.allocate(max_size) != simdjson::error_code::SUCCESS) { - LOG(FATAL) << "Couldn't allocate " + std::to_string(max_size) + - " bytes when parsing JSON"; + throw Exception(Status::FatalError("Couldn't allocate {} bytes when parsing JSON", + std::to_string(max_size))); } } diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h index 3473d9d26b52050..75aff7dfec34cd1 100644 --- a/be/src/vec/olap/olap_data_convertor.h +++ b/be/src/vec/olap/olap_data_convertor.h @@ -455,7 +455,8 @@ class OlapBlockDataConvertor { const void* get_data() const override { return _results.data(); }; const void* get_data_at(size_t offset) const override { - LOG(FATAL) << "now not support get_data_at for OlapColumnDataConvertorArray"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "now not support get_data_at for OlapColumnDataConvertorArray"); __builtin_unreachable(); }; Status convert_to_olap() override; @@ -484,7 +485,8 @@ class OlapBlockDataConvertor { Status convert_to_olap() override; const void* get_data() const override { return _results.data(); }; const void* get_data_at(size_t offset) const override { - LOG(FATAL) << "now not support get_data_at for OlapColumnDataConvertorMap"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "now not support get_data_at for OlapColumnDataConvertorMap"); __builtin_unreachable(); }; diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 86c50f0936f30d5..026648319d4be4c 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -3434,8 +3434,7 @@ void DateV2Value::unchecked_set_time(uint8_t hour, uint8_t minute, uint16_t s date_v2_value_.second_ = second; date_v2_value_.microsecond_ = microsecond; } else { - LOG(FATAL) << "Invalid operation 'set_time' for date!"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Invalid operation 'set_time' for 
date!")); } } @@ -3444,8 +3443,7 @@ void DateV2Value::set_microsecond(uint64_t microsecond) { if constexpr (is_datetime) { date_v2_value_.microsecond_ = microsecond; } else { - LOG(FATAL) << "Invalid operation 'set_microsecond' for date!"; - __builtin_unreachable(); + throw Exception(Status::FatalError("Invalid operation 'set_microsecond' for date!")); } } diff --git a/be/test/util/threadpool_test.cpp b/be/test/util/threadpool_test.cpp index 3859639539dbb7e..d331bd0d2ac25d5 100644 --- a/be/test/util/threadpool_test.cpp +++ b/be/test/util/threadpool_test.cpp @@ -42,6 +42,7 @@ #include "common/logging.h" #include "common/status.h" +#include "gtest/gtest.h" #include "gtest/gtest_pred_impl.h" #include "gutil/strings/substitute.h" #include "util/barrier.h" From 6c57c3c70dc669ba386782bfa85baa096129852c Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Thu, 19 Dec 2024 22:00:18 +0800 Subject: [PATCH 14/55] [fix](ub) Dont throw in noexcept function (#45672) --- be/src/pipeline/exec/exchange_sink_buffer.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/be/src/pipeline/exec/exchange_sink_buffer.h b/be/src/pipeline/exec/exchange_sink_buffer.h index a381c5aff144f3e..458c7c3f66e3eec 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.h +++ b/be/src/pipeline/exec/exchange_sink_buffer.h @@ -155,9 +155,10 @@ class ExchangeSendCallback : public ::doris::DummyBrpcCallback { start_rpc_time); } } catch (const std::exception& exp) { - throw Exception(Status::FatalError("brpc callback error: {}", exp.what())); + LOG(FATAL) << "brpc callback error: " << exp.what(); } catch (...) { - throw Exception(Status::FatalError("brpc callback error.")); + LOG(FATAL) << "brpc callback error."; + __builtin_unreachable(); } } int64_t start_rpc_time; From b5249a9e47149bd5ff33c25933a9ae882c6e6b45 Mon Sep 17 00:00:00 2001 From: zhengyu Date: Fri, 20 Dec 2024 00:08:33 +0800 Subject: [PATCH 15/55] [opt](cloud) reduce cache hotspot table write amplification (#45557) 1. batch insert cloud_cache_hotspot in FE 2. enlarge polling interval in FE 3. shrink bucket num to 1 for cloud_cache_hotspot table 4. 
ignore stable statistics only catch the dynamic in BE Signed-off-by: zhengyu --- be/src/cloud/cloud_tablet_hotspot.cpp | 94 +++++++++++-------- be/src/cloud/cloud_tablet_hotspot.h | 19 ++++ .../java/org/apache/doris/common/Config.java | 4 +- .../doris/cloud/CacheHotspotManager.java | 2 +- .../doris/cloud/CacheHotspotManagerUtils.java | 3 +- 5 files changed, 80 insertions(+), 42 deletions(-) diff --git a/be/src/cloud/cloud_tablet_hotspot.cpp b/be/src/cloud/cloud_tablet_hotspot.cpp index dd197268646fbc2..6391a2dc5c4928d 100644 --- a/be/src/cloud/cloud_tablet_hotspot.cpp +++ b/be/src/cloud/cloud_tablet_hotspot.cpp @@ -57,18 +57,55 @@ TabletHotspot::~TabletHotspot() { } } -struct MapKeyHash { - int64_t operator()(const std::pair& key) const { - return std::hash {}(key.first) + std::hash {}(key.second); +void get_return_partitions( + const std::unordered_map, MapKeyHash>& + hot_partition, + const std::unordered_map, MapKeyHash>& + last_hot_partition, + std::vector* hot_tables, int& return_partitions, int N) { + for (const auto& [key, partition_to_value] : hot_partition) { + THotTableMessage msg; + msg.table_id = key.first; + msg.index_id = key.second; + for (const auto& [partition_id, value] : partition_to_value) { + if (return_partitions > N) { + return; + } + auto last_value_iter = last_hot_partition.find(key); + if (last_value_iter != last_hot_partition.end()) { + auto last_partition_iter = last_value_iter->second.find(partition_id); + if (last_partition_iter != last_value_iter->second.end()) { + const auto& last_value = last_partition_iter->second; + if (std::abs(static_cast(value.qpd) - + static_cast(last_value.qpd)) < 5 && + std::abs(static_cast(value.qpw) - + static_cast(last_value.qpw)) < 10 && + std::abs(static_cast(value.last_access_time) - + static_cast(last_value.last_access_time)) < 60) { + LOG(INFO) << "skip partition_id=" << partition_id << " qpd=" << value.qpd + << " qpw=" << value.qpw + << " last_access_time=" << value.last_access_time + << " last_qpd=" << last_value.qpd + << " last_qpw=" << last_value.qpw + << " last_access_time=" << last_value.last_access_time; + continue; + } + } + } + THotPartition hot_partition; + hot_partition.__set_partition_id(partition_id); + hot_partition.__set_query_per_day(value.qpd); + hot_partition.__set_query_per_week(value.qpw); + hot_partition.__set_last_access_time(value.last_access_time); + msg.hot_partitions.push_back(hot_partition); + return_partitions++; + } + msg.__isset.hot_partitions = !msg.hot_partitions.empty(); + hot_tables->push_back(std::move(msg)); } -}; -struct TabletHotspotMapValue { - uint64_t qpd = 0; // query per day - uint64_t qpw = 0; // query per week - int64_t last_access_time; -}; - -using TabletHotspotMapKey = std::pair; +} void TabletHotspot::get_top_n_hot_partition(std::vector* hot_tables) { // map, map> for day @@ -108,33 +145,14 @@ void TabletHotspot::get_top_n_hot_partition(std::vector* hot_t }); constexpr int N = 50; int return_partitions = 0; - auto get_return_partitions = - [=, &return_partitions]( - const std::unordered_map, - MapKeyHash>& hot_partition) { - for (const auto& [key, partition_to_value] : hot_partition) { - THotTableMessage msg; - msg.table_id = key.first; - msg.index_id = key.second; - for (const auto& [partition_id, value] : partition_to_value) { - if (return_partitions > N) { - return; - } - THotPartition hot_partition; - hot_partition.__set_partition_id(partition_id); - hot_partition.__set_query_per_day(value.qpd); - hot_partition.__set_query_per_week(value.qpw); - 
hot_partition.__set_last_access_time(value.last_access_time); - msg.hot_partitions.push_back(hot_partition); - return_partitions++; - } - msg.__isset.hot_partitions = !msg.hot_partitions.empty(); - hot_tables->push_back(std::move(msg)); - } - }; - get_return_partitions(day_hot_partitions); - get_return_partitions(week_hot_partitions); + + get_return_partitions(day_hot_partitions, _last_day_hot_partitions, hot_tables, + return_partitions, N); + get_return_partitions(week_hot_partitions, _last_week_hot_partitions, hot_tables, + return_partitions, N); + + _last_day_hot_partitions = std::move(day_hot_partitions); + _last_week_hot_partitions = std::move(week_hot_partitions); } void HotspotCounter::make_dot_point() { diff --git a/be/src/cloud/cloud_tablet_hotspot.h b/be/src/cloud/cloud_tablet_hotspot.h index af98f99a558b9ba..0be1c085a6c990f 100644 --- a/be/src/cloud/cloud_tablet_hotspot.h +++ b/be/src/cloud/cloud_tablet_hotspot.h @@ -49,6 +49,19 @@ struct HotspotCounter { }; using HotspotCounterPtr = std::shared_ptr; +using TabletHotspotMapKey = std::pair; + +struct TabletHotspotMapValue { + uint64_t qpd = 0; // query per day + uint64_t qpw = 0; // query per week + int64_t last_access_time; +}; + +struct MapKeyHash { + int64_t operator()(const std::pair& key) const { + return std::hash {}(key.first) + std::hash {}(key.second); + } +}; class TabletHotspot { public: @@ -71,6 +84,12 @@ class TabletHotspot { bool _closed {false}; std::mutex _mtx; std::condition_variable _cond; + std::unordered_map, + MapKeyHash> + _last_day_hot_partitions; + std::unordered_map, + MapKeyHash> + _last_week_hot_partitions; }; } // namespace doris diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index c601a492162958f..935300dee6f2fd0 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -3190,11 +3190,11 @@ public static int metaServiceRpcRetryTimes() { public static boolean enable_fetch_cluster_cache_hotspot = true; @ConfField(mutable = true) - public static long fetch_cluster_cache_hotspot_interval_ms = 600000; + public static long fetch_cluster_cache_hotspot_interval_ms = 3600000; // to control the max num of values inserted into cache hotspot internal table // insert into cache table when the size of batch values reaches this limit @ConfField(mutable = true) - public static long batch_insert_cluster_cache_hotspot_num = 1000; + public static long batch_insert_cluster_cache_hotspot_num = 5000; /** * intervals between be status checks for CloudUpgradeMgr diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java index 0b83baa94d6d4a2..f4c7392eb75c63c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java @@ -159,9 +159,9 @@ public void runAfterCatalogReady() { } }); } - triggerBatchInsert(); }); }); + triggerBatchInsert(); idToTable.clear(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java index 20de42f8cdc25ad..72710debaefd9ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java @@ 
-70,9 +70,10 @@ public class CacheHotspotManagerUtils { + " last_access_time DATETIMEV2)\n" + " UNIQUE KEY(cluster_id, backend_id, table_id, index_id, partition_id, insert_day)\n" + " PARTITION BY RANGE (insert_day) ()\n" - + " DISTRIBUTED BY HASH (cluster_id)\n" + + " DISTRIBUTED BY HASH (cluster_id) BUCKETS 1\n" + " PROPERTIES (\n" + " \"dynamic_partition.enable\" = \"true\",\n" + + " \"dynamic_partition.buckets\" = \"1\",\n" + " \"dynamic_partition.time_unit\" = \"DAY\",\n" + " \"dynamic_partition.start\" = \"-7\",\n" + " \"dynamic_partition.end\" = \"3\",\n" From 1bd8003a1a126e8ea033b9452499da404b2ec0be Mon Sep 17 00:00:00 2001 From: Luwei Date: Fri, 20 Dec 2024 00:28:51 +0800 Subject: [PATCH 16/55] [Enhancement](compaction) enable the compaction producer to generate multiple compaction tasks in a single run (#45411) --- be/src/common/config.cpp | 2 + be/src/common/config.h | 2 + be/src/olap/tablet_manager.cpp | 52 ++++++++++++++++++--- be/test/olap/tablet_mgr_test.cpp | 79 +++++++++++++++++++++++++++++++- 4 files changed, 127 insertions(+), 8 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 95a3e61fb5517aa..083b9f06c9491dc 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1404,6 +1404,8 @@ DEFINE_Bool(enable_table_size_correctness_check, "false"); DEFINE_Bool(force_regenerate_rowsetid_on_start_error, "false"); DEFINE_mBool(enable_sleep_between_delete_cumu_compaction, "false"); +DEFINE_mInt32(compaction_num_per_round, "1"); + // clang-format off #ifdef BE_TEST // test s3 diff --git a/be/src/common/config.h b/be/src/common/config.h index f8a9c3f7480b337..1e3d57ff7634170 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1490,6 +1490,8 @@ DECLARE_Bool(enable_table_size_correctness_check); // Enable sleep 5s between delete cumulative compaction. 
DECLARE_mBool(enable_sleep_between_delete_cumu_compaction); +DECLARE_mInt32(compaction_num_per_round); + #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 33fee7ca3509005..44c26d160eb8bc2 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -719,6 +719,11 @@ void TabletManager::get_tablet_stat(TTabletStatResult* result) { result->__set_tablet_stat_list(*local_cache); } +struct TabletScore { + TabletSharedPtr tablet_ptr; + int score; +}; + std::vector TabletManager::find_best_tablets_to_compaction( CompactionType compaction_type, DataDir* data_dir, const std::unordered_set& tablet_submitted_compaction, uint32_t* score, @@ -732,6 +737,9 @@ std::vector TabletManager::find_best_tablets_to_compaction( uint32_t single_compact_highest_score = 0; TabletSharedPtr best_tablet; TabletSharedPtr best_single_compact_tablet; + auto cmp = [](TabletScore left, TabletScore right) { return left.score > right.score; }; + std::priority_queue, decltype(cmp)> top_tablets(cmp); + auto handler = [&](const TabletSharedPtr& tablet_ptr) { if (tablet_ptr->tablet_meta()->tablet_schema()->disable_auto_compaction()) { LOG_EVERY_N(INFO, 500) << "Tablet " << tablet_ptr->tablet_id() @@ -798,13 +806,33 @@ std::vector TabletManager::find_best_tablets_to_compaction( } } - // tablet should do cumu or base compaction - if (current_compaction_score > highest_score && !tablet_ptr->should_fetch_from_peer()) { - bool ret = tablet_ptr->suitable_for_compaction(compaction_type, - cumulative_compaction_policy); - if (ret) { - highest_score = current_compaction_score; - best_tablet = tablet_ptr; + if (config::compaction_num_per_round > 1 && !tablet_ptr->should_fetch_from_peer()) { + TabletScore ts; + ts.score = current_compaction_score; + ts.tablet_ptr = tablet_ptr; + if ((top_tablets.size() >= config::compaction_num_per_round && + current_compaction_score > top_tablets.top().score) || + top_tablets.size() < config::compaction_num_per_round) { + bool ret = tablet_ptr->suitable_for_compaction(compaction_type, + cumulative_compaction_policy); + if (ret) { + top_tablets.push(ts); + if (top_tablets.size() > config::compaction_num_per_round) { + top_tablets.pop(); + } + if (current_compaction_score > highest_score) { + highest_score = current_compaction_score; + } + } + } + } else { + if (current_compaction_score > highest_score && !tablet_ptr->should_fetch_from_peer()) { + bool ret = tablet_ptr->suitable_for_compaction(compaction_type, + cumulative_compaction_policy); + if (ret) { + highest_score = current_compaction_score; + best_tablet = tablet_ptr; + } } } }; @@ -820,6 +848,16 @@ std::vector TabletManager::find_best_tablets_to_compaction( picked_tablet.emplace_back(std::move(best_tablet)); } + std::vector reverse_top_tablets; + while (!top_tablets.empty()) { + reverse_top_tablets.emplace_back(top_tablets.top().tablet_ptr); + top_tablets.pop(); + } + + for (auto it = reverse_top_tablets.rbegin(); it != reverse_top_tablets.rend(); ++it) { + picked_tablet.emplace_back(*it); + } + // pick single compaction tablet needs the highest score if (best_single_compact_tablet != nullptr && single_compact_highest_score >= highest_score) { VLOG_CRITICAL << "Found the best tablet for single compaction. 
" diff --git a/be/test/olap/tablet_mgr_test.cpp b/be/test/olap/tablet_mgr_test.cpp index 1bcdcdf45c6906e..a25515434053285 100644 --- a/be/test/olap/tablet_mgr_test.cpp +++ b/be/test/olap/tablet_mgr_test.cpp @@ -83,6 +83,7 @@ class TabletMgrTest : public testing::Test { SAFE_DELETE(_data_dir); EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_engine_data_path).ok()); _tablet_mgr = nullptr; + config::compaction_num_per_round = 1; } std::unique_ptr k_engine; @@ -463,11 +464,87 @@ TEST_F(TabletMgrTest, FindTabletWithCompact) { ASSERT_EQ(score, 25); // drop all tablets - for (int64_t id = 1; id <= 20; ++id) { + for (int64_t id = 1; id <= 21; ++id) { Status drop_st = _tablet_mgr->drop_tablet(id, id * 10, false); ASSERT_TRUE(drop_st.ok()) << drop_st; } + { + config::compaction_num_per_round = 10; + for (int64_t i = 1; i <= 100; ++i) { + create_tablet(10000 + i, false, i); + } + + compact_tablets = _tablet_mgr->find_best_tablets_to_compaction( + CompactionType::CUMULATIVE_COMPACTION, _data_dir, cumu_set, &score, + cumulative_compaction_policies); + ASSERT_EQ(compact_tablets.size(), 10); + int index = 0; + for (auto t : compact_tablets) { + ASSERT_EQ(t->tablet_id(), 10100 - index); + ASSERT_EQ(t->calc_compaction_score(), 100 - index); + index++; + } + config::compaction_num_per_round = 1; + // drop all tablets + for (int64_t id = 10001; id <= 10100; ++id) { + Status drop_st = _tablet_mgr->drop_tablet(id, id * 10, false); + ASSERT_TRUE(drop_st.ok()) << drop_st; + } + } + + { + config::compaction_num_per_round = 10; + for (int64_t i = 1; i <= 100; ++i) { + create_tablet(20000 + i, false, i); + } + create_tablet(20102, true, 200); + + compact_tablets = _tablet_mgr->find_best_tablets_to_compaction( + CompactionType::CUMULATIVE_COMPACTION, _data_dir, cumu_set, &score, + cumulative_compaction_policies); + ASSERT_EQ(compact_tablets.size(), 11); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(compact_tablets[i]->tablet_id(), 20100 - i); + ASSERT_EQ(compact_tablets[i]->calc_compaction_score(), 100 - i); + } + ASSERT_EQ(compact_tablets[10]->tablet_id(), 20102); + ASSERT_EQ(compact_tablets[10]->calc_compaction_score(), 200); + + config::compaction_num_per_round = 1; + // drop all tablets + for (int64_t id = 20001; id <= 20100; ++id) { + Status drop_st = _tablet_mgr->drop_tablet(id, id * 10, false); + ASSERT_TRUE(drop_st.ok()) << drop_st; + } + + Status drop_st = _tablet_mgr->drop_tablet(20102, 20102 * 10, false); + ASSERT_TRUE(drop_st.ok()) << drop_st; + } + + { + config::compaction_num_per_round = 10; + for (int64_t i = 1; i <= 5; ++i) { + create_tablet(30000 + i, false, i + 5); + } + + compact_tablets = _tablet_mgr->find_best_tablets_to_compaction( + CompactionType::CUMULATIVE_COMPACTION, _data_dir, cumu_set, &score, + cumulative_compaction_policies); + ASSERT_EQ(compact_tablets.size(), 5); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(compact_tablets[i]->tablet_id(), 30000 + 5 - i); + ASSERT_EQ(compact_tablets[i]->calc_compaction_score(), 10 - i); + } + + config::compaction_num_per_round = 1; + // drop all tablets + for (int64_t id = 30001; id <= 30005; ++id) { + Status drop_st = _tablet_mgr->drop_tablet(id, id * 10, false); + ASSERT_TRUE(drop_st.ok()) << drop_st; + } + } + Status trash_st = _tablet_mgr->start_trash_sweep(); ASSERT_TRUE(trash_st.ok()) << trash_st; } From d38970bf82da2d9d3dd91ad21d96d66be6c5a147 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Fri, 20 Dec 2024 09:43:16 +0800 Subject: [PATCH 17/55] [opt](fs)add local file support (#45632) ### What problem does this PR solve? 
Problem Summary: Added local file support to make it easier to debug data lake files that are stored locally. --- .../apache/doris/datasource/property/PropertyConverter.java | 3 +++ .../src/main/java/org/apache/doris/fs/FileSystemFactory.java | 1 + 2 files changed, 4 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java index 8544ae597f1cd33..7b65411aa6b4843 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java @@ -41,6 +41,7 @@ import com.google.common.collect.Maps; import org.apache.hadoop.fs.CosFileSystem; import org.apache.hadoop.fs.CosNConfigKeys; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem; import org.apache.hadoop.fs.obs.OBSConstants; import org.apache.hadoop.fs.obs.OBSFileSystem; @@ -193,6 +194,8 @@ private static Map convertToOBSProperties(Map pr public static String getHadoopFSImplByScheme(String fsScheme) { if (fsScheme.equalsIgnoreCase("obs")) { return OBSFileSystem.class.getName(); + } else if (fsScheme.equalsIgnoreCase("file")) { + return LocalFileSystem.class.getName(); } else if (fsScheme.equalsIgnoreCase("oss")) { return AliyunOSSFileSystem.class.getName(); } else if (fsScheme.equalsIgnoreCase("cosn") || fsScheme.equalsIgnoreCase("lakefs")) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java index 1f3d60d2adf2ebd..fb23005f4ac9acf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java @@ -63,6 +63,7 @@ public static RemoteFileSystem getRemoteFileSystem(FileSystemType type, Map Date: Fri, 20 Dec 2024 09:45:05 +0800 Subject: [PATCH 18/55] [fix](iceberg) Fill in the detailed error information (#45415) ### What problem does this PR solve? Related PR: #45285 Problem Summary: When dropping a database, fill in the detailed error information.
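For context, every catalog operation in this file is wrapped the same way; the following is a minimal sketch of the pattern (simplified from the diff below, with a hypothetical `dbName` variable and a placeholder catalog call):

    try {
        preExecutionAuthenticator.execute(() -> {
            catalog.dropDatabase(dbName); // placeholder for the actual Iceberg catalog operation
            return null;
        });
    } catch (Exception e) {
        // put the root cause into the message for clients, and keep it as the
        // wrapped cause so FE logs retain the full stack trace
        throw new DdlException("Failed to drop database: " + dbName
                + ", error message is:" + e.getMessage(), e);
    }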
--- .../doris/datasource/iceberg/IcebergMetadataOps.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java index 440a671afe58f13..da61b2ac1ab7fe4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java @@ -104,7 +104,7 @@ public List listDatabaseNames() { .map(n -> n.level(n.length() - 1)) .collect(Collectors.toList())); } catch (Exception e) { - throw new RuntimeException("Failed to list database names, error message is: " + e.getMessage()); + throw new RuntimeException("Failed to list database names, error message is:" + e.getMessage(), e); } } @@ -125,7 +125,7 @@ public void createDb(CreateDbStmt stmt) throws DdlException { }); } catch (Exception e) { throw new DdlException("Failed to create database: " - + stmt.getFullDbName() + " ,error message is: " + e.getMessage()); + + stmt.getFullDbName() + ", error message is:" + e.getMessage(), e); } } @@ -161,7 +161,7 @@ public void dropDb(DropDbStmt stmt) throws DdlException { }); } catch (Exception e) { throw new DdlException( - "Failed to drop database: " + stmt.getDbName() + ", error message is: " + e.getMessage(), e); + "Failed to drop database: " + stmt.getDbName() + ", error message is:" + e.getMessage(), e); } } @@ -184,7 +184,8 @@ public boolean createTable(CreateTableStmt stmt) throws UserException { try { preExecutionAuthenticator.execute(() -> performCreateTable(stmt)); } catch (Exception e) { - throw new DdlException("Failed to create table: " + stmt.getTableName() + " ,error message is:", e); + throw new DdlException( + "Failed to create table: " + stmt.getTableName() + ", error message is:" + e.getMessage(), e); } return false; } @@ -228,7 +229,8 @@ public void dropTable(DropTableStmt stmt) throws DdlException { return null; }); } catch (Exception e) { - throw new DdlException("Failed to drop table: " + stmt.getTableName() + " ,error message is:", e); + throw new DdlException( + "Failed to drop table: " + stmt.getTableName() + ", error message is:" + e.getMessage(), e); } } From b515f86669bc118cc03cf9f8332b6397194dc337 Mon Sep 17 00:00:00 2001 From: lw112 <131352377+felixwluo@users.noreply.github.com> Date: Fri, 20 Dec 2024 10:38:03 +0800 Subject: [PATCH 19/55] [fix](fold) fixed an issue with be computing constants (#43410) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? 
issue close: #43061 1. Problem: When enable_fold_constant_by_be=true is set, the results of the two queries below are inconsistent. select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')) +----------------------------------------------+ | hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')) | +----------------------------------------------+ | C2BD89103557CCBF7ED97B51860225A0 | +----------------------------------------------+ select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t +--------------------------------------------------+ | hex(s) | +--------------------------------------------------+ | C2BDEFBFBD103557CCBF7EEFBFBD7B51EFBFBD0225EFBFBD | +--------------------------------------------------+ 2. MySQL results: select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t; +----------------------------------+ | hex(s) | +----------------------------------+ | C2BD89103557CCBF7ED97B51860225A0 | +----------------------------------+ 3. Cause: When processing binary data such as FromBase64, BE returns the original binary data through the bytesValue field, but the previous code only used the stringValue field, so the binary data was corrupted during the string encoding conversion. --- .../rules/expression/rules/FoldConstantRuleOnBE.java | 12 ++++++++-- .../doris/nereids/trees/expressions/LiteralTest.java | 8 ++++++-- .../expression/fold_constant/fold_constant_by_be.out | 6 ++++++ .../fold_constant/fold_constant_by_be.groovy | 11 +++++++---- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java index 70e63b050a84029..dd79de70e26adb2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java @@ -487,8 +487,16 @@ public static List getResultExpression(DataType type, PValues resultCon } else if (type.isStringLikeType()) { int num = resultContent.getStringValueCount(); for (int i = 0; i < num; ++i) { - Literal literal = new StringLiteral(resultContent.getStringValue(i)); - res.add(literal); + // get the raw byte data to avoid character encoding conversion problems + ByteString bytesValues = resultContent.getBytesValue(i); + // use UTF-8 encoding to ensure proper handling of binary data + String stringValue = bytesValues.toStringUtf8(); + // handle special NULL value cases + if ("\\N".equalsIgnoreCase(stringValue) && resultContent.hasHasNull()) { + res.add(new NullLiteral(type)); + } else { + res.add(new StringLiteral(stringValue)); + } } } else if (type.isArrayType()) { ArrayType arrayType = (ArrayType) type; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java index fcb64ff0bface94..9c7e2e5b1519b5d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java @@ -233,7 +233,9 @@ public void testGetResultExpressionStruct() { PValues.Builder resultContentBuilder = PValues.newBuilder(); for (int i = 0; i < elementsArray.length; i = i + 2) { childBuilder1.addInt32Value(elementsArray[i]); - childBuilder2.addStringValue("str" + (i + 1)); + String
strValue = "str" + (i + 1); + childBuilder2.addStringValue(strValue); + childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue)); } childBuilder1.setType(childTypeBuilder1.build()); childBuilder2.setType(childTypeBuilder2.build()); @@ -280,7 +282,9 @@ public void testGetResultExpressionStructArray() { PValues.Builder resultContentBuilder = PValues.newBuilder(); for (int i = 0; i < elementsArray.length; i = i + 2) { childBuilder1.addInt32Value(elementsArray[i]); - childBuilder2.addStringValue("str" + (i + 1)); + String strValue = "str" + (i + 1); + childBuilder2.addStringValue(strValue); + childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue)); } childBuilder1.setType(childTypeBuilder1.build()); childBuilder2.setType(childTypeBuilder2.build()); diff --git a/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out b/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out index c7c506292a5423b..8d9d704684ea7c9 100644 --- a/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out +++ b/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out @@ -1,4 +1,10 @@ -- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +C2BD89103557CCBF7ED97B51860225A0 + +-- !sql -- +C2BD89103557CCBF7ED97B51860225A0 + -- !sql_1 -- 80000 diff --git a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy index 09a80209c04ede9..f3b1b0cdcd5b161 100644 --- a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy +++ b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy @@ -22,6 +22,9 @@ suite("fold_constant_by_be") { sql 'set enable_fallback_to_original_planner=false' sql 'set enable_fold_constant_by_be=true' + qt_sql """ select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')); """ + qt_sql """ select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t; """ + test { sql ''' select if( @@ -32,8 +35,8 @@ suite("fold_constant_by_be") { result([['9999-07-31']]) } - sql """ - CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING NULL) + sql """ + CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING NULL) UNIQUE KEY(k1) DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1"); """ @@ -53,7 +56,7 @@ suite("fold_constant_by_be") { sql 'set query_timeout=12;' qt_sql "select sleep(sign(1)*5);" - + explain { sql("verbose select substring('123456', 1, 3)") contains "varchar(3)" @@ -71,7 +74,7 @@ suite("fold_constant_by_be") { col_varchar_1000__undef_signed varchar(1000) null , col_varchar_1000__undef_signed_not_null varchar(1000) not null , col_varchar_1001__undef_signed varchar(1001) null , - col_varchar_1001__undef_signed_not_null varchar(1001) not null + col_varchar_1001__undef_signed_not_null varchar(1001) not null ) engine=olap DUPLICATE KEY(pk, col_char_255__undef_signed, col_char_100__undef_signed) distributed by hash(pk) buckets 10 From 62ff850cfe7fb55effb7a3af81e803a1a1a7a906 Mon Sep 17 00:00:00 2001 From: morrySnow Date: Fri, 20 Dec 2024 12:03:36 +0800 Subject: [PATCH 20/55] [fix](variable) force update variable not work as expected (#45648) ### What problem does this PR solve? 
Related PR: #41607 Problem Summary: should not reset variable version when replay edit log --- fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java | 2 +- .../src/main/java/org/apache/doris/qe/VariableMgr.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index b3544b4de55460a..cc7b8846b1fb446 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -2647,7 +2647,7 @@ public long saveGlobalVariable(CountingDataOutputStream dos, long checksum) thro } public void replayGlobalVariableV2(GlobalVarPersistInfo info) throws IOException, DdlException { - VariableMgr.replayGlobalVariableV2(info); + VariableMgr.replayGlobalVariableV2(info, false); } public long saveLoadJobsV2(CountingDataOutputStream dos, long checksum) throws IOException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java b/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java index 5b632c1632e968e..82486fb72c759cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java @@ -459,21 +459,21 @@ public static void read(DataInputStream in) throws IOException, DdlException { } variablesToRead.readFields(in); GlobalVarPersistInfo info = GlobalVarPersistInfo.read(in); - replayGlobalVariableV2(info); + replayGlobalVariableV2(info, true); } finally { wlock.unlock(); } } // this method is used to replace the `replayGlobalVariable()` - public static void replayGlobalVariableV2(GlobalVarPersistInfo info) throws DdlException { + public static void replayGlobalVariableV2(GlobalVarPersistInfo info, boolean fromImage) throws DdlException { wlock.lock(); try { String json = info.getPersistJsonString(); JSONObject root = (JSONObject) JSONValue.parse(json); // if not variable version, we set it to 0 to ensure we could force set global variable. boolean hasVariableVersion = root.containsKey(GlobalVariable.VARIABLE_VERSION); - if (!hasVariableVersion) { + if (fromImage && !hasVariableVersion) { GlobalVariable.variableVersion = GlobalVariable.VARIABLE_VERSION_0; } for (Object varName : root.keySet()) { From 012acf58a7fde1ca0117fb2b88a0d6a22b9a1447 Mon Sep 17 00:00:00 2001 From: lihangyu Date: Fri, 20 Dec 2024 12:16:00 +0800 Subject: [PATCH 21/55] [improve](variant) only sanitize in debug mode (#45689) 1. could improve serialization performance 2. 
the check is incorrect; for example, int8 and int16 can be compatible --- be/src/vec/columns/column_object.cpp | 2 ++ be/src/vec/columns/column_object.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index d67a70d2f630f03..3d6a3e44436d29b 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1953,6 +1953,7 @@ std::string ColumnObject::debug_string() const { } Status ColumnObject::sanitize() const { +#ifndef NDEBUG RETURN_IF_CATCH_EXCEPTION(check_consistency()); for (const auto& subcolumn : subcolumns) { if (subcolumn->data.is_finalized()) { @@ -1967,6 +1968,7 @@ Status ColumnObject::sanitize() const { } VLOG_DEBUG << "sanitized " << debug_string(); +#endif return Status::OK(); } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index e4127197a22b025..037656508e2a7f2 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -428,7 +428,7 @@ class ColumnObject final : public COWHelper { bool empty() const; - // Check if all columns and types are aligned + // Check if all columns and types are aligned, only in debug mode Status sanitize() const; std::string debug_string() const; From a15e3e549af1aa125e9e5317214c26230f51c415 Mon Sep 17 00:00:00 2001 From: walter Date: Fri, 20 Dec 2024 12:34:24 +0800 Subject: [PATCH 22/55] [fix](catalog) Unifies partition items string (#45669) ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: For range partitions, `getItems().toString()` is equal to `getItemsString`, but for list partitions, there is a `,` between each item. The upsert record of the binlog is generated via `getItemsString`, but the getMeta method fetches the partition items string via `getItems().toString()`; the two differ for list partitions, so the ccr-syncer is unable to match them. This PR unifies all partition item strings via `getItemsString`.
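To make the mismatch concrete, here is a minimal standalone sketch of the two rendering paths; the item format is simplified and the names are hypothetical, not the actual `PartitionItem` code:
```
import java.util.Arrays;
import java.util.List;

public class PartitionItemStringDemo {
    public static void main(String[] args) {
        List<String> items = Arrays.asList("keys: [1]; ", "keys: [2]; ");
        // one path: List.toString() joins the items with ", "
        String metaSide = items.toString();
        // other path: a custom builder that concatenates the items directly
        StringBuilder binlogSide = new StringBuilder("[");
        for (String item : items) {
            binlogSide.append(item);
        }
        binlogSide.append("]");
        System.out.println(metaSide);    // [keys: [1]; , keys: [2]; ]
        System.out.println(binlogSide);  // [keys: [1]; keys: [2]; ]
    }
}
```
In this simplified model a single item renders identically either way, which is consistent with the difference only showing up for list partitions with multiple keys.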
--- .../src/main/java/org/apache/doris/catalog/Env.java | 6 +----- .../org/apache/doris/catalog/ListPartitionItem.java | 12 ++++++------ .../org/apache/doris/catalog/RangePartitionItem.java | 12 ++++++------ .../doris/common/proc/EsPartitionsProcDir.java | 2 +- .../apache/doris/common/proc/PartitionsProcDir.java | 2 +- 5 files changed, 15 insertions(+), 19 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index cc7b8846b1fb446..1c6345613d768d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -6486,11 +6486,7 @@ private static void getTableMeta(OlapTable olapTable, TGetMetaDBMeta dbMeta) { long partitionId = partition.getId(); partitionMeta.setId(partitionId); partitionMeta.setName(partition.getName()); - String partitionRange = ""; - if (tblPartitionInfo.getType() == PartitionType.RANGE - || tblPartitionInfo.getType() == PartitionType.LIST) { - partitionRange = tblPartitionInfo.getItem(partitionId).getItems().toString(); - } + String partitionRange = tblPartitionInfo.getPartitionRangeString(partitionId); partitionMeta.setRange(partitionRange); partitionMeta.setVisibleVersion(partition.getVisibleVersion()); // partitionMeta.setTemp(partition.isTemp()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java index dba109a9539876b..985853812443251 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java @@ -61,7 +61,12 @@ public List getItems() { } public String getItemsString() { - return toString(); + // ATTN: DO NOT EDIT unless unless you explicitly guarantee compatibility + // between different versions. + // + // the ccr syncer depends on this string to identify partitions between two + // clusters (cluster versions may be different). + return getItems().toString(); } public String getItemsSql() { @@ -173,11 +178,6 @@ public int hashCode() { @Override public String toString() { - // ATTN: DO NOT EDIT unless unless you explicitly guarantee compatibility - // between different versions. - // - // the ccr syncer depends on this string to identify partitions between two - // clusters (cluster versions may be different). StringBuilder builder = new StringBuilder(); builder.append("partitionKeys: ["); for (PartitionKey partitionKey : partitionKeys) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java index 690ab88991bd16e..96bf0097c28a515 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java @@ -46,7 +46,12 @@ public Range getItems() { } public String getItemsString() { - return toString(); + // ATTN: DO NOT EDIT unless unless you explicitly guarantee compatibility + // between different versions. + // + // the ccr syncer depends on this string to identify partitions between two + // clusters (cluster versions may be different). 
+ return partitionKeyRange.toString(); } public String getItemsSql() { @@ -125,11 +130,6 @@ public boolean equals(Object obj) { @Override public String toString() { - // ATTN: DO NOT EDIT unless unless you explicitly guarantee compatibility - // between different versions. - // - // the ccr syncer depends on this string to identify partitions between two - // clusters (cluster versions may be different). return partitionKeyRange.toString(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/EsPartitionsProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/EsPartitionsProcDir.java index 87e7fa449f13a2a..ed5cfc18d13e482 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/EsPartitionsProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/EsPartitionsProcDir.java @@ -93,7 +93,7 @@ public ProcResult fetchResult() throws AnalysisException { } partitionInfo.add(joiner.join(colNames)); // partition key partitionInfo.add( - rangePartitionInfo.getItem(esShardPartitions.getPartitionId()).getItems().toString()); // range + rangePartitionInfo.getItem(esShardPartitions.getPartitionId()).getItemsString()); // range partitionInfo.add("-"); // dis partitionInfo.add(esShardPartitions.getShardRoutings().size()); // shards partitionInfo.add(1); // replica num diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java index 3c44874cb7deffa..3ac8b797d64ad6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java @@ -323,7 +323,7 @@ private List, TRow>> getPartitionInfosInrernal() throws An String colNamesStr = joiner.join(colNames); partitionInfo.add(colNamesStr); trow.addToColumnValue(new TCell().setStringVal(colNamesStr)); - String itemStr = tblPartitionInfo.getItem(partitionId).getItems().toString(); + String itemStr = tblPartitionInfo.getPartitionRangeString(partitionId); partitionInfo.add(itemStr); trow.addToColumnValue(new TCell().setStringVal(itemStr)); } else { From 8956279255c7a80ef319d68d65bd0f3e0fcd8c7d Mon Sep 17 00:00:00 2001 From: yagagagaga Date: Fri, 20 Dec 2024 12:38:41 +0800 Subject: [PATCH 23/55] [chore](script) fix `start_fe.sh --version` not work and MetaService scripts occur error in Debian GNU/Linux 11 (bullseye) (#45610) 1. fix `start_fe.sh --version` not work 2. 
fix `ms/bin/start.sh`, which could not work in Debian GNU/Linux 11 (bullseye) --- bin/start_fe.sh | 6 ++++++ cloud/script/start.sh | 2 +- cloud/script/stop.sh | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bin/start_fe.sh b/bin/start_fe.sh index ac5971072c306cd..b089596a9cdb735 100755 --- a/bin/start_fe.sh +++ b/bin/start_fe.sh @@ -258,6 +258,12 @@ if [[ "${HELPER}" != "" ]]; then HELPER="-helper ${HELPER}" fi +if [[ "${OPT_VERSION}" != "" ]]; then + export DORIS_LOG_TO_STDERR=1 + ${LIMIT:+${LIMIT}} "${JAVA}" org.apache.doris.DorisFE --version + exit 0 +fi + if [[ "${IMAGE_TOOL}" -eq 1 ]]; then if [[ -n "${IMAGE_PATH}" ]]; then ${LIMIT:+${LIMIT}} "${JAVA}" ${final_java_opt:+${final_java_opt}} ${coverage_opt:+${coverage_opt}} org.apache.doris.DorisFE -i "${IMAGE_PATH}" diff --git a/cloud/script/start.sh b/cloud/script/start.sh index 1bce9813f4cf528..ecb5a3b2bed1e62 100644 --- a/cloud/script/start.sh +++ b/cloud/script/start.sh @@ -1,4 +1,4 @@ -#!/usr/bin/bash +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information diff --git a/cloud/script/stop.sh b/cloud/script/stop.sh index 48f01c545ae840e..1b59cef718243e9 100644 --- a/cloud/script/stop.sh +++ b/cloud/script/stop.sh @@ -1,4 +1,4 @@ -#!/usr/bin/bash +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information From 2a1209d3cc77dac4f3ee7073240cd354bd6575c8 Mon Sep 17 00:00:00 2001 From: "Mingyu Chen (Rayner)" Date: Fri, 20 Dec 2024 13:33:25 +0800 Subject: [PATCH 24/55] [opt](catalog) cache the Configuration object (#45433) ### What problem does this PR solve? Problem Summary: Creating a Configuration object is very costly, so we cache it for better performance.
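The caching added below follows the classic double-checked locking idiom: a `volatile` field gives a safe unsynchronized fast path, and a re-check under the lock ensures the expensive build runs at most once per invalidation. A rough standalone sketch of the pattern (a placeholder type stands in for the real Hadoop `Configuration`; this is not the actual Doris code):
```
public class ConfCache {
    static class Conf {
        // placeholder for any object that is expensive to build
    }

    // volatile makes the unsynchronized fast-path read safe
    private volatile Conf cachedConf = null;
    private final Object confLock = new Object();

    public Conf getConf() {
        if (cachedConf != null) {
            return cachedConf; // fast path: no locking once built
        }
        synchronized (confLock) {
            if (cachedConf != null) {
                return cachedConf; // another thread built it while we waited
            }
            cachedConf = buildConf();
            return cachedConf;
        }
    }

    // mirrors the invalidation done on catalog refresh
    public void invalidate() {
        synchronized (confLock) {
            cachedConf = null;
        }
    }

    private Conf buildConf() {
        return new Conf(); // expensive construction happens here
    }
}
```
The patch itself uses a zero-length `byte[]` as the lock object, a common trick for getting a cheap dedicated monitor.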
--- .../doris/datasource/ExternalCatalog.java | 21 +++++++++++++++++++ .../hive/HiveMetaStoreClientHelper.java | 7 +------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index d7cbee18c74c7a7..2575169f79207f4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -153,6 +153,9 @@ public abstract class ExternalCatalog protected MetaCache> metaCache; protected PreExecutionAuthenticator preExecutionAuthenticator; + private volatile Configuration cachedConf = null; + private final byte[] confLock = new byte[0]; + public ExternalCatalog() { } @@ -164,6 +167,20 @@ public ExternalCatalog(long catalogId, String name, InitCatalogLog.Type logType, } public Configuration getConfiguration() { + // build configuration is costly, so we cache it. + if (cachedConf != null) { + return cachedConf; + } + synchronized (confLock) { + if (cachedConf != null) { + return cachedConf; + } + cachedConf = buildConf(); + return cachedConf; + } + } + + private Configuration buildConf() { Configuration conf = DFSFileSystem.getHdfsConf(ifNotSetFallbackToSimpleAuth()); Map catalogProperties = catalogProperty.getHadoopProperties(); for (Map.Entry entry : catalogProperties.entrySet()) { @@ -409,6 +426,10 @@ public void onRefresh(boolean invalidCache) { this.convertedProperties = null; } + synchronized (this.confLock) { + this.cachedConf = null; + } + refreshOnlyCatalogCache(invalidCache); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java index 884cfbee45ba9f3..706bd653a85e21c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java @@ -42,7 +42,6 @@ import org.apache.doris.common.security.authentication.AuthenticationConfig; import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.doris.datasource.ExternalCatalog; -import org.apache.doris.fs.remote.dfs.DFSFileSystem; import org.apache.doris.thrift.TExprOpcode; import com.google.common.base.Strings; @@ -843,11 +842,7 @@ public static HoodieTableMetaClient getHudiClient(HMSExternalTable table) { } public static Configuration getConfiguration(HMSExternalTable table) { - Configuration conf = DFSFileSystem.getHdfsConf(table.getCatalog().ifNotSetFallbackToSimpleAuth()); - for (Map.Entry entry : table.getHadoopProperties().entrySet()) { - conf.set(entry.getKey(), entry.getValue()); - } - return conf; + return table.getCatalog().getConfiguration(); } public static Optional getSerdeProperty(Table table, String key) { From e3f3f470127091cec057eeb7c724206a5204fa3b Mon Sep 17 00:00:00 2001 From: Pxl Date: Fri, 20 Dec 2024 14:19:31 +0800 Subject: [PATCH 25/55] [Chore](profile) add some profile on ReaderInit (#45556) ### What problem does this PR solve?
add some profile on ReaderInit --- be/src/olap/base_tablet.cpp | 5 +- be/src/olap/olap_common.h | 24 ++++++++ be/src/olap/rowset/beta_rowset_reader.cpp | 21 ++++--- be/src/olap/rowset/segment_v2/segment.cpp | 26 ++++++--- be/src/olap/rowset/segment_v2/segment.h | 6 +- .../rowset/segment_v2/segment_iterator.cpp | 10 +++- be/src/olap/tablet_reader.cpp | 7 +++ be/src/pipeline/exec/olap_scan_operator.cpp | 41 +++++++++++++ be/src/pipeline/exec/olap_scan_operator.h | 27 +++++++++ be/src/vec/exec/scan/new_olap_scanner.cpp | 41 +++++++++++++ be/src/vec/olap/block_reader.cpp | 57 +++++++++++-------- be/src/vec/olap/vgeneric_iterators.cpp | 5 +- 12 files changed, 221 insertions(+), 49 deletions(-) diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 82dc122e19f5ef5..33275a2663b329e 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -28,6 +28,7 @@ #include "common/status.h" #include "olap/calc_delete_bitmap_executor.h" #include "olap/delete_bitmap_calculator.h" +#include "olap/iterators.h" #include "olap/memtable.h" #include "olap/partial_update_info.h" #include "olap/primary_key_index.h" @@ -81,7 +82,9 @@ Status _get_segment_column_iterator(const BetaRowsetSharedPtr& rowset, uint32_t rowset->rowset_id().to_string(), segid)); } segment_v2::SegmentSharedPtr segment = *it; - RETURN_IF_ERROR(segment->new_column_iterator(target_column, column_iterator, nullptr)); + StorageReadOptions opts; + opts.stats = stats; + RETURN_IF_ERROR(segment->new_column_iterator(target_column, column_iterator, &opts)); segment_v2::ColumnIteratorOptions opt { .use_page_cache = !config::disable_storage_page_cache, .file_reader = segment->file_reader().get(), diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 3b892e5d360e541..a83e6a6df63e1a6 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -389,6 +389,30 @@ struct OlapReaderStatistics { int64_t collect_iterator_merge_next_timer = 0; int64_t collect_iterator_normal_next_timer = 0; int64_t delete_bitmap_get_agg_ns = 0; + + int64_t tablet_reader_init_timer_ns = 0; + int64_t tablet_reader_capture_rs_readers_timer_ns = 0; + int64_t tablet_reader_init_return_columns_timer_ns = 0; + int64_t tablet_reader_init_keys_param_timer_ns = 0; + int64_t tablet_reader_init_orderby_keys_param_timer_ns = 0; + int64_t tablet_reader_init_conditions_param_timer_ns = 0; + int64_t tablet_reader_init_delete_condition_param_timer_ns = 0; + int64_t block_reader_vcollect_iter_init_timer_ns = 0; + int64_t block_reader_rs_readers_init_timer_ns = 0; + int64_t block_reader_build_heap_init_timer_ns = 0; + + int64_t rowset_reader_get_segment_iterators_timer_ns = 0; + int64_t rowset_reader_create_iterators_timer_ns = 0; + int64_t rowset_reader_init_iterators_timer_ns = 0; + int64_t rowset_reader_load_segments_timer_ns = 0; + + int64_t segment_iterator_init_timer_ns = 0; + int64_t segment_iterator_init_return_column_iterators_timer_ns = 0; + int64_t segment_iterator_init_bitmap_index_iterators_timer_ns = 0; + int64_t segment_iterator_init_inverted_index_iterators_timer_ns = 0; + + int64_t segment_create_column_readers_timer_ns = 0; + int64_t segment_load_index_timer_ns = 0; }; using ColumnId = uint32_t; diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 47cf9b820e8562d..9a4d71587a02c10 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -78,7 +78,6 @@ bool BetaRowsetReader::update_profile(RuntimeProfile* 
profile) { Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context, std::vector* out_iters, bool use_cache) { - RETURN_IF_ERROR(_rowset->load()); _read_context = read_context; // The segment iterator is created with its own statistics, // and the member variable '_stats' is initialized by '_stats(&owned_stats)'. @@ -92,6 +91,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context if (_read_context->stats != nullptr) { _stats = _read_context->stats; } + SCOPED_RAW_TIMER(&_stats->rowset_reader_get_segment_iterators_timer_ns); + + RETURN_IF_ERROR(_rowset->load()); // convert RowsetReaderContext to StorageReadOptions _read_options.block_row_max = read_context->batch_size; @@ -225,9 +227,12 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context bool should_use_cache = use_cache || (_read_context->reader_type == ReaderType::READER_QUERY && enable_segment_cache); SegmentCacheHandle segment_cache_handle; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &segment_cache_handle, - should_use_cache, - /*need_load_pk_index_and_bf*/ false)); + { + SCOPED_RAW_TIMER(&_stats->rowset_reader_load_segments_timer_ns); + RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( + _rowset, &segment_cache_handle, should_use_cache, + /*need_load_pk_index_and_bf*/ false)); + } // create iterator for each segment auto& segments = segment_cache_handle.get_segments(); @@ -253,6 +258,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context const bool use_lazy_init_iterators = !is_merge_iterator && _read_context->reader_type == ReaderType::READER_QUERY; for (int i = seg_start; i < seg_end; i++) { + SCOPED_RAW_TIMER(&_stats->rowset_reader_create_iterators_timer_ns); auto& seg_ptr = segments[i]; std::unique_ptr iter; @@ -317,6 +323,8 @@ Status BetaRowsetReader::_init_iterator() { std::vector iterators; RETURN_IF_ERROR(get_segment_iterators(_read_context, &iterators)); + SCOPED_RAW_TIMER(&_stats->rowset_reader_init_iterators_timer_ns); + if (_read_context->merged_rows == nullptr) { _read_context->merged_rows = &_merged_rows; } @@ -352,8 +360,8 @@ Status BetaRowsetReader::_init_iterator() { } Status BetaRowsetReader::next_block(vectorized::Block* block) { - SCOPED_RAW_TIMER(&_stats->block_fetch_ns); RETURN_IF_ERROR(_init_iterator_once()); + SCOPED_RAW_TIMER(&_stats->block_fetch_ns); if (_empty) { return Status::Error("BetaRowsetReader is empty"); } @@ -381,9 +389,8 @@ Status BetaRowsetReader::next_block(vectorized::Block* block) { } Status BetaRowsetReader::next_block_view(vectorized::BlockView* block_view) { - SCOPED_RAW_TIMER(&_stats->block_fetch_ns); RETURN_IF_ERROR(_init_iterator_once()); - + SCOPED_RAW_TIMER(&_stats->block_fetch_ns); RuntimeState* runtime_state = nullptr; if (_read_context != nullptr) { runtime_state = _read_context->runtime_state; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 513c0be4f8cd14a..d55d84901c2e663 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -228,7 +228,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o if (read_options.runtime_state != nullptr) { _be_exec_version = read_options.runtime_state->be_exec_version(); } - RETURN_IF_ERROR(_create_column_readers_once()); + RETURN_IF_ERROR(_create_column_readers_once(read_options.stats)); read_options.stats->total_segment_number++; // trying to prune the current 
segment by segment-level zone map @@ -288,7 +288,11 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o } } - RETURN_IF_ERROR(load_index()); + { + SCOPED_RAW_TIMER(&read_options.stats->segment_load_index_timer_ns); + RETURN_IF_ERROR(load_index()); + } + if (read_options.delete_condition_predicates->num_of_column_predicate() == 0 && read_options.push_down_agg_type_opt != TPushAggOp::NONE && read_options.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) { @@ -594,7 +598,8 @@ vectorized::DataTypePtr Segment::get_data_type_of(const ColumnIdentifier& identi return nullptr; } -Status Segment::_create_column_readers_once() { +Status Segment::_create_column_readers_once(OlapReaderStatistics* stats) { + SCOPED_RAW_TIMER(&stats->segment_create_column_readers_timer_ns); return _create_column_readers_once_call.call([&] { DCHECK(_footer_pb); Defer defer([&]() { _footer_pb.reset(); }); @@ -868,10 +873,10 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column, Status Segment::new_column_iterator(const TabletColumn& tablet_column, std::unique_ptr* iter, const StorageReadOptions* opt) { - if (opt != nullptr && opt->runtime_state != nullptr) { + if (opt->runtime_state != nullptr) { _be_exec_version = opt->runtime_state->be_exec_version(); } - RETURN_IF_ERROR(_create_column_readers_once()); + RETURN_IF_ERROR(_create_column_readers_once(opt->stats)); // init column iterator by path info if (tablet_column.has_path_info() || tablet_column.is_variant_type()) { @@ -899,8 +904,9 @@ Status Segment::new_column_iterator(const TabletColumn& tablet_column, return Status::OK(); } -Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptr* iter) { - RETURN_IF_ERROR(_create_column_readers_once()); +Status Segment::new_column_iterator(int32_t unique_id, const StorageReadOptions* opt, + std::unique_ptr* iter) { + RETURN_IF_ERROR(_create_column_readers_once(opt->stats)); ColumnIterator* it; RETURN_IF_ERROR(_column_readers.at(unique_id)->new_iterator(&it)); iter->reset(it); @@ -928,8 +934,9 @@ ColumnReader* Segment::_get_column_reader(const TabletColumn& col) { } Status Segment::new_bitmap_index_iterator(const TabletColumn& tablet_column, + const StorageReadOptions& read_options, std::unique_ptr* iter) { - RETURN_IF_ERROR(_create_column_readers_once()); + RETURN_IF_ERROR(_create_column_readers_once(read_options.stats)); ColumnReader* reader = _get_column_reader(tablet_column); if (reader != nullptr && reader->has_bitmap_index()) { BitmapIndexIterator* it; @@ -947,7 +954,7 @@ Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column, if (read_options.runtime_state != nullptr) { _be_exec_version = read_options.runtime_state->be_exec_version(); } - RETURN_IF_ERROR(_create_column_readers_once()); + RETURN_IF_ERROR(_create_column_readers_once(read_options.stats)); ColumnReader* reader = _get_column_reader(tablet_column); if (reader != nullptr && index_meta) { if (_inverted_index_file_reader == nullptr) { @@ -1116,6 +1123,7 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescripto OlapReaderStatistics& stats, std::unique_ptr& iterator_hint) { StorageReadOptions storage_read_opt; + storage_read_opt.stats = &stats; storage_read_opt.io_ctx.reader_type = ReaderType::READER_QUERY; segment_v2::ColumnIteratorOptions opt { .use_page_cache = !config::disable_storage_page_cache, diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 1b20c1f066bdf9f..ca2fee0e77aa82a 100644 --- 
a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -111,9 +111,11 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd std::unique_ptr* iter, const StorageReadOptions* opt); - Status new_column_iterator(int32_t unique_id, std::unique_ptr* iter); + Status new_column_iterator(int32_t unique_id, const StorageReadOptions* opt, + std::unique_ptr* iter); Status new_bitmap_index_iterator(const TabletColumn& tablet_column, + const StorageReadOptions& read_options, std::unique_ptr* iter); Status new_inverted_index_iterator(const TabletColumn& tablet_column, @@ -238,7 +240,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd Status _open_inverted_index(); - Status _create_column_readers_once(); + Status _create_column_readers_once(OlapReaderStatistics* stats); private: friend class SegmentIterator; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index abdf9116756f0ed..0c54eaa2d6cbaa0 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -281,9 +281,10 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { if (_inited) { return Status::OK(); } + _opts = opts; + SCOPED_RAW_TIMER(&_opts.stats->segment_iterator_init_timer_ns); _inited = true; _file_reader = _segment->_file_reader; - _opts = opts; _col_predicates.clear(); for (const auto& predicate : opts.column_predicates) { @@ -1005,6 +1006,7 @@ bool SegmentIterator::_check_all_conditions_passed_inverted_index_for_column(Col } Status SegmentIterator::_init_return_column_iterators() { + SCOPED_RAW_TIMER(&_opts.stats->segment_iterator_init_return_column_iterators_timer_ns); if (_cur_rowid >= num_rows()) { return Status::OK(); } @@ -1047,19 +1049,21 @@ Status SegmentIterator::_init_return_column_iterators() { } Status SegmentIterator::_init_bitmap_index_iterators() { + SCOPED_RAW_TIMER(&_opts.stats->segment_iterator_init_bitmap_index_iterators_timer_ns); if (_cur_rowid >= num_rows()) { return Status::OK(); } for (auto cid : _schema->column_ids()) { if (_bitmap_index_iterators[cid] == nullptr) { - RETURN_IF_ERROR(_segment->new_bitmap_index_iterator(_opts.tablet_schema->column(cid), - &_bitmap_index_iterators[cid])); + RETURN_IF_ERROR(_segment->new_bitmap_index_iterator( + _opts.tablet_schema->column(cid), _opts, &_bitmap_index_iterators[cid])); } } return Status::OK(); } Status SegmentIterator::_init_inverted_index_iterators() { + SCOPED_RAW_TIMER(&_opts.stats->segment_iterator_init_inverted_index_iterators_timer_ns); if (_cur_rowid >= num_rows()) { return Status::OK(); } diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 17cab2a3c0c8345..416d0fea476b320 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -120,6 +120,7 @@ TabletReader::~TabletReader() { } Status TabletReader::init(const ReaderParams& read_params) { + SCOPED_RAW_TIMER(&_stats.tablet_reader_init_timer_ns); _predicate_arena = std::make_unique(); Status res = _init_params(read_params); @@ -159,6 +160,7 @@ bool TabletReader::_optimize_for_single_rowset( } Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { + SCOPED_RAW_TIMER(&_stats.tablet_reader_capture_rs_readers_timer_ns); if (read_params.rs_splits.empty()) { return Status::InternalError("fail to acquire data sources. 
tablet={}", _tablet->tablet_id()); @@ -331,6 +333,7 @@ Status TabletReader::_init_params(const ReaderParams& read_params) { } Status TabletReader::_init_return_columns(const ReaderParams& read_params) { + SCOPED_RAW_TIMER(&_stats.tablet_reader_init_return_columns_timer_ns); if (read_params.reader_type == ReaderType::READER_QUERY) { _return_columns = read_params.return_columns; _tablet_columns_convert_to_null_set = read_params.tablet_columns_convert_to_null_set; @@ -387,6 +390,7 @@ Status TabletReader::_init_return_columns(const ReaderParams& read_params) { } Status TabletReader::_init_keys_param(const ReaderParams& read_params) { + SCOPED_RAW_TIMER(&_stats.tablet_reader_init_keys_param_timer_ns); if (read_params.start_key.empty()) { return Status::OK(); } @@ -461,6 +465,7 @@ Status TabletReader::_init_keys_param(const ReaderParams& read_params) { } Status TabletReader::_init_orderby_keys_param(const ReaderParams& read_params) { + SCOPED_RAW_TIMER(&_stats.tablet_reader_init_orderby_keys_param_timer_ns); // UNIQUE_KEYS will compare all keys as before if (_tablet_schema->keys_type() == DUP_KEYS || (_tablet_schema->keys_type() == UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write())) { @@ -513,6 +518,7 @@ Status TabletReader::_init_orderby_keys_param(const ReaderParams& read_params) { } Status TabletReader::_init_conditions_param(const ReaderParams& read_params) { + SCOPED_RAW_TIMER(&_stats.tablet_reader_init_conditions_param_timer_ns); std::vector predicates; for (const auto& condition : read_params.conditions) { TCondition tmp_cond = condition; @@ -639,6 +645,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const FunctionFilter& functio } Status TabletReader::_init_delete_condition(const ReaderParams& read_params) { + SCOPED_RAW_TIMER(&_stats.tablet_reader_init_delete_condition_param_timer_ns); // If it's cumu and not allow do delete when cumu if (read_params.reader_type == ReaderType::READER_SEGMENT_COMPACTION || (read_params.reader_type == ReaderType::READER_CUMULATIVE_COMPACTION && diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 34fa741ff1ec007..fa91caffa8ebc47 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -150,6 +150,47 @@ Status OlapScanLocalState::_init_profile() { _tablet_counter = ADD_COUNTER(_runtime_profile, "TabletNum", TUnit::UNIT); _key_range_counter = ADD_COUNTER(_runtime_profile, "KeyRangesNum", TUnit::UNIT); _runtime_filter_info = ADD_LABEL_COUNTER_WITH_LEVEL(_runtime_profile, "RuntimeFilterInfo", 1); + + _tablet_reader_init_timer = ADD_TIMER(_scanner_profile, "TabletReaderInitTimer"); + _tablet_reader_capture_rs_readers_timer = + ADD_TIMER(_scanner_profile, "TabletReaderCaptureRsReadersTimer"); + _tablet_reader_init_return_columns_timer = + ADD_TIMER(_scanner_profile, "TabletReaderInitReturnColumnsTimer"); + _tablet_reader_init_keys_param_timer = + ADD_TIMER(_scanner_profile, "TabletReaderInitKeysParamTimer"); + _tablet_reader_init_orderby_keys_param_timer = + ADD_TIMER(_scanner_profile, "TabletReaderInitOrderbyKeysParamTimer"); + _tablet_reader_init_conditions_param_timer = + ADD_TIMER(_scanner_profile, "TabletReaderInitConditionsParamTimer"); + _tablet_reader_init_delete_condition_param_timer = + ADD_TIMER(_scanner_profile, "TabletReaderInitDeleteConditionParamTimer"); + _block_reader_vcollect_iter_init_timer = + ADD_TIMER(_scanner_profile, "BlockReaderVcollectIterInitTimer"); + _block_reader_rs_readers_init_timer = + 
ADD_TIMER(_scanner_profile, "BlockReaderRsReadersInitTimer"); + _block_reader_build_heap_init_timer = + ADD_TIMER(_scanner_profile, "BlockReaderBuildHeapInitTimer"); + + _rowset_reader_get_segment_iterators_timer = + ADD_TIMER(_scanner_profile, "RowsetReaderGetSegmentIteratorsTimer"); + _rowset_reader_create_iterators_timer = + ADD_TIMER(_scanner_profile, "RowsetReaderCreateIteratorsTimer"); + _rowset_reader_init_iterators_timer = + ADD_TIMER(_scanner_profile, "RowsetReaderInitIteratorsTimer"); + _rowset_reader_load_segments_timer = + ADD_TIMER(_scanner_profile, "RowsetReaderLoadSegmentsTimer"); + + _segment_iterator_init_timer = ADD_TIMER(_scanner_profile, "SegmentIteratorInitTimer"); + _segment_iterator_init_return_column_iterators_timer = + ADD_TIMER(_scanner_profile, "SegmentIteratorInitReturnColumnIteratorsTimer"); + _segment_iterator_init_bitmap_index_iterators_timer = + ADD_TIMER(_scanner_profile, "SegmentIteratorInitBitmapIndexIteratorsTimer"); + _segment_iterator_init_inverted_index_iterators_timer = + ADD_TIMER(_scanner_profile, "SegmentIteratorInitInvertedIndexIteratorsTimer"); + + _segment_create_column_readers_timer = + ADD_TIMER(_scanner_profile, "SegmentCreateColumnReadersTimer"); + _segment_load_index_timer = ADD_TIMER(_scanner_profile, "SegmentLoadIndexTimer"); return Status::OK(); } diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index 91980d6a3f172b4..0e8e7223d4b8c50 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -184,6 +184,33 @@ class OlapScanLocalState final : public ScanLocalState { RuntimeProfile::Counter* _runtime_filter_info = nullptr; + // timer about tablet reader + RuntimeProfile::Counter* _tablet_reader_init_timer = nullptr; + RuntimeProfile::Counter* _tablet_reader_capture_rs_readers_timer = nullptr; + RuntimeProfile::Counter* _tablet_reader_init_return_columns_timer = nullptr; + RuntimeProfile::Counter* _tablet_reader_init_keys_param_timer = nullptr; + RuntimeProfile::Counter* _tablet_reader_init_orderby_keys_param_timer = nullptr; + RuntimeProfile::Counter* _tablet_reader_init_conditions_param_timer = nullptr; + RuntimeProfile::Counter* _tablet_reader_init_delete_condition_param_timer = nullptr; + + // timer about block reader + RuntimeProfile::Counter* _block_reader_vcollect_iter_init_timer = nullptr; + RuntimeProfile::Counter* _block_reader_rs_readers_init_timer = nullptr; + RuntimeProfile::Counter* _block_reader_build_heap_init_timer = nullptr; + + RuntimeProfile::Counter* _rowset_reader_get_segment_iterators_timer = nullptr; + RuntimeProfile::Counter* _rowset_reader_create_iterators_timer = nullptr; + RuntimeProfile::Counter* _rowset_reader_init_iterators_timer = nullptr; + RuntimeProfile::Counter* _rowset_reader_load_segments_timer = nullptr; + + RuntimeProfile::Counter* _segment_iterator_init_timer = nullptr; + RuntimeProfile::Counter* _segment_iterator_init_return_column_iterators_timer = nullptr; + RuntimeProfile::Counter* _segment_iterator_init_bitmap_index_iterators_timer = nullptr; + RuntimeProfile::Counter* _segment_iterator_init_inverted_index_iterators_timer = nullptr; + + RuntimeProfile::Counter* _segment_create_column_readers_timer = nullptr; + RuntimeProfile::Counter* _segment_load_index_timer = nullptr; + std::mutex _profile_mtx; }; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 4c0b30e440ecf52..d3a05cbb3c2fe66 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ 
b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -649,6 +649,47 @@ void NewOlapScanner::_collect_profile_before_close() { COUNTER_UPDATE(local_state->_filtered_segment_counter, stats.filtered_segment_number); COUNTER_UPDATE(local_state->_total_segment_counter, stats.total_segment_number); + COUNTER_UPDATE(local_state->_tablet_reader_init_timer, stats.tablet_reader_init_timer_ns); + COUNTER_UPDATE(local_state->_tablet_reader_capture_rs_readers_timer, + stats.tablet_reader_capture_rs_readers_timer_ns); + COUNTER_UPDATE(local_state->_tablet_reader_init_return_columns_timer, + stats.tablet_reader_init_return_columns_timer_ns); + COUNTER_UPDATE(local_state->_tablet_reader_init_keys_param_timer, + stats.tablet_reader_init_keys_param_timer_ns); + COUNTER_UPDATE(local_state->_tablet_reader_init_orderby_keys_param_timer, + stats.tablet_reader_init_orderby_keys_param_timer_ns); + COUNTER_UPDATE(local_state->_tablet_reader_init_conditions_param_timer, + stats.tablet_reader_init_conditions_param_timer_ns); + COUNTER_UPDATE(local_state->_tablet_reader_init_delete_condition_param_timer, + stats.tablet_reader_init_delete_condition_param_timer_ns); + COUNTER_UPDATE(local_state->_block_reader_vcollect_iter_init_timer, + stats.block_reader_vcollect_iter_init_timer_ns); + COUNTER_UPDATE(local_state->_block_reader_rs_readers_init_timer, + stats.block_reader_rs_readers_init_timer_ns); + COUNTER_UPDATE(local_state->_block_reader_build_heap_init_timer, + stats.block_reader_build_heap_init_timer_ns); + + COUNTER_UPDATE(local_state->_rowset_reader_get_segment_iterators_timer, + stats.rowset_reader_get_segment_iterators_timer_ns); + COUNTER_UPDATE(local_state->_rowset_reader_create_iterators_timer, + stats.rowset_reader_create_iterators_timer_ns); + COUNTER_UPDATE(local_state->_rowset_reader_init_iterators_timer, + stats.rowset_reader_init_iterators_timer_ns); + COUNTER_UPDATE(local_state->_rowset_reader_load_segments_timer, + stats.rowset_reader_load_segments_timer_ns); + + COUNTER_UPDATE(local_state->_segment_iterator_init_timer, stats.segment_iterator_init_timer_ns); + COUNTER_UPDATE(local_state->_segment_iterator_init_return_column_iterators_timer, + stats.segment_iterator_init_return_column_iterators_timer_ns); + COUNTER_UPDATE(local_state->_segment_iterator_init_bitmap_index_iterators_timer, + stats.segment_iterator_init_bitmap_index_iterators_timer_ns); + COUNTER_UPDATE(local_state->_segment_iterator_init_inverted_index_iterators_timer, + stats.segment_iterator_init_inverted_index_iterators_timer_ns); + + COUNTER_UPDATE(local_state->_segment_create_column_readers_timer, + stats.segment_create_column_readers_timer_ns); + COUNTER_UPDATE(local_state->_segment_load_index_timer, stats.segment_load_index_timer_ns); + // Update metrics DorisMetrics::instance()->query_scan_bytes->increment( local_state->_read_compressed_counter->value()); diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp index c46ff330f2bef1d..07befd47d887814 100644 --- a/be/src/vec/olap/block_reader.cpp +++ b/be/src/vec/olap/block_reader.cpp @@ -108,40 +108,49 @@ Status BlockReader::_init_collect_iter(const ReaderParams& read_params) { return res; } // check if rowsets are noneoverlapping - _is_rowsets_overlapping = _rowsets_mono_asc_disjoint(read_params); - _vcollect_iter.init(this, _is_rowsets_overlapping, read_params.read_orderby_key, - read_params.read_orderby_key_reverse); + { + SCOPED_RAW_TIMER(&_stats.block_reader_vcollect_iter_init_timer_ns); + _is_rowsets_overlapping = _rowsets_mono_asc_disjoint(read_params); + 
_vcollect_iter.init(this, _is_rowsets_overlapping, read_params.read_orderby_key, + read_params.read_orderby_key_reverse); + } std::vector valid_rs_readers; RuntimeState* runtime_state = read_params.runtime_state; - for (int i = 0; i < read_params.rs_splits.size(); ++i) { - if (runtime_state != nullptr && runtime_state->is_cancelled()) { - return runtime_state->cancel_reason(); - } + { + SCOPED_RAW_TIMER(&_stats.block_reader_rs_readers_init_timer_ns); + for (int i = 0; i < read_params.rs_splits.size(); ++i) { + if (runtime_state != nullptr && runtime_state->is_cancelled()) { + return runtime_state->cancel_reason(); + } - auto& rs_split = read_params.rs_splits[i]; + auto& rs_split = read_params.rs_splits[i]; - // _vcollect_iter.topn_next() will init rs_reader by itself - if (!_vcollect_iter.use_topn_next()) { - RETURN_IF_ERROR(rs_split.rs_reader->init(&_reader_context, rs_split)); - } + // _vcollect_iter.topn_next() will init rs_reader by itself + if (!_vcollect_iter.use_topn_next()) { + RETURN_IF_ERROR(rs_split.rs_reader->init(&_reader_context, rs_split)); + } - Status res = _vcollect_iter.add_child(rs_split); - if (!res.ok() && !res.is()) { - LOG(WARNING) << "failed to add child to iterator, err=" << res; - return res; - } - if (res.ok()) { - valid_rs_readers.push_back(rs_split.rs_reader); + Status res = _vcollect_iter.add_child(rs_split); + if (!res.ok() && !res.is()) { + LOG(WARNING) << "failed to add child to iterator, err=" << res; + return res; + } + if (res.ok()) { + valid_rs_readers.push_back(rs_split.rs_reader); + } } } - RETURN_IF_ERROR(_vcollect_iter.build_heap(valid_rs_readers)); - // _vcollect_iter.topn_next() can not use current_row - if (!_vcollect_iter.use_topn_next()) { - auto status = _vcollect_iter.current_row(&_next_row); - _eof = status.is(); + { + SCOPED_RAW_TIMER(&_stats.block_reader_build_heap_init_timer_ns); + RETURN_IF_ERROR(_vcollect_iter.build_heap(valid_rs_readers)); + // _vcollect_iter.topn_next() can not use current_row + if (!_vcollect_iter.use_topn_next()) { + auto status = _vcollect_iter.current_row(&_next_row); + _eof = status.is(); + } } return Status::OK(); diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp index d8a073fc11a5085..fe37abd08fa7bbd 100644 --- a/be/src/vec/olap/vgeneric_iterators.cpp +++ b/be/src/vec/olap/vgeneric_iterators.cpp @@ -50,9 +50,8 @@ Status VStatisticsIterator::init(const StorageReadOptions& opts) { auto cid = _schema.column_id(i); auto unique_id = _schema.column(cid)->unique_id(); if (_column_iterators_map.count(unique_id) < 1) { - RETURN_IF_ERROR(_segment->new_column_iterator(opts.tablet_schema->column(cid), - &_column_iterators_map[unique_id], - nullptr)); + RETURN_IF_ERROR(_segment->new_column_iterator( + opts.tablet_schema->column(cid), &_column_iterators_map[unique_id], &opts)); } _column_iterators.push_back(_column_iterators_map[unique_id].get()); } From 8b36f79ce779d960fbe00dd48fcff1a83e7016d9 Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Fri, 20 Dec 2024 14:20:23 +0800 Subject: [PATCH 26/55] [ci](perf) add back required of performance check (#45694) --- .asf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.asf.yaml b/.asf.yaml index e3d516b35c19a5d..7a7d845e4c9bb09 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -63,6 +63,7 @@ github: - COMPILE (DORIS_COMPILE) - Need_2_Approval - Cloud UT (Doris Cloud UT) + - performance (Doris Performance) required_pull_request_reviews: dismiss_stale_reviews: true From db3aff97c7899fa7eca54291e27fd9f33d016274 Mon Sep 17 00:00:00 2001 From: 
starocean999 Date: Fri, 20 Dec 2024 14:26:01 +0800 Subject: [PATCH 27/55] [fix](nereids) use equals instead of == to compare String (#45628) when comparing column names, we should use the equals method instead of '==' to compare the content.
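As a quick illustration of the difference (a standalone snippet, not the planner code):
```
public class StringCompareDemo {
    public static void main(String[] args) {
        int i = 1;
        String a = "col_" + i;   // built at runtime: a fresh String object
        String b = "col_1";      // compile-time literal: interned
        System.out.println(a == b);      // false: different references
        System.out.println(a.equals(b)); // true: same contents
    }
}
```
Column names read from table metadata are generally constructed at runtime, so an `==` comparison can fail even when the names are identical.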
--- .../org/apache/doris/nereids/rules/analysis/BindRelation.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index 583244f09028969..d494f90c9cb804d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -257,7 +257,7 @@ private LogicalPlan preAggForRandomDistribution(LogicalOlapScan olapScan) { SlotReference slot = SlotReference.fromColumn(olapTable, col, col.getName(), olapScan.qualified()); ExprId exprId = slot.getExprId(); for (Slot childSlot : childOutputSlots) { - if (childSlot instanceof SlotReference && ((SlotReference) childSlot).getName() == col.getName()) { + if (childSlot instanceof SlotReference && ((SlotReference) childSlot).getName().equals(col.getName())) { exprId = childSlot.getExprId(); slot = slot.withExprId(exprId); break; From e81ecb5fd475de0e02dd7d5c16cbf4db040f61d9 Mon Sep 17 00:00:00 2001 From: minghong Date: Fri, 20 Dec 2024 15:08:02 +0800 Subject: [PATCH 28/55] [fix](nereids) support one phase DeferMaterializeTopN (#45693) ### What problem does this PR solve? make DeferMaterializeTopN support one phase sort. --- ...izeTopNToPhysicalDeferMaterializeTopN.java | 9 +++- .../defer_materialize_topn/one_phase.out | 7 +++ .../cte/test_cte_name_reuse.groovy | 2 +- .../defer_materialize_topn/one_phase.groovy | 50 +++++++++++++++++++ 4 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/defer_materialize_topn/one_phase.out create mode 100644 regression-test/suites/nereids_rules_p0/defer_materialize_topn/one_phase.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalDeferMaterializeTopNToPhysicalDeferMaterializeTopN.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalDeferMaterializeTopNToPhysicalDeferMaterializeTopN.java index 9ad6b73d1c85cf7..2799ca30147bc7c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalDeferMaterializeTopNToPhysicalDeferMaterializeTopN.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalDeferMaterializeTopNToPhysicalDeferMaterializeTopN.java @@ -20,6 +20,7 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.SortPhase; import org.apache.doris.nereids.trees.plans.logical.LogicalDeferMaterializeTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN; @@ -38,8 +39,12 @@ public Rule build() { .build() .transform(topN.getLogicalTopN(), ctx.cascadesContext) .get(0); - return wrap(physicalTopN, topN, wrap((PhysicalTopN) physicalTopN.child(), topN, - ((PhysicalTopN) physicalTopN.child()).child())); + if (physicalTopN.getSortPhase() == SortPhase.MERGE_SORT) { + return wrap(physicalTopN, topN, wrap((PhysicalTopN) physicalTopN.child(), topN, + ((PhysicalTopN) physicalTopN.child()).child())); + } else { + return wrap(physicalTopN, topN, physicalTopN.child()); + } }).toRule(RuleType.LOGICAL_DEFER_MATERIALIZE_TOP_N_TO_PHYSICAL_DEFER_MATERIALIZE_TOP_N_RULE); } diff --git a/regression-test/data/nereids_rules_p0/defer_materialize_topn/one_phase.out b/regression-test/data/nereids_rules_p0/defer_materialize_topn/one_phase.out new file mode 100644 index 000000000000000..cb3f8c9b7ccdf47 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/defer_materialize_topn/one_phase.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +11113 + +-- !2 -- +11113 + diff --git a/regression-test/suites/nereids_rules_p0/cte/test_cte_name_reuse.groovy b/regression-test/suites/nereids_rules_p0/cte/test_cte_name_reuse.groovy index 5d472716f00b733..0f943db5b311d7f 100644 --- a/regression-test/suites/nereids_rules_p0/cte/test_cte_name_reuse.groovy +++ b/regression-test/suites/nereids_rules_p0/cte/test_cte_name_reuse.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -suite("test_cte_name_reuse)") { +suite("test_cte_name_reuse") { sql "SET enable_nereids_planner=true" sql "SET enable_pipeline_engine=true" sql "SET enable_fallback_to_original_planner=false" diff --git a/regression-test/suites/nereids_rules_p0/defer_materialize_topn/one_phase.groovy b/regression-test/suites/nereids_rules_p0/defer_materialize_topn/one_phase.groovy new file mode 100644 index 000000000000000..c7b106eff2a289b --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/defer_materialize_topn/one_phase.groovy @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+suite("one_phase") { + sql """ + drop table if exists users; + + CREATE TABLE `users` ( + `UserID` bigint NULL + ) ENGINE=OLAP + DUPLICATE KEY(`UserID`) + DISTRIBUTED BY HASH(`UserID`) BUCKETS 48 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_medium" = "hdd", + "storage_format" = "V2", + "inverted_index_storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728" + ); + + insert into users values (11111),(11112),(11113); + + """ + + sql "set sort_phase_num=1;" + qt_1 "select userid from users order by userid limit 2, 109000000;" + + sql "set sort_phase_num=2;" + qt_2 "select userid from users order by userid limit 2, 109000000;" + +} \ No newline at end of file From c87180a3e430df3f43cf734e88b04ab1aa55b3cd Mon Sep 17 00:00:00 2001 From: hui lai Date: Fri, 20 Dec 2024 15:27:46 +0800 Subject: [PATCH 29/55] [fix](cloud) fix abort transaction in runningTxns list when show routine load (#45629) There are some aborted transactions left in the running transaction list when executing `Show routine load`: ``` statistic: {"receivedBytes":690347731,"runningTxns":[84983868539904,85435786230784,85005343163392,85437129268225,85454778056704,85435116123136,85025611246592,85437060583424,85434241746944,85415318736896,85465045433344,84985143969794,85004337471488,85415183878144,85415385197568,85424109151232,85004808868865,85005412474880,85025545732096,85414981022720,84984677082113,85436924459012],"errorRows":0,"committedTaskNum":211,"loadedRows":3612290,"loadRowsRate":195026,"abortedTaskNum":1,"errorRowsAfterResumed":0,"totalRows":3612290,"unselectedRows":0,"receivedBytesRate":37271770,"taskExecuteTimeMs":18522} ``` When aborting a transaction against the meta service, the transaction info in `abortTxnResponse` is left at its default value if the abort fails. The following logic is then ineffective, because the transaction id it removes is the default value: ``` this.jobStatistic.runningTxnIds.remove(txnState.getTransactionId()); ``` --- .../doris/cloud/transaction/CloudGlobalTransactionMgr.java | 1 + 1 file changed, 1 insertion(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java index 11a3f05ead70c4c..b9425245f421b22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java @@ -1288,6 +1288,7 @@ private void handleAfterAbort(AbortTxnResponse abortTxnResponse, TxnCommitAttach if (txnCommitAttachment != null && txnCommitAttachment instanceof RLTaskTxnCommitAttachment) { RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment = (RLTaskTxnCommitAttachment) txnCommitAttachment; callbackId = rlTaskTxnCommitAttachment.getJobId(); + txnState.setTransactionId(transactionId); } cb = callbackFactory.getCallback(callbackId); From fef962aea6cc38c59e548faa87f8c70ca5552873 Mon Sep 17 00:00:00 2001 From: hui lai Date: Fri, 20 Dec 2024 15:29:21 +0800 Subject: [PATCH 30/55] [fix](routine load) make routine load delay eof schedule work (#45528) --- .../load/routineload/KafkaRoutineLoadJob.java | 2 +- .../doris/load/routineload/KafkaTaskInfo.java | 9 +- .../load/routineload/RoutineLoadTaskInfo.java | 9 +-
.../routineload/KafkaRoutineLoadJobTest.java | 2 +- .../RoutineLoadTaskSchedulerTest.java | 2 +- .../transaction/GlobalTransactionMgrTest.java | 4 +- .../load_p0/routine_load/data/test_eof.csv | 1 + .../routine_load/test_routine_load_eof.groovy | 178 ++++++++++++++++++ 8 files changed, 195 insertions(+), 12 deletions(-) create mode 100644 regression-test/suites/load_p0/routine_load/data/test_eof.csv create mode 100644 regression-test/suites/load_p0/routine_load/test_routine_load_eof.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java index 6bdef3301a610e0..d0843eb92044f0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java @@ -235,7 +235,7 @@ public void divideRoutineLoadJob(int currentConcurrentTaskNum) throws UserExcept } KafkaTaskInfo kafkaTaskInfo = new KafkaTaskInfo(UUID.randomUUID(), id, maxBatchIntervalS * Config.routine_load_task_timeout_multiplier * 1000, - taskKafkaProgress, isMultiTable()); + taskKafkaProgress, isMultiTable(), -1, false); routineLoadTaskInfoList.add(kafkaTaskInfo); result.add(kafkaTaskInfo); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java index f1578269529a126..e3292dc671f8b4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaTaskInfo.java @@ -49,16 +49,17 @@ public class KafkaTaskInfo extends RoutineLoadTaskInfo { private Map partitionIdToOffset; public KafkaTaskInfo(UUID id, long jobId, - long timeoutMs, Map partitionIdToOffset, boolean isMultiTable) { - super(id, jobId, timeoutMs, isMultiTable); + long timeoutMs, Map partitionIdToOffset, boolean isMultiTable, + long lastScheduledTime, boolean isEof) { + super(id, jobId, timeoutMs, isMultiTable, lastScheduledTime, isEof); this.partitionIdToOffset = partitionIdToOffset; } public KafkaTaskInfo(KafkaTaskInfo kafkaTaskInfo, Map partitionIdToOffset, boolean isMultiTable) { super(UUID.randomUUID(), kafkaTaskInfo.getJobId(), - kafkaTaskInfo.getTimeoutMs(), kafkaTaskInfo.getBeId(), isMultiTable); + kafkaTaskInfo.getTimeoutMs(), kafkaTaskInfo.getBeId(), isMultiTable, + kafkaTaskInfo.getLastScheduledTime(), kafkaTaskInfo.getIsEof()); this.partitionIdToOffset = partitionIdToOffset; - this.isEof = kafkaTaskInfo.getIsEof(); } public List getPartitions() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskInfo.java index 1ff825d97b9d179..5075311299d6035 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskInfo.java @@ -79,17 +79,20 @@ public abstract class RoutineLoadTaskInfo { // so that user or other logic can know the status of the corresponding txn. 
protected TransactionStatus txnStatus = TransactionStatus.UNKNOWN; - public RoutineLoadTaskInfo(UUID id, long jobId, long timeoutMs, boolean isMultiTable) { + public RoutineLoadTaskInfo(UUID id, long jobId, long timeoutMs, boolean isMultiTable, + long lastScheduledTime, boolean isEof) { this.id = id; this.jobId = jobId; this.createTimeMs = System.currentTimeMillis(); this.timeoutMs = timeoutMs; this.isMultiTable = isMultiTable; + this.lastScheduledTime = lastScheduledTime; + this.isEof = isEof; } public RoutineLoadTaskInfo(UUID id, long jobId, long timeoutMs, long previousBeId, - boolean isMultiTable) { - this(id, jobId, timeoutMs, isMultiTable); + boolean isMultiTable, long lastScheduledTime, boolean isEof) { + this(id, jobId, timeoutMs, isMultiTable, lastScheduledTime, isEof); this.previousBeId = previousBeId; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KafkaRoutineLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KafkaRoutineLoadJobTest.java index 20cb626ff370550..63452a5d59ca114 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KafkaRoutineLoadJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KafkaRoutineLoadJobTest.java @@ -225,7 +225,7 @@ public void testProcessTimeOutTasks(@Injectable GlobalTransactionMgr globalTrans Map partitionIdsToOffset = Maps.newHashMap(); partitionIdsToOffset.put(100, 0L); KafkaTaskInfo kafkaTaskInfo = new KafkaTaskInfo(new UUID(1, 1), 1L, - maxBatchIntervalS * 2 * 1000, partitionIdsToOffset, false); + maxBatchIntervalS * 2 * 1000, partitionIdsToOffset, false, -1, false); kafkaTaskInfo.setExecuteStartTimeMs(System.currentTimeMillis() - maxBatchIntervalS * 2 * 1000 - 1); routineLoadTaskInfoList.add(kafkaTaskInfo); diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadTaskSchedulerTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadTaskSchedulerTest.java index 95c2423de71fa94..6e11fc5f71a5c5d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadTaskSchedulerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadTaskSchedulerTest.java @@ -70,7 +70,7 @@ public void testRunOneCycle(@Injectable KafkaRoutineLoadJob kafkaRoutineLoadJob1 LinkedBlockingDeque routineLoadTaskInfoQueue = new LinkedBlockingDeque<>(); KafkaTaskInfo routineLoadTaskInfo1 = new KafkaTaskInfo(new UUID(1, 1), 1L, 20000, - partitionIdToOffset, false); + partitionIdToOffset, false, -1, false); routineLoadTaskInfoQueue.addFirst(routineLoadTaskInfo1); Map idToRoutineLoadTask = Maps.newHashMap(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/transaction/GlobalTransactionMgrTest.java b/fe/fe-core/src/test/java/org/apache/doris/transaction/GlobalTransactionMgrTest.java index 420800a4bb3bd01..c4ec468c651856f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/transaction/GlobalTransactionMgrTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/transaction/GlobalTransactionMgrTest.java @@ -302,7 +302,7 @@ public void testCommitRoutineLoadTransaction(@Injectable TabletCommitInfo tablet Map partitionIdToOffset = Maps.newHashMap(); partitionIdToOffset.put(1, 0L); KafkaTaskInfo routineLoadTaskInfo = new KafkaTaskInfo(UUID.randomUUID(), 1L, 20000, - partitionIdToOffset, false); + partitionIdToOffset, false, -1, false); Deencapsulation.setField(routineLoadTaskInfo, "txnId", 1L); routineLoadTaskInfoList.add(routineLoadTaskInfo); TransactionState transactionState = new 
TransactionState(1L, Lists.newArrayList(1L), 1L, "label", null, @@ -368,7 +368,7 @@ public void testCommitRoutineLoadTransactionWithErrorMax(@Injectable TabletCommi Map partitionIdToOffset = Maps.newHashMap(); partitionIdToOffset.put(1, 0L); KafkaTaskInfo routineLoadTaskInfo = new KafkaTaskInfo(UUID.randomUUID(), 1L, 20000, - partitionIdToOffset, false); + partitionIdToOffset, false, -1, false); Deencapsulation.setField(routineLoadTaskInfo, "txnId", 1L); routineLoadTaskInfoList.add(routineLoadTaskInfo); TransactionState transactionState = new TransactionState(1L, Lists.newArrayList(1L), 1L, "label", null, diff --git a/regression-test/suites/load_p0/routine_load/data/test_eof.csv b/regression-test/suites/load_p0/routine_load/data/test_eof.csv new file mode 100644 index 000000000000000..bc857cabcfdb5c6 --- /dev/null +++ b/regression-test/suites/load_p0/routine_load/data/test_eof.csv @@ -0,0 +1 @@ +57|2023-08-19|TRUE|2|-25462|-74112029|6458082754318544493|-7910671781690629051|-15205.859375|-306870797.484914|759730669.0|-628556336.0|2023-07-10 18:39:10|2023-02-12|2023-01-27 07:26:06|y||Xi9nDVrLv8m6AwEpUxmtzFAuK48sQ|{"name": "John", "age": 25, "city": "New York"} \ No newline at end of file diff --git a/regression-test/suites/load_p0/routine_load/test_routine_load_eof.groovy b/regression-test/suites/load_p0/routine_load/test_routine_load_eof.groovy new file mode 100644 index 000000000000000..6eeb9a4e51c7b47 --- /dev/null +++ b/regression-test/suites/load_p0/routine_load/test_routine_load_eof.groovy @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
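+// Suite summary: a background producer writes one small batch of rows to the
+// "test_eof" topic every second, so nearly every scheduled task hits EOF right away.
+// Since lastScheduledTime and isEof are now carried through task re-creation (see the
+// KafkaTaskInfo/RoutineLoadTaskInfo constructor changes above), the scheduler can
+// delay rescheduling a task that reached EOF instead of committing a tiny transaction
+// on every cycle; after the job runs for about 60 seconds, the suite fails if
+// committedTaskNum exceeds 20.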
+ +import org.apache.kafka.clients.admin.AdminClient +import org.apache.kafka.clients.producer.KafkaProducer +import org.apache.kafka.clients.producer.ProducerRecord +import org.apache.kafka.clients.producer.ProducerConfig + +suite("test_routine_load_eof","p0") { + def kafkaCsvTpoics = [ + "test_eof", + ] + + String enabled = context.config.otherConfigs.get("enableKafkaTest") + String kafka_port = context.config.otherConfigs.get("kafka_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + def kafka_broker = "${externalEnvIp}:${kafka_port}" + + if (enabled != null && enabled.equalsIgnoreCase("true")) { + def thread = Thread.start { + // define kafka + def props = new Properties() + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "${kafka_broker}".toString()) + props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") + props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer") + // Create kafka producer + def producer = new KafkaProducer<>(props) + + while(true) { + Thread.sleep(1000) + for (String kafkaCsvTopic in kafkaCsvTpoics) { + def txt = new File("""${context.file.parent}/data/${kafkaCsvTopic}.csv""").text + def lines = txt.readLines() + lines.each { line -> + logger.info("=====${line}========") + def record = new ProducerRecord<>(kafkaCsvTopic, null, line) + producer.send(record) + } + } + } + } + + sleep(2 * 1000) + + def jobName = "testEof" + def tableName = "test_routine_load_eof" + try { + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} + ( + k00 INT NOT NULL, + k01 DATE NOT NULL, + k02 BOOLEAN NULL, + k03 TINYINT NULL, + k04 SMALLINT NULL, + k05 INT NULL, + k06 BIGINT NULL, + k07 LARGEINT NULL, + k08 FLOAT NULL, + k09 DOUBLE NULL, + k10 DECIMAL(9,1) NULL, + k11 DECIMALV3(9,1) NULL, + k12 DATETIME NULL, + k13 DATEV2 NULL, + k14 DATETIMEV2 NULL, + k15 CHAR NULL, + k16 VARCHAR NULL, + k17 STRING NULL, + k18 JSON NULL, + kd01 BOOLEAN NOT NULL DEFAULT "TRUE", + kd02 TINYINT NOT NULL DEFAULT "1", + kd03 SMALLINT NOT NULL DEFAULT "2", + kd04 INT NOT NULL DEFAULT "3", + kd05 BIGINT NOT NULL DEFAULT "4", + kd06 LARGEINT NOT NULL DEFAULT "5", + kd07 FLOAT NOT NULL DEFAULT "6.0", + kd08 DOUBLE NOT NULL DEFAULT "7.0", + kd09 DECIMAL NOT NULL DEFAULT "888888888", + kd10 DECIMALV3 NOT NULL DEFAULT "999999999", + kd11 DATE NOT NULL DEFAULT "2023-08-24", + kd12 DATETIME NOT NULL DEFAULT "2023-08-24 12:00:00", + kd13 DATEV2 NOT NULL DEFAULT "2023-08-24", + kd14 DATETIMEV2 NOT NULL DEFAULT "2023-08-24 12:00:00", + kd15 CHAR(255) NOT NULL DEFAULT "我能吞下玻璃而不伤身体", + kd16 VARCHAR(300) NOT NULL DEFAULT "我能吞下玻璃而不伤身体", + kd17 STRING NOT NULL DEFAULT "我能吞下玻璃而不伤身体", + kd18 JSON NULL, + + INDEX idx_inverted_k104 (`k05`) USING INVERTED, + INDEX idx_inverted_k110 (`k11`) USING INVERTED, + INDEX idx_inverted_k113 (`k13`) USING INVERTED, + INDEX idx_inverted_k114 (`k14`) USING INVERTED, + INDEX idx_inverted_k117 (`k17`) USING INVERTED PROPERTIES("parser" = "english"), + INDEX idx_ngrambf_k115 (`k15`) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="256"), + INDEX idx_ngrambf_k116 (`k16`) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="256"), + INDEX idx_ngrambf_k117 (`k17`) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="256"), + + INDEX idx_bitmap_k104 (`k02`) USING BITMAP, + INDEX idx_bitmap_k110 (`kd01`) USING BITMAP + + ) + DUPLICATE KEY(k00) + PARTITION BY RANGE(k01) + ( + PARTITION p1 VALUES [('2023-08-01'), ('2023-08-11')), + PARTITION p2 VALUES 
[('2023-08-11'), ('2023-08-21')), + PARTITION p3 VALUES [('2023-08-21'), ('2023-09-01')) + ) + DISTRIBUTED BY HASH(k00) BUCKETS 32 + PROPERTIES ( + "bloom_filter_columns"="k05", + "replication_num" = "1" + ); + """ + sql "sync" + + sql """ + CREATE ROUTINE LOAD ${jobName} on ${tableName} + COLUMNS(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18), + COLUMNS TERMINATED BY "|" + PROPERTIES + ( + "max_batch_interval" = "5", + "max_batch_rows" = "300000", + "max_batch_size" = "209715200" + ) + FROM KAFKA + ( + "kafka_broker_list" = "${externalEnvIp}:${kafka_port}", + "kafka_topic" = "test_eof", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + sql "sync" + + def count = 0 + while (true) { + sleep(1000) + def res = sql "show routine load for ${jobName}" + def state = res[0][8].toString() + if (state != "RUNNING") { + count++ + if (count > 60) { + assertEquals(1, 2) + } + continue; + } + break; + } + sleep(60 * 1000) + def res = sql "show routine load for ${jobName}" + def statistic = res[0][14].toString() + def json = parseJson(res[0][14]) + log.info("routine load statistic: ${res[0][14].toString()}".toString()) + if (json.committedTaskNum > 20) { + assertEquals(1, 2) + } + } finally { + sql "stop routine load for ${jobName}" + sql "DROP TABLE IF EXISTS ${tableName}" + } + thread.interrupt() + } +} \ No newline at end of file From fc512ab97715f3882d848f62e577f03383976ffd Mon Sep 17 00:00:00 2001 From: zhangdong Date: Fri, 20 Dec 2024 15:30:03 +0800 Subject: [PATCH 31/55] [enhance](mtmv)Change the way to verify the existence of partition names when refreshing MTMV (#45290) ### What problem does this PR solve? Previously, when refreshing the materialized view according to the partition name, the existing partitions of the materialized view were used to verify whether the partition name existed. After the change, the partitions that should be present after refreshing the materialized view are used for verification. --- .../plans/commands/info/RefreshMTMVInfo.java | 39 +++++++++- .../test_refresh_partition_name_mtmv.groovy | 76 +++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 regression-test/suites/mtmv_p0/test_refresh_partition_name_mtmv.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/RefreshMTMVInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/RefreshMTMVInfo.java index bf483f87a152c39..94e9243306989aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/RefreshMTMVInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/RefreshMTMVInfo.java @@ -17,23 +17,33 @@ package org.apache.doris.nereids.trees.plans.commands.info; +import org.apache.doris.analysis.AllPartitionDesc; +import org.apache.doris.analysis.SinglePartitionDesc; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MTMV; +import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.MetaNotFoundException; +import org.apache.doris.common.util.MetaLockUtils; +import org.apache.doris.mtmv.MTMVPartitionInfo.MTMVPartitionType; import org.apache.doris.mtmv.MTMVPartitionUtil; +import org.apache.doris.mtmv.MTMVRelatedTableIf; import org.apache.doris.mysql.privilege.PrivPredicate; import
org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.util.Utils; import org.apache.doris.qe.ConnectContext; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.commons.collections.CollectionUtils; +import java.util.Comparator; import java.util.List; import java.util.Objects; +import java.util.Set; /** * refresh mtmv info @@ -67,13 +77,40 @@ public void analyze(ConnectContext ctx) { Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(mvName.getDb()); MTMV mtmv = (MTMV) db.getTableOrMetaException(mvName.getTbl(), TableType.MATERIALIZED_VIEW); if (!CollectionUtils.isEmpty(partitions)) { - MTMVPartitionUtil.getPartitionsIdsByNames(mtmv, partitions); + checkPartitionExist(mtmv); } } catch (org.apache.doris.common.AnalysisException | MetaNotFoundException | DdlException e) { throw new AnalysisException(e.getMessage()); } } + private void checkPartitionExist(MTMV mtmv) throws org.apache.doris.common.AnalysisException { + MTMVRelatedTableIf relatedTable = mtmv.getMvPartitionInfo().getRelatedTable(); + List tables = Lists.newArrayList(mtmv, relatedTable); + tables.sort(Comparator.comparing(TableIf::getId)); + MetaLockUtils.readLockTables(tables); + try { + if (mtmv.getMvPartitionInfo().getPartitionType().equals(MTMVPartitionType.SELF_MANAGE)) { + throw new AnalysisException( + "The partition method of this asynchronous materialized view " + + "does not support refreshing by partition"); + } + List partitionDescs = MTMVPartitionUtil.getPartitionDescsByRelatedTable( + mtmv.getTableProperty().getProperties(), mtmv.getMvPartitionInfo(), mtmv.getMvProperties()); + Set shouldExistPartitionNames = Sets.newHashSetWithExpectedSize(partitionDescs.size()); + partitionDescs.stream().forEach(desc -> { + shouldExistPartitionNames.add(((SinglePartitionDesc) desc).getPartitionName()); + }); + for (String partition : partitions) { + if (!shouldExistPartitionNames.contains(partition)) { + throw new org.apache.doris.common.AnalysisException("partition not exist: " + partition); + } + } + } finally { + MetaLockUtils.readUnlockTables(tables); + } + } + /** * getMvName * diff --git a/regression-test/suites/mtmv_p0/test_refresh_partition_name_mtmv.groovy b/regression-test/suites/mtmv_p0/test_refresh_partition_name_mtmv.groovy new file mode 100644 index 000000000000000..a18945339838d98 --- /dev/null +++ b/regression-test/suites/mtmv_p0/test_refresh_partition_name_mtmv.groovy @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
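+// Suite summary: the MV partitions by `k3` and tracks the base table's list
+// partitions. Refreshing with a partition name that would not exist even after a
+// refresh ("p_4" before the base table gains a fourth partition) must fail with
+// "partition not exist"; once partition p4 is added to the base table, refreshing
+// MV partition p_4 succeeds, because validation now checks the partitions that
+// should exist after the refresh rather than the MV's current partitions.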
+ +import org.junit.Assert; + +suite("test_refresh_partition_name_mtmv","mtmv") { + String suiteName = "test_refresh_partition_name_mtmv" + String tableName = "${suiteName}_table" + String mvName = "${suiteName}_mv" + sql """drop table if exists `${tableName}`""" + sql """drop materialized view if exists ${mvName};""" + + sql """ + CREATE TABLE ${tableName} + ( + k2 TINYINT, + k3 INT not null + ) + COMMENT "my first table" + PARTITION BY LIST(`k3`) + ( + PARTITION `p1` VALUES IN ('1'), + PARTITION `p2` VALUES IN ('2'), + PARTITION `p3` VALUES IN ('3') + ) + DISTRIBUTED BY HASH(k2) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`k3`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ( + 'replication_num' = '1', + 'refresh_partition_num' = '2' + ) + AS + SELECT * from ${tableName}; + """ + + test { + sql """ + REFRESH MATERIALIZED VIEW ${mvName} partitions(p_4) + """ + exception "partition not exist" + } + + sql """ + alter table ${tableName} add PARTITION `p4` VALUES IN ('4') + """ + sql """ + REFRESH MATERIALIZED VIEW ${mvName} partitions(p_4) + """ + + waitingMTMVTaskFinishedByMvName(mvName) + + sql """drop table if exists `${tableName}`""" + sql """drop materialized view if exists ${mvName};""" +} From 9bde47c7ab39f3de18b1e47c08f39c2fd820e9eb Mon Sep 17 00:00:00 2001 From: KassieZ <139741991+KassieZ@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:57:52 +0800 Subject: [PATCH 32/55] [docs](readme) Update README (#43283) ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: Update README about v3.X release ### Check List (For Committer) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. - Release note None ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --------- Co-authored-by: Jeffrey --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 94f9f4b777f8f50..3d264ee13ed8ad3 100644 --- a/README.md +++ b/README.md @@ -59,12 +59,9 @@ Apache Doris is an easy-to-use, high-performance and real-time analytical databa All this makes Apache Doris an ideal tool for scenarios including report analysis, ad-hoc query, unified data warehouse, and data lake query acceleration. On Apache Doris, users can build various applications, such as user behavior analysis, AB test platform, log retrieval analysis, user portrait analysis, and order analysis. -🎉 Version 2.1.4 released now. Check out the 🔗[Release Notes](https://doris.apache.org/docs/releasenotes/release-2.1.4) here.
The 2.1 verison delivers exceptional performance with 100% higher out-of-the-box queries proven by TPC-DS 1TB tests, enhanced data lake analytics that are 4-6 times speedier than Trino and Spark, solid support for semi-structured data analysis with new Variant types and suite of analytical functions, asynchronous materialized views for query acceleration, optimized real-time writing at scale, and better workload management with stability and runtime SQL resource tracking. +🎉 Check out the 🔗[All releases](https://doris.apache.org/docs/releasenotes/all-release), where you'll find a chronological summary of Apache Doris versions released over the past year. - -🎉 Version 2.0.12 is now released ! This fully evolved and stable release is ready for all users to upgrade. Check out the 🔗[Release Notes](https://doris.apache.org/docs/2.0/releasenotes/release-2.0.12) here. - -👀 Have a look at the 🔗[Official Website](https://doris.apache.org/) for a comprehensive list of Apache Doris's core features, blogs and user cases. +👀 Explore the 🔗[Official Website](https://doris.apache.org/) to discover Apache Doris's core features, blogs, and user cases in detail. ## 📈 Usage Scenarios From 62a6360a9881743a501d7e5a74063abebadc14a8 Mon Sep 17 00:00:00 2001 From: lihangyu Date: Fri, 20 Dec 2024 16:33:15 +0800 Subject: [PATCH 33/55] [Optimize](Variant) optimize schema update performance (#45480) Updating schemas under high concurrency is expensive. 1. update the schema only when the rowset's row count is not 0 2. copy_from is expensive, use the copy constructor instead --- .../olap/rowset/segment_v2/segment_writer.cpp | 4 +++- be/src/olap/rowset_builder.cpp | 24 ++++++++++--------- be/src/olap/tablet_schema.cpp | 15 ++++++++++++ be/src/olap/tablet_schema.h | 3 +++ be/src/vec/common/schema_util.cpp | 5 ++-- 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index fe465f98a2aad2e..2457a44de39e107 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -363,7 +363,9 @@ Status SegmentWriter::append_block_with_variant_subcolumns(vectorized::Block& da continue; } if (_flush_schema == nullptr) { - _flush_schema = std::make_shared<TabletSchema>(*_tablet_schema); + _flush_schema = std::make_shared<TabletSchema>(); + // deep copy + _flush_schema->copy_from(*_tablet_schema); } auto column_ref = data.get_by_position(i).column; const vectorized::ColumnObject& object_column = assert_cast<const vectorized::ColumnObject&>( diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp index ec7463d5b9d75d1..ccc006e1f040a6b 100644 --- a/be/src/olap/rowset_builder.cpp +++ b/be/src/olap/rowset_builder.cpp @@ -346,21 +346,22 @@ Status RowsetBuilder::commit_txn() { SCOPED_TIMER(_commit_txn_timer); const RowsetWriterContext& rw_ctx = _rowset_writer->context(); - if (rw_ctx.tablet_schema->num_variant_columns() > 0) { + if (rw_ctx.tablet_schema->num_variant_columns() > 0 && _rowset->num_rows() > 0) { // Need to merge schema with `rw_ctx.merged_tablet_schema` in prior, // merged schema keeps the newest merged schema for the rowset, which is updated and merged // during flushing segments.
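A minimal standalone sketch of the deep-copy vs. shadow-copy trade-off this patch relies on (simplified types and hypothetical names, not the actual TabletSchema): copy_from clones every column object, while the shadow copy reuses the compiler-generated member-wise copy and then drops the column references so the caller rebuilds only the columns it needs.

#include <cstdint>
#include <memory>
#include <vector>

struct Column { int32_t unique_id = 0; /* plus heavy per-column metadata */ };

struct SchemaSketch {
    std::vector<std::shared_ptr<Column>> cols; // the expensive part
    int32_t schema_version = 0;                // cheap scalar attributes

    // Deep copy: clones every column object; costly when schemas are
    // updated repeatedly under high write concurrency.
    void copy_from(const SchemaSketch& other) {
        schema_version = other.schema_version;
        cols.clear();
        for (const auto& c : other.cols) {
            cols.push_back(std::make_shared<Column>(*c));
        }
    }

    // Shadow copy: member-wise copy via the implicit copy assignment
    // (each shared_ptr copy is just a refcount bump), then drop the
    // column refs so the caller rebuilds only the columns it needs.
    void shadow_copy_without_columns(const SchemaSketch& other) {
        *this = other;
        cols.clear();
    }
};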
if (rw_ctx.merged_tablet_schema != nullptr) { RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.merged_tablet_schema)); + } else { + // We should merge rowset schema further, in case that the merged_tablet_schema maybe null + // when enable_memtable_on_sink_node is true, the merged_tablet_schema will not be passed to + // the destination backend. + // update tablet schema when meet variant columns, before commit_txn + // Eg. rowset schema: A(int), B(float), C(int), D(int) + // _tablet->tablet_schema: A(bigint), B(double) + // => update_schema: A(bigint), B(double), C(int), D(int) + RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema)); } - // We should merge rowset schema further, in case that the merged_tablet_schema maybe null - // when enable_memtable_on_sink_node is true, the merged_tablet_schema will not be passed to - // the destination backend. - // update tablet schema when meet variant columns, before commit_txn - // Eg. rowset schema: A(int), B(float), C(int), D(int) - // _tabelt->tablet_schema: A(bigint), B(double) - // => update_schema: A(bigint), B(double), C(int), D(int) - RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema)); } // Transfer ownership of `PendingRowsetGuard` to `TxnManager` @@ -398,7 +399,6 @@ Status BaseRowsetBuilder::cancel() { void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id, const OlapTableSchemaParam* table_schema_param, const TabletSchema& ori_tablet_schema) { - _tablet_schema->copy_from(ori_tablet_schema); // find the right index id int i = 0; auto indexes = table_schema_param->indexes(); @@ -407,11 +407,13 @@ void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id, break; } } - if (!indexes.empty() && !indexes[i]->columns.empty() && indexes[i]->columns[0]->unique_id() >= 0) { + _tablet_schema->shawdow_copy_without_columns(ori_tablet_schema); _tablet_schema->build_current_tablet_schema(index_id, table_schema_param->version(), indexes[i], ori_tablet_schema); + } else { + _tablet_schema->copy_from(ori_tablet_schema); } if (_tablet_schema->schema_version() > ori_tablet_schema.schema_version()) { // After schema change, should include extracted column diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 3ec5d22166477f9..7b6b5f313c144ee 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1064,6 +1064,21 @@ void TabletSchema::copy_from(const TabletSchema& tablet_schema) { _table_id = tablet_schema.table_id(); } +void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) { + *this = tablet_schema; + _field_path_to_index.clear(); + _field_name_to_index.clear(); + _field_id_to_index.clear(); + _num_columns = 0; + _num_variant_columns = 0; + _num_null_columns = 0; + _num_key_columns = 0; + _cols.clear(); + _vl_field_mem_size = 0; + // notice : do not ref columns + _column_cache_handlers.clear(); +} + void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) { for (auto& col : _cols) { if (col->unique_id() < 0) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index c813d6f0ef8722b..3dfe055fbf4a89d 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -330,6 +330,8 @@ class TabletSchema : public MetadataAdder { // Must make sure the row column is always the last column void add_row_column(); void copy_from(const TabletSchema& tablet_schema); + // lightweight copy, take care of lifecycle of TabletColumn + void
shawdow_copy_without_columns(const TabletSchema& tablet_schema); void update_index_info_from(const TabletSchema& tablet_schema); std::string to_key() const; // get_metadata_size is only the memory of the TabletSchema itself, not include child objects. @@ -531,6 +533,7 @@ class TabletSchema : public MetadataAdder { private: friend bool operator==(const TabletSchema& a, const TabletSchema& b); friend bool operator!=(const TabletSchema& a, const TabletSchema& b); + TabletSchema(const TabletSchema&) = default; void clear_column_cache_handlers(); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 2b1c71c643d6135..2b53fc2470290b0 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -415,9 +415,8 @@ Status get_least_common_schema(const std::vector& schemas, // duplicated paths following the update_least_common_schema process. auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr& base_schema) { output_schema = std::make_shared(); - output_schema->copy_from(*base_schema); - // Merge columns from other schemas - output_schema->clear_columns(); + // not copy columns but only shadow copy other attributes + output_schema->shawdow_copy_without_columns(*base_schema); // Get all columns without extracted columns and collect variant col unique id for (const TabletColumnPtr& col : base_schema->columns()) { if (col->is_variant_type()) { From af11693f48f7406438471a79718b67b3f8851731 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Fri, 20 Dec 2024 20:27:18 +0800 Subject: [PATCH 34/55] [opt](profile) add index page profile for io (#45675) --- be/src/olap/base_tablet.cpp | 6 +- be/src/olap/delete_bitmap_calculator.cpp | 5 +- be/src/olap/primary_key_index.cpp | 12 ++-- be/src/olap/primary_key_index.h | 12 ++-- .../segment_v2/bloom_filter_index_reader.cpp | 15 ++--- .../segment_v2/bloom_filter_index_reader.h | 12 ++-- .../olap/rowset/segment_v2/column_reader.cpp | 62 +++++++++++-------- be/src/olap/rowset/segment_v2/column_reader.h | 26 +++++--- .../segment_v2/indexed_column_reader.cpp | 11 ++-- .../rowset/segment_v2/indexed_column_reader.h | 4 +- .../rowset/segment_v2/ordinal_page_index.cpp | 16 +++-- .../rowset/segment_v2/ordinal_page_index.h | 5 +- be/src/olap/rowset/segment_v2/segment.cpp | 37 ++++++----- be/src/olap/rowset/segment_v2/segment.h | 12 ++-- .../rowset/segment_v2/segment_iterator.cpp | 2 +- .../olap/rowset/segment_v2/zone_map_index.cpp | 15 +++-- .../olap/rowset/segment_v2/zone_map_index.h | 6 +- be/test/olap/date_bloom_filter_test.cpp | 8 +-- be/test/olap/primary_key_index_test.cpp | 20 +++--- .../bloom_filter_index_reader_writer_test.cpp | 4 +- .../segment_v2/ordinal_page_index_test.cpp | 4 +- be/test/olap/segment_cache_test.cpp | 2 +- 22 files changed, 164 insertions(+), 132 deletions(-) diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 33275a2663b329e..a4720f89d19be67 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -499,7 +499,7 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest for (auto id : picked_segments) { Status s = segments[id]->lookup_row_key(encoded_key, schema, with_seq_col, with_rowid, - &loc, encoded_seq_value, stats); + &loc, stats, encoded_seq_value); if (s.is()) { continue; } @@ -615,7 +615,7 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, vectorized::Block ordered_block = block.clone_empty(); uint32_t pos = 0; - RETURN_IF_ERROR(seg->load_pk_index_and_bf()); // We need 
index blocks to iterate + RETURN_IF_ERROR(seg->load_pk_index_and_bf(nullptr)); // We need index blocks to iterate const auto* pk_idx = seg->get_primary_key_index(); int total = pk_idx->num_rows(); uint32_t row_id = 0; @@ -629,7 +629,7 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, std::vector> segment_caches(specified_rowsets.size()); while (remaining > 0) { std::unique_ptr iter; - RETURN_IF_ERROR(pk_idx->new_iterator(&iter)); + RETURN_IF_ERROR(pk_idx->new_iterator(&iter, nullptr)); size_t num_to_read = std::min(batch_size, remaining); auto index_type = vectorized::DataTypeFactory::instance().create_data_type( diff --git a/be/src/olap/delete_bitmap_calculator.cpp b/be/src/olap/delete_bitmap_calculator.cpp index 017e3cff3d0489d..8ac05a1e393043d 100644 --- a/be/src/olap/delete_bitmap_calculator.cpp +++ b/be/src/olap/delete_bitmap_calculator.cpp @@ -145,12 +145,11 @@ Status MergeIndexDeleteBitmapCalculator::init(RowsetId rowset_id, MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length, _rowid_length); _contexts.reserve(segments.size()); _heap = std::make_unique(_comparator); - for (auto& segment : segments) { - RETURN_IF_ERROR(segment->load_index()); + RETURN_IF_ERROR(segment->load_index(nullptr)); auto pk_idx = segment->get_primary_key_index(); std::unique_ptr index; - RETURN_IF_ERROR(pk_idx->new_iterator(&index)); + RETURN_IF_ERROR(pk_idx->new_iterator(&index, nullptr)); auto index_type = vectorized::DataTypeFactory::instance().create_data_type( pk_idx->type_info()->type(), 1, 0); _contexts.emplace_back(std::move(index), index_type, segment->id(), pk_idx->num_rows()); diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp index 5f7bedb01fc8de0..00b72832ee60e03 100644 --- a/be/src/olap/primary_key_index.cpp +++ b/be/src/olap/primary_key_index.cpp @@ -95,27 +95,29 @@ Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta) } Status PrimaryKeyIndexReader::parse_index(io::FileReaderSPtr file_reader, - const segment_v2::PrimaryKeyIndexMetaPB& meta) { + const segment_v2::PrimaryKeyIndexMetaPB& meta, + OlapReaderStatistics* pk_index_load_stats) { // parse primary key index _index_reader.reset(new segment_v2::IndexedColumnReader(file_reader, meta.primary_key_index())); _index_reader->set_is_pk_index(true); RETURN_IF_ERROR(_index_reader->load(!config::disable_pk_storage_page_cache, false, - _pk_index_load_stats)); + pk_index_load_stats)); _index_parsed = true; return Status::OK(); } Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader, - const segment_v2::PrimaryKeyIndexMetaPB& meta) { + const segment_v2::PrimaryKeyIndexMetaPB& meta, + OlapReaderStatistics* pk_index_load_stats) { // parse bloom filter segment_v2::ColumnIndexMetaPB column_index_meta = meta.bloom_filter_index(); segment_v2::BloomFilterIndexReader bf_index_reader(std::move(file_reader), column_index_meta.bloom_filter_index()); RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false, - _pk_index_load_stats)); + pk_index_load_stats)); std::unique_ptr bf_iter; - RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter)); + RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter, pk_index_load_stats)); RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf)); segment_v2::g_pk_total_bloom_filter_num << 1; segment_v2::g_pk_total_bloom_filter_total_bytes << _bf->size(); diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index dcbbc5f30625f4e..f74d3e42030f2f2 100644 --- 
a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -98,8 +98,7 @@ class PrimaryKeyIndexBuilder { class PrimaryKeyIndexReader { public: - PrimaryKeyIndexReader(OlapReaderStatistics* pk_index_load_stats = nullptr) - : _index_parsed(false), _bf_parsed(false), _pk_index_load_stats(pk_index_load_stats) {} + PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {} ~PrimaryKeyIndexReader() { segment_v2::g_pk_total_bloom_filter_num << -static_cast(_bf_num); @@ -109,12 +108,14 @@ class PrimaryKeyIndexReader { } Status parse_index(io::FileReaderSPtr file_reader, - const segment_v2::PrimaryKeyIndexMetaPB& meta); + const segment_v2::PrimaryKeyIndexMetaPB& meta, + OlapReaderStatistics* pk_index_load_stats); - Status parse_bf(io::FileReaderSPtr file_reader, const segment_v2::PrimaryKeyIndexMetaPB& meta); + Status parse_bf(io::FileReaderSPtr file_reader, const segment_v2::PrimaryKeyIndexMetaPB& meta, + OlapReaderStatistics* pk_index_load_stats); Status new_iterator(std::unique_ptr* index_iterator, - OlapReaderStatistics* stats = nullptr) const { + OlapReaderStatistics* stats) const { DCHECK(_index_parsed); index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get(), stats)); return Status::OK(); @@ -155,7 +156,6 @@ class PrimaryKeyIndexReader { std::unique_ptr _bf; size_t _bf_num = 0; uint64 _bf_bytes = 0; - OlapReaderStatistics* _pk_index_load_stats = nullptr; }; } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp index 8c63c25d20acee8..7c51f0a24c1b1d4 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp @@ -34,9 +34,8 @@ namespace segment_v2 { Status BloomFilterIndexReader::load(bool use_page_cache, bool kept_in_memory, OlapReaderStatistics* index_load_stats) { // TODO yyq: implement a new once flag to avoid status construct. - _index_load_stats = index_load_stats; - return _load_once.call([this, use_page_cache, kept_in_memory] { - return _load(use_page_cache, kept_in_memory); + return _load_once.call([this, use_page_cache, kept_in_memory, index_load_stats] { + return _load(use_page_cache, kept_in_memory, index_load_stats); }); } @@ -45,20 +44,22 @@ int64_t BloomFilterIndexReader::get_metadata_size() const { (_bloom_filter_index_meta ? 
_bloom_filter_index_meta->ByteSizeLong() : 0); } -Status BloomFilterIndexReader::_load(bool use_page_cache, bool kept_in_memory) { +Status BloomFilterIndexReader::_load(bool use_page_cache, bool kept_in_memory, + OlapReaderStatistics* index_load_stats) { const IndexedColumnMetaPB& bf_index_meta = _bloom_filter_index_meta->bloom_filter(); _bloom_filter_reader.reset(new IndexedColumnReader(_file_reader, bf_index_meta)); - RETURN_IF_ERROR(_bloom_filter_reader->load(use_page_cache, kept_in_memory, _index_load_stats)); + RETURN_IF_ERROR(_bloom_filter_reader->load(use_page_cache, kept_in_memory, index_load_stats)); update_metadata_size(); return Status::OK(); } -Status BloomFilterIndexReader::new_iterator(std::unique_ptr* iterator) { +Status BloomFilterIndexReader::new_iterator(std::unique_ptr* iterator, + OlapReaderStatistics* index_load_stats) { DBUG_EXECUTE_IF("BloomFilterIndexReader::new_iterator.fail", { return Status::InternalError("new_iterator for bloom filter index failed"); }); - iterator->reset(new BloomFilterIndexIterator(this)); + iterator->reset(new BloomFilterIndexIterator(this, index_load_stats)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h index fcb0239a2440fa3..fb53af89c0fe92f 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h @@ -48,17 +48,18 @@ class BloomFilterIndexReader : public MetadataAdder { } Status load(bool use_page_cache, bool kept_in_memory, - OlapReaderStatistics* _bf_index_load_stats = nullptr); + OlapReaderStatistics* bf_index_load_stats); BloomFilterAlgorithmPB algorithm() { return _bloom_filter_index_meta->algorithm(); } // create a new column iterator. - Status new_iterator(std::unique_ptr* iterator); + Status new_iterator(std::unique_ptr* iterator, + OlapReaderStatistics* index_load_stats); const TypeInfo* type_info() const { return _type_info; } private: - Status _load(bool use_page_cache, bool kept_in_memory); + Status _load(bool use_page_cache, bool kept_in_memory, OlapReaderStatistics* index_load_stats); int64_t get_metadata_size() const override; @@ -70,13 +71,12 @@ class BloomFilterIndexReader : public MetadataAdder { const TypeInfo* _type_info = nullptr; std::unique_ptr _bloom_filter_index_meta = nullptr; std::unique_ptr _bloom_filter_reader; - OlapReaderStatistics* _index_load_stats = nullptr; }; class BloomFilterIndexIterator { public: - explicit BloomFilterIndexIterator(BloomFilterIndexReader* reader) - : _reader(reader), _bloom_filter_iter(reader->_bloom_filter_reader.get()) {} + explicit BloomFilterIndexIterator(BloomFilterIndexReader* reader, OlapReaderStatistics* stats) + : _reader(reader), _bloom_filter_iter(reader->_bloom_filter_reader.get(), stats) {} // Read bloom filter at the given ordinal into `bf`. 
Status read_bloom_filter(rowid_t ordinal, std::unique_ptr* bf); diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 9d5328de8693048..78c415530cd0291 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -374,10 +374,12 @@ Status ColumnReader::read_page(const ColumnIteratorOptions& iter_opts, const Pag Status ColumnReader::get_row_ranges_by_zone_map( const AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, RowRanges* row_ranges) { + const std::vector* delete_predicates, RowRanges* row_ranges, + const ColumnIteratorOptions& iter_opts) { std::vector page_indexes; - RETURN_IF_ERROR(_get_filtered_pages(col_predicates, delete_predicates, &page_indexes)); - RETURN_IF_ERROR(_calculate_row_ranges(page_indexes, row_ranges)); + RETURN_IF_ERROR( + _get_filtered_pages(col_predicates, delete_predicates, &page_indexes, iter_opts)); + RETURN_IF_ERROR(_calculate_row_ranges(page_indexes, row_ranges, iter_opts)); return Status::OK(); } @@ -514,8 +516,8 @@ bool ColumnReader::_zone_map_match_condition(const ZoneMapPB& zone_map, Status ColumnReader::_get_filtered_pages( const AndBlockColumnPredicate* col_predicates, const std::vector* delete_predicates, - std::vector* page_indexes) { - RETURN_IF_ERROR(_load_zone_map_index(_use_index_page_cache, _opts.kept_in_memory)); + std::vector* page_indexes, const ColumnIteratorOptions& iter_opts) { + RETURN_IF_ERROR(_load_zone_map_index(_use_index_page_cache, _opts.kept_in_memory, iter_opts)); FieldType type = _type_info->type(); const std::vector& zone_maps = _zone_map_index->page_zone_maps(); @@ -553,9 +555,10 @@ Status ColumnReader::_get_filtered_pages( } Status ColumnReader::_calculate_row_ranges(const std::vector& page_indexes, - RowRanges* row_ranges) { + RowRanges* row_ranges, + const ColumnIteratorOptions& iter_opts) { row_ranges->clear(); - RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory)); + RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory, iter_opts)); for (auto i : page_indexes) { ordinal_t page_first_id = _ordinal_index->get_first_ordinal(i); ordinal_t page_last_id = _ordinal_index->get_last_ordinal(i); @@ -566,12 +569,14 @@ Status ColumnReader::_calculate_row_ranges(const std::vector& page_ind } Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicate* col_predicates, - RowRanges* row_ranges) { - RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory)); - RETURN_IF_ERROR(_load_bloom_filter_index(_use_index_page_cache, _opts.kept_in_memory)); + RowRanges* row_ranges, + const ColumnIteratorOptions& iter_opts) { + RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory, iter_opts)); + RETURN_IF_ERROR( + _load_bloom_filter_index(_use_index_page_cache, _opts.kept_in_memory, iter_opts)); RowRanges bf_row_ranges; std::unique_ptr bf_iter; - RETURN_IF_ERROR(_bloom_filter_index->new_iterator(&bf_iter)); + RETURN_IF_ERROR(_bloom_filter_index->new_iterator(&bf_iter, iter_opts.stats)); size_t range_size = row_ranges->range_size(); // get covered page ids std::set page_ids; @@ -598,16 +603,18 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(const AndBlockColumnPredicat return Status::OK(); } -Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) { +Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory, + const 
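The hunks that follow all apply one idiom: an index-load entry point takes the caller's OlapReaderStatistics instead of caching a stats pointer at construction time, and falls back to a throwaway local object when the caller passes nullptr. A minimal sketch of that idiom, with simplified stand-in types rather than the real BE structs:

#include <cstdint>

struct FileCacheStatistics { int64_t hit_cnt = 0; };      // simplified stand-in
struct OlapReaderStatistics {                              // simplified stand-in
    FileCacheStatistics file_cache_stats;
    int64_t index_pages_read = 0;
};

// Each load routine now receives per-query stats from its caller so index-page
// IO is attributed to the query profile; a throwaway local object absorbs the
// counters when no caller-provided stats exist (e.g. background paths).
void load_index_page(OlapReaderStatistics* stats) {
    OlapReaderStatistics tmp_stats;
    OlapReaderStatistics* stats_ptr = stats != nullptr ? stats : &tmp_stats;
    // ... perform the page read, wiring stats_ptr and
    // &stats_ptr->file_cache_stats into the read options ...
    stats_ptr->index_pages_read += 1;
}

int main() {
    OlapReaderStatistics query_stats;
    load_index_page(&query_stats); // profiled path: counters land in query_stats
    load_index_page(nullptr);      // fallback path: counters are discarded
    return query_stats.index_pages_read == 1 ? 0 : 1;
}

The same null-check-and-fallback shows up below in OrdinalIndexReader::_load and Segment::load_index.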
ColumnIteratorOptions& iter_opts) { if (!_ordinal_index) { return Status::InternalError("ordinal_index not inited"); } - return _ordinal_index->load(use_page_cache, kept_in_memory); + return _ordinal_index->load(use_page_cache, kept_in_memory, iter_opts.stats); } -Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) { +Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory, + const ColumnIteratorOptions& iter_opts) { if (_zone_map_index != nullptr) { - return _zone_map_index->load(use_page_cache, kept_in_memory); + return _zone_map_index->load(use_page_cache, kept_in_memory, iter_opts.stats); } return Status::OK(); } @@ -681,15 +688,17 @@ bool ColumnReader::has_bloom_filter_index(bool ngram) const { } } -Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) { +Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory, + const ColumnIteratorOptions& iter_opts) { if (_bloom_filter_index != nullptr) { - return _bloom_filter_index->load(use_page_cache, kept_in_memory); + return _bloom_filter_index->load(use_page_cache, kept_in_memory, iter_opts.stats); } return Status::OK(); } -Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter) { - RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory)); +Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter, + const ColumnIteratorOptions& iter_opts) { + RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory, iter_opts)); *iter = _ordinal_index->begin(); if (!iter->valid()) { return Status::NotFound("Failed to seek to first rowid"); @@ -697,8 +706,9 @@ Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter) { return Status::OK(); } -Status ColumnReader::seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter) { - RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory)); +Status ColumnReader::seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter, + const ColumnIteratorOptions& iter_opts) { + RETURN_IF_ERROR(_load_ordinal_index(_use_index_page_cache, _opts.kept_in_memory, iter_opts)); *iter = _ordinal_index->seek_at_or_before(ordinal); if (!iter->valid()) { return Status::NotFound("Failed to seek to ordinal {}, ", ordinal); @@ -1172,7 +1182,7 @@ Status FileColumnIterator::init(const ColumnIteratorOptions& opts) { FileColumnIterator::~FileColumnIterator() = default; Status FileColumnIterator::seek_to_first() { - RETURN_IF_ERROR(_reader->seek_to_first(&_page_iter)); + RETURN_IF_ERROR(_reader->seek_to_first(&_page_iter, _opts)); RETURN_IF_ERROR(_read_data_page(_page_iter)); _seek_to_pos_in_page(&_page, 0); @@ -1183,7 +1193,7 @@ Status FileColumnIterator::seek_to_first() { Status FileColumnIterator::seek_to_ordinal(ordinal_t ord) { // if current page contains this row, we don't need to seek if (!_page || !_page.contains(ord) || !_page_iter.valid()) { - RETURN_IF_ERROR(_reader->seek_at_or_before(ord, &_page_iter)); + RETURN_IF_ERROR(_reader->seek_at_or_before(ord, &_page_iter, _opts)); RETURN_IF_ERROR(_read_data_page(_page_iter)); } _seek_to_pos_in_page(&_page, ord - _page.first_ordinal); @@ -1431,8 +1441,8 @@ Status FileColumnIterator::get_row_ranges_by_zone_map( const AndBlockColumnPredicate* col_predicates, const std::vector* delete_predicates, RowRanges* row_ranges) { if (_reader->has_zone_map()) { - RETURN_IF_ERROR( - _reader->get_row_ranges_by_zone_map(col_predicates, delete_predicates, row_ranges)); + 
RETURN_IF_ERROR(_reader->get_row_ranges_by_zone_map(col_predicates, delete_predicates, + row_ranges, _opts)); } return Status::OK(); } @@ -1441,7 +1451,7 @@ Status FileColumnIterator::get_row_ranges_by_bloom_filter( const AndBlockColumnPredicate* col_predicates, RowRanges* row_ranges) { if ((col_predicates->can_do_bloom_filter(false) && _reader->has_bloom_filter_index(false)) || (col_predicates->can_do_bloom_filter(true) && _reader->has_bloom_filter_index(true))) { - RETURN_IF_ERROR(_reader->get_row_ranges_by_bloom_filter(col_predicates, row_ranges)); + RETURN_IF_ERROR(_reader->get_row_ranges_by_bloom_filter(col_predicates, row_ranges, _opts)); } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index d72d802f97769be..7e32b3a09b34dac 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -148,8 +148,9 @@ class ColumnReader : public MetadataAdder { std::unique_ptr* iterator); // Seek to the first entry in the column. - Status seek_to_first(OrdinalPageIndexIterator* iter); - Status seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter); + Status seek_to_first(OrdinalPageIndexIterator* iter, const ColumnIteratorOptions& iter_opts); + Status seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter, + const ColumnIteratorOptions& iter_opts); // read a page from file into a page handle Status read_page(const ColumnIteratorOptions& iter_opts, const PagePointer& pp, @@ -175,11 +176,13 @@ class ColumnReader : public MetadataAdder { // - delete_condition is a delete predicate of one version Status get_row_ranges_by_zone_map(const AndBlockColumnPredicate* col_predicates, const std::vector* delete_predicates, - RowRanges* row_ranges); + RowRanges* row_ranges, + const ColumnIteratorOptions& iter_opts); // get row ranges with bloom filter index Status get_row_ranges_by_bloom_filter(const AndBlockColumnPredicate* col_predicates, - RowRanges* row_ranges); + RowRanges* row_ranges, + const ColumnIteratorOptions& iter_opts); PagePointer get_dict_page_pointer() const { return _meta_dict_page; } @@ -219,13 +222,16 @@ class ColumnReader : public MetadataAdder { return Status::OK(); } - [[nodiscard]] Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory); - [[nodiscard]] Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory); + [[nodiscard]] Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory, + const ColumnIteratorOptions& iter_opts); + [[nodiscard]] Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory, + const ColumnIteratorOptions& iter_opts); [[nodiscard]] Status _load_bitmap_index(bool use_page_cache, bool kept_in_memory); [[nodiscard]] Status _load_inverted_index_index( std::shared_ptr index_file_reader, const TabletIndex* index_meta); - [[nodiscard]] Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory); + [[nodiscard]] Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory, + const ColumnIteratorOptions& iter_opts); bool _zone_map_match_condition(const ZoneMapPB& zone_map, WrapperField* min_value_container, WrapperField* max_value_container, @@ -239,9 +245,11 @@ class ColumnReader : public MetadataAdder { Status _get_filtered_pages(const AndBlockColumnPredicate* col_predicates, const std::vector* delete_predicates, - std::vector* page_indexes); + std::vector* page_indexes, + const ColumnIteratorOptions& iter_opts); - Status 
_calculate_row_ranges(const std::vector& page_indexes, RowRanges* row_ranges); + Status _calculate_row_ranges(const std::vector& page_indexes, RowRanges* row_ranges, + const ColumnIteratorOptions& iter_opts); int64_t get_metadata_size() const override; diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp index da6beff5d8d6a2d..3f582293ee4d7fa 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp @@ -66,7 +66,6 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, OlapReaderStatistics* index_load_stats) { _use_page_cache = use_page_cache; _kept_in_memory = kept_in_memory; - _index_load_stats = index_load_stats; _type_info = get_scalar_type_info((FieldType)_meta.data_type()); if (_type_info == nullptr) { @@ -82,7 +81,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, } else { RETURN_IF_ERROR(load_index_page(_meta.ordinal_index_meta().root_page(), &_ordinal_index_page_handle, - _ordinal_index_reader.get())); + _ordinal_index_reader.get(), index_load_stats)); _has_index_page = true; } } @@ -93,7 +92,8 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, _sole_data_page = PagePointer(_meta.value_index_meta().root_page()); } else { RETURN_IF_ERROR(load_index_page(_meta.value_index_meta().root_page(), - &_value_index_page_handle, _value_index_reader.get())); + &_value_index_page_handle, _value_index_reader.get(), + index_load_stats)); _has_index_page = true; } } @@ -104,13 +104,14 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory, } Status IndexedColumnReader::load_index_page(const PagePointerPB& pp, PageHandle* handle, - IndexPageReader* reader) { + IndexPageReader* reader, + OlapReaderStatistics* index_load_stats) { Slice body; PageFooterPB footer; BlockCompressionCodec* local_compress_codec; RETURN_IF_ERROR(get_block_compression_codec(_meta.compression(), &local_compress_codec)); RETURN_IF_ERROR(read_page(PagePointer(pp), handle, &body, &footer, INDEX_PAGE, - local_compress_codec, false, _index_load_stats)); + local_compress_codec, false, index_load_stats)); RETURN_IF_ERROR(reader->parse(body, footer.index_page_footer())); _mem_size += body.get_size(); return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h index c9640c0007c153b..6e62feaafdcdd10 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h @@ -76,7 +76,8 @@ class IndexedColumnReader : public MetadataAdder { void set_is_pk_index(bool is_pk) { _is_pk_index = is_pk; } private: - Status load_index_page(const PagePointerPB& pp, PageHandle* handle, IndexPageReader* reader); + Status load_index_page(const PagePointerPB& pp, PageHandle* handle, IndexPageReader* reader, + OlapReaderStatistics* index_load_stats); int64_t get_metadata_size() const override; @@ -103,7 +104,6 @@ class IndexedColumnReader : public MetadataAdder { const KeyCoder* _value_key_coder = nullptr; uint64_t _mem_size = 0; bool _is_pk_index = false; - OlapReaderStatistics* _index_load_stats = nullptr; }; class IndexedColumnIterator { diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp index 9ee82bacdd73d23..4995e779892646a 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp +++ 
b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp @@ -69,15 +69,17 @@ Status OrdinalIndexWriter::finish(io::FileWriter* file_writer, ColumnIndexMetaPB return Status::OK(); } -Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory) { +Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory, + OlapReaderStatistics* index_load_stats) { // TODO yyq: implement a new once flag to avoid status construct. - return _load_once.call([this, use_page_cache, kept_in_memory] { - return _load(use_page_cache, kept_in_memory, std::move(_meta_pb)); + return _load_once.call([this, use_page_cache, kept_in_memory, index_load_stats] { + return _load(use_page_cache, kept_in_memory, std::move(_meta_pb), index_load_stats); }); } Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory, - std::unique_ptr index_meta) { + std::unique_ptr index_meta, + OlapReaderStatistics* stats) { if (index_meta->root_page().is_root_data_page()) { // only one data page, no index page _num_pages = 1; @@ -88,6 +90,7 @@ Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory, } // need to read index page OlapReaderStatistics tmp_stats; + OlapReaderStatistics* stats_ptr = stats != nullptr ? stats : &tmp_stats; PageReadOptions opts { .use_page_cache = use_page_cache, .kept_in_memory = kept_in_memory, @@ -96,8 +99,9 @@ Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory, .page_pointer = PagePointer(index_meta->root_page().root_page()), // ordinal index page uses NO_COMPRESSION right now .codec = nullptr, - .stats = &tmp_stats, - .io_ctx = io::IOContext {.is_index_data = true}, + .stats = stats_ptr, + .io_ctx = io::IOContext {.is_index_data = true, + .file_cache_stats = &stats_ptr->file_cache_stats}, }; // read index page diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.h b/be/src/olap/rowset/segment_v2/ordinal_page_index.h index 1d74cf989520aa9..df60edb12d14816 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.h +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h @@ -75,7 +75,7 @@ class OrdinalIndexReader : public MetadataAdder { virtual ~OrdinalIndexReader(); // load and parse the index page into memory - Status load(bool use_page_cache, bool kept_in_memory); + Status load(bool use_page_cache, bool kept_in_memory, OlapReaderStatistics* index_load_stats); // the returned iter points to the largest element which is less than `ordinal`, // or points to the first element if all elements are greater than `ordinal`, @@ -94,7 +94,8 @@ class OrdinalIndexReader : public MetadataAdder { private: Status _load(bool use_page_cache, bool kept_in_memory, - std::unique_ptr index_meta); + std::unique_ptr index_meta, + OlapReaderStatistics* index_load_stats); int64_t get_metadata_size() const override; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index d55d84901c2e663..b5ab3f0e873549a 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -290,7 +290,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o { SCOPED_RAW_TIMER(&read_options.stats->segment_load_index_timer_ns); - RETURN_IF_ERROR(load_index()); + RETURN_IF_ERROR(load_index(read_options.stats)); } if (read_options.delete_condition_predicates->num_of_column_predicate() == 0 && @@ -475,7 +475,7 @@ Status Segment::_parse_footer(SegmentFooterPB* footer) { return Status::OK(); } -Status Segment::_load_pk_bloom_filter() { +Status 
Segment::_load_pk_bloom_filter(OlapReaderStatistics* stats) { #ifdef BE_TEST if (_pk_index_meta == nullptr) { // for BE UT "segment_cache_test" @@ -490,30 +490,30 @@ Status Segment::_load_pk_bloom_filter() { DCHECK(_pk_index_meta != nullptr); DCHECK(_pk_index_reader != nullptr); - return _load_pk_bf_once.call([this] { - RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, *_pk_index_meta)); + return _load_pk_bf_once.call([this, stats] { + RETURN_IF_ERROR(_pk_index_reader->parse_bf(_file_reader, *_pk_index_meta, stats)); // _meta_mem_usage += _pk_index_reader->get_bf_memory_size(); return Status::OK(); }); } Status Segment::load_pk_index_and_bf(OlapReaderStatistics* index_load_stats) { - _pk_index_load_stats = index_load_stats; - RETURN_IF_ERROR(load_index()); - RETURN_IF_ERROR(_load_pk_bloom_filter()); + RETURN_IF_ERROR(load_index(index_load_stats)); + RETURN_IF_ERROR(_load_pk_bloom_filter(index_load_stats)); return Status::OK(); } -Status Segment::load_index() { - return _load_index_once.call([this] { +Status Segment::load_index(OlapReaderStatistics* stats) { + return _load_index_once.call([this, stats] { if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr) { - _pk_index_reader = std::make_unique(_pk_index_load_stats); - RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta)); + _pk_index_reader = std::make_unique(); + RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta, stats)); // _meta_mem_usage += _pk_index_reader->get_memory_size(); return Status::OK(); } else { // read and parse short key index page OlapReaderStatistics tmp_stats; + OlapReaderStatistics* stats_ptr = stats != nullptr ? stats : &tmp_stats; PageReadOptions opts { .use_page_cache = true, .type = INDEX_PAGE, @@ -522,7 +522,8 @@ Status Segment::load_index() { // short key index page uses NO_COMPRESSION for now .codec = nullptr, .stats = &tmp_stats, - .io_ctx = io::IOContext {.is_index_data = true}, + .io_ctx = io::IOContext {.is_index_data = true, + .file_cache_stats = &stats_ptr->file_cache_stats}, }; Slice body; PageFooterPB footer; @@ -970,8 +971,8 @@ Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column, Status Segment::lookup_row_key(const Slice& key, const TabletSchema* latest_schema, bool with_seq_col, bool with_rowid, RowLocation* row_location, - std::string* encoded_seq_value, OlapReaderStatistics* stats) { - RETURN_IF_ERROR(load_pk_index_and_bf()); + OlapReaderStatistics* stats, std::string* encoded_seq_value) { + RETURN_IF_ERROR(load_pk_index_and_bf(stats)); bool has_seq_col = latest_schema->has_sequence_col(); bool has_rowid = !latest_schema->cluster_key_uids().empty(); size_t seq_col_length = 0; @@ -1071,9 +1072,10 @@ Status Segment::lookup_row_key(const Slice& key, const TabletSchema* latest_sche } Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) { - RETURN_IF_ERROR(load_pk_index_and_bf()); + OlapReaderStatistics* null_stat = nullptr; + RETURN_IF_ERROR(load_pk_index_and_bf(null_stat)); std::unique_ptr iter; - RETURN_IF_ERROR(_pk_index_reader->new_iterator(&iter)); + RETURN_IF_ERROR(_pk_index_reader->new_iterator(&iter, null_stat)); auto index_type = vectorized::DataTypeFactory::instance().create_data_type( _pk_index_reader->type_info()->type(), 1, 0); @@ -1129,7 +1131,8 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema& schema, SlotDescripto .use_page_cache = !config::disable_storage_page_cache, .file_reader = file_reader().get(), .stats = &stats, - .io_ctx = io::IOContext 
{.reader_type = ReaderType::READER_QUERY}, + .io_ctx = io::IOContext {.reader_type = ReaderType::READER_QUERY, + .file_cache_stats = &stats.file_cache_stats}, }; std::vector single_row_loc {row_id}; if (!slot->column_paths().empty()) { diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index ca2fee0e77aa82a..441ae3e85e9b3f1 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -134,9 +134,8 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd } Status lookup_row_key(const Slice& key, const TabletSchema* latest_schema, bool with_seq_col, - bool with_rowid, RowLocation* row_location, - std::string* encoded_seq_value = nullptr, - OlapReaderStatistics* stats = nullptr); + bool with_rowid, RowLocation* row_location, OlapReaderStatistics* stats, + std::string* encoded_seq_value = nullptr); Status read_key_by_rowid(uint32_t row_id, std::string* key); @@ -144,9 +143,9 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd vectorized::MutableColumnPtr& result, OlapReaderStatistics& stats, std::unique_ptr& iterator_hint); - Status load_index(); + Status load_index(OlapReaderStatistics* stats); - Status load_pk_index_and_bf(OlapReaderStatistics* index_load_stats = nullptr); + Status load_pk_index_and_bf(OlapReaderStatistics* stats); void update_healthy_status(Status new_status) { _healthy_status.update(new_status); } // The segment is loaded into SegmentCache and then will load indices, if there are something wrong @@ -227,7 +226,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd Status _open(); Status _parse_footer(SegmentFooterPB* footer); Status _create_column_readers(const SegmentFooterPB& footer); - Status _load_pk_bloom_filter(); + Status _load_pk_bloom_filter(OlapReaderStatistics* stats); ColumnReader* _get_column_reader(const TabletColumn& col); // Get Iterator which will read variant root column and extract with paths and types info @@ -305,7 +304,6 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd InvertedIndexFileInfo _idx_file_info; int _be_exec_version = BeExecVersionManager::get_newest_version(); - OlapReaderStatistics* _pk_index_load_stats = nullptr; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 0c54eaa2d6cbaa0..5f50ffeea2d8f0c 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1181,7 +1181,7 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool bool exact_match = false; std::unique_ptr index_iterator; - RETURN_IF_ERROR(pk_index_reader->new_iterator(&index_iterator)); + RETURN_IF_ERROR(pk_index_reader->new_iterator(&index_iterator, _opts.stats)); Status status = index_iterator->seek_at_or_after(&index_key, &exact_match); if (UNLIKELY(!status.ok())) { diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp index c2139ff0899090d..9249c82aedfdc38 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp +++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp @@ -140,18 +140,21 @@ Status TypedZoneMapIndexWriter::finish(io::FileWriter* file_writer, return writer.finish(meta->mutable_page_zone_maps()); } -Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) { +Status ZoneMapIndexReader::load(bool use_page_cache, bool 
kept_in_memory, + OlapReaderStatistics* index_load_stats) { // TODO yyq: implement a new once flag to avoid status construct. - return _load_once.call([this, use_page_cache, kept_in_memory] { - return _load(use_page_cache, kept_in_memory, std::move(_page_zone_maps_meta)); + return _load_once.call([this, use_page_cache, kept_in_memory, index_load_stats] { + return _load(use_page_cache, kept_in_memory, std::move(_page_zone_maps_meta), + index_load_stats); }); } Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory, - std::unique_ptr page_zone_maps_meta) { + std::unique_ptr page_zone_maps_meta, + OlapReaderStatistics* index_load_stats) { IndexedColumnReader reader(_file_reader, *page_zone_maps_meta); - RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory)); - IndexedColumnIterator iter(&reader); + RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory, index_load_stats)); + IndexedColumnIterator iter(&reader, index_load_stats); _page_zone_maps.resize(reader.num_values()); diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.h b/be/src/olap/rowset/segment_v2/zone_map_index.h index 34869bbbfeea625..04cae12975c5fab 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.h +++ b/be/src/olap/rowset/segment_v2/zone_map_index.h @@ -154,14 +154,16 @@ class ZoneMapIndexReader : public MetadataAdder { virtual ~ZoneMapIndexReader(); // load all page zone maps into memory - Status load(bool use_page_cache, bool kept_in_memory); + Status load(bool use_page_cache, bool kept_in_memory, + OlapReaderStatistics* index_load_stats = nullptr); const std::vector& page_zone_maps() const { return _page_zone_maps; } int32_t num_pages() const { return _page_zone_maps.size(); } private: - Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr); + Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr, + OlapReaderStatistics* index_load_stats); int64_t get_metadata_size() const override; diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp index 715301419e228fb..51de4ebd8e74520 100644 --- a/be/test/olap/date_bloom_filter_test.cpp +++ b/be/test/olap/date_bloom_filter_test.cpp @@ -155,8 +155,8 @@ TEST_F(DateBloomFilterTest, query_index_test) { { const auto& reader = segment->_column_readers[0]; std::unique_ptr bf_iter; - EXPECT_TRUE(reader->_bloom_filter_index->load(true, true).ok()); - EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter).ok()); + EXPECT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok()); + EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok()); std::unique_ptr bf; EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok()); auto test = [&](const std::string& query_string, bool result) { @@ -174,8 +174,8 @@ TEST_F(DateBloomFilterTest, query_index_test) { { const auto& reader = segment->_column_readers[1]; std::unique_ptr bf_iter; - EXPECT_TRUE(reader->_bloom_filter_index->load(true, true).ok()); - EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter).ok()); + EXPECT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok()); + EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok()); std::unique_ptr bf; EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok()); auto test = [&](const std::string& query_string, bool result) { diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp index 72aae56cd0938fe..9407be938867ec6 100644 --- a/be/test/olap/primary_key_index_test.cpp +++ 
b/be/test/olap/primary_key_index_test.cpp @@ -80,12 +80,12 @@ TEST_F(PrimaryKeyIndexTest, builder) { PrimaryKeyIndexReader index_reader; io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok()); - EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok()); + EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta, nullptr).ok()); + EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta, nullptr).ok()); EXPECT_EQ(num_rows, index_reader.num_rows()); std::unique_ptr index_iterator; - EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok()); + EXPECT_TRUE(index_reader.new_iterator(&index_iterator, nullptr).ok()); bool exact_match = false; uint32_t row_id; for (size_t i = 0; i < keys.size(); i++) { @@ -142,7 +142,7 @@ TEST_F(PrimaryKeyIndexTest, builder) { int batch_size = 1024; while (remaining > 0) { std::unique_ptr iter; - EXPECT_TRUE(index_reader.new_iterator(&iter).ok()); + EXPECT_TRUE(index_reader.new_iterator(&iter, nullptr).ok()); size_t num_to_read = std::min(batch_size, remaining); auto index_type = vectorized::DataTypeFactory::instance().create_data_type( @@ -199,12 +199,12 @@ TEST_F(PrimaryKeyIndexTest, multiple_pages) { PrimaryKeyIndexReader index_reader; io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok()); - EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok()); + EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta, nullptr).ok()); + EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta, nullptr).ok()); EXPECT_EQ(num_rows, index_reader.num_rows()); std::unique_ptr index_iterator; - EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok()); + EXPECT_TRUE(index_reader.new_iterator(&index_iterator, nullptr).ok()); bool exact_match = false; uint32_t row_id; for (size_t i = 0; i < keys.size(); i++) { @@ -283,12 +283,12 @@ TEST_F(PrimaryKeyIndexTest, single_page) { PrimaryKeyIndexReader index_reader; io::FileReaderSPtr file_reader; EXPECT_TRUE(fs->open_file(filename, &file_reader).ok()); - EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok()); - EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok()); + EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta, nullptr).ok()); + EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta, nullptr).ok()); EXPECT_EQ(num_rows, index_reader.num_rows()); std::unique_ptr index_iterator; - EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok()); + EXPECT_TRUE(index_reader.new_iterator(&index_iterator, nullptr).ok()); bool exact_match = false; uint32_t row_id; for (size_t i = 0; i < keys.size(); i++) { diff --git a/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp index 813952595efcfd6..e561f8ce9448874 100644 --- a/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp @@ -124,10 +124,10 @@ void get_bloom_filter_reader_iter(const std::string& file_name, const ColumnInde io::FileReaderSPtr file_reader; ASSERT_EQ(io::global_local_filesystem()->open_file(fname, &file_reader), Status::OK()); *reader = new BloomFilterIndexReader(std::move(file_reader), meta.bloom_filter_index()); - auto st = (*reader)->load(true, false); + auto st = (*reader)->load(true, false, nullptr); 
EXPECT_TRUE(st.ok());
-    st = (*reader)->new_iterator(iter);
+    st = (*reader)->new_iterator(iter, nullptr);
     EXPECT_TRUE(st.ok());
 }

diff --git a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp
index 33848c5959cfaae..ffd9c92ee0272e5 100644
--- a/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp
+++ b/be/test/olap/rowset/segment_v2/ordinal_page_index_test.cpp
@@ -74,7 +74,7 @@ TEST_F(OrdinalPageIndexTest, normal) {
     io::FileReaderSPtr file_reader;
     EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
     OrdinalIndexReader index(file_reader, 16 * 1024 * 4096 + 1, index_meta.ordinal_index());
-    EXPECT_TRUE(index.load(true, false).ok());
+    EXPECT_TRUE(index.load(true, false, nullptr).ok());
     EXPECT_EQ(16 * 1024, index.num_data_pages());
     EXPECT_EQ(1, index.get_first_ordinal(0));
     EXPECT_EQ(4096, index.get_last_ordinal(0));
@@ -128,7 +128,7 @@ TEST_F(OrdinalPageIndexTest, one_data_page) {
     }
     OrdinalIndexReader index(nullptr, num_values, index_meta.ordinal_index());
-    EXPECT_TRUE(index.load(true, false).ok());
+    EXPECT_TRUE(index.load(true, false, nullptr).ok());
     EXPECT_EQ(1, index.num_data_pages());
     EXPECT_EQ(0, index.get_first_ordinal(0));
     EXPECT_EQ(num_values - 1, index.get_last_ordinal(0));

diff --git a/be/test/olap/segment_cache_test.cpp b/be/test/olap/segment_cache_test.cpp
index b226bc6c2292e5e..c527ffddd424b94 100644
--- a/be/test/olap/segment_cache_test.cpp
+++ b/be/test/olap/segment_cache_test.cpp
@@ -323,7 +323,7 @@ TEST_F(SegmentCacheTest, vec_sequence_col) {
     segment_v2::SegmentSharedPtr segment_ptr = handle.get_segments()[0];

     // load index and bf second
-    res = segment_ptr->load_pk_index_and_bf();
+    res = segment_ptr->load_pk_index_and_bf(nullptr);
     ASSERT_TRUE(res.ok());

     // check cache mem usage equals to segment mem usage

From 034085ac4f90f93ea1b80695a020d1da8ebe3f91 Mon Sep 17 00:00:00 2001
From: walter
Date: Fri, 20 Dec 2024 22:27:12 +0800
Subject: [PATCH 35/55] [fix](restore) Lock tablet before modify segment files (#45711)

There is a race condition between the tablet checkpoint and the snapshot
move task: the checkpoint depends on the segment files to verify data size
correctness, while the move task deletes the tablet directory and moves the
downloaded files into it.

This PR makes the move task take the tablet locks before deleting the
directory.
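To make the locking scheme concrete, here is a minimal sketch of the try-lock pattern the fix applies (illustrative C++ only, not the Doris code: `TabletLocks` and `MoveStatus` are invented names, while the real implementation takes the locks through `Tablet` accessors such as `get_migration_lock()`). Every lock is acquired with `std::try_to_lock`, and if any acquisition fails the task returns a retryable error instead of blocking or touching the files unprotected:

```
#include <mutex>
#include <shared_mutex>

// Hypothetical stand-in for the tablet's lock set.
struct TabletLocks {
    std::shared_timed_mutex migration;
    std::mutex base_compaction;
    std::mutex cumulative_compaction;
    std::shared_mutex meta_store;
};

enum class MoveStatus { kOk, kRetryLater };

// Try to take every lock without blocking; if any one is busy, give up and
// let the caller retry later, so the tablet directory is never deleted while
// a checkpoint or compaction may still be reading the segment files.
MoveStatus move_snapshot_files(TabletLocks& locks) {
    std::unique_lock migration(locks.migration, std::try_to_lock);
    std::unique_lock base(locks.base_compaction, std::try_to_lock);
    std::unique_lock cumu(locks.cumulative_compaction, std::try_to_lock);
    std::unique_lock meta(locks.meta_store, std::try_to_lock);
    if (!migration.owns_lock() || !base.owns_lock() || !cumu.owns_lock() ||
        !meta.owns_lock()) {
        return MoveStatus::kRetryLater;  // mirrors Status::ObtainLockFailed
    }
    // ... safe to delete the tablet directory and link the snapshot files ...
    return MoveStatus::kOk;
}
```

Because every acquisition is non-blocking, the lock ordering cannot deadlock; the cost is that the caller must be prepared to retry.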
---
 be/src/olap/tablet.cpp             |  4 +-
 be/src/olap/tablet.h               |  5 +-
 be/src/runtime/snapshot_loader.cpp | 97 ++++++++++++++++++------
 3 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index c7919b3f8dca24f..1758166e76edeee 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2766,7 +2766,7 @@ void Tablet::check_table_size_correctness() {
     const std::vector& all_rs_metas = _tablet_meta->all_rs_metas();
     for (const auto& rs_meta : all_rs_metas) {
         int64_t total_segment_size = get_segment_file_size(rs_meta);
-        int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta);
+        int64_t total_inverted_index_size = get_inverted_index_file_size(rs_meta);
         if (rs_meta->data_disk_size() != total_segment_size ||
             rs_meta->index_disk_size() != total_inverted_index_size ||
             rs_meta->data_disk_size() + rs_meta->index_disk_size() != rs_meta->total_disk_size()) {
@@ -2817,7 +2817,7 @@ int64_t Tablet::get_segment_file_size(const RowsetMetaSharedPtr& rs_meta) {
     return total_segment_size;
 }

-int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta) {
+int64_t Tablet::get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta) {
     const auto& fs = rs_meta->fs();
     if (!fs) {
         LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta->resource_id();

diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index d00476f044191cf..afe043bf15195b0 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -214,6 +214,7 @@ class Tablet final : public BaseTablet {
     std::mutex& get_push_lock() { return _ingest_lock; }
     std::mutex& get_base_compaction_lock() { return _base_compaction_lock; }
     std::mutex& get_cumulative_compaction_lock() { return _cumulative_compaction_lock; }
+    std::shared_mutex& get_meta_store_lock() { return _meta_store_lock; }

     std::shared_timed_mutex& get_migration_lock() { return _migration_lock; }

@@ -531,7 +532,7 @@ class Tablet final : public BaseTablet {
     void check_table_size_correctness();
     std::string get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id);
     int64_t get_segment_file_size(const RowsetMetaSharedPtr& rs_meta);
-    int64_t get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta);
+    int64_t get_inverted_index_file_size(const RowsetMetaSharedPtr& rs_meta);

 public:
     static const int64_t K_INVALID_CUMULATIVE_POINT = -1;
@@ -588,7 +589,7 @@ class Tablet final : public BaseTablet {
     std::shared_ptr _cumulative_compaction_policy;
     std::string_view _cumulative_compaction_type;

-    // use a seperate thread to check all tablets paths existance
+    // use a separate thread to check all tablets paths existence
     std::atomic _is_tablet_path_exists;

     int64_t _last_missed_version;

diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp
index b492a929fca3bff..c5b27c823054a4a 100644
--- a/be/src/runtime/snapshot_loader.cpp
+++ b/be/src/runtime/snapshot_loader.cpp
@@ -765,49 +765,68 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta
         return Status::InternalError(err_msg);
     }

-    if (overwrite) {
-        std::vector snapshot_files;
-        RETURN_IF_ERROR(_get_existing_files_from_local(snapshot_path, &snapshot_files));
-
-        // 1. simply delete the old dir and replace it with the snapshot dir
-        try {
-            // This remove seems soft enough, because we already get
-            // tablet id and schema hash from this path, which
-            // means this path is a valid path.
-            std::filesystem::remove_all(tablet_path);
-            VLOG_CRITICAL << "remove dir: " << tablet_path;
-            std::filesystem::create_directory(tablet_path);
-            VLOG_CRITICAL << "re-create dir: " << tablet_path;
-        } catch (const std::filesystem::filesystem_error& e) {
-            std::stringstream ss;
-            ss << "failed to move tablet path: " << tablet_path << ". err: " << e.what();
-            LOG(WARNING) << ss.str();
-            return Status::InternalError(ss.str());
-        }
+    if (!overwrite) {
+        throw Exception(Status::FatalError("only support overwrite now"));
+    }

-        // link files one by one
-        // files in snapshot dir will be moved in snapshot clean process
-        std::vector linked_files;
-        for (auto& file : snapshot_files) {
-            auto full_src_path = fmt::format("{}/{}", snapshot_path, file);
-            auto full_dest_path = fmt::format("{}/{}", tablet_path, file);
-            if (link(full_src_path.c_str(), full_dest_path.c_str()) != 0) {
-                LOG(WARNING) << "failed to link file from " << full_src_path << " to "
-                             << full_dest_path << ", err: " << std::strerror(errno);
-
-                // clean the already linked files
-                for (auto& linked_file : linked_files) {
-                    remove(linked_file.c_str());
-                }
+    // Medium migration/clone/checkpoint/compaction may change or check the
+    // files and tablet meta, so we need to take these locks.
+    std::unique_lock migration_lock(tablet->get_migration_lock(), std::try_to_lock);
+    std::unique_lock base_compact_lock(tablet->get_base_compaction_lock(), std::try_to_lock);
+    std::unique_lock cumu_compact_lock(tablet->get_cumulative_compaction_lock(), std::try_to_lock);
+    std::unique_lock cold_compact_lock(tablet->get_cold_compaction_lock(), std::try_to_lock);
+    std::unique_lock build_idx_lock(tablet->get_build_inverted_index_lock(), std::try_to_lock);
+    std::unique_lock meta_store_lock(tablet->get_meta_store_lock(), std::try_to_lock);
+    if (!migration_lock.owns_lock() || !base_compact_lock.owns_lock() ||
+        !cumu_compact_lock.owns_lock() || !cold_compact_lock.owns_lock() ||
+        !build_idx_lock.owns_lock() || !meta_store_lock.owns_lock()) {
+        // This error should be retryable
+        auto status = Status::ObtainLockFailed("failed to get tablet locks, tablet: {}", tablet_id);
+        LOG(WARNING) << status << ", snapshot path: " << snapshot_path
+                     << ", tablet path: " << tablet_path;
+        return status;
+    }

-                return Status::InternalError("move tablet failed");
+    std::vector snapshot_files;
+    RETURN_IF_ERROR(_get_existing_files_from_local(snapshot_path, &snapshot_files));
+
+    // FIXME: the logic below will damage the tablet files if it fails in the middle.
+
+    // 1. simply delete the old dir and replace it with the snapshot dir
+    try {
+        // This remove seems soft enough, because we already get
+        // tablet id and schema hash from this path, which
+        // means this path is a valid path.
+        std::filesystem::remove_all(tablet_path);
+        VLOG_CRITICAL << "remove dir: " << tablet_path;
+        std::filesystem::create_directory(tablet_path);
+        VLOG_CRITICAL << "re-create dir: " << tablet_path;
+    } catch (const std::filesystem::filesystem_error& e) {
+        std::stringstream ss;
+        ss << "failed to move tablet path: " << tablet_path << ". err: " << e.what();
+        LOG(WARNING) << ss.str();
+        return Status::InternalError(ss.str());
+    }
+
+    // link files one by one
+    // files in snapshot dir will be moved in snapshot clean process
+    std::vector linked_files;
+    for (auto& file : snapshot_files) {
+        auto full_src_path = fmt::format("{}/{}", snapshot_path, file);
+        auto full_dest_path = fmt::format("{}/{}", tablet_path, file);
+        if (link(full_src_path.c_str(), full_dest_path.c_str()) != 0) {
+            LOG(WARNING) << "failed to link file from " << full_src_path << " to " << full_dest_path
+                         << ", err: " << std::strerror(errno);
+
+            // clean the already linked files
+            for (auto& linked_file : linked_files) {
+                remove(linked_file.c_str());
             }
-            linked_files.push_back(full_dest_path);
-            VLOG_CRITICAL << "link file from " << full_src_path << " to " << full_dest_path;
-        }
-    } else {
-        throw Exception(Status::FatalError("only support overwrite now"));
+            return Status::InternalError("move tablet failed");
+        }
+        linked_files.push_back(full_dest_path);
+        VLOG_CRITICAL << "link file from " << full_src_path << " to " << full_dest_path;
     }

     // snapshot loader not need to change tablet uid

From 0ab4eae228e815b3237e70dab54d67808df634df Mon Sep 17 00:00:00 2001
From: zhangstar333
Date: Fri, 20 Dec 2024 22:49:46 +0800
Subject: [PATCH 36/55] [Bug](function) fix is_ip_address_in_range function parse error throw exception (#45657)

### What problem does this PR solve?

Problem Summary:
Before this fix, an exception was thrown when parsing a NULL value, because
the input is empty and therefore invalid. So we need to check the input first
and only then parse it.

```
mysql [test]>select * from ip_test;
+------+------------------+
| id   | data             |
+------+------------------+
|   54 | 2001:db8:4::/128 |
|   55 | NULL             |
+------+------------------+
2 rows in set (0.07 sec)

mysql [test]>SELECT data, IS_IP_ADDRESS_IN_RANGE(CAST('0.0.0.1' AS STRING), data) FROM ip_test;
ERROR 1105 (HY000): errCode = 2, detailMessage = (10.16.10.8)[INVALID_ARGUMENT][E33] The text does not contain '/':
```
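The shape of the fix is a guard before the parse: a NULL slot of the nullable column surfaces as an empty string view in the nested data column, and it must yield "no match" instead of reaching the throwing parser. A standalone sketch of the pattern (illustrative C++ with a simplified `StringRef` stand-in, not the actual Doris column types):

```
#include <cstddef>
#include <stdexcept>
#include <string_view>

// Simplified stand-in for the engine's string reference type; a NULL slot
// of a nullable column shows up here as {nullptr, 0}.
struct StringRef {
    const char* data = nullptr;
    size_t size = 0;
};

// Mimics parse_ip_with_cidr's failure mode on input that lacks a '/'.
void parse_cidr_or_throw(std::string_view text) {
    if (text.find('/') == std::string_view::npos) {
        throw std::invalid_argument("The text does not contain '/'");
    }
    // ... parse the address part and the prefix length ...
}

// Guard before parsing: a NULL/empty CIDR yields 0 (the null-map masks the
// value later anyway) instead of an exception that fails the whole query.
int is_in_range_row(StringRef cidr) {
    if (cidr.data == nullptr || cidr.size == 0) {
        return 0;
    }
    parse_cidr_or_throw(std::string_view(cidr.data, cidr.size));
    return 1;  // placeholder for the real subnet comparison
}
```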
---
 be/src/vec/functions/function_ip.h            | 23 +++++++++++++------
 .../test_is_ip_address_in_range_function.out  |  8 +++++++
 ...est_is_ip_address_in_range_function.groovy |  9 ++++++++
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h
index 67edad5015aeaf3..1a1c23e2b06c354 100644
--- a/be/src/vec/functions/function_ip.h
+++ b/be/src/vec/functions/function_ip.h
@@ -615,8 +615,13 @@ class FunctionIsIPAddressInRange : public IFunction {
         for (size_t i = 0; i < input_rows_count; ++i) {
             auto addr_idx = index_check_const(i, addr_const);
             auto cidr_idx = index_check_const(i, cidr_const);
-            const auto cidr =
-                    parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
+            auto cidr_data = str_cidr_column->get_data_at(cidr_idx);
+            // cidr_data may be NULL, but the input column is the nested column, so check here to avoid throwing an exception
+            if (cidr_data.data == nullptr || cidr_data.size == 0) {
+                col_res_data[i] = 0;
+                continue;
+            }
+            const auto cidr = parse_ip_with_cidr(cidr_data.to_string_view());
             if constexpr (PT == PrimitiveType::TYPE_IPV4) {
                 if (cidr._address.as_v4()) {
                     col_res_data[i] = match_ipv4_subnet(ip_data[addr_idx], cidr._address.as_v4(),
@@ -775,11 +780,15 @@ class FunctionIsIPAddressInRange : public IFunction {
         for (size_t i = 0; i < input_rows_count; ++i) {
             auto addr_idx = index_check_const(i, addr_const);
             auto cidr_idx = index_check_const(i, cidr_const);
-
-            const auto addr =
-                    IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view());
-            const auto cidr =
-                    parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
+            auto addr_data = str_addr_column->get_data_at(addr_idx);
+            auto cidr_data = str_cidr_column->get_data_at(cidr_idx);
+            // cidr_data may be NULL, but the input column is the nested column, so check here to avoid throwing an exception
+            if (cidr_data.data == nullptr || cidr_data.size == 0) {
+                col_res_data[i] = 0;
+                continue;
+            }
+            const auto addr = IPAddressVariant(addr_data.to_string_view());
+            const auto cidr = parse_ip_with_cidr(cidr_data.to_string_view());
             col_res_data[i] = is_address_in_range(addr, cidr) ? 1 : 0;
         }
     }

diff --git a/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out b/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out
index 285b861b742c5b5..759b6c890ea13e2 100644
--- a/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out
+++ b/regression-test/data/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.out
@@ -92,3 +92,11 @@
 -- !sql --
 \N

+-- !sql1 --
+54 2001:db8:4::/128
+55 \N
+
+-- !sql2 --
+\N \N
+2001:db8:4::/128 false
+

diff --git a/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy b/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy
index 812bfffeb2f3e0d..cee47c818130a71 100644
--- a/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/ip_functions/test_is_ip_address_in_range_function.groovy
@@ -78,4 +78,13 @@ suite("test_is_ip_address_in_range_function") {

     qt_sql "SELECT is_ip_address_in_range(NULL, '::ffff:192.168.0.4/128')"
     qt_sql "SELECT is_ip_address_in_range(NULL, NULL)"
+
+
+    sql """ DROP TABLE IF EXISTS ip_test """
+    sql """ CREATE TABLE IF NOT EXISTS ip_test(id INT, data string) DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ('replication_num' = '1');"""
+    sql """ INSERT INTO ip_test values (54, '2001:db8:4::/128'); """
+    sql """ INSERT INTO ip_test values (55, NULL); """
+    qt_sql1 """ select * from ip_test order by 1; """
+    qt_sql2 "SELECT data, IS_IP_ADDRESS_IN_RANGE(CAST('0.0.0.1' AS STRING), data) FROM ip_test order by 1;"
+
 }
\ No newline at end of file

From f6071a3833bb095f01065a9d10bc828f59cac883 Mon Sep 17 00:00:00 2001
From: feiniaofeiafei
Date: Sat, 21 Dec 2024 18:00:07 +0800
Subject: [PATCH 37/55] [enhance](nereids) date_add, date_sub, date_diff, date_floor, date_ceil function implement Monotonic (#44943)

The date_add, date_sub, date_diff, date_floor, and date_ceil functions now
implement Monotonic, so range partitions can be pruned for predicates over
these functions, for example `date_add(dt, 1) = '2024-01-01'`.
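Why monotonicity enables pruning: if f is increasing on the partition column's value range [lo, hi], the image of the partition under f is exactly [f(lo), f(hi)], so a predicate f(dt) = c can discard every partition whose mapped range does not contain c. A minimal sketch of the idea (illustrative C++ for brevity, not the Nereids implementation; the real logic works on ColumnRange objects in OneRangePartitionEvaluator):

```
#include <cassert>

// A closed range of dates, encoded as days since an arbitrary epoch.
struct DayRange {
    int lo;
    int hi;
};

// date_add(dt, 1) modeled on the day encoding: strictly increasing.
inline int date_add_one(int day) { return day + 1; }

// For an increasing f, the image of [lo, hi] is [f(lo), f(hi)], so the
// partition may satisfy f(dt) == target only if target lies in that image.
inline bool partition_may_match(DayRange part, int target) {
    DayRange mapped{date_add_one(part.lo), date_add_one(part.hi)};
    return mapped.lo <= target && target <= mapped.hi;
}

int main() {
    // Partition covering day offsets 0..6: date_add maps it to 1..7, so a
    // predicate targeting day 7 keeps it, while an older partition is pruned.
    assert(partition_may_match({0, 6}, 7));
    assert(!partition_may_match({-10, -1}, 7));
    return 0;
}
```

A decreasing function only swaps the mapped endpoints, which is exactly what the `isPositive()` flag in the new Monotonic interfaces controls.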
---
 .../rules/OneRangePartitionEvaluator.java     |  15 +-
 .../functions/DateAddSubMonotonic.java        |  38 ++
 .../functions/DateCeilFloorMonotonic.java     |  47 ++
 .../functions/DateDiffMonotonic.java          |  39 ++
 .../expressions/functions/Monotonic.java      |   5 +
 .../expressions/functions/scalar/DayCeil.java |  17 +-
 .../functions/scalar/DayFloor.java            |  17 +-
 .../expressions/functions/scalar/DaysAdd.java |   8 +-
 .../functions/scalar/DaysDiff.java            |  13 +-
 .../expressions/functions/scalar/DaysSub.java |   8 +-
 .../functions/scalar/FromDays.java            |  18 +-
 .../functions/scalar/HourCeil.java            |  17 +-
 .../functions/scalar/HourFloor.java           |  17 +-
 .../functions/scalar/HoursAdd.java            |   9 +-
 .../functions/scalar/HoursDiff.java           |  13 +-
 .../functions/scalar/HoursSub.java            |   9 +-
 .../functions/scalar/MicroSecondsAdd.java     |   9 +-
 .../functions/scalar/MicroSecondsDiff.java    |  13 +-
 .../functions/scalar/MicroSecondsSub.java     |   9 +-
 .../functions/scalar/MilliSecondsAdd.java     |   9 +-
 .../functions/scalar/MilliSecondsDiff.java    |  13 +-
 .../functions/scalar/MilliSecondsSub.java     |   9 +-
 .../functions/scalar/MinuteCeil.java          |  17 +-
 .../functions/scalar/MinuteFloor.java         |  17 +-
 .../functions/scalar/MinutesAdd.java          |   9 +-
 .../functions/scalar/MinutesDiff.java         |  13 +-
 .../functions/scalar/MinutesSub.java          |   9 +-
 .../functions/scalar/MonthCeil.java           |  17 +-
 .../functions/scalar/MonthFloor.java          |  17 +-
 .../functions/scalar/MonthsAdd.java           |   8 +-
 .../functions/scalar/MonthsDiff.java          |  13 +-
 .../functions/scalar/MonthsSub.java           |   8 +-
 .../functions/scalar/SecondCeil.java          |  17 +-
 .../functions/scalar/SecondFloor.java         |  17 +-
 .../functions/scalar/SecondsAdd.java          |   9 +-
 .../functions/scalar/SecondsDiff.java         |  13 +-
 .../functions/scalar/SecondsSub.java          |   9 +-
 .../functions/scalar/UnixTimestamp.java       |  39 +-
 .../functions/scalar/YearCeil.java            |  17 +-
 .../functions/scalar/YearFloor.java           |  17 +-
 .../functions/scalar/YearsAdd.java            |   8 +-
 .../functions/scalar/YearsDiff.java           |  13 +-
 .../functions/scalar/YearsSub.java            |   8 +-
 .../expressions/literal/DateTimeLiteral.java  |   5 +-
 .../test_add_sub_diff_ceil_floor.groovy       | 407 ++++++++++++++++++
 .../partition_prune/test_convert_tz.groovy    |   6 +-
 46 files changed, 1019 insertions(+), 46 deletions(-)
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateAddSubMonotonic.java
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateCeilFloorMonotonic.java
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateDiffMonotonic.java
 create mode 100644 regression-test/suites/nereids_rules_p0/partition_prune/test_add_sub_diff_ceil_floor.groovy

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java
index 7e91d5502f7fad8..eb9fd6e149160ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java
@@ -48,6 +48,7 @@
 import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.literal.MaxLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.BooleanType;
 import org.apache.doris.nereids.util.ExpressionUtils;
@@ -807,22 +808,29 @@ private EvaluateRangeResult computeMonotonicFunctionRange(EvaluateRangeResult re
                 : new NonNullable(funcChild));
         partitionSlotContainsNull.put((Expression) func, withNullable.nullable());

-        if (!result.childrenResult.get(0).columnRanges.containsKey(funcChild)) {
-            return result;
-        }
-        ColumnRange childRange = result.childrenResult.get(0).columnRanges.get(funcChild);
+        if (!result.childrenResult.get(childIndex).columnRanges.containsKey(funcChild)) {
+            return result;
+        }
+        ColumnRange childRange = result.childrenResult.get(childIndex).columnRanges.get(funcChild);
         if (childRange.isEmptyRange() ||
childRange.asRanges().size() != 1 || (!childRange.span().hasLowerBound() && !childRange.span().hasUpperBound())) { return result; } Range span = childRange.span(); + // null means positive infinity or negative infinity Literal lower = span.hasLowerBound() ? span.lowerEndpoint().getValue() : null; Literal upper = span.hasUpperBound() && !(span.upperEndpoint().getValue() instanceof MaxLiteral) ? span.upperEndpoint().getValue() : null; + if (!func.isMonotonic(lower, upper)) { + return result; + } Expression lowerValue = lower != null ? FoldConstantRuleOnFE.evaluate(func.withConstantArgs(lower), expressionRewriteContext) : null; Expression upperValue = upper != null ? FoldConstantRuleOnFE.evaluate(func.withConstantArgs(upper), expressionRewriteContext) : null; + if (lowerValue instanceof NullLiteral || upperValue instanceof NullLiteral) { + return result; + } if (!func.isPositive()) { Expression temp = lowerValue; lowerValue = upperValue; @@ -842,6 +850,9 @@ private EvaluateRangeResult computeMonotonicFunctionRange(EvaluateRangeResult re if (upperValue instanceof Literal) { newRange = newRange.withUpperBound((Literal) upperValue); } + if (newRange.isEmptyRange() || !newRange.span().hasLowerBound() && !newRange.span().hasUpperBound()) { + return result; + } context.rangeMap.put((Expression) func, newRange); newRanges.put((Expression) func, newRange); return new EvaluateRangeResult((Expression) func, newRanges, result.childrenResult); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateAddSubMonotonic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateAddSubMonotonic.java new file mode 100644 index 000000000000000..7fec22fd9d317d9 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateAddSubMonotonic.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.expressions.functions; + +import org.apache.doris.nereids.trees.expressions.literal.Literal; + +/** monotonicity for XX_ADD XX_SUB */ +public interface DateAddSubMonotonic extends Monotonic { + @Override + default boolean isMonotonic(Literal lower, Literal upper) { + return child(1) instanceof Literal; + } + + @Override + default boolean isPositive() { + return true; + } + + @Override + default int getMonotonicFunctionChildIndex() { + return 0; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateCeilFloorMonotonic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateCeilFloorMonotonic.java new file mode 100644 index 000000000000000..71ad80a347176ae --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateCeilFloorMonotonic.java @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions; + +import org.apache.doris.nereids.trees.expressions.literal.Literal; + +/** monotonicity of XX_CEIL and XX_FLOOR */ +public interface DateCeilFloorMonotonic extends Monotonic { + @Override + default boolean isMonotonic(Literal lower, Literal upper) { + switch (arity()) { + case 1: + return true; + case 2: + return !(child(0) instanceof Literal) && child(1) instanceof Literal; + case 3: + return !(child(0) instanceof Literal) && child(1) instanceof Literal && child(2) instanceof Literal; + default: + return false; + } + } + + @Override + default boolean isPositive() { + return true; + } + + @Override + default int getMonotonicFunctionChildIndex() { + return 0; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateDiffMonotonic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateDiffMonotonic.java new file mode 100644 index 000000000000000..daaea895b6d15b8 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/DateDiffMonotonic.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions; + +import org.apache.doris.nereids.trees.expressions.literal.Literal; + +/** monotonicity for XX_DIFF */ +public interface DateDiffMonotonic extends Monotonic { + @Override + default boolean isMonotonic(Literal lower, Literal upper) { + return !(child(0) instanceof Literal) && child(1) instanceof Literal + || child(0) instanceof Literal && !(child(1) instanceof Literal); + } + + @Override + default boolean isPositive() { + return child(1) instanceof Literal; + } + + @Override + default int getMonotonicFunctionChildIndex() { + return child(1) instanceof Literal ? 0 : 1; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Monotonic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Monotonic.java index bcaa040cb2a6505..feec5933890e670 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Monotonic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Monotonic.java @@ -18,9 +18,14 @@ package org.apache.doris.nereids.trees.expressions.functions; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.literal.Literal; /** monotonicity of expressions */ public interface Monotonic extends ExpressionTrait { + default boolean isMonotonic(Literal lower, Literal upper) { + return true; + } + // true means that the function is an increasing function boolean isPositive(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayCeil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayCeil.java index e77c307b523869b..740363b50aad2b8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayCeil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayCeil.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -37,7 +38,7 @@ * ScalarFunction 'day_ceil'. This class is generated by GenerateFunction. 
*/ public class DayCeil extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -106,4 +107,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitDayCeil(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new DayCeil(literal); + case 2: + return new DayCeil(literal, child(1)); + case 3: + return new DayCeil(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayFloor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayFloor.java index b7e04e3a374629d..5ba7fc13c7526b8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayFloor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DayFloor.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -37,7 +38,7 @@ * ScalarFunction 'day_floor'. This class is generated by GenerateFunction. 
*/ public class DayFloor extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -106,4 +107,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitDayFloor(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new DayFloor(literal); + case 2: + return new DayFloor(literal, child(1)); + case 3: + return new DayFloor(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysAdd.java index e02c20eee82a04c..a231816a330eff5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysAdd.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Config; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ComputeSignatureForDateArithmetic; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -41,7 +42,7 @@ */ public class DaysAdd extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, - ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args { + ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args, DateAddSubMonotonic { // When enable_date_conversion is true, we prefer to V2 signature. // This preference follows original planner. refer to ScalarType.getDefaultDateType() private static final List SIGNATURES = Config.enable_date_conversion ? 
ImmutableList.of( @@ -77,4 +78,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitDaysAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new DaysAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysDiff.java index e0343f1148f162e..c6f3377fdc99c57 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -37,7 +39,7 @@ * ScalarFunction 'days_diff'. This class is generated by GenerateFunction. */ public class DaysDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE) @@ -73,4 +75,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitDaysDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new DaysDiff(literal, child(1)); + } else { + return new DaysDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysSub.java index 8d135dc6c9cf725..5dab58ecdf63c58 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DaysSub.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Config; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ComputeSignatureForDateArithmetic; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -41,7 +42,7 @@ */ public class DaysSub extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, - ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args { + ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args, DateAddSubMonotonic { // 
When enable_date_conversion is true, we prefer to V2 signature. // This preference follows original planner. refer to ScalarType.getDefaultDateType() private static final List SIGNATURES = Config.enable_date_conversion ? ImmutableList.of( @@ -77,4 +78,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitDaysSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new DaysSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FromDays.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FromDays.java index c3d19588ce5f2fd..1799c1461fc1b42 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FromDays.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FromDays.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.Monotonic; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -36,7 +37,7 @@ * ScalarFunction 'from_days'. This class is generated by GenerateFunction. */ public class FromDays extends ScalarFunction - implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, Monotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateV2Type.INSTANCE).args(IntegerType.INSTANCE) @@ -67,4 +68,19 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitFromDays(this, context); } + + @Override + public boolean isPositive() { + return true; + } + + @Override + public int getMonotonicFunctionChildIndex() { + return 0; + } + + @Override + public Expression withConstantArgs(Expression literal) { + return new FromDays(literal); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourCeil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourCeil.java index e76151ef9d682c8..13358e18d747705 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourCeil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourCeil.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -36,7 +37,7 @@ * ScalarFunction 'hour_ceil'. This class is generated by GenerateFunction. 
*/ public class HourCeil extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -100,4 +101,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitHourCeil(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new HourCeil(literal); + case 2: + return new HourCeil(literal, child(1)); + case 3: + return new HourCeil(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourFloor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourFloor.java index 567f0a2dd188daf..f48e26ea443f251 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourFloor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HourFloor.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -36,7 +37,7 @@ * ScalarFunction 'hour_floor'. This class is generated by GenerateFunction. 
*/ public class HourFloor extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -100,4 +101,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitHourFloor(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new HourFloor(literal); + case 2: + return new HourFloor(literal, child(1)); + case 3: + return new HourFloor(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursAdd.java index cb4e601b14d0015..4c10b204597975c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursAdd.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -38,7 +39,8 @@ * ScalarFunction 'days_add'. 
*/ public class HoursAdd extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) @@ -67,4 +69,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitHoursAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new HoursAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursDiff.java index 452e3110ff764dd..63942cd56e9e5be 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -37,7 +39,7 @@ * ScalarFunction 'hours_diff'. This class is generated by GenerateFunction. 
*/ public class HoursDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE) @@ -73,4 +75,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitHoursDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new HoursDiff(literal, child(1)); + } else { + return new HoursDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursSub.java index e53f8ecb90528a8..49e8e5cb50f69ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/HoursSub.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -38,7 +39,8 @@ * ScalarFunction 'hours_sub'. */ public class HoursSub extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) @@ -67,4 +69,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitHoursSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new HoursSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java index 8d792259440dd2f..33dd5809f2b1a82 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -35,7 +36,8 @@ * ScalarFunction 'MicroSeconds_add'. 
*/ public class MicroSecondsAdd extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.MAX) @@ -66,4 +68,9 @@ public FunctionSignature computeSignature(FunctionSignature signature) { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMicroSecondsAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MicroSecondsAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsDiff.java index 8bf3a9648396d73..160e8a96b13e683 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -35,7 +37,7 @@ * ScalarFunction 'microseconds_diff'. This class is generated by GenerateFunction. 
*/ public class MicroSecondsDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE) @@ -67,4 +69,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMicroSecondsDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new MicroSecondsDiff(literal, child(1)); + } else { + return new MicroSecondsDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java index 2894d1fffc902f3..20c880fb879298e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -35,7 +36,8 @@ * ScalarFunction 'MicroSeconds_sub'. */ public class MicroSecondsSub extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.MAX) @@ -66,4 +68,9 @@ public FunctionSignature computeSignature(FunctionSignature signature) { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMicroSecondsSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MicroSecondsSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java index 1cb56b13f84ed53..244b661db3afede 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -35,7 +36,8 @@ * ScalarFunction 'MilliSeconds_add'. 
*/ public class MilliSecondsAdd extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.MAX) @@ -66,4 +68,9 @@ public FunctionSignature computeSignature(FunctionSignature signature) { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMilliSecondsAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MilliSecondsAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsDiff.java index 4500bd69460d989..0e8c623ce176b2e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -35,7 +37,7 @@ * ScalarFunction 'milliseconds_diff'. This class is generated by GenerateFunction. 
*/ public class MilliSecondsDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE) @@ -67,4 +69,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMilliSecondsDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new MilliSecondsDiff(literal, child(1)); + } else { + return new MilliSecondsDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java index 42891b7e7e0b223..10b4f8184d12d8f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -35,7 +36,8 @@ * ScalarFunction 'MilliSeconds_sub'. */ public class MilliSecondsSub extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.MAX) @@ -66,4 +68,9 @@ public FunctionSignature computeSignature(FunctionSignature signature) { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMilliSecondsSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MilliSecondsSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteCeil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteCeil.java index b00eaff07c2c626..4f3e317d00ef39c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteCeil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteCeil.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DateTimeType; @@ -35,7 +36,7 @@ * ScalarFunction 'minute_ceil'. 
This class is generated by GenerateFunction. */ public class MinuteCeil extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable { + implements ExplicitlyCastableSignature, AlwaysNullable, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -99,4 +100,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMinuteCeil(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new MinuteCeil(literal); + case 2: + return new MinuteCeil(literal, child(1)); + case 3: + return new MinuteCeil(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteFloor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteFloor.java index 683acc3a48381cd..cefb5222c764ad5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteFloor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinuteFloor.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DateTimeType; @@ -35,7 +36,7 @@ * ScalarFunction 'minute_floor'. This class is generated by GenerateFunction. 
*/ public class MinuteFloor extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable { + implements ExplicitlyCastableSignature, AlwaysNullable, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -99,4 +100,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMinuteFloor(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new MinuteFloor(literal); + case 2: + return new MinuteFloor(literal, child(1)); + case 3: + return new MinuteFloor(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesAdd.java index f4c02fb84ca3d63..8ba1642f6248b6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesAdd.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -38,7 +39,8 @@ * ScalarFunction 'minutes_add'. 
*/ public class MinutesAdd extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) @@ -67,4 +69,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMinutesAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MinutesAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java index 4d011116334bf47..91c254be7c14dc6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -37,7 +39,7 @@ * ScalarFunction 'minutes_diff'. This class is generated by GenerateFunction. 
*/ public class MinutesDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE) @@ -73,4 +75,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMinutesDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new MinutesDiff(literal, child(1)); + } else { + return new MinutesDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesSub.java index 4fb616957813a7a..2a29d9e1659963b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesSub.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -38,7 +39,8 @@ * ScalarFunction 'minutes_sub'. */ public class MinutesSub extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) @@ -67,4 +69,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMinutesSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MinutesSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthCeil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthCeil.java index 627568cf28a145f..a1f4628dd6a9abc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthCeil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthCeil.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -37,7 +38,7 @@ * ScalarFunction 'month_ceil'. This class is generated by GenerateFunction. 
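+ * month_ceil is non-decreasing in its date argument for any fixed period/origin,
+ * so DateCeilFloorMonotonic lets partition bounds be mapped through the call.
+ * withConstantArgs below handles every supported arity (the optional arguments
+ * are a period and/or an origin datetime), always substituting the bound for the
+ * date child.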
*/ public class MonthCeil extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -106,4 +107,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMonthCeil(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new MonthCeil(literal); + case 2: + return new MonthCeil(literal, child(1)); + case 3: + return new MonthCeil(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthFloor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthFloor.java index f3b0b66c1396d1a..d55d52ab68ce5a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthFloor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthFloor.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -37,7 +38,7 @@ * ScalarFunction 'month_floor'. This class is generated by GenerateFunction. 
*/ public class MonthFloor extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -106,4 +107,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMonthFloor(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new MonthFloor(literal); + case 2: + return new MonthFloor(literal, child(1)); + case 3: + return new MonthFloor(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsAdd.java index 5126400b71efaa0..1cca6d8446fef6a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsAdd.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Config; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ComputeSignatureForDateArithmetic; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -41,7 +42,7 @@ */ public class MonthsAdd extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, - ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args { + ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args, DateAddSubMonotonic { // When enable_date_conversion is true, we prefer to V2 signature. // This preference follows original planner. 
refer to ScalarType.getDefaultDateType() @@ -78,4 +79,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMonthsAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MonthsAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsDiff.java index 373265b1e3822a6..a850767a49fa408 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -37,7 +39,7 @@ * ScalarFunction 'months_diff'. This class is generated by GenerateFunction. */ public class MonthsDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE).args(DateV2Type.INSTANCE, DateV2Type.INSTANCE), @@ -73,4 +75,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMonthsDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new MonthsDiff(literal, child(1)); + } else { + return new MonthsDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsSub.java index 1c2985a6e136e2b..9c5824a1b9eebb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MonthsSub.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Config; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ComputeSignatureForDateArithmetic; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -41,7 +42,7 @@ */ public class MonthsSub extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, - ComputeSignatureForDateArithmetic, 
PropagateNullableOnDateLikeV2Args { + ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args, DateAddSubMonotonic { // When enable_date_conversion is true, we prefer to V2 signature. // This preference follows original planner. refer to ScalarType.getDefaultDateType() @@ -78,4 +79,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMonthsSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new MonthsSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondCeil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondCeil.java index 04cd08f4c8ce7bc..3b0e657698743aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondCeil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondCeil.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -36,7 +37,7 @@ * ScalarFunction 'second_ceil'. This class is generated by GenerateFunction. */ public class SecondCeil extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -100,4 +101,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitSecondCeil(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new SecondCeil(literal); + case 2: + return new SecondCeil(literal, child(1)); + case 3: + return new SecondCeil(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondFloor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondFloor.java index ae0af1106073aca..c06fff06aed0162 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondFloor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondFloor.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; import 
org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -36,7 +37,7 @@ * ScalarFunction 'second_floor'. This class is generated by GenerateFunction. */ public class SecondFloor extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + implements ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -101,4 +102,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitSecondFloor(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new SecondFloor(literal); + case 2: + return new SecondFloor(literal, child(1)); + case 3: + return new SecondFloor(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsAdd.java index a6e131f52635375..3afa8f134193ec0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsAdd.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -38,7 +39,8 @@ * ScalarFunction 'seconds_add'.
*/ public class SecondsAdd extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) @@ -67,4 +69,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitSecondsAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new SecondsAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsDiff.java index 4dd7e12b9e2f32f..c81999d4fa2c980 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -37,7 +39,7 @@ * ScalarFunction 'seconds_diff'. This class is generated by GenerateFunction. 
*/ public class SecondsDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE) @@ -73,4 +75,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitSecondsDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new SecondsDiff(literal, child(1)); + } else { + return new SecondsDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsSub.java index 37c59b2168bda24..d3093f84e1a2dc0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SecondsSub.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -38,7 +39,8 @@ * ScalarFunction 'Seconds_sub'. */ public class SecondsSub extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, + DateAddSubMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) @@ -67,4 +69,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitSecondsSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new SecondsSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java index 633e1e7d4f3bda9..178187ad9cbc654 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnixTimestamp.java @@ -20,6 +20,9 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.Monotonic; +import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.DateTimeType; @@ -39,7 +42,8 @@ /** * ScalarFunction 'unix_timestamp'. 
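+ * Note on the monotonic range: unix_timestamp only grows with its argument up to
+ * 2038-01-19 03:14:07 (the largest 32-bit epoch second, 2^31 - 1); out-of-range
+ * datetimes yield 0 again, so isMonotonic below rejects any bound range that
+ * straddles this boundary.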
This class is generated by GenerateFunction. */ -public class UnixTimestamp extends ScalarFunction implements ExplicitlyCastableSignature { +public class UnixTimestamp extends ScalarFunction implements ExplicitlyCastableSignature, Monotonic { + private static final DateTimeLiteral MAX = new DateTimeLiteral("2038-01-19 03:14:07"); // we got changes when computeSignature private static final List SIGNATURES = ImmutableList.of( @@ -145,4 +149,37 @@ public R accept(ExpressionVisitor visitor, C context) { public boolean isDeterministic() { return !this.children.isEmpty(); } + + @Override + public boolean isPositive() { + return true; + } + + @Override + public int getMonotonicFunctionChildIndex() { + return 0; + } + + @Override + public Expression withConstantArgs(Expression literal) { + return new UnixTimestamp(literal); + } + + @Override + public boolean isMonotonic(Literal lower, Literal upper) { + if (arity() != 1) { + return false; + } + if (null == lower) { + lower = DateTimeLiteral.MIN_DATETIME; + } + if (null == upper) { + upper = DateTimeLiteral.MAX_DATETIME; + } + if (lower.compareTo(MAX) <= 0 && upper.compareTo(MAX) > 0) { + return false; + } else { + return true; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearCeil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearCeil.java index bc294638be99cda..37c952af815d861 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearCeil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearCeil.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DateTimeType; @@ -36,7 +37,7 @@ * ScalarFunction 'year_ceil'. This class is generated by GenerateFunction. 
*/ public class YearCeil extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable { + implements ExplicitlyCastableSignature, AlwaysNullable, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -105,4 +106,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitYearCeil(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new YearCeil(literal); + case 2: + return new YearCeil(literal, child(1)); + case 3: + return new YearCeil(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearFloor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearFloor.java index 5415502a7695798..00a1ad918f7ffa7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearFloor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearFloor.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.DateCeilFloorMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DateTimeType; @@ -36,7 +37,7 @@ * ScalarFunction 'year_floor'. This class is generated by GenerateFunction. 
*/ public class YearFloor extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNullable { + implements ExplicitlyCastableSignature, AlwaysNullable, DateCeilFloorMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).args(DateTimeV2Type.SYSTEM_DEFAULT), @@ -105,4 +106,18 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitYearFloor(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + switch (arity()) { + case 1: + return new YearFloor(literal); + case 2: + return new YearFloor(literal, child(1)); + case 3: + return new YearFloor(literal, child(1), child(2)); + default: + throw new IllegalStateException("The function " + getName() + " has invalid child number."); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsAdd.java index 33c9e1c6dfa5e85..9b81378d9871bce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsAdd.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Config; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ComputeSignatureForDateArithmetic; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -41,7 +42,7 @@ */ public class YearsAdd extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, - ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args { + ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args, DateAddSubMonotonic { // When enable_date_conversion is true, we prefer to V2 signature. // This preference follows original planner. 
refer to ScalarType.getDefaultDateType() @@ -78,4 +79,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitYearsAdd(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new YearsAdd(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsDiff.java index e217d8da72902a8..61b637449f17972 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsDiff.java @@ -19,8 +19,10 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.DateDiffMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; @@ -37,7 +39,7 @@ * ScalarFunction 'years_diff'. This class is generated by GenerateFunction. */ public class YearsDiff extends ScalarFunction - implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { + implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args, DateDiffMonotonic { private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE).args(DateV2Type.INSTANCE, DateV2Type.INSTANCE), @@ -73,4 +75,13 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitYearsDiff(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + if (child(1) instanceof Literal) { + return new YearsDiff(literal, child(1)); + } else { + return new YearsDiff(child(0), literal); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsSub.java index b70444178df508f..6f46727d937a28e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/YearsSub.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Config; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ComputeSignatureForDateArithmetic; +import org.apache.doris.nereids.trees.expressions.functions.DateAddSubMonotonic; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.functions.PropagateNullableOnDateLikeV2Args; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; @@ -41,7 +42,7 @@ */ public class YearsSub extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, - ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args { + 
ComputeSignatureForDateArithmetic, PropagateNullableOnDateLikeV2Args, DateAddSubMonotonic { // When enable_date_conversion is true, we prefer to V2 signature. // This preference follows original planner. refer to ScalarType.getDefaultDateType() @@ -78,4 +79,9 @@ public List getSignatures() { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitYearsSub(this, context); } + + @Override + public Expression withConstantArgs(Expression literal) { + return new YearsSub(literal, child(1)); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java index 27470187eae0d22..0a5c02409c113a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java @@ -43,11 +43,10 @@ * date time literal. */ public class DateTimeLiteral extends DateLiteral { + public static final DateTimeLiteral MIN_DATETIME = new DateTimeLiteral(0000, 1, 1, 0, 0, 0); + public static final DateTimeLiteral MAX_DATETIME = new DateTimeLiteral(9999, 12, 31, 23, 59, 59); protected static final int MAX_MICROSECOND = 999999; - private static final DateTimeLiteral MIN_DATETIME = new DateTimeLiteral(0000, 1, 1, 0, 0, 0); - private static final DateTimeLiteral MAX_DATETIME = new DateTimeLiteral(9999, 12, 31, 23, 59, 59); - private static final Logger LOG = LogManager.getLogger(DateTimeLiteral.class); protected long hour; diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/test_add_sub_diff_ceil_floor.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/test_add_sub_diff_ceil_floor.groovy new file mode 100644 index 000000000000000..bda9dc81af7cfef --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/partition_prune/test_add_sub_diff_ceil_floor.groovy @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
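+
+// The cases below verify partition pruning through monotonic date functions on the
+// partition column dt: the planner maps each partition's bounds through the
+// xx_add/xx_sub, xx_diff, xx_ceil/xx_floor call in the predicate, and the expected
+// "partitions=n/m" fragment in each explain output asserts that the non-qualifying
+// partitions were pruned.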
+ +suite("test_add_sub_diff_ceil_floor") { + sql "set disable_nereids_rules='REWRITE_FILTER_EXPRESSION'" + sql "drop table if exists test_add_sub_diff_ceil_floor_t" + sql """create table test_add_sub_diff_ceil_floor_t (a int, dt datetime, d date, c varchar(100)) duplicate key(a) + partition by range(dt) ( + partition p1 values less than ("2017-01-01"), + partition p2 values less than ("2018-01-01"), + partition p3 values less than ("2019-01-01"), + partition p4 values less than ("2020-01-01"), + partition p5 values less than ("2021-01-01") + ) distributed by hash(a) properties("replication_num"="1");""" + sql """INSERT INTO test_add_sub_diff_ceil_floor_t SELECT number, + date_add('2016-01-01 00:00:00', interval number month), + cast(date_add('2022-01-01 00:00:00', interval number month) as date), cast(number as varchar(65533)) FROM numbers('number'='55');""" + sql "INSERT INTO test_add_sub_diff_ceil_floor_t values(3,null,null,null);" + + // xx_add + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where years_add(dt,1) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where months_add(dt,2) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where days_add(dt,10) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hours_add(dt,1) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where minutes_add(dt,2) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where seconds_add(dt,10) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where milliseconds_add(dt,2) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where microseconds_add(dt,10) >'2019-01-01' """ + contains("partitions=3/5 (p3,p4,p5)") + } + // xx_sub + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where years_sub(dt,1) <='2018-01-01' """ + contains("4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where months_sub(dt,2) <='2018-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where days_sub(dt,10) <='2018-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hours_sub(dt,1) <='2018-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where minutes_sub(dt,2) <= '2018-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where seconds_sub(dt,10) <= '2018-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where milliseconds_sub(dt,2) <= '2018-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where microseconds_sub(dt,10) <= '2018-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + + // xx_diff + // first arg is dt. 
positive (the diff increases as dt increases) + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where years_diff(dt,'2017-01-01') <2 """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where months_diff(dt,'2017-01-01') <2 """ + contains("partitions=2/5 (p1,p2)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where days_diff(dt,'2017-01-01') <2 """ + contains("partitions=2/5 (p1,p2)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hours_diff(dt,'2017-01-01') <2 """ + contains("partitions=2/5 (p1,p2)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where minutes_diff(dt,'2017-01-01') <2 """ + contains("partitions=2/5 (p1,p2)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where seconds_diff(dt,'2017-01-01') <2 """ + contains("partitions=2/5 (p1,p2)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where milliseconds_diff(dt,'2017-01-01') <2 """ + contains("partitions=2/5 (p1,p2)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where microseconds_diff(dt,'2017-01-01') <2 """ + contains("partitions=2/5 (p1,p2)") + } + // second arg is dt. not positive (the diff decreases as dt increases) + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where years_diff('2021-01-01',dt) <2 """ + contains("partitions=2/5 (p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where months_diff('2021-01-01',dt) <2 """ + contains("partitions=1/5 (p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where days_diff('2021-01-01',dt) <2 """ + contains("partitions=1/5 (p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hours_diff('2021-01-01',dt) <2 """ + contains("partitions=1/5 (p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where minutes_diff('2021-01-01',dt) <2 """ + contains("partitions=1/5 (p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where seconds_diff('2021-01-01',dt) <2 """ + contains("partitions=1/5 (p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where milliseconds_diff('2021-01-01',dt) <2 """ + contains("partitions=1/5 (p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where microseconds_diff('2021-01-01',dt) <2 """ + contains("partitions=1/5 (p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where years_diff('2021-01-01',dt) <=2 """ + contains("partitions=3/5 (p3,p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where months_diff('2020-01-01',dt) >2 """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where days_diff('2020-01-01',dt) >=2 """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + + // xx_ceil + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where year_ceil(dt) <'2019-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where month_ceil(dt) <'2019-02-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where day_ceil(dt) <'2019-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hour_ceil(dt) <'2019-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from
test_add_sub_diff_ceil_floor_t where minute_ceil(dt) <'2019-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where second_ceil(dt) <'2019-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + // xx_ceil with other args + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where year_ceil(dt,5) <'2019-01-01' """ + contains("partitions=1/5 (p1)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where year_ceil(dt,'2013-01-01') <'2019-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where year_ceil(dt,5,'2013-01-01') <'2019-01-01'""" + contains(" partitions=3/5 (p1,p2,p3)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hour_ceil(dt,c) <'2019-01-01' """ + contains("partitions=5/5 (p1,p2,p3,p4,p5)") + } + + // xx_floor + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where year_floor(dt) <='2019-01-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where month_floor(dt) <='2019-02-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where day_floor(dt) <='2019-01-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hour_floor(dt) <='2019-01-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where minute_floor(dt) <='2019-01-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where second_floor(dt) <'2019-01-01' """ + contains("partitions=3/5 (p1,p2,p3)") + } + + // xx_floor with other args + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where month_floor(dt,'2015-01-01') <='2019-02-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where month_floor(dt,5,'2015-01-01') <='2019-02-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where month_floor(dt,5) <='2019-02-01' """ + contains("partitions=4/5 (p1,p2,p3,p4)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hour_floor(dt,c,'2015-01-01') <='2019-01-01' """ + contains("partitions=5/5 (p1,p2,p3,p4,p5)") + } + + // diff nest function + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where years_diff('2021-01-01',month_ceil(hours_add(dt, 1))) <=2 """ + contains("partitions=4/5 (p2,p3,p4,p5)") + } + explain { + sql "select * from test_add_sub_diff_ceil_floor_t where years_diff('2021-01-01',month_ceil(hours_sub(dt, 1))) <=2" + contains("partitions=4/5 (p1,p3,p4,p5)") + } + // mixed with non-function predicates + explain { + sql "select * from test_add_sub_diff_ceil_floor_t where years_diff('2021-01-01',month_ceil(hours_sub(dt, 1))) <=2 and dt>'2019-06-01'" + contains("partitions=2/5 (p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where years_diff('2021-01-01',month_ceil(hours_sub(dt, 1))) <=2 and date_trunc(dt,'day')>'2019-06-01' """ + contains("partitions=2/5 (p4,p5)") + } + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where months_diff(months_add(dt,10), '2018-01-01') =2 """ + contains("partitions=1/5 (p2)") + } + + // hours_add 
second arg is not literal, so will not do pruning + explain { + sql """select * from test_add_sub_diff_ceil_floor_t where hours_add(dt, years_diff(dt,'2018-01-01')) <'2018-01-01' """ + contains("partitions=5/5 (p1,p2,p3,p4,p5)") + } + + // max + sql "drop table if exists max_t" + sql """create table max_t (a int, dt datetime, d date, c varchar(100)) duplicate key(a) + partition by range(dt) ( + partition p1 values less than ("2017-01-01"), + partition p2 values less than ("2018-01-01"), + partition p3 values less than ("2019-01-01"), + partition p4 values less than ("2020-01-01"), + partition p5 values less than ("2021-01-01"), + partition p6 values less than MAXVALUE + ) distributed by hash(a) properties("replication_num"="1");""" + sql """INSERT INTO max_t SELECT number, + date_add('2016-01-01 00:00:00', interval number month), + cast(date_add('2022-01-01 00:00:00', interval number month) as date), cast(number as varchar(65533)) FROM numbers('number'='100');""" + sql "INSERT INTO max_t values(3,null,null,null);" + + explain { + sql "select * from max_t where years_diff('2021-01-01',month_ceil(hours_add(dt, 1),'1990-01-05')) <=2 ;" + contains("partitions=5/6 (p2,p3,p4,p5,p6)") + } + explain { + sql "select * from max_t where years_diff('2021-01-01',month_ceil(hours_add(dt, 1),10,'1990-01-05')) <=2 ;" + contains("partitions=5/6 (p2,p3,p4,p5,p6)") + } + + explain { + sql """select * from max_t where years_diff('2021-01-01',month_ceil(hours_add(dt, 1),10,'1990-01-05')) <=2 and dt >'2018-01-01';""" + contains("partitions=4/6 (p3,p4,p5,p6)") + } + + explain { + sql """select * from max_t where months_diff('2021-01-01',month_floor(hours_add(dt, 1),10,'1990-01-05')) <=2;""" + contains("partitions=3/6 (p1,p5,p6)") + } + + explain { + sql """select * from max_t where months_diff('2021-01-01',month_floor(hours_add(dt, 1),12,'1000-01-01')) > 2""" + contains("partitions=5/6 (p1,p2,p3,p4,p5)") + } + explain { + sql """select * from max_t where months_diff('2021-01-01',month_floor(hours_add(dt, 1),12,'1000-01-01')) > 2 and month_floor(dt) >'2018-01-01' """ + contains("partitions=3/6 (p3,p4,p5)") + } + explain { + sql """select * from max_t where hours_sub(hours_add(dt, 1),1) >'2018-01-01' and days_diff(hours_sub(hours_add(dt, 1),1),'2021-01-01') >2""" + contains("partitions=1/6 (p6)") + } + + // from_days and unix_timestamp + explain { + sql """select * from max_t where unix_timestamp(dt) > 1547838847 """ + contains("partitions=3/6 (p4,p5,p6)") + } + + sql "drop table if exists partition_int_from_days" + sql """ + CREATE TABLE `partition_int_from_days` ( + `a` int NULL, + `b` int NULL + ) ENGINE=OLAP + DUPLICATE KEY(`a`, `b`) + PARTITION BY RANGE(`a`) + (PARTITION p1 VALUES [("-2147483648"), ("100000")), + PARTITION p2 VALUES [("100000"), ("738000")), + PARTITION p3 VALUES [("738000"), ("90000000")), + PARTITION p4 VALUES [("90000000"), (MAXVALUE))) + DISTRIBUTED BY HASH(`a`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); """ + sql """ + insert into partition_int_from_days values(100,100),(100022,1002),(738004,33),(90000003,89); + """ + explain { + sql """select * from partition_int_from_days where from_days(a)>'2020-07-29' """ + contains("partitions=3/4 (p1,p3,p4)") + } + + + sql "drop table if exists unix_time_t" + sql """create table unix_time_t (a int, dt datetime, d date, c varchar(100)) duplicate key(a) + partition by range(dt) ( + partition p1 values less than ("1980-01-01"), + partition p2 values less than ("2018-01-01"), + partition p3 values less than 
("2039-01-01"), + partition p4 values less than MAXVALUE + ) distributed by hash(a) properties("replication_num"="1");""" + sql """INSERT INTO unix_time_t values(1,'1979-01-01','1979-01-01','abc'),(1,'2012-01-01','2012-01-01','abc'),(1,'2020-01-01','2020-01-01','abc'),(1,'2045-01-01','2045-01-01','abc')""" + sql "INSERT INTO unix_time_t values(3,null,null,null);" + explain { + sql """ select * from unix_time_t where unix_timestamp(dt) > 1514822400 """ + contains("partitions=2/4 (p3,p4)") + } + explain { + sql """select * from unix_time_t where unix_timestamp(dt) < 2147454847;""" + contains("partitions=4/4 (p1,p2,p3,p4)") + } + explain { + sql """select * from unix_time_t where unix_timestamp(dt) = 2147454847""" + contains("partitions=2/4 (p3,p4)") + } + explain { + sql """select * from unix_time_t where unix_timestamp(dt) = 2147454847 and dt<'2038-01-01'""" + contains("partitions=1/4 (p3)") + } + explain { + sql """select * from unix_time_t where unix_timestamp(dt) <=0""" + contains("partitions=3/4 (p1,p3,p4)") + } + +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/test_convert_tz.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/test_convert_tz.groovy index c309d10d067194e..3e033a78eb963ce 100644 --- a/regression-test/suites/nereids_rules_p0/partition_prune/test_convert_tz.groovy +++ b/regression-test/suites/nereids_rules_p0/partition_prune/test_convert_tz.groovy @@ -46,13 +46,13 @@ suite("test_convert_tz") { } explain { sql "SELECT * FROM test_convert_tz WHERE convert_tz(timestamp, 'Asia/Shanghai', 'Europe/Paris') > '2021-01-01';"; - contains("partitions=2/3 (p2,p3)") + contains("partitions=3/3 (p1,p2,p3)") } explain { sql """SELECT * FROM test_convert_tz WHERE convert_tz(timestamp, 'Asia/Shanghai', 'Europe/Paris') < '2021-02-24' and convert_tz(timestamp, 'Asia/Shanghai', 'Europe/Paris') > '2021-01-01';""" - contains("partitions=2/3 (p2,p3)") + contains("partitions=3/3 (p1,p2,p3)") } explain { @@ -93,7 +93,7 @@ suite("test_convert_tz") { } explain { sql "SELECT * FROM test_convert_tz WHERE not convert_tz(timestamp, 'Asia/Shanghai', 'Europe/Paris') <= '2021-01-01';"; - contains("partitions=2/3 (p2,p3)") + contains("partitions=3/3 (p1,p2,p3)") } } } \ No newline at end of file From 82d021b80151140faaa51b02369506644379b043 Mon Sep 17 00:00:00 2001 From: "Mingyu Chen (Rayner)" Date: Sun, 22 Dec 2024 19:02:25 +0800 Subject: [PATCH 38/55] [fix](catalog) fix npe after replaying the external catalog (#45756) ### What problem does this PR solve? Related PR: #45433 Problem Summary: the `confLock` should be created after replaying in `gsonPostProcess()` of `ExternalCatalog`, or it will be null. 
--- .../doris/datasource/ExternalCatalog.java | 3 +- .../doris/datasource/ExternalCatalogTest.java | 40 +++++++++++++++++-- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 2575169f79207f4..d1df51177fd4960 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -154,7 +154,7 @@ public abstract class ExternalCatalog protected PreExecutionAuthenticator preExecutionAuthenticator; private volatile Configuration cachedConf = null; - private final byte[] confLock = new byte[0]; + private byte[] confLock = new byte[0]; public ExternalCatalog() { } @@ -784,6 +784,7 @@ public void gsonPostProcess() throws IOException { } } this.propLock = new byte[0]; + this.confLock = new byte[0]; this.initialized = false; setDefaultPropsIfMissing(true); if (tableAutoAnalyzePolicy == null) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java index 43348ca8a0e6ef3..f8e72c366b55f77 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java @@ -22,9 +22,10 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.FeMetaVersion; import org.apache.doris.datasource.hive.HMSExternalCatalog; import org.apache.doris.datasource.test.TestExternalCatalog; -import org.apache.doris.mysql.privilege.Auth; +import org.apache.doris.meta.MetaContext; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.QueryState.MysqlStateType; import org.apache.doris.qe.StmtExecutor; @@ -32,16 +33,20 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.nio.file.Files; import java.util.HashMap; import java.util.List; import java.util.Map; public class ExternalCatalogTest extends TestWithFeService { - private static Auth auth; - private static Env env; + private Env env; private CatalogMgr mgr; private ConnectContext rootCtx; @@ -51,7 +56,6 @@ protected void runBeforeAll() throws Exception { mgr = Env.getCurrentEnv().getCatalogMgr(); rootCtx = createDefaultCtx(); env = Env.getCurrentEnv(); - auth = env.getAuth(); // 1. create test catalog CreateCatalogStmt testCatalog = (CreateCatalogStmt) parseAndAnalyzeStmt( "create catalog test1 properties(\n" @@ -244,4 +248,32 @@ public Map>> getMetadata() { return MOCKED_META; } } + + @Test + public void testSerialization() throws Exception { + MetaContext metaContext = new MetaContext(); + metaContext.setMetaVersion(FeMetaVersion.VERSION_CURRENT); + metaContext.setThreadLocalInfo(); + + // 1. Write objects to file + File file = new File("./external_catalog_persist_test.dat"); + file.createNewFile(); + DataOutputStream dos = new DataOutputStream(Files.newOutputStream(file.toPath())); + + TestExternalCatalog ctl = (TestExternalCatalog) mgr.getCatalog("test1"); + ctl.write(dos); + dos.flush(); + dos.close(); + + // 2. 
Read objects from file + DataInputStream dis = new DataInputStream(Files.newInputStream(file.toPath())); + + TestExternalCatalog ctl2 = (TestExternalCatalog) ExternalCatalog.read(dis); + Configuration conf = ctl2.getConfiguration(); + Assertions.assertNotNull(conf); + + // 3. delete files + dis.close(); + file.delete(); + } } From 9296ce31020858c773e23d18398c8921ec1df94b Mon Sep 17 00:00:00 2001 From: Petrichor Date: Mon, 23 Dec 2024 10:12:59 +0800 Subject: [PATCH 39/55] [feat](nereids)implement useDatabase command in nereids (#45600) Issue Number: close https://github.com/apache/doris/issues/42523 --- .../org/apache/doris/nereids/DorisParser.g4 | 4 +- .../nereids/parser/LogicalPlanBuilder.java | 20 ++- .../doris/nereids/trees/plans/PlanType.java | 3 +- .../trees/plans/commands/use/UseCommand.java | 115 ++++++++++++++++++ .../trees/plans/visitor/CommandVisitor.java | 5 + .../nereids/parser/NereidsParserTest.java | 8 +- .../ddl/use/use_command_nereids.out | 13 ++ .../ddl/use/use_command_nereids.groovy | 79 ++++++++++++ 8 files changed, 236 insertions(+), 11 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/use/UseCommand.java create mode 100644 regression-test/data/nereids_p0/ddl/use/use_command_nereids.out create mode 100644 regression-test/suites/nereids_p0/ddl/use/use_command_nereids.groovy diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 97876c231fec69f..368847bac5f2703 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -870,11 +870,11 @@ supportedUnsetStatement supportedUseStatement : SWITCH catalog=identifier #switchCatalog + | USE (catalog=identifier DOT)? database=identifier #useDatabase ; unsupportedUseStatement - : USE (catalog=identifier DOT)? database=identifier #useDatabase - | USE ((catalog=identifier DOT)? database=identifier)? ATSIGN cluster=identifier #useCloudCluster + : USE ((catalog=identifier DOT)? database=identifier)? 
ATSIGN cluster=identifier #useCloudCluster ; unsupportedDmlStatement diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index bb344e1b376deb0..7bc328e238d99d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -324,6 +324,7 @@ import org.apache.doris.nereids.DorisParser.UpdateAssignmentContext; import org.apache.doris.nereids.DorisParser.UpdateAssignmentSeqContext; import org.apache.doris.nereids.DorisParser.UpdateContext; +import org.apache.doris.nereids.DorisParser.UseDatabaseContext; import org.apache.doris.nereids.DorisParser.UserIdentifyContext; import org.apache.doris.nereids.DorisParser.UserVariableContext; import org.apache.doris.nereids.DorisParser.WhereClauseContext; @@ -683,6 +684,7 @@ import org.apache.doris.nereids.trees.plans.commands.refresh.RefreshDatabaseCommand; import org.apache.doris.nereids.trees.plans.commands.refresh.RefreshTableCommand; import org.apache.doris.nereids.trees.plans.commands.use.SwitchCommand; +import org.apache.doris.nereids.trees.plans.commands.use.UseCommand; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalCTE; import org.apache.doris.nereids.trees.plans.logical.LogicalExcept; @@ -5178,12 +5180,20 @@ public LogicalPlan visitShowQueryProfile(ShowQueryProfileContext ctx) { } @Override - public Object visitSwitchCatalog(SwitchCatalogContext ctx) { - String catalogName = ctx.catalog.getText(); - if (catalogName != null) { - return new SwitchCommand(catalogName); + public LogicalPlan visitSwitchCatalog(SwitchCatalogContext ctx) { + if (ctx.catalog != null) { + return new SwitchCommand(ctx.catalog.getText()); } - throw new AnalysisException("catalog name can not be null"); + throw new ParseException("catalog name can not be null"); + } + + @Override + public LogicalPlan visitUseDatabase(UseDatabaseContext ctx) { + if (ctx.database == null) { + throw new ParseException("database name can not be null"); + } + return ctx.catalog != null ? new UseCommand(ctx.catalog.getText(), ctx.database.getText()) + : new UseCommand(ctx.database.getText()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index dfc129f10b0fd6d..407610fbe08addd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -266,5 +266,6 @@ public enum PlanType { CREATE_ROUTINE_LOAD_COMMAND, SHOW_TABLE_CREATION_COMMAND, SHOW_QUERY_PROFILE_COMMAND, - SWITCH_COMMAND + SWITCH_COMMAND, + USE_COMMAND } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/use/UseCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/use/UseCommand.java new file mode 100644 index 000000000000000..9223e7d5ad66ed9 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/use/UseCommand.java @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.commands.use; + +import org.apache.doris.analysis.StmtType; +import org.apache.doris.catalog.Env; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.commands.Command; +import org.apache.doris.nereids.trees.plans.commands.NoForward; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.StmtExecutor; + +import com.google.common.base.Strings; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +/** + * Representation of a use db statement. + */ +public class UseCommand extends Command implements NoForward { + private static final Logger LOG = LogManager.getLogger(UseCommand.class); + private String catalogName; + private String databaseName; + + public UseCommand(String databaseName) { + super(PlanType.USE_COMMAND); + this.databaseName = databaseName; + } + + public UseCommand(String catalogName, String databaseName) { + super(PlanType.USE_COMMAND); + this.catalogName = catalogName; + this.databaseName = databaseName; + } + + @Override + public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { + validate(ctx); + handleUseStmt(ctx); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitUseCommand(this, context); + } + + @Override + public StmtType stmtType() { + return StmtType.USE; + } + + private void validate(ConnectContext context) throws AnalysisException { + if (Strings.isNullOrEmpty(databaseName)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_NO_DB_ERROR); + } + String currentCatalogName = catalogName == null ? ConnectContext.get().getDefaultCatalog() : catalogName; + + if (!Env.getCurrentEnv().getAccessManager() + .checkDbPriv(ConnectContext.get(), currentCatalogName, databaseName, PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_DBACCESS_DENIED_ERROR, context.getQualifiedUser(), + databaseName); + } + } + + /** + * Process use statement. + */ + private void handleUseStmt(ConnectContext context) { + try { + if (catalogName != null) { + context.getEnv().changeCatalog(context, catalogName); + } + context.getEnv().changeDb(context, databaseName); + } catch (DdlException e) { + LOG.warn("The handling of the use command failed.", e); + context.getState().setError(e.getMysqlErrorCode(), e.getMessage()); + return; + } + context.getState().setOk(); + } + + /** + * Generate sql string. 
+ */ + public String toSql() { + StringBuilder sb = new StringBuilder(); + sb.append("USE "); + if (catalogName != null) { + sb.append("`").append(catalogName).append("`."); + } + sb.append("`").append(databaseName).append("`"); + return sb.toString(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java index d3749e94d57d0f0..122e513a08cb577 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java @@ -151,6 +151,7 @@ import org.apache.doris.nereids.trees.plans.commands.refresh.RefreshDatabaseCommand; import org.apache.doris.nereids.trees.plans.commands.refresh.RefreshTableCommand; import org.apache.doris.nereids.trees.plans.commands.use.SwitchCommand; +import org.apache.doris.nereids.trees.plans.commands.use.UseCommand; /** CommandVisitor. */ public interface CommandVisitor { @@ -697,4 +698,8 @@ default R visitShowQueryProfileCommand(ShowQueryProfileCommand showQueryProfileC default R visitSwitchCommand(SwitchCommand switchCommand, C context) { return visitCommand(switchCommand, context); } + + default R visitUseCommand(UseCommand useCommand, C context) { + return visitCommand(useCommand, context); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java index 9a46b810586eeca..3ce7e64560ce1b9 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java @@ -448,7 +448,7 @@ public void testParseStmtType() { sql = "use a"; plan = nereidsParser.parseSingle(sql); - Assertions.assertEquals(plan.stmtType(), StmtType.OTHER); + Assertions.assertEquals(plan.stmtType(), StmtType.USE); sql = "CREATE TABLE tbl (`id` INT NOT NULL) DISTRIBUTED BY HASH(`id`) BUCKETS 1"; plan = nereidsParser.parseSingle(sql); @@ -463,10 +463,12 @@ public void testParseStmtType() { public void testParseUse() { NereidsParser nereidsParser = new NereidsParser(); String sql = "use db"; - nereidsParser.parseSingle(sql); + LogicalPlan logicalPlan = nereidsParser.parseSingle(sql); + Assertions.assertEquals(logicalPlan.stmtType(), StmtType.USE); sql = "use catalog.db"; - nereidsParser.parseSingle(sql); + LogicalPlan logicalPlan1 = nereidsParser.parseSingle(sql); + Assertions.assertEquals(logicalPlan1.stmtType(), StmtType.USE); } @Test diff --git a/regression-test/data/nereids_p0/ddl/use/use_command_nereids.out b/regression-test/data/nereids_p0/ddl/use/use_command_nereids.out new file mode 100644 index 000000000000000..17a7eaf6d7e12d4 --- /dev/null +++ b/regression-test/data/nereids_p0/ddl/use/use_command_nereids.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !show_tables_db1 -- +tb1 + +-- !show_tables_db2 -- +tb2 + +-- !show_tables_db1 -- +tb1 + +-- !show_tables_db2 -- +tb2 + diff --git a/regression-test/suites/nereids_p0/ddl/use/use_command_nereids.groovy b/regression-test/suites/nereids_p0/ddl/use/use_command_nereids.groovy new file mode 100644 index 000000000000000..70e0f3403e58550 --- /dev/null +++ b/regression-test/suites/nereids_p0/ddl/use/use_command_nereids.groovy @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("use_command_nereids") { + String db1 = "test_use_command_db1" + String db2 = "test_use_command_db2" + String tbl1 = "tb1" + String tbl2 = "tb2" + + sql """drop database if exists ${db1};""" + sql """drop database if exists ${db2};""" + // create database + sql """create database ${db1};""" + sql """create database ${db2};""" + // cloud-mode + if (isCloudMode()) { + return + } + // use command + checkNereidsExecute("use ${db1};") + + sql """drop table if exists ${tbl1};""" + sql """ create table ${db1}.${tbl1} + ( + c1 bigint, + c2 bigint + ) + ENGINE=OLAP + DUPLICATE KEY(c1, c2) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(c1) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + qt_show_tables_db1 """show tables;""" + + checkNereidsExecute("use ${db2};") + sql """drop table if exists ${tbl2};""" + sql """ create table ${db2}.${tbl2} + ( + c1 bigint, + c2 bigint + ) + ENGINE=OLAP + DUPLICATE KEY(c1, c2) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(c1) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + qt_show_tables_db2 """show tables;""" + + checkNereidsExecute("use internal.${db1};") + qt_show_tables_db1 """show tables;""" + checkNereidsExecute("use internal.${db2};") + qt_show_tables_db2 """show tables;""" + + sql """drop table if exists ${db1}.${tbl1};""" + sql """drop table if exists ${db2}.${tbl2};""" + sql """drop database if exists ${db1};""" + sql """drop database if exists ${db2};""" +} \ No newline at end of file From 49d397b8a087a2f72d6d78ab0f5a73f0b1bcbdab Mon Sep 17 00:00:00 2001 From: "Mingyu Chen (Rayner)" Date: Mon, 23 Dec 2024 10:19:37 +0800 Subject: [PATCH 40/55] [fix](hudi) remove session variable field in HudiScanNode (#45762) ### What problem does this PR solve? Related PR: #45355 Problem Summary: The `sessionVariable` field is already defined in the parent class `FileQueryScanNode`, so remove the duplicate from `HudiScanNode`. 
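A shadowed field is easy to miss because the code still compiles and usually still works; here is a minimal sketch of the hazard this cleanup removes (illustrative names, not the real classes):

```java
// Parent holds the single authoritative copy of the state.
class ParentScanNode {
    protected String sessionVariable;
    ParentScanNode(String sv) { this.sessionVariable = sv; }
    String parentView() { return sessionVariable; }
}

class ChildScanNode extends ParentScanNode {
    private String sessionVariable; // shadows the parent's field: two copies now exist

    ChildScanNode(String sv) {
        super(sv);
        this.sessionVariable = sv;
    }

    void updateChildOnly(String sv) { this.sessionVariable = sv; } // parent copy goes stale
    String childView() { return sessionVariable; }
}

public class ShadowingDemo {
    public static void main(String[] args) {
        ChildScanNode node = new ChildScanNode("batch_size=4096");
        node.updateChildOnly("batch_size=1024");
        System.out.println(node.parentView()); // batch_size=4096 (stale)
        System.out.println(node.childView());  // batch_size=1024
    }
}
```

Keeping a single copy in the parent class removes the need to keep the two fields in sync.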
--- .../doris/datasource/hudi/source/HudiScanNode.java | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index 486fdea74a00bbc..b1eb47095f33c49 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -91,8 +91,6 @@ public class HudiScanNode extends HiveScanNode { private final AtomicLong noLogsSplitNum = new AtomicLong(0); - private final boolean useHiveSyncPartition; - private HoodieTableMetaClient hudiClient; private String basePath; private String inputFormat; @@ -102,7 +100,6 @@ public class HudiScanNode extends HiveScanNode { private boolean partitionInit = false; private HoodieTimeline timeline; - private Option snapshotTimestamp; private String queryInstant; private final AtomicReference batchException = new AtomicReference<>(null); @@ -113,7 +110,6 @@ public class HudiScanNode extends HiveScanNode { private boolean incrementalRead = false; private TableScanParams scanParams; private IncrementalRelation incrementalRelation; - private SessionVariable sessionVariable; /** * External file scan node for Query Hudi table @@ -125,8 +121,8 @@ public class HudiScanNode extends HiveScanNode { */ public HudiScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv, Optional scanParams, Optional incrementalRelation, - SessionVariable sessionVariable) { - super(id, desc, "HUDI_SCAN_NODE", StatisticalType.HUDI_SCAN_NODE, needCheckColumnPriv, sessionVariable); + SessionVariable sv) { + super(id, desc, "HUDI_SCAN_NODE", StatisticalType.HUDI_SCAN_NODE, needCheckColumnPriv, sv); isCowTable = hmsTable.isHoodieCowTable(); if (LOG.isDebugEnabled()) { if (isCowTable) { @@ -136,11 +132,9 @@ public HudiScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumn hmsTable.getFullQualifiers()); } } - useHiveSyncPartition = hmsTable.useHiveSyncPartition(); this.scanParams = scanParams.orElse(null); this.incrementalRelation = incrementalRelation.orElse(null); this.incrementalRead = (this.scanParams != null && this.scanParams.incrementalRead()); - this.sessionVariable = sessionVariable; } @Override @@ -215,7 +209,6 @@ protected void doInitialize() throws UserException { throw new UserException("Hudi does not support `FOR VERSION AS OF`, please use `FOR TIME AS OF`"); } queryInstant = tableSnapshot.getTime().replaceAll("[-: ]", ""); - snapshotTimestamp = Option.of(queryInstant); } else { Option snapshotInstant = timeline.lastInstant(); if (!snapshotInstant.isPresent()) { @@ -224,7 +217,6 @@ protected void doInitialize() throws UserException { return; } queryInstant = snapshotInstant.get().getTimestamp(); - snapshotTimestamp = Option.empty(); } } From 0fe8d7b663e671b1e6ae196457cec8102504f533 Mon Sep 17 00:00:00 2001 From: zzzxl Date: Mon, 23 Dec 2024 10:24:01 +0800 Subject: [PATCH 41/55] [fix](inverted index) Fix the issue with incorrect seek results in DICT_COMPRESS (#45738) Related PR: https://github.com/apache/doris/pull/44414 Problem Summary: In inverted index version 3 mode, using dictionary compression may lead to incorrect results after a seek operation. 
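The actual fix lives in the clucene submodule bump below. As background, this is the classic failure mode for block- or dictionary-coded data: a seek that repositions the cursor without reloading the per-block decoding state. A deliberately simplified sketch of that class of bug (plain Java, not the CLucene code):

```java
public class SeekDemo {
    static final int BLOCK = 4;
    // Encoded form: one base value per block plus small per-entry deltas,
    // a stand-in for dictionary/block compression.
    int[] bases;
    int[] deltas;
    int pos = 0;
    int currentBase;

    SeekDemo(int[] values) {
        bases = new int[(values.length + BLOCK - 1) / BLOCK];
        deltas = new int[values.length];
        for (int i = 0; i < values.length; i++) {
            if (i % BLOCK == 0) bases[i / BLOCK] = values[i];
            deltas[i] = values[i] - bases[i / BLOCK];
        }
        currentBase = bases[0];
    }

    int next() {
        if (pos % BLOCK == 0) currentBase = bases[pos / BLOCK]; // re-sync at block start
        return currentBase + deltas[pos++];
    }

    void buggySeek(int target) { pos = target; /* forgets to reload currentBase */ }

    void fixedSeek(int target) { pos = target; currentBase = bases[target / BLOCK]; }

    public static void main(String[] args) {
        int[] docs = {10, 11, 12, 13, 100, 101, 102, 103};
        SeekDemo r = new SeekDemo(docs);
        r.next(); // advance within block 0
        r.buggySeek(5);
        System.out.println(r.next()); // 11: stale base from block 0 plus delta 1, wrong
        r.fixedSeek(5);
        System.out.println(r.next()); // 101: correct after reloading block state
    }
}
```

The new regression cases below run match queries against an index created with `"dict_compression" = "true"`, the setup under which the wrong post-seek results were observed.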
--- be/src/clucene | 2 +- .../test_inverted_index_v3.out | 12 +++++++++ .../test_inverted_index_v3.groovy | 26 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/be/src/clucene b/be/src/clucene index a506dbb6c523aa6..2204eaec46a68e5 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit a506dbb6c523aa65044eb1c527a066d236172543 +Subproject commit 2204eaec46a68e5e9a1876b7021f24839ecb2cf0 diff --git a/regression-test/data/inverted_index_p0/test_inverted_index_v3.out b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out index 9dc20f3e0e0a856..53f4eb7ae0a667f 100644 --- a/regression-test/data/inverted_index_p0/test_inverted_index_v3.out +++ b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out @@ -23,3 +23,15 @@ -- !sql -- 105 +-- !sql -- +238 + +-- !sql -- +104 + +-- !sql -- +104 + +-- !sql -- +105 + diff --git a/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy index ea7dd0b595f5042..82389d84e3cd67d 100644 --- a/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy +++ b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy @@ -19,9 +19,11 @@ suite("test_inverted_index_v3", "p0"){ def indexTbName1 = "test_inverted_index_v3_1" def indexTbName2 = "test_inverted_index_v3_2" + def indexTbName3 = "test_inverted_index_v3_3" sql "DROP TABLE IF EXISTS ${indexTbName1}" sql "DROP TABLE IF EXISTS ${indexTbName2}" + sql "DROP TABLE IF EXISTS ${indexTbName3}" sql """ CREATE TABLE ${indexTbName1} ( @@ -59,6 +61,24 @@ suite("test_inverted_index_v3", "p0"){ ); """ + sql """ + CREATE TABLE ${indexTbName3} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true", "dict_compression" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "inverted_index_storage_format" = "V3" + ); + """ + def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, expected_succ_rows = -1, load_to_single_tablet = 'true' -> @@ -99,6 +119,7 @@ suite("test_inverted_index_v3", "p0"){ try { load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 'documents-1000.json') load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName3, indexTbName3, 'true', 'json', 'documents-1000.json') sql "sync" @@ -112,6 +133,11 @@ suite("test_inverted_index_v3", "p0"){ qt_sql """ select count() from ${indexTbName2} where request match_phrase 'hm bg'; """ qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_any 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_all 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_phrase 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_phrase_prefix 'hm bg'; """ + } finally { } } \ No newline at end of file From 0527a2e5abef95418afd77920aee66f8bd6f8318 Mon Sep 17 00:00:00 2001 From: deardeng Date: Mon, 23 Dec 2024 10:26:31 
+0800 Subject: [PATCH 42/55] [fix](tabletScheduler) Fix addTablet deadlock in tabletScheduler (#45298) The conditions that must be met to trigger the bug, with the second condition being somewhat difficult to trigger, are as follows: 1. The number of tablets that need to be fixed exceeds 2000 (in the pending queue); 2. The lowest-priority scheduling in the pending queue has previously experienced a clone failure (fewer than 3 failures) and has been put back into the pending queue, and additionally a new scheduling request that happens to belong to the same table as that one arrives with a higher priority than the previous scheduling. The fix is to take the table write lock with tryLock in finalizeTabletCtx; if the lock cannot be obtained, the current scheduling fails and the tablet will be rescheduled in the next round. Stack of the deadlocked thread: ``` "colocate group clone checker" #7557 daemon prio=5 os_prio=0 cpu=686.24ms elapsed=6719.45s tid=0x00007f3e6c039ab0 nid=0x17b08 waiting on condition [0x00007f3ec77fe000] (1 similar threads) java.lang.Thread.State: WAITING (parking) at jdk.internal.misc.Unsafe.park(java.base@17.0.2/Native Method) - parking to wait for <0x000010014d223908> (a java.util.concurrent.locks.ReentrantReadWriteLock$FairSync) at java.util.concurrent.locks.LockSupport.park(java.base@17.0.2/LockSupport.java:211) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(java.base@17.0.2/AbstractQueuedSynchronizer.java:715) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(java.base@17.0.2/AbstractQueuedSynchronizer.java:938) at java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(java.base@17.0.2/ReentrantReadWriteLock.java:959) at org.apache.doris.common.lock.MonitoredReentrantReadWriteLock$WriteLock.lock(MonitoredReentrantReadWriteLock.java:98) at org.apache.doris.catalog.Table.writeLockIfExist(Table.java:211) at org.apache.doris.clone.TabletSchedCtx.releaseResource(TabletSchedCtx.java:940) at org.apache.doris.clone.TabletSchedCtx.releaseResource(TabletSchedCtx.java:898) at org.apache.doris.clone.TabletScheduler.releaseTabletCtx(TabletScheduler.java:1743) at org.apache.doris.clone.TabletScheduler.finalizeTabletCtx(TabletScheduler.java:1625) at org.apache.doris.clone.TabletScheduler.addTablet(TabletScheduler.java:287) - locked <0x0000100009429110> (a org.apache.doris.clone.TabletScheduler) at org.apache.doris.clone.ColocateTableCheckerAndBalancer.matchGroups(ColocateTableCheckerAndBalancer.java:563) at org.apache.doris.clone.ColocateTableCheckerAndBalancer.runAfterCatalogReady(ColocateTableCheckerAndBalancer.java:340) at org.apache.doris.common.util.MasterDaemon.runOneCycle(MasterDaemon.java:58) at org.apache.doris.common.util.Daemon.run(Daemon.java:119) ``` --- be/src/olap/task/engine_clone_task.cpp | 10 ++++ .../apache/doris/clone/TabletSchedCtx.java | 31 ++--------- .../apache/doris/clone/TabletScheduler.java | 9 ++-- .../apache/doris/clone/TabletHealthTest.java | 52 +++++++++++++++++++ .../doris/utframe/MockedBackendFactory.java | 5 ++ 5 files changed, 74 insertions(+), 33 deletions(-) diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index fa8d9b8248e3f41..9af3e078d3aefa0 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -171,6 +171,16 @@ Status EngineCloneTask::_do_clone() { auto duration = std::chrono::milliseconds(dp->param("duration", 10 * 1000)); std::this_thread::sleep_for(duration); }); + + DBUG_EXECUTE_IF("EngineCloneTask.failed_clone", { +
LOG_WARNING("EngineCloneTask.failed_clone") + .tag("tablet_id", _clone_req.tablet_id) + .tag("replica_id", _clone_req.replica_id) + .tag("version", _clone_req.version); + return Status::InternalError( + "in debug point, EngineCloneTask.failed_clone tablet={}, replica={}, version={}", + _clone_req.tablet_id, _clone_req.replica_id, _clone_req.version); + }); Status status = Status::OK(); string src_file_path; TBackend src_host; diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index a6ba294e80934cc..b8a098cc891dee0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -62,6 +62,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; /* * TabletSchedCtx contains all information which is created during tablet scheduler processing. @@ -69,28 +70,6 @@ public class TabletSchedCtx implements Comparable { private static final Logger LOG = LogManager.getLogger(TabletSchedCtx.class); - /* - * SCHED_FAILED_COUNTER_THRESHOLD: - * threshold of times a tablet failed to be scheduled - * - * MIN_ADJUST_PRIORITY_INTERVAL_MS: - * min interval time of adjusting a tablet's priority - * - * MAX_NOT_BEING_SCHEDULED_INTERVAL_MS: - * max gap time of a tablet NOT being scheduled. - * - * These 3 params is for adjusting priority. - * If a tablet being scheduled failed for more than SCHED_FAILED_COUNTER_THRESHOLD times, its priority - * will be downgraded. And the interval between adjustment is larger than MIN_ADJUST_PRIORITY_INTERVAL_MS, - * to avoid being downgraded too soon. - * And if a tablet is not being scheduled longer than MAX_NOT_BEING_SCHEDULED_INTERVAL_MS, its priority - * will be upgraded, to avoid starvation. - * - */ - private static final int SCHED_FAILED_COUNTER_THRESHOLD = 5; - private static final long MIN_ADJUST_PRIORITY_INTERVAL_MS = 5 * 60 * 1000L; // 5 min - private static final long MAX_NOT_BEING_SCHEDULED_INTERVAL_MS = 30 * 60 * 1000L; // 30 min - /* * A clone task timeout is between Config.min_clone_task_timeout_sec and Config.max_clone_task_timeout_sec, * estimated by tablet size / MIN_CLONE_SPEED_MB_PER_SECOND. 
@@ -450,10 +429,6 @@ public void setSchedFailedCode(SubCode code) { schedFailedCode = code; } - public CloneTask getCloneTask() { - return cloneTask; - } - public long getCopySize() { return copySize; } @@ -932,12 +907,14 @@ public void releaseResource(TabletScheduler tabletScheduler, boolean reserveTabl } if (cloneTask != null) { AgentTaskQueue.removeTask(cloneTask.getBackendId(), TTaskType.CLONE, cloneTask.getSignature()); + cloneTask = null; // clear all CLONE replicas Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId); if (db != null) { Table table = db.getTableNullable(tblId); - if (table != null && table.writeLockIfExist()) { + // try get table write lock, if failed TabletScheduler will try next time + if (table != null && table.tryWriteLockIfExist(Table.TRY_LOCK_TIMEOUT_MS, TimeUnit.MILLISECONDS)) { try { List cloneReplicas = Lists.newArrayList(); tablet.getReplicas().stream().filter(r -> r.getState() == ReplicaState.CLONE).forEach(r -> { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index 1545236aa59cd0c..dc07ddb0be4d30b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -105,9 +105,6 @@ public class TabletScheduler extends MasterDaemon { private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); - // handle at most BATCH_NUM tablets in one loop - private static final int MIN_BATCH_NUM = 50; - // the minimum interval of updating cluster statistics and priority of tablet info private static final long STAT_UPDATE_INTERVAL_MS = 20 * 1000; // 20s @@ -151,7 +148,7 @@ public enum AddResult { ADDED, // success to add ALREADY_IN, // already added, skip LIMIT_EXCEED, // number of pending tablets exceed the limit - REPLACE_ADDED, // succ to add, and envit a lowest task + REPLACE_ADDED, // succ to add, and evict a lowest task DISABLED // scheduler has been disabled. 
} @@ -292,7 +289,7 @@ public synchronized AddResult addTablet(TabletSchedCtx tablet, boolean force) { addResult = AddResult.REPLACE_ADDED; pendingTablets.pollLast(); finalizeTabletCtx(lowestPriorityTablet, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, - "envit lower priority sched tablet because pending queue is full"); + "evict lower priority sched tablet because pending queue is full"); } if (!contains || tablet.getType() == TabletSchedCtx.Type.REPAIR) { @@ -1868,9 +1865,9 @@ public boolean finishCloneTask(CloneTask cloneTask, TFinishTaskRequest request) tabletCtx.increaseFailedRunningCounter(); if (!tabletCtx.isExceedFailedRunningLimit()) { stat.counterCloneTaskFailed.incrementAndGet(); + tabletCtx.setState(TabletSchedCtx.State.PENDING); tabletCtx.releaseResource(this); tabletCtx.resetFailedSchedCounter(); - tabletCtx.setState(TabletSchedCtx.State.PENDING); addBackToPendingTablets(tabletCtx); return false; } else { diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletHealthTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletHealthTest.java index b22925e5d892700..320bff45229fbad 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletHealthTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletHealthTest.java @@ -40,12 +40,14 @@ import com.google.common.base.Joiner; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.MinMaxPriorityQueue; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; public class TabletHealthTest extends TestWithFeService { @@ -78,6 +80,8 @@ protected void runBeforeAll() throws Exception { @Override protected void runBeforeEach() throws Exception { + // set back to default value + Config.max_scheduling_tablets = 2000; for (Table table : db.getTables()) { dropTable(table.getName(), true); } @@ -358,4 +362,52 @@ public void testColocateTabletHealth() throws Exception { dropTable(table.getName(), true); } + + @Test + public void testAddTabletNoDeadLock() throws Exception { + Config.max_scheduling_tablets = 1; + createTable("CREATE TABLE tbl3 (k INT) DISTRIBUTED BY HASH(k) BUCKETS 2" + + " PROPERTIES ('replication_num' = '3')"); + DebugPointUtil.addDebugPoint("MockedBackendFactory.handleCloneTablet.failed"); + OlapTable table = (OlapTable) db.getTableOrMetaException("tbl3"); + Partition partition = table.getPartitions().iterator().next(); + List tablets = partition.getMaterializedIndices(IndexExtState.ALL).iterator().next().getTablets(); + Assertions.assertEquals(2, tablets.size()); + + partition.updateVisibleVersion(10L); + tablets.forEach(tablet -> tablet.getReplicas().forEach(replica -> replica.updateVersion(10))); + + Tablet tabletA = tablets.get(0); + Tablet tabletB = tablets.get(1); + TabletScheduler scheduler = Env.getCurrentEnv().getTabletScheduler(); + tabletA.getReplicas().get(0).adminUpdateVersionInfo(8L, null, null, 0L); + checkTabletStatus(tabletA, TabletStatus.VERSION_INCOMPLETE, table, partition); + Env.getCurrentEnv().getTabletChecker().runAfterCatalogReady(); + Env.getCurrentEnv().getTabletScheduler().runAfterCatalogReady(); + Thread.sleep(1000); + MinMaxPriorityQueue queue = scheduler.getPendingTabletQueue(); + TabletSchedCtx tabletACtx = queue.peekFirst(); + Assertions.assertNotNull(tabletACtx); + tabletACtx.setLastVisitedTime(System.currentTimeMillis() + 3600 * 
1000L); + tabletB.getReplicas().get(0).adminUpdateVersionInfo(8L, null, null, 0L); + checkTabletStatus(tabletB, TabletStatus.VERSION_INCOMPLETE, table, partition); + Thread thread = new Thread(() -> { + try { + Env.getCurrentEnv().getTabletChecker().runAfterCatalogReady(); + Env.getCurrentEnv().getTabletScheduler().runAfterCatalogReady(); + } catch (Exception e) { + e.printStackTrace(); + } + }); + thread.start(); + Thread.sleep(1000); + Assertions.assertTrue(table.tryWriteLock(2, TimeUnit.SECONDS)); + table.writeUnlock(); + DebugPointUtil.clearDebugPoints(); + doRepair(); + Thread.sleep(1000); + doRepair(); + checkTabletIsHealth(tabletA, table, partition); + checkTabletIsHealth(tabletB, table, partition); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java index 9e8ff913ada8ac5..1a9a175366e528b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java +++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java @@ -95,6 +95,7 @@ import org.apache.thrift.TException; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Random; import java.util.concurrent.BlockingQueue; @@ -305,6 +306,10 @@ private void handleCloneTablet(TAgentTaskRequest request, TFinishTaskRequest fin tabletInfo.setPathHash(pathHash); tabletInfo.setUsed(true); tabletInfos.add(tabletInfo); + if (DebugPointUtil.isEnable("MockedBackendFactory.handleCloneTablet.failed")) { + finishTaskRequest.setTaskStatus(new TStatus(TStatusCode.CANCELLED)); + finishTaskRequest.getTaskStatus().setErrorMsgs(Collections.singletonList("debug point set")); + } finishTaskRequest.setFinishTabletInfos(tabletInfos); } From 4fc9f92defaae41667e1a2eb3b3a0f1f451e99fd Mon Sep 17 00:00:00 2001 From: lihangyu Date: Mon, 23 Dec 2024 10:38:14 +0800 Subject: [PATCH 43/55] [Improve](Variant) pick random backend as coordinator (#45754) pick random rpc coordinator to do fetch_remote_tablet_schema service --- .../common/util/FetchRemoteTabletSchemaUtil.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/FetchRemoteTabletSchemaUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/FetchRemoteTabletSchemaUtil.java index 4a0b9d1ff5950d0..00147207c143db5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/FetchRemoteTabletSchemaUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/FetchRemoteTabletSchemaUtil.java @@ -98,16 +98,19 @@ public List fetch() { if (!backend.isAlive()) { continue; } - // need 2 be to provide a retry - if (coordinatorBackend.size() < 2) { - coordinatorBackend.add(backend); - } + coordinatorBackend.add(backend); PTabletsLocation.Builder locationBuilder = PTabletsLocation.newBuilder() .setHost(backend.getHost()) .setBrpcPort(backend.getBrpcPort()); PTabletsLocation location = locationBuilder.addAllTabletId(tabletIds).build(); locations.add(location); } + // pick 2 random coordinator + Collections.shuffle(coordinatorBackend); + if (!coordinatorBackend.isEmpty()) { + coordinatorBackend = coordinatorBackend.subList(0, Math.min(2, coordinatorBackend.size())); + LOG.debug("pick coordinator backend {}", coordinatorBackend.get(0)); + } PFetchRemoteSchemaRequest.Builder requestBuilder = PFetchRemoteSchemaRequest.newBuilder() .addAllTabletLocation(locations) .setIsCoordinator(true); From 
f01f759a0387aa2215e532e9f455d17495089233 Mon Sep 17 00:00:00 2001 From: Socrates Date: Mon, 23 Dec 2024 10:59:54 +0800 Subject: [PATCH 44/55] [Fix](ORC) Not push down fixed char type in orc reader (#45484) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Problem Summary: In Hive, the ORC file format supports fixed-length CHAR types (CHAR(n)) by padding strings with spaces to ensure the fixed length. When data is written into ORC tables, the actual stored value includes additional trailing spaces to meet the defined length. These padded spaces are also considered during the computation of statistics. However, in Doris, fixed-length CHAR types (CHAR(n)) and variable-length VARCHAR types are internally represented as the same type. Doris does not pad CHAR values with spaces and treats them as regular strings. As a result, when Doris reads ORC files generated by Hive and parses the statistics, the differences in the handling of CHAR types between the two systems can lead to inconsistencies or incorrect statistics. ```sql create table fixed_char_table ( i int, c char(2) ) stored as orc; insert into fixed_char_table values(1,'a'),(2,'b '), (3,'cd'); select * from fixed_char_table where c = 'a'; ``` before ```text empty ``` after ```text 1 a ``` If a Hive table undergoes a schema change, such as a column's type being modified from INT to STRING, predicate pushdown should also be disabled. Performing predicate pushdown under these circumstances may lead to incorrect filtering, as the type mismatch can cause errors or unexpected behavior during query execution. ```sql create table type_changed_table ( id int, name string ) stored as orc; insert into type_changed_table values (1, 'Alice'), (2, 'Bob'), (3, 'Charlie'); ALTER TABLE type_changed_table CHANGE COLUMN id id STRING; select * from type_changed_table where id = '1'; ``` before ```text empty ``` after ```text 1 Alice ``` ### Release note [fix](orc) Not push down fixed char type in orc reader #45484 --- be/src/vec/exec/format/orc/vorc_reader.cpp | 24 ++++++--- be/src/vec/exec/format/orc/vorc_reader.h | 4 +- be/src/vec/exec/scan/vfile_scanner.cpp | 10 +--- .../orc_predicate/orc_predicate_table.hql | 16 ++++++ .../data/multi_catalog/orc_predicate/run.sh | 9 ++++ .../hive/test_hive_orc_predicate.out | 29 +++++++++++ .../hive/test_hive_orc_predicate.groovy | 50 +++++++++++++++++++ 7 files changed, 122 insertions(+), 20 deletions(-) create mode 100644 docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql create mode 100755 docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/run.sh create mode 100644 regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index a1ecb1ae0dcf8b1..4d41830668960c9 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -143,7 +143,7 @@ void ORCFileInputStream::read(void* buf, uint64_t length, uint64_t offset) { OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range, size_t batch_size, const std::string& ctz, io::IOContext* io_ctx, - bool enable_lazy_mat, std::vector* unsupported_pushdown_types) + bool enable_lazy_mat) : _profile(profile), _state(state), _scan_params(params), @@ -156,8 +156,7 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state, _enable_lazy_mat(enable_lazy_mat), _enable_filter_by_min_max( state == nullptr ? true : state->query_options().enable_orc_filter_by_min_max), - _dict_cols_has_converted(false), - _unsupported_pushdown_types(unsupported_pushdown_types) { + _dict_cols_has_converted(false) { TimezoneUtils::find_cctz_time_zone(ctz, _time_zone); VecDateTimeValue t; t.from_unixtime(0, ctz); @@ -460,7 +459,8 @@ static std::unordered_map TYPEKIND_TO_PRE {orc::TypeKind::DOUBLE, orc::PredicateDataType::FLOAT}, {orc::TypeKind::STRING, orc::PredicateDataType::STRING}, {orc::TypeKind::BINARY, orc::PredicateDataType::STRING}, - {orc::TypeKind::CHAR, orc::PredicateDataType::STRING}, + // should not push down CHAR type, because CHAR type is fixed length and will be padded + // {orc::TypeKind::CHAR, orc::PredicateDataType::STRING}, {orc::TypeKind::VARCHAR, orc::PredicateDataType::STRING}, {orc::TypeKind::DATE, orc::PredicateDataType::DATE}, {orc::TypeKind::DECIMAL, orc::PredicateDataType::DECIMAL}, @@ -492,8 +492,9 @@ std::tuple convert_to_orc_literal(const orc::Type* type, [[fallthrough]]; case orc::TypeKind::BINARY: [[fallthrough]]; - case orc::TypeKind::CHAR: - [[fallthrough]]; + // should not push down CHAR type, because CHAR type is fixed length and will be padded + // case orc::TypeKind::CHAR: + // [[fallthrough]]; case orc::TypeKind::VARCHAR: { return std::make_tuple(true, orc::Literal(literal_data.data, literal_data.size)); } @@ -593,7 +594,15 @@ std::tuple OrcReader::_make_orc_lite auto literal_data = literal->get_column_ptr()->get_data_at(0); auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()]; auto slot_type = slot->type(); - switch (slot_type.type) { + auto primitive_type = slot_type.type; + auto src_type = OrcReader::convert_to_doris_type(orc_type).type; + // should not push down the predicate if the column's string type was changed from another type + if (src_type != primitive_type && !is_string_type(src_type) && is_string_type(primitive_type)) { + LOG(WARNING) << "Unsupported Push Down Schema Changed Column " << primitive_type << " to " + << src_type; + return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); + } + switch (primitive_type) { #define M(NAME) \ case TYPE_##NAME: { \ auto [valid, orc_literal] = convert_to_orc_literal( \ @@ -606,7 +615,6 @@ std::tuple OrcReader::_make_orc_lite M(INT) \ M(BIGINT) \ M(LARGEINT) \ - M(CHAR) \ M(DATE) \ M(DATETIME) \ M(DATEV2) \ diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 0dd19077bcf0afe..6bbf3bead1efceb 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -129,8 +129,7 @@ class OrcReader : public GenericReader { OrcReader(RuntimeProfile* profile, RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range, size_t batch_size, const std::string& ctz, - io::IOContext* io_ctx, bool enable_lazy_mat = true, - std::vector* unsupported_pushdown_types = nullptr); + io::IOContext* io_ctx, bool enable_lazy_mat = true); OrcReader(const TFileScanRangeParams& params, const TFileRangeDesc& range, const std::string& ctz, io::IOContext* io_ctx, bool enable_lazy_mat = true); @@ -639,7 +638,6 @@ class OrcReader : public GenericReader { std::unique_ptr _string_dict_filter; bool _dict_cols_has_converted = false; bool _has_complex_type = false; - std::vector* _unsupported_pushdown_types; // resolve schema 
change std::unordered_map> _converters; diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 76639e4bed4a288..93a22d1a94bf522 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -879,17 +879,9 @@ Status VFileScanner::_get_next_reader() { break; } case TFileFormatType::FORMAT_ORC: { - std::vector<orc::TypeKind>* unsupported_pushdown_types = nullptr; - if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "paimon") { - static std::vector<orc::TypeKind> paimon_unsupport_type = - std::vector<orc::TypeKind> {orc::TypeKind::CHAR}; - unsupported_pushdown_types = &paimon_unsupport_type; - } std::unique_ptr<OrcReader> orc_reader = OrcReader::create_unique( _profile, _state, *_params, range, _state->query_options().batch_size, - _state->timezone(), _io_ctx.get(), _state->query_options().enable_orc_lazy_mat, - unsupported_pushdown_types); + _state->timezone(), _io_ctx.get(), _state->query_options().enable_orc_lazy_mat); orc_reader->set_push_down_agg_type(_get_push_down_agg_type()); if (push_down_predicates) { RETURN_IF_ERROR(_process_late_arrival_conjuncts()); diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql new file mode 100644 index 000000000000000..a946b25ff1af04d --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql @@ -0,0 +1,16 @@ +CREATE DATABASE IF NOT EXISTS multi_catalog; +USE multi_catalog; + +create table fixed_char_table ( + i int, + c char(2) +) stored as orc; + +insert into fixed_char_table values(1,'a'),(2,'b '), (3,'cd'); + +create table type_changed_table ( + id int, + name string +) stored as orc; +insert into type_changed_table values (1, 'Alice'), (2, 'Bob'), (3, 'Charlie'); +ALTER TABLE type_changed_table CHANGE COLUMN id id STRING; diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/run.sh new file mode 100755 index 000000000000000..f934ff3009c6f23 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -x + +CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" + +# create table +hive -f "${CUR_DIR}"/orc_predicate_table.hql + + diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out b/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out new file mode 100644 index 000000000000000..f42bb629550c88a --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out @@ -0,0 +1,29 @@ +-- This file is automatically generated.
You should know what you did if you want to edit this +-- !predicate_fixed_char1 -- +1 a + +-- !predicate_fixed_char2 -- + +-- !predicate_changed_type1 -- +1 Alice + +-- !predicate_changed_type2 -- +2 Bob + +-- !predicate_changed_type3 -- +3 Charlie + +-- !predicate_fixed_char1 -- +1 a + +-- !predicate_fixed_char2 -- + +-- !predicate_changed_type1 -- +1 Alice + +-- !predicate_changed_type2 -- +2 Bob + +-- !predicate_changed_type3 -- +3 Charlie + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy new file mode 100644 index 000000000000000..2dd647aa2c1d8e4 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_orc_predicate", "p0,external,hive,external_docker,external_docker_hive") { + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + try { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = "${hivePrefix}_test_predicate" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + sql """use `${catalog_name}`.`multi_catalog`""" + + qt_predicate_fixed_char1 """ select * from fixed_char_table where c = 'a';""" + qt_predicate_fixed_char2 """ select * from fixed_char_table where c = 'a ';""" + + qt_predicate_changed_type1 """ select * from type_changed_table where id = '1';""" + qt_predicate_changed_type2 """ select * from type_changed_table where id = '2';""" + qt_predicate_changed_type3 """ select * from type_changed_table where id = '3';""" + + sql """drop catalog if exists ${catalog_name}""" + } finally { } } } From e7d2fedd1ce8abc276476d92eacfa882f318fcdc Mon Sep 17 00:00:00 2001 From: 924060929 Date: Mon, 23 Dec 2024 11:17:45 +0800 Subject: [PATCH 45/55] [opt](nereids) optimize rewrite of synchronized materialized view (#45748) optimize rewrite of synchronized materialized view 1. cache toSql (a standalone sketch of this caching pattern appears further below, after the computeToSql renames) 2.
fast parse UnboundSlot in NereidsParser.parseExpression --- .../doris/common/profile/SummaryProfile.java | 4 +- .../doris/nereids/analyzer/MappingSlot.java | 2 +- .../doris/nereids/analyzer/UnboundAlias.java | 2 +- .../nereids/analyzer/UnboundFunction.java | 2 +- .../doris/nereids/analyzer/UnboundSlot.java | 14 ++++- .../doris/nereids/analyzer/UnboundStar.java | 2 +- .../doris/nereids/parser/NereidsParser.java | 53 +++++++++++++++++++ .../AbstractSelectMaterializedIndexRule.java | 5 +- .../SelectMaterializedIndexWithAggregate.java | 6 ++- .../expressions/AggregateExpression.java | 2 +- .../nereids/trees/expressions/Alias.java | 2 +- .../trees/expressions/ArrayItemReference.java | 2 +- .../trees/expressions/BinaryOperator.java | 2 +- .../nereids/trees/expressions/BoundStar.java | 2 +- .../nereids/trees/expressions/CaseWhen.java | 2 +- .../doris/nereids/trees/expressions/Cast.java | 2 +- .../trees/expressions/CompoundPredicate.java | 2 +- .../nereids/trees/expressions/Exists.java | 4 +- .../nereids/trees/expressions/Expression.java | 10 ++++ .../trees/expressions/InPredicate.java | 2 +- .../nereids/trees/expressions/InSubquery.java | 4 +- .../nereids/trees/expressions/IsNull.java | 2 +- .../nereids/trees/expressions/ListQuery.java | 4 +- .../nereids/trees/expressions/Match.java | 2 +- .../doris/nereids/trees/expressions/Not.java | 2 +- .../trees/expressions/OrderExpression.java | 2 +- .../trees/expressions/Placeholder.java | 2 +- .../nereids/trees/expressions/Properties.java | 2 +- .../trees/expressions/ScalarSubquery.java | 4 +- .../trees/expressions/SlotReference.java | 2 +- .../expressions/StringRegexPredicate.java | 2 +- .../trees/expressions/SubqueryExpr.java | 2 +- .../expressions/TimestampArithmetic.java | 2 +- .../trees/expressions/UnaryOperator.java | 2 +- .../nereids/trees/expressions/Variable.java | 2 +- .../trees/expressions/VariableDesc.java | 2 +- .../expressions/VirtualSlotReference.java | 2 +- .../nereids/trees/expressions/WhenClause.java | 2 +- .../trees/expressions/WindowExpression.java | 2 +- .../trees/expressions/WindowFrame.java | 2 +- .../expressions/functions/BoundFunction.java | 2 +- .../functions/agg/AggregateFunction.java | 2 +- .../expressions/functions/agg/Count.java | 4 +- .../functions/scalar/CryptoFunction.java | 2 +- .../expressions/functions/scalar/Lambda.java | 2 +- .../functions/table/TableValuedFunction.java | 2 +- .../expressions/literal/ArrayLiteral.java | 2 +- .../expressions/literal/DateLiteral.java | 2 +- .../expressions/literal/DateTimeLiteral.java | 2 +- .../expressions/literal/DecimalLiteral.java | 2 +- .../expressions/literal/DecimalV3Literal.java | 2 +- .../trees/expressions/literal/Literal.java | 2 +- .../trees/expressions/literal/MapLiteral.java | 2 +- .../trees/expressions/literal/MaxLiteral.java | 2 +- .../expressions/literal/StructLiteral.java | 2 +- .../plans/distribute/DistributePlanner.java | 1 + .../org/apache/doris/nereids/util/Utils.java | 12 ++++- 57 files changed, 151 insertions(+), 62 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java index 6a92e043b6eb20a..5b0d5ba353387fa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java @@ -600,7 +600,9 @@ public void setQueryDistributedFinishTime() { } public void setQueryPlanFinishTime() { - this.queryPlanFinishTime = TimeUtils.getStartTimeMs(); 
+ if (queryPlanFinishTime == -1) { + this.queryPlanFinishTime = TimeUtils.getStartTimeMs(); + } } public void setQueryScheduleFinishTime() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/MappingSlot.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/MappingSlot.java index c7a020fd2abddf2..2e9e84195508b1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/MappingSlot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/MappingSlot.java @@ -77,7 +77,7 @@ public boolean nullable() { } @Override - public String toSql() { + public String computeToSql() { return slot.toSql(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundAlias.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundAlias.java index 2be2130aba71d43..25d40dd59811949 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundAlias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundAlias.java @@ -59,7 +59,7 @@ public DataType getDataType() throws UnboundException { } @Override - public String toSql() { + public String computeToSql() { StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("(" + child() + ")"); alias.ifPresent(name -> stringBuilder.append(" AS " + name)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundFunction.java index a53917f08cd8d0e..b4b21e40dcd4df1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundFunction.java @@ -115,7 +115,7 @@ public List getArguments() { } @Override - public String toSql() throws UnboundException { + public String computeToSql() throws UnboundException { String params = children.stream() .map(Expression::toSql) .collect(Collectors.joining(", ")); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundSlot.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundSlot.java index f85812569804aa1..fdcb9547837686c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundSlot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundSlot.java @@ -81,8 +81,18 @@ public String getInternalName() { } @Override - public String toSql() { - return nameParts.stream().map(Utils::quoteIfNeeded).reduce((left, right) -> left + "." + right).orElse(""); + public String computeToSql() { + switch (nameParts.size()) { + case 1: return Utils.quoteIfNeeded(nameParts.get(0)); + case 2: return Utils.quoteIfNeeded(nameParts.get(0)) + "." + Utils.quoteIfNeeded(nameParts.get(1)); + case 3: return Utils.quoteIfNeeded(nameParts.get(0)) + "." + Utils.quoteIfNeeded(nameParts.get(1)) + + "." + Utils.quoteIfNeeded(nameParts.get(2)); + default: { + return nameParts.stream().map(Utils::quoteIfNeeded) + .reduce((left, right) -> left + "." 
+ right) + .orElse(""); + } + } } public static UnboundSlot quoted(String name) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundStar.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundStar.java index 6d8ed904ec109d1..cee6a0105f87c7f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundStar.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundStar.java @@ -98,7 +98,7 @@ public UnboundStar(List qualifier, List exceptedSlots, } @Override - public String toSql() { + public String computeToSql() { StringBuilder builder = new StringBuilder(); builder.append(Utils.qualifiedName(qualifier, "*")); if (!exceptedSlots.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java index 34646c1d6579534..4ed71bbbc14673c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java @@ -23,7 +23,9 @@ import org.apache.doris.common.Pair; import org.apache.doris.nereids.DorisLexer; import org.apache.doris.nereids.DorisParser; +import org.apache.doris.nereids.DorisParser.NonReservedContext; import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.analyzer.UnboundSlot; import org.apache.doris.nereids.glue.LogicalPlanAdapter; import org.apache.doris.nereids.parser.plsql.PLSqlLogicalPlanBuilder; import org.apache.doris.nereids.trees.expressions.Expression; @@ -35,6 +37,8 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.antlr.v4.runtime.CharStreams; @@ -45,14 +49,17 @@ import org.antlr.v4.runtime.TokenSource; import org.antlr.v4.runtime.atn.PredictionMode; import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.TerminalNode; import org.apache.commons.collections.CollectionUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.lang.reflect.Method; import java.util.BitSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import javax.annotation.Nullable; @@ -66,6 +73,9 @@ public class NereidsParser { private static final BitSet EXPLAIN_TOKENS = new BitSet(); + private static final Set NON_RESERVED_KEYWORDS; + private static final Map LITERAL_TOKENS; + static { EXPLAIN_TOKENS.set(DorisLexer.EXPLAIN); EXPLAIN_TOKENS.set(DorisLexer.PARSED); @@ -77,6 +87,25 @@ public class NereidsParser { EXPLAIN_TOKENS.set(DorisLexer.PLAN); EXPLAIN_TOKENS.set(DorisLexer.PROCESS); + ImmutableSet.Builder nonReserveds = ImmutableSet.builder(); + for (Method declaredMethod : NonReservedContext.class.getDeclaredMethods()) { + if (TerminalNode.class.equals(declaredMethod.getReturnType()) + && declaredMethod.getName().toUpperCase().equals(declaredMethod.getName()) + && declaredMethod.getParameterTypes().length == 0) { + String nonReserved = declaredMethod.getName(); + nonReserveds.add(nonReserved); + } + } + NON_RESERVED_KEYWORDS = nonReserveds.build(); + + ImmutableMap.Builder literalToTokenType = ImmutableMap.builder(); + for (int tokenType = 0; tokenType <= DorisLexer.VOCABULARY.getMaxTokenType(); 
tokenType++) { + String literalName = DorisLexer.VOCABULARY.getLiteralName(tokenType); + if (literalName != null) { + literalToTokenType.put(literalName.substring(1, literalName.length() - 1), tokenType); + } + } + LITERAL_TOKENS = literalToTokenType.build(); } /** @@ -256,9 +285,33 @@ public List> parseMultiple(String sql, } public Expression parseExpression(String expression) { + if (isSimpleIdentifier(expression)) { + return new UnboundSlot(expression); + } return parse(expression, DorisParser::expression); } + private static boolean isSimpleIdentifier(String expression) { + if (expression == null || expression.isEmpty()) { + return false; + } + + boolean hasLetter = false; + for (int i = 0; i < expression.length(); i++) { + char c = expression.charAt(i); + if ((('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_' || c == '$')) { + hasLetter = true; + } else if (!('0' <= c && c <= '9')) { + return false; + } + } + if (!hasLetter) { + return false; + } + String upperCase = expression.toUpperCase(); + return (NON_RESERVED_KEYWORDS.contains(upperCase) || !LITERAL_TOKENS.containsKey(upperCase)); + } + public DataType parseDataType(String dataType) { return parse(dataType, DorisParser::dataType); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java index 357883d1f7136af..3c31ce22e4611a1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java @@ -219,8 +219,9 @@ public static String parseMvColumnToMvName(String mvName, Optional aggTy } protected static boolean containsAllColumn(Expression expression, Set mvColumnNames) { - if (mvColumnNames.contains(expression.toSql()) || mvColumnNames - .contains(org.apache.doris.analysis.CreateMaterializedViewStmt.mvColumnBreaker(expression.toSql()))) { + String sql = expression.toSql(); + if (mvColumnNames.contains(sql) || mvColumnNames + .contains(org.apache.doris.analysis.CreateMaterializedViewStmt.mvColumnBreaker(sql))) { return true; } if (expression.children().isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java index 468b9cf659c5b22..fa03165b37c337e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java @@ -625,9 +625,13 @@ private SelectResult select(LogicalOlapScan scan, Set requiredScanOutput, aggFuncsDiff(aggregateFunctions, aggRewriteResult), groupingExprs).isOn()) .collect(Collectors.toSet()); + Set candidatesWithRewritingIndexes = candidatesWithRewriting.stream() + .map(result -> result.index) + .collect(Collectors.toSet()); + Set candidatesWithoutRewriting = indexesGroupByIsBaseOrNot .getOrDefault(false, ImmutableList.of()).stream() - .filter(index -> !candidatesWithRewriting.contains(index)) + .filter(index -> !candidatesWithRewritingIndexes.contains(index)) .filter(index -> preAggEnabledByHint(scan) || checkPreAggStatus(scan, index.getId(), predicates, aggregateFunctions, groupingExprs).isOn()) 
.collect(Collectors.toSet()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/AggregateExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/AggregateExpression.java index 2e20dd05180a71c..86d7eb723823004 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/AggregateExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/AggregateExpression.java @@ -100,7 +100,7 @@ public AggregateExpression withChildren(List children) { } @Override - public String toSql() { + public String computeToSql() { if (aggregateParam.aggMode.productAggregateBuffer) { return "partial_" + function.toSql(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java index 9eea3afd879e670..53a82011ac4c3c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Alias.java @@ -124,7 +124,7 @@ public DataType getDataType() throws UnboundException { } @Override - public String toSql() { + public String computeToSql() { return child().toSql() + " AS `" + name.get() + "`"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java index c54ad358561d8e7..edc074af2b513af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ArrayItemReference.java @@ -92,7 +92,7 @@ public DataType getDataType() { } @Override - public String toSql() { + public String computeToSql() { return child(0).toSql(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BinaryOperator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BinaryOperator.java index 750f3a778814309..f699e7531f62072 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BinaryOperator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BinaryOperator.java @@ -49,7 +49,7 @@ public List expectedInputTypes() { } @Override - public String toSql() { + public String computeToSql() { return "(" + left().toSql() + " " + symbol + " " + right().toSql() + ")"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BoundStar.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BoundStar.java index 8b4bffad3fc8176..0789d9a65279dbb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BoundStar.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/BoundStar.java @@ -35,7 +35,7 @@ public BoundStar(List children) { ); } - public String toSql() { + public String computeToSql() { return children.stream().map(Expression::toSql).collect(Collectors.joining(", ")); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CaseWhen.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CaseWhen.java index bd48b648a73dfb1..0c3687f57153f27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CaseWhen.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CaseWhen.java @@ -111,7 +111,7 @@ 
public String toString() { } @Override - public String toSql() throws UnboundException { + public String computeToSql() throws UnboundException { StringBuilder output = new StringBuilder("CASE"); for (Expression child : children()) { if (child instanceof WhenClause) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java index 9122f0f4adbb0a2..20f8079bd9f1414 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java @@ -95,7 +95,7 @@ public Cast withChildren(List children) { } @Override - public String toSql() throws UnboundException { + public String computeToSql() throws UnboundException { return "cast(" + child().toSql() + " as " + targetType.toSql() + ")"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CompoundPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CompoundPredicate.java index d58d1ba8193de5e..9b1535eb9cc3c98 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CompoundPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/CompoundPredicate.java @@ -101,7 +101,7 @@ public boolean equals(Object o) { } @Override - public String toSql() { + public String computeToSql() { StringBuilder sb = new StringBuilder(); children().forEach(c -> sb.append(c.toSql()).append(",")); sb.deleteCharAt(sb.length() - 1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Exists.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Exists.java index 3d3bd17c70e12cd..8d097d0faa6f45d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Exists.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Exists.java @@ -65,8 +65,8 @@ public DataType getDataType() throws UnboundException { } @Override - public String toSql() { - return "EXISTS (SUBQUERY) " + super.toSql(); + public String computeToSql() { + return "EXISTS (SUBQUERY) " + super.computeToSql(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java index e20290e8b59c416..6634d5e0160ead7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java @@ -20,6 +20,7 @@ import org.apache.doris.nereids.analyzer.Unbound; import org.apache.doris.nereids.analyzer.UnboundVariable; import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.exceptions.UnboundException; import org.apache.doris.nereids.trees.AbstractTreeNode; import org.apache.doris.nereids.trees.expressions.ArrayItemReference.ArrayItemSlot; import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait; @@ -68,6 +69,7 @@ public abstract class Expression extends AbstractTreeNode implements private final Supplier> inputSlots = Suppliers.memoize( () -> collect(e -> e instanceof Slot && !(e instanceof ArrayItemSlot))); private final int fastChildrenHashCode; + private final Supplier toSqlCache = Suppliers.memoize(this::computeToSql); protected Expression(Expression... 
children) { super(children); @@ -210,6 +212,10 @@ public int fastChildrenHashCode() { return fastChildrenHashCode; } + protected String computeToSql() { + throw new UnboundException("sql"); + } + protected TypeCheckResult checkInputDataTypesInternal() { return TypeCheckResult.SUCCESS; } @@ -301,6 +307,10 @@ public boolean isInferred() { return inferred; } + public final String toSql() { + return toSqlCache.get(); + } + @Override public Expression withChildren(List children) { throw new RuntimeException(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InPredicate.java index 53a753c4535dd1a..b8c0cf544719012 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InPredicate.java @@ -122,7 +122,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { return compareExpr.toSql() + " IN " + options.stream() .map(Expression::toSql).sorted() .collect(Collectors.joining(", ", "(", ")")); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InSubquery.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InSubquery.java index 8b7d0518181fdad..71dc1f5eb4f08f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InSubquery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/InSubquery.java @@ -77,8 +77,8 @@ public boolean nullable() throws UnboundException { } @Override - public String toSql() { - return this.compareExpr.toSql() + " IN (" + super.toSql() + ")"; + public String computeToSql() { + return this.compareExpr.toSql() + " IN (" + super.computeToSql() + ")"; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/IsNull.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/IsNull.java index 7bb8538fc75031d..22216a84bafe874 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/IsNull.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/IsNull.java @@ -55,7 +55,7 @@ public IsNull withChildren(List children) { } @Override - public String toSql() throws UnboundException { + public String computeToSql() throws UnboundException { return child().toSql() + " IS NULL"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ListQuery.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ListQuery.java index 214525d25945804..16dade740b94221 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ListQuery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ListQuery.java @@ -48,8 +48,8 @@ public DataType getDataType() { } @Override - public String toSql() { - return " (LISTQUERY) " + super.toSql(); + public String computeToSql() { + return " (LISTQUERY) " + super.computeToSql(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java index d9dcde287d3884b..405e3cb8fe46120 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java @@ -76,7 +76,7 @@ public boolean nullable() 
throws UnboundException { } @Override - public String toSql() { + public String computeToSql() { return "(" + left().toSql() + " " + symbol + " " + right().toSql() + ")"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java index 5061cab5ac96316..b001da9118fea34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java @@ -102,7 +102,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { return "( not " + child().toSql() + ")"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/OrderExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/OrderExpression.java index d09fe2c0a00ed3f..7e33d4315d81adc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/OrderExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/OrderExpression.java @@ -81,7 +81,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { return orderKey.toSql(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Placeholder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Placeholder.java index c79c2d9db6d0e9d..3ce8cdb017f9b89 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Placeholder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Placeholder.java @@ -66,7 +66,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { return "?"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Properties.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Properties.java index db0c78c1f78f579..d604e919e31d179 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Properties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Properties.java @@ -56,7 +56,7 @@ public DataType getDataType() throws UnboundException { } @Override - public String toSql() { + public String computeToSql() { return getMap() .entrySet() .stream() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java index 178debe7db83a55..25a7052a4acabc8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ScalarSubquery.java @@ -84,8 +84,8 @@ public DataType getDataType() throws UnboundException { } @Override - public String toSql() { - return " (SCALARSUBQUERY) " + super.toSql(); + public String computeToSql() { + return " (SCALARSUBQUERY) " + super.computeToSql(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java index e90bc3a5ecfaf4d..890fbdfdb96f72c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java @@ -183,7 +183,7 @@ public 
Optional getTable() { } @Override - public String toSql() { + public String computeToSql() { if (subPath.isEmpty()) { return name.get(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java index 8900ac928590c33..5a62be54f93d81d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java @@ -55,7 +55,7 @@ public List getSignatures() { } @Override - public String toSql() { + public String computeToSql() { return '(' + left().toSql() + ' ' + getName() + ' ' + right().toSql() + ')'; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SubqueryExpr.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SubqueryExpr.java index 35d0e566476880c..c08fda1dc6b7135 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SubqueryExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SubqueryExpr.java @@ -80,7 +80,7 @@ public boolean nullable() throws UnboundException { } @Override - public String toSql() { + public String computeToSql() { return "(" + queryPlan + ")"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/TimestampArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/TimestampArithmetic.java index d3e326fa48a5744..40a727eb1757ba3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/TimestampArithmetic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/TimestampArithmetic.java @@ -129,7 +129,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { StringBuilder strBuilder = new StringBuilder(); if (funcName != null) { // Function-call like version. 
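
The computeToSql renames above and below all feed a single caching hook on Expression (a memoized Supplier plus a final toSql(), shown later in this patch). As a standalone sketch of that pattern — the Expr/Lit/Add classes here are illustrative stand-ins, not the real Nereids hierarchy — assuming Guava on the classpath:

```java
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;

// Hypothetical stand-ins for the Nereids Expression tree; only the
// memoized computeToSql()/toSql() split mirrors this patch.
abstract class Expr {
    // Expressions are immutable, so the SQL text can be computed once and cached.
    private final Supplier<String> sqlCache = Suppliers.memoize(this::computeToSql);

    // Subclasses build the SQL string; callers always go through toSql().
    protected abstract String computeToSql();

    public final String toSql() {
        return sqlCache.get();
    }
}

final class Lit extends Expr {
    private final String text;

    Lit(String text) {
        this.text = text;
    }

    @Override
    protected String computeToSql() {
        return text;
    }
}

final class Add extends Expr {
    private final Expr left;
    private final Expr right;

    Add(Expr left, Expr right) {
        this.left = left;
        this.right = right;
    }

    @Override
    protected String computeToSql() {
        // Children cache too, so a deep tree is stringified once rather than
        // once per rule application during materialized-view rewrite.
        return "(" + left.toSql() + " + " + right.toSql() + ")";
    }
}

class ToSqlCacheDemo {
    public static void main(String[] args) {
        Expr e = new Add(new Lit("a"), new Lit("b"));
        System.out.println(e.toSql()); // built once: (a + b)
        System.out.println(e.toSql()); // served from the cache
    }
}
```

The cache is safe precisely because expressions never mutate after construction; a rewrite produces a new node, which starts with its own empty cache.
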
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/UnaryOperator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/UnaryOperator.java index ace2c648daec798..61efa91f2621fc2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/UnaryOperator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/UnaryOperator.java @@ -46,7 +46,7 @@ public List expectedInputTypes() { } @Override - public String toSql() { + public String computeToSql() { return "(" + symbol + " " + child().toSql() + ")"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Variable.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Variable.java index fd16b84b183c7bf..5944ec08744980b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Variable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Variable.java @@ -85,7 +85,7 @@ public String toString() throws UnboundException { } @Override - public String toSql() throws UnboundException { + public String computeToSql() throws UnboundException { return toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VariableDesc.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VariableDesc.java index 38f23ee40fafe9e..3a16b38f9e19e01 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VariableDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VariableDesc.java @@ -50,7 +50,7 @@ public SetType getSetType() { } @Override - public String toSql() { + public String computeToSql() { return toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java index 43f4853758105ce..1b46a8552bafb96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java @@ -82,7 +82,7 @@ public R accept(ExpressionVisitor visitor, C context) { } @Override - public String toSql() { + public String computeToSql() { return getName(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WhenClause.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WhenClause.java index 4ce77f22df16920..adb862bb2f10413 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WhenClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WhenClause.java @@ -56,7 +56,7 @@ public Expression getResult() { } @Override - public String toSql() { + public String computeToSql() { return " WHEN " + left().toSql() + " THEN " + right().toSql(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowExpression.java index 5bea07fff00326d..7f26298c7006260 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowExpression.java @@ -179,7 +179,7 @@ public int hashCode() { } @Override - public String toSql() { + public String computeToSql() { StringBuilder sb = new 
StringBuilder(); sb.append(function.toSql()).append(" OVER("); if (!partitionKeys.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowFrame.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowFrame.java index 5cbb93ce3748ea0..58ed4f15f9baa07 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowFrame.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/WindowFrame.java @@ -95,7 +95,7 @@ public int hashCode() { } @Override - public String toSql() { + public String computeToSql() { StringBuilder sb = new StringBuilder(); sb.append(frameUnits + " "); if (rightBoundary != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java index 5ccc64a34bb43bb..13d4b515ad75f37 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java @@ -85,7 +85,7 @@ public int hashCode() { } @Override - public String toSql() throws UnboundException { + public String computeToSql() throws UnboundException { StringBuilder sql = new StringBuilder(getName()).append("("); int arity = arity(); for (int i = 0; i < arity; i++) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/AggregateFunction.java index 90df2f531da3fb1..777c9c4cc7add5a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/AggregateFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/AggregateFunction.java @@ -107,7 +107,7 @@ public boolean hasVarArguments() { } @Override - public String toSql() throws UnboundException { + public String computeToSql() throws UnboundException { StringBuilder sql = new StringBuilder(getName()).append("("); if (distinct) { sql.append("DISTINCT "); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java index e86e90974da1bd5..21e6ee1cba6b21a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java @@ -119,11 +119,11 @@ public Count withDistinctAndChildren(boolean distinct, List children } @Override - public String toSql() { + public String computeToSql() { if (isStar) { return "count(*)"; } - return super.toSql(); + return super.computeToSql(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CryptoFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CryptoFunction.java index 151f7ffc7732be4..1e4a866ecdcc538 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CryptoFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CryptoFunction.java @@ -42,7 +42,7 @@ public CryptoFunction(String name, List arguments) { } @Override - public String toSql() { + public String 
computeToSql() { List args = Lists.newArrayList(); for (int i = 0; i < arity(); i++) { if (i == 1) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Lambda.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Lambda.java index e8261f6391dda97..2ecab6090d8d3f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Lambda.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Lambda.java @@ -126,7 +126,7 @@ public boolean equals(Object o) { } @Override - public String toSql() { + public String computeToSql() { StringBuilder builder = new StringBuilder(); String argStr = argumentNames.get(0); if (argumentNames.size() > 1) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java index 837edf27ab10671..4a4257e67609d0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/TableValuedFunction.java @@ -130,7 +130,7 @@ public DataType getDataType() throws UnboundException { } @Override - public String toSql() { + public String computeToSql() { String args = getTVFProperties() .getMap() .entrySet() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/ArrayLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/ArrayLiteral.java index 486eeddabd71c84..be84a5b32e35cf2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/ArrayLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/ArrayLiteral.java @@ -94,7 +94,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { String items = this.items.stream() .map(Literal::toSql) .collect(Collectors.joining(", ")); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java index 6ea1d2af7256799..ed99e3025e86035 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java @@ -423,7 +423,7 @@ public String getStringValue() { } @Override - public String toSql() { + public String computeToSql() { return "'" + getStringValue() + "'"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java index 0a5c02409c113a0..169ed4219348246 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java @@ -266,7 +266,7 @@ public double getDouble() { } @Override - public String toSql() { + public String computeToSql() { return "'" + getStringValue() + "'"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java index 4ffc92c634d709b..1f0aa788cdc6417 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalLiteral.java @@ -112,7 +112,7 @@ public boolean equals(Object o) { } @Override - public String toSql() { + public String computeToSql() { return value.toPlainString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java index d8be4faf0c93958..045da28bdb38a41 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java @@ -152,7 +152,7 @@ public boolean equals(Object o) { } @Override - public String toSql() { + public String computeToSql() { return value.toPlainString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java index e8e37aaf697e24a..69e61b03c823140 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java @@ -135,7 +135,7 @@ public DataType getDataType() throws UnboundException { } @Override - public String toSql() { + public String computeToSql() { return toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java index c57bd3a04875e15..dbcf74c971e0697 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MapLiteral.java @@ -114,7 +114,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { StringBuilder sb = new StringBuilder(); sb.append("map("); if (!keys.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MaxLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MaxLiteral.java index ce1278a9ad4b266..763fdfb1f4f1f8f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MaxLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/MaxLiteral.java @@ -38,7 +38,7 @@ public LiteralExpr toLegacyLiteral() { } @Override - public String toSql() { + public String computeToSql() { return "MAX_VALUE"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StructLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StructLiteral.java index 3a46f1f5b83e7ec..f44aa663c9eb03c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StructLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StructLiteral.java @@ -124,7 +124,7 @@ public String toString() { } @Override - public String toSql() { + public String computeToSql() { StringBuilder sb = new 
StringBuilder(); sb.append("STRUCT("); for (int i = 0; i < fields.size(); i++) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/DistributePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/DistributePlanner.java index 12ab8b42eaab611..75a2326236fc9bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/DistributePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/distribute/DistributePlanner.java @@ -73,6 +73,7 @@ public DistributePlanner(StatementContext statementContext, List f /** plan */ public FragmentIdMapping plan() { + updateProfileIfPresent(SummaryProfile::setQueryPlanFinishTime); try { FragmentIdMapping fragmentJobs = UnassignedJobBuilder.buildJobs(statementContext, idToFragments); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java index 42b99f6effdb842..c111839fc5093ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java @@ -58,8 +58,16 @@ public class Utils { */ public static String quoteIfNeeded(String part) { // We quote strings except the ones which consist of digits only. - return part.matches("\\w*[\\w&&[^\\d]]+\\w*") - ? part : part.replace("`", "``"); + StringBuilder quote = new StringBuilder(part.length()); + for (int i = 0; i < part.length(); i++) { + char c = part.charAt(i); + if (c == '`') { + quote.append("``"); + } else { + quote.append(c); + } + } + return quote.toString(); } /** From c2e048f71c3edf75bb352dbaeeedd087c8df1264 Mon Sep 17 00:00:00 2001 From: yujun Date: Mon, 23 Dec 2024 11:28:30 +0800 Subject: [PATCH 46/55] [fix](nereids) fix comparison with date like (#45735) ### What problem does this PR solve? Issue Number: close #xxx Related PR: #45382 Problem Summary: #45382 fixed the wrong truncation when comparing date/datev1 with a datetime literal, but the fix was incomplete. ``` if (right instanceof DateTimeLiteral) { DateTimeLiteral dateTimeLiteral = (DateTimeLiteral) right; right = migrateToDateV2(dateTimeLiteral); if (dateTimeLiteral.getHour() != 0 || dateTimeLiteral.getMinute() != 0 || dateTimeLiteral.getSecond() != 0) { ... } } ``` The code above checks whether `right` is a datetime literal, but a datetimev2 literal is a child class of datetime literal, so datetimev2 literals also take this path, and for them the microseconds must be checked against 0 as well. For example, `date_a = '2020-01-01 00:00:00.01'` should be optimized to `FALSE`, not to `date_a = '2020-01-01'`.
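
To make that boundary concrete, the following self-contained sketch uses plain java.time types as stand-ins for Doris's date/datetime literals (class and method names are illustrative; only the "also check fractional seconds" rule comes from this fix):

```java
import java.time.LocalDate;
import java.time.LocalDateTime;

public class DateCastFoldDemo {
    // How `cast(date_a as datetimev2) = literal` should fold: a DATE can only
    // equal a datetime literal whose time part is all zeros, including the
    // fractional seconds.
    static String foldEquals(LocalDateTime literal) {
        boolean hasTimePart = literal.getHour() != 0
                || literal.getMinute() != 0
                || literal.getSecond() != 0
                || literal.getNano() != 0; // the check the original code missed
        if (hasTimePart) {
            return "FALSE (or NULL when date_a is nullable)";
        }
        LocalDate date = literal.toLocalDate();
        return "date_a = '" + date + "'";
    }

    public static void main(String[] args) {
        // -> date_a = '2020-01-01'
        System.out.println(foldEquals(LocalDateTime.parse("2020-01-01T00:00:00")));
        // -> FALSE (or NULL when date_a is nullable); before this fix the
        //    fractional part was dropped and the predicate matched wrongly.
        System.out.println(foldEquals(LocalDateTime.parse("2020-01-01T00:00:00.01")));
    }
}
```
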
--- .../rules/SimplifyComparisonPredicate.java | 2 +- .../SimplifyComparisonPredicateTest.java | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java index fbe0d44417363a7..7dc9ddcb3971efd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java @@ -182,7 +182,7 @@ private static Expression processDateLikeTypeCoercion(ComparisonPredicate cp, Ex DateTimeLiteral dateTimeLiteral = (DateTimeLiteral) right; right = migrateToDateV2(dateTimeLiteral); if (dateTimeLiteral.getHour() != 0 || dateTimeLiteral.getMinute() != 0 - || dateTimeLiteral.getSecond() != 0) { + || dateTimeLiteral.getSecond() != 0 || dateTimeLiteral.getMicroSecond() != 0) { if (cp instanceof EqualTo) { return ExpressionUtils.falseOrNull(cast.child()); } else if (cp instanceof NullSafeEqual) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java index 028f1c4864f0995..bab3b4385137e89 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java @@ -166,6 +166,18 @@ void testDateTimeV2CmpDateTimeV2() { new LessThan(date, new DateV2Literal("2020-01-02"))); assertRewrite(new LessThanEqual(new Cast(date, DateTimeType.INSTANCE), new DateTimeLiteral("2020-01-01 00:00:01")), new LessThanEqual(date, new DateV2Literal("2020-01-01"))); + assertRewrite(new EqualTo(new Cast(date, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("2020-01-01 00:00:00")), + new EqualTo(date, new DateV2Literal("2020-01-01"))); + assertRewrite(new EqualTo(new Cast(date, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("2020-01-01 00:00:01")), + ExpressionUtils.falseOrNull(date)); + assertRewrite(new EqualTo(new Cast(date, DateTimeV2Type.of(2)), new DateTimeV2Literal("2020-01-01 00:00:00.01")), + ExpressionUtils.falseOrNull(date)); + assertRewrite(new NullSafeEqual(new Cast(date, DateTimeV2Type.of(2)), new DateTimeV2Literal("2020-01-01 00:00:00.01")), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThanEqual(new Cast(date, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("2020-01-01 00:00:01")), + new GreaterThanEqual(date, new DateV2Literal("2020-01-02"))); + assertRewrite(new GreaterThanEqual(new Cast(date, DateTimeV2Type.of(2)), new DateTimeV2Literal("2020-01-01 00:00:00.01")), + new GreaterThanEqual(date, new DateV2Literal("2020-01-02"))); // cast (date as datev1) = datev1-literal // assertRewrite(new EqualTo(new Cast(date, DateType.INSTANCE), new DateLiteral("2020-01-01")), // new EqualTo(date, new DateV2Literal("2020-01-01"))); @@ -191,6 +203,18 @@ void testDateTimeV2CmpDateTimeV2() { new EqualTo(datev1, new DateLiteral("2020-01-01"))); assertRewrite(new GreaterThan(new Cast(datev1, DateV2Type.INSTANCE), new DateV2Literal("2020-01-01")), new GreaterThan(datev1, new DateLiteral("2020-01-01"))); + assertRewrite(new EqualTo(new Cast(datev1, DateTimeV2Type.SYSTEM_DEFAULT), new 
DateTimeV2Literal("2020-01-01 00:00:00")), + new EqualTo(datev1, new DateLiteral("2020-01-01"))); + assertRewrite(new EqualTo(new Cast(datev1, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("2020-01-01 00:00:01")), + ExpressionUtils.falseOrNull(datev1)); + assertRewrite(new EqualTo(new Cast(datev1, DateTimeV2Type.of(2)), new DateTimeV2Literal("2020-01-01 00:00:00.01")), + ExpressionUtils.falseOrNull(datev1)); + assertRewrite(new NullSafeEqual(new Cast(datev1, DateTimeV2Type.of(2)), new DateTimeV2Literal("2020-01-01 00:00:00.01")), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThanEqual(new Cast(datev1, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("2020-01-01 00:00:01")), + new GreaterThanEqual(datev1, new DateLiteral("2020-01-02"))); + assertRewrite(new GreaterThanEqual(new Cast(datev1, DateTimeV2Type.of(2)), new DateTimeV2Literal("2020-01-01 00:00:00.01")), + new GreaterThanEqual(datev1, new DateLiteral("2020-01-02"))); // cast (datetimev1 as datetime) cmp datetime assertRewrite(new EqualTo(new Cast(datetimev1, DateTimeV2Type.of(0)), new DateTimeV2Literal("2020-01-01 00:00:00")), From 208fde0648110f3a6d2837b43bbd8c19c5c51d33 Mon Sep 17 00:00:00 2001 From: Mryange Date: Mon, 23 Dec 2024 11:33:56 +0800 Subject: [PATCH 47/55] [refine](Column) Disallow implicit conversion of ColumnPtr to IColumn* (#45588) ### What problem does this PR solve? Previously, we allowed ColumnPtr to be directly converted to IColumn*: ```C++ ColumnPtr column; const IColumn* ptr = column; ``` This can easily cause confusion. For example, in the following code: ```C++ ColumnPtr column; const auto& const_column = check_and_get_column(column); ``` The matched function is: ```C++ template <> const doris::vectorized::ColumnConst* check_and_get_column( const IColumn* column) ``` However, the actual type of const_column is: ```C++ const doris::vectorized::ColumnConst* const& ``` ### Release note None ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [x] No need to test or manual test. Explain why: - [x] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [x] No. - [ ] Yes. - Does this need documentation? - [x] No. - [ ] Yes.
### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/src/exec/table_connector.cpp | 7 ++-- be/src/olap/push_handler.cpp | 2 +- .../olap/rowset/segment_v2/column_reader.cpp | 13 +++--- .../segment_v2/hierarchical_data_reader.h | 4 +- .../rowset/segment_v2/segment_iterator.cpp | 5 +-- be/src/olap/schema_change.cpp | 14 ++++--- be/src/pipeline/exec/hashjoin_build_sink.cpp | 2 +- .../pipeline/exec/hashjoin_probe_operator.cpp | 4 +- be/src/pipeline/exec/join_probe_operator.cpp | 2 +- be/src/pipeline/exec/olap_scan_operator.cpp | 5 +-- be/src/pipeline/exec/scan_operator.cpp | 8 ++-- .../aggregate_function_window.h | 2 +- be/src/vec/columns/column_object.cpp | 6 +-- be/src/vec/common/cow.h | 6 +-- .../vec/exec/format/column_type_convert.cpp | 2 +- be/src/vec/exec/format/csv/csv_reader.cpp | 4 +- .../vec/exec/format/json/new_json_reader.cpp | 23 +++++----- .../format/parquet/parquet_column_convert.cpp | 2 +- be/src/vec/exec/format/wal/wal_reader.cpp | 2 +- be/src/vec/exec/jni_connector.cpp | 2 +- be/src/vec/exec/scan/vfile_scanner.cpp | 2 +- be/src/vec/exec/scan/vmeta_scanner.cpp | 2 +- be/src/vec/exprs/vcompound_pred.h | 6 ++- .../array/function_array_cum_sum.cpp | 2 +- .../functions/array/function_array_distinct.h | 4 +- .../array/function_array_enumerate.cpp | 5 ++- .../array/function_array_enumerate_uniq.cpp | 7 ++-- .../vec/functions/array/function_array_join.h | 7 ++-- .../vec/functions/array/function_array_map.h | 2 +- .../functions/array/function_array_pop.cpp | 2 +- .../functions/array/function_array_range.cpp | 2 +- .../functions/array/function_array_remove.h | 8 ++-- .../functions/array/function_array_reverse.h | 2 +- .../vec/functions/array/function_array_set.h | 4 +- .../functions/array/function_array_slice.h | 2 +- .../functions/array/function_array_sortby.cpp | 6 +-- .../functions/array/function_arrays_overlap.h | 8 ++-- .../functions/comparison_equal_for_null.cpp | 10 +++-- be/src/vec/functions/function_agg_state.h | 2 +- .../functions/function_binary_arithmetic.h | 28 +++++++------ be/src/vec/functions/function_bitmap.cpp | 2 +- .../functions/function_bitmap_variadic.cpp | 2 +- be/src/vec/functions/function_case.h | 2 +- be/src/vec/functions/function_cast.h | 2 +- be/src/vec/functions/function_collection_in.h | 5 ++- .../function_date_or_datetime_computation.h | 4 +- be/src/vec/functions/function_ip.h | 6 ++- be/src/vec/functions/function_jsonb.cpp | 26 +++++++----- be/src/vec/functions/function_nullables.cpp | 5 ++- .../vec/functions/function_quantile_state.cpp | 4 +- be/src/vec/functions/function_string.h | 4 +- be/src/vec/functions/function_tokenize.cpp | 2 +- be/src/vec/functions/functions_geo.cpp | 16 +++---- be/src/vec/functions/functions_logical.cpp | 13 +++--- be/src/vec/functions/in.h | 2 +- be/src/vec/functions/least_greast.cpp | 2 +- be/src/vec/functions/round.h | 1 + be/src/vec/sink/vtablet_block_convertor.cpp | 5 ++- .../writer/iceberg/partition_transformers.h | 42 +++++++++---------- .../writer/iceberg/viceberg_table_writer.cpp | 2 +- be/src/vec/utils/util.hpp | 2 +- be/test/vec/columns/common_column_test.h | 6 +-- be/test/vec/data_types/from_string_test.cpp | 12 +++--- .../serde/data_type_serde_text_test.cpp | 12 +++--- be/test/vec/function/function_test_util.h | 2 +- be/test/vec/olap/char_type_padding_test.cpp | 6 +-- 66 files changed, 218 insertions(+), 195 deletions(-) diff --git a/be/src/exec/table_connector.cpp b/be/src/exec/table_connector.cpp index 
fa5181f5fecb2de..549fa6aae90fd80 100644 --- a/be/src/exec/table_connector.cpp +++ b/be/src/exec/table_connector.cpp @@ -118,16 +118,17 @@ Status TableConnector::convert_column_data(const vectorized::ColumnPtr& column_p fmt::format_to(_insert_stmt_buffer, "\"{}\"", str); } }; - const vectorized::IColumn* column = column_ptr; + const vectorized::IColumn* column = column_ptr.get(); if (type_ptr->is_nullable()) { - auto nullable_column = assert_cast(column_ptr.get()); + const auto* nullable_column = + assert_cast(column_ptr.get()); if (nullable_column->is_null_at(row)) { fmt::format_to(_insert_stmt_buffer, "{}", "NULL"); return Status::OK(); } column = nullable_column->get_nested_column_ptr().get(); } else { - column = column_ptr; + column = column_ptr.get(); } auto [item, size] = column->get_data_at(row); switch (type.type) { diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 56d167459f5be7e..eecb322384b6980 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -518,7 +518,7 @@ Status PushBrokerReader::_convert_to_output_block(vectorized::Block* block) { column_ptr = _src_block.get_by_position(result_column_id).column; // column_ptr maybe a ColumnConst, convert it to a normal column column_ptr = column_ptr->convert_to_full_column_if_const(); - DCHECK(column_ptr != nullptr); + DCHECK(column_ptr); // because of src_slot_desc is always be nullable, so the column_ptr after do dest_expr // is likely to be nullable diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 78c415530cd0291..1abb60e58507ec6 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -1267,8 +1267,8 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d DCHECK_EQ(this_run, num_rows); } else { *has_null = true; - auto* null_col = - vectorized::check_and_get_column(dst); + const auto* null_col = + vectorized::check_and_get_column(dst.get()); if (null_col != nullptr) { const_cast(null_col)->insert_null_elements( this_run); @@ -1328,8 +1328,9 @@ Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t co auto origin_index = _page.data_decoder->current_index(); if (this_read_count > 0) { if (is_null) { - auto* null_col = - vectorized::check_and_get_column(dst); + const auto* null_col = + vectorized::check_and_get_column( + dst.get()); if (UNLIKELY(null_col == nullptr)) { return Status::InternalError("unexpected column type in column reader"); } @@ -1710,9 +1711,9 @@ Status DefaultNestedColumnIterator::next_batch(size_t* n, vectorized::MutableCol static void fill_nested_with_defaults(vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& sibling_column, size_t nrows) { const auto* sibling_array = vectorized::check_and_get_column( - remove_nullable(sibling_column->get_ptr())); + remove_nullable(sibling_column->get_ptr()).get()); const auto* dst_array = vectorized::check_and_get_column( - remove_nullable(dst->get_ptr())); + remove_nullable(dst->get_ptr()).get()); if (!dst_array || !sibling_array) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Expected array column, but met %s and %s", dst->get_name(), diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h index f85038713cadb75..bd5de7484740a8a 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h +++ 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h @@ -165,8 +165,8 @@ class HierarchicalDataReader : public ColumnIterator { // will be the type of ColumnObject::NESTED_TYPE, which is Nullable>. for (auto& entry : nested_subcolumns) { MutableColumnPtr nested_object = ColumnObject::create(true, false); - const auto* base_array = - check_and_get_column(remove_nullable(entry.second[0].column)); + const auto* base_array = check_and_get_column( + remove_nullable(entry.second[0].column).get()); MutableColumnPtr offset = base_array->get_offsets_ptr()->assume_mutable(); auto* nested_object_ptr = assert_cast(nested_object.get()); // flatten nested arrays diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 5f50ffeea2d8f0c..366c6d3ce21a76a 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1955,8 +1955,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { Status SegmentIterator::_convert_to_expected_type(const std::vector& col_ids) { for (ColumnId i : col_ids) { - if (_current_return_columns[i] == nullptr || _converted_column_ids[i] || - _is_pred_column[i]) { + if (!_current_return_columns[i] || _converted_column_ids[i] || _is_pred_column[i]) { continue; } if (!_segment->same_with_storage_type( @@ -1999,7 +1998,7 @@ Status SegmentIterator::copy_column_data_by_selector(vectorized::IColumn* input_ return Status::RuntimeError("copy_column_data_by_selector nullable mismatch"); } - return input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, output_col); + return input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, output_col.get()); } void SegmentIterator::_clear_iterators() { diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 7f947612eed4ac2..658ff05b67f0d66 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -337,7 +337,7 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, int result_tmp_column_idx = -1; RETURN_IF_ERROR(ctx->execute(ref_block, &result_tmp_column_idx)); auto& result_tmp_column_def = ref_block->get_by_position(result_tmp_column_idx); - if (result_tmp_column_def.column == nullptr) { + if (!result_tmp_column_def.column) { return Status::Error( "result column={} is nullptr, input expr={}", result_tmp_column_def.name, apache::thrift::ThriftDebugString(*expr)); @@ -430,7 +430,7 @@ Status BlockChanger::_check_cast_valid(vectorized::ColumnPtr input_column, if (input_column->is_nullable() != output_column->is_nullable()) { if (input_column->is_nullable()) { const auto* ref_null_map = - vectorized::check_and_get_column(input_column) + vectorized::check_and_get_column(input_column.get()) ->get_null_map_column() .get_data() .data(); @@ -446,10 +446,12 @@ Status BlockChanger::_check_cast_valid(vectorized::ColumnPtr input_column, } } else { const auto& null_map_column = - vectorized::check_and_get_column(output_column) + vectorized::check_and_get_column( + output_column.get()) ->get_null_map_column(); const auto& nested_column = - vectorized::check_and_get_column(output_column) + vectorized::check_and_get_column( + output_column.get()) ->get_nested_column(); const auto* new_null_map = null_map_column.get_data().data(); @@ -481,12 +483,12 @@ Status BlockChanger::_check_cast_valid(vectorized::ColumnPtr input_column, if (input_column->is_nullable() && output_column->is_nullable()) { const auto* ref_null_map = -
vectorized::check_and_get_column(input_column) + vectorized::check_and_get_column(input_column.get()) ->get_null_map_column() .get_data() .data(); const auto* new_null_map = - vectorized::check_and_get_column(output_column) + vectorized::check_and_get_column(output_column.get()) ->get_null_map_column() .get_data() .data(); diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index 47560875b51252b..b71feff3ed44604 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -254,7 +254,7 @@ Status HashJoinBuildSinkLocalState::_extract_join_column( // update nulllmap and split nested out of ColumnNullable when serialize_null_into_key is false and column is nullable const auto& col_nested = nullable->get_nested_column(); const auto& col_nullmap = nullable->get_null_map_data(); - DCHECK(null_map != nullptr); + DCHECK(null_map); vectorized::VectorizedUtils::update_null_map(null_map->get_data(), col_nullmap); raw_ptrs[i] = &col_nested; } else { diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp b/be/src/pipeline/exec/hashjoin_probe_operator.cpp index 0db525f1bf52226..37ccd6206f3e0f7 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp +++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp @@ -371,7 +371,7 @@ Status HashJoinProbeLocalState::_extract_join_column(vectorized::Block& block, _need_null_map_for_probe = _need_probe_null_map(block, res_col_ids); } if (_need_null_map_for_probe) { - if (_null_map_column == nullptr) { + if (!_null_map_column) { _null_map_column = vectorized::ColumnUInt8::create(); } _null_map_column->get_data().assign(block.rows(), (uint8_t)0); @@ -389,7 +389,7 @@ Status HashJoinProbeLocalState::_extract_join_column(vectorized::Block& block, // update nulllmap and split nested out of ColumnNullable when serialize_null_into_key is false and column is nullable const auto& col_nested = nullable->get_nested_column(); const auto& col_nullmap = nullable->get_null_map_data(); - DCHECK(_null_map_column != nullptr); + DCHECK(_null_map_column); vectorized::VectorizedUtils::update_null_map(_null_map_column->get_data(), col_nullmap); _probe_columns[i] = &col_nested; } else { diff --git a/be/src/pipeline/exec/join_probe_operator.cpp b/be/src/pipeline/exec/join_probe_operator.cpp index 11b5b29c8b556b1..9a50d76a48ce8c2 100644 --- a/be/src/pipeline/exec/join_probe_operator.cpp +++ b/be/src/pipeline/exec/join_probe_operator.cpp @@ -150,7 +150,7 @@ Status JoinProbeLocalState::_build_output_block( /// TODO: maybe need a method to check if a column need to be converted to full /// column. 
if (is_column_const(*origin_column) || - check_column(origin_column)) { + check_column(origin_column.get())) { auto column_ptr = origin_column->convert_to_full_column_if_const(); insert_column_datas(mutable_columns[i], column_ptr, rows); } else { diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index fa91caffa8ebc47..b1ab62743323c63 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -246,9 +246,8 @@ Status OlapScanLocalState::_should_push_down_function_filter(vectorized::Vectori DCHECK(children[1 - i]->type().is_string_type()); std::shared_ptr const_col_wrapper; RETURN_IF_ERROR(children[1 - i]->get_const_col(expr_ctx, &const_col_wrapper)); - if (const vectorized::ColumnConst* const_column = - check_and_get_column( - const_col_wrapper->column_ptr)) { + if (const auto* const_column = check_and_get_column( + const_col_wrapper->column_ptr.get())) { *constant_str = const_column->get_data_at(0); } else { pdt = PushDownType::UNACCEPTABLE; diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index ae4396b22c7eec0..a73e1a6db7ccb74 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -520,8 +520,8 @@ Status ScanLocalState::_eval_const_conjuncts(vectorized::VExpr* vexpr, if (vexpr->is_constant()) { std::shared_ptr const_col_wrapper; RETURN_IF_ERROR(vexpr->get_const_col(expr_ctx, &const_col_wrapper)); - if (const auto* const_column = - check_and_get_column(const_col_wrapper->column_ptr)) { + if (const auto* const_column = check_and_get_column( + const_col_wrapper->column_ptr.get())) { constant_val = const_cast(const_column->get_data_at(0).data); if (constant_val == nullptr || !*reinterpret_cast(constant_val)) { *pdt = PushDownType::ACCEPTABLE; @@ -530,7 +530,7 @@ Status ScanLocalState::_eval_const_conjuncts(vectorized::VExpr* vexpr, } } else if (const auto* bool_column = check_and_get_column>( - const_col_wrapper->column_ptr)) { + const_col_wrapper->column_ptr.get())) { // TODO: If `vexpr->is_constant()` is true, a const column is expected here. // But now we still don't cover all predicates for const expression. 
// For example, for query `SELECT col FROM tbl WHERE 'PROMOTION' LIKE 'AAA%'`, @@ -690,7 +690,7 @@ Status ScanLocalState::_should_push_down_binary_predicate( std::shared_ptr const_col_wrapper; RETURN_IF_ERROR(children[1 - i]->get_const_col(expr_ctx, &const_col_wrapper)); if (const auto* const_column = check_and_get_column( - const_col_wrapper->column_ptr)) { + const_col_wrapper->column_ptr.get())) { *slot_ref_child = i; *constant_val = const_column->get_data_at(0); } else { diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index 0cef4c82d3dbfeb..5d449318b7d2f5f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -402,7 +402,7 @@ struct LeadLagData { if (nullable_column->is_null_at(0)) { _default_value.reset(); } else { - _default_value.set_value(nullable_column->get_nested_column_ptr(), 0); + _default_value.set_value(nullable_column->get_nested_column_ptr().get(), 0); } } else { _default_value.set_value(column, 0); diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 3d6a3e44436d29b..4300725cacaf9ca 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1484,7 +1484,7 @@ Status ColumnObject::serialize_one_row_to_json_format(size_t row, rapidjson::Str #endif for (const auto& subcolumn : subcolumns) { RETURN_IF_ERROR(find_and_set_leave_value( - subcolumn->data.get_finalized_column_ptr(), subcolumn->path, + subcolumn->data.get_finalized_column_ptr().get(), subcolumn->path, subcolumn->data.get_least_common_type_serde(), subcolumn->data.get_least_common_type(), subcolumn->data.least_common_type.get_base_type_id(), root, @@ -1558,7 +1558,7 @@ Status ColumnObject::merge_sparse_to_root_column() { continue; } bool succ = find_and_set_leave_value( - column, subcolumn->path, subcolumn->data.get_least_common_type_serde(), + column.get(), subcolumn->path, subcolumn->data.get_least_common_type_serde(), subcolumn->data.get_least_common_type(), subcolumn->data.least_common_type.get_base_type_id(), root, doc_structure->GetAllocator(), mem_pool, i); @@ -1705,7 +1705,7 @@ bool ColumnObject::empty() const { } ColumnPtr get_base_column_of_array(const ColumnPtr& column) { - if (const auto* column_array = check_and_get_column(column)) { + if (const auto* column_array = check_and_get_column(column.get())) { return column_array->get_data_ptr(); } return column; diff --git a/be/src/vec/common/cow.h b/be/src/vec/common/cow.h index 95df7694f227d9e..4970f649d32e855 100644 --- a/be/src/vec/common/cow.h +++ b/be/src/vec/common/cow.h @@ -203,8 +203,6 @@ class COW { operator bool() const { return t != nullptr; } - operator T*() const { return t; } - private: T* t = nullptr; }; @@ -346,8 +344,8 @@ class COW { operator const immutable_ptr&() const { return value; } operator immutable_ptr&() { return value; } - operator bool() const { return value != nullptr; } - bool operator!() const { return value == nullptr; } + operator bool() const { return value.get() != nullptr; } + bool operator!() const { return value.get() == nullptr; } bool operator==(const chameleon_ptr& rhs) const { return value == rhs.value; } bool operator!=(const chameleon_ptr& rhs) const { return value != rhs.value; } diff --git a/be/src/vec/exec/format/column_type_convert.cpp b/be/src/vec/exec/format/column_type_convert.cpp index a2c226c91d6799b..0442158b690c398 100644 --- 
a/be/src/vec/exec/format/column_type_convert.cpp +++ b/be/src/vec/exec/format/column_type_convert.cpp @@ -99,7 +99,7 @@ ColumnPtr ColumnTypeConverter::get_column(const TypeDescriptor& src_type, Column return dst_column; } - if (_cached_src_column == nullptr) { + if (!_cached_src_column) { _cached_src_type = DataTypeFactory::instance().create_data_type(src_type, dst_type->is_nullable()); _cached_src_column = diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index b27bb050dc6e0c2..d4a2dcfc7f3503c 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -657,7 +657,7 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, col_idx < _split_values.size() ? _split_values[col_idx] : _s_null_slice; Slice slice {value.data, value.size}; - IColumn* col_ptr = columns[i]; + IColumn* col_ptr = columns[i].get(); if (!_is_load) { col_ptr = const_cast( block->get_by_position(_file_slot_idx_map[i]).column.get()); @@ -700,7 +700,7 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, Status CsvReader::_fill_empty_line(Block* block, std::vector& columns, size_t* rows) { for (int i = 0; i < _file_slot_descs.size(); ++i) { - IColumn* col_ptr = columns[i]; + IColumn* col_ptr = columns[i].get(); if (!_is_load) { col_ptr = const_cast( block->get_by_position(_file_slot_idx_map[i]).column.get()); diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index d79e86520741cdb..adb22d588f53d38 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -886,7 +886,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator if (column_ptr->is_nullable()) { nullable_column = reinterpret_cast(column_ptr); - data_column_ptr = nullable_column->get_nested_column().get_ptr(); + data_column_ptr = nullable_column->get_nested_column().get_ptr().get(); data_serde = serde->get_nested_serdes()[0]; if (value_is_null) { @@ -1010,7 +1010,8 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator const auto& sub_col_type = type_desc.children[sub_col_idx]; RETURN_IF_ERROR(_write_data_to_column( - sub_value, sub_col_type, struct_column_ptr->get_column(sub_col_idx).get_ptr(), + sub_value, sub_col_type, + struct_column_ptr->get_column(sub_col_idx).get_ptr().get(), column_name + "." 
+ type_desc.field_names[sub_col_idx], sub_serdes[sub_col_idx], valid)); } @@ -1026,12 +1027,12 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator for (const auto& member_value : object_value) { RETURN_IF_ERROR(_write_data_to_column( &member_value.name, type_desc.children[0], - map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr(), + map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr().get(), column_name + ".key", sub_serdes[0], valid)); RETURN_IF_ERROR(_write_data_to_column( &member_value.value, type_desc.children[1], - map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr(), + map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr().get(), column_name + ".value", sub_serdes[1], valid)); } @@ -1048,7 +1049,7 @@ Status NewJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator for (const auto& sub_value : array_value) { RETURN_IF_ERROR(_write_data_to_column(&sub_value, type_desc.children[0], - array_column_ptr->get_data().get_ptr(), + array_column_ptr->get_data().get_ptr().get(), column_name + ".element", sub_serdes[0], valid)); } auto& offsets = array_column_ptr->get_offsets(); @@ -1653,7 +1654,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& if (column_ptr->is_nullable()) { nullable_column = reinterpret_cast(column_ptr); - data_column_ptr = nullable_column->get_nested_column().get_ptr(); + data_column_ptr = nullable_column->get_nested_column().get_ptr().get(); data_serde = serde->get_nested_serdes()[0]; // kNullType will put 1 into the Null map, so there is no need to push 0 for kNullType. @@ -1727,7 +1728,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& const auto& sub_col_type = type_desc.children[sub_column_idx]; RETURN_IF_ERROR(_simdjson_write_data_to_column( - sub.value(), sub_col_type, sub_column_ptr, column_name + "." + sub_key, + sub.value(), sub_col_type, sub_column_ptr.get(), column_name + "." 
+ sub_key, sub_serdes[sub_column_idx], valid)); } @@ -1768,7 +1769,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& auto nullable_column = static_cast(column_ptr); nullable_column->get_null_map_data().push_back(0); - data_column_ptr = nullable_column->get_nested_column().get_ptr(); + data_column_ptr = nullable_column->get_nested_column().get_ptr().get(); data_serde = serde->get_nested_serdes()[0]; } Slice slice(key_view.data(), key_view.length()); @@ -1779,13 +1780,13 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& }; RETURN_IF_ERROR(f(member_value.unescaped_key(), type_desc.children[0], - map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr(), + map_column_ptr->get_keys_ptr()->assume_mutable()->get_ptr().get(), sub_serdes[0], _serde_options, valid)); simdjson::ondemand::value field_value = member_value.value(); RETURN_IF_ERROR(_simdjson_write_data_to_column( field_value, type_desc.children[1], - map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr(), + map_column_ptr->get_values_ptr()->assume_mutable()->get_ptr().get(), column_name + ".value", sub_serdes[1], valid)); field_count++; } @@ -1807,7 +1808,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& int field_count = 0; for (simdjson::ondemand::value sub_value : array_value) { RETURN_IF_ERROR(_simdjson_write_data_to_column( - sub_value, type_desc.children[0], array_column_ptr->get_data().get_ptr(), + sub_value, type_desc.children[0], array_column_ptr->get_data().get_ptr().get(), column_name + ".element", sub_serdes[0], valid)); field_count++; } diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp index 0a5ef2913dd940b..49636d809aa0d8e 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp @@ -79,7 +79,7 @@ ColumnPtr PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s return dst_logical_column; } - if (_cached_src_physical_column == nullptr) { + if (!_cached_src_physical_column) { switch (src_physical_type) { case tparquet::Type::type::BOOLEAN: _cached_src_physical_type = std::make_shared(); diff --git a/be/src/vec/exec/format/wal/wal_reader.cpp b/be/src/vec/exec/format/wal/wal_reader.cpp index 22e6928216e1e83..a9a209b95a4ce12 100644 --- a/be/src/vec/exec/format/wal/wal_reader.cpp +++ b/be/src/vec/exec/format/wal/wal_reader.cpp @@ -92,7 +92,7 @@ Status WalReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { pos, src_block.columns()); } vectorized::ColumnPtr column_ptr = src_block.get_by_position(pos).column; - if (column_ptr != nullptr && slot_desc->is_nullable()) { + if (column_ptr && slot_desc->is_nullable()) { column_ptr = make_nullable(column_ptr); } dst_block.insert(index, vectorized::ColumnWithTypeAndName( diff --git a/be/src/vec/exec/jni_connector.cpp b/be/src/vec/exec/jni_connector.cpp index 11a58e81c98d892..4b5bb72e57bfbd0 100644 --- a/be/src/vec/exec/jni_connector.cpp +++ b/be/src/vec/exec/jni_connector.cpp @@ -241,7 +241,7 @@ Status JniConnector::fill_block(Block* block, const ColumnNumbers& arguments, lo TableMetaAddress table_meta(table_address); long num_rows = table_meta.next_meta_as_long(); for (size_t i : arguments) { - if (block->get_by_position(i).column == nullptr) { + if (block->get_by_position(i).column.get() == nullptr) { auto return_type = block->get_data_type(i); bool result_nullable =
return_type->is_nullable(); ColumnUInt8::MutablePtr null_col = nullptr; diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 93a22d1a94bf522..15b681f597975e5 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -596,7 +596,7 @@ Status VFileScanner::_convert_to_output_block(Block* block) { column_ptr = _src_block_ptr->get_by_position(result_column_id).column; // column_ptr maybe a ColumnConst, convert it to a normal column column_ptr = column_ptr->convert_to_full_column_if_const(); - DCHECK(column_ptr != nullptr); + DCHECK(column_ptr); // because of src_slot_desc is always be nullable, so the column_ptr after do dest_expr // is likely to be nullable diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index 289930b16bce856..db0256728741c78 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -148,7 +148,7 @@ Status VMetaScanner::_fill_block_with_remote_data(const std::vectoris_nullable()) { auto& null_col = reinterpret_cast(*col_ptr); null_col.get_null_map_data().push_back(0); - col_ptr = null_col.get_nested_column_ptr(); + col_ptr = null_col.get_nested_column_ptr().get(); } switch (slot_desc->type().type) { case TYPE_BOOLEAN: { diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index ff7649600b4c7fe..e3c02f554b3d36d 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -272,8 +272,10 @@ class VCompoundPred : public VectorizedFnCall { auto col_res = ColumnUInt8::create(size); auto col_nulls = ColumnUInt8::create(size); - auto* __restrict res_datas = assert_cast(col_res)->get_data().data(); - auto* __restrict res_nulls = assert_cast(col_nulls)->get_data().data(); + auto* __restrict res_datas = + assert_cast(col_res.get())->get_data().data(); + auto* __restrict res_nulls = + assert_cast(col_nulls.get())->get_data().data(); ColumnPtr temp_null_map = nullptr; // maybe both children are nullable / or one of children is nullable auto* __restrict lhs_null_map_tmp = create_null_map_column(temp_null_map, lhs_null_map); diff --git a/be/src/vec/functions/array/function_array_cum_sum.cpp b/be/src/vec/functions/array/function_array_cum_sum.cpp index 2f93a2a83b1a897..5fba7d4a619bd53 100644 --- a/be/src/vec/functions/array/function_array_cum_sum.cpp +++ b/be/src/vec/functions/array/function_array_cum_sum.cpp @@ -118,7 +118,7 @@ class FunctionArrayCumSum : public IFunction { // get null map const ColumnNullable* src_nested_nullable_col = check_and_get_column(*src_nested_column); - src_nested_column = src_nested_nullable_col->get_nested_column_ptr(); + src_nested_column = src_nested_nullable_col->get_nested_column_ptr().get(); const NullMapType& src_null_map = src_nested_nullable_col->get_null_map_column().get_data(); ColumnPtr res_nested_ptr; diff --git a/be/src/vec/functions/array/function_array_distinct.h b/be/src/vec/functions/array/function_array_distinct.h index 4b7e3e6f035d48f..4d37f7cbcf71333 100644 --- a/be/src/vec/functions/array/function_array_distinct.h +++ b/be/src/vec/functions/array/function_array_distinct.h @@ -102,14 +102,14 @@ class FunctionArrayDistinct : public IFunction { if (src_nested_column->is_nullable()) { const auto* src_nested_nullable_col = check_and_get_column(*src_nested_column); - src_nested_column = src_nested_nullable_col->get_nested_column_ptr(); + src_nested_column = src_nested_nullable_col->get_nested_column_ptr().get(); 
src_null_map = &src_nested_nullable_col->get_null_map_column().get_data(); } NullMapType* dest_null_map = nullptr; if (dest_nested_column->is_nullable()) { auto* dest_nested_nullable_col = reinterpret_cast(dest_nested_column); - dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr(); + dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr().get(); dest_null_map = &dest_nested_nullable_col->get_null_map_column().get_data(); } diff --git a/be/src/vec/functions/array/function_array_enumerate.cpp b/be/src/vec/functions/array/function_array_enumerate.cpp index 0e8bca3e5cd3b1b..3846addb83bb55a 100644 --- a/be/src/vec/functions/array/function_array_enumerate.cpp +++ b/be/src/vec/functions/array/function_array_enumerate.cpp @@ -83,7 +83,7 @@ class FunctionArrayEnumerate : public IFunction { auto left_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); const ColumnArray* array = - check_and_get_column(remove_nullable(left_column->get_ptr())); + check_and_get_column(remove_nullable(left_column->get_ptr()).get()); if (!array) { return Status::RuntimeError( fmt::format("Illegal column {}, of first argument of function {}", @@ -107,7 +107,8 @@ class FunctionArrayEnumerate : public IFunction { ColumnPtr res_column = ColumnArray::create(std::move(nested_column), array->get_offsets_ptr()); if (block.get_by_position(arguments[0]).column->is_nullable()) { - const ColumnNullable* nullable = check_and_get_column(left_column); + const ColumnNullable* nullable = + check_and_get_column(left_column.get()); res_column = ColumnNullable::create( res_column, nullable->get_null_map_column().clone_resized(nullable->size())); } diff --git a/be/src/vec/functions/array/function_array_enumerate_uniq.cpp b/be/src/vec/functions/array/function_array_enumerate_uniq.cpp index 21d6ab40007b6e6..bdee406655f1966 100644 --- a/be/src/vec/functions/array/function_array_enumerate_uniq.cpp +++ b/be/src/vec/functions/array/function_array_enumerate_uniq.cpp @@ -128,7 +128,7 @@ class FunctionArrayEnumerateUniq : public IFunction { block.get_by_position(arguments[i]).column->convert_to_full_column_if_const()); ColumnPtr& cur_column = src_columns[i]; const ColumnArray* array = - check_and_get_column(remove_nullable(cur_column->get_ptr())); + check_and_get_column(remove_nullable(cur_column->get_ptr()).get()); if (!array) { return Status::RuntimeError( fmt::format("Illegal column {}, of first argument of function {}", @@ -151,7 +151,7 @@ class FunctionArrayEnumerateUniq : public IFunction { const NullMapType* null_map = nullptr; if (arguments.size() == 1 && data_columns[0]->is_nullable()) { const ColumnNullable* nullable = check_and_get_column(*data_columns[0]); - data_columns[0] = nullable->get_nested_column_ptr(); + data_columns[0] = nullable->get_nested_column_ptr().get(); null_map = &nullable->get_null_map_column().get_data(); } @@ -219,7 +219,8 @@ class FunctionArrayEnumerateUniq : public IFunction { if (arguments.size() == 1 && block.get_by_position(arguments[0]).column->is_nullable()) { auto left_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - const ColumnNullable* nullable = check_and_get_column(left_column); + const ColumnNullable* nullable = + check_and_get_column(left_column.get()); res_column = ColumnNullable::create( res_column, nullable->get_null_map_column().clone_resized(nullable->size())); } diff --git a/be/src/vec/functions/array/function_array_join.h b/be/src/vec/functions/array/function_array_join.h index 
957b2288fb746ae..29521c36111824e 100644 --- a/be/src/vec/functions/array/function_array_join.h +++ b/be/src/vec/functions/array/function_array_join.h @@ -78,10 +78,11 @@ struct ArrayJoinImpl { auto nested_type = data_type_array->get_nested_type(); auto dest_column_ptr = ColumnString::create(); - DCHECK(dest_column_ptr != nullptr); + DCHECK(dest_column_ptr); - auto res_val = _execute_by_type(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, - sep_str, null_replace_str, nested_type, dest_column_ptr); + auto res_val = + _execute_by_type(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, + sep_str, null_replace_str, nested_type, dest_column_ptr.get()); if (!res_val) { return Status::RuntimeError(fmt::format( "execute failed or unsupported types for function {}({},{},{})", "array_join", diff --git a/be/src/vec/functions/array/function_array_map.h b/be/src/vec/functions/array/function_array_map.h index fd4a2fc59f35485..5bfe723e232884f 100644 --- a/be/src/vec/functions/array/function_array_map.h +++ b/be/src/vec/functions/array/function_array_map.h @@ -165,7 +165,7 @@ struct ArrayMapImpl { static Status execute(ColumnPtr& res_ptr, ColumnArrayExecutionDatas datas, std::vector& col_const, size_t start_row, size_t end_row) { ColumnArrayMutableData dst = - create_mutable_data(datas[0].nested_col, datas[0].nested_nullmap_data); + create_mutable_data(datas[0].nested_col.get(), datas[0].nested_nullmap_data); if (_execute_internal(dst, datas, col_const, start_row, end_row)) { res_ptr = assemble_column_array(dst); return Status::OK(); diff --git a/be/src/vec/functions/array/function_array_pop.cpp b/be/src/vec/functions/array/function_array_pop.cpp index 2182699e0205b5b..1ddd767cfaf3ce8 100644 --- a/be/src/vec/functions/array/function_array_pop.cpp +++ b/be/src/vec/functions/array/function_array_pop.cpp @@ -75,7 +75,7 @@ class FunctionArrayPop : public IFunction { } // prepare dst array column bool is_nullable = src.nested_nullmap_data != nullptr; - ColumnArrayMutableData dst = create_mutable_data(src.nested_col, is_nullable); + ColumnArrayMutableData dst = create_mutable_data(src.nested_col.get(), is_nullable); dst.offsets_ptr->reserve(input_rows_count); // start from index depending on the PopType::start_offset auto offset_column = ColumnInt64::create(array_column->size(), PopType::start_offset); diff --git a/be/src/vec/functions/array/function_array_range.cpp b/be/src/vec/functions/array/function_array_range.cpp index 8a3de3754503ae7..0980587660b20ae 100644 --- a/be/src/vec/functions/array/function_array_range.cpp +++ b/be/src/vec/functions/array/function_array_range.cpp @@ -137,7 +137,7 @@ struct RangeImplUtil { IColumn* dest_nested_column = &dest_array_column_ptr->get_data(); ColumnNullable* dest_nested_nullable_col = reinterpret_cast(dest_nested_column); - dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr(); + dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr().get(); auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data(); auto args_null_map = ColumnUInt8::create(input_rows_count, 0); diff --git a/be/src/vec/functions/array/function_array_remove.h b/be/src/vec/functions/array/function_array_remove.h index 197b032b0f8a4be..661a18170ed9dcb 100644 --- a/be/src/vec/functions/array/function_array_remove.h +++ b/be/src/vec/functions/array/function_array_remove.h @@ -107,13 +107,13 @@ class FunctionArrayRemove : public IFunction { auto dst_nested_column = ColumnNullable::create(nested_column.clone_empty(), 
ColumnUInt8::create()); array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column->get_nested_column_ptr(); + dst_column = dst_nested_column->get_nested_column_ptr().get(); dst_null_map = &dst_nested_column->get_null_map_data(); dst_null_map->reserve(offsets.back()); } else { auto dst_nested_column = nested_column.clone_empty(); array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column; + dst_column = dst_nested_column.get(); } auto& dst_data = reinterpret_cast(*dst_column).get_data(); @@ -179,13 +179,13 @@ class FunctionArrayRemove : public IFunction { auto dst_nested_column = ColumnNullable::create(nested_column.clone_empty(), ColumnUInt8::create()); array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column->get_nested_column_ptr(); + dst_column = dst_nested_column->get_nested_column_ptr().get(); dst_null_map = &dst_nested_column->get_null_map_data(); dst_null_map->reserve(offsets.back()); } else { auto dst_nested_column = nested_column.clone_empty(); array_nested_column = dst_nested_column->get_ptr(); - dst_column = dst_nested_column; + dst_column = dst_nested_column.get(); } auto& dst_offs = reinterpret_cast(*dst_column).get_offsets(); diff --git a/be/src/vec/functions/array/function_array_reverse.h b/be/src/vec/functions/array/function_array_reverse.h index 8567bc61158baba..9fc1623151801e5 100644 --- a/be/src/vec/functions/array/function_array_reverse.h +++ b/be/src/vec/functions/array/function_array_reverse.h @@ -40,7 +40,7 @@ struct ArrayReverseImpl { } bool is_nullable = src.nested_nullmap_data ? true : false; - ColumnArrayMutableData dst = create_mutable_data(src.nested_col, is_nullable); + ColumnArrayMutableData dst = create_mutable_data(src.nested_col.get(), is_nullable); dst.offsets_ptr->reserve(input_rows_count); auto res_val = _execute_internal(*src.nested_col, *src.offsets_ptr, *dst.nested_col, diff --git a/be/src/vec/functions/array/function_array_set.h b/be/src/vec/functions/array/function_array_set.h index 1ecf6d72531c73c..975268b1e61553e 100644 --- a/be/src/vec/functions/array/function_array_set.h +++ b/be/src/vec/functions/array/function_array_set.h @@ -142,9 +142,9 @@ struct ArraySetImpl { bool right_const) { ColumnArrayMutableData dst; if (left_data.nested_nullmap_data || right_data.nested_nullmap_data) { - dst = create_mutable_data(left_data.nested_col, true); + dst = create_mutable_data(left_data.nested_col.get(), true); } else { - dst = create_mutable_data(left_data.nested_col, false); + dst = create_mutable_data(left_data.nested_col.get(), false); } ColumnPtr res_column; if (left_const) { diff --git a/be/src/vec/functions/array/function_array_slice.h b/be/src/vec/functions/array/function_array_slice.h index 2acd1d3fbe1fd43..76082b266026ead 100644 --- a/be/src/vec/functions/array/function_array_slice.h +++ b/be/src/vec/functions/array/function_array_slice.h @@ -89,7 +89,7 @@ class FunctionArraySlice : public IFunction { } // prepare dst array column bool is_nullable = src.nested_nullmap_data ? 
true : false; - ColumnArrayMutableData dst = create_mutable_data(src.nested_col, is_nullable); + ColumnArrayMutableData dst = create_mutable_data(src.nested_col.get(), is_nullable); dst.offsets_ptr->reserve(input_rows_count); // execute slice_array(dst, src, *offset_column, length_column.get()); diff --git a/be/src/vec/functions/array/function_array_sortby.cpp b/be/src/vec/functions/array/function_array_sortby.cpp index 899bb40fba14236..fe6799aaa2e876f 100644 --- a/be/src/vec/functions/array/function_array_sortby.cpp +++ b/be/src/vec/functions/array/function_array_sortby.cpp @@ -95,13 +95,13 @@ class FunctionArraySortBy : public IFunction { src_column_array.get_offsets_column().clone_resized(input_rows_count); MutableColumnPtr result_nullmap = nullptr; const ColumnUInt8::Container* src_null_map_data = nullptr; - if (argument_nullmap[0] != nullptr) { + if (argument_nullmap[0]) { const auto& src_column_nullmap = assert_cast(*argument_nullmap[0]); result_nullmap = src_column_nullmap.clone_resized(input_rows_count); src_null_map_data = &(src_column_nullmap.get_data()); } const ColumnUInt8::Container* key_null_map_data = nullptr; - if (argument_nullmap[1] != nullptr) { + if (argument_nullmap[1]) { const auto& key_column_nullmap = assert_cast(*argument_nullmap[1]); key_null_map_data = &(key_column_nullmap.get_data()); } @@ -149,7 +149,7 @@ class FunctionArraySortBy : public IFunction { } } src_nested_nullable_column.append_data_by_selector(result_data_column, src_selector); - if (result_nullmap != nullptr) { + if (result_nullmap) { block.replace_by_position( result, ColumnNullable::create(ColumnArray::create(std::move(result_data_column), diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h index dd993100885e3a0..8ac21bcd710f8dd 100644 --- a/be/src/vec/functions/array/function_arrays_overlap.h +++ b/be/src/vec/functions/array/function_arrays_overlap.h @@ -370,11 +370,11 @@ class FunctionArraysOverlap : public IFunction { ExecutorImpl impl; if (right_size < left_size) { - impl.insert_array(right_data.nested_col, right_start, right_size); - dst_data[row] = impl.find_any(left_data.nested_col, left_start, left_size); + impl.insert_array(right_data.nested_col.get(), right_start, right_size); + dst_data[row] = impl.find_any(left_data.nested_col.get(), left_start, left_size); } else { - impl.insert_array(left_data.nested_col, left_start, left_size); - dst_data[row] = impl.find_any(right_data.nested_col, right_start, right_size); + impl.insert_array(left_data.nested_col.get(), left_start, left_size); + dst_data[row] = impl.find_any(right_data.nested_col.get(), right_start, right_size); } } return Status::OK(); diff --git a/be/src/vec/functions/comparison_equal_for_null.cpp b/be/src/vec/functions/comparison_equal_for_null.cpp index 919f9ebed65a7c0..35719cf573008a2 100644 --- a/be/src/vec/functions/comparison_equal_for_null.cpp +++ b/be/src/vec/functions/comparison_equal_for_null.cpp @@ -139,18 +139,20 @@ class FunctionEqForNull : public IFunction { left_column = check_and_get_column( assert_cast( col_left.column.get()) - ->get_data_column_ptr()); + ->get_data_column_ptr() + .get()); } else { - left_column = check_and_get_column(col_left.column); + left_column = check_and_get_column(col_left.column.get()); } if (right_const) { right_column = check_and_get_column( assert_cast( col_right.column.get()) - ->get_data_column_ptr()); + ->get_data_column_ptr() + .get()); } else { - right_column = check_and_get_column(col_right.column); + 
right_column = check_and_get_column(col_right.column.get()); } bool left_nullable = left_column != nullptr; diff --git a/be/src/vec/functions/function_agg_state.h b/be/src/vec/functions/function_agg_state.h index f4b7aef23af220d..84a8d4f6f8b055c 100644 --- a/be/src/vec/functions/function_agg_state.h +++ b/be/src/vec/functions/function_agg_state.h @@ -82,7 +82,7 @@ class FunctionAggState : public IFunction { save_columns.push_back(column); } - agg_columns.push_back(column); + agg_columns.push_back(column.get()); } _agg_function->streaming_agg_serialize_to_column(agg_columns.data(), col, input_rows_count, &(context->get_arena())); diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h index 4c0b8e7a0890dcd..a2757b38346247c 100644 --- a/be/src/vec/functions/function_binary_arithmetic.h +++ b/be/src/vec/functions/function_binary_arithmetic.h @@ -165,7 +165,7 @@ struct BinaryOperationImpl { static ColumnPtr adapt_normal_vector_constant(ColumnPtr column_left, B b) { auto column_left_ptr = - check_and_get_column(column_left); + check_and_get_column(column_left.get()); auto column_result = Base::ColumnVectorResult::create(column_left->size()); DCHECK(column_left_ptr != nullptr); @@ -182,7 +182,7 @@ struct BinaryOperationImpl { static ColumnPtr adapt_normal_constant_vector(A a, ColumnPtr column_right) { auto column_right_ptr = - check_and_get_column(column_right); + check_and_get_column(column_right.get()); auto column_result = Base::ColumnVectorResult::create(column_right->size()); DCHECK(column_right_ptr != nullptr); @@ -199,9 +199,9 @@ struct BinaryOperationImpl { static ColumnPtr adapt_normal_vector_vector(ColumnPtr column_left, ColumnPtr column_right) { auto column_left_ptr = - check_and_get_column(column_left); + check_and_get_column(column_left.get()); auto column_right_ptr = - check_and_get_column(column_right); + check_and_get_column(column_right.get()); auto column_result = Base::ColumnVectorResult::create(column_left->size()); DCHECK(column_left_ptr != nullptr && column_right_ptr != nullptr); @@ -447,7 +447,8 @@ struct DecimalBinaryOperation { auto type_result = assert_cast&, TypeCheckOnRelease::DISABLE>( *res_data_type); - auto column_left_ptr = check_and_get_column(column_left); + auto column_left_ptr = + check_and_get_column(column_left.get()); auto column_result = ColumnDecimal::create( column_left->size(), assert_cast&, TypeCheckOnRelease::DISABLE>( @@ -482,7 +483,8 @@ struct DecimalBinaryOperation { auto type_result = assert_cast&, TypeCheckOnRelease::DISABLE>( *res_data_type); - auto column_right_ptr = check_and_get_column(column_right); + auto column_right_ptr = + check_and_get_column(column_right.get()); auto column_result = ColumnDecimal::create( column_right->size(), assert_cast&, TypeCheckOnRelease::DISABLE>( @@ -515,8 +517,10 @@ struct DecimalBinaryOperation { const ResultType& max_result_number, const ResultType& scale_diff_multiplier, DataTypePtr res_data_type) { - auto column_left_ptr = check_and_get_column(column_left); - auto column_right_ptr = check_and_get_column(column_right); + auto column_left_ptr = + check_and_get_column(column_left.get()); + auto column_right_ptr = + check_and_get_column(column_right.get()); const auto& type_result = assert_cast&>(*res_data_type); auto column_result = @@ -847,8 +851,8 @@ struct ConstOrVectorAdapter { static ColumnPtr constant_constant(ColumnPtr column_left, ColumnPtr column_right, const LeftDataType& type_left, const RightDataType& type_right, DataTypePtr res_data_type) 
{ - auto column_left_ptr = check_and_get_column(column_left); - auto column_right_ptr = check_and_get_column(column_right); + const auto* column_left_ptr = check_and_get_column(column_left.get()); + const auto* column_right_ptr = check_and_get_column(column_right.get()); DCHECK(column_left_ptr != nullptr && column_right_ptr != nullptr); ColumnPtr column_result = nullptr; @@ -875,7 +879,7 @@ struct ConstOrVectorAdapter { static ColumnPtr vector_constant(ColumnPtr column_left, ColumnPtr column_right, const LeftDataType& type_left, const RightDataType& type_right, DataTypePtr res_data_type) { - auto column_right_ptr = check_and_get_column(column_right); + const auto* column_right_ptr = check_and_get_column(column_right.get()); DCHECK(column_right_ptr != nullptr); if constexpr (result_is_decimal) { @@ -894,7 +898,7 @@ struct ConstOrVectorAdapter { static ColumnPtr constant_vector(ColumnPtr column_left, ColumnPtr column_right, const LeftDataType& type_left, const RightDataType& type_right, DataTypePtr res_data_type) { - auto column_left_ptr = check_and_get_column(column_left); + const auto* column_left_ptr = check_and_get_column(column_left.get()); DCHECK(column_left_ptr != nullptr); if constexpr (result_is_decimal) { diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 92a5dba7b7a4d44..96cae50a9baf9a5 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -1211,7 +1211,7 @@ class FunctionBitmapToArray : public IFunction { IColumn* dest_nested_column = &dest_array_column_ptr->get_data(); ColumnNullable* dest_nested_nullable_col = reinterpret_cast(dest_nested_column); - dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr(); + dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr().get(); auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data(); auto& arg_col = block.get_by_position(arguments[0]).column; diff --git a/be/src/vec/functions/function_bitmap_variadic.cpp b/be/src/vec/functions/function_bitmap_variadic.cpp index 6e1a103fdbd83bc..47a159e3c2f3918 100644 --- a/be/src/vec/functions/function_bitmap_variadic.cpp +++ b/be/src/vec/functions/function_bitmap_variadic.cpp @@ -247,7 +247,7 @@ class FunctionBitMapVariadic : public IFunction { vec_res.resize(input_rows_count); RETURN_IF_ERROR(Impl::vector_vector(argument_columns.data(), argument_size, - input_rows_count, vec_res, col_res_nulls)); + input_rows_count, vec_res, col_res_nulls.get())); if (!use_default_implementation_for_nulls() && result_info.type->is_nullable()) { block.replace_by_position( result, ColumnNullable::create(std::move(col_res), std::move(col_res_nulls))); diff --git a/be/src/vec/functions/function_case.h b/be/src/vec/functions/function_case.h index af44ea0d9b1acee..81f08f682ef0ef3 100644 --- a/be/src/vec/functions/function_case.h +++ b/be/src/vec/functions/function_case.h @@ -318,7 +318,7 @@ class FunctionCase : public IFunction { const uint8* __restrict then_idx, CaseWhenColumnHolder& column_holder) const { for (auto& then_ptr : column_holder.then_ptrs) { - then_ptr->reset(then_ptr.value()->convert_to_full_column_if_const()); + then_ptr->reset(then_ptr.value()->convert_to_full_column_if_const().get()); } size_t rows_count = column_holder.rows_count; diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index af9e9d19267073a..483e837de5dfd8a 100644 --- a/be/src/vec/functions/function_cast.h +++ 
b/be/src/vec/functions/function_cast.h @@ -770,7 +770,7 @@ struct ConvertImplGenericFromJsonb { continue; } ReadBuffer read_buffer((char*)(input_str.data()), input_str.size()); - Status st = data_type_to->from_string(read_buffer, col_to); + Status st = data_type_to->from_string(read_buffer, col_to.get()); // if parsing failed, will return null (*vec_null_map_to)[i] = !st.ok(); if (!st.ok()) { diff --git a/be/src/vec/functions/function_collection_in.h b/be/src/vec/functions/function_collection_in.h index ce58d63f44b6555..35299c7ea672be7 100644 --- a/be/src/vec/functions/function_collection_in.h +++ b/be/src/vec/functions/function_collection_in.h @@ -117,7 +117,8 @@ class FunctionCollectionIn : public IFunction { DCHECK(const_column_ptr != nullptr); const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr); if (col->is_nullable()) { - auto* null_col = vectorized::check_and_get_column(col); + const auto* null_col = + vectorized::check_and_get_column(col.get()); if (null_col->has_null()) { state->null_in_set = true; } else { @@ -161,7 +162,7 @@ class FunctionCollectionIn : public IFunction { if (materialized_column_not_null->is_nullable()) { materialized_column_not_null = assert_cast( vectorized::check_and_get_column( - materialized_column_not_null) + materialized_column_not_null.get()) ->get_nested_column_ptr()); } diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index 330ea75cba96c88..224bf49179177c6 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -878,7 +878,7 @@ struct CurrentDateTimeImpl { bool use_const; if constexpr (WithPrecision) { if (const auto* const_column = check_and_get_column( - block.get_by_position(arguments[0]).column)) { + block.get_by_position(arguments[0]).column.get())) { int64_t scale = const_column->get_int(0); dtv.from_unixtime(context->state()->timestamp_ms() / 1000, context->state()->nano_seconds(), @@ -892,7 +892,7 @@ struct CurrentDateTimeImpl { use_const = true; } else if (const auto* nullable_column = check_and_get_column( - block.get_by_position(arguments[0]).column)) { + block.get_by_position(arguments[0]).column.get())) { const auto& null_map = nullable_column->get_null_map_data(); const auto& nested_column = assert_cast( nullable_column->get_nested_column_ptr().get()); diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 1a1c23e2b06c354..9f2f4dc28868b46 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -768,11 +768,13 @@ class FunctionIsIPAddressInRange : public IFunction { if (is_ipv4(addr_column_with_type_and_name.type)) { execute_impl_with_ip( input_rows_count, addr_const, cidr_const, - assert_cast(cidr_column.get()), addr_column, col_res); + assert_cast(cidr_column.get()), addr_column, + col_res.get()); } else if (is_ipv6(addr_column_with_type_and_name.type)) { execute_impl_with_ip( input_rows_count, addr_const, cidr_const, - assert_cast(cidr_column.get()), addr_column, col_res); + assert_cast(cidr_column.get()), addr_column, + col_res.get()); } else { const auto* str_addr_column = assert_cast(addr_column.get()); const auto* str_cidr_column = assert_cast(cidr_column.get()); diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index 463508169aadc61..dcae26f3c2f8440 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ 
b/be/src/vec/functions/function_jsonb.cpp @@ -459,11 +459,12 @@ class FunctionJsonbKeys : public IFunction { // prepare jsonb data column jsonb_data_column = unpack_if_const(block.get_by_position(arguments[0]).column).first; if (block.get_by_position(arguments[0]).column->is_nullable()) { - const auto* nullable = check_and_get_column(jsonb_data_column); + const auto* nullable = check_and_get_column(jsonb_data_column.get()); jsonb_data_column = nullable->get_nested_column_ptr(); data_null_map = &nullable->get_null_map_data(); } - const ColumnString* col_from_string = check_and_get_column(jsonb_data_column); + const ColumnString* col_from_string = + check_and_get_column(jsonb_data_column.get()); // prepare parse path column prepare, maybe we do not have path column ColumnPtr jsonb_path_column = nullptr; @@ -475,11 +476,12 @@ class FunctionJsonbKeys : public IFunction { std::tie(jsonb_path_column, path_const) = unpack_if_const(block.get_by_position(arguments[1]).column); if (block.get_by_position(arguments[1]).column->is_nullable()) { - const auto* nullable = check_and_get_column(jsonb_path_column); + const auto* nullable = + check_and_get_column(jsonb_path_column.get()); jsonb_path_column = nullable->get_nested_column_ptr(); path_null_map = &nullable->get_null_map_data(); } - jsonb_path_col = check_and_get_column(jsonb_path_column); + jsonb_path_col = check_and_get_column(jsonb_path_column.get()); } auto null_map = ColumnUInt8::create(input_rows_count, 0); @@ -1844,9 +1846,10 @@ class FunctionJsonSearch : public IFunction { // prepare jsonb data column std::tie(col_json, json_is_const) = unpack_if_const(block.get_by_position(arguments[0]).column); - const ColumnString* col_json_string = check_and_get_column(col_json); - if (auto* nullable = check_and_get_column(col_json)) { - col_json_string = check_and_get_column(nullable->get_nested_column_ptr()); + const ColumnString* col_json_string = check_and_get_column(col_json.get()); + if (auto* nullable = check_and_get_column(col_json.get())) { + col_json_string = + check_and_get_column(nullable->get_nested_column_ptr().get()); } if (!col_json_string) { @@ -1873,8 +1876,8 @@ class FunctionJsonSearch : public IFunction { // prepare jsonb data column std::tie(col_one, one_is_const) = unpack_if_const(block.get_by_position(arguments[1]).column); - const ColumnString* col_one_string = check_and_get_column(col_one); - if (auto* nullable = check_and_get_column(col_one)) { + const ColumnString* col_one_string = check_and_get_column(col_one.get()); + if (auto* nullable = check_and_get_column(col_one.get())) { col_one_string = check_and_get_column(*nullable->get_nested_column_ptr()); } if (!col_one_string) { @@ -1921,8 +1924,9 @@ class FunctionJsonSearch : public IFunction { std::tie(col_search, search_is_const) = unpack_if_const(block.get_by_position(arguments[2]).column); - const ColumnString* col_search_string = check_and_get_column(col_search); - if (auto* nullable = check_and_get_column(col_search)) { + const ColumnString* col_search_string = + check_and_get_column(col_search.get()); + if (auto* nullable = check_and_get_column(col_search.get())) { col_search_string = check_and_get_column(*nullable->get_nested_column_ptr()); } diff --git a/be/src/vec/functions/function_nullables.cpp b/be/src/vec/functions/function_nullables.cpp index 91bce24f48fc8b3..b1e72ff52a71f4c 100644 --- a/be/src/vec/functions/function_nullables.cpp +++ b/be/src/vec/functions/function_nullables.cpp @@ -54,7 +54,8 @@ class FunctionNullable : public IFunction { Status 
execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { ColumnPtr& col = block.get_by_position(arguments[0]).column; - if (const auto* col_null = check_and_get_column(col); col_null == nullptr) { + if (const auto* col_null = check_and_get_column(col.get()); + col_null == nullptr) { // not null block.replace_by_position( result, ColumnNullable::create(col, ColumnBool::create(input_rows_count, 0))); @@ -85,7 +86,7 @@ class FunctionNonNullable : public IFunction { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { auto& data = block.get_by_position(arguments[0]); - if (const auto* col_null = check_and_get_column(data.column); + if (const auto* col_null = check_and_get_column(data.column.get()); col_null == nullptr) // raise error if input is not nullable. { return Status::InvalidArgument( diff --git a/be/src/vec/functions/function_quantile_state.cpp b/be/src/vec/functions/function_quantile_state.cpp index 95afbf1db32d233..8f8740841c5407e 100644 --- a/be/src/vec/functions/function_quantile_state.cpp +++ b/be/src/vec/functions/function_quantile_state.cpp @@ -130,7 +130,7 @@ class FunctionToQuantileState : public IFunction { const ColumnPtr& column = block.get_by_position(arguments[0]).column; const DataTypePtr& data_type = block.get_by_position(arguments[0]).type; auto compression_arg = check_and_get_column_const( - block.get_by_position(arguments.back()).column); + block.get_by_position(arguments.back()).column.get()); float compression = 2048; if (compression_arg) { auto compression_arg_val = compression_arg->get_value(); @@ -189,7 +189,7 @@ class FunctionQuantileStatePercent : public IFunction { auto str_col = assert_cast(column.get()); auto& col_data = str_col->get_data(); auto percent_arg = check_and_get_column_const( - block.get_by_position(arguments.back()).column); + block.get_by_position(arguments.back()).column.get()); if (!percent_arg) { return Status::InternalError( diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 14926e1062c0209..a729af5948a73fa 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -2142,7 +2142,7 @@ class FunctionSplitByString : public IFunction { NullMapType* dest_nested_null_map = nullptr; auto* dest_nullable_col = reinterpret_cast(dest_nested_column); - dest_nested_column = dest_nullable_col->get_nested_column_ptr(); + dest_nested_column = dest_nullable_col->get_nested_column_ptr().get(); dest_nested_null_map = &dest_nullable_col->get_null_map_column().get_data(); const auto* col_left = check_and_get_column(src_column.get()); @@ -4436,7 +4436,7 @@ class FunctionTranslate : public IFunction { } else if (is_ascii) { impl_vectors = impl_vectors_ascii; } - impl_vectors(col_source, col_from, col_to, col_res); + impl_vectors(col_source, col_from, col_to, col_res.get()); block.get_by_position(result).column = std::move(col_res); return Status::OK(); } diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp index 0bcd31af40dac79..f0a7c3b68aec49f 100644 --- a/be/src/vec/functions/function_tokenize.cpp +++ b/be/src/vec/functions/function_tokenize.cpp @@ -129,7 +129,7 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block NullMapType* dest_nested_null_map = nullptr; ColumnNullable* dest_nullable_col = 
reinterpret_cast(dest_nested_column); - dest_nested_column = dest_nullable_col->get_nested_column_ptr(); + dest_nested_column = dest_nullable_col->get_nested_column_ptr().get(); dest_nested_null_map = &dest_nullable_col->get_null_map_column().get_data(); if (auto col_left = check_and_get_column(src_column.get())) { diff --git a/be/src/vec/functions/functions_geo.cpp b/be/src/vec/functions/functions_geo.cpp index 6d75258d146ff7c..0a752af18fe04c3 100644 --- a/be/src/vec/functions/functions_geo.cpp +++ b/be/src/vec/functions/functions_geo.cpp @@ -258,10 +258,10 @@ struct StDistanceSphere { ColumnPtr y_lat_origin = block.get_by_position(arguments[3]).column->convert_to_full_column_if_const(); - const auto* x_lng = check_and_get_column(x_lng_origin); - const auto* x_lat = check_and_get_column(x_lat_origin); - const auto* y_lng = check_and_get_column(y_lng_origin); - const auto* y_lat = check_and_get_column(y_lat_origin); + const auto* x_lng = check_and_get_column(x_lng_origin.get()); + const auto* x_lat = check_and_get_column(x_lat_origin.get()); + const auto* y_lng = check_and_get_column(y_lng_origin.get()); + const auto* y_lat = check_and_get_column(y_lat_origin.get()); CHECK(x_lng && x_lat && y_lng && y_lat); const auto size = x_lng->size(); @@ -305,10 +305,10 @@ struct StAngleSphere { ColumnPtr y_lat_origin = block.get_by_position(arguments[3]).column->convert_to_full_column_if_const(); - const auto* x_lng = check_and_get_column(x_lng_origin); - const auto* x_lat = check_and_get_column(x_lat_origin); - const auto* y_lng = check_and_get_column(y_lng_origin); - const auto* y_lat = check_and_get_column(y_lat_origin); + const auto* x_lng = check_and_get_column(x_lng_origin.get()); + const auto* x_lat = check_and_get_column(x_lat_origin.get()); + const auto* y_lng = check_and_get_column(y_lng_origin.get()); + const auto* y_lat = check_and_get_column(y_lat_origin.get()); CHECK(x_lng && x_lat && y_lng && y_lat); const auto size = x_lng->size(); diff --git a/be/src/vec/functions/functions_logical.cpp b/be/src/vec/functions/functions_logical.cpp index 0f474851f032ee0..f99f0447725eddd 100644 --- a/be/src/vec/functions/functions_logical.cpp +++ b/be/src/vec/functions/functions_logical.cpp @@ -141,11 +141,11 @@ void basic_execute_impl(ColumnRawPtrs arguments, ColumnWithTypeAndName& result_i size_t input_rows_count) { auto col_res = ColumnUInt8::create(input_rows_count); if (auto l = check_and_get_column(arguments[0])) { - vector_const(arguments[1], l, col_res, input_rows_count); + vector_const(arguments[1], l, col_res.get(), input_rows_count); } else if (auto r = check_and_get_column(arguments[1])) { - vector_const(arguments[0], r, col_res, input_rows_count); + vector_const(arguments[0], r, col_res.get(), input_rows_count); } else { - vector_vector(arguments[0], arguments[1], col_res, input_rows_count); + vector_vector(arguments[0], arguments[1], col_res.get(), input_rows_count); } result_info.column = std::move(col_res); } @@ -156,11 +156,12 @@ void null_execute_impl(ColumnRawPtrs arguments, ColumnWithTypeAndName& result_in auto col_nulls = ColumnUInt8::create(input_rows_count); auto col_res = ColumnUInt8::create(input_rows_count); if (auto l = check_and_get_column(arguments[0])) { - vector_const_null(arguments[1], l, col_res, col_nulls, input_rows_count); + vector_const_null(arguments[1], l, col_res.get(), col_nulls.get(), input_rows_count); } else if (auto r = check_and_get_column(arguments[1])) { - vector_const_null(arguments[0], r, col_res, col_nulls, input_rows_count); + 
vector_const_null(arguments[0], r, col_res.get(), col_nulls.get(), input_rows_count); } else { - vector_vector_null(arguments[0], arguments[1], col_res, col_nulls, input_rows_count); + vector_vector_null(arguments[0], arguments[1], col_res.get(), col_nulls.get(), + input_rows_count); } result_info.column = ColumnNullable::create(std::move(col_res), std::move(col_nulls)); } diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index 5d5901901828018..6f697ba7441df58 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -216,7 +216,7 @@ class FunctionIn : public IFunction { if (materialized_column->is_nullable()) { const auto* null_col_ptr = vectorized::check_and_get_column( - materialized_column); + materialized_column.get()); const auto& null_map = assert_cast( null_col_ptr->get_null_map_column()) .get_data(); diff --git a/be/src/vec/functions/least_greast.cpp b/be/src/vec/functions/least_greast.cpp index 7d1953f7041174d..9ad53c4f5315294 100644 --- a/be/src/vec/functions/least_greast.cpp +++ b/be/src/vec/functions/least_greast.cpp @@ -173,7 +173,7 @@ struct FunctionFieldImpl { size_t input_rows_count) { const auto& data_type = block.get_by_position(arguments[0]).type; auto result_column = ColumnInt32::create(input_rows_count, 0); - auto& res_data = static_cast(result_column)->get_data(); + auto& res_data = static_cast(result_column.get())->get_data(); const auto& column_size = arguments.size(); std::vector argument_columns(column_size); diff --git a/be/src/vec/functions/round.h b/be/src/vec/functions/round.h index 3f4f9c60fcbe3df..3b821f0aa528a41 100644 --- a/be/src/vec/functions/round.h +++ b/be/src/vec/functions/round.h @@ -731,6 +731,7 @@ class FunctionRounding : public IFunction { const auto* col_general = is_col_general_const ? assert_cast(*column_general.column) .get_data_column_ptr() + .get() : column_general.column.get(); ColumnPtr res; diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp b/be/src/vec/sink/vtablet_block_convertor.cpp index 26de6ea6c7e3d18..466902a4f907abc 100644 --- a/be/src/vec/sink/vtablet_block_convertor.cpp +++ b/be/src/vec/sink/vtablet_block_convertor.cpp @@ -506,7 +506,8 @@ Status OlapTableBlockConvertor::_fill_auto_inc_cols(vectorized::Block* block, si vectorized::ColumnInt64::Container& dst_values = dst_column->get_data(); vectorized::ColumnPtr src_column_ptr = block->get_by_position(idx).column; - if (const auto* const_column = check_and_get_column(src_column_ptr)) { + if (const auto* const_column = + check_and_get_column(src_column_ptr.get())) { // for insert stmt like "insert into tbl1 select null,col1,col2,... from tbl2" or // "insert into tbl1 select 1,col1,col2,... 
from tbl2", the type of literal's column // will be `ColumnConst` @@ -530,7 +531,7 @@ Status OlapTableBlockConvertor::_fill_auto_inc_cols(vectorized::Block* block, si dst_values.resize_fill(rows, value); } } else if (const auto* src_nullable_column = - check_and_get_column(src_column_ptr)) { + check_and_get_column(src_column_ptr.get())) { auto src_nested_column_ptr = src_nullable_column->get_nested_column_ptr(); const auto& null_map_data = src_nullable_column->get_null_map_data(); dst_values.reserve(rows); diff --git a/be/src/vec/sink/writer/iceberg/partition_transformers.h b/be/src/vec/sink/writer/iceberg/partition_transformers.h index 79eb385b298a8f5..0b18ce249522eb8 100644 --- a/be/src/vec/sink/writer/iceberg/partition_transformers.h +++ b/be/src/vec/sink/writer/iceberg/partition_transformers.h @@ -153,8 +153,8 @@ class StringTruncatePartitionColumnTransform : public PartitionColumnTransform { ColumnPtr string_column_ptr; ColumnPtr null_map_column_ptr; bool is_nullable = false; - if (auto* nullable_column = - check_and_get_column(column_with_type_and_name.column)) { + if (const auto* nullable_column = + check_and_get_column(column_with_type_and_name.column.get())) { null_map_column_ptr = nullable_column->get_null_map_column_ptr(); string_column_ptr = nullable_column->get_nested_column_ptr(); is_nullable = true; @@ -211,7 +211,7 @@ class IntegerTruncatePartitionColumnTransform : public PartitionColumnTransform //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -270,7 +270,7 @@ class BigintTruncatePartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -332,8 +332,8 @@ class DecimalTruncatePartitionColumnTransform : public PartitionColumnTransform ColumnPtr column_ptr; ColumnPtr null_map_column_ptr; bool is_nullable = false; - if (auto* nullable_column = - check_and_get_column(column_with_type_and_name.column)) { + if (const auto* nullable_column = + check_and_get_column(column_with_type_and_name.column.get())) { null_map_column_ptr = nullable_column->get_null_map_column_ptr(); column_ptr = nullable_column->get_nested_column_ptr(); is_nullable = true; @@ -342,7 +342,7 @@ class DecimalTruncatePartitionColumnTransform : public PartitionColumnTransform is_nullable = false; } - const auto* const decimal_col = check_and_get_column>(column_ptr); + const auto* const decimal_col = check_and_get_column>(column_ptr.get()); const auto& vec_src = decimal_col->get_data(); auto col_res = ColumnDecimal::create(vec_src.size(), decimal_col->get_scale()); @@ -391,7 +391,7 @@ class IntBucketPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -454,7 
+454,7 @@ class BigintBucketPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -518,7 +518,7 @@ class DecimalBucketPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -597,7 +597,7 @@ class DateBucketPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -665,7 +665,7 @@ class TimestampBucketPartitionColumnTransform : public PartitionColumnTransform //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -746,7 +746,7 @@ class StringBucketPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -811,7 +811,7 @@ class DateYearPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -883,7 +883,7 @@ class TimestampYearPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -955,7 +955,7 @@ class DateMonthPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -1027,7 +1027,7 @@ class TimestampMonthPartitionColumnTransform : public PartitionColumnTransform { 
//1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -1099,7 +1099,7 @@ class DateDayPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -1177,7 +1177,7 @@ class TimestampDayPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -1254,7 +1254,7 @@ class TimestampHourPartitionColumnTransform : public PartitionColumnTransform { //1) get the target column ptr const ColumnWithTypeAndName& column_with_type_and_name = block.get_by_position(column_pos); ColumnPtr column_ptr = column_with_type_and_name.column->convert_to_full_column_if_const(); - CHECK(column_ptr != nullptr); + CHECK(column_ptr); //2) get the input data from block ColumnPtr null_map_column_ptr; @@ -1328,7 +1328,7 @@ class VoidPartitionColumnTransform : public PartitionColumnTransform { ColumnPtr column_ptr; ColumnPtr null_map_column_ptr; if (auto* nullable_column = - check_and_get_column(column_with_type_and_name.column)) { + check_and_get_column(column_with_type_and_name.column.get())) { null_map_column_ptr = nullable_column->get_null_map_column_ptr(); column_ptr = nullable_column->get_nested_column_ptr(); } else { diff --git a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp index 29c97b59ea4dba9..608afced8d92db9 100644 --- a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp +++ b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp @@ -410,7 +410,7 @@ std::any VIcebergTableWriter::_get_iceberg_partition_value( int position) { //1) get the partition column ptr ColumnPtr col_ptr = partition_column.column->convert_to_full_column_if_const(); - CHECK(col_ptr != nullptr); + CHECK(col_ptr); if (col_ptr->is_nullable()) { const ColumnNullable* nullable_column = reinterpret_cast(col_ptr.get()); diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 8d17b2787a53da7..485d81311ba5381 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -197,7 +197,7 @@ inline void change_null_to_true(ColumnPtr column, ColumnPtr argument = nullptr) data[i] |= null_map[i]; } memset(null_map, 0, rows); - } else if (argument != nullptr && argument->has_null()) { + } else if (argument && argument->has_null()) { const auto* __restrict null_map = assert_cast(argument.get())->get_null_map_data().data(); auto* __restrict data = diff --git a/be/test/vec/columns/common_column_test.h b/be/test/vec/columns/common_column_test.h index 8e1b86c0168f99b..b70ac660136216b 100644 --- a/be/test/vec/columns/common_column_test.h +++ b/be/test/vec/columns/common_column_test.h @@ -989,7 +989,7 @@ class 
CommonColumnTest : public ::testing::Test { // check size EXPECT_EQ(ptr->size(), *cl); // check ptr is not the same - EXPECT_NE(ptr.get(), source_column); + EXPECT_NE(ptr.get(), source_column.get()); // check after clone_resized with assert_res auto ser_col = ColumnString::create(); @@ -1042,7 +1042,7 @@ class CommonColumnTest : public ::testing::Test { // check size EXPECT_EQ(ptr->size(), insert_size); // check ptr is not the same - EXPECT_NE(ptr.get(), source_column); + EXPECT_NE(ptr.get(), source_column.get()); // check after cut with assert_res auto ser_col = ColumnString::create(); ser_col->reserve(ptr->size()); @@ -1095,7 +1095,7 @@ class CommonColumnTest : public ::testing::Test { // check size EXPECT_EQ(ptr->size(), insert_size); // check ptr is not the same - EXPECT_NE(ptr.get(), source_column); + EXPECT_NE(ptr.get(), source_column.get()); // check after cut with assert_res auto ser_col = ColumnString::create(); ser_col->reserve(ptr->size()); diff --git a/be/test/vec/data_types/from_string_test.cpp b/be/test/vec/data_types/from_string_test.cpp index 01515b805d9be0d..eb8b00ab16f69c8 100644 --- a/be/test/vec/data_types/from_string_test.cpp +++ b/be/test/vec/data_types/from_string_test.cpp @@ -203,7 +203,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) { string test_str = std::get<1>(type_pair)[i]; // data_type from_string ReadBuffer rb_test(test_str.data(), test_str.size()); - Status st = data_type_ptr->from_string(rb_test, col); + Status st = data_type_ptr->from_string(rb_test, col.get()); if (std::get<3>(type_pair)[i].empty()) { EXPECT_EQ(st.ok(), false); std::cout << "deserialize failed: " << st.to_json() << std::endl; @@ -256,11 +256,11 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) { ReadBuffer rand_rb(rand_date.data(), rand_date.size()); auto col = data_type_ptr->create_column(); - Status st = data_type_ptr->from_string(min_rb, col); + Status st = data_type_ptr->from_string(min_rb, col.get()); EXPECT_EQ(st.ok(), true); - st = data_type_ptr->from_string(max_rb, col); + st = data_type_ptr->from_string(max_rb, col.get()); EXPECT_EQ(st.ok(), true); - st = data_type_ptr->from_string(rand_rb, col); + st = data_type_ptr->from_string(rand_rb, col.get()); EXPECT_EQ(st.ok(), true); string min_s_d = data_type_ptr->to_string(*col, 0); @@ -319,7 +319,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) { string rand_ip = rand_wf->to_string(); ReadBuffer rand_rb(rand_ip.data(), rand_ip.size()); auto col = data_type_ptr->create_column(); - st = data_type_ptr->from_string(rand_rb, col); + st = data_type_ptr->from_string(rand_rb, col.get()); EXPECT_EQ(st.ok(), true); string rand_s_d = data_type_ptr->to_string(*col, 0); rtrim(rand_ip); @@ -336,7 +336,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) { EXPECT_EQ(st.ok(), false); ReadBuffer rand_rb(pair.second.data(), pair.second.size()); auto col = data_type_ptr->create_column(); - st = data_type_ptr->from_string(rand_rb, col); + st = data_type_ptr->from_string(rand_rb, col.get()); EXPECT_EQ(st.ok(), false); } } diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp index 2affbc36c86ab34..b65b3fc6f63d2c3 100644 --- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp @@ -510,7 +510,7 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { { // from_string ReadBuffer rb(rand_str.data(), rand_str.size()); - Status status = array_data_type_ptr->from_string(rb, col2); + Status status = 
array_data_type_ptr->from_string(rb, col2.get()); EXPECT_EQ(status.ok(), true); auto ser_col = ColumnString::create(); ser_col->reserve(1); @@ -661,7 +661,7 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { { ReadBuffer rb(rand_str.data(), rand_str.size()); std::cout << "from string rb: " << rb.to_string() << std::endl; - Status stat = map_data_type_ptr->from_string(rb, col2); + Status stat = map_data_type_ptr->from_string(rb, col2.get()); std::cout << stat.to_json() << std::endl; auto ser_col = ColumnString::create(); ser_col->reserve(1); @@ -840,7 +840,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { // from_string ReadBuffer rb(rand_str.data(), rand_str.size()); auto col2 = array_data_type_ptr->create_column(); - Status status = array_data_type_ptr->from_string(rb, col2); + Status status = array_data_type_ptr->from_string(rb, col2.get()); if (expect_from_string_str == "") { EXPECT_EQ(status.ok(), false); std::cout << "test from_string: " << status.to_json() << std::endl; @@ -995,7 +995,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { // from_string ReadBuffer rb(rand_str.data(), rand_str.size()); auto col2 = array_data_type_ptr->create_column(); - Status status = array_data_type_ptr->from_string(rb, col2); + Status status = array_data_type_ptr->from_string(rb, col2.get()); if (expect_from_string_str == "") { EXPECT_EQ(status.ok(), false); std::cout << "test from_string: " << status.to_json() << std::endl; @@ -1213,7 +1213,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { // from_string ReadBuffer rb(rand_str.data(), rand_str.size()); auto col2 = map_data_type_ptr->create_column(); - Status status = map_data_type_ptr->from_string(rb, col2); + Status status = map_data_type_ptr->from_string(rb, col2.get()); if (expect_from_string_str == "") { EXPECT_EQ(status.ok(), false); std::cout << "test from_string: " << status.to_json() << std::endl; @@ -1354,7 +1354,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { // from_string ReadBuffer rb(rand_str.data(), rand_str.size()); auto col2 = array_data_type_ptr->create_column(); - Status status = array_data_type_ptr->from_string(rb, col2); + Status status = array_data_type_ptr->from_string(rb, col2.get()); if (expect_from_string_str == "") { EXPECT_EQ(status.ok(), false); std::cout << "test from_string: " << status.to_json() << std::endl; diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index c33a1d64f831111..a3809bf8ec6a486 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -315,7 +315,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty // 3. 
check the result of function ColumnPtr column = block.get_columns()[result]; - EXPECT_TRUE(column != nullptr); + EXPECT_TRUE(column); for (int i = 0; i < row_size; ++i) { // update current line diff --git a/be/test/vec/olap/char_type_padding_test.cpp b/be/test/vec/olap/char_type_padding_test.cpp index 0e4879e46a69901..dfdfea3026ecd00 100644 --- a/be/test/vec/olap/char_type_padding_test.cpp +++ b/be/test/vec/olap/char_type_padding_test.cpp @@ -40,10 +40,10 @@ TEST(CharTypePaddingTest, CharTypePaddingFullTest) { for (size_t i = 0; i < rows; i++) { input->insert_data(str.data(), str.length()); } - EXPECT_FALSE(ConvertorChar::should_padding(input, str.length())); + EXPECT_FALSE(ConvertorChar::should_padding(input.get(), str.length())); input->insert_data(str.data(), str.length() - 1); - EXPECT_TRUE(ConvertorChar::should_padding(input, str.length())); + EXPECT_TRUE(ConvertorChar::should_padding(input.get(), str.length())); } TEST(CharTypePaddingTest, CharTypePaddingDataTest) { @@ -56,7 +56,7 @@ TEST(CharTypePaddingTest, CharTypePaddingDataTest) { input->insert_data(str.data(), str.length() - i); } - auto output = ConvertorChar::clone_and_padding(input, str.length()); + auto output = ConvertorChar::clone_and_padding(input.get(), str.length()); for (int i = 0; i < rows; i++) { auto cell = output->get_data_at(i).to_string(); From 014f84accebfb20f0fcf4debdfd6c768b1cb8f69 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Mon, 23 Dec 2024 11:58:01 +0800 Subject: [PATCH 48/55] [Refactor](function) make all Datetime arithmetic operation overflow lead to exception in BE (#45265) ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: 1. Totally refactored `FunctionDateOrDateTimeComputation`: removed some unnecessary functions and templates, and simplified some of the template calculations. 2. Every datetime arithmetic operation that overflows now raises an exception. Previously, overflow on a nullable input silently produced a `NULL` result, see: ```sql mysql> select date_add('5000-10-10', interval 10000 year); +------------------------------------------------+ | years_add(cast('5000-10-10' as DATEV2), 10000) | +------------------------------------------------+ | NULL | +------------------------------------------------+ 1 row in set (0.10 sec) ``` now: ```sql ERROR 1105 (HY000): errCode = 2, detailMessage = (xxx)[E-218][E-218] Operation years_add of 5000-10-10, 10000 out of range ``` ### Release note All datetime arithmetic operations that overflow now raise an exception in BE. ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [x] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [x] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [x] Yes. - Does this need documentation? - [x] No. - [ ] Yes.
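To make the release-note behavior concrete, here is a minimal standalone C++ sketch of the check-then-throw pattern (illustrative only: `kMaxYear` and `std::out_of_range` stand in for the BE's real date bounds and its `Exception(ErrorCode::OUT_OF_BOUND, ...)`):

```cpp
#include <cstdint>
#include <stdexcept>
#include <string>

// Hypothetical bound standing in for the engine's real datetime range check.
constexpr int64_t kMaxYear = 9999;

int64_t years_add(int64_t year, int32_t delta) {
    int64_t res = year + delta;
    if (res < 0 || res > kMaxYear) {
        // Instead of quietly yielding NULL, the operation now fails loudly.
        throw std::out_of_range("Operation years_add of " + std::to_string(year) + ", " +
                                std::to_string(delta) + " out of range");
    }
    return res;
}

int main() {
    return static_cast<int>(years_add(5000, 10000)); // throws: 15000 > 9999, as in the E-218 example above
}
```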
### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/src/util/datetype_cast.hpp | 7 +- be/src/vec/common/typeid_cast.h | 3 - .../functions/array/function_array_range.cpp | 11 +- .../function_date_or_datetime_computation.cpp | 3 +- .../function_date_or_datetime_computation.h | 832 +++++++----------- ...nction_date_or_datetime_computation_v2.cpp | 4 +- be/src/vec/functions/function_helpers.h | 18 +- be/test/vec/function/function_test_util.h | 2 +- be/test/vec/function/function_time_test.cpp | 82 +- .../test_date_function_const.groovy | 2 +- ...te_or_datetime_computation_negative.groovy | 82 +- 11 files changed, 471 insertions(+), 575 deletions(-) diff --git a/be/src/util/datetype_cast.hpp b/be/src/util/datetype_cast.hpp index 495631ea7e376c9..5c187ded7b729c6 100644 --- a/be/src/util/datetype_cast.hpp +++ b/be/src/util/datetype_cast.hpp @@ -29,8 +29,10 @@ /* * We use these function family to clarify our types of datelike type. for example: * DataTypeDate -------------------> ColumnDate -----------------------> Int64 - * | TypeToColumn ValueTypeOfColumn - * | TypeToValueType + * | | TypeToColumn ValueTypeOfColumn | + * | ↘--------------------------------------------------------------↗ + * | ::FieldType + * ↓ TypeToValueType * VecDateTimeValue */ namespace doris::date_cast { @@ -102,6 +104,7 @@ constexpr bool IsV1() { std::is_same_v); } +// only for datelike types. template constexpr bool IsV2() { return !IsV1(); diff --git a/be/src/vec/common/typeid_cast.h b/be/src/vec/common/typeid_cast.h index e135ef3309d2ec6..3f81586a707c339 100644 --- a/be/src/vec/common/typeid_cast.h +++ b/be/src/vec/common/typeid_cast.h @@ -20,14 +20,11 @@ #pragma once -#include #include -#include #include #include "common/exception.h" #include "common/status.h" -#include "vec/common/demangle.h" /** Checks type by comparing typeid. * The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful. diff --git a/be/src/vec/functions/array/function_array_range.cpp b/be/src/vec/functions/array/function_array_range.cpp index 0980587660b20ae..ffb5987c744d1fb 100644 --- a/be/src/vec/functions/array/function_array_range.cpp +++ b/be/src/vec/functions/array/function_array_range.cpp @@ -16,10 +16,10 @@ // under the License. 
#include -#include #include #include +#include #include #include @@ -41,11 +41,11 @@ #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_time_v2.h" #include "vec/functions/function.h" #include "vec/functions/function_date_or_datetime_computation.h" #include "vec/functions/simple_function_factory.h" #include "vec/runtime/vdatetime_value.h" -#include "vec/utils/util.hpp" namespace doris { class FunctionContext; @@ -229,10 +229,9 @@ struct RangeImplUtil { dest_nested_null_map.push_back(0); offset++; move++; - idx = doris::vectorized::date_time_add< - UNIT::value, DateV2Value, - DateV2Value, DateTimeV2>(idx, step_row, - is_null); + idx = doris::vectorized::date_time_add(idx, step_row, + is_null); } dest_offsets.push_back(offset); } diff --git a/be/src/vec/functions/function_date_or_datetime_computation.cpp b/be/src/vec/functions/function_date_or_datetime_computation.cpp index f6bf806ad46c1d6..ece897d6dcbf7c8 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.cpp +++ b/be/src/vec/functions/function_date_or_datetime_computation.cpp @@ -55,7 +55,7 @@ using FunctionWeeksDiff = using FunctionHoursDiff = FunctionDateOrDateTimeComputation>; using FunctionMinutesDiff = - FunctionDateOrDateTimeComputation>; + FunctionDateOrDateTimeComputation>; using FunctionSecondsDiff = FunctionDateOrDateTimeComputation>; @@ -68,6 +68,7 @@ struct NowFunctionName { static constexpr auto name = "now"; }; +//TODO: remove the inter-layer CurrentDateTimeImpl using FunctionNow = FunctionCurrentDateOrDateTime>; using FunctionNowWithPrecision = diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index 224bf49179177c6..8165f57881b8399 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -17,13 +17,12 @@ #pragma once -#include -#include - #include #include +#include #include #include +#include #include #include @@ -32,7 +31,6 @@ #include "common/exception.h" #include "common/logging.h" #include "common/status.h" -#include "fmt/format.h" #include "runtime/runtime_state.h" #include "udf/udf.h" #include "util/binary_cast.hpp" @@ -45,12 +43,10 @@ #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" #include "vec/common/pod_array_fwd.h" -#include "vec/common/typeid_cast.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/columns_with_type_and_name.h" -#include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_date.h" @@ -67,73 +63,57 @@ namespace doris::vectorized { -template -extern ResultType date_time_add(const Arg& t, Int64 delta, bool& is_null) { - auto ts_value = binary_cast(t); +/// because all these functions(xxx_add/xxx_sub) defined in FE use Integer as the second value +/// so Int32 as delta is enough. For upstream(FunctionDateOrDateTimeComputation) we also could use Int32. 
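+// Illustrative note (hypothetical call, not part of this patch): FE plans the delta as an
+// integer literal, so e.g. `years_add(d, 10)` reaches the BE as date_time_add(d, 10, is_null)
+// with 10 already a plain Int32; no wider integer can arrive here.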
+ +template +ReturnNativeType date_time_add(const InputNativeType& t, Int32 delta, bool& is_null) { + using DateValueType = date_cast::TypeToValueTypeV; + using ResultDateValueType = date_cast::TypeToValueTypeV; + // e.g.: for DataTypeDateTimeV2, cast from u64 to DateV2Value + auto ts_value = binary_cast(t); TimeInterval interval(unit, delta, false); - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v) { is_null = !(ts_value.template date_add_interval(interval)); - - return binary_cast(ts_value); + // here DateValueType = ResultDateValueType + return binary_cast(ts_value); } else { + // this handles HOUR/MINUTE/SECOND/MS_ADD on datev2: the result is datetimev2, not datev2, so we need this two-argument overload to assign it. ResultDateValueType res; is_null = !(ts_value.template date_add_interval(interval, res)); - return binary_cast(res); + return binary_cast(res); } } -#define ADD_TIME_FUNCTION_IMPL(CLASS, NAME, UNIT) \ - template \ - struct CLASS { \ - using ReturnType = std::conditional_t< \ - date_cast::IsV1(), DataTypeDateTime, \ - std::conditional_t< \ - std::is_same_v, \ - std::conditional_t, \ - DataTypeDateTimeV2>>; \ - using ReturnNativeType = \ - date_cast::ValueTypeOfColumnV>; \ - using InputNativeType = date_cast::ValueTypeOfColumnV>; \ - static constexpr auto name = #NAME; \ - static constexpr auto is_nullable = true; \ - static inline ReturnNativeType execute(const InputNativeType& t, Int64 delta, \ - bool& is_null) { \ - if constexpr (std::is_same_v || \ - std::is_same_v) { \ - return date_time_add(t, delta, \ - is_null); \ - } else if constexpr (std::is_same_v) { \ - if constexpr (TimeUnit::UNIT == TimeUnit::HOUR || \ - TimeUnit::UNIT == TimeUnit::MINUTE || \ - TimeUnit::UNIT == TimeUnit::SECOND || \ - TimeUnit::UNIT == TimeUnit::SECOND_MICROSECOND) { \ - return date_time_add, \ - DateV2Value, ReturnNativeType>( \ - t, delta, is_null); \ - } else { \ - return date_time_add, \ - DateV2Value, ReturnNativeType>(t, delta, \ - is_null); \ - } \ - \ - } else { \ - return date_time_add, \ - DateV2Value, ReturnNativeType>(t, delta, \ - is_null); \ - } \ - } \ - \ - static DataTypes get_variadic_argument_types() { \ - return {std::make_shared(), std::make_shared()}; \ - } \ } +#define ADD_TIME_FUNCTION_IMPL(CLASS, NAME, UNIT) \ + template \ + struct CLASS { \ + /* for V1 type all return Datetime.
for V2 type, if unit <= hour, increase to DatetimeV2 */ \ + using ReturnType = std::conditional_t< \ + date_cast::IsV1(), DataTypeDateTime, \ + std::conditional_t< \ + std::is_same_v, \ + std::conditional_t, \ + DataTypeDateTimeV2>>; \ + using ReturnNativeType = ReturnType::FieldType; \ + using InputNativeType = ArgType::FieldType; \ + static constexpr auto name = #NAME; \ + static constexpr auto is_nullable = true; \ + static inline ReturnNativeType execute(const InputNativeType& t, Int32 delta, \ + bool& is_null) { \ + return date_time_add(t, delta, is_null); \ + } \ + \ + static DataTypes get_variadic_argument_types() { \ + return {std::make_shared(), std::make_shared()}; \ + } \ } ADD_TIME_FUNCTION_IMPL(AddMicrosecondsImpl, microseconds_add, MICROSECOND); @@ -146,46 +126,32 @@ ADD_TIME_FUNCTION_IMPL(AddWeeksImpl, weeks_add, WEEK); ADD_TIME_FUNCTION_IMPL(AddMonthsImpl, months_add, MONTH); ADD_TIME_FUNCTION_IMPL(AddYearsImpl, years_add, YEAR); -template +template struct AddQuartersImpl { using ReturnType = - std::conditional_t || - std::is_same_v, + std::conditional_t || + std::is_same_v, DataTypeDateTime, - std::conditional_t, + std::conditional_t, DataTypeDateV2, DataTypeDateTimeV2>>; - using InputNativeType = std::conditional_t< - std::is_same_v || std::is_same_v, - Int64, std::conditional_t, UInt32, UInt64>>; - using ReturnNativeType = std::conditional_t< - std::is_same_v || std::is_same_v, - Int64, std::conditional_t, UInt32, UInt64>>; + using InputNativeType = ArgType::FieldType; + using ReturnNativeType = ReturnType::FieldType; static constexpr auto name = "quarters_add"; static constexpr auto is_nullable = true; - static inline ReturnNativeType execute(const InputNativeType& t, Int64 delta, bool& is_null) { - if constexpr (std::is_same_v || - std::is_same_v) { - return date_time_add(t, delta, is_null); - } else if constexpr (std::is_same_v) { - return date_time_add, - DateV2Value, ReturnNativeType>(t, delta, is_null); - } else { - return date_time_add, - DateV2Value, ReturnNativeType>(t, delta, - is_null); - } + static inline ReturnNativeType execute(const InputNativeType& t, Int32 delta, bool& is_null) { + return date_time_add(t, 3 * delta, is_null); } - static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } + static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } }; template struct SubtractIntervalImpl { using ReturnType = typename Transform::ReturnType; using InputNativeType = typename Transform::InputNativeType; + using ReturnNativeType = typename Transform::ReturnNativeType; static constexpr auto is_nullable = true; - static inline Int64 execute(const InputNativeType& t, Int64 delta, bool& is_null) { + static inline ReturnNativeType execute(const InputNativeType& t, Int32 delta, bool& is_null) { return Transform::execute(t, -delta, is_null); } @@ -244,57 +210,49 @@ struct SubtractYearsImpl : SubtractIntervalImpl, DateType static constexpr auto name = "years_sub"; }; -#define DECLARE_DATE_FUNCTIONS(NAME, FN_NAME, RETURN_TYPE, STMT) \ - template \ - struct NAME { \ - using ArgType1 = std::conditional_t< \ - std::is_same_v, UInt32, \ - std::conditional_t, UInt64, Int64>>; \ - using ArgType2 = std::conditional_t< \ - std::is_same_v, UInt32, \ - std::conditional_t, UInt64, Int64>>; \ - using DateValueType1 = std::conditional_t< \ - std::is_same_v, DateV2Value, \ - std::conditional_t, \ - DateV2Value, VecDateTimeValue>>; \ - using DateValueType2 = std::conditional_t< \ - std::is_same_v, DateV2Value, \ - std::conditional_t, \ - 
DateV2Value, VecDateTimeValue>>; \ - using ReturnType = RETURN_TYPE; \ - static constexpr auto name = #FN_NAME; \ - static constexpr auto is_nullable = false; \ - static inline ReturnType::FieldType execute(const ArgType1& t0, const ArgType2& t1, \ - bool& is_null) { \ - const auto& ts0 = reinterpret_cast(t0); \ - const auto& ts1 = reinterpret_cast(t1); \ - is_null = !ts0.is_valid_date() || !ts1.is_valid_date(); \ - return STMT; \ - } \ - static DataTypes get_variadic_argument_types() { \ - return {std::make_shared(), std::make_shared()}; \ - } \ }; +#define DECLARE_DATE_FUNCTIONS(NAME, FN_NAME, RETURN_TYPE, STMT) \ + template \ + struct NAME { \ + using NativeType1 = DateType1::FieldType; \ + using NativeType2 = DateType2::FieldType; \ + using DateValueType1 = date_cast::TypeToValueTypeV; \ + using DateValueType2 = date_cast::TypeToValueTypeV; \ + using ReturnType = RETURN_TYPE; \ + \ + static constexpr auto name = #FN_NAME; \ + static constexpr auto is_nullable = false; \ + static inline ReturnType::FieldType execute(const NativeType1& t0, const NativeType2& t1, \ + bool& is_null) { \ + const auto& ts0 = reinterpret_cast(t0); \ + const auto& ts1 = reinterpret_cast(t1); \ + is_null = !ts0.is_valid_date() || !ts1.is_valid_date(); \ + return (STMT); \ + } \ + static DataTypes get_variadic_argument_types() { \ + return {std::make_shared(), std::make_shared()}; \ + } \ }; + DECLARE_DATE_FUNCTIONS(DateDiffImpl, datediff, DataTypeInt32, (ts0.daynr() - ts1.daynr())); // DECLARE_DATE_FUNCTIONS(TimeDiffImpl, timediff, DataTypeTime, ts0.second_diff(ts1)); -// Expands to +// Expanded below because it uses the Time type, which needs special handling. template struct TimeDiffImpl { - using DateValueType1 = date_cast::TypeToValueTypeV; - using DateValueType2 = date_cast::TypeToValueTypeV; - using ArgType1 = date_cast::ValueTypeOfColumnV>; - using ArgType2 = date_cast::ValueTypeOfColumnV>; + using NativeType1 = date_cast::TypeToValueTypeV; + using NativeType2 = date_cast::TypeToValueTypeV; + using ArgType1 = DateType1::FieldType; + using ArgType2 = DateType2::FieldType; static constexpr bool UsingTimev2 = date_cast::IsV2() || date_cast::IsV2(); - using ReturnType = DataTypeTimeV2; + using ReturnType = DataTypeTimeV2; // TimeV1Type also uses double as its native type, same as v2.
static constexpr auto name = "timediff"; static constexpr int64_t limit_value = 3020399000000; // 838:59:59 convert to microsecond static inline ReturnType::FieldType execute(const ArgType1& t0, const ArgType2& t1, bool& is_null) { - const auto& ts0 = reinterpret_cast(t0); - const auto& ts1 = reinterpret_cast(t1); + const auto& ts0 = reinterpret_cast(t0); + const auto& ts1 = reinterpret_cast(t1); is_null = !ts0.is_valid_date() || !ts1.is_valid_date(); if constexpr (UsingTimev2) { // refer to https://dev.mysql.com/doc/refman/5.7/en/time.html @@ -318,381 +276,138 @@ struct TimeDiffImpl { #define TIME_DIFF_FUNCTION_IMPL(CLASS, NAME, UNIT) \ DECLARE_DATE_FUNCTIONS(CLASS, NAME, DataTypeInt64, datetime_diff(ts1, ts0)) +// all these functions implemented by datediff TIME_DIFF_FUNCTION_IMPL(YearsDiffImpl, years_diff, YEAR); TIME_DIFF_FUNCTION_IMPL(MonthsDiffImpl, months_diff, MONTH); TIME_DIFF_FUNCTION_IMPL(WeeksDiffImpl, weeks_diff, WEEK); TIME_DIFF_FUNCTION_IMPL(DaysDiffImpl, days_diff, DAY); TIME_DIFF_FUNCTION_IMPL(HoursDiffImpl, hours_diff, HOUR); -TIME_DIFF_FUNCTION_IMPL(MintueSDiffImpl, minutes_diff, MINUTE); +TIME_DIFF_FUNCTION_IMPL(MintuesDiffImpl, minutes_diff, MINUTE); TIME_DIFF_FUNCTION_IMPL(SecondsDiffImpl, seconds_diff, SECOND); TIME_DIFF_FUNCTION_IMPL(MilliSecondsDiffImpl, milliseconds_diff, MILLISECOND); TIME_DIFF_FUNCTION_IMPL(MicroSecondsDiffImpl, microseconds_diff, MICROSECOND); -#define TIME_FUNCTION_TWO_ARGS_IMPL(CLASS, NAME, FUNCTION, RETURN_TYPE) \ - template \ - struct CLASS { \ - using ArgType = std::conditional_t< \ - std::is_same_v, UInt32, \ - std::conditional_t, UInt64, Int64>>; \ - using DateValueType = std::conditional_t< \ - std::is_same_v, DateV2Value, \ - std::conditional_t, \ - DateV2Value, VecDateTimeValue>>; \ - using ReturnType = RETURN_TYPE; \ - static constexpr auto name = #NAME; \ - static constexpr auto is_nullable = false; \ - static inline ReturnType::FieldType execute(const ArgType& t0, const Int32 mode, \ - bool& is_null) { \ - const auto& ts0 = reinterpret_cast(t0); \ - is_null = !ts0.is_valid_date(); \ - return ts0.FUNCTION; \ - } \ - static DataTypes get_variadic_argument_types() { \ - return {std::make_shared(), std::make_shared()}; \ - } \ +#define TIME_FUNCTION_TWO_ARGS_IMPL(CLASS, NAME, FUNCTION, RETURN_TYPE) \ + template \ + struct CLASS { \ + using ArgType = DateType::FieldType; \ + using DateValueType = date_cast::TypeToValueTypeV; \ + using ReturnType = RETURN_TYPE; \ + \ + static constexpr auto name = #NAME; \ + static constexpr auto is_nullable = false; \ + static inline ReturnType::FieldType execute(const ArgType& t0, const Int32 mode, \ + bool& is_null) { \ + const auto& ts0 = reinterpret_cast(t0); \ + is_null = !ts0.is_valid_date(); \ + return ts0.FUNCTION; \ + } \ + static DataTypes get_variadic_argument_types() { \ + return {std::make_shared(), std::make_shared()}; \ + } \ } TIME_FUNCTION_TWO_ARGS_IMPL(ToYearWeekTwoArgsImpl, yearweek, year_week(mysql_week_mode(mode)), DataTypeInt32); TIME_FUNCTION_TWO_ARGS_IMPL(ToWeekTwoArgsImpl, week, week(mysql_week_mode(mode)), DataTypeInt8); -template +// only use for FunctionDateOrDateTimeComputation. FromTypes are NativeTypes. 
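+// Reference sketch (illustrative, mirroring the datetype_cast.hpp diagram earlier in this patch):
+// the "native type" is the raw storage integer reachable via ::FieldType, e.g.
+//   DataTypeDate::FieldType       -> Int64
+//   DataTypeDateTimeV2::FieldType -> UInt64
+// DateTimeOp below computes on these integers and reinterprets them as date values on demand.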
+template struct DateTimeOp { - // use for (DateTime, DateTime) -> other_type - static void vector_vector(const PaddedPODArray& vec_from0, - const PaddedPODArray& vec_from1, - PaddedPODArray& vec_to, NullMap& null_map) { - size_t size = vec_from0.size(); - vec_to.resize(size); - null_map.resize_fill(size, false); - - for (size_t i = 0; i < size; ++i) { - // here reinterpret_cast is used to convert uint8& to bool&, - // otherwise it will be implicitly converted to bool, causing the rvalue to fail to match the lvalue. - // the same goes for the following. - vec_to[i] = Transform::execute(vec_from0[i], vec_from1[i], - reinterpret_cast(null_map[i])); - } - } - static void vector_vector(const PaddedPODArray& vec_from0, - const PaddedPODArray& vec_from1, - PaddedPODArray& vec_to) { - size_t size = vec_from0.size(); - vec_to.resize(size); - - bool invalid = true; - for (size_t i = 0; i < size; ++i) { - // here reinterpret_cast is used to convert uint8& to bool&, - // otherwise it will be implicitly converted to bool, causing the rvalue to fail to match the lvalue. - // the same goes for the following. - vec_to[i] = Transform::execute(vec_from0[i], vec_from1[i], invalid); - - if (UNLIKELY(invalid)) { - throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} {} {} out of range", - Transform::name, vec_from0[i], vec_from1[i]); - } + using NativeType0 = DataType0::FieldType; + using NativeType1 = DataType1::FieldType; + using ValueType0 = date_cast::TypeToValueTypeV; + // arg1 may be just a delta value (e.g. DataTypeInt32, not a datelike type) + constexpr static bool CastType1 = std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v; + + static void throw_out_of_bound(NativeType0 arg0, NativeType1 arg1) { + auto value0 = binary_cast(arg0); + char buf0[40]; + char* end0 = value0.to_string(buf0); + if constexpr (CastType1) { + auto value1 = binary_cast>(arg1); + char buf1[40]; + char* end1 = value1.to_string(buf1); + throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} of {}, {} out of range", + Transform::name, std::string_view {buf0, end0 - 1}, + std::string_view {buf1, end1 - 1}); // minus 1 to skip the trailing \0 + } else { + throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} of {}, {} out of range", + Transform::name, std::string_view {buf0, end0 - 1}, arg1); + } } - // use for (DateTime, int32) -> other_type - static void vector_vector(const PaddedPODArray& vec_from0, - const PaddedPODArray& vec_from1, - PaddedPODArray& vec_to, NullMap& null_map) { - size_t size = vec_from0.size(); - vec_to.resize(size); - null_map.resize_fill(size, false); - - for (size_t i = 0; i < size; ++i) - vec_to[i] = Transform::execute(vec_from0[i], vec_from1[i], - reinterpret_cast(null_map[i])); - } - static void vector_vector(const PaddedPODArray& vec_from0, - const PaddedPODArray& vec_from1, - PaddedPODArray& vec_to) { + // executing on a null value's nested value may cause a false-positive exception, so use the null maps to skip such rows.
+ static void vector_vector(const PaddedPODArray& vec_from0, + const PaddedPODArray& vec_from1, + PaddedPODArray& vec_to, const NullMap* nullmap0, + const NullMap* nullmap1) { size_t size = vec_from0.size(); vec_to.resize(size); + bool invalid = false; - bool invalid = true; for (size_t i = 0; i < size; ++i) { + if ((nullmap0 && (*nullmap0)[i]) || (nullmap1 && (*nullmap1)[i])) [[unlikely]] { + continue; + } vec_to[i] = Transform::execute(vec_from0[i], vec_from1[i], invalid); if (UNLIKELY(invalid)) { - throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} {} {} out of range", - Transform::name, vec_from0[i], vec_from1[i]); + throw_out_of_bound(vec_from0[i], vec_from1[i]); } } } - // use for (DateTime, const DateTime) -> other_type - static void vector_constant(const PaddedPODArray& vec_from, - PaddedPODArray& vec_to, NullMap& null_map, Int128& delta) { - size_t size = vec_from.size(); - vec_to.resize(size); - null_map.resize_fill(size, false); - - for (size_t i = 0; i < size; ++i) { - vec_to[i] = - Transform::execute(vec_from[i], delta, reinterpret_cast(null_map[i])); + static void vector_constant(const PaddedPODArray& vec_from, + PaddedPODArray& vec_to, const NativeType1& delta, + const NullMap* nullmap0, const NullMap* nullmap1) { + if (nullmap1 && (*nullmap1)[0]) [[unlikely]] { + return; } - } - static void vector_constant(const PaddedPODArray& vec_from, - PaddedPODArray& vec_to, Int128& delta) { size_t size = vec_from.size(); vec_to.resize(size); + bool invalid = false; - bool invalid = true; for (size_t i = 0; i < size; ++i) { - vec_to[i] = Transform::execute(vec_from[i], delta, invalid); - - if (UNLIKELY(invalid)) { - throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} {} {} out of range", - Transform::name, vec_from[i], delta); + if (nullmap0 && (*nullmap0)[i]) [[unlikely]] { + continue; } - } - } - - // use for (DateTime, const ColumnNumber) -> other_type - static void vector_constant(const PaddedPODArray& vec_from, - PaddedPODArray& vec_to, NullMap& null_map, Int64 delta) { - size_t size = vec_from.size(); - vec_to.resize(size); - null_map.resize_fill(size, false); - - for (size_t i = 0; i < size; ++i) { - vec_to[i] = - Transform::execute(vec_from[i], delta, reinterpret_cast(null_map[i])); - } - } - static void vector_constant(const PaddedPODArray& vec_from, - PaddedPODArray& vec_to, Int64 delta) { - size_t size = vec_from.size(); - vec_to.resize(size); - bool invalid = true; - - for (size_t i = 0; i < size; ++i) { vec_to[i] = Transform::execute(vec_from[i], delta, invalid); if (UNLIKELY(invalid)) { - throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} {} {} out of range", - Transform::name, vec_from[i], delta); + throw_out_of_bound(vec_from[i], delta); } } } - // use for (const DateTime, ColumnNumber) -> other_type - static void constant_vector(const FromType1& from, PaddedPODArray& vec_to, - NullMap& null_map, const IColumn& delta) { - size_t size = delta.size(); - vec_to.resize(size); - null_map.resize_fill(size, false); - - for (size_t i = 0; i < size; ++i) { - vec_to[i] = Transform::execute(from, delta.get_int(i), - reinterpret_cast(null_map[i])); + static void constant_vector(const NativeType0& from, PaddedPODArray& vec_to, + const PaddedPODArray& delta, const NullMap* nullmap0, + const NullMap* nullmap1) { + if (nullmap0 && (*nullmap0)[0]) [[unlikely]] { + return; } - } - static void constant_vector(const FromType1& from, PaddedPODArray& vec_to, - const IColumn& delta) { size_t size = delta.size(); vec_to.resize(size); - bool invalid = true; + bool invalid = false; 
for (size_t i = 0; i < size; ++i) { - vec_to[i] = Transform::execute(from, delta.get_int(i), invalid); - - if (UNLIKELY(invalid)) { - throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} {} {} out of range", - Transform::name, from, delta.get_int(i)); + if (nullmap1 && (*nullmap1)[i]) [[unlikely]] { + continue; } - } - } - - static void constant_vector(const FromType1& from, PaddedPODArray& vec_to, - NullMap& null_map, const PaddedPODArray& delta) { - size_t size = delta.size(); - vec_to.resize(size); - null_map.resize_fill(size, false); - - for (size_t i = 0; i < size; ++i) { - vec_to[i] = Transform::execute(from, delta[i], reinterpret_cast(null_map[i])); - } - } - - static void constant_vector(const FromType1& from, PaddedPODArray& vec_to, - const PaddedPODArray& delta) { - size_t size = delta.size(); - vec_to.resize(size); - bool invalid = true; - - for (size_t i = 0; i < size; ++i) { vec_to[i] = Transform::execute(from, delta[i], invalid); if (UNLIKELY(invalid)) { - throw Exception(ErrorCode::OUT_OF_BOUND, "Operation {} {} {} out of range", - Transform::name, from, delta[i]); - } - } - } -}; - -template -struct DateTimeAddIntervalImpl { - static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, - size_t input_rows_count) { - using ToType = typename Transform::ReturnType::FieldType; - using Op = DateTimeOp; - - const ColumnPtr source_col = remove_nullable(block.get_by_position(arguments[0]).column); - const auto is_nullable = block.get_by_position(result).type->is_nullable(); - if (const auto* sources = check_and_get_column>(source_col.get())) { - auto col_to = ColumnVector::create(); - auto delta_column_ptr = remove_nullable(block.get_by_position(arguments[1]).column); - const IColumn& delta_column = *delta_column_ptr; - - if (is_nullable) { - auto null_map = ColumnUInt8::create(input_rows_count, 0); - if (const auto* delta_const_column = - typeid_cast(&delta_column)) { - if (delta_const_column->get_field().get_type() == Field::Types::Int128) { - Op::vector_constant(sources->get_data(), col_to->get_data(), - null_map->get_data(), - delta_const_column->get_field().get()); - } else if (delta_const_column->get_field().get_type() == Field::Types::Int64) { - Op::vector_constant(sources->get_data(), col_to->get_data(), - null_map->get_data(), - delta_const_column->get_field().get()); - } else if (delta_const_column->get_field().get_type() == Field::Types::UInt64) { - Op::vector_constant(sources->get_data(), col_to->get_data(), - null_map->get_data(), - delta_const_column->get_field().get()); - } else { - Op::vector_constant(sources->get_data(), col_to->get_data(), - null_map->get_data(), - delta_const_column->get_field().get()); - } - } else { - if (const auto* delta_vec_column0 = - check_and_get_column>(delta_column)) { - Op::vector_vector(sources->get_data(), delta_vec_column0->get_data(), - col_to->get_data(), null_map->get_data()); - } else { - const auto* delta_vec_column1 = - check_and_get_column>(delta_column); - DCHECK(delta_vec_column1 != nullptr); - Op::vector_vector(sources->get_data(), delta_vec_column1->get_data(), - col_to->get_data(), null_map->get_data()); - } - } - if (const auto* nullable_col = check_and_get_column( - block.get_by_position(arguments[0]).column.get())) { - NullMap& result_null_map = assert_cast(*null_map).get_data(); - const NullMap& src_null_map = - assert_cast(nullable_col->get_null_map_column()) - .get_data(); - - VectorizedUtils::update_null_map(result_null_map, src_null_map); - } - if (const auto* nullable_col = 
check_and_get_column( - block.get_by_position(arguments[1]).column.get())) { - NullMap& result_null_map = assert_cast(*null_map).get_data(); - const NullMap& src_null_map = - assert_cast(nullable_col->get_null_map_column()) - .get_data(); - - VectorizedUtils::update_null_map(result_null_map, src_null_map); - } - block.get_by_position(result).column = - ColumnNullable::create(std::move(col_to), std::move(null_map)); - } else { - if (const auto* delta_const_column = - typeid_cast(&delta_column)) { - if (delta_const_column->get_field().get_type() == Field::Types::Int128) { - Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); - } else if (delta_const_column->get_field().get_type() == Field::Types::Int64) { - Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); - } else if (delta_const_column->get_field().get_type() == Field::Types::UInt64) { - Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); - } else { - Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); - } - } else { - if (const auto* delta_vec_column0 = - check_and_get_column>(delta_column)) { - Op::vector_vector(sources->get_data(), delta_vec_column0->get_data(), - col_to->get_data()); - } else { - const auto* delta_vec_column1 = - check_and_get_column>(delta_column); - DCHECK(delta_vec_column1 != nullptr); - Op::vector_vector(sources->get_data(), delta_vec_column1->get_data(), - col_to->get_data()); - } - } - block.replace_by_position(result, std::move(col_to)); + throw_out_of_bound(from, delta[i]); } - } else if (const auto* sources_const = - check_and_get_column_const>(source_col.get())) { - auto col_to = ColumnVector::create(); - if (is_nullable) { - auto null_map = ColumnUInt8::create(input_rows_count, 0); - auto not_nullable_column_ptr_arg1 = - remove_nullable(block.get_by_position(arguments[1]).column); - if (const auto* delta_vec_column = check_and_get_column>( - *not_nullable_column_ptr_arg1)) { - Op::constant_vector(sources_const->template get_value(), - col_to->get_data(), null_map->get_data(), - delta_vec_column->get_data()); - } else { - Op::constant_vector(sources_const->template get_value(), - col_to->get_data(), null_map->get_data(), - *not_nullable_column_ptr_arg1); - } - if (const auto* nullable_col = check_and_get_column( - block.get_by_position(arguments[0]).column.get())) { - NullMap& result_null_map = assert_cast(*null_map).get_data(); - const NullMap& src_null_map = - assert_cast(nullable_col->get_null_map_column()) - .get_data(); - - VectorizedUtils::update_null_map(result_null_map, src_null_map); - } - if (const auto* nullable_col = check_and_get_column( - block.get_by_position(arguments[1]).column.get())) { - NullMap& result_null_map = assert_cast(*null_map).get_data(); - const NullMap& src_null_map = - assert_cast(nullable_col->get_null_map_column()) - .get_data(); - - VectorizedUtils::update_null_map(result_null_map, src_null_map); - } - block.get_by_position(result).column = - ColumnNullable::create(std::move(col_to), std::move(null_map)); - } else { - if (const auto* delta_vec_column = check_and_get_column>( - *block.get_by_position(arguments[1]).column)) { - Op::constant_vector(sources_const->template get_value(), - col_to->get_data(), delta_vec_column->get_data()); - } else { - Op::constant_vector(sources_const->template get_value(), - col_to->get_data(), - *block.get_by_position(arguments[1]).column); - 
} - block.replace_by_position(result, std::move(col_to)); - } - } else { - return Status::RuntimeError( - "Illegal column {} of first argument and type {} of function {}", - block.get_by_position(arguments[0]).column->get_name(), - block.get_by_position(arguments[0]).type->get_name(), Transform::name); } - return Status::OK(); } }; +// Used for date(time) add/sub date(time)/integer. the input types are variadic and dispatch in execute. the return type is +// decided by Transform template class FunctionDateOrDateTimeComputation : public IFunction { public: @@ -708,41 +423,14 @@ class FunctionDateOrDateTimeComputation : public IFunction { size_t get_number_of_arguments() const override { return 0; } DataTypes get_variadic_argument_types_impl() const override { - if constexpr (has_variadic_argument) return Transform::get_variadic_argument_types(); + if constexpr (has_variadic_argument) { + return Transform::get_variadic_argument_types(); + } return {}; } bool use_default_implementation_for_nulls() const override { return false; } DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { - if (arguments.size() != 2 && arguments.size() != 3) { - throw doris::Exception(ErrorCode::INVALID_ARGUMENT, - "Number of arguments for function {} doesn't match: passed {} , " - "should be 2 or 3", - get_name(), arguments.size()); - } - - if (arguments.size() == 2) { - if (!is_date_or_datetime(remove_nullable(arguments[0].type)) && - !is_date_v2_or_datetime_v2(remove_nullable(arguments[0].type))) { - throw doris::Exception( - ErrorCode::INVALID_ARGUMENT, - "Illegal type {} of argument of function {}. Should be a date or a date " - "with time", - arguments[0].type->get_name(), get_name()); - } - } else { - if (!WhichDataType(remove_nullable(arguments[0].type)).is_date_time() || - !WhichDataType(remove_nullable(arguments[0].type)).is_date_time_v2() || - !WhichDataType(remove_nullable(arguments[2].type)).is_string()) { - throw doris::Exception( - ErrorCode::INVALID_ARGUMENT, - "Function {} supports 2 or 3 arguments. The 1st argument must be of type " - "Date or DateTime. The 2nd argument must be number. The 3rd argument " - "(optional) must be a constant string with timezone name. The timezone " - "argument is allowed only when the 1st argument has the type DateTime", - get_name()); - } - } RETURN_REAL_TYPE_FOR_DATEV2_FUNCTION(typename Transform::ReturnType); } @@ -753,48 +441,164 @@ class FunctionDateOrDateTimeComputation : public IFunction { WhichDataType which1(remove_nullable(first_arg_type)); WhichDataType which2(remove_nullable(second_arg_type)); + /// now dispatch with the two arguments' type. no need to consider return type because the same arguments decide a + /// unique return type which could be extracted from Transform. + + // for all `xxx_add/sub`, the second arg is int32. + // for `week/yearweek`, if it has the second arg, it's int32. + // in these situations, the first would be any datelike type. 
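// Illustrative aside (not part of the patch): a condensed sketch of the
// dispatch below, with illustrative template-argument names (the concrete
// arguments were lost in extraction). Each (first-arg, second-arg) type pair
// instantiates one execute_inner, and that pair alone fixes Transform's
// ReturnType, so no separate dispatch on the return type is needed:
//
//     switch (which1.idx) {              // runtime type of argument 0
//     case TypeIndex::DateV2:
//         return execute_inner<DataTypeDateV2, DataTypeInt32>(block, arguments,
//                                                             result, input_rows_count);
//     ...                                // one case per supported date type
//     }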
+ if (which2.is_int32()) { + switch (which1.idx) { + case TypeIndex::Date: + return execute_inner(block, arguments, result, + input_rows_count); + break; + case TypeIndex::DateTime: + return execute_inner(block, arguments, result, + input_rows_count); + break; + case TypeIndex::DateV2: + return execute_inner(block, arguments, result, + input_rows_count); + break; + case TypeIndex::DateTimeV2: + return execute_inner(block, arguments, result, + input_rows_count); + break; + default: + return Status::InternalError("Illegal argument {} and {} of function {}", + block.get_by_position(arguments[0]).type->get_name(), + block.get_by_position(arguments[1]).type->get_name(), + get_name()); + } + } + // then consider datelike - datelike. everything is possible here as well. + // for `xxx_diff`, every combination of V2 is possible. but for V1 we only support Datetime - Datetime if (which1.is_date_v2() && which2.is_date_v2()) { - return DateTimeAddIntervalImpl::execute(block, arguments, - result, - input_rows_count); + return execute_inner(block, arguments, result, + input_rows_count); } else if (which1.is_date_time_v2() && which2.is_date_time_v2()) { - return DateTimeAddIntervalImpl< - DataTypeDateTimeV2::FieldType, Transform, - DataTypeDateTimeV2::FieldType>::execute(block, arguments, result, - input_rows_count); - } else if (which1.is_date_time() && which2.is_date_time()) { - return DateTimeAddIntervalImpl::execute(block, arguments, - result, - input_rows_count); + return execute_inner(block, arguments, result, + input_rows_count); } else if (which1.is_date_v2() && which2.is_date_time_v2()) { - return DateTimeAddIntervalImpl< - DataTypeDateV2::FieldType, Transform, - DataTypeDateTimeV2::FieldType>::execute(block, arguments, result, - input_rows_count); + return execute_inner(block, arguments, result, + input_rows_count); } else if (which1.is_date_time_v2() && which2.is_date_v2()) { - return DateTimeAddIntervalImpl::execute(block, arguments, - result, - input_rows_count); - } else if (which1.is_date()) { - return DateTimeAddIntervalImpl::execute( - block, arguments, result, input_rows_count); - } else if (which1.is_date_time()) { - return DateTimeAddIntervalImpl::execute( - block, arguments, result, input_rows_count); - } else if (which1.is_date_v2()) { - return DateTimeAddIntervalImpl::execute( - block, arguments, result, input_rows_count); - } else if (which1.is_date_time_v2()) { - return DateTimeAddIntervalImpl::execute( - block, arguments, result, input_rows_count); - } else { - return Status::RuntimeError("Illegal type {} of argument of function {}", - block.get_by_position(arguments[0]).type->get_name(), - get_name()); + return execute_inner(block, arguments, result, + input_rows_count); + } else if (which1.is_date_time() && which2.is_date_time()) { + return execute_inner(block, arguments, result, + input_rows_count); } + return Status::InternalError("Illegal argument {} and {} of function {}", + block.get_by_position(arguments[0]).type->get_name(), + block.get_by_position(arguments[1]).type->get_name(), + get_name()); + } + + template + static Status execute_inner(Block& block, const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + using NativeType0 = DataType0::FieldType; + using NativeType1 = DataType1::FieldType; + using ResFieldType = typename Transform::ReturnType::FieldType; + using Op = DateTimeOp; + + auto get_null_map = [](const ColumnPtr& col) -> const NullMap* { + if (col->is_nullable()) { + return &static_cast(*col).get_null_map_data(); + } + // 
Const(Nullable) + if (const auto* const_col = check_and_get_column(col.get()); + const_col != nullptr && const_col->is_concrete_nullable()) { + return &static_cast(const_col->get_data_column()) + .get_null_map_data(); + } + return nullptr; + }; + + //ATTN: those null maps may be nullmap of ColumnConst(only 1 row) + // src column is always datelike type. + ColumnPtr& col0 = block.get_by_position(arguments[0]).column; + const NullMap* nullmap0 = get_null_map(col0); + // the second column may be delta column(xx_add/sub) or datelike column(xxx_diff) + ColumnPtr& col1 = block.get_by_position(arguments[1]).column; + const NullMap* nullmap1 = get_null_map(col1); + + // if null wrapped, extract nested column as src_nested_col + const ColumnPtr src_nested_col = remove_nullable(col0); + const auto result_nullable = block.get_by_position(result).type->is_nullable(); + auto res_col = ColumnVector::create(); + + // vector-const or vector-vector + if (const auto* sources = + check_and_get_column>(src_nested_col.get())) { + const ColumnPtr nest_col1 = remove_nullable(col1); + bool rconst = false; + // vector-const + if (const auto* nest_col1_const = check_and_get_column(*nest_col1)) { + rconst = true; + const auto col1_inside_const = assert_cast&>( + nest_col1_const->get_data_column()); + Op::vector_constant(sources->get_data(), res_col->get_data(), + col1_inside_const.get_data()[0], nullmap0, nullmap1); + } else { // vector-vector + const auto concrete_col1 = + assert_cast&>(*nest_col1); + Op::vector_vector(sources->get_data(), concrete_col1.get_data(), + res_col->get_data(), nullmap0, nullmap1); + } + + // update result nullmap with inputs + if (result_nullable) { + auto null_map = ColumnBool::create(input_rows_count, 0); + NullMap& result_null_map = assert_cast(*null_map).get_data(); + if (nullmap0) { + VectorizedUtils::update_null_map(result_null_map, *nullmap0); + } + if (nullmap1) { + VectorizedUtils::update_null_map(result_null_map, *nullmap1, rconst); + } + block.get_by_position(result).column = + ColumnNullable::create(std::move(res_col), std::move(null_map)); + } else { + block.replace_by_position(result, std::move(res_col)); + } + } else if (const auto* sources_const = + check_and_get_column_const>( + src_nested_col.get())) { + // const-vector + const auto col0_inside_const = + assert_cast&>(sources_const->get_data_column()); + const ColumnPtr nested_col1 = remove_nullable(col1); + const auto concrete_col1 = assert_cast&>(*nested_col1); + Op::constant_vector(col0_inside_const.get_data()[0], res_col->get_data(), + concrete_col1.get_data(), nullmap0, nullmap1); + + // update result nullmap with inputs + if (result_nullable) { + auto null_map = ColumnBool::create(input_rows_count, 0); + NullMap& result_null_map = assert_cast(*null_map).get_data(); + if (nullmap0) { + VectorizedUtils::update_null_map(result_null_map, *nullmap0, true); + } + if (nullmap1) { // no const-const here. default impl deal it. + VectorizedUtils::update_null_map(result_null_map, *nullmap1); + } + block.get_by_position(result).column = + ColumnNullable::create(std::move(res_col), std::move(null_map)); + } else { + block.replace_by_position(result, std::move(res_col)); + } + } else { // no const-const here. default impl deal it. 
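// Illustrative aside (not part of the patch): per the "no const-const"
// comment above, the all-constant case never reaches execute_inner. The
// function framework's default implementation for constants presumably
// unwraps two constant arguments, evaluates on a single row, and re-wraps the
// result as a const column, so only vector-const, vector-vector, and
// const-vector layouts remain; anything else is reported through the
// InternalError below.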
+        return Status::InternalError(
+                "Illegal columns for function {}:\n1: {} with type {}\n2: {} with type {}",
+                Transform::name, block.get_by_position(arguments[0]).name,
+                block.get_by_position(arguments[0]).type->get_name(),
+                block.get_by_position(arguments[1]).name,
+                block.get_by_position(arguments[1]).type->get_name());
+    }
+    return Status::OK();
 }
};

@@ -1170,7 +974,9 @@ class CurrentDateFunctionBuilder : public FunctionBuilderImpl {
     FunctionBasePtr build_impl(const ColumnsWithTypeAndName& arguments,
                                const DataTypePtr& return_type) const override {
         DataTypes data_types(arguments.size());
-        for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type;
+        for (size_t i = 0; i < arguments.size(); ++i) {
+            data_types[i] = arguments[i].type;
+        }
         if (is_date_v2(return_type)) {
             auto function = FunctionCurrentDateOrDateTime<
                     CurrentDateImpl>::create();

diff --git a/be/src/vec/functions/function_date_or_datetime_computation_v2.cpp b/be/src/vec/functions/function_date_or_datetime_computation_v2.cpp
index ec9560456c131a5..db43bf1818d38f3 100644
--- a/be/src/vec/functions/function_date_or_datetime_computation_v2.cpp
+++ b/be/src/vec/functions/function_date_or_datetime_computation_v2.cpp
@@ -95,14 +95,14 @@ using FunctionDatetimeV2SubYears =
     FUNCTION_DATEV2_WITH_TWO_ARGS(NAME, IMPL, DataTypeDateTimeV2, DataTypeDateV2) \
     FUNCTION_DATEV2_WITH_TWO_ARGS(NAME, IMPL, DataTypeDateV2, DataTypeDateTimeV2) \
     FUNCTION_DATEV2_WITH_TWO_ARGS(NAME, IMPL, DataTypeDateV2, DataTypeDateV2)
-
+// these diff functions accept all v2 types. but for v1 only datetime.
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2DateDiff, DateDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2TimeDiff, TimeDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2YearsDiff, YearsDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2MonthsDiff, MonthsDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2WeeksDiff, WeeksDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2HoursDiff, HoursDiffImpl)
-ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2MinutesDiff, MintueSDiffImpl)
+ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2MinutesDiff, MintuesDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2SecondsDiff, SecondsDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2DaysDiff, DaysDiffImpl)
 ALL_FUNCTION_DATEV2_WITH_TWO_ARGS(FunctionDatetimeV2MilliSecondsDiff, MilliSecondsDiffImpl)

diff --git a/be/src/vec/functions/function_helpers.h b/be/src/vec/functions/function_helpers.h
index 8c7eec28fe2f6f8..818badeee4551b7 100644
--- a/be/src/vec/functions/function_helpers.h
+++ b/be/src/vec/functions/function_helpers.h
@@ -20,10 +20,8 @@
 #pragma once

-#include
-
+#include
 #include
-#include

 #include "vec/columns/column.h"
 #include "vec/columns/column_const.h"
@@ -53,11 +51,15 @@ const Type* check_and_get_data_type(const IDataType* data_type) {
 template
 const ColumnConst* check_and_get_column_const(const IColumn* column) {
-    if (!column || !is_column_const(*column)) return {};
+    if (!column || !is_column_const(*column)) {
+        return nullptr;
+    }

-    const ColumnConst* res = assert_cast(column);
+    const auto* res = assert_cast(column);

-    if (!check_column(&res->get_data_column())) return {};
+    if (!check_column(&res->get_data_column())) {
+        return nullptr;
+    }

     return res;
 }

@@ -66,7 +68,9 @@ template
 const Type* check_and_get_column_constData(const IColumn* column) {
     const ColumnConst* res = check_and_get_column_const(column);

-    if (!res) return {};
+    if (!res) {
+
return nullptr; + } return static_cast(&res->get_data_column()); } diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index a3809bf8ec6a486..1c4c0906b80d3e9 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -69,7 +69,7 @@ using Row = std::pair; using DataSet = std::vector; using InputTypeSet = std::vector; -// FIXME: should use exception or expected to deal null value.w +// FIXME: should use exception or expected to deal null value. int64_t str_to_date_time(std::string datetime_str, bool data_time = true); uint32_t str_to_date_v2(std::string datetime_str, std::string datetime_format); uint64_t str_to_datetime_v2(std::string datetime_str, std::string datetime_format); diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp index a4299de35576086..ddfc722c7ab452c 100644 --- a/be/test/vec/function/function_time_test.cpp +++ b/be/test/vec/function/function_time_test.cpp @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include @@ -299,14 +300,22 @@ TEST(VTimestampFunctionsTest, years_add_test) { InputTypeSet input_types = {TypeIndex::DateTime, TypeIndex::Int32}; - DataSet data_set = { - {{std::string("2020-05-23 00:00:00"), 5}, str_to_date_time("2025-05-23 00:00:00")}, - {{std::string("2020-05-23 00:00:00"), -5}, str_to_date_time("2015-05-23 00:00:00")}, - {{std::string(""), 5}, Null()}, - {{std::string("2020-05-23 00:00:00"), 8000}, Null()}, - {{Null(), 5}, Null()}}; + { + DataSet data_set = { + {{std::string("2020-05-23 00:00:00"), 5}, str_to_date_time("2025-05-23 00:00:00")}, + {{std::string("2020-05-23 00:00:00"), -5}, str_to_date_time("2015-05-23 00:00:00")}, + {{std::string(""), 5}, Null()}, + {{Null(), 5}, Null()}}; - static_cast(check_function(func_name, input_types, data_set)); + static_cast(check_function(func_name, input_types, data_set)); + } + + { + DataSet data_set = {{{std::string("2020-05-23 00:00:00"), 8000}, Null()}}; + + EXPECT_ANY_THROW(static_cast( + check_function(func_name, input_types, data_set))); + } } TEST(VTimestampFunctionsTest, years_sub_test) { @@ -314,14 +323,22 @@ TEST(VTimestampFunctionsTest, years_sub_test) { InputTypeSet input_types = {TypeIndex::DateTime, TypeIndex::Int32}; - DataSet data_set = { - {{std::string("2020-05-23 00:00:00"), 5}, str_to_date_time("2015-05-23 00:00:00")}, - {{std::string("2020-05-23 00:00:00"), -5}, str_to_date_time("2025-05-23 00:00:00")}, - {{std::string(""), 5}, Null()}, - {{std::string("2020-05-23 00:00:00"), 3000}, Null()}, - {{Null(), 5}, Null()}}; + { + DataSet data_set = { + {{std::string("2020-05-23 00:00:00"), 5}, str_to_date_time("2015-05-23 00:00:00")}, + {{std::string("2020-05-23 00:00:00"), -5}, str_to_date_time("2025-05-23 00:00:00")}, + {{std::string(""), 5}, Null()}, + {{Null(), 5}, Null()}}; - static_cast(check_function(func_name, input_types, data_set)); + static_cast(check_function(func_name, input_types, data_set)); + } + + { + DataSet data_set = {{{std::string("2020-05-23 00:00:00"), 3000}, Null()}}; + + EXPECT_ANY_THROW(static_cast( + check_function(func_name, input_types, data_set))); + } } TEST(VTimestampFunctionsTest, months_add_test) { @@ -1043,11 +1060,18 @@ TEST(VTimestampFunctionsTest, years_add_v2_test) { {{std::string("2020-05-23"), 5}, str_to_date_v2("2025-05-23", "%Y-%m-%d")}, {{std::string("2020-05-23"), -5}, str_to_date_v2("2015-05-23", "%Y-%m-%d")}, {{std::string(""), 5}, Null()}, 
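// Illustrative aside (not part of the patch): with throw_out_of_bound in
// place, out-of-range additions (e.g. year 2020 + 8000) now throw rather than
// produce NULL, which is why such rows are moved out of the NULL-expectation
// data sets and into the separate EXPECT_ANY_THROW blocks added below.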
- {{std::string("2020-05-23"), 8000}, Null()}, {{Null(), 5}, Null()}}; static_cast(check_function(func_name, input_types, data_set)); } + { + InputTypeSet input_types = {TypeIndex::DateV2, TypeIndex::Int32}; + + DataSet data_set = {{{std::string("2020-05-23"), 8000}, Null()}}; + + EXPECT_ANY_THROW(static_cast( + check_function(func_name, input_types, data_set))); + } { InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::Int32}; @@ -1057,12 +1081,19 @@ TEST(VTimestampFunctionsTest, years_add_v2_test) { {{std::string("2020-05-23 00:00:11.123"), -5}, str_to_datetime_v2("2015-05-23 00:00:11.123", "%Y-%m-%d %H:%i:%s.%f")}, {{std::string(""), 5}, Null()}, - {{std::string("2020-05-23 00:00:11.123"), 8000}, Null()}, {{Null(), 5}, Null()}}; static_cast( check_function(func_name, input_types, data_set)); } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::Int32}; + + DataSet data_set = {{{std::string("2020-05-23 00:00:11.123"), 8000}, Null()}}; + + EXPECT_ANY_THROW(static_cast( + check_function(func_name, input_types, data_set))); + } } TEST(VTimestampFunctionsTest, years_sub_v2_test) { @@ -1075,11 +1106,19 @@ TEST(VTimestampFunctionsTest, years_sub_v2_test) { {{std::string("2020-05-23"), 5}, str_to_date_v2("2015-05-23", "%Y-%m-%d")}, {{std::string("2020-05-23"), -5}, str_to_date_v2("2025-05-23", "%Y-%m-%d")}, {{std::string(""), 5}, Null()}, - {{std::string("2020-05-23"), 3000}, Null()}, {{Null(), 5}, Null()}}; static_cast(check_function(func_name, input_types, data_set)); } + { + InputTypeSet input_types = {TypeIndex::DateV2, TypeIndex::Int32}; + + DataSet data_set = {{{std::string("2020-05-23"), 3000}, Null()}}; + + EXPECT_ANY_THROW(static_cast( + check_function(func_name, input_types, data_set))); + } + { InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::Int32}; @@ -1088,12 +1127,19 @@ TEST(VTimestampFunctionsTest, years_sub_v2_test) { {{std::string("2020-05-23 00:00:11.123"), -5}, str_to_datetime_v2("2025-05-23 00:00:11.123", "%Y-%m-%d %H:%i:%s.%f")}, {{std::string(""), 5}, Null()}, - {{std::string("2020-05-23 00:00:11.123"), 3000}, Null()}, {{Null(), 5}, Null()}}; static_cast( check_function(func_name, input_types, data_set)); } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::Int32}; + + DataSet data_set = {{{std::string("2020-05-23 00:00:11.123"), 3000}, Null()}}; + + EXPECT_ANY_THROW(static_cast( + check_function(func_name, input_types, data_set))); + } } TEST(VTimestampFunctionsTest, months_add_v2_test) { diff --git a/regression-test/suites/correctness/test_date_function_const.groovy b/regression-test/suites/correctness/test_date_function_const.groovy index d1ba4db4e689877..e9bf11bd24ebd67 100644 --- a/regression-test/suites/correctness/test_date_function_const.groovy +++ b/regression-test/suites/correctness/test_date_function_const.groovy @@ -61,6 +61,6 @@ suite("test_date_function_const") { test { sql """select date_add("1900-01-01 12:00:00.123456", interval 10000000000 month);""" - exception "Operation months_add 133705200962757184 1410065408 out of range" + exception "Operation months_add of 1900-01-01 12:00:00.123456, 1410065408 out of range" } } diff --git a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_or_datetime_computation_negative.groovy b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_or_datetime_computation_negative.groovy index 282a28a903e4a02..53b7385b1535df7 100644 --- 
a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_or_datetime_computation_negative.groovy +++ b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_or_datetime_computation_negative.groovy @@ -14,6 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + suite("test_date_or_datetime_computation_negative") { sql """ CREATE TABLE IF NOT EXISTS test_date_or_datetime_computation_negative ( `row_id` LARGEINT NOT NULL, @@ -50,8 +51,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_sub(datetime, interval 1 year) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_sub(date_null, interval 1 year), date_sub(dateV2_null, interval 1 year), date_sub(datetime_null, interval 1 year) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_1 """SELECT date_sub(date_null, interval 1 year), date_sub(dateV2_null, interval 1 year), date_sub(datetime_null, interval 1 year) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """SELECT date_sub(date, interval 1 month) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" @@ -65,8 +69,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_sub(datetime, interval 1 month) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_sub(date_null, interval 1 month), date_sub(dateV2_null, interval 1 month), date_sub(datetime_null, interval 1 month) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_2 """SELECT date_sub(date_null, interval 1 month), date_sub(dateV2_null, interval 1 month), date_sub(datetime_null, interval 1 month) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """ SELECT date_sub(date, interval 1 week) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" @@ -80,10 +87,12 @@ suite("test_date_or_datetime_computation_negative") { sql """ SELECT date_sub(datetime, interval 1 week) FROM test_date_or_datetime_computation_negative WHERE row_id=1; """ check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_sub(date_null, interval 1 week), date_sub(dateV2_null, interval 1 week), date_sub(datetime_null, interval 1 week) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_3 """SELECT date_sub(date_null, interval 1 week), date_sub(dateV2_null, interval 1 week), date_sub(datetime_null, interval 1 week) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" - test { sql """SELECT date_sub(date, interval 1 day) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, startTime, endTime -> @@ -96,10 +105,12 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_sub(datetime, interval 1 day) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, 
startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_sub(date_null, interval 1 day), date_sub(dateV2_null, interval 1 day), date_sub(datetime_null, interval 1 day) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_4 """SELECT date_sub(date_null, interval 1 day), date_sub(dateV2_null, interval 1 day), date_sub(datetime_null, interval 1 day) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" - test { sql """SELECT date_sub(date, interval 1 hour) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, startTime, endTime -> @@ -112,8 +123,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_sub(datetime, interval 1 hour) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """ SELECT date_sub(date_null, interval 1 hour), date_sub(dateV2_null, interval 1 hour), date_sub(datetime_null, interval 1 hour) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_5 """ SELECT date_sub(date_null, interval 1 hour), date_sub(dateV2_null, interval 1 hour), date_sub(datetime_null, interval 1 hour) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """SELECT date_sub(date, interval 1 minute) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" @@ -127,8 +141,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_sub(datetime, interval 1 minute) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_sub(date_null, interval 1 minute), date_sub(dateV2_null, interval 1 minute), date_sub(datetime_null, interval 1 minute) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_6 """SELECT date_sub(date_null, interval 1 minute), date_sub(dateV2_null, interval 1 minute), date_sub(datetime_null, interval 1 minute) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """SELECT date_sub(date, interval 1 second) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" @@ -142,8 +159,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_sub(datetime, interval 1 second) FROM test_date_or_datetime_computation_negative WHERE row_id=1;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_sub(date_null, interval 1 second), date_sub(dateV2_null, interval 1 second), date_sub(datetime_null, interval 1 second) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_7 """SELECT date_sub(date_null, interval 1 second), date_sub(dateV2_null, interval 1 second), date_sub(datetime_null, interval 1 second) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { @@ -158,8 +178,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_add(datetime, interval 1 year) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" 
check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_add(date_null, interval 1 year), date_add(dateV2_null, interval 1 year), date_add(datetime_null, interval 1 year) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_8 """SELECT date_add(date_null, interval 1 year), date_add(dateV2_null, interval 1 year), date_add(datetime_null, interval 1 year) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """SELECT date_add(date, interval 1 month) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" @@ -173,8 +196,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_add(datetime, interval 1 month) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_add(date_null, interval 1 month), date_add(dateV2_null, interval 1 month), date_add(datetime_null, interval 1 month) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_9 """SELECT date_add(date_null, interval 1 month), date_add(dateV2_null, interval 1 month), date_add(datetime_null, interval 1 month) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """ SELECT date_add(date, interval 1 week) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" @@ -188,10 +214,12 @@ suite("test_date_or_datetime_computation_negative") { sql """ SELECT date_add(datetime, interval 1 week) FROM test_date_or_datetime_computation_negative WHERE row_id=3; """ check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_add(date_null, interval 1 week), date_add(dateV2_null, interval 1 week), date_add(datetime_null, interval 1 week) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_10 """SELECT date_add(date_null, interval 1 week), date_add(dateV2_null, interval 1 week), date_add(datetime_null, interval 1 week) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" - test { sql """SELECT date_add(date, interval 1 day) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" check {result, exception, startTime, endTime -> @@ -204,10 +232,12 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_add(datetime, interval 1 day) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_add(date_null, interval 1 day), date_add(dateV2_null, interval 1 day), date_add(datetime_null, interval 1 day) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_11 """SELECT date_add(date_null, interval 1 day), date_add(dateV2_null, interval 1 day), date_add(datetime_null, interval 1 day) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" - test { sql """SELECT date_add(date, interval 1 hour) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" check {result, exception, startTime, endTime -> @@ -220,8 +250,11 @@ 
suite("test_date_or_datetime_computation_negative") { sql """SELECT date_add(datetime, interval 1 hour) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """ SELECT date_add(date_null, interval 1 hour), date_add(dateV2_null, interval 1 hour), date_add(datetime_null, interval 1 hour) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_12 """ SELECT date_add(date_null, interval 1 hour), date_add(dateV2_null, interval 1 hour), date_add(datetime_null, interval 1 hour) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """SELECT date_add(date, interval 1 minute) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" @@ -235,8 +268,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_add(datetime, interval 1 minute) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_add(date_null, interval 1 minute), date_add(dateV2_null, interval 1 minute), date_add(datetime_null, interval 1 minute) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_13 """SELECT date_add(date_null, interval 1 minute), date_add(dateV2_null, interval 1 minute), date_add(datetime_null, interval 1 minute) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" test { sql """SELECT date_add(date, interval 1 second) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" @@ -250,8 +286,11 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT date_add(datetime, interval 1 second) FROM test_date_or_datetime_computation_negative WHERE row_id=3;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT date_add(date_null, interval 1 second), date_add(dateV2_null, interval 1 second), date_add(datetime_null, interval 1 second) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_14 """SELECT date_add(date_null, interval 1 second), date_add(dateV2_null, interval 1 second), date_add(datetime_null, interval 1 second) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" // TODO: // nagetive test for microseconds_add/milliseconds_add/seconds_add/minutes_add/hours_add/days_add/weeks_add/months_add/years_add @@ -268,8 +307,9 @@ suite("test_date_or_datetime_computation_negative") { sql """SELECT hours_add(datetime, 24) FROM test_date_or_datetime_computation_negative WHERE row_id = 3;""" check {result, exception, startTime, endTime -> assertTrue (exception != null)} + + sql """SELECT hours_add(date_null, 24), hours_add(dateV2_null, 24), hours_add(datetime_null, 24) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" + check {result, exception, startTime, endTime -> + assertTrue (exception != null)} } - qt_select_nullable_15 """SELECT hours_add(date_null, 24), hours_add(dateV2_null, 24), hours_add(datetime_null, 24) FROM test_date_or_datetime_computation_negative ORDER BY row_id;""" - - sql "DROP TABLE test_date_or_datetime_computation_negative" } From 
cd916c6a7cf6b7df2fd910bc6f5d123416715b36 Mon Sep 17 00:00:00 2001
From: Siyang Tang
Date: Mon, 23 Dec 2024 12:25:46 +0800
Subject: [PATCH 49/55] [enhancement](tablet-meta) Avoid BE coredump due to potential race condition when updating tablet cumu point (#45643)

Currently, when setting a tablet's cumu point, an assertion failure occurs if
the new point is less than the local value, resulting in a BE coredump. This
can happen under the following race condition:

1. Thread A tries to sync rowsets.
2. Thread A fetches the cumu point from MS.
3. Thread B updates the cumu point (e.g. via SC/compaction), commits to MS
   after step 2, and sets the BE tablet cumu point before step 4.
4. Thread A tries to set the stale cumu point it fetched earlier, hits the
   assertion, and coredumps.
---
 be/src/cloud/cloud_tablet.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index c7d3170726b2d5f..4e351f7cfa51101 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -33,6 +33,7 @@
 #include "cloud/cloud_meta_mgr.h"
 #include "cloud/cloud_storage_engine.h"
 #include "cloud/cloud_tablet_mgr.h"
+#include "common/logging.h"
 #include "io/cache/block_file_cache_downloader.h"
 #include "io/cache/block_file_cache_factory.h"
 #include "olap/cumulative_compaction_time_series_policy.h"
@@ -657,11 +658,14 @@ void CloudTablet::get_compaction_status(std::string* json_result) {
 }

 void CloudTablet::set_cumulative_layer_point(int64_t new_point) {
+    if (new_point == Tablet::K_INVALID_CUMULATIVE_POINT || new_point >= _cumulative_point) {
+        _cumulative_point = new_point;
+        return;
+    }
     // cumulative point should only be reset to -1, or be increased
-    CHECK(new_point == Tablet::K_INVALID_CUMULATIVE_POINT || new_point >= _cumulative_point)
-            << "Unexpected cumulative point: " << new_point
-            << ", origin: " << _cumulative_point.load();
-    _cumulative_point = new_point;
+    // FIXME: could happen in currently unresolved race conditions
+    LOG(WARNING) << "Unexpected cumulative point: " << new_point
+                 << ", origin: " << _cumulative_point.load();
 }

 std::vector CloudTablet::pick_candidate_rowsets_to_base_compaction() {
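The FIXME above acknowledges that the new check-then-set is still racy: two
threads can interleave between the comparison and the store. Below is a sketch
of a lock-free variant that keeps the update monotonic. It is an illustrative
aside, not part of the patch; it assumes _cumulative_point is a
std::atomic<int64_t> (consistent with the .load() call above) and that
K_INVALID_CUMULATIVE_POINT is the reset sentinel.

    #include <atomic>
    #include <cstdint>

    // Advance `point` to `new_point` only if it does not move backwards;
    // a reset to the invalid sentinel is always allowed.
    void set_point_monotonic(std::atomic<int64_t>& point, int64_t new_point,
                             int64_t invalid_sentinel) {
        if (new_point == invalid_sentinel) {
            point.store(new_point);
            return;
        }
        int64_t cur = point.load();
        while (new_point >= cur) {
            if (point.compare_exchange_weak(cur, new_point)) {
                return; // successfully advanced the point
            }
            // CAS failure reloads `cur`; the loop re-checks monotonicity.
        }
        // new_point < cur: a concurrent writer already moved past it, so keep
        // the larger value (the patch logs a WARNING in this situation).
    }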
From cccc9bb8beddb271537d4434cab20dc1ad16dde8 Mon Sep 17 00:00:00 2001
From: "Mingyu Chen (Rayner)"
Date: Mon, 23 Dec 2024 13:53:27 +0800
Subject: [PATCH 50/55] [opt](log) add more info in nereids timeout log (#45705)

### What problem does this PR solve?

Add elapsed time in log
---
 .../doris/nereids/jobs/scheduler/SimpleJobScheduler.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java
index e31c92e3fe004cb..1354f895a3c07ae 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java
@@ -34,11 +34,11 @@ public void executeJobPool(ScheduleContext scheduleContext) {
         CascadesContext context = (CascadesContext) scheduleContext;
         SessionVariable sessionVariable = context.getConnectContext().getSessionVariable();
         while (!pool.isEmpty()) {
+            long elapsedS = context.getStatementContext().getStopwatch().elapsed(TimeUnit.MILLISECONDS) / 1000;
             if (sessionVariable.enableNereidsTimeout
-                    && context.getStatementContext().getStopwatch().elapsed(TimeUnit.MILLISECONDS)
-                    > sessionVariable.nereidsTimeoutSecond * 1000L) {
-                throw new AnalysisException(
-                        "Nereids cost too much time ( > " + sessionVariable.nereidsTimeoutSecond + "s )");
+                    && elapsedS > sessionVariable.nereidsTimeoutSecond) {
+                throw new AnalysisException(String.format("Nereids cost too much time ( %ds > %ds )",
+                        elapsedS, sessionVariable.nereidsTimeoutSecond));
             }
             Job job = pool.pop();
             job.execute();

From a032ece525c1b4bc5e15826e6f54fc82d5ccbee9 Mon Sep 17 00:00:00 2001
From: zzzxl
Date: Mon, 23 Dec 2024 14:52:17 +0800
Subject: [PATCH 51/55] [opt](inverted index) Add Inverted Index Cache Toggle (#45718)

Problem Summary:
1. Adding an inverted index cache toggle can help with debugging.
---
 .../segment_v2/inverted_index_reader.cpp      | 74 ++++++---
 .../rowset/segment_v2/inverted_index_reader.h | 38 +++--
 .../org/apache/doris/qe/SessionVariable.java  | 16 ++
 gensrc/thrift/PaloInternalService.thrift      |  4 +
 .../test_inverted_index_cache.out             | 22 +++
 .../test_inverted_index_cache.groovy          | 144 ++++++++++++++++++
 6 files changed, 260 insertions(+), 38 deletions(-)
 create mode 100644 regression-test/data/fault_injection_p0/test_inverted_index_cache.out
 create mode 100644 regression-test/suites/fault_injection_p0/test_inverted_index_cache.groovy

diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index b40f91211252079..9790d7273e1bff2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -164,16 +164,48 @@ Status InvertedIndexReader::read_null_bitmap(const io::IOContext* io_ctx,
     return Status::OK();
 }

+Status InvertedIndexReader::handle_query_cache(RuntimeState* runtime_state,
+                                               InvertedIndexQueryCache* cache,
+                                               const InvertedIndexQueryCache::CacheKey& cache_key,
+                                               InvertedIndexQueryCacheHandle* cache_handler,
+                                               OlapReaderStatistics* stats,
+                                               std::shared_ptr& bit_map) {
+    const auto& query_options = runtime_state->query_options();
+    if (query_options.enable_inverted_index_query_cache &&
+        cache->lookup(cache_key, cache_handler)) {
+        DBUG_EXECUTE_IF("InvertedIndexReader.handle_query_cache_hit", {
+            return Status::Error("handle query cache hit");
+        });
+        stats->inverted_index_query_cache_hit++;
+        SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
+        bit_map = cache_handler->get_bitmap();
+        return Status::OK();
+    }
+    DBUG_EXECUTE_IF("InvertedIndexReader.handle_query_cache_miss", {
+        return Status::Error("handle 
query cache miss"); + }); + stats->inverted_index_query_cache_miss++; + return Status::Error("cache miss"); +} + Status InvertedIndexReader::handle_searcher_cache( - InvertedIndexCacheHandle* inverted_index_cache_handle, const io::IOContext* io_ctx, - OlapReaderStatistics* stats) { + RuntimeState* runtime_state, InvertedIndexCacheHandle* inverted_index_cache_handle, + const io::IOContext* io_ctx, OlapReaderStatistics* stats) { auto index_file_key = _inverted_index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key); - if (InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key, + const auto& query_options = runtime_state->query_options(); + if (query_options.enable_inverted_index_searcher_cache && + InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key, inverted_index_cache_handle)) { + DBUG_EXECUTE_IF("InvertedIndexReader.handle_searcher_cache_hit", { + return Status::Error("handle searcher cache hit"); + }); stats->inverted_index_searcher_cache_hit++; return Status::OK(); } else { + DBUG_EXECUTE_IF("InvertedIndexReader.handle_searcher_cache_miss", { + return Status::Error("handle searcher cache miss"); + }); // searcher cache miss stats->inverted_index_searcher_cache_miss++; auto mem_tracker = std::make_unique("InvertedIndexSearcherCacheWithRead"); @@ -311,14 +343,16 @@ Status FullTextIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatist InvertedIndexQueryCacheHandle cache_handler; std::shared_ptr term_match_bitmap = nullptr; - auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map); + auto cache_status = + handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map); if (cache_status.ok()) { return Status::OK(); } FulltextIndexSearcherPtr* searcher_ptr = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats)); + RETURN_IF_ERROR( + handle_searcher_cache(runtime_state, &inverted_index_cache_handle, io_ctx, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); searcher_ptr = std::get_if(&searcher_variant); if (searcher_ptr != nullptr) { @@ -379,7 +413,8 @@ Status StringTypeInvertedIndexReader::query(const io::IOContext* io_ctx, search_str}; auto* cache = InvertedIndexQueryCache::instance(); InvertedIndexQueryCacheHandle cache_handler; - auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map); + auto cache_status = + handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map); if (cache_status.ok()) { return Status::OK(); } @@ -393,7 +428,8 @@ Status StringTypeInvertedIndexReader::query(const io::IOContext* io_ctx, auto result = std::make_shared(); FulltextIndexSearcherPtr* searcher_ptr = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats)); + RETURN_IF_ERROR( + handle_searcher_cache(runtime_state, &inverted_index_cache_handle, io_ctx, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); searcher_ptr = std::get_if(&searcher_variant); if (searcher_ptr != nullptr) { @@ -609,11 +645,12 @@ Status BkdIndexReader::invoke_bkd_query(const void* query_value, InvertedIndexQu } Status BkdIndexReader::try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, - const std::string& column_name, const void* query_value, - 
InvertedIndexQueryType query_type, uint32_t* count) { + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, + uint32_t* count) { try { std::shared_ptr r; - auto st = get_bkd_reader(r, io_ctx, stats); + auto st = get_bkd_reader(r, io_ctx, stats, runtime_state); if (!st.ok()) { LOG(WARNING) << "get bkd reader for " << _inverted_index_file_reader->get_index_file_path(&_index_meta) @@ -629,7 +666,8 @@ Status BkdIndexReader::try_query(const io::IOContext* io_ctx, OlapReaderStatisti auto* cache = InvertedIndexQueryCache::instance(); InvertedIndexQueryCacheHandle cache_handler; std::shared_ptr bit_map; - auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map); + auto cache_status = + handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map); if (cache_status.ok()) { *count = bit_map->cardinality(); return Status::OK(); @@ -653,7 +691,7 @@ Status BkdIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics* try { std::shared_ptr r; - auto st = get_bkd_reader(r, io_ctx, stats); + auto st = get_bkd_reader(r, io_ctx, stats, runtime_state); if (!st.ok()) { LOG(WARNING) << "get bkd reader for " << _inverted_index_file_reader->get_index_file_path(&_index_meta) @@ -668,7 +706,8 @@ Status BkdIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics* query_str}; auto* cache = InvertedIndexQueryCache::instance(); InvertedIndexQueryCacheHandle cache_handler; - auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map); + auto cache_status = + handle_query_cache(runtime_state, cache, cache_key, &cache_handler, stats, bit_map); if (cache_status.ok()) { return Status::OK(); } @@ -690,10 +729,11 @@ Status BkdIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics* } Status BkdIndexReader::get_bkd_reader(BKDIndexSearcherPtr& bkd_reader, const io::IOContext* io_ctx, - OlapReaderStatistics* stats) { + OlapReaderStatistics* stats, RuntimeState* runtime_state) { BKDIndexSearcherPtr* bkd_searcher = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats)); + RETURN_IF_ERROR( + handle_searcher_cache(runtime_state, &inverted_index_cache_handle, io_ctx, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); bkd_searcher = std::get_if(&searcher_variant); if (bkd_searcher) { @@ -1138,8 +1178,8 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY || query_type == InvertedIndexQueryType::LESS_THAN_QUERY || query_type == InvertedIndexQueryType::EQUAL_QUERY) { - RETURN_IF_ERROR( - _reader->try_query(&_io_ctx, _stats, column_name, query_value, query_type, count)); + RETURN_IF_ERROR(_reader->try_query(&_io_ctx, _stats, _runtime_state, column_name, + query_value, query_type, count)); } return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index a14456032866191..bbd148fae5250d8 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -190,8 +190,9 @@ class InvertedIndexReader : public std::enable_shared_from_this& bit_map) = 0; virtual Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, - const std::string& column_name, const void* query_value, - 
InvertedIndexQueryType query_type, uint32_t* count) = 0; + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, + uint32_t* count) = 0; Status read_null_bitmap(const io::IOContext* io_ctx, OlapReaderStatistics* stats, InvertedIndexQueryCacheHandle* cache_handle, @@ -208,22 +209,14 @@ class InvertedIndexReader : public std::enable_shared_from_this& bit_map) { - if (cache->lookup(cache_key, cache_handler)) { - stats->inverted_index_query_cache_hit++; - SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer); - bit_map = cache_handler->get_bitmap(); - return Status::OK(); - } - stats->inverted_index_query_cache_miss++; - return Status::Error("cache miss"); - } + std::shared_ptr& bit_map); - virtual Status handle_searcher_cache(InvertedIndexCacheHandle* inverted_index_cache_handle, + virtual Status handle_searcher_cache(RuntimeState* runtime_state, + InvertedIndexCacheHandle* inverted_index_cache_handle, const io::IOContext* io_ctx, OlapReaderStatistics* stats); std::string get_index_file_path(); static Status create_index_searcher(lucene::store::Directory* dir, IndexSearcherPtr* searcher, @@ -262,8 +255,9 @@ class FullTextIndexReader : public InvertedIndexReader { const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, uint32_t* count) override { + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, + uint32_t* count) override { return Status::Error( "FullTextIndexReader not support try_query"); } @@ -289,8 +283,9 @@ class StringTypeInvertedIndexReader : public InvertedIndexReader { const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, uint32_t* count) override { + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, + uint32_t* count) override { return Status::Error( "StringTypeInvertedIndexReader not support try_query"); } @@ -350,8 +345,9 @@ class BkdIndexReader : public InvertedIndexReader { const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, uint32_t* count) override; + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, + uint32_t* count) override; Status invoke_bkd_try_query(const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr r, uint32_t* count); Status invoke_bkd_query(const void* query_value, InvertedIndexQueryType query_type, @@ -364,7 +360,7 @@ class BkdIndexReader : public InvertedIndexReader { InvertedIndexReaderType type() override; Status get_bkd_reader(BKDIndexSearcherPtr& reader, const io::IOContext* io_ctx, - OlapReaderStatistics* stats); + OlapReaderStatistics* stats, RuntimeState* runtime_state); private: const TypeInfo* _type_info {}; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 75f21c786b8c379..cf26cce7383e1c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -679,6 +679,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index"; public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index"; + public static final String ENABLE_INVERTED_INDEX_SEARCHER_CACHE = "enable_inverted_index_searcher_cache"; + public static final String ENABLE_INVERTED_INDEX_QUERY_CACHE = "enable_inverted_index_query_cache"; public static final String IN_LIST_VALUE_COUNT_THRESHOLD = "in_list_value_count_threshold"; @@ -2304,6 +2306,18 @@ public void setIgnoreShapePlanNodes(String ignoreShapePlanNodes) { }) public boolean enableFallbackOnMissingInvertedIndex = true; + @VariableMgr.VarAttr(name = ENABLE_INVERTED_INDEX_SEARCHER_CACHE, description = { + "开启后会缓存倒排索引searcher", + "Enabling this will cache the inverted index searcher." + }) + public boolean enableInvertedIndexSearcherCache = true; + + @VariableMgr.VarAttr(name = ENABLE_INVERTED_INDEX_QUERY_CACHE, description = { + "开启后会缓存倒排索引查询结果", + "Enabling this will cache the results of inverted index queries." + }) + public boolean enableInvertedIndexQueryCache = true; + @VariableMgr.VarAttr(name = IN_LIST_VALUE_COUNT_THRESHOLD, description = { "in条件value数量大于这个threshold后将不会走fast_execute", "When the number of values in the IN condition exceeds this threshold," @@ -3990,6 +4004,8 @@ public TQueryOptions toThrift() { tResult.setEnableMatchWithoutInvertedIndex(enableMatchWithoutInvertedIndex); tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex); + tResult.setEnableInvertedIndexSearcherCache(enableInvertedIndexSearcherCache); + tResult.setEnableInvertedIndexQueryCache(enableInvertedIndexQueryCache); tResult.setHiveOrcUseColumnNames(hiveOrcUseColumnNames); tResult.setHiveParquetUseColumnNames(hiveParquetUseColumnNames); tResult.setKeepCarriageReturn(keepCarriageReturn); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 0a1ea4a98fca940..f4d367659e48924 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -360,6 +360,10 @@ struct TQueryOptions { 141: optional bool ignore_runtime_filter_error = false; 142: optional bool enable_fixed_len_to_uint32_v2 = false; 143: optional bool enable_shared_exchange_sink_buffer = true; + + 144: optional bool enable_inverted_index_searcher_cache = true; + 145: optional bool enable_inverted_index_query_cache = true; + // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query. diff --git a/regression-test/data/fault_injection_p0/test_inverted_index_cache.out b/regression-test/data/fault_injection_p0/test_inverted_index_cache.out new file mode 100644 index 000000000000000..7d166b8b78d5d30 --- /dev/null +++ b/regression-test/data/fault_injection_p0/test_inverted_index_cache.out @@ -0,0 +1,22 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +863 + +-- !sql -- +863 + +-- !sql -- +863 + +-- !sql -- +863 + +-- !sql -- +350 + +-- !sql -- +863 + +-- !sql -- +350 + diff --git a/regression-test/suites/fault_injection_p0/test_inverted_index_cache.groovy b/regression-test/suites/fault_injection_p0/test_inverted_index_cache.groovy new file mode 100644 index 000000000000000..fd250a7d4fd528d --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_inverted_index_cache.groovy @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_inverted_index_cache", "nonConcurrent") { + // define a sql table + def indexTbName = "test_inverted_index_cache" + + sql "DROP TABLE IF EXISTS ${indexTbName}" + sql """ + CREATE TABLE ${indexTbName} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, + expected_succ_rows = -1, load_to_single_tablet = 'true' -> + + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'label', label + "_" + UUID.randomUUID().toString() + set 'read_json_by_line', read_flag + set 'format', format_flag + file file_name // import json file + time 10000 // limit inflight 10s + if (expected_succ_rows >= 0) { + set 'max_filter_ratio', '1' + } + + // if declared a check callback, the default check condition will ignore. 
+ // So you must check all condition + check { result, exception, startTime, endTime -> + if (ignore_failure && expected_succ_rows < 0) { return } + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + } + } + } + + load_httplogs_data.call(indexTbName, 'test_index_inlist_fault_injection', 'true', 'json', 'documents-1000.json') + sql "sync" + + qt_sql """ select count() from ${indexTbName} where (request match 'images'); """ + + // query cache hit + // searcher cache hit + try { + sql """ set enable_inverted_index_query_cache = true """ + sql """ set enable_inverted_index_searcher_cache = true """ + + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_miss") + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_miss") + + qt_sql """ select count() from ${indexTbName} where (request match 'images'); """ + + } finally { + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_miss") + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_miss") + } + + // query cache miss + // searcher cache hit + try { + sql """ set enable_inverted_index_query_cache = false """ + sql """ set enable_inverted_index_searcher_cache = true """ + + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_hit") + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_miss") + + qt_sql """ select count() from ${indexTbName} where (request match 'images'); """ + + } finally { + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_hit") + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_miss") + } + + // query cache hit + // searcher cache miss + try { + sql """ set enable_inverted_index_query_cache = true """ + sql """ set enable_inverted_index_searcher_cache = false """ + + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_miss") + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_hit") + + qt_sql """ select count() from ${indexTbName} where (request match 'images'); """ + qt_sql """ select count() from ${indexTbName} where (request match 'english'); """ + + } finally { + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_miss") + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_hit") + } + + // query cache miss + // searcher cache miss + try { + sql """ set enable_inverted_index_query_cache = false """ + sql """ set enable_inverted_index_searcher_cache = false """ + + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_hit") + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_hit") + + qt_sql """ select count() from ${indexTbName} where (request match 'images'); """ + qt_sql """ select count() from ${indexTbName} where (request match 'english'); """ + + } finally { + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_query_cache_hit") + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexReader.handle_searcher_cache_hit") + } + + sql """ set enable_inverted_index_query_cache = true """ + sql """ set enable_inverted_index_searcher_cache = true """ +} \ No newline at end of file From e09bc04bce094e16481c55e73294b03995761de8 Mon Sep 17 00:00:00 2001 From: yujun Date: Mon, 23 
Dec 2024 16:08:35 +0800 Subject: [PATCH 52/55] [test](nereids) add test simplify comparison predicate (#44886) ### What problem does this PR solve? Add test simplify comparison predicate --- .../SimplifyComparisonPredicateTest.java | 191 +++++++++++++++++- .../test_simplify_comparison_predicate.groovy | 170 ++++++++++++++++ 2 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 regression-test/suites/nereids_rules_p0/expression/test_simplify_comparison_predicate.groovy diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java index bab3b4385137e89..9a36fb59b9f18d3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java @@ -40,6 +40,7 @@ import org.apache.doris.nereids.trees.expressions.literal.DateV2Literal; import org.apache.doris.nereids.trees.expressions.literal.DecimalV3Literal; import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral; +import org.apache.doris.nereids.trees.expressions.literal.FloatLiteral; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; import org.apache.doris.nereids.trees.expressions.literal.LargeIntLiteral; import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; @@ -54,6 +55,7 @@ import org.apache.doris.nereids.types.DateV2Type; import org.apache.doris.nereids.types.DecimalV3Type; import org.apache.doris.nereids.types.DoubleType; +import org.apache.doris.nereids.types.FloatType; import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.nereids.types.SmallIntType; import org.apache.doris.nereids.types.TinyIntType; @@ -296,10 +298,197 @@ void testDoubleLiteral() { Expression rewrittenExpression = executor.rewrite(expression, context); Assertions.assertEquals(left.child(0).getDataType(), rewrittenExpression.child(1).getDataType()); Assertions.assertEquals(rewrittenExpression.child(0).getDataType(), rewrittenExpression.child(1).getDataType()); + + Expression tinyIntSlot = new SlotReference("a", TinyIntType.INSTANCE); + Expression smallIntSlot = new SlotReference("a", SmallIntType.INSTANCE); + Expression intSlot = new SlotReference("a", IntegerType.INSTANCE); + Expression bigIntSlot = new SlotReference("a", BigIntType.INSTANCE); + + // tiny int, literal not exceeds data type limit + assertRewrite(new EqualTo(new Cast(tinyIntSlot, FloatType.INSTANCE), new FloatLiteral(12.0f)), + new EqualTo(tinyIntSlot, new TinyIntLiteral((byte) 12))); + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.0f)), + new EqualTo(tinyIntSlot, new TinyIntLiteral((byte) 12))); + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThan(tinyIntSlot, new TinyIntLiteral((byte) 12))); + assertRewrite(new GreaterThanEqual(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThanEqual(tinyIntSlot, new TinyIntLiteral((byte) 13))); + assertRewrite(new LessThan(new 
Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThan(tinyIntSlot, new TinyIntLiteral((byte) 13))); + assertRewrite(new LessThanEqual(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThanEqual(tinyIntSlot, new TinyIntLiteral((byte) 12))); + + // tiny int, literal exceeds data type limit + assertRewrite(new EqualTo(new Cast(tinyIntSlot, FloatType.INSTANCE), new FloatLiteral(200.0f)), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(200.0f)), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(200.3f)), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(200.3f)), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(200.3f)), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new GreaterThanEqual(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(200.3f)), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new LessThan(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(200.3f)), + ExpressionUtils.trueOrNull(tinyIntSlot)); + assertRewrite(new LessThanEqual(new Cast(tinyIntSlot, DoubleType.INSTANCE), new DoubleLiteral(200.3f)), + ExpressionUtils.trueOrNull(tinyIntSlot)); + + // small int + assertRewrite(new EqualTo(new Cast(smallIntSlot, FloatType.INSTANCE), new FloatLiteral(12.0f)), + new EqualTo(smallIntSlot, new SmallIntLiteral((short) 12))); + assertRewrite(new EqualTo(new Cast(smallIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.0f)), + new EqualTo(smallIntSlot, new SmallIntLiteral((short) 12))); + assertRewrite(new EqualTo(new Cast(smallIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + ExpressionUtils.falseOrNull(smallIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(smallIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(smallIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThan(smallIntSlot, new SmallIntLiteral((short) 12))); + assertRewrite(new GreaterThanEqual(new Cast(smallIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThanEqual(smallIntSlot, new SmallIntLiteral((short) 13))); + assertRewrite(new LessThan(new Cast(smallIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThan(smallIntSlot, new SmallIntLiteral((short) 13))); + assertRewrite(new LessThanEqual(new Cast(smallIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThanEqual(smallIntSlot, new SmallIntLiteral((short) 12))); + + // int + assertRewrite(new EqualTo(new Cast(intSlot, FloatType.INSTANCE), new FloatLiteral(12.0f)), + new EqualTo(intSlot, new IntegerLiteral(12))); + assertRewrite(new EqualTo(new Cast(intSlot, DoubleType.INSTANCE), new DoubleLiteral(12.0f)), + new EqualTo(intSlot, new IntegerLiteral(12))); + assertRewrite(new EqualTo(new Cast(intSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + ExpressionUtils.falseOrNull(intSlot)); + assertRewrite(new NullSafeEqual(new Cast(intSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(intSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThan(intSlot, new IntegerLiteral(12))); + assertRewrite(new 
GreaterThanEqual(new Cast(intSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThanEqual(intSlot, new IntegerLiteral(13))); + assertRewrite(new LessThan(new Cast(intSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThan(intSlot, new IntegerLiteral(13))); + assertRewrite(new LessThanEqual(new Cast(intSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThanEqual(intSlot, new IntegerLiteral(12))); + + // big int + assertRewrite(new EqualTo(new Cast(bigIntSlot, FloatType.INSTANCE), new FloatLiteral(12.0f)), + new EqualTo(bigIntSlot, new BigIntLiteral(12L))); + assertRewrite(new EqualTo(new Cast(bigIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.0f)), + new EqualTo(bigIntSlot, new BigIntLiteral(12L))); + assertRewrite(new EqualTo(new Cast(bigIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + ExpressionUtils.falseOrNull(bigIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(bigIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(bigIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThan(bigIntSlot, new BigIntLiteral(12L))); + assertRewrite(new GreaterThanEqual(new Cast(bigIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new GreaterThanEqual(bigIntSlot, new BigIntLiteral(13L))); + assertRewrite(new LessThan(new Cast(bigIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThan(bigIntSlot, new BigIntLiteral(13L))); + assertRewrite(new LessThanEqual(new Cast(bigIntSlot, DoubleType.INSTANCE), new DoubleLiteral(12.3f)), + new LessThanEqual(bigIntSlot, new BigIntLiteral(12L))); + } + + @Test + void testIntCmpDecimalV3Literal() { + executor = new ExpressionRuleExecutor(ImmutableList.of( + bottomUp(SimplifyComparisonPredicate.INSTANCE) + )); + + Expression tinyIntSlot = new SlotReference("a", TinyIntType.INSTANCE); + Expression smallIntSlot = new SlotReference("a", SmallIntType.INSTANCE); + Expression intSlot = new SlotReference("a", IntegerType.INSTANCE); + Expression bigIntSlot = new SlotReference("a", BigIntType.INSTANCE); + + // tiny int, literal not exceeds data type limit + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.0"))), + new EqualTo(tinyIntSlot, new TinyIntLiteral((byte) 12))); + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThan(tinyIntSlot, new TinyIntLiteral((byte) 12))); + assertRewrite(new GreaterThanEqual(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThanEqual(tinyIntSlot, new TinyIntLiteral((byte) 13))); + assertRewrite(new LessThan(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThan(tinyIntSlot, new TinyIntLiteral((byte) 13))); + assertRewrite(new LessThanEqual(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThanEqual(tinyIntSlot, new 
TinyIntLiteral((byte) 12))); + + // tiny int, literal exceeds data type limit + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(4, 1)), new DecimalV3Literal(new BigDecimal("200.0"))), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new EqualTo(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(4, 1)), new DecimalV3Literal(new BigDecimal("200.3"))), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(4, 1)), new DecimalV3Literal(new BigDecimal("200.3"))), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(4, 1)), new DecimalV3Literal(new BigDecimal("200.3"))), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new GreaterThanEqual(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(4, 1)), new DecimalV3Literal(new BigDecimal("200.3"))), + ExpressionUtils.falseOrNull(tinyIntSlot)); + assertRewrite(new LessThan(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(4, 1)), new DecimalV3Literal(new BigDecimal("200.3"))), + ExpressionUtils.trueOrNull(tinyIntSlot)); + assertRewrite(new LessThanEqual(new Cast(tinyIntSlot, DecimalV3Type.createDecimalV3Type(4, 1)), new DecimalV3Literal(new BigDecimal("200.3"))), + ExpressionUtils.trueOrNull(tinyIntSlot)); + + // small int + assertRewrite(new EqualTo(new Cast(smallIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.0"))), + new EqualTo(smallIntSlot, new SmallIntLiteral((short) 12))); + assertRewrite(new EqualTo(new Cast(smallIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + ExpressionUtils.falseOrNull(smallIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(smallIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(smallIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThan(smallIntSlot, new SmallIntLiteral((short) 12))); + assertRewrite(new GreaterThanEqual(new Cast(smallIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThanEqual(smallIntSlot, new SmallIntLiteral((short) 13))); + assertRewrite(new LessThan(new Cast(smallIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThan(smallIntSlot, new SmallIntLiteral((short) 13))); + assertRewrite(new LessThanEqual(new Cast(smallIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThanEqual(smallIntSlot, new SmallIntLiteral((short) 12))); + + // int + assertRewrite(new EqualTo(new Cast(intSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.0"))), + new EqualTo(intSlot, new IntegerLiteral(12))); + assertRewrite(new EqualTo(new Cast(intSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + ExpressionUtils.falseOrNull(intSlot)); + assertRewrite(new NullSafeEqual(new Cast(intSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(intSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThan(intSlot, new IntegerLiteral(12))); + assertRewrite(new 
GreaterThanEqual(new Cast(intSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThanEqual(intSlot, new IntegerLiteral(13))); + assertRewrite(new LessThan(new Cast(intSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThan(intSlot, new IntegerLiteral(13))); + assertRewrite(new LessThanEqual(new Cast(intSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThanEqual(intSlot, new IntegerLiteral(12))); + + // big int + assertRewrite(new EqualTo(new Cast(bigIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.0"))), + new EqualTo(bigIntSlot, new BigIntLiteral(12L))); + assertRewrite(new EqualTo(new Cast(bigIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + ExpressionUtils.falseOrNull(bigIntSlot)); + assertRewrite(new NullSafeEqual(new Cast(bigIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(bigIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThan(bigIntSlot, new BigIntLiteral(12L))); + assertRewrite(new GreaterThanEqual(new Cast(bigIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new GreaterThanEqual(bigIntSlot, new BigIntLiteral(13L))); + assertRewrite(new LessThan(new Cast(bigIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThan(bigIntSlot, new BigIntLiteral(13L))); + assertRewrite(new LessThanEqual(new Cast(bigIntSlot, DecimalV3Type.createDecimalV3Type(3, 1)), new DecimalV3Literal(new BigDecimal("12.3"))), + new LessThanEqual(bigIntSlot, new BigIntLiteral(12L))); } @Test - void testDecimalV3Literal() { + void testDecimalCmpDecimalV3Literal() { executor = new ExpressionRuleExecutor(ImmutableList.of( bottomUp(SimplifyComparisonPredicate.INSTANCE) )); diff --git a/regression-test/suites/nereids_rules_p0/expression/test_simplify_comparison_predicate.groovy b/regression-test/suites/nereids_rules_p0/expression/test_simplify_comparison_predicate.groovy new file mode 100644 index 000000000000000..af975aeeaa22e7d --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/expression/test_simplify_comparison_predicate.groovy @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// TODO: date datetime comparison still has bug, need fix. 
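+// Reading aid for the expectations below (summarized from the assertions, not a separate
+// spec): when a literal carries more fractional precision than the column type can
+// represent, '=' folds to FALSE (NULL-guarded for nullable columns), '<=>' folds to FALSE,
+// '>' and '<=' round the literal down, and '>=' and '<' round it up, so the cast on the
+// column disappears from the plan.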
+suite('test_simplify_comparison_predicate', 'nonConcurrent') { + def tbl = 'test_simplify_comparison_predicate_tbl' + def checkExplain = { expression, resExpression -> + def checker = { explainString, exception, startTime, endTime -> + assertNull(exception) + def foundOutputExprs = false + def succ = false + for (def line : explainString.split('\n')) { + if (foundOutputExprs) { + assertTrue(line.contains(resExpression), "'${line}' no contains '${resExpression}'") + succ = true + break + } + if (line.contains('OUTPUT EXPRS:')) { + foundOutputExprs = true + } + } + assertTrue(foundOutputExprs) + assertTrue(succ) + } + + explain { + sql "SELECT ${expression} FROM ${tbl}" + check checker + } + } + def testSimplify = { checkNullColumn, checkNotNullColumn, expression, resExpression -> + def types = [''] + def column = '' + if (expression.contains('{int_like_column}')) { + column = '{int_like_column}' + types = ['tinyint', 'smallint', 'int', 'bigint'] + } else if (expression.contains('{decimal_column}')) { + column = '{decimal_column}' + types = ['decimal_3_0', 'decimal_5_2'] + } else if (expression.contains('{date_column}')) { + column = '{date_column}' + types = ['date', 'datev1'] + } else if (expression.contains('{datetime_column}')) { + column = '{datetime_column}' + types = ['datetime_0', 'datetime_3', 'datetimev1'] + } + for (def type : types) { + if (type == '') { + checkExplain expression, resExpression + } else { + if (checkNullColumn) { + checkExplain expression.replace(column, "c_${type}_null"), resExpression.replace(column, "c_${type}_null") + } + if (checkNotNullColumn) { + checkExplain expression.replace(column, "c_${type}"), resExpression.replace(column, "c_${type}") + } + } + } + } + + setFeConfigTemporary([disable_datev1:false, disable_decimalv2:false]) { + sql """ + DROP TABLE IF EXISTS ${tbl} FORCE; + + CREATE TABLE ${tbl} ( + c_tinyint tinyint not null default 1, + c_tinyint_null tinyint, + c_smallint smallint not null default 1, + c_smallint_null smallint, + c_int int not null default 1, + c_int_null int, + c_bigint bigint not null default 1, + c_bigint_null bigint, + c_decimal_3_0 decimal(3, 0) not null default 1, + c_decimal_3_0_null decimal(3, 0), + c_decimal_5_2 decimal(5, 2) not null default 1, + c_decimal_5_2_null decimal(5, 2), + c_date date not null default '2000-01-01', + c_date_null date, + c_datev1 datev1 not null default '2000-01-01', + c_datev1_null datev1 null, + c_datetime_0 datetime(0) not null default '2000-01-01 00:00:00', + c_datetime_0_null datetime(0), + c_datetime_3 datetime(3) not null default '2000-01-01 00:00:00', + c_datetime_3_null datetime(3), + c_datetimev1 datetimev1 not null default '2000-01-01 00:00:00', + c_datetimev1_null datetimev1 + ) + PROPERTIES ('replication_num' = '1'); + + INSERT INTO ${tbl} VALUES(); + """ + + testSimplify true, true, '{int_like_column} = CAST(1.00 as DOUBLE)', '({int_like_column} = 1)' + testSimplify true, false, '{int_like_column} = CAST(1.01 as DOUBLE)', 'AND[{int_like_column} IS NULL,NULL]' + testSimplify false, true, '{int_like_column} = CAST(1.01 as DOUBLE)', 'FALSE' + testSimplify true, true, '{int_like_column} <=> CAST(1.01 as DOUBLE)', 'FALSE' + testSimplify true, true, '{int_like_column} > CAST(1.00 as DOUBLE)', '({int_like_column} > 1)' + testSimplify true, true, '{int_like_column} < CAST(1.00 as DOUBLE)', '({int_like_column} < 1)' + testSimplify true, true, '{int_like_column} > CAST(1.01 as DOUBLE)', '({int_like_column} > 1)' + testSimplify true, true, '{int_like_column} >= CAST(1.01 as DOUBLE)', 
'({int_like_column} >= 2)' + testSimplify true, true, '{int_like_column} <= CAST(1.01 as DOUBLE)', '({int_like_column} <= 1)' + testSimplify true, true, '{int_like_column} < CAST(1.01 as DOUBLE)', '({int_like_column} < 2)' + testSimplify true, true, '{int_like_column} = 1.00', '({int_like_column} = 1)' + testSimplify true, true, '{int_like_column} > 1.00', '({int_like_column} > 1)' + testSimplify true, true, '{int_like_column} < 1.00', '({int_like_column} < 1)' + testSimplify true, false, '{int_like_column} = 1.01', 'AND[{int_like_column} IS NULL,NULL]' + testSimplify false, true, '{int_like_column} = 1.01', 'FALSE' + testSimplify true, true, '{int_like_column} <=> 1.01', 'FALSE' + testSimplify true, true, '{int_like_column} > 1.01', '({int_like_column} > 1)' + testSimplify true, true, '{int_like_column} >= 1.01', '({int_like_column} >= 2)' + testSimplify true, true, '{int_like_column} <= 1.01', '({int_like_column} <= 1)' + testSimplify true, true, '{int_like_column} < 1.01', '({int_like_column} < 2)' + testSimplify false, false, 'CAST(c_decimal_3_0_null as DECIMAL(10, 5)) = CAST(1.00 as DECIMAL(10, 5))', '(c_decimal_3_0_null = 1)' + testSimplify false, false, 'CAST(c_decimal_3_0_null as DECIMAL(10, 5)) = CAST(1.1 as DECIMAL(10, 5))', 'AND[c_decimal_3_0_null IS NULL,NULL]' + testSimplify false, false, 'CAST(c_decimal_3_0_null as DECIMAL(10, 5)) > CAST(1.1 as DECIMAL(10, 5))', '(c_decimal_3_0_null > 1)' + testSimplify false, false, 'CAST(c_decimal_3_0_null as DECIMAL(10, 5)) >= CAST(1.1 as DECIMAL(10, 5))', '(c_decimal_3_0_null >= 2)' + testSimplify false, false, 'CAST(c_decimal_3_0_null as DECIMAL(10, 5)) < CAST(1.1 as DECIMAL(10, 5))', '(c_decimal_3_0_null < 2)' + testSimplify false, false, 'CAST(c_decimal_3_0_null as DECIMAL(10, 5)) <= CAST(1.1 as DECIMAL(10, 5))', '(c_decimal_3_0_null <= 1)' + testSimplify false, false, 'c_decimal_5_2_null = CAST(1.0 as DECIMAL(10, 5))', '(c_decimal_5_2_null = 1.00)' + testSimplify false, false, 'c_decimal_5_2_null = CAST(1.1 as DECIMAL(10, 5))', '(c_decimal_5_2_null = 1.10)' + testSimplify false, false, 'c_decimal_5_2_null = CAST(1.12 as DECIMAL(10, 5))', '(c_decimal_5_2_null = 1.12)' + testSimplify false, false, 'c_decimal_5_2_null = CAST(1.123 as DECIMAL(10, 5))', 'AND[c_decimal_5_2_null IS NULL,NULL]' + testSimplify false, false, 'c_decimal_5_2 = CAST(1.123 as DECIMAL(10, 5))', 'FALSE' + testSimplify false, false, 'c_decimal_5_2_null > CAST(1.123 as DECIMAL(10, 5))', 'c_decimal_5_2_null > 1.12' + testSimplify false, false, 'c_decimal_5_2_null >= CAST(1.123 as DECIMAL(10, 5))', 'c_decimal_5_2_null >= 1.13' + testSimplify false, false, 'c_decimal_5_2_null <= CAST(1.123 as DECIMAL(10, 5))', 'c_decimal_5_2_null <= 1.12' + testSimplify false, false, 'c_decimal_5_2_null < CAST(1.123 as DECIMAL(10, 5))', 'c_decimal_5_2_null < 1.13' + testSimplify false, false, "CAST(c_datetime_0 AS DATETIME(5)) = '2000-01-01'", "(c_datetime_0 = '2000-01-01 00:00:00')" + testSimplify false, false, "CAST(c_datetime_0 AS DATETIME(5)) = '2000-01-01 00:00:00.1'", 'FALSE' + testSimplify false, false, "CAST(c_datetime_0_null AS DATETIME(5)) = '2000-01-01 00:00:00.1'", 'AND[c_datetime_0_null IS NULL,NULL]' + testSimplify false, false, "CAST(c_datetime_0_null AS DATETIME(5)) <=> '2000-01-01 00:00:00.1'", 'FALSE' + testSimplify false, false, "CAST(c_datetime_0 AS DATETIME(5)) >= '2000-01-01 00:00:00.1'", "(c_datetime_0 >= '2000-01-01 00:00:01')" + testSimplify false, false, "CAST(c_datetime_0 AS DATETIME(5)) > '2000-01-01 00:00:00.1'", "(c_datetime_0 > '2000-01-01 00:00:00')" + 
testSimplify false, false, "CAST(c_datetime_0 AS DATETIME(5)) <= '2000-01-01 00:00:00.1'", "(c_datetime_0 <= '2000-01-01 00:00:00')" + testSimplify false, false, "CAST(c_datetime_0 AS DATETIME(5)) < '2000-01-01 00:00:00.1'", "(c_datetime_0 < '2000-01-01 00:00:01')" + testSimplify false, false, "CAST(c_datetime_3 AS DATETIME(5)) = '2000-01-01'", "(c_datetime_3 = '2000-01-01 00:00:00.000')" + testSimplify false, false, "CAST(c_datetime_3 AS DATETIME(5)) = '2000-01-01 00:00:00.1234'", 'FALSE' + testSimplify false, false, "CAST(c_datetime_3_null AS DATETIME(5)) = '2000-01-01 00:00:00.1234'", 'AND[c_datetime_3_null IS NULL,NULL]' + testSimplify false, false, "CAST(c_datetime_3_null AS DATETIME(5)) <=> '2000-01-01 00:00:00.1234'", 'FALSE' + testSimplify false, false, "CAST(c_datetime_3 AS DATETIME(5)) >= '2000-01-01 00:00:00.1234'", "(c_datetime_3 >= '2000-01-01 00:00:00.124')" + testSimplify false, false, "CAST(c_datetime_3 AS DATETIME(5)) > '2000-01-01 00:00:00.1234'", "(c_datetime_3 > '2000-01-01 00:00:00.123')" + testSimplify false, false, "CAST(c_datetime_3 AS DATETIME(5)) <= '2000-01-01 00:00:00.1234'", "(c_datetime_3 <= '2000-01-01 00:00:00.123')" + testSimplify false, false, "CAST(c_datetime_3 AS DATETIME(5)) < '2000-01-01 00:00:00.1234'", "(c_datetime_3 < '2000-01-01 00:00:00.124')" + testSimplify false, false, "c_date = '2000-01-01 00:00:01'", 'FALSE' + testSimplify false, false, "CAST(c_date_null AS DATETIME(5)) = '2000-01-01 00:00:01'", 'AND[c_date_null IS NULL,NULL]' + testSimplify false, false, "CAST(c_date_null AS DATETIME(5)) <=> '2000-01-01 00:00:01'", 'FALSE' + testSimplify false, false, "CAST(c_date AS DATETIME(5)) > '2000-01-01 00:00:01'", "c_date > '2000-01-01'" + testSimplify false, false, "CAST(c_date AS DATETIME(5)) >= '2000-01-01 00:00:01'", "c_date >= '2000-01-02'" + testSimplify false, false, "CAST(c_date AS DATETIME(5)) <= '2000-01-01 00:00:01'", "c_date <= '2000-01-01'" + testSimplify false, false, "CAST(c_date AS DATETIME(5)) < '2000-01-01 00:00:01'", "c_date < '2000-01-02'" + + sql "DROP TABLE IF EXISTS ${tbl} FORCE" + } +} From ce9facb998a925fc551ddf8e03a229071ea0aa66 Mon Sep 17 00:00:00 2001 From: xzj7019 Date: Mon, 23 Dec 2024 16:21:12 +0800 Subject: [PATCH 53/55] [Improvement](tools) refine tools schema (#45778) --- .../ddl/create-tpcds-tables-sf1000.sql | 14 +++++++------- .../ddl/create-tpcds-tables-sf10000.sql | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql b/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql index 7e950580553f684..23b16480be3eb12 100644 --- a/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql +++ b/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql @@ -213,7 +213,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 3 +DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 4 PROPERTIES ( "replication_num" = "1" ); @@ -339,7 +339,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(inv_item_sk, inv_warehouse_sk) BUCKETS 1 +DISTRIBUTED BY HASH(inv_item_sk, inv_warehouse_sk) BUCKETS 2 PROPERTIES ( "replication_num" = "1" ); @@ -449,7 +449,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED 
BY HASH(cr_item_sk, cr_order_number) BUCKETS 1 +DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 2 PROPERTIES ( "replication_num" = "1" ); @@ -648,7 +648,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(wr_item_sk, wr_order_number) BUCKETS 1 +DISTRIBUTED BY HASH(wr_item_sk, wr_order_number) BUCKETS 2 PROPERTIES ( "replication_num" = "1" ); @@ -826,7 +826,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 3 +DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 2 PROPERTIES ( "replication_num" = "1" ); @@ -1006,7 +1006,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(sr_item_sk, sr_ticket_number) BUCKETS 1 +DISTRIBUTED BY HASH(sr_item_sk, sr_ticket_number) BUCKETS 2 PROPERTIES ( "replication_num" = "1" ); @@ -1112,7 +1112,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 3 +DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 4 PROPERTIES ( "replication_num" = "1" ); diff --git a/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql b/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql index 87201403d39fb10..e6d62165f970041 100644 --- a/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql +++ b/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql @@ -213,7 +213,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 256 +DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 216 PROPERTIES ( "replication_num" = "1", "colocate_with" = "catalog" @@ -340,7 +340,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(inv_item_sk, inv_warehouse_sk) BUCKETS 256 +DISTRIBUTED BY HASH(inv_item_sk, inv_warehouse_sk) BUCKETS 216 PROPERTIES ( "replication_num" = "1" ); @@ -450,7 +450,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 256 +DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 216 PROPERTIES ( "replication_num" = "1", "colocate_with" = "catalog" @@ -485,7 +485,7 @@ CREATE TABLE IF NOT EXISTS customer_address ( ca_location_type char(20) ) DUPLICATE KEY(ca_address_sk) -DISTRIBUTED BY HASH(ca_address_sk) BUCKETS 256 +DISTRIBUTED BY HASH(ca_address_sk) BUCKETS 216 PROPERTIES ( "replication_num" = "1" ); @@ -650,7 +650,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(wr_item_sk, wr_order_number) BUCKETS 256 +DISTRIBUTED BY HASH(wr_item_sk, wr_order_number) BUCKETS 216 PROPERTIES ( "replication_num" = "1", "colocate_with" = "web" @@ -829,7 +829,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) 
BUCKETS 256 +DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 216 PROPERTIES ( "replication_num" = "1", "colocate_with" = "web" @@ -1010,7 +1010,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(sr_item_sk, sr_ticket_number) BUCKETS 256 +DISTRIBUTED BY HASH(sr_item_sk, sr_ticket_number) BUCKETS 216 PROPERTIES ( "replication_num" = "1", "colocate_with" = "store" @@ -1117,7 +1117,7 @@ PARTITION `p70` VALUES LESS THAN ("2452945"), PARTITION `p71` VALUES LESS THAN ("2452975"), PARTITION `p72` VALUES LESS THAN (MAXVALUE) ) -DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 256 +DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 216 PROPERTIES ( "replication_num" = "1", "colocate_with" = "store" @@ -1158,7 +1158,7 @@ CREATE TABLE IF NOT EXISTS customer ( c_last_review_date_sk integer ) DUPLICATE KEY(c_customer_sk) -DISTRIBUTED BY HASH(c_customer_id) BUCKETS 256 +DISTRIBUTED BY HASH(c_customer_id) BUCKETS 216 PROPERTIES ( "replication_num" = "1" ); From 81d9af08457ed2c33bd0d80d9b36ea7b25b0bc8a Mon Sep 17 00:00:00 2001 From: qiye Date: Mon, 23 Dec 2024 16:38:36 +0800 Subject: [PATCH 54/55] [test](index compaction)Fix unstable index compaction fault injection case (#45784) Problem Summary: Related PR: #45127. When `enable_match_without_inverted_index` is set to `false`, `enable_common_expr_pushdown` must be `true`; otherwise queries fail with an `[E-6001]match_any not support execute_match` error. --- ...x_compaction_exception_fault_injection.groovy | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/regression-test/suites/fault_injection_p0/test_index_compaction_exception_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_compaction_exception_fault_injection.groovy index b54f6374d833b8d..9c0cb5aea97f0be 100644 --- a/regression-test/suites/fault_injection_p0/test_index_compaction_exception_fault_injection.groovy +++ b/regression-test/suites/fault_injection_p0/test_index_compaction_exception_fault_injection.groovy @@ -26,8 +26,6 @@ suite("test_index_compaction_exception_fault_injection", "nonConcurrent") { def changed_variables = sql "show variables where Changed = 1" logger.info("changed variables: " + changed_variables.toString()) - // sql "UNSET GLOBAL VARIABLE ALL;" - sql "SET global enable_match_without_inverted_index = false" boolean disableAutoCompaction = false @@ -120,7 +118,7 @@ 
suite("test_index_compaction_exception_fault_injection", "nonConcurrent") { assertEquals(3, result[2]['id']) assertEquals("bason", result[2]['name']) - result = sql_return_maparray "SELECT * FROM ${tableName} WHERE description MATCH 'singing'" + result = sql_return_maparray "SELECT /*+ SET_VAR(enable_match_without_inverted_index = false, enable_common_expr_pushdown = true) */ * FROM ${tableName} WHERE description MATCH 'singing'" assertEquals(3, result.size()) assertEquals("bason", result[0]['name']) assertEquals("bason is good at singing", result[0]['description']) @@ -147,7 +145,7 @@ suite("test_index_compaction_exception_fault_injection", "nonConcurrent") { assertEquals("bason", result[2]['name']) assertEquals("bason is good at singing", result[2]['description']) - result = sql_return_maparray "SELECT * FROM ${tableName} WHERE array_contains(scores, 79)" + result = sql_return_maparray "SELECT /*+ SET_VAR(enable_match_without_inverted_index = false, enable_common_expr_pushdown = true) */ * FROM ${tableName} WHERE array_contains(scores, 79)" assertEquals(3, result.size()) assertEquals("bason", result[0]['name']) assertEquals("[79, 85, 97]", result[0]['scores']) @@ -156,7 +154,7 @@ suite("test_index_compaction_exception_fault_injection", "nonConcurrent") { assertEquals("bason", result[2]['name']) assertEquals("[79, 85, 97]", result[2]['scores']) - result = sql_return_maparray "SELECT * FROM ${tableName} WHERE array_contains(hobbies, 'dancing')" + result = sql_return_maparray "SELECT /*+ SET_VAR(enable_match_without_inverted_index = false, enable_common_expr_pushdown = true) */ * FROM ${tableName} WHERE array_contains(hobbies, 'dancing')" assertEquals(3, result.size()) assertEquals("bason", result[0]['name']) assertEquals('["singing", "dancing"]', result[0]['hobbies']) @@ -165,7 +163,7 @@ suite("test_index_compaction_exception_fault_injection", "nonConcurrent") { assertEquals("bason", result[2]['name']) assertEquals('["singing", "dancing"]', result[2]['hobbies']) - result = sql_return_maparray "SELECT * FROM ${tableName} WHERE array_contains(evaluation, 'bason is very clever')" + result = sql_return_maparray "SELECT /*+ SET_VAR(enable_match_without_inverted_index = false, enable_common_expr_pushdown = true) */ * FROM ${tableName} WHERE array_contains(evaluation, 'bason is very clever')" assertEquals(3, result.size()) assertEquals("bason", result[0]['name']) assertEquals('["bason is very clever", "bason is very healthy"]', result[0]['evaluation']) @@ -338,7 +336,5 @@ suite("test_index_compaction_exception_fault_injection", "nonConcurrent") { if (has_update_be_config) { set_be_config.call("inverted_index_compaction_enable", invertedIndexCompactionEnable.toString()) } - sql "SET global enable_match_without_inverted_index = true" } - } From f662dd0f7f7f81ca7e78c2fffc15f4c9d8060670 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Mon, 23 Dec 2024 16:44:14 +0800 Subject: [PATCH 55/55] [feat](test) add tokenize ut test (#45374) Add unitest for token extractor for ngram bf index. 
--- be/test/olap/itoken_extractor_test.cpp | 493 +++++++++++++++++++++++++ 1 file changed, 493 insertions(+) diff --git a/be/test/olap/itoken_extractor_test.cpp b/be/test/olap/itoken_extractor_test.cpp index ea35f81973c73ca..3904dbee5e766fb 100644 --- a/be/test/olap/itoken_extractor_test.cpp +++ b/be/test/olap/itoken_extractor_test.cpp @@ -92,4 +92,497 @@ TEST_F(TestITokenExtractor, ngram_like_extractor) { runNextInStringLike(ngram_extractor, {from_u8string(u8"\\_手机%")}, {from_u8string(u8"_手"), from_u8string(u8"手机")}); } +
+TEST_F(TestITokenExtractor, ngram_extractor_empty_input) { + // Test empty string input, expect no output + std::string statement = ""; + std::vector<std::string> expect = {}; + NgramTokenExtractor ngram_extractor(2); + runNextInString(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_extractor_single_char) { + // Only one character, less than n=2, should produce no tokens + std::string statement = "a"; + std::vector<std::string> expect = {}; + NgramTokenExtractor ngram_extractor(2); + runNextInString(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_extractor_ascii_characters) { + // Test token extraction for pure ASCII characters + std::string statement = "abcd"; + // 2-gram tokens: "ab", "bc", "cd" + std::vector<std::string> expect = {"ab", "bc", "cd"}; + NgramTokenExtractor ngram_extractor(2); + runNextInString(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_extractor_emoji) { + // Test scenarios that include Emoji and other multi-byte UTF-8 characters + // Assume n=2. Here "👍" is an emoji (4 bytes), "测" is a Chinese character (3 bytes). + // String: "👍测A" (3 elements: 1 Emoji, 1 Chinese char, 1 ASCII) + // For two code points per token: + // First token: "👍测" + // Second token: "测A" + std::string statement = from_u8string(u8"👍测A"); + std::vector<std::string> expect = {from_u8string(u8"👍测"), from_u8string(u8"测A")}; + NgramTokenExtractor ngram_extractor(2); + runNextInString(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_extractor_n_greater_than_length) { + // When n=3 and the string length is only 2, no 3-character Ngram can be formed + std::string statement = "ab"; + std::vector<std::string> expect = {}; + NgramTokenExtractor ngram_extractor(3); + runNextInString(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_extractor_chinese_only) { + // Test pure Chinese characters with multi-byte UTF-8 tokens + // String: "中国人" (3 Chinese chars, each 3 bytes) + // n=2, expected tokens: ["中国", "国人"] + std::string statement = from_u8string(u8"中国人"); + std::vector<std::string> expect = {from_u8string(u8"中国"), from_u8string(u8"国人")}; + NgramTokenExtractor ngram_extractor(2); + runNextInString(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_extractor_mixed_width_characters) { + // Mixed character widths: English (1 byte), Chinese (3 bytes), Emoji (4 bytes) + // String: "A中👍B" + // Code points: 'A'(1), '中'(1), '👍'(1), 'B'(1) total 4 code points + // n=2 tokens: "A中", "中👍", "👍B" + std::string statement = from_u8string(u8"A中👍B"); + std::vector<std::string> expect = {from_u8string(u8"A中"), from_u8string(u8"中👍"), + from_u8string(u8"👍B")}; + NgramTokenExtractor ngram_extractor(2); + runNextInString(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_like_extractor_empty_input) { + // Test empty input for like extraction + std::string statement = ""; + std::vector<std::string> expect = {}; + NgramTokenExtractor ngram_extractor(2); + runNextInStringLike(ngram_extractor, statement, expect); +} + 
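+// Reading aid for the LIKE-pattern tests below (inferred from the expected tokens, not a
+// separate spec): unescaped '%' and '_' act as pattern characters that reset the current
+// token window, while "\\%" and "\\_" are unescaped to literal '%' / '_' and take part in
+// tokens like any other code point.
+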
+TEST_F(TestITokenExtractor, ngram_like_extractor_no_pattern) { + // No % or _, equivalent to extracting n-length sequences. + // String: "abc", n=2, theoretically extract "ab", "bc" + // next_in_string_like requires n code points to return a token. + // Without % or _, it should still extract normally. + std::string statement = "abc"; + // n=2: extract "ab", then "bc" + std::vector<std::string> expect = {"ab", "bc"}; + NgramTokenExtractor ngram_extractor(2); + runNextInStringLike(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_like_extractor_pattern1) { + // Unescaped '%' splits the pattern into segments, and n-grams are extracted per segment. + // String: "%abc%def%gh%", n=2: "abc" yields "ab", "bc"; "def" yields "de", "ef"; "gh" yields "gh". + std::string statement = "%abc%def%gh%"; + std::vector<std::string> expect = {"ab", "bc", "de", "ef", "gh"}; + NgramTokenExtractor ngram_extractor(2); + runNextInStringLike(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_like_extractor_patterns_only) { + // String has only '%' and '_', no normal chars to form a 2-gram + // "%__%", n=2: % and _ are not considered normal token characters + // Each encounter of % resets the token, so no tokens are generated + std::string statement = "%__%"; + std::vector<std::string> expect = {}; + NgramTokenExtractor ngram_extractor(2); + runNextInStringLike(ngram_extractor, statement, expect); +} +
+TEST_F(TestITokenExtractor, ngram_like_extractor_escaped_characters) { + // Test scenarios with escape characters: "\\%abc% \\_xyz_" + // Escaped '%' should be treated as a normal character, similarly for '_' + // Suppose n=2, for "\\%abc%": + // Initially encounter '\\%' => escaped '%', include it in token: "%a" + // Then 'a'(1 byte) 'b'(1 byte) form "ab", 'c'(1 byte) continues... + // A bit complex example, mainly to demonstrate properly handling escaped chars. + std::string statement = from_u8string(u8"\\%手机% \\_人_"); + // Analysis: + // "\\%" -> escaped '%', token gets "%"; with "手" that makes two code points: output "%手" + // The window then slides one code point: "手" and "机" form "手机" + // Next is '%': pattern char, reset the token + // ' '(space) and "\\_" (escaped '_') form " _" + // "_" and "人" form "_人"; the trailing '_' is a pattern char and resets the token + // Final result: {"%手", "手机", " _", "_人"}
+ std::vector expect = {"%手", "手机", " _", "_人"}; + NgramTokenExtractor ngram_extractor(2); + runNextInStringLike(ngram_extractor, statement, expect); +} + +TEST_F(TestITokenExtractor, ngram_like_extractor_complex_pattern) { + // Complex scenario: "abc%中_\\%国%d" + // n=2 analysis: + // Start from the beginning: 'a'(1 code point), 'b'(1 code point) => "ab" output + // Encounter 'c' then '%', at '%' reset token and move forward + // Next: "中"(1 code point), '_' is pattern reset + // Then "\\%" => '%'(1 code point), '国'(1 code point) => "%国" output + // Encounter '%', reset token + // Finally 'd' alone is not enough to form 2 code points, no output + std::string statement = from_u8string(u8"abc%中_\\%国%d"); + std::vector expect = {"ab", "bc", "%国"}; + NgramTokenExtractor ngram_extractor(2); + runNextInStringLike(ngram_extractor, statement, expect); +} + +TEST_F(TestITokenExtractor, ngram_extractor_different_n) { + // Test different n values + // String: "abcd" + // n=3: extract "abc", "bcd" + std::string statement = "abcd"; + std::vector expect = {"abc", "bcd"}; + NgramTokenExtractor ngram_extractor(3); + runNextInString(ngram_extractor, statement, expect); +} + +std::string get_repetition_info(const std::string& text, size_t n) { + NgramTokenExtractor ngram_extractor(n); + std::vector tokens; + + { + size_t pos = 0; + size_t token_start = 0; + size_t token_length = 0; + while (ngram_extractor.next_in_string(text.c_str(), text.size(), &pos, &token_start, + &token_length)) { + tokens.push_back(text.substr(token_start, token_length)); + } + } + + std::unordered_map token_count; + for (auto& t : tokens) { + token_count[t]++; + } + + int total_tokens = static_cast(tokens.size()); + int repeated_tokens = 0; + for (auto& kv : token_count) { + if (kv.second > 1) { + repeated_tokens += kv.second; + } + } + + double repetition_rate = 0.0; + if (total_tokens > 0) { + repetition_rate = static_cast(repeated_tokens) / total_tokens; + } + + std::ostringstream oss; + oss << "Total tokens: " << total_tokens << "\n" + << "Repeated tokens: " << repeated_tokens << "\n" + << "Repetition rate: " << repetition_rate << "\n"; + + return oss.str(); +} + +TEST_F(TestITokenExtractor, ngram_extractor_repetition_rate_matchine_text) { + std::string statement = + "Exception=System.CannotUnloadAppDomain;\n" + "HResult=0x00007486;\n" + "Message=exception happened;\n" + "Source=BenchmarkLogGenerator;\n" + "StackTrace:\n" + " at BenchmarkLogGenerator.Generator.Run(Int32 sizeFactor) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 84\n" + " at BenchmarkLogGenerator.Generator.<>c__DisplayClass26_0.b__0() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 74\n" + " at System.Threading.ThreadHelper.ThreadStart_Context(Object state)\n" + " at System.Threading.ExecutionContext.RunInternal(ExecutionContext executionContext)\n" + " at BenchmarkLogGenerator.Flows.BootFlow.GetLevel(Int64 v) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 85\n" + " at BenchmarkLogGenerator.Flows.BootFlow.d__1.MoveNext() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 47\n" + " at BenchmarkLogGenerator.Scheduler.Flow.NextStep() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 74\n" + " at BenchmarkLogGenerator.Scheduler.Step.EnqueueNextStep(Scheduler scheduler) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 112\n" + " at BenchmarkLogGenerator.Scheduler.FlowDelayStep.Execute(Scheduler scheduler) in " + 
"C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 137\n" + " at BenchmarkLogGenerator.Scheduler.Run() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 28\n" + " at BenchmarkLogGenerator.Generator.Run(Int32 sizeFactor) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 84\n" + " at BenchmarkLogGenerator.Generator.<>c__DisplayClass26_0.b__0() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 74\n" + " at System.Threading.ThreadHelper.ThreadStart_Context(Object state)\n" + " at System.Threading.ExecutionContext.RunInternal(ExecutionContext executionContext)\n" + " at BenchmarkLogGenerator.Flows.BootFlow.GetLevel(Int64 v) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 85\n" + " at BenchmarkLogGenerator.Flows.BootFlow.d__1.MoveNext() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 47\n" + " at BenchmarkLogGenerator.Scheduler.Flow.NextStep() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 74\n" + " at BenchmarkLogGenerator.Scheduler.Step.EnqueueNextStep(Scheduler scheduler) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 112\n" + " at BenchmarkLogGenerator.Scheduler.FlowDelayStep.Execute(Scheduler scheduler) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 137\n" + " at BenchmarkLogGenerator.Scheduler.Run() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 28\n" + " at BenchmarkLogGenerator.Generator.Run(Int32 sizeFactor) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 84\n" + " at BenchmarkLogGenerator.Generator.<>c__DisplayClass26_0.b__0() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 74\n" + " at System.Threading.ThreadHelper.ThreadStart_Context(Object state)\n" + " at System.Threading.ExecutionContext.RunInternal(ExecutionContext executionContext)\n" + " at BenchmarkLogGenerator.Flows.BootFlow.GetLevel(Int64 v) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 85\n" + " at BenchmarkLogGenerator.Flows.BootFlow.d__1.MoveNext() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 47\n" + " at BenchmarkLogGenerator.Scheduler.Flow.NextStep() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 74\n" + " at BenchmarkLogGenerator.Scheduler.Step.EnqueueNextStep(Scheduler scheduler) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 112\n" + " at BenchmarkLogGenerator.Scheduler.FlowDelayStep.Execute(Scheduler scheduler) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 137\n" + " at BenchmarkLogGenerator.Scheduler.Run() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 28\n" + " at BenchmarkLogGenerator.Generator.Run(Int32 sizeFactor) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 84\n" + " at BenchmarkLogGenerator.Generator.<>c__DisplayClass26_0.b__0() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 74\n" + " at System.Threading.ThreadHelper.ThreadStart_Context(Object state)\n" + " at System.Threading.ExecutionContext.RunInternal(ExecutionContext executionContext)\n" + " at BenchmarkLogGenerator.Flows.BootFlow.GetLevel(Int64 v) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 85\n" + " at BenchmarkLogGenerator.Flows.BootFlow.d__1.MoveNext() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 47\n" + " at BenchmarkLogGenerator.Scheduler.Flow.NextStep() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 74\n" + " at 
BenchmarkLogGenerator.Scheduler.Step.EnqueueNextStep(Scheduler scheduler) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 112\n" + " at BenchmarkLogGenerator.Scheduler.FlowDelayStep.Execute(Scheduler scheduler) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 137\n" + " at BenchmarkLogGenerator.Scheduler.Run() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Scheduler.cs:line 28\n" + " at BenchmarkLogGenerator.Generator.Run(Int32 sizeFactor) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 84\n" + " at BenchmarkLogGenerator.Generator.<>c__DisplayClass26_0.b__0() in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Generator.cs:line 74\n" + " at System.Threading.ThreadHelper.ThreadStart_Context(Object state)\n" + " at System.Threading.ExecutionContext.RunInternal(ExecutionContext executionContext)\n" + " at BenchmarkLogGenerator.Flows.BootFlow.GetLevel(Int64 v) in " + "C:\\Src\\Tools\\BenchmarkLogGenerator\\Flows\\BootFlow.cs:line 85"; + size_t n = 5; + std::string info = get_repetition_info(statement, n); + + std::cout << info << std::endl; +} +
+TEST_F(TestITokenExtractor, ngram_extractor_repetition_rate_short_text) { + std::string statement = + "I bought these leggings for my daughter @ Christmas along with several other " + "leggings. She liked these leggings the best since they were lined and are very warm. " + " She is 5'3\" and 115 lbs. and they fit her very well/comfortable. The only thing " + "I disliked about them is that the pattern is not uniform on both legs as it gets to " + "your upper thigh area."; + size_t n = 5; + std::string info = get_repetition_info(statement, n); + + std::cout << info << std::endl; +} +
+TEST_F(TestITokenExtractor, ngram_extractor_repetition_rate_medium_text) { + std::string statement = + "Loving the fabulous and exquisite women's wear for plus size women, because of how " + "this sweater makes you feel good about yourself, and speaks to her heart with a " + "positive perspective, given her overall character as well." + "I bought these leggings for my daughter @ Christmas along with several other " + "leggings. She liked these leggings the best since they were lined and are very warm. " + " She is 5'3\" and 115 lbs. and they fit her very well/comfortable. The only thing " + "I disliked about them is that the pattern is not uniform on both legs as it gets to " + "your upper thigh area." + "Love my boot cuffs I got as a gift. This is one I won’t be re-gifting. People at work " + "love it, good quality and good value. Love that it’s reversible and I can wear it " + "with any size boots." + "Reminds me of being 13 in the early 80's, only these are more attractive. These leg " + "warmers are exactly as pictured, soft & warm over my jeans to keep out the chill on " + "this snowy day. Brand new in package & I am very happy with this purchase. I will " + "buy another pair to double up the warmth on my bare legs." + "I couldn't be happier with this dress. It is the epitome of classic WW2 era ladies " + "fashion.<br />The material is lightweight, yet very soft and silky. It has a full " + "lining to it. I would<br />recommend sizing up on this particular<br />style as it " + "has a way of hugging your<br />curves, and in the midsection .<br /><br />If you have " + "a perfectly flat stomach, then No worries.<br />But ladies who have a wee bit of a " + "pouch inFront, this dress may hug you a tad in the tummy.<br />It hangs very nicely " + "in back, and flows<br />beautifully. Honestly , i would order one in<br />every " + "color of the rainbow if they sold<br />them !<br />I love it, Thank You!<br />This is " + "my 4th dress from this vendor, and<br />by far my favorite." + "This tie is super cute! I love the color and the design... but that's about it.<br /><br />The day after receiving it in the mail I strapped it on and wore it to work. " + "Within the first few hours I noticed the little white Vs began to fray and frizz. By " + "the end if the day most of white threading had completely frayed out. This tie was " + "very, very cheaply made.<br /><br />It's a shame, because it is... or was... a very " + "good-looking bow tie!" + "The color and pictures looks very good. It fits really nicely with a bit of stretch " + "in the material. I was afraid after washing it that the colors would fade but it did " + "not. I highly recommand it t!!!" + "I just purchased this coat, and I have to say that so far, I am very satisfied with " + "it. The belt is a nice added touch, but not necessary to wear. This coat keeps me " + "very warm, and with the winter we're having this year, it's been a life saver. I " + "have gotten compliments on how it looks as well. This is replacing another coat that " + "had a zipper that broke after two winters of wearing it, so I am being extra careful " + "when zippering up this one. It's too soon to say how sturdy the zipper is on this " + "one, but as far as everything else, it's serving its purpose well. I highly " + "recommend it for the quality and price." + "ABSOLUTELY JUNK! wore it about four times then the hood nearly ripped completely off! " + "The Seam came out completely! DO NOT BUY WOULD LOVE TO HAVE MY MONEY COMPLETELY " + "REFUNDED!" + "this was the worst thing I brought online<br />it was very cheaply made size not " + "true brought<br />as a gift was so embarrassing the person did not accept the gift<br />the fur inside looked real fake I am stuck with this one" + "Honestly the most comfortable jacket I've ever worn. Will probably buy this jacket " + "for the rest of my life. End of story" + "ok Im trying to figure out if this is women or unisex sizing..This has a man wearing " + "it but it clearly is for a girl. I need to know before I order." + "Very comfortable and cute! It works well in school uniform and everyday wear. The " + "light material and zippers on the shoulders are super unique and welcomed addition to " + "my otherwise drab uniform!" + "The color is active. THe style is ok.
One thing to remember is to order one size " + "bigger than your regular size. For example, I wear S and the size M is OK ON me" + "These are actually considered panty hose. Unless you are using under a dress or a " + "very long sweater dont buy. Leggins is not the right description!!!''" + "Nice Dress" + "I am overall happy with the leggings. But be aware that if you are larger then a size " + "8, these will be too small for you. I am a size 8 and they just fit. The pattern is " + "stretched out quite a bit, but I think it still looks pretty good even tho the " + "pattern stretch out is not quite as bright and crisp. No complaints about the length " + "for me. I am 5'7" and these leggings reach my ankles without the feeling that " + "they are going to pull off of my hips." + "I bought these jeans knowing they were marked 'irregular' and thought there would be " + "a noticeable flaw. But when I received these jeans I was pleasantly surprised. They " + "look great and I couldn't find a flaw. The only thing I noticed was that the jeans " + "fit a bit tight around my butt. This is my first pair of big star jeans so it could " + "just be how they fit but I'm not sure. Other than that, these jeans are great for the " + "price." + "great scarf for price, ships quickly, color is more turquoise, than baby blue. really " + "like the chevron design lots of compliments." + "The fit of these leggings is excellent, they are extremely comfortable and true to " + "size. Not a skinny girl's legging, there's room to breathe. The classy, paisley " + "pattern makes regular black leggings seem boring. Good material and the design is " + "done nicely. An excellent buy, thanks Amazon." + "The dress is gorgeous and the mesh hearts are awesome. the material was a little " + "surprising, but its really cool" + "It did take long to get though well worth the wait... This was a gift for my daughter " + "and she loved it!! No issues with the product !" + "I love this sweater. I bought it for my daughter and she loves it. The colors are " + "very bright and I will surely be purchasing more from this seller." + "I bought this sweater in this color and in black in medium. I wear a medium. I " + "tried on the black first and the entire sweater fell apart as I was putting it on! " + "It literally came apart at the seams!" + "This wallet is nice looking and has the strongest chain I have ever seen. However, " + "it
simply has too few wallets for credit cards, so I sent it back. Others, " + "however may like
it, so check it out anyway." + "My husband loves his new scarf, as it is so extremely soft and warm. He was even " + "willing to give up his favorite scarf, which he has worn for years, for this one. It " + "adds just the right amount of color at the neckline of his black wool overcoat to " + "wear to the office." + "This dress appears to be quite beautiful in picture but is not. The materials was not " + "very nice, looked a bit cheap. as well the overall fit was not very nice. Had the " + "materials been of slightly better quality, it would have made up for some minor " + "imperfections. The dress runs very very small. I am an xs/s typically and thought " + "this was just too too tight and uncomfortable." + "Very nice scarves. Only complaint would be the description says one is purple but it " + "is actually a burgandy color." + "I ordered a large which is my usual size and found the arms to really tight even " + "without a winter sweater.
Poor quality - strings and "pulls" everywhere" + "Thank you so much for my my beautiful dress. The fit was perfect. The detail of the " + "dress was exactly like the picture. Also the dress was delivered before time. Thanks " + "again and I will be making future purchases very soon.5 stars for sure." + "this is a great looking shirt but i wish they had it in a medium i would definatley " + "spend my money if it was smaller" + "Purchased this for my granddaughter, and she simply loves it! People tell her, she " + "looks like a "Pop Star" because of the design and even mention she looks like " + "Michael Jackson! All she needs is to learn how to sing and dance!" + "At first I was worried that they would not stay up, but that was not a problem. I " + "wish they were available in a calf length for boots" + "I purchased this hat, more for a joke then keeping warm. The hat and beard are well " + "made. Looks cool. I don't think the beard would really do much to keep your face " + "warm. My buddies all got a laugh when I showed up wearing it." + "The actual shorts and ordering process was great but listed measurements diddnt match " + "up. I ordered the nxt size up and still too small." + "If you are looking for stretchy these aren't it so make sure to order right size. " + "Because of the fleece material inside they slide down constantly. Not too happy. But " + "they are pretty." + "So I have a 45+ inch chest and a 31 inch waist. Some would say that I'm athletically " + "proportioned. I will never find anything that fits me the way that it's supposed to " + "fit but this hoodie came damn near close. The US XL is nearly perfect for me. It " + "tappers around the waist as advertise even for broader guy like myself. My only quirk " + "is the collar around the hood gives a "no neck" appearance. But it's growing " + "on me. So as I said "nearly perfect"." + "This hat was purchased for my nephew for Christmas. It barely made it through " + "Christmas Eve. The fabric is extremely flimsy and there was a giant hole in it after " + "one or two times he put it on. I was able to get Amazon to refund my money, but not " + "worth the purchase. Very flimsy material." + "Got these for my mom and she wears them all the time. cute and comfy. I will borrow " + "them from her soon." + "first, the color is not like the picture above, the material of the shirt looks so " + "cheap and uncomfortable, the lace also looks so cheap.
second, at least use a " + "better material, the product really don't looks like the picture and not worthy at all" + "I purchased for my daughter and she loves it! This is a very high quality product " + "and worth the cost. I certainly would not pay $500 as the suggested price but " + "certainly worth the $160 paid. It did take nearly one month to arrive." + "The elastic material is comfortable, fits great on me . The straps are detachable so " + "you can have it cross your back or go bare." + "This blazer was poorly sewn together. The metal closure fell off when trying it on " + "for the first time. The material was uneven in length. This was a disappointing " + "purchase." + "I'm wearing this with my steelers t-shirt when I go to Vegas in a couple of weeks to " + "represent my team even though we not in the super bowl" + "I ordered a 3X. Normally a 2X will fit me in most clothing, but I order 3X when " + "available. This was tight and very,very thin. I returned it." + "This hood is super adorable and I love the pink/gray combination. There are just 2 " + "small things that I wasn't thrilled about. 1) The hood itself is just a tad small. 2) " + "The back part is cut kinda short leaving my neck a tinse exposed but I just pushed " + "the hood further back on my head and got a bit more coverage out of it. But I can " + "live with those things because it is super cute!" + "Love the color, cut and style of these gloves. They keep my hands warm without " + "restricting the use of my fingers for keying, sorting , etc. I think they are the " + "smartest buy I've made all winter!" + "so sucks the quality
the color is not like the picture above and the fur makes " + "it looks so cheap" + "And they look great on me! LOL They are simple with a classic look to them. I'll " + "probably pair with similar color shoes." + "The size was at least two sizes smaller than the printed size. They do not shape " + "well. I was very disappointed."; + size_t n = 5; + std::string info = get_repetition_info(statement, n); + + std::cout << info << std::endl; +} } // namespace doris
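
Note on what these three fixtures exercise: get_repetition_info(statement, n) reports how repetitive the input is at the n-gram level (n = 5 here), and the statements are chosen to span the spectrum — a stack trace that loops over the same frames, a single short review, and a long concatenation of mostly distinct reviews. The helper's implementation is not part of this hunk; as a rough illustration only (hypothetical names, byte-level n-grams rather than the tokenizer's UTF-8-aware ones), a repetition rate of this kind can be computed as one minus the ratio of distinct n-grams to total n-grams:

    #include <iostream>
    #include <string>
    #include <unordered_set>

    // Sketch: repetition rate = 1 - (#distinct n-grams / #total n-grams).
    static double repetition_rate(const std::string& text, size_t n) {
        if (text.size() < n) {
            return 0.0; // too short to contain even one n-gram
        }
        size_t total = text.size() - n + 1;
        std::unordered_set<std::string> distinct;
        for (size_t i = 0; i < total; ++i) {
            distinct.insert(text.substr(i, n));
        }
        return 1.0 - static_cast<double>(distinct.size()) / static_cast<double>(total);
    }

    int main() {
        std::cout << repetition_rate("abcabcabcabcabc", 5) << "\n";     // high: 1 - 3/11
        std::cout << repetition_rate("the quick brown fox", 5) << "\n"; // 0: all 5-grams distinct
        return 0;
    }

On data like the looping stack trace the rate climbs toward 1, while the concatenated-reviews text stays low; the tests only stream the helper's formatted string output, so treat the sketch above as an assumption about the metric, not as the actual get_repetition_info implementation.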