From c98330328905fad5f54980e3135d828b27e796b7 Mon Sep 17 00:00:00 2001 From: Chester Date: Tue, 21 Jan 2025 10:19:33 +0800 Subject: [PATCH 01/31] [ut](test) Add BE unit tests for HLL functions: hll_cardinality, hll_to_base64, hll_from_base64 (#47209) Add BE unit tests for these **HLL** functions: - hll_cardinality() - hll_to_base64() - hll_from_base64() --- be/src/olap/hll.h | 2 +- be/test/vec/function/function_hll_test.cpp | 136 ++++++++++++++++++++ be/test/vec/function/function_test_util.cpp | 7 + be/test/vec/function/function_test_util.h | 19 ++- 4 files changed, 160 insertions(+), 4 deletions(-) create mode 100644 be/test/vec/function/function_hll_test.cpp diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h index 1d01223c2573ad..fbf8d62a6cc259 100644 --- a/be/src/olap/hll.h +++ b/be/src/olap/hll.h @@ -239,7 +239,7 @@ class HyperLogLog { static bool is_valid(const Slice& slice); // only for debug - std::string to_string() { + std::string to_string() const { switch (_type) { case HLL_DATA_EMPTY: return {}; diff --git a/be/test/vec/function/function_hll_test.cpp b/be/test/vec/function/function_hll_test.cpp new file mode 100644 index 00000000000000..6b3537740b1396 --- /dev/null +++ b/be/test/vec/function/function_hll_test.cpp @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include +#include + +#include "function_test_util.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_hll.h" +#include "vec/data_types/data_type_number.h" + +namespace doris::vectorized { + +TEST(function_hll_test, function_hll_cardinality_test) { + std::string func_name = "hll_cardinality"; + InputTypeSet input_types = {TypeIndex::HLL}; + + const std::string input1 = "test"; + const uint64_t hash_value1 = + HashUtil::murmur_hash64A(input1.data(), input1.size(), HashUtil::MURMUR_SEED); + HyperLogLog hll1(hash_value1); + + const std::string input2 = " "; + const uint64_t hash_value2 = + HashUtil::murmur_hash64A(input2.data(), input2.size(), HashUtil::MURMUR_SEED); + HyperLogLog hll2(hash_value2); + + HyperLogLog hll3(HLL_DATA_EXPLICIT); + hll3.update(hash_value1); + hll3.update(hash_value2); + + // we update the same hash value twice, the result should be the same as update once, which is 2 + HyperLogLog hll4(HLL_DATA_EXPLICIT); + hll4.update(hash_value1); + hll4.update(hash_value1); + + HyperLogLog empty_hll; + + DataSet data_set = {{{&hll1}, (int64_t)1}, {{&hll2}, (int64_t)1}, {{&hll3}, (int64_t)3}, + {{&hll4}, (int64_t)2}, {{&empty_hll}, (int64_t)0}, {{Null()}, (int64_t)0}}; + + static_cast(check_function(func_name, input_types, data_set)); +} + +TEST(function_hll_test, function_hll_to_base64_test) { + std::string func_name = "hll_to_base64"; + InputTypeSet input_types = {TypeIndex::HLL}; + + const std::string input1 = "test"; + const uint64_t hash_value1 = + HashUtil::murmur_hash64A(input1.data(), input1.size(), HashUtil::MURMUR_SEED); + HyperLogLog hll1(hash_value1); + + const std::string input2 = " "; + const uint64_t hash_value2 = + HashUtil::murmur_hash64A(input2.data(), input2.size(), HashUtil::MURMUR_SEED); + HyperLogLog hll2(hash_value2); + + HyperLogLog hll3; + hll3.update(hash_value1); + hll3.update(hash_value2); + + // Although the hll4 update the hash_value1 twice, the result should be the same as update once. + HyperLogLog hll4; + hll4.update(hash_value1); + hll4.update(hash_value2); + hll4.update(hash_value1); + + HyperLogLog empty_hll; + + DataSet data_set = {{{&hll1}, std::string("AQHm5IIJCx0h/w==")}, + {{&hll2}, std::string("AQG/Hk98sO59Sw==")}, + {{&hll3}, std::string("AQLm5IIJCx0h/78eT3yw7n1L")}, + {{&hll4}, std::string("AQLm5IIJCx0h/78eT3yw7n1L")}, + {{&empty_hll}, std::string("AA==")}, + {{Null()}, Null()}}; + + static_cast(check_function(func_name, input_types, data_set)); +} + +TEST(function_hll_test, function_hll_from_base64_test) { + std::string func_name = "hll_from_base64"; + InputTypeSet input_types = {TypeIndex::String}; + + const std::string input1 = "AQHm5IIJCx0h/w=="; + const std::string output1 = "test"; + const uint64_t hash_value1 = + HashUtil::murmur_hash64A(output1.data(), output1.size(), HashUtil::MURMUR_SEED); + HyperLogLog hll1(hash_value1); + + const std::string input2 = "AQG/Hk98sO59Sw=="; + const std::string output2 = " "; + const uint64_t hash_value2 = + HashUtil::murmur_hash64A(output2.data(), output2.size(), HashUtil::MURMUR_SEED); + HyperLogLog hll2(hash_value2); + + const std::string input3 = "AQLm5IIJCx0h/78eT3yw7n1L"; + HyperLogLog hll3; + hll3.update(hash_value1); + hll3.update(hash_value2); + + // Although the hll4 update the hash_value1 twice, the result should be the same as update once. 
+ const std::string input4 = input3; + HyperLogLog hll4; + hll4.update(hash_value1); + hll4.update(hash_value2); + hll4.update(hash_value1); + + const std::string input5 = "AA=="; + HyperLogLog empty_hll; + + DataSet data_set = {{{input1}, hll1}, + {{input2}, hll2}, + {{input3}, hll3}, + {{input4}, hll4}, + {{input5}, empty_hll}}; + + static_cast(check_function(func_name, input_types, data_set)); +} +} // namespace doris::vectorized diff --git a/be/test/vec/function/function_test_util.cpp b/be/test/vec/function/function_test_util.cpp index 287b5d5c4b3b29..52d1768211f34c 100644 --- a/be/test/vec/function/function_test_util.cpp +++ b/be/test/vec/function/function_test_util.cpp @@ -104,6 +104,10 @@ size_t type_index_to_data_type(const std::vector& input_types, size_t i desc.type = doris::PrimitiveType::TYPE_OBJECT; type = std::make_shared(); return 1; + case TypeIndex::HLL: + desc.type = doris::PrimitiveType::TYPE_OBJECT; + type = std::make_shared(); + return 1; case TypeIndex::IPv4: desc.type = doris::PrimitiveType::TYPE_IPV4; type = std::make_shared(); @@ -331,6 +335,9 @@ bool insert_cell(MutableColumnPtr& column, DataTypePtr type_ptr, const AnyType& } else if (type.idx == TypeIndex::BitMap) { auto* bitmap = any_cast(cell); column->insert_data((char*)bitmap, sizeof(BitmapValue)); + } else if (type.idx == TypeIndex::HLL) { + auto* hll = any_cast(cell); + column->insert_data((char*)hll, sizeof(HyperLogLog)); } else if (type.is_ipv4()) { auto value = any_cast(cell); column->insert_data(reinterpret_cast(&value), 0); diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 1c4c0906b80d3e..937a873c607f9d 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -50,6 +50,7 @@ #include "vec/core/wide_integer.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_bitmap.h" +#include "vec/data_types/data_type_hll.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" @@ -247,7 +248,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty std::vector> constant_cols; for (size_t i = 0; i < descs.size(); ++i) { auto& desc = descs[i]; - arguments.push_back(i); + arguments.push_back(static_cast(i)); arg_types.push_back(desc.type_desc); if (desc.is_const) { constant_col_ptrs.push_back( @@ -356,6 +357,18 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty } EXPECT_EQ(expect_data.to_string(), bitmap_col->get_element(i).to_string()) << " at row " << i; + } else if constexpr (std::is_same_v) { + const ColumnHLL* hll_col = nullptr; + if constexpr (nullable) { + const auto* nullable_column = + assert_cast(column.get()); + hll_col = assert_cast( + nullable_column->get_nested_column_ptr().get()); + } else { + hll_col = assert_cast(column.get()); + } + EXPECT_EQ(expect_data.to_string(), hll_col->get_element(i).to_string()) + << " at row " << i; } else if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { @@ -390,8 +403,8 @@ using BaseInputTypeSet = std::vector; template void check_function_all_arg_comb(const std::string& func_name, const BaseInputTypeSet& base_set, const DataSet& data_set) { - int arg_cnt = base_set.size(); - TestCaseInfo::arg_size = arg_cnt; + size_t arg_cnt = base_set.size(); + TestCaseInfo::arg_size = static_cast(arg_cnt); // Consider each parameter as a bit, if the j-th bit is 1, the j-th parameter is const; otherwise, it 
is not. for (int i = 0; i < (1 << arg_cnt); i++) { InputTypeSet input_types {}; From f8e7feafe319763ed3b6844cf3d2a56bb238964c Mon Sep 17 00:00:00 2001 From: zzzxl Date: Tue, 21 Jan 2025 11:15:42 +0800 Subject: [PATCH 02/31] [fix](inverted index) fix case test_index_ddl_fault_injection (#47253) --- .../fault_injection_p0/test_index_ddl_fault_injection.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression-test/suites/fault_injection_p0/test_index_ddl_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_ddl_fault_injection.groovy index 4a907e8601ddae..2065be784e9362 100644 --- a/regression-test/suites/fault_injection_p0/test_index_ddl_fault_injection.groovy +++ b/regression-test/suites/fault_injection_p0/test_index_ddl_fault_injection.groovy @@ -74,7 +74,7 @@ suite("test_index_ddl_fault_injection", "nonConcurrent") { sql """ CREATE TABLE `${tableName2}` ( `col0` bigint NOT NULL, - `col1` boolean NULL, + `col1` boolean NOT NULL, `col2` tinyint NOT NULL, INDEX col1 (`col1`) USING INVERTED, INDEX col2 (`col2`) USING INVERTED From 2dcb12ea2b388608b7cad821c0c5d9d4cd0389e1 Mon Sep 17 00:00:00 2001 From: Uniqueyou Date: Tue, 21 Jan 2025 11:58:10 +0800 Subject: [PATCH 03/31] [fix](binlog) Fix table not gc binlog meta/records (#46981) db, table enable binlog ``` *************************** 6. row *************************** Name: db Type: db Id: 1737094589956 Dropped: false BinlogLength: 5 BinlogSize: 2092 FirstBinlogCommittedTime: 1737094625988 ReadableFirstBinlogCommittedTime: 2025-01-17 14:17:05 LastBinlogCommittedTime: 1737094630182 ReadableLastBinlogCommittedTime: 2025-01-17 14:17:10 BinlogTtlSeconds: 86400 BinlogMaxBytes: 9223372036854775807 BinlogMaxHistoryNums: 9223372036854775807 ``` db disable binlog, table enable binlog db binlog record will be dropped ``` *************************** 6. row *************************** Name: db.t Type: table Id: 1737094589958 Dropped: false BinlogLength: 4 BinlogSize: 955 FirstBinlogCommittedTime: 1737094625988 ReadableFirstBinlogCommittedTime: 2025-01-17 14:17:05 LastBinlogCommittedTime: 1737094630182 ReadableLastBinlogCommittedTime: 2025-01-17 14:17:10 BinlogTtlSeconds: 86400 BinlogMaxBytes: 9223372036854775807 BinlogMaxHistoryNums: 9223372036854775807 ``` db, table disable binlog ``` *************************** 6. 
row *************************** Name: db.t Type: table Id: 1737094589958 Dropped: false BinlogLength: 1 BinlogSize: 0 FirstBinlogCommittedTime: NULL ReadableFirstBinlogCommittedTime: NULL LastBinlogCommittedTime: NULL ReadableLastBinlogCommittedTime: NULL BinlogTtlSeconds: 86400 BinlogMaxBytes: 9223372036854775807 BinlogMaxHistoryNums: 9223372036854775807 ``` --- .../apache/doris/binlog/BinlogManager.java | 2 + .../org/apache/doris/binlog/DBBinlog.java | 13 +- .../org/apache/doris/binlog/TableBinlog.java | 26 ++-- .../org/apache/doris/binlog/DbBinlogTest.java | 118 ++++++++++++++++++ .../doris/binlog/MockBinlogConfigCache.java | 5 + .../apache/doris/binlog/TableBinlogTest.java | 117 +++++++++++++++++ 6 files changed, 263 insertions(+), 18 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java index 4b65fbfc0dc4f8..69293ea6c00da0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java @@ -592,9 +592,11 @@ public List gc() { tombstones.add(dbTombstones); } } + return tombstones; } + public void replayGc(BinlogGcInfo binlogGcInfo) { lock.writeLock().lock(); Map gcDbBinlogMap; diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java index 4a2a1544dc1ef6..2568640e5f2b09 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/DBBinlog.java @@ -341,17 +341,11 @@ public BinlogTombstone gc() { if (dbBinlogConfig == null) { LOG.error("db not found. dbId: {}", dbId); return null; - } - - BinlogTombstone tombstone; - if (dbBinlogConfig.isEnable()) { - // db binlog is enabled, only one binlogTombstones - tombstone = dbBinlogEnableGc(dbBinlogConfig); + } else if (!dbBinlogConfig.isEnable()) { + return dbBinlogDisableGc(); } else { - tombstone = dbBinlogDisableGc(); + return dbBinlogEnableGc(dbBinlogConfig); } - - return tombstone; } private BinlogTombstone collectTableTombstone(List tableTombstones, boolean isDbGc) { @@ -395,6 +389,7 @@ private BinlogTombstone dbBinlogDisableGc() { tombstones.add(tombstone); } } + BinlogTombstone tombstone = collectTableTombstone(tombstones, false); if (tombstone != null) { removeExpiredMetaData(tombstone.getCommitSeq()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/TableBinlog.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/TableBinlog.java index 3ec6987b83dcd3..c81f023a645c6f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/TableBinlog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/TableBinlog.java @@ -244,8 +244,11 @@ public BinlogTombstone commitSeqGc(long expiredCommitSeq) { public BinlogTombstone gc() { // step 1: get expire time BinlogConfig tableBinlogConfig = binlogConfigCache.getTableBinlogConfig(dbId, tableId); + Boolean isCleanFullBinlog = false; if (tableBinlogConfig == null) { return null; + } else if (!tableBinlogConfig.isEnable()) { + isCleanFullBinlog = true; } long ttlSeconds = tableBinlogConfig.getTtlSeconds(); @@ -255,22 +258,27 @@ public BinlogTombstone gc() { LOG.info( "gc table binlog. 
dbId: {}, tableId: {}, expiredMs: {}, ttlSecond: {}, maxBytes: {}, " - + "maxHistoryNums: {}, now: {}", - dbId, tableId, expiredMs, ttlSeconds, maxBytes, maxHistoryNums, System.currentTimeMillis()); + + "maxHistoryNums: {}, now: {}, isCleanFullBinlog: {}", + dbId, tableId, expiredMs, ttlSeconds, maxBytes, maxHistoryNums, System.currentTimeMillis(), + isCleanFullBinlog); // step 2: get tombstoneUpsertBinlog and dummyBinlog Pair tombstoneInfo; lock.writeLock().lock(); try { - // find the last expired commit seq. long expiredCommitSeq = -1; - Iterator> timeIterator = timestamps.iterator(); - while (timeIterator.hasNext()) { - Pair entry = timeIterator.next(); - if (expiredMs < entry.second) { - break; + if (isCleanFullBinlog) { + expiredCommitSeq = binlogs.last().getCommitSeq(); + } else { + // find the last expired commit seq. + Iterator> timeIterator = timestamps.iterator(); + while (timeIterator.hasNext()) { + Pair entry = timeIterator.next(); + if (expiredMs < entry.second) { + break; + } + expiredCommitSeq = entry.first; } - expiredCommitSeq = entry.first; } final long lastExpiredCommitSeq = expiredCommitSeq; diff --git a/fe/fe-core/src/test/java/org/apache/doris/binlog/DbBinlogTest.java b/fe/fe-core/src/test/java/org/apache/doris/binlog/DbBinlogTest.java index 06230bfce568a8..939ae49f232239 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/binlog/DbBinlogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/binlog/DbBinlogTest.java @@ -304,4 +304,122 @@ boolean isEnableTable(long dbId, long tableId) { } } } + + @Test + public void testDbAndTableGcWithDisable() { + // init base data + long expiredTime = baseNum + expiredBinlogNum; + Map ttlMap = Maps.newHashMap(); + for (int i = 0; i < tableNum; ++i) { + String key = String.format("%d_%d", dbId, baseTableId + i); + ttlMap.put(key, expiredTime); + } + MockBinlogConfigCache binlogConfigCache = BinlogTestUtils.newMockBinlogConfigCache(ttlMap); + // disable db binlog + binlogConfigCache.addDbBinlogConfig(dbId, false, 0L); + // disable some table binlog + for (int i = 0; i <= gcTableNum; i++) { + binlogConfigCache.addTableBinlogConfig(dbId, baseTableId + i, false, expiredTime); + } + + // init & add binlogs + List testBinlogs = Lists.newArrayList(); + Long[] tableLastCommitInfo = new Long[tableNum]; + long maxGcTableId = baseTableId + gcTableNum; + for (int i = 0; i < totalBinlogNum; ++i) { + long tableId = baseTableId + (i / tableNum); + long commitSeq = baseNum + i; + tableLastCommitInfo[i / tableNum] = commitSeq; + TBinlog binlog = BinlogTestUtils.newBinlog(dbId, tableId, commitSeq, baseNum); + testBinlogs.add(binlog); + } + + // init DbBinlog + DBBinlog dbBinlog = null; + + // insert binlogs + for (int i = 0; i < totalBinlogNum; ++i) { + if (dbBinlog == null) { + dbBinlog = new DBBinlog(binlogConfigCache, testBinlogs.get(i)); + } + dbBinlog.addBinlog(testBinlogs.get(i), null); + } + + // trigger gc + BinlogTombstone tombstone = dbBinlog.gc(); + + // check binlog status - all binlogs should be cleared for disabled tables + for (TBinlog binlog : testBinlogs) { + long tableId = binlog.getTableIds().get(0); + if (tableId <= maxGcTableId) { + // For disabled tables, all binlogs should be cleared + Assert.assertEquals(0, binlog.getTableRef()); + } else { + // For enabled tables, only expired binlogs should be cleared + if (binlog.getTimestamp() <= expiredTime) { + Assert.assertEquals(0, binlog.getTableRef()); + } else { + Assert.assertEquals(1, binlog.getTableRef()); + } + } + } + + // check tombstone + 
Assert.assertFalse(tombstone.isDbBinlogTomstone()); + Assert.assertEquals(baseNum + totalBinlogNum - 1, tombstone.getCommitSeq()); + } + + @Test + public void testDbAndTableGcWithEnable() { + // init base data + long expiredTime = baseNum + expiredBinlogNum; + Map ttlMap = Maps.newHashMap(); + for (int i = 0; i < tableNum; ++i) { + String key = String.format("%d_%d", dbId, baseTableId + i); + ttlMap.put(key, expiredTime); + } + MockBinlogConfigCache binlogConfigCache = BinlogTestUtils.newMockBinlogConfigCache(ttlMap); + // enable db binlog + binlogConfigCache.addDbBinlogConfig(dbId, true, expiredTime); + // enable all table binlog + for (int i = 0; i < tableNum; i++) { + binlogConfigCache.addTableBinlogConfig(dbId, baseTableId + i, true, expiredTime); + } + + // init & add binlogs + List testBinlogs = Lists.newArrayList(); + for (int i = 0; i < totalBinlogNum; ++i) { + long tableId = baseTableId + (i / tableNum); + long commitSeq = baseNum + i; + TBinlog binlog = BinlogTestUtils.newBinlog(dbId, tableId, commitSeq, baseNum + i); + testBinlogs.add(binlog); + } + + // init DbBinlog + DBBinlog dbBinlog = null; + + // insert binlogs + for (int i = 0; i < totalBinlogNum; ++i) { + if (dbBinlog == null) { + dbBinlog = new DBBinlog(binlogConfigCache, testBinlogs.get(i)); + } + dbBinlog.addBinlog(testBinlogs.get(i), null); + } + + // trigger gc + BinlogTombstone tombstone = dbBinlog.gc(); + + // check binlog status - only expired binlogs should be cleared + for (TBinlog binlog : testBinlogs) { + if (binlog.getTimestamp() <= expiredTime) { + Assert.assertEquals(0, binlog.getTableRef()); + } else { + Assert.assertEquals(1, binlog.getTableRef()); + } + } + + // check tombstone + Assert.assertTrue(tombstone.isDbBinlogTomstone()); + Assert.assertEquals(expiredTime, tombstone.getCommitSeq()); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/binlog/MockBinlogConfigCache.java b/fe/fe-core/src/test/java/org/apache/doris/binlog/MockBinlogConfigCache.java index 4622171e9300b9..d2720bf61d5b9f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/binlog/MockBinlogConfigCache.java +++ b/fe/fe-core/src/test/java/org/apache/doris/binlog/MockBinlogConfigCache.java @@ -34,6 +34,11 @@ public void addDbBinlogConfig(long dbId, boolean enableBinlog, long expiredTime) mockedConfigs.put(String.valueOf(dbId), config); } + public void addTableBinlogConfig(long dbId, long tableId, boolean enableBinlog, long expiredTime) { + BinlogConfig config = BinlogTestUtils.newTestBinlogConfig(enableBinlog, expiredTime); + mockedConfigs.put(String.format("%d_%d", dbId, tableId), config); + } + @Override public BinlogConfig getTableBinlogConfig(long dbId, long tableId) { return mockedConfigs.get(String.format("%d_%d", dbId, tableId)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/binlog/TableBinlogTest.java b/fe/fe-core/src/test/java/org/apache/doris/binlog/TableBinlogTest.java index cd86c5935e16af..e55a1f252f0e29 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/binlog/TableBinlogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/binlog/TableBinlogTest.java @@ -20,6 +20,7 @@ import org.apache.doris.thrift.TBinlog; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import mockit.Mock; import mockit.MockUp; import org.junit.Assert; @@ -27,6 +28,7 @@ import org.junit.Test; import java.util.List; +import java.util.Map; public class TableBinlogTest { private long dbId = 10000; @@ -139,4 +141,119 @@ public void testCommitSeqGc() { TBinlog dummy = tableBinlog.getDummyBinlog(); 
Assert.assertEquals(expiredCommitSeq, dummy.getCommitSeq()); } + + @Test + public void testTableGcBinlogWithDisable() { + // mock BinlogUtils + new MockUp() { + @Mock + public long getExpiredMs(long direct) { + return direct; + } + }; + Map ttlMap = Maps.newHashMap(); + + // init base data + long expiredTime = baseNum + expiredBinlogNum; + ttlMap.put(String.format("%d_%d", dbId, tableId), expiredTime); + + MockBinlogConfigCache binlogConfigCache = BinlogTestUtils.newMockBinlogConfigCache(ttlMap); + + // disable table binlog + binlogConfigCache.addTableBinlogConfig(dbId, tableId, false, expiredTime); + + // init & add binlogs + List testBinlogs = Lists.newArrayList(); + for (int i = 0; i < totalBinlogNum; ++i) { + TBinlog binlog = BinlogTestUtils.newBinlog(dbId, tableId, baseNum + i, baseNum + i); + testBinlogs.add(binlog); + } + + // init TableBinlog + TableBinlog tableBinlog = null; + + // insert binlogs + for (int i = 0; i < totalBinlogNum; ++i) { + if (tableBinlog == null) { + tableBinlog = new TableBinlog(binlogConfigCache, testBinlogs.get(i), dbId, tableId); + } + tableBinlog.addBinlog(testBinlogs.get(i)); + } + + // trigger gc + BinlogTombstone tombstone = tableBinlog.gc(); + + // check binlog status - all binlogs should be cleared when table binlog is disabled + for (TBinlog binlog : testBinlogs) { + Assert.assertEquals(0, binlog.getTableRef()); + } + + // check tombstone + Assert.assertFalse(tombstone.isDbBinlogTomstone()); + Assert.assertEquals(baseNum + totalBinlogNum - 1, tombstone.getCommitSeq()); + + // check dummy - should have the last commitSeq + TBinlog dummy = tableBinlog.getDummyBinlog(); + Assert.assertEquals(baseNum + totalBinlogNum - 1, dummy.getCommitSeq()); + } + + @Test + public void testTableGcBinlogWithEnable() { + // mock BinlogUtils + new MockUp() { + @Mock + public long getExpiredMs(long direct) { + return direct; + } + }; + Map ttlMap = Maps.newHashMap(); + + // init base data + long expiredTime = baseNum + expiredBinlogNum; + ttlMap.put(String.format("%d_%d", dbId, tableId), expiredTime); + + MockBinlogConfigCache binlogConfigCache = BinlogTestUtils.newMockBinlogConfigCache(ttlMap); + + // enable table binlog + binlogConfigCache.addTableBinlogConfig(dbId, tableId, true, expiredTime); + + // init & add binlogs + List testBinlogs = Lists.newArrayList(); + for (int i = 0; i < totalBinlogNum; ++i) { + TBinlog binlog = BinlogTestUtils.newBinlog(dbId, tableId, baseNum + i, baseNum + i); + testBinlogs.add(binlog); + } + + // init TableBinlog + TableBinlog tableBinlog = null; + + // insert binlogs + for (int i = 0; i < totalBinlogNum; ++i) { + if (tableBinlog == null) { + tableBinlog = new TableBinlog(binlogConfigCache, testBinlogs.get(i), dbId, tableId); + } + tableBinlog.addBinlog(testBinlogs.get(i)); + } + + // trigger gc + BinlogTombstone tombstone = tableBinlog.gc(); + + // check binlog status - only expired binlogs should be cleared + for (TBinlog binlog : testBinlogs) { + if (binlog.getTimestamp() <= expiredTime) { + Assert.assertEquals(0, binlog.getTableRef()); + } else { + Assert.assertEquals(1, binlog.getTableRef()); + } + } + + // check tombstone + Assert.assertFalse(tombstone.isDbBinlogTomstone()); + Assert.assertEquals(expiredTime, tombstone.getCommitSeq()); + + // check dummy - should have the expiredTime as commitSeq + TBinlog dummy = tableBinlog.getDummyBinlog(); + Assert.assertEquals(expiredTime, dummy.getCommitSeq()); + } + } From 62f3920fd09bc2515e8c7e420128cf3156a98c5e Mon Sep 17 00:00:00 2001 From: zhangdong Date: Tue, 21 Jan 2025 11:59:22 +0800 
Subject: [PATCH 04/31] [fix](mtmv)Release snapshots to avoid keeping references in MTMVTask (#47149) --- .../java/org/apache/doris/job/extensions/mtmv/MTMVTask.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java index aa1bbe629fd597..219a37e1b6508c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java @@ -148,7 +148,7 @@ public enum MTMVTaskRefreshMode { private StmtExecutor executor; private Map partitionSnapshots; - private final Map snapshots = Maps.newHashMap(); + private Map snapshots = Maps.newHashMap(); public MTMVTask() { } @@ -455,6 +455,9 @@ protected void closeOrReleaseResources() { if (null != partitionSnapshots) { partitionSnapshots = null; } + if (null != snapshots) { + snapshots = null; + } } private Map getIncrementalTableMap() throws AnalysisException { From 8493e59d4e2bebfe6099b9febbbc802f9d34be9d Mon Sep 17 00:00:00 2001 From: lw112 <131352377+felixwluo@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:44:35 +0800 Subject: [PATCH 05/31] [fix](func) Fix precision loss in ST_GeometryFromWKB coordinate parsing (#46661) ### What problem does this PR solve? Issue Number: close #46619 Root Cause: ``` 1. Unnecessary floating-point number conversions in coordinate handling: 1.1 converting double to string using absl::StrFormat 1.2 converting string back to double using std::stod each conversion caused precision loss. 2. Byte order handling issue in WKB parsing: 2.1 using machine endian before properly reading WKB byte order flag 2.2 this caused incorrect interpretation of coordinate values ``` Solution: ``` 1. Remove unnecessary coordinate value conversions: 1.1 directly use S2LatLng's degrees() value without string formatting 1.2 increase output precision in print_s2point to 15 digits 2. 
Fix WKB byte order handling: 2.1 read byte order flag first 2.2 set correct byte order before parsing coordinates ``` Result: before: ``` POINT (1.461652102e-231 3.34424828009e-59) ``` after: ``` POINT(117.194767000297 36.46326301008) ``` --- be/src/geo/geo_types.cpp | 2 +- be/src/geo/wkb_parse.cpp | 153 ++++++++++++------ be/src/geo/wkb_parse.h | 14 +- .../correctness_p0/test_select_constant.out | 2 +- .../nereids_function_p0/scalar_function/S.out | 48 +++--- .../data/nereids_p0/test_select_constant.out | 2 +- .../spatial_functions/test_gis_function.out | 15 ++ .../data/query_p0/test_select_constant.out | 2 +- .../test_gis_function.groovy | 6 + 9 files changed, 168 insertions(+), 76 deletions(-) diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index bee6f69ba8e816..dc27595da3bfc6 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default; void print_s2point(std::ostream& os, const S2Point& point) { S2LatLng coord(point); - os << std::setprecision(12) << coord.lng().degrees() << " " << coord.lat().degrees(); + os << std::setprecision(15) << coord.lng().degrees() << " " << coord.lat().degrees(); } static inline bool is_valid_lng_lat(double lng, double lat) { diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp index e24328d7564ac3..7b345929fc0979 100644 --- a/be/src/geo/wkb_parse.cpp +++ b/be/src/geo/wkb_parse.cpp @@ -122,108 +122,169 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { auto size = is.tellg(); is.seekg(0, std::ios::beg); - std::vector buf(static_cast(size)); - is.read(reinterpret_cast(buf.data()), static_cast(size)); - - ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian + // Check if size is valid + if (size <= 0) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } - ctx->shape = readGeometry(ctx).release(); + std::vector buf(static_cast(size)); + if (!is.read(reinterpret_cast(buf.data()), static_cast(size))) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } - if (!ctx->shape) { + // Ensure we have at least one byte for byte order + if (buf.empty()) { ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; } - return ctx; -} -std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { - // determine byte order - unsigned char byteOrder = ctx->dis.readByte(); + // First read the byte order using machine endian + auto byteOrder = buf[0]; - // default is machine endian + // Create ByteOrderDataInStream with the correct byte order if (byteOrder == byteOrder::wkbNDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); } else if (byteOrder == byteOrder::wkbXDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG); + } else { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } + + std::unique_ptr shape = readGeometry(ctx); + if (!shape) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; } - uint32_t typeInt = ctx->dis.readUnsigned(); + ctx->shape = shape.release(); + return ctx; +} - uint32_t geometryType = (typeInt & 0xffff) % 1000; +std::unique_ptr WkbParse::readGeometry(WkbParseContext* ctx) { + try { + // Ensure we have enough data to read + if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes for type + return nullptr; + } - std::unique_ptr shape; + // Skip the byte order as we've already handled it + 
ctx->dis.readByte(); - switch (geometryType) { - case wkbType::wkbPoint: - shape.reset(readPoint(ctx).release()); - break; - case wkbType::wkbLine: - shape.reset(readLine(ctx).release()); - break; - case wkbType::wkbPolygon: - shape.reset(readPolygon(ctx).release()); - break; - default: + uint32_t typeInt = ctx->dis.readUnsigned(); + + // Check if geometry has SRID + bool has_srid = (typeInt & WKB_SRID_FLAG) != 0; + + // Read SRID if present + if (has_srid) { + ctx->dis.readUnsigned(); // Read and store SRID if needed + } + + // Get the base geometry type + uint32_t geometryType = typeInt & WKB_TYPE_MASK; + + std::unique_ptr shape; + + switch (geometryType) { + case wkbType::wkbPoint: + shape = readPoint(ctx); + break; + case wkbType::wkbLine: + shape = readLine(ctx); + break; + case wkbType::wkbPolygon: + shape = readPolygon(ctx); + break; + default: + return nullptr; + } + + return shape; + } catch (...) { + // Handle any exceptions from reading operations return nullptr; } - return shape; } std::unique_ptr WkbParse::readPoint(WkbParseContext* ctx) { GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx); - std::unique_ptr point = GeoPoint::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) { - return point; - } else { + std::unique_ptr point = GeoPoint::create_unique(); + if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) { return nullptr; } + + return point; } std::unique_ptr WkbParse::readLine(WkbParseContext* ctx) { uint32_t size = ctx->dis.readUnsigned(); - minMemSize(wkbLine, size, ctx); + if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) { + return nullptr; + } GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx); - std::unique_ptr line = GeoLine::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (line->from_coords(coords) == GEO_PARSE_OK) { - return line; - } else { + std::unique_ptr line = GeoLine::create_unique(); + if (!line || line->from_coords(coords) != GEO_PARSE_OK) { return nullptr; } + + return line; } std::unique_ptr WkbParse::readPolygon(WkbParseContext* ctx) { uint32_t num_loops = ctx->dis.readUnsigned(); - minMemSize(wkbPolygon, num_loops, ctx); + if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) { + return nullptr; + } + GeoCoordinateListList coordss; - for (int i = 0; i < num_loops; ++i) { + for (uint32_t i = 0; i < num_loops; ++i) { uint32_t size = ctx->dis.readUnsigned(); - GeoCoordinateList* coords = new GeoCoordinateList(); + if (size < 3) { // A polygon loop must have at least 3 points + return nullptr; + } + + auto coords = std::make_unique(); *coords = WkbParse::readCoordinateList(size, ctx); - coordss.add(coords); + if (coords->list.empty()) { + return nullptr; + } + coordss.add(coords.release()); } std::unique_ptr polygon = GeoPolygon::create_unique(); - - if (polygon->from_coords(coordss) == GEO_PARSE_OK) { - return polygon; - } else { + if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) { return nullptr; } + + return polygon; } GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) { GeoCoordinateList coords; for (uint32_t i = 0; i < size; i++) { - readCoordinate(ctx); + if (!readCoordinate(ctx)) { + return GeoCoordinateList(); + } unsigned int j = 0; GeoCoordinate coord; coord.x = ctx->ordValues[j++]; coord.y = ctx->ordValues[j++]; coords.add(coord); } - return coords; } diff --git a/be/src/geo/wkb_parse.h b/be/src/geo/wkb_parse.h index e03dddf97a3964..de6d0e9b2d97c9 
100644 --- a/be/src/geo/wkb_parse.h +++ b/be/src/geo/wkb_parse.h @@ -17,8 +17,7 @@ #pragma once -#include - +#include #include #include @@ -34,6 +33,17 @@ class GeoLine; class GeoPoint; class GeoPolygon; +// WKB format constants +// According to OpenGIS Implementation Specification: +// The high bit of the type value is set to 1 if the WKB contains a SRID. +// Reference: OpenGIS Implementation Specification for Geographic information - Simple feature access - Part 1: Common architecture +// Bit mask to check if WKB contains SRID +constexpr uint32_t WKB_SRID_FLAG = 0x20000000; + +// The geometry type is stored in the least significant byte of the type value +// Bit mask to extract the base geometry type +constexpr uint32_t WKB_TYPE_MASK = 0xFF; + class WkbParse { public: static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape); diff --git a/regression-test/data/correctness_p0/test_select_constant.out b/regression-test/data/correctness_p0/test_select_constant.out index 33c56d3a22cb02..6737e373d16b89 100644 --- a/regression-test/data/correctness_p0/test_select_constant.out +++ b/regression-test/data/correctness_p0/test_select_constant.out @@ -3,7 +3,7 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) -- !select2 -- 20230101 diff --git a/regression-test/data/nereids_function_p0/scalar_function/S.out b/regression-test/data/nereids_function_p0/scalar_function/S.out index 53a5a1639a747f..b346ae022a0808 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/S.out +++ b/regression-test/data/nereids_function_p0/scalar_function/S.out @@ -1335,32 +1335,32 @@ POINT (98.35620117 36.939093) -- !sql_st_circle_Double_Double_Double -- \N -CIRCLE ((-46.35620117 39.939093), 3.32100009918) -CIRCLE ((-74.35620117 79.939093), 66.3209991455) -CIRCLE ((126.35620117 -39.939093), 5.30000019073) -CIRCLE ((16.35620117 19.939093), 7.32100009918) -CIRCLE ((43.35620117 35.939093), 2.32100009918) -CIRCLE ((47.35620117 26.939093), 33.3209991455) -CIRCLE ((5 5), 4.32100009918) -CIRCLE ((90.35620117 39.939093), 100.320999146) -CIRCLE ((90.35620117 47.939093), 88.3209991455) -CIRCLE ((90.35620117 49.939093), 76.3209991455) -CIRCLE ((90.35620117 59.939093), 75.3209991455) -CIRCLE ((98.35620117 36.939093), 45.3209991455) +CIRCLE ((-46.35620117 39.939093), 3.32100009918213) +CIRCLE ((-74.35620117 79.939093), 66.3209991455078) +CIRCLE ((126.35620117 -39.939093), 5.30000019073486) +CIRCLE ((16.35620117 19.939093), 7.32100009918213) +CIRCLE ((43.35620117 35.939093), 2.32100009918213) +CIRCLE ((47.35620117 26.939093), 33.3209991455078) +CIRCLE ((5 5), 4.32100009918213) +CIRCLE ((90.35620117 39.939093), 100.320999145508) +CIRCLE ((90.35620117 47.939093), 88.3209991455078) +CIRCLE ((90.35620117 49.939093), 76.3209991455078) +CIRCLE ((90.35620117 59.939093), 75.3209991455078) +CIRCLE ((98.35620117 36.939093), 45.3209991455078) -- !sql_st_circle_Double_Double_Double_notnull -- -CIRCLE ((-46.35620117 39.939093), 3.32100009918) -CIRCLE ((-74.35620117 79.939093), 66.3209991455) -CIRCLE ((126.35620117 -39.939093), 5.30000019073) -CIRCLE ((16.35620117 19.939093), 7.32100009918) -CIRCLE ((43.35620117 35.939093), 2.32100009918) -CIRCLE ((47.35620117 26.939093), 33.3209991455) -CIRCLE ((5 5), 4.32100009918) -CIRCLE ((90.35620117 39.939093), 100.320999146) -CIRCLE ((90.35620117 47.939093), 88.3209991455) -CIRCLE ((90.35620117 49.939093), 76.3209991455) -CIRCLE ((90.35620117 59.939093), 75.3209991455) -CIRCLE ((98.35620117 36.939093), 45.3209991455) 
+CIRCLE ((-46.35620117 39.939093), 3.32100009918213) +CIRCLE ((-74.35620117 79.939093), 66.3209991455078) +CIRCLE ((126.35620117 -39.939093), 5.30000019073486) +CIRCLE ((16.35620117 19.939093), 7.32100009918213) +CIRCLE ((43.35620117 35.939093), 2.32100009918213) +CIRCLE ((47.35620117 26.939093), 33.3209991455078) +CIRCLE ((5 5), 4.32100009918213) +CIRCLE ((90.35620117 39.939093), 100.320999145508) +CIRCLE ((90.35620117 47.939093), 88.3209991455078) +CIRCLE ((90.35620117 49.939093), 76.3209991455078) +CIRCLE ((90.35620117 59.939093), 75.3209991455078) +CIRCLE ((98.35620117 36.939093), 45.3209991455078) -- !sql_st_contains_Varchar_Varchar -- \N diff --git a/regression-test/data/nereids_p0/test_select_constant.out b/regression-test/data/nereids_p0/test_select_constant.out index cb391ffb1210fc..6faa26295a1359 100644 --- a/regression-test/data/nereids_p0/test_select_constant.out +++ b/regression-test/data/nereids_p0/test_select_constant.out @@ -3,5 +3,5 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) diff --git a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out index db1b1ffcae52d3..59bc628249f030 100644 --- a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out +++ b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out @@ -113,3 +113,18 @@ LINESTRING (1 1, 2 2) -- !sql -- POLYGON ((114.104486 22.547119, 114.093758 22.547753, 114.096504 22.532057, 114.104229 22.539826, 114.106203 22.54268, 114.104486 22.547119)) +-- !sql -- +POINT (117.194767000297 36.46326301008) + +-- !sql -- +1.0 + +-- !sql -- +POINT (1.23456789 2.34567891) + +-- !sql -- +0.9999999999999 + +-- !sql -- +1.0000000000001 + diff --git a/regression-test/data/query_p0/test_select_constant.out b/regression-test/data/query_p0/test_select_constant.out index cb391ffb1210fc..6faa26295a1359 100644 --- a/regression-test/data/query_p0/test_select_constant.out +++ b/regression-test/data/query_p0/test_select_constant.out @@ -3,5 +3,5 @@ 100 test 2021-01-02 -- !select_geo1 -- -POINT (123.123456789 89.123456789) +POINT (123.123456789012 89.123456789) diff --git a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy index f76cb44cb4ad4b..81eadfb0cc039d 100644 --- a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy @@ -70,4 +70,10 @@ suite("test_gis_function", "arrow_flight_sql") { qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2 2)\"))));" qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON ((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 22.539826,114.106203 22.542680,114.104486 22.547119))\"))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));" + qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));" + qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));" + qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));" + } From fb1ad0c8ec613abbb0a7ed22d007df1691b40d21 Mon Sep 17 00:00:00 2001 
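A quick, self-contained illustration of the precision loss described in patch 05 above (#46661). This is an editor's sketch, not code from that PR: the BE fix is in C++, but the arithmetic is identical in Java, and the value (117.194767000297) and the 12- vs 15-digit settings are taken from the commit message. The `%g` formatting below stands in for the old StrFormat/std::stod round trip, and the class name is made up.

```java
public class WkbPrecisionDemo {
    public static void main(String[] args) {
        // Longitude from the PR description: 15 significant digits.
        double lng = 117.194767000297;

        // Round-tripping through a 12-significant-digit string (the old
        // setprecision(12)-style path) silently drops the trailing digits.
        String twelveDigits = String.format("%.12g", lng);            // "117.194767000"
        double reparsed = Double.parseDouble(twelveDigits);
        System.out.println(twelveDigits + " -> " + reparsed);         // 117.194767000 -> 117.194767
        System.out.println(reparsed == lng);                          // false

        // 15 significant digits (the new output precision) round-trip this value,
        // and skipping the string conversion altogether is lossless by definition.
        String fifteenDigits = String.format("%.15g", lng);           // "117.194767000297"
        System.out.println(Double.parseDouble(fifteenDigits) == lng); // true
    }
}
```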
From: 924060929 Date: Tue, 21 Jan 2025 15:02:50 +0800 Subject: [PATCH 06/31] [fix](cache) fix sql cache throw npe in cloud mode (#47221) fix sql cache throw npe in cloud mode, when some partitions is dropped: ``` 2025-01-15 18:18:54,811 WARN (mysql-nio-pool-101426|288) [ConnectProcessor.handleQueryException():537] Process one query failed because unknown reason: java.lang.NullPointerException: Cannot invoke "org.apache.doris.cloud.catalog.CloudPartition.getDbId()" because "partition" is null at org.apache.doris.cloud.catalog.CloudPartition.getSnapshotVisibleVersion(CloudPartition.java:196) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.catalog.OlapTable.getVersionInBatchForCloudMode(OlapTable.java:1190) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.cache.CacheAnalyzer.buildCacheTableForOlapScanNode(CacheAnalyzer.java:700) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.cache.CacheAnalyzer.buildCacheTableList(CacheAnalyzer.java:512) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.cache.CacheAnalyzer.innerCheckCacheModeForNereids(CacheAnalyzer.java:412) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.cache.CacheAnalyzer.getCacheData(CacheAnalyzer.java:522) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.handleCacheStmt(StmtExecutor.java:1725) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.handleQueryStmt(StmtExecutor.java:1831) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.handleQueryWithRetry(StmtExecutor.java:874) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.executeByNereids(StmtExecutor.java:811) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:607) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.queryRetry(StmtExecutor.java:557) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:547) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.ConnectProcessor.executeQuery(ConnectProcessor.java:397) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.ConnectProcessor.handleQuery(ConnectProcessor.java:238) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.handleQuery(MysqlConnectProcessor.java:194) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.dispatch(MysqlConnectProcessor.java:222) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.processOnce(MysqlConnectProcessor.java:281) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) ~[doris-fe.jar:1.2-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) ~[?:?] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) ~[?:?] at java.lang.Thread.run(Thread.java:840) ~[?:?] 
``` This is an unstable exception, so I can not add test --- .../common/cache/NereidsSqlCacheManager.java | 53 ++++++------ .../apache/doris/qe/cache/CacheAnalyzer.java | 84 ++++++++++--------- 2 files changed, 70 insertions(+), 67 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSqlCacheManager.java b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSqlCacheManager.java index bf82795450048a..ed9a9f0c00cd9f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSqlCacheManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/cache/NereidsSqlCacheManager.java @@ -228,37 +228,36 @@ private String normalizeSql(String sql) { private Optional tryParseSql( ConnectContext connectContext, String key, SqlCacheContext sqlCacheContext, UserIdentity currentUserIdentity, boolean checkUserVariable) { - Env env = connectContext.getEnv(); - - if (!tryLockTables(connectContext, env, sqlCacheContext)) { - return invalidateCache(key); - } + try { + Env env = connectContext.getEnv(); - // check table and view and their columns authority - if (privilegeChanged(connectContext, env, sqlCacheContext)) { - return invalidateCache(key); - } - if (tablesOrDataChanged(env, sqlCacheContext)) { - return invalidateCache(key); - } - if (viewsChanged(env, sqlCacheContext)) { - return invalidateCache(key); - } + if (!tryLockTables(connectContext, env, sqlCacheContext)) { + return invalidateCache(key); + } - LogicalEmptyRelation whateverPlan = new LogicalEmptyRelation(new RelationId(0), ImmutableList.of()); - if (nondeterministicFunctionChanged(whateverPlan, connectContext, sqlCacheContext)) { - return invalidateCache(key); - } + // check table and view and their columns authority + if (privilegeChanged(connectContext, env, sqlCacheContext)) { + return invalidateCache(key); + } + if (tablesOrDataChanged(env, sqlCacheContext)) { + return invalidateCache(key); + } + if (viewsChanged(env, sqlCacheContext)) { + return invalidateCache(key); + } - // table structure and data not changed, now check policy - if (rowPoliciesChanged(currentUserIdentity, env, sqlCacheContext)) { - return invalidateCache(key); - } - if (dataMaskPoliciesChanged(currentUserIdentity, env, sqlCacheContext)) { - return invalidateCache(key); - } + LogicalEmptyRelation whateverPlan = new LogicalEmptyRelation(new RelationId(0), ImmutableList.of()); + if (nondeterministicFunctionChanged(whateverPlan, connectContext, sqlCacheContext)) { + return invalidateCache(key); + } - try { + // table structure and data not changed, now check policy + if (rowPoliciesChanged(currentUserIdentity, env, sqlCacheContext)) { + return invalidateCache(key); + } + if (dataMaskPoliciesChanged(currentUserIdentity, env, sqlCacheContext)) { + return invalidateCache(key); + } Optional resultSetInFe = sqlCacheContext.getResultSetInFe(); List currentVariables = ImmutableList.of(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/CacheAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/CacheAnalyzer.java index a6fd3bc8fcbfe6..daa45eeb806707 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/CacheAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/CacheAnalyzer.java @@ -467,54 +467,58 @@ private CacheMode innerCheckCacheModeForNereids(long now) { } private List buildCacheTableList() { - //Check the last version time of the table - MetricRepo.COUNTER_QUERY_TABLE.increase(1L); - long olapScanNodeSize = 0; - long hiveScanNodeSize = 0; - for (ScanNode 
scanNode : scanNodes) { - if (scanNode instanceof OlapScanNode) { - olapScanNodeSize++; - } else if (scanNode instanceof HiveScanNode) { - hiveScanNodeSize++; + try { + //Check the last version time of the table + MetricRepo.COUNTER_QUERY_TABLE.increase(1L); + long olapScanNodeSize = 0; + long hiveScanNodeSize = 0; + for (ScanNode scanNode : scanNodes) { + if (scanNode instanceof OlapScanNode) { + olapScanNodeSize++; + } else if (scanNode instanceof HiveScanNode) { + hiveScanNodeSize++; + } } - } - if (olapScanNodeSize > 0) { - MetricRepo.COUNTER_QUERY_OLAP_TABLE.increase(1L); - } - if (hiveScanNodeSize > 0) { - MetricRepo.COUNTER_QUERY_HIVE_TABLE.increase(1L); - } - - if (!(olapScanNodeSize == scanNodes.size() || hiveScanNodeSize == scanNodes.size())) { - if (LOG.isDebugEnabled()) { - LOG.debug("only support olap/hive table with non-federated query, other types are not supported now, " - + "queryId {}", DebugUtil.printId(queryId)); + if (olapScanNodeSize > 0) { + MetricRepo.COUNTER_QUERY_OLAP_TABLE.increase(1L); + } + if (hiveScanNodeSize > 0) { + MetricRepo.COUNTER_QUERY_HIVE_TABLE.increase(1L); } - return Collections.emptyList(); - } - List tblTimeList = Lists.newArrayList(); - for (int i = 0; i < scanNodes.size(); i++) { - ScanNode node = scanNodes.get(i); - if (enablePartitionCache() - && (node instanceof OlapScanNode) - && ((OlapScanNode) node).getSelectedPartitionNum() > 1 - && selectStmt != null - && selectStmt.hasGroupByClause()) { + if (!(olapScanNodeSize == scanNodes.size() || hiveScanNodeSize == scanNodes.size())) { if (LOG.isDebugEnabled()) { - LOG.debug("more than one partition scanned when qeury has agg, " - + "partition cache cannot use, queryid {}", - DebugUtil.printId(queryId)); + LOG.debug("only support olap/hive table with non-federated query, " + + "other types are not supported now, queryId {}", DebugUtil.printId(queryId)); } return Collections.emptyList(); } - CacheTable cTable = node instanceof OlapScanNode - ? buildCacheTableForOlapScanNode((OlapScanNode) node) - : buildCacheTableForHiveScanNode((HiveScanNode) node); - tblTimeList.add(cTable); + + List tblTimeList = Lists.newArrayList(); + for (int i = 0; i < scanNodes.size(); i++) { + ScanNode node = scanNodes.get(i); + if (enablePartitionCache() + && (node instanceof OlapScanNode) + && ((OlapScanNode) node).getSelectedPartitionNum() > 1 + && selectStmt != null + && selectStmt.hasGroupByClause()) { + if (LOG.isDebugEnabled()) { + LOG.debug("more than one partition scanned when qeury has agg, " + + "partition cache cannot use, queryid {}", + DebugUtil.printId(queryId)); + } + return Collections.emptyList(); + } + CacheTable cTable = node instanceof OlapScanNode + ? buildCacheTableForOlapScanNode((OlapScanNode) node) + : buildCacheTableForHiveScanNode((HiveScanNode) node); + tblTimeList.add(cTable); + } + Collections.sort(tblTimeList); + return tblTimeList; + } catch (Throwable t) { + return new ArrayList<>(); } - Collections.sort(tblTimeList); - return tblTimeList; } public InternalService.PFetchCacheResult getCacheData() throws UserException { From eab52d67edc8c48b1da06a69e58221d614883dcb Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Tue, 21 Jan 2025 15:25:58 +0800 Subject: [PATCH 07/31] [fix](external catalog) Persisting the External Catalog comment field (#46946) Since the comment field is not persisted, the comment will be lost after the FE is restarted after the comment is modified. This PR adds persistence to the comment. 
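Before the diff, a short editor's sketch of why the one-line annotation move matters. It is not the FE's actual GsonUtils setup (that configuration is not part of this hunk); it only assumes what the fix implies: a serializer that skips fields lacking `@SerializedName`. Class names below are hypothetical.

```java
import com.google.gson.ExclusionStrategy;
import com.google.gson.FieldAttributes;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.annotations.SerializedName;

public class CommentPersistenceDemo {
    // Mirrors the old layout: "comment" has no annotation, so it is dropped on write.
    static class CatalogBefore {
        @SerializedName(value = "name")
        String name = "test1";
        String comment = "This is a test comment";
    }

    // Mirrors the fixed layout: "comment" is annotated and therefore persisted.
    static class CatalogAfter {
        @SerializedName(value = "name")
        String name = "test1";
        @SerializedName(value = "comment")
        String comment = "This is a test comment";
    }

    public static void main(String[] args) {
        // Assumption: only @SerializedName fields are written, which is the policy
        // the fix relies on.
        Gson gson = new GsonBuilder()
                .addSerializationExclusionStrategy(new ExclusionStrategy() {
                    @Override
                    public boolean shouldSkipField(FieldAttributes f) {
                        return f.getAnnotation(SerializedName.class) == null;
                    }

                    @Override
                    public boolean shouldSkipClass(Class<?> clazz) {
                        return false;
                    }
                })
                .create();

        System.out.println(gson.toJson(new CatalogBefore())); // {"name":"test1"}  -- comment lost across a write/read cycle
        System.out.println(gson.toJson(new CatalogAfter()));  // {"name":"test1","comment":"This is a test comment"}
    }
}
```

With that policy in place, annotating the field is exactly what the new `testSerializationWithComment` case added in this patch verifies across a serialize/deserialize round trip.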
--- .../doris/datasource/ExternalCatalog.java | 4 ++- .../doris/datasource/ExternalCatalogTest.java | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 633113f27cfebe..b1eb8ab6afe0f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -139,6 +139,9 @@ public abstract class ExternalCatalog // to tableAutoAnalyzePolicy @SerializedName(value = "taap") protected Map, String> tableAutoAnalyzePolicy = Maps.newHashMap(); + @SerializedName(value = "comment") + private String comment; + // db name does not contains "default_cluster" protected Map dbNameToId = Maps.newConcurrentMap(); private boolean objectCreated = false; @@ -147,7 +150,6 @@ public abstract class ExternalCatalog protected TransactionManager transactionManager; private ExternalSchemaCache schemaCache; - private String comment; // A cached and being converted properties for external catalog. // generated from catalog properties. private byte[] propLock = new byte[0]; diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java index f8e72c366b55f7..67d1e2d7ab4f31 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/ExternalCatalogTest.java @@ -276,4 +276,37 @@ public void testSerialization() throws Exception { dis.close(); file.delete(); } + + @Test + public void testSerializationWithComment() throws Exception { + MetaContext metaContext = new MetaContext(); + metaContext.setMetaVersion(FeMetaVersion.VERSION_CURRENT); + metaContext.setThreadLocalInfo(); + + // 1. Write objects to file + File file = new File("./external_catalog_with_comment_test.dat"); + file.createNewFile(); + DataOutputStream dos = new DataOutputStream(Files.newOutputStream(file.toPath())); + + TestExternalCatalog ctl = (TestExternalCatalog) mgr.getCatalog("test1"); + String testComment = "This is a test comment for serialization"; + ctl.setComment(testComment); // Set a custom comment value + ctl.write(dos); + dos.flush(); + dos.close(); + + // 2. Read objects from file + DataInputStream dis = new DataInputStream(Files.newInputStream(file.toPath())); + + TestExternalCatalog ctl2 = (TestExternalCatalog) ExternalCatalog.read(dis); + Configuration conf = ctl2.getConfiguration(); + Assertions.assertNotNull(conf); + + // Verify the comment is properly serialized and deserialized + Assertions.assertEquals(testComment, ctl2.getComment()); + + // 3. 
delete files + dis.close(); + file.delete(); + } } From 117cb96b0a8416bb6acefac3645b31104c33b001 Mon Sep 17 00:00:00 2001 From: zgxme Date: Tue, 21 Jan 2025 16:48:12 +0800 Subject: [PATCH 08/31] [fix](docker) solve kerberos docker conflict (#47260) --- docker/thirdparties/run-thirdparties-docker.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index a581baa6cfa1e7..6e3561706f7e75 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -597,7 +597,6 @@ start_kerberos() { sudo chmod a+w /etc/hosts sudo sed -i "1i${IP_HOST} hadoop-master" /etc/hosts sudo sed -i "1i${IP_HOST} hadoop-master-2" /etc/hosts - sudo cp "${ROOT}"/docker-compose/kerberos/kerberos.yaml.tpl "${ROOT}"/docker-compose/kerberos/kerberos.yaml sudo docker compose -f "${ROOT}"/docker-compose/kerberos/kerberos.yaml down sudo rm -rf "${ROOT}"/docker-compose/kerberos/data if [[ "${STOP}" -ne 1 ]]; then @@ -605,7 +604,7 @@ start_kerberos() { rm -rf "${ROOT}"/docker-compose/kerberos/two-kerberos-hives/*.keytab rm -rf "${ROOT}"/docker-compose/kerberos/two-kerberos-hives/*.jks rm -rf "${ROOT}"/docker-compose/kerberos/two-kerberos-hives/*.conf - sudo docker compose -f "${ROOT}"/docker-compose/kerberos/kerberos.yaml up -d --wait + sudo docker compose -f "${ROOT}"/docker-compose/kerberos/kerberos.yaml up --remove-orphans --wait -d sudo rm -f /keytabs sudo ln -s "${ROOT}"/docker-compose/kerberos/two-kerberos-hives /keytabs sudo cp "${ROOT}"/docker-compose/kerberos/common/conf/doris-krb5.conf /keytabs/krb5.conf From 0115e2ab6700582bf7f76537cb4edfc539bc3772 Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Tue, 21 Jan 2025 17:23:41 +0800 Subject: [PATCH 09/31] [test](p2) fix case test_s3_load_properties (#47226) ### What problem does this PR solve? Issue Number: DORIS-17985 Related PR: #41788 #43989 #44285 Problem Summary: Fix test_s3_load_properties, which was incorrectly changed previously. 
--- .../broker_load/test_s3_load_properties.out | 37 +++++++++++++++---- .../test_s3_load_properties.groovy | 32 ++++++++-------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/regression-test/data/load_p2/broker_load/test_s3_load_properties.out b/regression-test/data/load_p2/broker_load/test_s3_load_properties.out index 8608a3107fee3f..4275696e02fdca 100644 --- a/regression-test/data/load_p2/broker_load/test_s3_load_properties.out +++ b/regression-test/data/load_p2/broker_load/test_s3_load_properties.out @@ -350,15 +350,30 @@ -- !select -- 48 +-- !select -- +280 + -- !select -- 48 -- !select -- -280 +48 -- !select -- 48 +-- !select -- +234 + +-- !select -- +18 + +-- !select -- +18 + +-- !select -- +300 + -- !select -- 48 @@ -366,19 +381,25 @@ 48 -- !select -- -47 +48 -- !select -- -47 +252 + +-- !select -- +18 + +-- !select -- +18 -- !select -- 48 -- !select -- -216 +320 -- !select -- -280 +48 -- !select -- 48 @@ -396,7 +417,10 @@ 48 -- !select -- -216 +252 + +-- !select -- +320 -- !select -- 18 @@ -409,4 +433,3 @@ -- !select -- 47 - diff --git a/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy b/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy index fdc6971f475fac..d0bef21f226fd6 100644 --- a/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy +++ b/regression-test/suites/load_p2/broker_load/test_s3_load_properties.groovy @@ -57,9 +57,22 @@ suite("test_s3_load_properties", "p2") { /* ========================================================== normal ========================================================== */ for (String table : basicTables) { - attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.csv", + def attributes = new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.csv", "${table}", "LINES TERMINATED BY \"\n\"", "COLUMNS TERMINATED BY \"|\"", "FORMAT AS \"CSV\"", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", - "", "", "", "", "")) + "", "", "", "", "") + // 'use_new_load_scan_node' is deprecated and its value will be ignored. Testing backward compatibility. 
+ switch (table) { + case 'dup_tbl_basic': + attributes.addProperties("use_new_load_scan_node", "false") + break + case 'uniq_tbl_basic': + attributes.addProperties("use_new_load_scan_node", "true") + break + default: + // omit this property + break + } + attributesList.add(attributes) } attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.csv", @@ -187,19 +200,6 @@ suite("test_s3_load_properties", "p2") { // "", "", "", "","").addProperties("skip_lines", "10")) // } - /* ========================================================== deprecated properties ========================================================== */ - for (String table : basicTables) { - attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.csv", - "${table}", "LINES TERMINATED BY \"\n\"", "COLUMNS TERMINATED BY \"|\"", "FORMAT AS \"CSV\"", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", - "", "", "", "", "")).addProperties("use_new_load_scan_node", "true") - } - - for (String table : basicTables) { - attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.csv", - "${table}", "LINES TERMINATED BY \"\n\"", "COLUMNS TERMINATED BY \"|\"", "FORMAT AS \"CSV\"", "(k00,k01,k02,k03,k04,k05,k06,k07,k08,k09,k10,k11,k12,k13,k14,k15,k16,k17,k18)", - "", "", "", "", "")).addProperties("use_new_load_scan_node", "false") - } - /* ========================================================== wrong column sep ========================================================== */ for (String table : basicTables) { attributesList.add(new LoadAttributes("s3://${s3BucketName}/regression/load/data/basic_data.csv", @@ -538,7 +538,7 @@ suite("test_s3_load_properties", "p2") { """ logger.info("submit sql: ${sql_str}"); sql """${sql_str}""" - logger.info("Submit load with lable: $label, table: $attributes.dataDesc.tableName, path: $attributes.dataDesc.path") + logger.info("Submit load with label: $label, table: $attributes.dataDesc.tableName, path: $attributes.dataDesc.path") def max_try_milli_secs = 600000 while (max_try_milli_secs > 0) { From d0f9dafcd2bc33931f0edb1d8332e3d1febd57ab Mon Sep 17 00:00:00 2001 From: yujun Date: Tue, 21 Jan 2025 17:46:37 +0800 Subject: [PATCH 10/31] [opt](nereids) simplify cast date like from high scale to small scale (#47036) ### What problem does this PR solve? Problem Summary: PR do two things: 1. simplify cast date like from high scale to small scale. example: ``` # suppose c_datetime_2 is datetime(2) cast(c_datetime_2 as date) = '2020-12-20' ---> c_datetime_2 >= '2020-12-20 00:00:00.00' and c_datetime_2 <= '2020-12-20 23:59:59.99' cast(c_datetime_2 as datetime(1)) > '2020-12-20 12:34:56.7' --> c_datetime_2 > '2020-12-20 12:34:56.79' ``` 2. handle compare date like with overflow. before this PR, it will not simplify if met overflow, but this PR will simplify too. 
example: ``` # suppose c_datetime_2 is datetime(2), c_date is date cast(c_date as datetime) < '9999-12-31 23:59:59' --> c_date <= '9999-12-31' cast(c_date as datetime) >= '9999-12-31 23:59:59' --> c_date > '9999-12-31' cast(c_datetime_2 as datetime(3)) < '9999-12-31 23:59:59.995' --> c_datetime_2 <= '9999-12-31 23:59:59.99' cast(c_datetime_2 as datetime(3)) >= '9999-12-31 23:59:59.995' --> c_datetime_2 > '9999-12-31 23:59:59.99' ``` --- .../rules/SimplifyComparisonPredicate.java | 250 +++++++++++++----- .../SimplifyComparisonPredicateTest.java | 142 +++++++++- .../extend_infer_equal_predicate.out | 2 +- .../month_quarter_cast_in_prune.groovy | 6 +- 4 files changed, 322 insertions(+), 78 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java index e2bba0173b0e9d..43ba84fb7a461f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java @@ -24,14 +24,17 @@ import org.apache.doris.nereids.rules.expression.ExpressionPatternRuleFactory; import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext; import org.apache.doris.nereids.rules.expression.ExpressionRuleType; +import org.apache.doris.nereids.trees.expressions.And; import org.apache.doris.nereids.trees.expressions.Cast; import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.GreaterThan; import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; +import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.LessThanEqual; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; @@ -60,6 +63,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import java.math.BigDecimal; import java.math.RoundingMode; @@ -117,22 +121,46 @@ public static Expression simplify(ComparisonPredicate cp) { return result; } + private static Expression processDateLikeTypeCoercion(ComparisonPredicate cp, Expression left, Expression right) { + if (left instanceof Cast && right instanceof DateLiteral + && ((Cast) left).getDataType().equals(right.getDataType())) { + Cast cast = (Cast) left; + if (cast.child().getDataType() instanceof DateTimeType + || cast.child().getDataType() instanceof DateTimeV2Type) { + // right is datetime + if (right instanceof DateTimeV2Literal) { + return processDateTimeLikeComparisonPredicateDateTimeV2Literal( + cp, cast.child(), (DateTimeV2Literal) right); + } + // right is date, not datetime + if (!(right instanceof DateTimeLiteral)) { + return processDateTimeLikeComparisonPredicateDateLiteral( + cp, cast.child(), (DateLiteral) right); + } + } + + // datetime to datev2 + if (cast.child().getDataType() instanceof DateType || cast.child().getDataType() instanceof DateV2Type) { + return 
processDateLikeComparisonPredicateDateLiteral(cp, cast.child(), (DateLiteral) right); + } + } + + return cp; + } + + // process cast(datetime as datetime) cmp datetime private static Expression processDateTimeLikeComparisonPredicateDateTimeV2Literal( ComparisonPredicate comparisonPredicate, Expression left, DateTimeV2Literal right) { DataType leftType = left.getDataType(); - int toScale = 0; - if (leftType instanceof DateTimeType) { - toScale = 0; - } else if (leftType instanceof DateTimeV2Type) { - toScale = ((DateTimeV2Type) leftType).getScale(); - } else { + if (!(leftType instanceof DateTimeType) && !(leftType instanceof DateTimeV2Type)) { return comparisonPredicate; } + int toScale = leftType instanceof DateTimeV2Type ? ((DateTimeV2Type) leftType).getScale() : 0; DateTimeV2Type rightType = right.getDataType(); if (toScale < rightType.getScale()) { if (comparisonPredicate instanceof EqualTo) { long originValue = right.getMicroSecond(); - right = right.roundCeiling(toScale); + right = right.roundFloor(toScale); if (right.getMicroSecond() != originValue) { // TODO: the ideal way is to return an If expr like: // return new If(new IsNull(left), new NullLiteral(BooleanType.INSTANCE), @@ -144,7 +172,7 @@ private static Expression processDateTimeLikeComparisonPredicateDateTimeV2Litera } } else if (comparisonPredicate instanceof NullSafeEqual) { long originValue = right.getMicroSecond(); - right = right.roundCeiling(toScale); + right = right.roundFloor(toScale); if (right.getMicroSecond() != originValue) { return BooleanLiteral.of(false); } @@ -153,76 +181,156 @@ private static Expression processDateTimeLikeComparisonPredicateDateTimeV2Litera right = right.roundFloor(toScale); } else if (comparisonPredicate instanceof LessThan || comparisonPredicate instanceof GreaterThanEqual) { - right = right.roundCeiling(toScale); + try { + right = right.roundCeiling(toScale); + } catch (AnalysisException e) { + // '9999-12-31 23:59:59.9'.roundCeiling(0) overflow + DateTimeLiteral newRight = right.roundFloor(toScale); + if (leftType instanceof DateTimeType) { + newRight = migrateToDateTime((DateTimeV2Literal) newRight); + } + if (comparisonPredicate instanceof LessThan) { + return new LessThanEqual(left, newRight); + } else { + return new GreaterThan(left, newRight); + } + } } else { return comparisonPredicate; } Expression newRight = leftType instanceof DateTimeType ? migrateToDateTime(right) : right; return comparisonPredicate.withChildren(left, newRight); - } else { - if (leftType instanceof DateTimeType) { - return comparisonPredicate.withChildren(left, migrateToDateTime(right)); - } else { - return comparisonPredicate; + } else if (toScale > rightType.getScale()) { + // when toScale > right's scale, then left must be datetimev2, not datetimev1 + Preconditions.checkArgument(leftType instanceof DateTimeV2Type, leftType); + + // for expression cast(left as datetime(2)) = '2020-12-20 01:02:03.45' + // then left scale is 5, right = '2020-12-20 01:02:03.45", right scale is 2, + // then left >= '2020-12-20 01:02:03.45000' && left <= '2020-12-20 01:02:03.45999' + // for low bound, it add (5-2) '0' to the origin right's tail + // for up bound, it add (5-2) '9' to the origin right's tail + // when roundFloor to high scale, its microsecond shouldn't change, only change its data type. 
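+            // Worked trace of the arithmetic below, using the values from the comment above:
+            //   right = '2020-12-20 01:02:03.45' (scale 2), toScale = 5
+            //   lowBound keeps microsecond 450000, i.e. '2020-12-20 01:02:03.45000'
+            //   upMicroSecond = 999 * 10^(6 - 5) + 450000 = 459990, i.e. '2020-12-20 01:02:03.45999'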
+ DateTimeV2Literal lowBound = right.roundFloor(toScale); + long upMicroSecond = 0; + for (int i = 0; i < toScale - rightType.getScale(); i++) { + upMicroSecond = 10 * upMicroSecond + 9; } + upMicroSecond *= (int) Math.pow(10, 6 - toScale); + upMicroSecond += lowBound.getMicroSecond(); + // left must be a datetimev2 + DateTimeV2Literal upBound = new DateTimeV2Literal((DateTimeV2Type) leftType, + right.getYear(), right.getMonth(), right.getDay(), + right.getHour(), right.getMinute(), right.getSecond(), upMicroSecond); + + if (comparisonPredicate instanceof GreaterThanEqual || comparisonPredicate instanceof LessThan) { + return comparisonPredicate.withChildren(left, lowBound); + } + + if (comparisonPredicate instanceof GreaterThan || comparisonPredicate instanceof LessThanEqual) { + return comparisonPredicate.withChildren(left, upBound); + } + + if (comparisonPredicate instanceof EqualTo || comparisonPredicate instanceof NullSafeEqual) { + List conjunctions = Lists.newArrayListWithExpectedSize(3); + conjunctions.add(new GreaterThanEqual(left, lowBound)); + conjunctions.add(new LessThanEqual(left, upBound)); + if (left.nullable() && comparisonPredicate instanceof NullSafeEqual) { + conjunctions.add(new Not(new IsNull(left))); + } + return new And(conjunctions); + } + } + + if (leftType instanceof DateTimeType) { + return comparisonPredicate.withChildren(left, migrateToDateTime(right)); + } else { + return comparisonPredicate; } } - private static Expression processDateLikeTypeCoercion(ComparisonPredicate cp, Expression left, Expression right) { - if (left instanceof Cast && right instanceof DateLiteral) { - Cast cast = (Cast) left; - if (cast.child().getDataType() instanceof DateTimeType - || cast.child().getDataType() instanceof DateTimeV2Type) { - if (right instanceof DateTimeV2Literal) { - try { - return processDateTimeLikeComparisonPredicateDateTimeV2Literal( - cp, cast.child(), (DateTimeV2Literal) right); - } catch (AnalysisException e) { - // '9999-12-31 23:59:59.9'.roundCeiling(0) overflow - return cp; - } - } + // process cast(datetime as date) cmp date + private static Expression processDateTimeLikeComparisonPredicateDateLiteral( + ComparisonPredicate comparisonPredicate, Expression left, DateLiteral right) { + DataType leftType = left.getDataType(); + if (!(leftType instanceof DateTimeType) && !(leftType instanceof DateTimeV2Type)) { + return comparisonPredicate; + } + if (right instanceof DateTimeLiteral) { + return comparisonPredicate; + } + + DateTimeLiteral lowBound = null; + DateTimeLiteral upBound = null; + if (leftType instanceof DateTimeType) { + lowBound = new DateTimeLiteral(right.getYear(), right.getMonth(), right.getDay(), 0, 0, 0); + upBound = new DateTimeLiteral(right.getYear(), right.getMonth(), right.getDay(), 23, 59, 59); + } else { + long upMicroSecond = 0; + for (int i = 0; i < ((DateTimeV2Type) leftType).getScale(); i++) { + upMicroSecond = 10 * upMicroSecond + 9; } + upMicroSecond *= (int) Math.pow(10, 6 - ((DateTimeV2Type) leftType).getScale()); + lowBound = new DateTimeV2Literal((DateTimeV2Type) leftType, + right.getYear(), right.getMonth(), right.getDay(), 0, 0, 0, 0); + upBound = new DateTimeV2Literal((DateTimeV2Type) leftType, + right.getYear(), right.getMonth(), right.getDay(), 23, 59, 59, upMicroSecond); + } - // datetime to datev2 - if (cast.child().getDataType() instanceof DateType || cast.child().getDataType() instanceof DateV2Type) { - if (right instanceof DateTimeLiteral) { - DateTimeLiteral dateTimeLiteral = (DateTimeLiteral) right; - right = 
migrateToDateV2(dateTimeLiteral); - if (dateTimeLiteral.getHour() != 0 || dateTimeLiteral.getMinute() != 0 - || dateTimeLiteral.getSecond() != 0 || dateTimeLiteral.getMicroSecond() != 0) { - if (cp instanceof EqualTo) { - return ExpressionUtils.falseOrNull(cast.child()); - } else if (cp instanceof NullSafeEqual) { - return BooleanLiteral.FALSE; - } else if (cp instanceof GreaterThanEqual || cp instanceof LessThan) { - // '9999-12-31' + 1 will overflow - if (DateLiteral.isDateOutOfRange(((DateV2Literal) right).toJavaDateType().plusDays(1))) { - return cp; - } - right = ((DateV2Literal) right).plusDays(1); - } - } - if (cast.child().getDataType() instanceof DateV2Type) { - left = cast.child(); - } - } + if (comparisonPredicate instanceof GreaterThanEqual || comparisonPredicate instanceof LessThan) { + return comparisonPredicate.withChildren(left, lowBound); + } + if (comparisonPredicate instanceof GreaterThan || comparisonPredicate instanceof LessThanEqual) { + return comparisonPredicate.withChildren(left, upBound); + } + + if (comparisonPredicate instanceof EqualTo || comparisonPredicate instanceof NullSafeEqual) { + List conjunctions = Lists.newArrayListWithExpectedSize(3); + conjunctions.add(new GreaterThanEqual(left, lowBound)); + conjunctions.add(new LessThanEqual(left, upBound)); + if (left.nullable() && comparisonPredicate instanceof NullSafeEqual) { + conjunctions.add(new Not(new IsNull(left))); } + return new And(conjunctions); + } + + return comparisonPredicate; + } - // datev2 to date - if (cast.child().getDataType() instanceof DateType) { - if (right instanceof DateV2Literal) { - left = cast.child(); - right = migrateToDate((DateV2Literal) right); + // process cast(date as datetime/date) cmp datetime/date + private static Expression processDateLikeComparisonPredicateDateLiteral( + ComparisonPredicate comparisonPredicate, Expression left, DateLiteral right) { + if (!(left.getDataType() instanceof DateType) && !(left.getDataType() instanceof DateV2Type)) { + return comparisonPredicate; + } + if (right instanceof DateTimeLiteral) { + DateTimeLiteral dateTimeLiteral = (DateTimeLiteral) right; + right = migrateToDateV2(dateTimeLiteral); + if (dateTimeLiteral.getHour() != 0 || dateTimeLiteral.getMinute() != 0 + || dateTimeLiteral.getSecond() != 0 || dateTimeLiteral.getMicroSecond() != 0) { + if (comparisonPredicate instanceof EqualTo) { + return ExpressionUtils.falseOrNull(left); + } else if (comparisonPredicate instanceof NullSafeEqual) { + return BooleanLiteral.FALSE; + } else if (comparisonPredicate instanceof GreaterThanEqual + || comparisonPredicate instanceof LessThan) { + // '9999-12-31' + 1 will overflow + if (DateLiteral.isDateOutOfRange(right.toJavaDateType().plusDays(1))) { + right = convertDateLiteralToDateType(right, left.getDataType()); + if (comparisonPredicate instanceof GreaterThanEqual) { + return new GreaterThan(left, right); + } else { + return new LessThanEqual(left, right); + } + } + right = (DateLiteral) right.plusDays(1); } } } - - if (left != cp.left() || right != cp.right()) { - return cp.withChildren(left, right); - } else { - return cp; + right = convertDateLiteralToDateType(right, left.getDataType()); + if (right != comparisonPredicate.right()) { + return comparisonPredicate.withChildren(left, right); } + return comparisonPredicate; } private static Expression processFloatLikeTypeCoercion(ComparisonPredicate comparisonPredicate, @@ -475,15 +583,29 @@ private static IntegerLikeLiteral convertDecimalToIntegerLikeLiteral(BigDecimal } } - private static 
Expression migrateToDateTime(DateTimeV2Literal l) { + private static DateLiteral convertDateLiteralToDateType(DateLiteral l, DataType dateType) { + if (dateType instanceof DateType) { + if (l instanceof DateV2Literal) { + return migrateToDate((DateV2Literal) l); + } + } + if (dateType instanceof DateV2Type) { + if (!(l instanceof DateV2Literal)) { + return migrateToDateV2(l); + } + } + return l; + } + + private static DateTimeLiteral migrateToDateTime(DateTimeV2Literal l) { return new DateTimeLiteral(l.getYear(), l.getMonth(), l.getDay(), l.getHour(), l.getMinute(), l.getSecond()); } - private static Expression migrateToDateV2(DateTimeLiteral l) { + private static DateV2Literal migrateToDateV2(DateLiteral l) { return new DateV2Literal(l.getYear(), l.getMonth(), l.getDay()); } - private static Expression migrateToDate(DateV2Literal l) { + private static DateLiteral migrateToDate(DateV2Literal l) { return new DateLiteral(l.getYear(), l.getMonth(), l.getDay()); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java index b4b5877c07d4ea..a4faf95146eb72 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicateTest.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.LessThanEqual; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; @@ -302,23 +303,144 @@ void testDateTimeV2CmpDateTimeV2() { assertRewrite(new GreaterThan(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("0000-01-01 00:00:00.1")), new GreaterThan(datetimev1, new DateTimeLiteral("0000-01-01 00:00:00"))); - // test overflow, not cast assertRewrite(new LessThan(new Cast(date, DateTimeType.INSTANCE), new DateTimeLiteral("9999-12-31 23:59:59")), - new LessThan(new Cast(date, DateTimeType.INSTANCE), new DateTimeLiteral("9999-12-31 23:59:59"))); + new LessThanEqual(date, new DateV2Literal("9999-12-31"))); assertRewrite(new LessThan(new Cast(date, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("9999-12-31 23:59:59")), - new LessThan(new Cast(date, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("9999-12-31 23:59:59"))); + new LessThanEqual(date, new DateV2Literal("9999-12-31"))); assertRewrite(new LessThan(new Cast(datev1, DateTimeType.INSTANCE), new DateTimeLiteral("9999-12-31 23:59:59")), - new LessThan(new Cast(datev1, DateTimeType.INSTANCE), new DateTimeLiteral("9999-12-31 23:59:59"))); + new LessThanEqual(datev1, new DateLiteral("9999-12-31"))); assertRewrite(new LessThan(new Cast(datev1, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("9999-12-31 23:59:59")), - new LessThan(new Cast(datev1, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("9999-12-31 23:59:59"))); + new LessThanEqual(datev1, new DateLiteral("9999-12-31"))); + assertRewrite(new GreaterThanEqual(new Cast(date, DateTimeType.INSTANCE), new DateTimeLiteral("9999-12-31 23:59:59")), + new GreaterThan(date, new DateV2Literal("9999-12-31"))); + 
assertRewrite(new GreaterThanEqual(new Cast(date, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("9999-12-31 23:59:59")), + new GreaterThan(date, new DateV2Literal("9999-12-31"))); + assertRewrite(new GreaterThanEqual(new Cast(datev1, DateTimeType.INSTANCE), new DateTimeLiteral("9999-12-31 23:59:59")), + new GreaterThan(datev1, new DateLiteral("9999-12-31"))); + assertRewrite(new GreaterThanEqual(new Cast(datev1, DateTimeV2Type.SYSTEM_DEFAULT), new DateTimeV2Literal("9999-12-31 23:59:59")), + new GreaterThan(datev1, new DateLiteral("9999-12-31"))); + + // test from high datetime to low datetime or date + assertRewrite(new GreaterThan(new Cast(datetime0, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new GreaterThan(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59"))); + assertRewrite(new GreaterThanEqual(new Cast(datetime0, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new GreaterThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 00:00:00"))); + assertRewrite(new LessThan(new Cast(datetime0, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new LessThan(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 00:00:00"))); + assertRewrite(new LessThanEqual(new Cast(datetime0, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new LessThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59"))); + assertRewrite(new EqualTo(new Cast(datetime0, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new And( + new GreaterThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 00:00:00")), + new LessThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59")))); + assertRewrite(new NullSafeEqual(new Cast(datetime0, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new And(ImmutableList.of( + new GreaterThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 00:00:00")), + new LessThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59")), + new Not(new IsNull(datetime0))))); + assertRewrite(new GreaterThan(new Cast(datetimev1, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new GreaterThan(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59"))); + assertRewrite(new GreaterThanEqual(new Cast(datetimev1, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new GreaterThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 00:00:00"))); + assertRewrite(new LessThan(new Cast(datetimev1, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new LessThan(datetimev1, new DateTimeLiteral("9999-12-31 00:00:00"))); + assertRewrite(new LessThanEqual(new Cast(datetimev1, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new LessThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59"))); + assertRewrite(new EqualTo(new Cast(datetimev1, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new And( + new GreaterThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 00:00:00")), + new LessThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59")))); + assertRewrite(new NullSafeEqual(new Cast(datetimev1, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new And(ImmutableList.of( + new GreaterThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 00:00:00")), + new LessThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59")), + new Not(new IsNull(datetimev1))))); + assertRewrite(new GreaterThan(new Cast(datetime2, DateType.INSTANCE), new 
DateLiteral("9999-12-31")), + new GreaterThan(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 23:59:59.99"))); + assertRewrite(new GreaterThanEqual(new Cast(datetime2, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 00:00:00.00"))); + assertRewrite(new LessThan(new Cast(datetime2, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new LessThan(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 00:00:00.00"))); + assertRewrite(new LessThanEqual(new Cast(datetime2, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 23:59:59.99"))); + assertRewrite(new EqualTo(new Cast(datetime2, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new And( + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 00:00:00.00")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 23:59:59.99")))); + assertRewrite(new NullSafeEqual(new Cast(datetime2, DateType.INSTANCE), new DateLiteral("9999-12-31")), + new And(ImmutableList.of( + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 00:00:00.00")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 23:59:59.99")), + new Not(new IsNull(datetime2))))); + assertRewrite(new GreaterThan(new Cast(datetime2, DateTimeV2Type.of(0)), new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 12:34:56")), + new GreaterThan(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.99"))); + assertRewrite(new GreaterThanEqual(new Cast(datetime2, DateTimeV2Type.of(0)), new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 12:34:56")), + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.00"))); + assertRewrite(new LessThan(new Cast(datetime2, DateTimeV2Type.of(0)), new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 12:34:56")), + new LessThan(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.00"))); + assertRewrite(new LessThanEqual(new Cast(datetime2, DateTimeV2Type.of(0)), new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 12:34:56")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.99"))); + assertRewrite(new EqualTo(new Cast(datetime2, DateTimeV2Type.of(0)), new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 12:34:56")), + new And( + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.00")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.99")))); + assertRewrite(new NullSafeEqual(new Cast(datetime2, DateTimeV2Type.of(0)), new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 12:34:56")), + new And(ImmutableList.of( + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.00")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.99")), + new Not(new IsNull(datetime2))))); + assertRewrite(new GreaterThan(new Cast(datetime2, DateTimeV2Type.of(1)), new DateTimeV2Literal(DateTimeV2Type.of(1), "9999-12-31 12:34:56.7")), + new GreaterThan(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.79"))); + assertRewrite(new GreaterThanEqual(new Cast(datetime2, 
DateTimeV2Type.of(1)), new DateTimeV2Literal(DateTimeV2Type.of(1), "9999-12-31 12:34:56.7")), + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.70"))); + assertRewrite(new LessThan(new Cast(datetime2, DateTimeV2Type.of(1)), new DateTimeV2Literal(DateTimeV2Type.of(1), "9999-12-31 12:34:56.7")), + new LessThan(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.70"))); + assertRewrite(new LessThanEqual(new Cast(datetime2, DateTimeV2Type.of(1)), new DateTimeV2Literal(DateTimeV2Type.of(1), "9999-12-31 12:34:56.7")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.79"))); + assertRewrite(new EqualTo(new Cast(datetime2, DateTimeV2Type.of(1)), new DateTimeV2Literal(DateTimeV2Type.of(1), "9999-12-31 12:34:56.7")), + new And( + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.70")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.79")))); + assertRewrite(new NullSafeEqual(new Cast(datetime2, DateTimeV2Type.of(1)), new DateTimeV2Literal(DateTimeV2Type.of(1), "9999-12-31 12:34:56.7")), + new And(ImmutableList.of( + new GreaterThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.70")), + new LessThanEqual(datetime2, new DateTimeV2Literal(DateTimeV2Type.of(2), "9999-12-31 12:34:56.79")), + new Not(new IsNull(datetime2))))); + + assertRewrite(new EqualTo(new Cast(datetime0, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + ExpressionUtils.falseOrNull(datetime0)); + assertRewrite(new NullSafeEqual(new Cast(datetime0, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(datetime0, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + new GreaterThan(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59"))); + assertRewrite(new GreaterThanEqual(new Cast(datetime0, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + new GreaterThan(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59"))); assertRewrite(new LessThan(new Cast(datetime0, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), - new LessThan(new Cast(datetime0, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1"))); + new LessThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59"))); + assertRewrite(new LessThanEqual(new Cast(datetime0, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + new LessThanEqual(datetime0, new DateTimeV2Literal(DateTimeV2Type.of(0), "9999-12-31 23:59:59"))); + assertRewrite(new EqualTo(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + ExpressionUtils.falseOrNull(datetimev1)); + assertRewrite(new NullSafeEqual(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + BooleanLiteral.FALSE); + assertRewrite(new GreaterThan(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + new GreaterThan(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59"))); + assertRewrite(new GreaterThanEqual(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + new GreaterThan(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59"))); + assertRewrite(new 
LessThan(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + new LessThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59"))); + assertRewrite(new LessThanEqual(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), + new LessThanEqual(datetimev1, new DateTimeLiteral("9999-12-31 23:59:59"))); assertRewrite(new LessThan(new Cast(datetime2, DateTimeV2Type.of(3)), new DateTimeV2Literal("9999-12-31 23:59:59.991")), - new LessThan(new Cast(datetime2, DateTimeV2Type.of(3)), new DateTimeV2Literal("9999-12-31 23:59:59.991"))); + new LessThanEqual(datetime2, new DateTimeV2Literal("9999-12-31 23:59:59.99"))); assertRewrite(new LessThan(new Cast(datetime2, DateTimeV2Type.of(6)), new DateTimeV2Literal("9999-12-31 23:59:59.999999")), - new LessThan(new Cast(datetime2, DateTimeV2Type.of(6)), new DateTimeV2Literal("9999-12-31 23:59:59.999999"))); - assertRewrite(new LessThan(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1")), - new LessThan(new Cast(datetimev1, DateTimeV2Type.of(1)), new DateTimeV2Literal("9999-12-31 23:59:59.1"))); + new LessThanEqual(datetime2, new DateTimeV2Literal("9999-12-31 23:59:59.99"))); + assertRewrite(new GreaterThanEqual(new Cast(datetime2, DateTimeV2Type.of(3)), new DateTimeV2Literal("9999-12-31 23:59:59.991")), + new GreaterThan(datetime2, new DateTimeV2Literal("9999-12-31 23:59:59.99"))); + assertRewrite(new GreaterThanEqual(new Cast(datetime2, DateTimeV2Type.of(6)), new DateTimeV2Literal("9999-12-31 23:59:59.999999")), + new GreaterThan(datetime2, new DateTimeV2Literal("9999-12-31 23:59:59.99"))); + assertRewrite(new EqualTo(new Cast(datetime2, DateTimeV2Type.of(6)), new DateTimeV2Literal("9999-12-31 23:59:59.999999")), + ExpressionUtils.falseOrNull(datetime2)); } @Test diff --git a/regression-test/data/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.out b/regression-test/data/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.out index ed43d254b5063f..470f044da340dd 100644 --- a/regression-test/data/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.out +++ b/regression-test/data/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.out @@ -251,7 +251,7 @@ PhysicalResultSink --hashJoin[INNER_JOIN] hashCondition=((t1.d_datev2 = expr_cast(d_datetimev2 as DATEV2))) otherCondition=() ----filter((t1.d_datev2 < '2022-01-03')) ------PhysicalOlapScan[extend_infer_t1] -----filter((cast(d_datetimev2 as DATEV2) < '2022-01-03')) +----filter((t2.d_datetimev2 < '2022-01-03 00:00:00')) ------PhysicalOlapScan[extend_infer_t2] -- !test_date_both_upcast1 -- diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/month_quarter_cast_in_prune.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/month_quarter_cast_in_prune.groovy index f2ac6c85d5423d..88235c9ab81209 100644 --- a/regression-test/suites/nereids_rules_p0/partition_prune/month_quarter_cast_in_prune.groovy +++ b/regression-test/suites/nereids_rules_p0/partition_prune/month_quarter_cast_in_prune.groovy @@ -79,7 +79,7 @@ suite("month_quarter_cast_in_prune") { }; explain { sql "select * from test_month where cast(dt as date) > '2022-02-26'" - contains("partitions=5/11 (p7,p8,p9,p10,p11)") + contains("partitions=1/11 (p11)") } explain { @@ -88,7 +88,7 @@ suite("month_quarter_cast_in_prune") { } explain { sql "select * from test_month where quarter(dt)>1 or cast(dt as date) > '2022-02-26'" - contains("partitions=8/11 
(p1,p4,p5,p7,p8,p9,p10,p11)") + contains("partitions=4/11 (p1,p4,p5,p11)") } explain { sql "select * from test_month where day(dt)>80" @@ -223,4 +223,4 @@ suite("month_quarter_cast_in_prune") { sql """select * from from_unixtime_t where from_unixtime(a,"yyyyMMdd %T") <='2001-05-16 16:00:00'""" contains("partitions=5/5 (p1,p2,p3,p4,p5)") } -} \ No newline at end of file +} From a15a901eb4b5af900b9adddd9e9da9871bc82880 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Tue, 21 Jan 2025 19:39:57 +0800 Subject: [PATCH 11/31] [fix](merge-on-write) Remove pending delete bitmap check when `commit_txn()` (#47136) ### What problem does this PR solve? Related PR: https://github.com/apache/doris/pull/46039 Problem Summary: https://github.com/apache/doris/pull/46039 introduce an defensive check when `commit_txn()`, but this may influence the commit process. This PR remove this check totally to eliminate this overhead. --- cloud/src/meta-service/meta_service.cpp | 1 - cloud/src/meta-service/meta_service_txn.cpp | 31 ---- cloud/test/meta_service_test.cpp | 148 -------------------- gensrc/proto/cloud.proto | 2 - 4 files changed, 182 deletions(-) diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 7914bf5db11cf6..36322f7a3ea3b9 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1813,7 +1813,6 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont // 3. store all pending delete bitmap for this txn PendingDeleteBitmapPB delete_bitmap_keys; - delete_bitmap_keys.set_lock_id(request->lock_id()); for (size_t i = 0; i < request->rowset_ids_size(); ++i) { MetaDeleteBitmapInfo key_info {instance_id, tablet_id, request->rowset_ids(i), request->versions(i), request->segment_ids(i)}; diff --git a/cloud/src/meta-service/meta_service_txn.cpp b/cloud/src/meta-service/meta_service_txn.cpp index e5c22ff401b7eb..5d696220b72dab 100644 --- a/cloud/src/meta-service/meta_service_txn.cpp +++ b/cloud/src/meta-service/meta_service_txn.cpp @@ -972,39 +972,8 @@ void process_mow_when_commit_txn( LOG(INFO) << "xxx remove delete bitmap lock, lock_key=" << hex(lock_keys[i]) << " txn_id=" << txn_id; - int64_t lock_id = lock_info.lock_id(); for (auto tablet_id : table_id_tablet_ids[table_id]) { std::string pending_key = meta_pending_delete_bitmap_key({instance_id, tablet_id}); - - // check that if the pending info's lock_id is correct - std::string pending_val; - err = txn->get(pending_key, &pending_val); - if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { - ss << "failed to get delete bitmap pending info, instance_id=" << instance_id - << " tablet_id=" << tablet_id << " key=" << hex(pending_key) << " err=" << err; - msg = ss.str(); - code = cast_as(err); - return; - } - - if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) continue; - - PendingDeleteBitmapPB pending_info; - if (!pending_info.ParseFromString(pending_val)) [[unlikely]] { - code = MetaServiceCode::PROTOBUF_PARSE_ERR; - msg = "failed to parse PendingDeleteBitmapPB"; - return; - } - - if (pending_info.has_lock_id() && pending_info.lock_id() != lock_id) { - TEST_SYNC_POINT_CALLBACK("commit_txn:check_pending_delete_bitmap_lock_id", - &tablet_id); - LOG_WARNING( - "wrong lock_id in pending delete bitmap infos, expect lock_id={}, but " - "found {} tablet_id={} instance_id={}", - lock_id, pending_info.lock_id(), tablet_id, instance_id); - } - txn->remove(pending_key); LOG(INFO) << "xxx remove delete bitmap pending key, pending_key=" << 
hex(pending_key) << " txn_id=" << txn_id; diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index fb17c29629b04e..9eed77271f2efa 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -5494,7 +5494,6 @@ TEST(MetaServiceTest, DeleteBimapCommitTxnTest) { ASSERT_EQ(ret, TxnErrorCode::TXN_OK); PendingDeleteBitmapPB pending_info; ASSERT_TRUE(pending_info.ParseFromString(pending_val)); - ASSERT_EQ(pending_info.lock_id(), lock_info.lock_id()); } // commit txn @@ -5528,153 +5527,6 @@ TEST(MetaServiceTest, DeleteBimapCommitTxnTest) { } } -TEST(MetaServiceTest, WrongPendingBitmapTest) { - auto meta_service = get_meta_service(); - extern std::string get_instance_id(const std::shared_ptr& rc_mgr, - const std::string& cloud_unique_id); - auto instance_id = get_instance_id(meta_service->resource_mgr(), "test_cloud_unique_id"); - - std::set real_wrong_pending_delete_bitmap_tablet_ids; - std::set expected_wrong_pending_delete_bitmap_tablet_ids; - auto sp = SyncPoint::get_instance(); - sp->set_call_back("commit_txn:check_pending_delete_bitmap_lock_id", [&](auto&& args) { - auto* tablet_id = try_any_cast(args[0]); - real_wrong_pending_delete_bitmap_tablet_ids.insert(*tablet_id); - }); - sp->enable_processing(); - - // case: first version of rowset - { - int64_t txn_id = 56789; - int64_t table_id = 123456; // same as table_id of tmp rowset - int64_t db_id = 222; - int64_t tablet_id_base = 8113; - int64_t partition_id = 1234; - // begin txn - { - brpc::Controller cntl; - BeginTxnRequest req; - req.set_cloud_unique_id("test_cloud_unique_id"); - TxnInfoPB txn_info_pb; - txn_info_pb.set_db_id(db_id); - txn_info_pb.set_label("test_label"); - txn_info_pb.add_table_ids(table_id); - txn_info_pb.set_timeout_ms(36000); - req.mutable_txn_info()->CopyFrom(txn_info_pb); - BeginTxnResponse res; - meta_service->begin_txn(reinterpret_cast<::google::protobuf::RpcController*>(&cntl), - &req, &res, nullptr); - ASSERT_EQ(res.status().code(), MetaServiceCode::OK); - txn_id = res.txn_id(); - } - - // mock rowset and tablet - for (int i = 0; i < 5; ++i) { - create_tablet(meta_service.get(), table_id, 1235, partition_id, tablet_id_base + i); - auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); - tmp_rowset.set_partition_id(partition_id); - CreateRowsetResponse res; - commit_rowset(meta_service.get(), tmp_rowset, res); - ASSERT_EQ(res.status().code(), MetaServiceCode::OK); - } - - // update delete bitmap - { - // get delete bitmap update lock - brpc::Controller cntl; - GetDeleteBitmapUpdateLockRequest get_lock_req; - GetDeleteBitmapUpdateLockResponse get_lock_res; - get_lock_req.set_cloud_unique_id("test_cloud_unique_id"); - get_lock_req.set_table_id(table_id); - get_lock_req.add_partition_ids(partition_id); - get_lock_req.set_expiration(5); - get_lock_req.set_lock_id(txn_id); - get_lock_req.set_initiator(-1); - meta_service->get_delete_bitmap_update_lock( - reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &get_lock_req, - &get_lock_res, nullptr); - ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK); - - // first update delete bitmap - UpdateDeleteBitmapRequest update_delete_bitmap_req; - UpdateDeleteBitmapResponse update_delete_bitmap_res; - update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id"); - update_delete_bitmap_req.set_table_id(table_id); - update_delete_bitmap_req.set_partition_id(partition_id); - update_delete_bitmap_req.set_lock_id(txn_id); - update_delete_bitmap_req.set_initiator(-1); - 
update_delete_bitmap_req.set_tablet_id(tablet_id_base); - - update_delete_bitmap_req.add_rowset_ids("123"); - update_delete_bitmap_req.add_segment_ids(1); - update_delete_bitmap_req.add_versions(2); - update_delete_bitmap_req.add_segment_delete_bitmaps("abc0"); - - meta_service->update_delete_bitmap( - reinterpret_cast(&cntl), - &update_delete_bitmap_req, &update_delete_bitmap_res, nullptr); - ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK); - } - - // check delete bitmap update lock and pending delete bitmap - { - std::unique_ptr txn; - ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); - std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); - std::string lock_val; - auto ret = txn->get(lock_key, &lock_val); - ASSERT_EQ(ret, TxnErrorCode::TXN_OK); - DeleteBitmapUpdateLockPB lock_info; - ASSERT_TRUE(lock_info.ParseFromString(lock_val)); - - std::string pending_key = meta_pending_delete_bitmap_key({instance_id, tablet_id_base}); - std::string pending_val; - ret = txn->get(pending_key, &pending_val); - ASSERT_EQ(ret, TxnErrorCode::TXN_OK); - PendingDeleteBitmapPB pending_info; - ASSERT_TRUE(pending_info.ParseFromString(pending_val)); - ASSERT_EQ(pending_info.lock_id(), lock_info.lock_id()); - } - - { - std::unique_ptr txn; - ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); - // pending bitmap have been modified by other txn - std::string pending_key = meta_pending_delete_bitmap_key({instance_id, tablet_id_base}); - std::string pending_val; - auto ret = txn->get(pending_key, &pending_val); - ASSERT_EQ(ret, TxnErrorCode::TXN_OK); - PendingDeleteBitmapPB pending_info; - ASSERT_TRUE(pending_info.ParseFromString(pending_val)); - // change pending bitmap's lock_id - pending_info.set_lock_id(pending_info.lock_id() + 1); - ASSERT_TRUE(pending_info.SerializeToString(&pending_val)); - txn->put(pending_key, pending_val); - ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK); - - expected_wrong_pending_delete_bitmap_tablet_ids.insert(tablet_id_base); - } - - // commit txn - { - brpc::Controller cntl; - CommitTxnRequest req; - req.set_cloud_unique_id("test_cloud_unique_id"); - req.set_db_id(db_id); - req.set_txn_id(txn_id); - req.add_mow_table_ids(table_id); - CommitTxnResponse res; - meta_service->commit_txn(reinterpret_cast<::google::protobuf::RpcController*>(&cntl), - &req, &res, nullptr); - ASSERT_EQ(expected_wrong_pending_delete_bitmap_tablet_ids, - real_wrong_pending_delete_bitmap_tablet_ids); - } - } - - SyncPoint::get_instance()->disable_processing(); - SyncPoint::get_instance()->clear_all_call_backs(); -} - TEST(MetaServiceTest, GetDeleteBitmapWithRetryTest1) { auto meta_service = get_meta_service(); SyncPoint::get_instance()->enable_processing(); diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index d82d88d169d91d..5d541547b97feb 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -1383,7 +1383,6 @@ enum MetaServiceCode { LOCK_EXPIRED = 8001; LOCK_CONFLICT = 8002; ROWSETS_EXPIRED = 8003; - PENDING_DELETE_BITMAP_WRONG = 8004; // partial update ROWSET_META_NOT_FOUND = 9001; @@ -1449,7 +1448,6 @@ message RemoveDeleteBitmapResponse { message PendingDeleteBitmapPB { repeated bytes delete_bitmap_keys = 1; - optional int64 lock_id = 2; } message DeleteBitmapUpdateLockPB { From f199abcf5a7890cbb859961ce798c591a0a40276 Mon Sep 17 00:00:00 2001 From: zhangyuan Date: Tue, 21 Jan 2025 19:49:01 +0800 Subject: [PATCH 12/31] [feature](backup) backup restore colocate 
table (#46059) ### What problem does this PR solve? 1. Add a property `reserve_colocate` when backup a table to record what group a table belongs to. 2. When restore the table, create or find the colocate group, and add the table to the colocate group. 3. Remove the table from the colocate group when restore failed. --------- Co-authored-by: lide --- .../apache/doris/analysis/RestoreStmt.java | 9 +- .../apache/doris/backup/BackupHandler.java | 7 +- .../org/apache/doris/backup/BackupJob.java | 8 - .../org/apache/doris/backup/RestoreJob.java | 42 ++- .../org/apache/doris/catalog/OlapTable.java | 83 ++++- .../apache/doris/backup/RestoreJobTest.java | 2 +- .../apache/doris/catalog/OlapTableTest.java | 2 +- .../test_backup_restore_colocate.groovy | 331 ++++++++++++++++++ 8 files changed, 456 insertions(+), 28 deletions(-) create mode 100644 regression-test/suites/backup_restore/test_backup_restore_colocate.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java index bc38cfe09e5606..5ea8135ce5168d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java @@ -40,6 +40,7 @@ public class RestoreStmt extends AbstractBackupStmt implements NotFallbackInPars private static final String PROP_IS_BEING_SYNCED = PropertyAnalyzer.PROPERTIES_IS_BEING_SYNCED; public static final String PROP_RESERVE_REPLICA = "reserve_replica"; + public static final String PROP_RESERVE_COLOCATE = "reserve_colocate"; public static final String PROP_RESERVE_DYNAMIC_PARTITION_ENABLE = "reserve_dynamic_partition_enable"; public static final String PROP_CLEAN_TABLES = "clean_tables"; public static final String PROP_CLEAN_PARTITIONS = "clean_partitions"; @@ -50,6 +51,7 @@ public class RestoreStmt extends AbstractBackupStmt implements NotFallbackInPars private String backupTimestamp = null; private int metaVersion = -1; private boolean reserveReplica = false; + private boolean reserveColocate = false; private boolean reserveDynamicPartitionEnable = false; private boolean isLocal = false; private boolean isBeingSynced = false; @@ -91,6 +93,10 @@ public boolean reserveReplica() { return reserveReplica; } + public boolean reserveColocate() { + return reserveColocate; + } + public boolean reserveDynamicPartitionEnable() { return reserveDynamicPartitionEnable; } @@ -173,7 +179,8 @@ public void analyzeProperties() throws AnalysisException { if (reserveReplica && !Config.force_olap_table_replication_allocation.isEmpty()) { reserveReplica = false; } - + // reserve colocate + reserveColocate = eatBooleanProperty(copiedProperties, PROP_RESERVE_COLOCATE, reserveColocate); // reserve dynamic partition enable reserveDynamicPartitionEnable = eatBooleanProperty( copiedProperties, PROP_RESERVE_DYNAMIC_PARTITION_ENABLE, reserveDynamicPartitionEnable); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java index 6f88881e3cb2a3..040ab729a5fd61 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -558,15 +558,16 @@ private void restore(Repository repository, Database db, RestoreStmt stmt) throw jobInfo.getBackupTime(), TimeUtils.getDatetimeFormatWithHyphenWithTimeZone()); restoreJob = new RestoreJob(stmt.getLabel(), backupTimestamp, db.getId(), 
db.getFullName(), jobInfo, stmt.allowLoad(), stmt.getReplicaAlloc(), - stmt.getTimeoutMs(), metaVersion, stmt.reserveReplica(), + stmt.getTimeoutMs(), metaVersion, stmt.reserveReplica(), stmt.reserveColocate(), stmt.reserveDynamicPartitionEnable(), stmt.isBeingSynced(), stmt.isCleanTables(), stmt.isCleanPartitions(), stmt.isAtomicRestore(), env, Repository.KEEP_ON_LOCAL_REPO_ID, backupMeta); } else { restoreJob = new RestoreJob(stmt.getLabel(), stmt.getBackupTimestamp(), db.getId(), db.getFullName(), jobInfo, stmt.allowLoad(), stmt.getReplicaAlloc(), - stmt.getTimeoutMs(), stmt.getMetaVersion(), stmt.reserveReplica(), stmt.reserveDynamicPartitionEnable(), - stmt.isBeingSynced(), stmt.isCleanTables(), stmt.isCleanPartitions(), stmt.isAtomicRestore(), + stmt.getTimeoutMs(), stmt.getMetaVersion(), stmt.reserveReplica(), stmt.reserveColocate(), + stmt.reserveDynamicPartitionEnable(), stmt.isBeingSynced(), stmt.isCleanTables(), + stmt.isCleanPartitions(), stmt.isAtomicRestore(), env, repository.getId()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java index de12670807f20e..b8c735842291cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java @@ -675,8 +675,6 @@ private void prepareBackupMetaForOlapTableWithoutLock(TableRef tableRef, OlapTab status = new Status(ErrCode.COMMON_ERROR, "failed to copy table: " + olapTable.getName()); return; } - - removeUnsupportProperties(copiedTbl); copiedTables.add(copiedTbl); } @@ -710,12 +708,6 @@ private void prepareBackupMetaForOdbcTableWithoutLock(OdbcTable odbcTable, List< } } - private void removeUnsupportProperties(OlapTable tbl) { - // We cannot support the colocate attribute because the colocate information is not backed up - // synchronously when backing up. 
- tbl.setColocateGroup(null); - } - private void waitingAllSnapshotsFinished() { if (unfinishedTaskIds.isEmpty()) { if (env.getEditLog().exceedMaxJournalSize(this)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 6dfd02b3a42648..746001a6a43637 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -68,6 +68,7 @@ import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.datasource.property.S3ClientBEProperties; +import org.apache.doris.persist.ColocatePersistInfo; import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.resource.Tag; @@ -117,6 +118,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { private static final String PROP_RESERVE_REPLICA = RestoreStmt.PROP_RESERVE_REPLICA; + private static final String PROP_RESERVE_COLOCATE = RestoreStmt.PROP_RESERVE_COLOCATE; private static final String PROP_RESERVE_DYNAMIC_PARTITION_ENABLE = RestoreStmt.PROP_RESERVE_DYNAMIC_PARTITION_ENABLE; private static final String PROP_IS_BEING_SYNCED = PropertyAnalyzer.PROPERTIES_IS_BEING_SYNCED; @@ -172,6 +174,7 @@ public enum RestoreJobState { private ReplicaAllocation replicaAlloc; private boolean reserveReplica = false; + private boolean reserveColocate = false; private boolean reserveDynamicPartitionEnable = false; // this 2 members is to save all newly restored objs @@ -193,6 +196,8 @@ public enum RestoreJobState { private Map unfinishedSignatureToId = Maps.newConcurrentMap(); + private List colocatePersistInfos = Lists.newArrayList(); + // the meta version is used when reading backup meta from file. // we do not persist this field, because this is just a temporary solution. // the true meta version should be get from backup job info, which is saved when doing backup job. 
@@ -227,8 +232,8 @@ public RestoreJob(JobType jobType) { public RestoreJob(String label, String backupTs, long dbId, String dbName, BackupJobInfo jobInfo, boolean allowLoad, ReplicaAllocation replicaAlloc, long timeoutMs, int metaVersion, boolean reserveReplica, - boolean reserveDynamicPartitionEnable, boolean isBeingSynced, boolean isCleanTables, - boolean isCleanPartitions, boolean isAtomicRestore, Env env, long repoId) { + boolean reserveColocate, boolean reserveDynamicPartitionEnable, boolean isBeingSynced, + boolean isCleanTables, boolean isCleanPartitions, boolean isAtomicRestore, Env env, long repoId) { super(JobType.RESTORE, label, dbId, dbName, timeoutMs, env, repoId); this.backupTimestamp = backupTs; this.jobInfo = jobInfo; @@ -237,6 +242,7 @@ public RestoreJob(String label, String backupTs, long dbId, String dbName, Backu this.state = RestoreJobState.PENDING; this.metaVersion = metaVersion; this.reserveReplica = reserveReplica; + this.reserveColocate = reserveColocate; // if backup snapshot is come from a cluster with force replication allocation, ignore the origin allocation if (jobInfo.isForceReplicationAllocation) { this.reserveReplica = false; @@ -247,6 +253,7 @@ public RestoreJob(String label, String backupTs, long dbId, String dbName, Backu this.isCleanPartitions = isCleanPartitions; this.isAtomicRestore = isAtomicRestore; properties.put(PROP_RESERVE_REPLICA, String.valueOf(reserveReplica)); + properties.put(PROP_RESERVE_COLOCATE, String.valueOf(reserveColocate)); properties.put(PROP_RESERVE_DYNAMIC_PARTITION_ENABLE, String.valueOf(reserveDynamicPartitionEnable)); properties.put(PROP_IS_BEING_SYNCED, String.valueOf(isBeingSynced)); properties.put(PROP_CLEAN_TABLES, String.valueOf(isCleanTables)); @@ -256,11 +263,13 @@ public RestoreJob(String label, String backupTs, long dbId, String dbName, Backu public RestoreJob(String label, String backupTs, long dbId, String dbName, BackupJobInfo jobInfo, boolean allowLoad, ReplicaAllocation replicaAlloc, long timeoutMs, int metaVersion, boolean reserveReplica, - boolean reserveDynamicPartitionEnable, boolean isBeingSynced, boolean isCleanTables, - boolean isCleanPartitions, boolean isAtomicRestore, Env env, long repoId, BackupMeta backupMeta) { + boolean reserveColocate, boolean reserveDynamicPartitionEnable, boolean isBeingSynced, + boolean isCleanTables, boolean isCleanPartitions, boolean isAtomicRestore, Env env, long repoId, + BackupMeta backupMeta) { this(label, backupTs, dbId, dbName, jobInfo, allowLoad, replicaAlloc, timeoutMs, metaVersion, reserveReplica, - reserveDynamicPartitionEnable, isBeingSynced, isCleanTables, isCleanPartitions, isAtomicRestore, env, - repoId); + reserveColocate, reserveDynamicPartitionEnable, isBeingSynced, isCleanTables, isCleanPartitions, + isAtomicRestore, env, repoId); + this.backupMeta = backupMeta; } @@ -280,6 +289,10 @@ public boolean isBeingSynced() { return isBeingSynced; } + public List getColocatePersistInfos() { + return colocatePersistInfos; + } + public synchronized boolean finishTabletSnapshotTask(SnapshotTask task, TFinishTaskRequest request) { if (checkTaskStatus(task, task.getJobId(), request)) { return false; @@ -690,6 +703,12 @@ private void checkAndPrepareMeta() { OlapTable localOlapTbl = (OlapTable) localTbl; OlapTable remoteOlapTbl = (OlapTable) remoteTbl; + if (localOlapTbl.isColocateTable() || (reserveColocate && remoteOlapTbl.isColocateTable())) { + status = new Status(ErrCode.COMMON_ERROR, "Not support to restore to local table " + + tableName + " with colocate group."); + 
return;
+                }
+
                 localOlapTbl.readLock();
                 try {
                     List<String> intersectPartNames = Lists.newArrayList();
@@ -806,7 +825,8 @@ private void checkAndPrepareMeta() {
 
                 // reset all ids in this table
                 String srcDbName = jobInfo.dbName;
-                Status st = remoteOlapTbl.resetIdsForRestore(env, db, replicaAlloc, reserveReplica, srcDbName);
+                Status st = remoteOlapTbl.resetIdsForRestore(env, db, replicaAlloc, reserveReplica,
+                        reserveColocate, colocatePersistInfos, srcDbName);
                 if (!st.ok()) {
                     status = st;
                     return;
@@ -1639,6 +1659,9 @@ private void waitingAllSnapshotsFinished() {
 
             state = RestoreJobState.DOWNLOAD;
             env.getEditLog().logRestoreJob(this);
+            for (ColocatePersistInfo info : colocatePersistInfos) {
+                env.getEditLog().logColocateAddTable(info);
+            }
             LOG.info("finished making snapshots. {}", this);
             return;
         }
@@ -2384,6 +2407,11 @@ private void cancelInternal(boolean isReplay) {
         state = RestoreJobState.CANCELLED;
         // log
         env.getEditLog().logRestoreJob(this);
+        for (ColocatePersistInfo info : colocatePersistInfos) {
+            Env.getCurrentColocateIndex().removeTable(info.getTableId());
+            env.getEditLog().logColocateRemoveTable(info);
+        }
+        colocatePersistInfos.clear();
 
         LOG.info("finished to cancel restore job. current state: {}. is replay: {}. {}",
                 curState.name(), isReplay, this);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 9df096fc4219b3..d31a3c3806b3cc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -41,6 +41,8 @@
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
+import org.apache.doris.common.ErrorCode;
+import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.FeMetaVersion;
 import org.apache.doris.common.Pair;
@@ -57,6 +59,7 @@
 import org.apache.doris.mtmv.MTMVVersionSnapshot;
 import org.apache.doris.nereids.hint.Hint;
 import org.apache.doris.nereids.hint.UseMvHint;
+import org.apache.doris.persist.ColocatePersistInfo;
 import org.apache.doris.persist.gson.GsonPostProcessable;
 import org.apache.doris.persist.gson.GsonUtils;
 import org.apache.doris.qe.ConnectContext;
@@ -718,8 +721,6 @@ public void resetPropertiesForRestore(boolean reserveDynamicPartitionEnable, boo
         if (isBeingSynced) {
             setBeingSyncedProperties();
         }
-        // remove colocate property.
-        setColocateGroup(null);
     }
 
     /**
@@ -743,7 +744,8 @@ public void resetVersionForRestore() {
     }
 
     public Status resetIdsForRestore(Env env, Database db, ReplicaAllocation restoreReplicaAlloc,
-            boolean reserveReplica, String srcDbName) {
+            boolean reserveReplica, boolean reserveColocate, List<ColocatePersistInfo> colocatePersistInfos,
+            String srcDbName) {
         // ATTN: The meta of the restore may come from different clusters, so the
         // original ID in the meta may conflict with the ID of the new cluster. For
         // example, if a newly allocated ID happens to be the same as an original ID,
@@ -794,6 +796,47 @@ public Status resetIdsForRestore(Env env, Database db, ReplicaAllocation restore
         partitionInfo.resetPartitionIdForRestore(partitionMap,
                 reserveReplica ? null : restoreReplicaAlloc, isSinglePartition);
 
+        boolean createNewColocateGroup = false;
+        Map<Tag, List<List<Long>>> backendsPerBucketSeq = null;
+        ColocateTableIndex colocateIndex = Env.getCurrentColocateIndex();
+        ColocateTableIndex.GroupId groupId = null;
+        if (reserveColocate && isColocateTable()) {
+            String fullGroupName = ColocateTableIndex.GroupId.getFullGroupName(db.getId(), getColocateGroup());
+            ColocateGroupSchema groupSchema = Env.getCurrentColocateIndex().getGroupSchema(fullGroupName);
+
+            if (groupSchema != null) {
+                try {
+                    // group already exists, check if this table can be added to this group
+                    groupSchema.checkColocateSchema(this);
+                    //groupSchema.checkDynamicPartition(properties, getDefaultDistributionInfo());
+                    if (dynamicPartitionExists()
+                            && getTableProperty().getDynamicPartitionProperty().getBuckets()
+                                    != groupSchema.getBucketsNum()) {
+                        ErrorReport.reportDdlException(
+                                ErrorCode.ERR_DYNAMIC_PARTITION_MUST_HAS_SAME_BUCKET_NUM_WITH_COLOCATE_TABLE,
+                                getDefaultDistributionInfo().getBucketNum());
+                    }
+                } catch (Exception e) {
+                    return new Status(ErrCode.COMMON_ERROR, "Restore table " + getName()
+                            + " with colocate group " + getColocateGroup() + " failed: " + e.getMessage());
+                }
+
+                // if this is a colocate table, try to get backend seqs from colocation index.
+                backendsPerBucketSeq = colocateIndex.getBackendsPerBucketSeq(groupSchema.getGroupId());
+                createNewColocateGroup = false;
+            } else {
+                backendsPerBucketSeq = Maps.newHashMap();
+                createNewColocateGroup = true;
+            }
+
+            // add table to this group, if group does not exist, create a new one
+            groupId = Env.getCurrentColocateIndex()
+                    .addTableToGroup(db.getId(), this, fullGroupName, null /* generate group id inside */);
+        } else {
+            // remove colocate property.
+            setColocateGroup(null);
+        }
+
         // for each partition, reset rollup index map
         Map<Tag, Integer> nextIndexes = Maps.newHashMap();
         for (Map.Entry<Long, Partition> entry : idToPartition.entrySet()) {
@@ -833,10 +876,20 @@ public Status resetIdsForRestore(Env env, Database db, ReplicaAllocation restore
 
                     // replicas
                     try {
-                        Pair<Map<Tag, List<Long>>, TStorageMedium> tag2beIdsAndMedium =
-                                Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(
-                                        replicaAlloc, nextIndexes, null, false, false);
-                        Map<Tag, List<Long>> tag2beIds = tag2beIdsAndMedium.first;
+                        Map<Tag, List<Long>> tag2beIds = null;
+                        if (isColocateTable() && !createNewColocateGroup) {
+                            // get backends from existing backend sequence
+                            tag2beIds = Maps.newHashMap();
+                            for (Map.Entry<Tag, List<List<Long>>> entry3 : backendsPerBucketSeq.entrySet()) {
+                                tag2beIds.put(entry3.getKey(), entry3.getValue().get(i));
+                            }
+                        } else {
+                            Pair<Map<Tag, List<Long>>, TStorageMedium> tag2beIdsAndMedium =
+                                    Env.getCurrentSystemInfo().selectBackendIdsForReplicaCreation(
+                                            replicaAlloc, nextIndexes, null,
+                                            false, false);
+                            tag2beIds = tag2beIdsAndMedium.first;
+                        }
                         for (Map.Entry<Tag, List<Long>> entry3 : tag2beIds.entrySet()) {
                             for (Long beId : entry3.getValue()) {
                                 long newReplicaId = env.getNextId();
@@ -844,6 +897,10 @@ public Status resetIdsForRestore(Env env, Database db, ReplicaAllocation restore
                                         visibleVersion, schemaHash);
                                 newTablet.addReplica(replica, true /* is restore */);
                             }
+                            if (createNewColocateGroup) {
+                                backendsPerBucketSeq.putIfAbsent(entry3.getKey(), Lists.newArrayList());
+                                backendsPerBucketSeq.get(entry3.getKey()).add(entry3.getValue());
+                            }
                         }
                     } catch (DdlException e) {
                         return new Status(ErrCode.COMMON_ERROR, e.getMessage());
@@ -851,6 +908,18 @@ public Status resetIdsForRestore(Env env, Database db, ReplicaAllocation restore
             }
         }
 
+        if (createNewColocateGroup) {
+            colocateIndex.addBackendsPerBucketSeq(groupId, backendsPerBucketSeq);
+        }
+
+        // we have 
added these index to memory, only need to persist here + if (groupId != null) { + backendsPerBucketSeq = colocateIndex.getBackendsPerBucketSeq(groupId); + ColocatePersistInfo info = ColocatePersistInfo.createForAddTable(groupId, getId(), + backendsPerBucketSeq); + colocatePersistInfos.add(info); + } + // reset partition id partition.setIdForRestore(entry.getKey()); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java index dadfdb632e394d..568a168bafae86 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -256,7 +256,7 @@ boolean await(long timeout, TimeUnit unit) { db.unregisterTable(expectedRestoreTbl.getName()); job = new RestoreJob(label, "2018-01-01 01:01:01", db.getId(), db.getFullName(), jobInfo, false, - new ReplicaAllocation((short) 3), 100000, -1, false, false, false, false, false, false, + new ReplicaAllocation((short) 3), 100000, -1, false, false, false, false, false, false, false, env, repo.getId()); List tbls = Lists.newArrayList(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java index 656fea01b2d89d..218042e3aa2872 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java @@ -103,7 +103,7 @@ public void testResetPropertiesForRestore() { olapTable.resetPropertiesForRestore(false, false, replicaAlloc, false); Assert.assertEquals(tableProperty.getProperties(), olapTable.getTableProperty().getProperties()); Assert.assertFalse(tableProperty.getDynamicPartitionProperty().isExist()); - Assert.assertFalse(olapTable.isColocateTable()); + Assert.assertTrue(olapTable.isColocateTable()); Assert.assertEquals((short) 4, olapTable.getDefaultReplicaAllocation().getTotalReplicaNum()); // restore with dynamic partition keys diff --git a/regression-test/suites/backup_restore/test_backup_restore_colocate.groovy b/regression-test/suites/backup_restore/test_backup_restore_colocate.groovy new file mode 100644 index 00000000000000..cbda545db7b773 --- /dev/null +++ b/regression-test/suites/backup_restore/test_backup_restore_colocate.groovy @@ -0,0 +1,331 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_backup_restore_colocate", "backup_restore") { + String suiteName = "test_backup_restore_colocate" + String repoName = "${suiteName}_repo" + String dbName = "${suiteName}_db" + String newDbName = "${suiteName}_db_new" + String tableName1 = "${suiteName}_table1" + String tableName2 = "${suiteName}_table2" + String tableName3 = "${suiteName}_table3" + String snapshotName = "${suiteName}_snapshot" + String groupName = "${suiteName}_group" + + def syncer = getSyncer() + syncer.createS3Repository(repoName) + + sql "DROP DATABASE IF EXISTS ${dbName}" + sql "DROP DATABASE IF EXISTS ${newDbName}" + sql "CREATE DATABASE IF NOT EXISTS ${dbName}" + sql "CREATE DATABASE IF NOT EXISTS ${newDbName}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName1}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName2}" + sql """ + CREATE TABLE if NOT EXISTS ${dbName}.${tableName1} + ( + `test` INT, + `id` INT + ) + ENGINE=OLAP + UNIQUE KEY(`test`, `id`) + DISTRIBUTED BY HASH(id) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "colocate_with" = "${groupName}" + ) + """ + sql """ + CREATE TABLE if NOT EXISTS ${dbName}.${tableName2} + ( + `test` INT, + `id` INT + ) + ENGINE=OLAP + UNIQUE KEY(`test`, `id`) + DISTRIBUTED BY HASH(id) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "colocate_with" = "${groupName}" + ) + """ + def insert_num = 5 + for (int i = 0; i < insert_num; ++i) { + sql """ + INSERT INTO ${dbName}.${tableName1} VALUES (${i}, ${i}) + """ + sql """ + INSERT INTO ${dbName}.${tableName2} VALUES (${i}, ${i}) + """ + } + + def query = "select * from ${dbName}.${tableName1} as t1, ${dbName}.${tableName2} as t2 where t1.id=t2.id;" + + def res = sql "SELECT * FROM ${dbName}.${tableName1}" + assertEquals(res.size(), insert_num) + res = sql "SELECT * FROM ${dbName}.${tableName2}" + assertEquals(res.size(), insert_num) + + explain { + sql("${query}") + contains("COLOCATE") + } + + res = sql "${query}" + assertEquals(res.size(), insert_num) + + sql """ + BACKUP SNAPSHOT ${dbName}.${snapshotName} + TO `${repoName}` + ON (${tableName1}, ${tableName2}) + PROPERTIES ("type" = "full") + """ + + syncer.waitSnapshotFinish(dbName) + def snapshot = syncer.getSnapshotTimestamp(repoName, snapshotName) + assertTrue(snapshot != null) + + + logger.info("============== test 1: without reserve_colocate =============") + + sql "DROP TABLE IF EXISTS ${dbName}.${tableName1}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName2}" + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + syncer.waitAllRestoreFinish(dbName) + + res = sql "SELECT * FROM ${dbName}.${tableName1}" + assertEquals(res.size(), insert_num) + res = sql "SELECT * FROM ${dbName}.${tableName2}" + assertEquals(res.size(), insert_num) + + + explain { + sql("${query}") + notContains("COLOCATE") + } + res = sql "${query}" + assertEquals(res.size(), insert_num) + + logger.info("============== test 2: reserve_colocate = false =============") + + sql "DROP TABLE IF EXISTS ${dbName}.${tableName1}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName2}" + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_colocate" = "false" + ) + """ + syncer.waitAllRestoreFinish(dbName) + + res = sql "SELECT * FROM ${dbName}.${tableName1}" + assertEquals(res.size(), insert_num) + 
res = sql "SELECT * FROM ${dbName}.${tableName2}" + assertEquals(res.size(), insert_num) + + + explain { + sql("${query}") + notContains("COLOCATE") + } + res = sql "${query}" + assertEquals(res.size(), insert_num) + + + logger.info("============== test 3: reserve_colocate = true =============") + + sql "DROP TABLE IF EXISTS ${dbName}.${tableName1}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName2}" + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_colocate" = "true" + ) + """ + syncer.waitAllRestoreFinish(dbName) + + res = sql "SELECT * FROM ${dbName}.${tableName1}" + assertEquals(res.size(), insert_num) + res = sql "SELECT * FROM ${dbName}.${tableName2}" + assertEquals(res.size(), insert_num) + + + explain { + sql("${query}") + contains("COLOCATE") + } + res = sql "${query}" + assertEquals(res.size(), insert_num) + + logger.info("============== test 4: Not support to restore to local table with colocate group =============") + + res = sql "SELECT * FROM ${dbName}.${tableName1}" + assertEquals(res.size(), insert_num) + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_colocate" = "true" + ) + """ + syncer.waitAllRestoreFinish(dbName) + // Not support to restore to local table with colocate group + def records = sql_return_maparray "SHOW restore FROM ${dbName}" + def row = records[records.size() - 1] + assertTrue(row.Status.contains("with colocate group")) + + + logger.info("============== test 5: local table with colocate group =============") + + res = sql "SELECT * FROM ${dbName}.${tableName1}" + assertEquals(res.size(), insert_num) + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_colocate" = "false" + ) + """ + syncer.waitAllRestoreFinish(dbName) + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("with colocate group")) + + + logger.info("============== test 6: local table without colocate group =============") + + sql "DROP TABLE IF EXISTS ${dbName}.${tableName1}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName2}" + // without colocate group + sql """ + CREATE TABLE if NOT EXISTS ${dbName}.${tableName1} + ( + `test` INT, + `id` INT + ) + ENGINE=OLAP + UNIQUE KEY(`test`, `id`) + DISTRIBUTED BY HASH(id) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ) + """ + sql """ + CREATE TABLE if NOT EXISTS ${dbName}.${tableName2} + ( + `test` INT, + `id` INT + ) + ENGINE=OLAP + UNIQUE KEY(`test`, `id`) + DISTRIBUTED BY HASH(id) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ) + """ + + assertEquals(res.size(), insert_num) + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_colocate" = "false" + ) + """ + syncer.waitAllRestoreFinish(dbName) + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("OK")) + + + logger.info("============== test 7: local colocate mismatch error =============") + + sql "DROP TABLE IF EXISTS ${newDbName}.${tableName1}" + sql "DROP TABLE IF EXISTS 
${newDbName}.${tableName2}" + sql "DROP TABLE IF EXISTS ${newDbName}.${tableName3}" + // create with different colocat + sql """ + CREATE TABLE if NOT EXISTS ${newDbName}.${tableName3} + ( + `test` INT, + `id` INT + ) + ENGINE=OLAP + UNIQUE KEY(`test`, `id`) + DISTRIBUTED BY HASH(id) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "colocate_with" = "${groupName}" + ) + """ + + sql """ + RESTORE SNAPSHOT ${newDbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_colocate" = "true" + ) + """ + syncer.waitAllRestoreFinish(newDbName) + + records = sql_return_maparray "SHOW restore FROM ${newDbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("Colocate tables must have same bucket num")) + + //cleanup + sql "DROP TABLE IF EXISTS ${newDbName}.${tableName1}" + sql "DROP TABLE IF EXISTS ${newDbName}.${tableName2}" + sql "DROP TABLE IF EXISTS ${newDbName}.${tableName3}" + sql "DROP DATABASE ${newDbName} FORCE" + + sql "DROP TABLE IF EXISTS ${dbName}.${tableName1}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName2}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName3}" + sql "DROP DATABASE ${dbName} FORCE" + sql "DROP REPOSITORY `${repoName}`" +} \ No newline at end of file From 34c3b3962cfaa847a89696db3c4615d2043dfca3 Mon Sep 17 00:00:00 2001 From: walter Date: Tue, 21 Jan 2025 19:52:29 +0800 Subject: [PATCH 13/31] [fix](restore) Release snapshots after the FINISHED state is persisted (#47277) When Doris FE restarts, the DirMoveTask will be sent to BE again. If the ReleaseSnapshotTask is sent to BE before the FINISHED state is persisted, and Doris FE restarts, then the DirMoveTask and the restore job will fail too. --- .../org/apache/doris/backup/RestoreJob.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 746001a6a43637..e563192c584e15 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -2124,8 +2124,8 @@ private Status allTabletCommitted(boolean isReplay) { restoredTbls.clear(); restoredResources.clear(); - // release snapshot before clearing snapshotInfos - releaseSnapshots(); + com.google.common.collect.Table savedSnapshotInfos = snapshotInfos; + snapshotInfos = HashBasedTable.create(); snapshotInfos.clear(); fileMapping.clear(); @@ -2135,6 +2135,9 @@ private Status allTabletCommitted(boolean isReplay) { state = RestoreJobState.FINISHED; env.getEditLog().logRestoreJob(this); + + // Only send release snapshot tasks after the job is finished. + releaseSnapshots(savedSnapshotInfos); } LOG.info("job is finished. is replay: {}. 
{}", isReplay, this); @@ -2202,7 +2205,7 @@ private void dropNonRestoredPartitions( } } - private void releaseSnapshots() { + private void releaseSnapshots(com.google.common.collect.Table snapshotInfos) { if (snapshotInfos.isEmpty()) { return; } @@ -2396,9 +2399,8 @@ private void cancelInternal(boolean isReplay) { // backupMeta is useless backupMeta = null; - releaseSnapshots(); - - snapshotInfos.clear(); + com.google.common.collect.Table savedSnapshotInfos = snapshotInfos; + snapshotInfos = HashBasedTable.create(); fileMapping.clear(); jobInfo.releaseSnapshotInfo(); @@ -2415,6 +2417,10 @@ private void cancelInternal(boolean isReplay) { LOG.info("finished to cancel restore job. current state: {}. is replay: {}. {}", curState.name(), isReplay, this); + + // Send release snapshot tasks after log restore job, so that the snapshot won't be released + // before the cancelled restore job is persisted. + releaseSnapshots(savedSnapshotInfos); return; } From 6e49c4e5452c431e09c4068ef64976222a5c5eee Mon Sep 17 00:00:00 2001 From: meiyi Date: Tue, 21 Jan 2025 20:03:55 +0800 Subject: [PATCH 14/31] [fix](case) fix group commit and cluster key case (#47141) --- .../suites/insert_p0/insert_group_commit_into_unique.groovy | 3 +++ .../unique_with_mow_c_p0/test_compact_multi_segments.groovy | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy b/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy index f4ab4e41065bcf..d3fddcafef4367 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy @@ -95,6 +95,7 @@ suite("insert_group_commit_into_unique") { sql """ set group_commit = async_mode; """ group_commit_insert """ insert into ${dbTableName} values (1, 'a', 10),(5, 'q', 50); """, 2 group_commit_insert """ insert into ${dbTableName}(id) values(6); """, 1 + getRowCount(3) group_commit_insert """ insert into ${dbTableName}(id) values(4); """, 1 group_commit_insert """ insert into ${dbTableName}(name, id) values('c', 3); """, 1 group_commit_insert """ insert into ${dbTableName}(id, name) values(2, 'b'); """, 1 @@ -175,6 +176,7 @@ suite("insert_group_commit_into_unique") { group_commit_insert """ insert into ${dbTableName} values (1, 'a', 10),(5, 'q', 50); """, 2 group_commit_insert """ insert into ${dbTableName}(id, score) values(6, 60); """, 1 + getRowCount(3) group_commit_insert """ insert into ${dbTableName}(id, score) values(4, 70); """, 1 group_commit_insert """ insert into ${dbTableName}(name, id, score) values('c', 3, 30); """, 1 group_commit_insert """ insert into ${dbTableName}(score, id, name) values(30, 2, 'b'); """, 1 @@ -256,6 +258,7 @@ suite("insert_group_commit_into_unique") { group_commit_insert """ insert into ${dbTableName}(id, name, score, __DORIS_SEQUENCE_COL__) values (1, 'a', 10, 100),(5, 'q', 50, 500); """, 2 group_commit_insert """ insert into ${dbTableName}(id, score, __DORIS_SEQUENCE_COL__) values(6, 60, 600); """, 1 + getRowCount(3) group_commit_insert """ insert into ${dbTableName}(id, score, __DORIS_SEQUENCE_COL__) values(6, 50, 500); """, 1 group_commit_insert """ insert into ${dbTableName}(name, id, score, __DORIS_SEQUENCE_COL__) values('c', 3, 30, 300); """, 1 group_commit_insert """ insert into ${dbTableName}(score, id, name, __DORIS_SEQUENCE_COL__) values(30, 2, 'b', 200); """, 1 diff --git a/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy 
b/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy index 3ef2d309758b67..e6fba5bfdfe9d1 100644 --- a/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy +++ b/regression-test/suites/unique_with_mow_c_p0/test_compact_multi_segments.groovy @@ -90,6 +90,7 @@ suite("test_compact_multi_segments", "nonConcurrent") { } } // check generate 3 segments + sql """ select * from ${tableName} limit 1; """ getTabletStatus(2, 3) streamLoad { @@ -109,6 +110,7 @@ suite("test_compact_multi_segments", "nonConcurrent") { } } // check generate 3 segments + sql """ select * from ${tableName} limit 1; """ getTabletStatus(3, 6) streamLoad { @@ -128,6 +130,7 @@ suite("test_compact_multi_segments", "nonConcurrent") { } } // check generate 3 segments + sql """ select * from ${tableName} limit 1; """ getTabletStatus(4, 6) streamLoad { @@ -147,6 +150,7 @@ suite("test_compact_multi_segments", "nonConcurrent") { } } // check generate 3 segments + sql """ select * from ${tableName} limit 1; """ getTabletStatus(5, 6) def rowCount1 = sql """ select count() from ${tableName}; """ @@ -182,6 +186,7 @@ suite("test_compact_multi_segments", "nonConcurrent") { } // check generate 1 segments + sql """ select * from ${tableName} limit 1; """ getTabletStatus(2, 1) // [2-5] // check row count From bad5b609103e950cc784ebf87401b4c56d7ab9b0 Mon Sep 17 00:00:00 2001 From: abmdocrt Date: Tue, 21 Jan 2025 21:17:58 +0800 Subject: [PATCH 15/31] [Fix](test) Fix Show Data Case (#47224) --- cloud/src/common/config.h | 6 +- .../java/org/apache/doris/common/Config.java | 3 +- regression-test/plugins/aliyun_oss_sdk.groovy | 7 ++ .../plugins/cloud_show_data_plugin.groovy | 94 ++++++++++++++++--- .../test_cloud_follower_show_data.groovy | 11 ++- .../test_cloud_mtmv_show_data.groovy | 62 +++--------- ...hange_add_and_drop_column_show_data.groovy | 24 +++-- ...change_add_and_drop_index_show_data.groovy | 20 ++-- ...ema_change_reorder_column_show_data.groovy | 15 ++- ...t_cloud_delete_table_rows_show_data.groovy | 27 +++--- ...rop_and_recover_partition_show_data.groovy | 40 ++++---- .../test_cloud_drop_table_show_data.groovy | 44 ++++----- ...est_cloud_truncate_table_show_data.groovy} | 53 +++-------- ..._cloud_disable_compaction_show_data.groovy | 11 ++- ...t_cloud_inverted_index_v1_show_data.groovy | 11 ++- ...t_cloud_inverted_index_v2_show_data.groovy | 11 ++- .../test_cloud_lz4_show_data.groovy | 11 ++- .../test_cloud_zstd_show_data.groovy | 11 ++- .../test_cloud_agg_show_data.groovy | 11 ++- .../test_cloud_dup_show_data.groovy | 16 +++- .../test_cloud_mor_show_data.groovy | 11 ++- ..._cloud_mow_partial_update_show_data.groovy | 13 ++- .../test_cloud_mow_show_data.groovy | 11 ++- 23 files changed, 313 insertions(+), 210 deletions(-) rename regression-test/suites/show_data_p2/test_table_operation/{test_cloud_truncate_and_recover_table_show_data.groovy => test_cloud_truncate_table_show_data.groovy} (77%) diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 1e58f0d4d74037..359d087416cd2e 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -62,7 +62,7 @@ CONF_String(custom_conf_path, "./conf/doris_cloud.conf"); CONF_mInt64(recycle_interval_seconds, "3600"); CONF_mInt64(retention_seconds, "259200"); // 72h, global retention time CONF_Int32(recycle_concurrency, "16"); -CONF_Int32(recycle_job_lease_expired_ms, "60000"); +CONF_mInt32(recycle_job_lease_expired_ms, "60000"); CONF_mInt64(compacted_rowset_retention_seconds, "1800"); // 0.5h 
CONF_mInt64(dropped_index_retention_seconds, "10800"); // 3h CONF_mInt64(dropped_partition_retention_seconds, "10800"); // 3h @@ -110,7 +110,7 @@ CONF_String(test_hdfs_fs_name, ""); // CONF_Bool(b, "true"); // txn config -CONF_Int32(label_keep_max_second, "259200"); //3 * 24 * 3600 seconds +CONF_mInt32(label_keep_max_second, "259200"); //3 * 24 * 3600 seconds CONF_Int32(expired_txn_scan_key_nums, "1000"); // Maximum number of version of a tablet. If the version num of a tablet exceed limit, @@ -133,7 +133,7 @@ CONF_String(specific_max_qps_limit, "get_cluster:5000000;begin_txn:5000000"); CONF_Bool(enable_rate_limit, "true"); CONF_Int64(bvar_qps_update_second, "5"); -CONF_Int32(copy_job_max_retention_second, "259200"); //3 * 24 * 3600 seconds +CONF_mInt32(copy_job_max_retention_second, "259200"); //3 * 24 * 3600 seconds CONF_String(arn_id, ""); CONF_String(arn_ak, ""); CONF_String(arn_sk, ""); diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index a162a0c48781e3..24f9bd48682557 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -947,7 +947,8 @@ public class Config extends ConfigBase { // update interval of tablet stat // All frontends will get tablet stat from all backends at each interval - @ConfField public static int tablet_stat_update_interval_second = 60; // 1 min + @ConfField(mutable = true) + public static int tablet_stat_update_interval_second = 60; // 1 min /** * Max bytes a broker scanner can process in one broker load job. diff --git a/regression-test/plugins/aliyun_oss_sdk.groovy b/regression-test/plugins/aliyun_oss_sdk.groovy index efd6efa585b397..6b0c096d7e5992 100644 --- a/regression-test/plugins/aliyun_oss_sdk.groovy +++ b/regression-test/plugins/aliyun_oss_sdk.groovy @@ -97,6 +97,7 @@ Suite.metaClass.calculateFolderLength = { OSS client, String bucketName, String ObjectListing objectListing = null; do { // The default value for MaxKey is 100, and the maximum value is 1000 + logger.info("debug:" + folder) ListObjectsRequest request = new ListObjectsRequest(bucketName).withPrefix(folder).withMaxKeys(1000); if (objectListing != null) { request.setMarker(objectListing.getNextMarker()); @@ -104,6 +105,12 @@ Suite.metaClass.calculateFolderLength = { OSS client, String bucketName, String objectListing = client.listObjects(request); List sums = objectListing.getObjectSummaries(); for (OSSObjectSummary s : sums) { + logger.info("Object Key: ${s.getKey()}") + logger.info("Size: ${s.getSize()} bytes") + logger.info("Last Modified: ${s.getLastModified()}") + logger.info("Storage Class: ${s.getStorageClass()}") + logger.info("Owner: ${s.getOwner()?.getId()}") + logger.info("-------------------") size += s.getSize(); } } while (objectListing.isTruncated()); diff --git a/regression-test/plugins/cloud_show_data_plugin.groovy b/regression-test/plugins/cloud_show_data_plugin.groovy index 43dc6fd38345cd..54375c955d4d14 100644 --- a/regression-test/plugins/cloud_show_data_plugin.groovy +++ b/regression-test/plugins/cloud_show_data_plugin.groovy @@ -69,7 +69,8 @@ import org.codehaus.groovy.runtime.IOGroovyMethods } Suite.metaClass.get_tablets_from_table = { String table -> - def res = sql_return_maparray """show tablets from ${table}""" + def res = sql_return_maparray """show tablets from ${table}""" + logger.info("get tablets from ${table}:" + res) return res } @@ -120,10 +121,10 @@ import 
org.codehaus.groovy.runtime.IOGroovyMethods if (tabletStatusAfterCompaction.rowsets.size() < tabletStatusBeforeCompaction.rowsets.size()){ compactionStatus = 'FINISHED' } - Thread.sleep(60 * 1000) - } while (timeoutTimestamp > System.currentTimeMillis() && (status != 'FINISHED')) + Thread.sleep(10 * 1000) + } while (timeoutTimestamp > System.currentTimeMillis() && (compactionStatus != 'FINISHED')) - if (status != "FINISHED") { + if (compactionStatus != "FINISHED") { logger.info("compaction not Finish or failed") return false } @@ -132,8 +133,6 @@ import org.codehaus.groovy.runtime.IOGroovyMethods Suite.metaClass.trigger_compaction = { List> tablets -> for(def tablet: tablets) { - trigger_tablet_compaction(tablet, "cumulative") - trigger_tablet_compaction(tablet, "base") trigger_tablet_compaction(tablet, "full") } } @@ -157,7 +156,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods def client = initOssClient(ak, sk, endpoint) for(String tabletId: tabletIds) { - storageSize += calculateFolderLength(client, bucketName, storagePrefix + "/data/" + tabletId) + storageSize += calculateFolderLength(client, bucketName, storagePrefix + "data/" + tabletId) } shutDownOssClient(client) } @@ -168,8 +167,8 @@ import org.codehaus.groovy.runtime.IOGroovyMethods def fsUser = context.config.otherConfigs.get("cbsFsUser") def storagePrefix = context.config.otherConfigs.get("cbsFsPrefix") } - - return storageSize + def round_size = new BigDecimal(storageSize/1024/1024).setScale(0, BigDecimal.ROUND_FLOOR); + return round_size } Suite.metaClass.translate_different_unit_to_MB = { String size, String unitField -> @@ -196,7 +195,8 @@ import org.codehaus.groovy.runtime.IOGroovyMethods def unitField = fields[1] mysqlShowDataSize = translate_different_unit_to_MB(sizeField, unitField) } - return mysqlShowDataSize + def round_size = new BigDecimal(mysqlShowDataSize).setScale(0, BigDecimal.ROUND_FLOOR); + return round_size } Suite.metaClass.caculate_table_data_size_through_api = { List> tablets -> @@ -214,7 +214,79 @@ import org.codehaus.groovy.runtime.IOGroovyMethods } } } + def round_size = new BigDecimal(apiCaculateSize).setScale(0, BigDecimal.ROUND_FLOOR); + return round_size + } + + Suite.metaClass.update_ms_config = { String ms_endpoint, String key, String value /*param */ -> + return curl("POST", String.format("http://%s/MetaService/http/v1/update_config?%s=%s", ms_endpoint, key, value)) + } + + Suite.metaClass.set_config_before_show_data_test = { -> + + sql """admin set frontend config ("tablet_stat_update_interval_second" = "1")""" + sql """admin set frontend config ("catalog_trash_expire_second" = "1")""" + + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + def get_be_param = { paramName -> + // assuming paramName on all BEs have save value + def (code, out, err) = show_be_config(backendIdToBackendIP.get(backendId), backendIdToBackendHttpPort.get(backendId)) + assertEquals(code, 0) + def configList = parseJson(out.trim()) + assert configList instanceof List + for (Object ele in (List) configList) { + assert ele instanceof List + if (((List) ele)[0] == paramName) { + return ((List) ele)[2] + } + } + } + + def ms_endpoint = get_be_param("meta_service_endpoint"); + + update_ms_config.call(ms_endpoint, "recycle_interval_seconds", "5") + update_ms_config.call(ms_endpoint, "retention_seconds", "0") + update_ms_config.call(ms_endpoint, "compacted_rowset_retention_seconds", "0") + 
update_ms_config.call(ms_endpoint, "recycle_job_lease_expired_ms", "0") + update_ms_config.call(ms_endpoint, "dropped_partition_retention_seconds", "0") + update_ms_config.call(ms_endpoint, "label_keep_max_second", "0") + update_ms_config.call(ms_endpoint, "copy_job_max_retention_second", "0") + } + + Suite.metaClass.set_config_after_show_data_test = { -> + + sql """admin set frontend config ("tablet_stat_update_interval_second" = "10")""" + sql """admin set frontend config ("catalog_trash_expire_second" = "600")""" + + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + def get_be_param = { paramName -> + // assuming paramName on all BEs have save value + def (code, out, err) = show_be_config(backendIdToBackendIP.get(backendId), backendIdToBackendHttpPort.get(backendId)) + assertEquals(code, 0) + def configList = parseJson(out.trim()) + assert configList instanceof List + for (Object ele in (List) configList) { + assert ele instanceof List + if (((List) ele)[0] == paramName) { + return ((List) ele)[2] + } + } + } + + def ms_endpoint = get_be_param("meta_service_endpoint"); - return apiCaculateSize + update_ms_config.call(ms_endpoint, "recycle_interval_seconds", "600") + update_ms_config.call(ms_endpoint, "retention_seconds", "259200") + update_ms_config.call(ms_endpoint, "compacted_rowset_retention_seconds", "1800") + update_ms_config.call(ms_endpoint, "recycle_job_lease_expired_ms", "60000") + update_ms_config.call(ms_endpoint, "dropped_partition_retention_seconds", "10800") + update_ms_config.call(ms_endpoint, "label_keep_max_second", "300") + update_ms_config.call(ms_endpoint, "copy_job_max_retention_second", "259200") } //http://qa-build.oss-cn-beijing.aliyuncs.com/regression/show_data/fullData.1.part1.gz diff --git a/regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy b/regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy index f748cb740b4335..671191a963dd8d 100644 --- a/regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_cloud_follower_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_follower_show_data","p2") { +suite("test_cloud_follower_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -72,13 +72,13 @@ suite("test_cloud_follower_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -86,6 +86,7 @@ suite("test_cloud_follower_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is 
${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) @@ -121,5 +122,9 @@ suite("test_cloud_follower_show_data","p2") { check(tableName) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy index cc4fd289296028..637aa463d45a96 100644 --- a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_mtmv_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_mtmv_show_data","p2") { +suite("test_cloud_mtmv_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -98,13 +98,13 @@ suite("test_cloud_mtmv_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -124,75 +124,37 @@ suite("test_cloud_mtmv_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - - sql "select count(*) from ${tableName}" - - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - - - // expect mysqlSize == apiSize == storageSize - assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) - assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["cbsSize"][2]) - - // 加一下触发compaction的机制 - trigger_compaction(tablets) - - // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - - - // expect mysqlSize == apiSize == storageSize - assertEquals(sizeRecords["mysqlSize"][3], sizeRecords["apiSize"][3]) - assertEquals(sizeRecords["mysqlSize"][3], sizeRecords["cbsSize"][3]) + logger.info("after create mv, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } if (op == 2){ create_mtmv(tableName) - tableName = ${tableName} + "_mtmv" + tableName = "${tableName}" + "_mtmv" tablets = get_tablets_from_table(tableName) // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 
1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - + logger.info("after create mtmv, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["cbsSize"][2]) - - // 加一下触发compaction的机制 - trigger_compaction(tablets) - - // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - - sql "select count(*) from ${tableName}" - - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - - - // expect mysqlSize == apiSize == storageSize - assertEquals(sizeRecords["mysqlSize"][3], sizeRecords["apiSize"][3]) - assertEquals(sizeRecords["mysqlSize"][3], sizeRecords["cbsSize"][3]) } } @@ -205,5 +167,9 @@ suite("test_cloud_mtmv_show_data","p2") { check(tableName, 2) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy index 51d4f3936c35b8..a2c49850b40551 100644 --- a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_column_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_schema_change_add_and_drop_column_show_data","p2") { +suite("test_cloud_schema_change_add_and_drop_column_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -126,13 +126,13 @@ suite("test_cloud_schema_change_add_and_drop_column_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -152,14 +152,15 @@ suite("test_cloud_schema_change_add_and_drop_column_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) + tablets = get_tablets_from_table(tableName) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - + logger.info("after add column, mysqlSize is: 
${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) @@ -171,14 +172,15 @@ suite("test_cloud_schema_change_add_and_drop_column_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) + tablets = get_tablets_from_table(tableName) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - + logger.info("after drop column, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][3], sizeRecords["apiSize"][3]) @@ -194,5 +196,9 @@ suite("test_cloud_schema_change_add_and_drop_column_show_data","p2") { check(tableName) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy index 328b73cb60d034..c5d94213f9e548 100644 --- a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_add_and_drop_index_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_schema_change_add_and_drop_index_show_data","p2") { +suite("test_cloud_schema_change_add_and_drop_index_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -126,13 +126,13 @@ suite("test_cloud_schema_change_add_and_drop_index_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -152,13 +152,14 @@ suite("test_cloud_schema_change_add_and_drop_index_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + logger.info("after add index, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == 
storageSize @@ -173,13 +174,14 @@ suite("test_cloud_schema_change_add_and_drop_index_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + logger.info("after drop index, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize @@ -196,5 +198,9 @@ suite("test_cloud_schema_change_add_and_drop_index_show_data","p2") { check(tableName) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy index 507d578bbcb4c7..55c87ba56c4411 100644 --- a/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_modification/test_cloud_schema_change_reorder_column_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_schema_change_reorder_column_show_data","p2") { +suite("test_cloud_schema_change_reorder_column_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -115,13 +115,13 @@ suite("test_cloud_schema_change_reorder_column_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -140,13 +140,14 @@ suite("test_cloud_schema_change_reorder_column_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + logger.info("after reorder column, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize @@ -163,5 +164,9 @@ suite("test_cloud_schema_change_reorder_column_show_data","p2") { check(tableName) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy 
b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy index 792cc1d2b4da98..9cef5a53712d4e 100644 --- a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_delete_table_rows_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_delete_table_rows_show_data","p2") { +suite("test_cloud_delete_table_rows_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -84,13 +84,11 @@ suite("test_cloud_delete_table_rows_show_data","p2") { PARTITION BY RANGE(L_ORDERKEY) ( PARTITION p1 VALUES LESS THAN (100000), - PARTITION p2 VALUES LESS THAN (200000), PARTITION p3 VALUES LESS THAN (300000), - PARTITION p4 VALUES LESS THAN (400000), PARTITION p5 VALUES LESS THAN (500000), PARTITION other VALUES LESS THAN (MAXVALUE) ) - DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 1 PROPERTIES ( "replication_num" = "1" ) @@ -125,7 +123,7 @@ suite("test_cloud_delete_table_rows_show_data","p2") { AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'year')) ( ) - DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 1 PROPERTIES ( "replication_num" = "1" ) @@ -141,17 +139,18 @@ suite("test_cloud_delete_table_rows_show_data","p2") { repeate_stream_load_same_data(tableName, i, "regression/tpch/sf0.1/lineitem.tbl.gz") def rows = sql_return_maparray "select count(*) as count from ${tableName};" logger.info("table ${tableName} has ${rows[0]["count"]} rows") + tablets = get_tablets_from_table(tableName) // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -164,18 +163,20 @@ suite("test_cloud_delete_table_rows_show_data","p2") { assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) sql """delete from ${tableName} where L_ORDERKEY >=0;""" - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) // expect mysqlSize == apiSize == storageSize + logger.info("after delete, mysqlSize is: ${sizeRecords["mysqlSize"][2]}, apiSize is: ${sizeRecords["apiSize"][2]}, storageSize is: ${sizeRecords["cbsSize"][2]}") assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) 
assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["cbsSize"][2]) } @@ -192,5 +193,9 @@ suite("test_cloud_delete_table_rows_show_data","p2") { check(tableName) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_and_recover_partition_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_and_recover_partition_show_data.groovy index 672c0f78e394a0..b067ad5d9ad25d 100644 --- a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_and_recover_partition_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_and_recover_partition_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_drop_and_recover_partition_show_data","p2") { +suite("test_cloud_drop_and_recover_partition_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -84,13 +84,11 @@ suite("test_cloud_drop_and_recover_partition_show_data","p2") { PARTITION BY RANGE(L_ORDERKEY) ( PARTITION p1 VALUES LESS THAN (100000), - PARTITION p2 VALUES LESS THAN (200000), PARTITION p3 VALUES LESS THAN (300000), - PARTITION p4 VALUES LESS THAN (400000), PARTITION p5 VALUES LESS THAN (500000), PARTITION other VALUES LESS THAN (MAXVALUE) ) - DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 1 PROPERTIES ( "replication_num" = "1" ) @@ -125,7 +123,7 @@ suite("test_cloud_drop_and_recover_partition_show_data","p2") { AUTO PARTITION BY RANGE (date_trunc(`L_SHIPDATE`, 'year')) ( ) - DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 + DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 1 PROPERTIES ( "replication_num" = "1" ) @@ -141,17 +139,18 @@ suite("test_cloud_drop_and_recover_partition_show_data","p2") { repeate_stream_load_same_data(tableName, i, "regression/tpch/sf0.1/lineitem.tbl.gz") def rows = sql_return_maparray "select count(*) as count from ${tableName};" logger.info("table ${tableName} has ${rows[0]["count"]} rows") + tablets = get_tablets_from_table(tableName) // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -171,16 +170,18 @@ suite("test_cloud_drop_and_recover_partition_show_data","p2") { // after drop partition,tablets will changed,need get new tablets tablets = get_tablets_from_table(tableName) - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) + 
sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + logger.info("after drop partition, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) @@ -189,21 +190,24 @@ suite("test_cloud_drop_and_recover_partition_show_data","p2") { if (op == 1){ sql """recover partition p1 from ${tableName};""" } else if(op == 2){ - sql """recover partition pp19920101000000 from ${tableName};""" + sql """recover partition p19920101000000 from ${tableName};""" } // after drop partition,tablets will changed,need get new tablets tablets = get_tablets_from_table(tableName) - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) + + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + logger.info("after recover partition, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][3], sizeRecords["apiSize"][3]) @@ -222,5 +226,9 @@ suite("test_cloud_drop_and_recover_partition_show_data","p2") { check(tableName, 2) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy index c8daaaa69be5e2..d80295d802f8cc 100644 --- a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_drop_table_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_drop_and_recover_table_show_data","p2") { +suite("test_cloud_drop_and_recover_table_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -71,13 +71,13 @@ suite("test_cloud_drop_and_recover_table_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: 
${sizeRecords["cbsSize"][-1]}") } @@ -93,20 +93,21 @@ suite("test_cloud_drop_and_recover_table_show_data","p2") { sql """drop table ${tableName}""" - sleep(60 * 1000) + sleep(10 * 1000) sql """recover table ${tableName}""" - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - - tablets = get_tablets_from_table(tableName) // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) + tablets = get_tablets_from_table(tableName) + sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) + logger.info("after recover table, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) @@ -117,35 +118,30 @@ suite("test_cloud_drop_and_recover_table_show_data","p2") { if(op == 2){ sql """drop table ${tableName} force""" - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - - tablets = get_tablets_from_table(tableName) - // 加一下触发compaction的机制 - trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 sleep(60 * 1000) - sql "select count(*) from ${tableName}" + sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) + logger.info("after drop table force, storageSize is: ${sizeRecords["cbsSize"][-1]}") - // expect mysqlSize == apiSize == storageSize - assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) - assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["cbsSize"][2]) - assertEquals(sizeRecords["mysqlSize"][2], 0) + assertEquals(sizeRecords["cbsSize"][2], 0.0) } } def main = { def tableName = "test_cloud_drop_and_recover_table_show_data" - create_normal_table(tableName) - check(tableName, 1) + //create_normal_table(tableName) + //check(tableName, 1) tableName = "test_cloud_drop_and_recover_table_force_show_data" create_normal_table(tableName) check(tableName, 2) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_and_recover_table_show_data.groovy b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_table_show_data.groovy similarity index 77% rename from regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_and_recover_table_show_data.groovy rename to regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_table_show_data.groovy index c06a402ce94a4a..e435b6bb180b56 100644 --- a/regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_and_recover_table_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_operation/test_cloud_truncate_table_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data 
size not rising -suite("test_cloud_truncate_and_recover_table_show_data","p2") { +suite("test_cloud_truncate_table_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -58,7 +58,7 @@ suite("test_cloud_truncate_and_recover_table_show_data","p2") { """ } - def check = {String tableName, int op -> + def check = {String tableName-> List tablets = get_tablets_from_table(tableName) def loadTimes = [1, 10] Map sizeRecords = ["apiSize":[], "mysqlSize":[], "cbsSize":[]] @@ -71,13 +71,13 @@ suite("test_cloud_truncate_and_recover_table_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) + sleep(10 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -89,61 +89,36 @@ suite("test_cloud_truncate_and_recover_table_show_data","p2") { assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) - if(op == 1){ - sql """truncate table ${tableName}""" - sleep(60 * 1000) - - sql """recover table ${tableName}""" - sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) - sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) - sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - // 加一下触发compaction的机制 trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) + tablets = get_tablets_from_table(tableName) - // expect mysqlSize == apiSize == storageSize - assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) - assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["cbsSize"][2]) - assertEquals(sizeRecords["mysqlSize"][1], sizeRecords["apiSize"][2]) - } - - if(op == 2){ - - sql """truncate table ${tableName} force""" sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - - // 加一下触发compaction的机制 - trigger_compaction(tablets) - - // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) - sql "select count(*) from ${tableName}" + logger.info("after truncate table, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["apiSize"][2]) assertEquals(sizeRecords["mysqlSize"][2], sizeRecords["cbsSize"][2]) - assertEquals(sizeRecords["mysqlSize"][2], 0) - - } } def main = { - def tableName = "test_cloud_truncate_and_recover_table_show_data" - create_normal_table(tableName) - check(tableName, 1) - - tableName = "test_cloud_truncate_and_recover_table_force_show_data" + def tableName = "test_cloud_truncate_table_show_data" create_normal_table(tableName) - check(tableName, 2) + check(tableName) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 
1000) } diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy index 452d25a4739283..f04c66133343a5 100644 --- a/regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_disable_compaction_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_disable_compaction_show_data","p2") { +suite("test_cloud_disable_compaction_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -68,13 +68,13 @@ suite("test_cloud_disable_compaction_show_data","p2") { logger.info("table ${tableName} has ${rows[0]["count"]} rows") // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -82,10 +82,15 @@ suite("test_cloud_disable_compaction_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy index 0cd12e6a9ffbab..1077e0436a0c68 100644 --- a/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v1_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_inverted_index_v1_show_data","p2") { +suite("test_cloud_inverted_index_v1_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -73,13 +73,13 @@ suite("test_cloud_inverted_index_v1_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} 
times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -87,10 +87,15 @@ suite("test_cloud_inverted_index_v1_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy index 6670e2067da03f..db07832bfb1b12 100644 --- a/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_inverted_index_v2_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_inverted_index_v2_show_data","p2") { +suite("test_cloud_inverted_index_v2_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -73,13 +73,13 @@ suite("test_cloud_inverted_index_v2_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -87,10 +87,15 @@ suite("test_cloud_inverted_index_v2_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy index de5464759cc1eb..aab9604f67fd39 100644 --- a/regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_lz4_show_data.groovy @@ -21,7 +21,7 @@ import 
org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_lz4_show_data","p2") { +suite("test_cloud_lz4_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -70,13 +70,13 @@ suite("test_cloud_lz4_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -84,10 +84,15 @@ suite("test_cloud_lz4_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy b/regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy index ad37f9ac95e03a..83e50450dca8b6 100644 --- a/regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_property/test_cloud_zstd_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_lz4_show_data","p2") { +suite("test_cloud_zstd_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -70,13 +70,13 @@ suite("test_cloud_lz4_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -84,10 +84,15 @@ suite("test_cloud_lz4_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) 
assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy index e995845f26ac5d..55bf038efb011c 100644 --- a/regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_agg_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_agg_show_data","p2") { +suite("test_cloud_agg_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -69,13 +69,13 @@ suite("test_cloud_agg_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -87,9 +87,14 @@ suite("test_cloud_agg_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][1], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["mysqlSize"][1], sizeRecords["cbsSize"][1]) // expect 10 * 1 times on agg table >= load 10 times on agg table >= 1 times on agg table + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertTrue(10*sizeRecords["mysqlSize"][0]>=sizeRecords["mysqlSize"][1]) assertTrue(sizeRecords["mysqlSize"][1]>=sizeRecords["mysqlSize"][0]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy index ad3109dd945b49..f4ed8338407754 100644 --- a/regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_dup_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_dup_show_data","p2") { +suite("test_cloud_dup_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -69,13 +69,13 @@ suite("test_cloud_dup_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 5min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize 
is: ${sizeRecords["cbsSize"][-1]}") } @@ -85,9 +85,15 @@ suite("test_cloud_dup_show_data","p2") { // expect mysqlSize == apiSize == storageSize assertEquals(sizeRecords["mysqlSize"][1], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["mysqlSize"][1], sizeRecords["cbsSize"][1]) - // expect load 10 times on dup table = 10 * load 1 times on dup table - assertTrue(10*sizeRecords["mysqlSize"][0]==sizeRecords["mysqlSize"][1]) + // expect load 10 times on dup table < 10 * load 1 times on dup table + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") + assertTrue(10*sizeRecords["mysqlSize"][0] > sizeRecords["mysqlSize"][1]) + assertTrue(sizeRecords["mysqlSize"][0] < sizeRecords["mysqlSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy index e159ebcecf942c..fb11d96ac5e659 100644 --- a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mor_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_mor_show_data","p2") { +suite("test_cloud_mor_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -70,13 +70,13 @@ suite("test_cloud_mor_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -84,10 +84,15 @@ suite("test_cloud_mor_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy index e32342775fb06f..6521b5190f4079 100644 --- a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_partial_update_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, 
expect data size not rising -suite("test_cloud_mow_partial_update_show_data","p2") { +suite("test_cloud_mow_partial_update_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -236,7 +236,7 @@ suite("test_cloud_mow_partial_update_show_data","p2") { ) ENGINE=OLAP UNIQUE KEY(`id`) COMMENT "OLAP" - DISTRIBUTED BY HASH(`id`) BUCKETS 32 + DISTRIBUTED BY HASH(`id`) BUCKETS 3 PROPERTIES ( "store_row_column" = "true" ); @@ -258,13 +258,13 @@ suite("test_cloud_mow_partial_update_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -272,10 +272,15 @@ suite("test_cloud_mow_partial_update_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } diff --git a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_show_data.groovy b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_show_data.groovy index 29ce5af49091ea..26a407349cadaa 100644 --- a/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_show_data.groovy +++ b/regression-test/suites/show_data_p2/test_table_type/test_cloud_mow_show_data.groovy @@ -21,7 +21,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods // loading one data 10 times, expect data size not rising -suite("test_cloud_mow_show_data","p2") { +suite("test_cloud_mow_show_data","p2, nonConcurrent") { //cloud-mode if (!isCloudMode()) { logger.info("not cloud mode, not run") @@ -69,13 +69,13 @@ suite("test_cloud_mow_show_data","p2") { trigger_compaction(tablets) // 然后 sleep 1min, 等fe汇报完 - sleep(60 * 1000) + sleep(10 * 1000) sql "select count(*) from ${tableName}" + sleep(10 * 1000) sizeRecords["apiSize"].add(caculate_table_data_size_through_api(tablets)) sizeRecords["cbsSize"].add(caculate_table_data_size_in_backend_storage(tablets)) sizeRecords["mysqlSize"].add(show_table_data_size_through_mysql(tableName)) - sleep(60 * 1000) logger.info("after ${i} times stream load, mysqlSize is: ${sizeRecords["mysqlSize"][-1]}, apiSize is: ${sizeRecords["apiSize"][-1]}, storageSize is: ${sizeRecords["cbsSize"][-1]}") } @@ -83,10 +83,15 @@ suite("test_cloud_mow_show_data","p2") { assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["apiSize"][0]) assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["cbsSize"][0]) // expect load 1 times == load 10 times + logger.info("after 1 time stream load, size is ${sizeRecords["mysqlSize"][0]}, after 10 times 
stream load, size is ${sizeRecords["mysqlSize"][1]}") assertEquals(sizeRecords["mysqlSize"][0], sizeRecords["mysqlSize"][1]) assertEquals(sizeRecords["apiSize"][0], sizeRecords["apiSize"][1]) assertEquals(sizeRecords["cbsSize"][0], sizeRecords["cbsSize"][1]) } + set_config_before_show_data_test() + sleep(10 * 1000) main() + set_config_after_show_data_test() + sleep(10 * 1000) } From bac58079acb571aef1c6dcaf3b83cb928990ec90 Mon Sep 17 00:00:00 2001 From: walter Date: Tue, 21 Jan 2025 21:18:25 +0800 Subject: [PATCH 16/31] [fix](restore) Add a local snapshot lock to protect snapshot dir (#47279) To avoid concurrent modification of a snapshot dir. --- be/src/olap/snapshot_manager.cpp | 33 +++++++++- be/src/olap/snapshot_manager.h | 51 +++++++++++++++ be/src/runtime/snapshot_loader.cpp | 100 ++++++++++++++--------------- 3 files changed, 132 insertions(+), 52 deletions(-) diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 8202feb68c65b5..7f0e94274d999b 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -67,6 +67,35 @@ using std::vector; namespace doris { using namespace ErrorCode; +LocalSnapshotLockGuard LocalSnapshotLock::acquire(const std::string& path) { + std::unique_lock l(_lock); + auto& ctx = _local_snapshot_contexts[path]; + while (ctx._is_locked) { + ctx._waiting_count++; + ctx._cv.wait(l); + ctx._waiting_count--; + } + + ctx._is_locked = true; + return {path}; +} + +void LocalSnapshotLock::release(const std::string& path) { + std::lock_guard l(_lock); + auto iter = _local_snapshot_contexts.find(path); + if (iter == _local_snapshot_contexts.end()) { + return; + } + + auto& ctx = iter->second; + ctx._is_locked = false; + if (ctx._waiting_count > 0) { + ctx._cv.notify_one(); + } else { + _local_snapshot_contexts.erase(iter); + } +} + SnapshotManager::SnapshotManager(StorageEngine& engine) : _engine(engine) { _mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::OTHER, "SnapshotManager"); @@ -118,6 +147,8 @@ Status SnapshotManager::make_snapshot(const TSnapshotRequest& request, string* s } Status SnapshotManager::release_snapshot(const string& snapshot_path) { + auto local_snapshot_guard = LocalSnapshotLock::instance().acquire(snapshot_path); + // If the requested snapshot_path is located in the root/snapshot folder, it is considered legal and can be deleted. // Otherwise, it is considered an illegal request and returns an error result. SCOPED_ATTACH_TASK(_mem_tracker); @@ -448,7 +479,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet } } // be would definitely set it as true no matter has missed version or not - // but it would take no effets on the following range loop + // but it would take no effects on the following range loop if (!is_single_rowset_clone && request.__isset.missing_version) { for (int64_t missed_version : request.missing_version) { Version version = {missed_version, missed_version}; diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h index dd10f7f355058b..668bb860e1b282 100644 --- a/be/src/olap/snapshot_manager.h +++ b/be/src/olap/snapshot_manager.h @@ -17,7 +17,9 @@ #pragma once +#include #include +#include #include #include @@ -33,6 +35,55 @@ struct RowsetId; class StorageEngine; class MemTrackerLimiter; +class LocalSnapshotLockGuard; + +// A simple lock to protect the local snapshot path. 
+class LocalSnapshotLock { + friend class LocalSnapshotLockGuard; + +public: + LocalSnapshotLock() = default; + ~LocalSnapshotLock() = default; + LocalSnapshotLock(const LocalSnapshotLock&) = delete; + LocalSnapshotLock& operator=(const LocalSnapshotLock&) = delete; + + static LocalSnapshotLock& instance() { + static LocalSnapshotLock instance; + return instance; + } + + // Acquire the lock for the specified path. It will block if the lock is already held by another. + LocalSnapshotLockGuard acquire(const std::string& path); + +private: + void release(const std::string& path); + + class LocalSnapshotContext { + public: + bool _is_locked = false; + size_t _waiting_count = 0; + std::condition_variable _cv; + + LocalSnapshotContext() = default; + LocalSnapshotContext(const LocalSnapshotContext&) = delete; + LocalSnapshotContext& operator=(const LocalSnapshotContext&) = delete; + }; + + std::mutex _lock; + std::unordered_map _local_snapshot_contexts; +}; + +class LocalSnapshotLockGuard { +public: + LocalSnapshotLockGuard(std::string path) : _snapshot_path(std::move(path)) {} + LocalSnapshotLockGuard(const LocalSnapshotLockGuard&) = delete; + LocalSnapshotLockGuard& operator=(const LocalSnapshotLockGuard&) = delete; + ~LocalSnapshotLockGuard() { LocalSnapshotLock::instance().release(_snapshot_path); } + +private: + std::string _snapshot_path; +}; + class SnapshotManager { public: SnapshotManager(StorageEngine& engine); diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index c5b27c823054a4..422aecad37a498 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -146,6 +147,9 @@ Status SnapshotLoader::upload(const std::map& src_to_d const std::string& src_path = iter.first; const std::string& dest_path = iter.second; + // Take a lock to protect the local snapshot path. + auto local_snapshot_guard = LocalSnapshotLock::instance().acquire(src_path); + int64_t tablet_id = 0; int32_t schema_hash = 0; RETURN_IF_ERROR( @@ -242,6 +246,9 @@ Status SnapshotLoader::download(const std::map& src_to const std::string& remote_path = iter.first; const std::string& local_path = iter.second; + // Take a lock to protect the local snapshot path. + auto local_snapshot_guard = LocalSnapshotLock::instance().acquire(local_path); + int64_t local_tablet_id = 0; int32_t schema_hash = 0; RETURN_IF_ERROR(_get_tablet_id_and_schema_hash_from_file_path(local_path, &local_tablet_id, @@ -397,8 +404,6 @@ Status SnapshotLoader::download(const std::map& src_to Status SnapshotLoader::remote_http_download( const std::vector& remote_tablet_snapshots, std::vector* downloaded_tablet_ids) { - LOG(INFO) << fmt::format("begin to download snapshots via http. 
job: {}, task id: {}", _job_id, - _task_id); constexpr uint32_t kListRemoteFileTimeout = 15; constexpr uint32_t kDownloadFileMaxRetry = 3; constexpr uint32_t kGetLengthTimeout = 10; @@ -408,35 +413,39 @@ Status SnapshotLoader::remote_http_download( RETURN_IF_ERROR(_report_every(0, &tmp_counter, 0, 0, TTaskType::type::DOWNLOAD)); Status status = Status::OK(); - // Step before, validate all remote - - // Step 1: Validate local tablet snapshot paths + int report_counter = 0; + int finished_num = 0; + int total_num = remote_tablet_snapshots.size(); for (const auto& remote_tablet_snapshot : remote_tablet_snapshots) { - const auto& path = remote_tablet_snapshot.local_snapshot_path; + const auto& local_path = remote_tablet_snapshot.local_snapshot_path; + const auto& remote_path = remote_tablet_snapshot.remote_snapshot_path; + LOG(INFO) << fmt::format( + "download snapshots via http. job: {}, task id: {}, local dir: {}, remote dir: {}", + _job_id, _task_id, local_path, remote_path); + + // Take a lock to protect the local snapshot path. + auto local_snapshot_guard = LocalSnapshotLock::instance().acquire(local_path); + + // Step 1: Validate local tablet snapshot paths bool res = true; - RETURN_IF_ERROR(io::global_local_filesystem()->is_directory(path, &res)); + RETURN_IF_ERROR(io::global_local_filesystem()->is_directory(local_path, &res)); if (!res) { std::stringstream ss; auto err_msg = - fmt::format("snapshot path is not directory or does not exist: {}", path); + fmt::format("snapshot path is not directory or does not exist: {}", local_path); LOG(WARNING) << err_msg; return Status::RuntimeError(err_msg); } - } - // Step 2: get all local files - struct LocalFileStat { - uint64_t size; - std::string md5; - }; - std::unordered_map> local_files_map; - for (const auto& remote_tablet_snapshot : remote_tablet_snapshots) { - const auto& local_path = remote_tablet_snapshot.local_snapshot_path; - std::vector local_files; - RETURN_IF_ERROR(_get_existing_files_from_local(local_path, &local_files)); - - auto& local_filestat = local_files_map[local_path]; - for (auto& local_file : local_files) { + // Step 2: get all local files + struct LocalFileStat { + uint64_t size; + std::string md5; + }; + std::unordered_map local_files; + std::vector existing_files; + RETURN_IF_ERROR(_get_existing_files_from_local(local_path, &existing_files)); + for (auto& local_file : existing_files) { // add file size std::string local_file_path = local_path + "/" + local_file; std::error_code ec; @@ -453,27 +462,20 @@ Status SnapshotLoader::remote_http_download( << " md5sum: " << status.to_string(); return status; } - local_filestat[local_file] = {local_file_size, md5}; + local_files[local_file] = {local_file_size, md5}; } - } - - // Step 3: Validate remote tablet snapshot paths && remote files map - // key is remote snapshot paths, value is filelist - // get all these use http download action - // http://172.16.0.14:6781/api/_tablet/_download?token=e804dd27-86da-4072-af58-70724075d2a4&file=/home/ubuntu/doris_master/output/be/storage/snapshot/20230410102306.9.180//2774718/217609978/2774718.hdr - int report_counter = 0; - int total_num = remote_tablet_snapshots.size(); - int finished_num = 0; - struct RemoteFileStat { - std::string url; - std::string md5; - uint64_t size; - }; - std::unordered_map> - remote_files_map; - for (const auto& remote_tablet_snapshot : remote_tablet_snapshots) { - const auto& remote_path = remote_tablet_snapshot.remote_snapshot_path; - auto& remote_files = remote_files_map[remote_path]; + 
existing_files.clear(); + + // Step 3: Validate remote tablet snapshot paths && remote files map + // key is remote snapshot paths, value is filelist + // get all these use http download action + // http://172.16.0.14:6781/api/_tablet/_download?token=e804dd27-86da-4072-af58-70724075d2a4&file=/home/ubuntu/doris_master/output/be/storage/snapshot/20230410102306.9.180//2774718/217609978/2774718.hdr + struct RemoteFileStat { + std::string url; + std::string md5; + uint64_t size; + }; + std::unordered_map remote_files; const auto& token = remote_tablet_snapshot.remote_token; const auto& remote_be_addr = remote_tablet_snapshot.remote_be_addr; @@ -516,19 +518,11 @@ Status SnapshotLoader::remote_http_download( remote_files[filename] = RemoteFileStat {remote_file_url, file_md5, file_size}; } - } - // Step 4: Compare local and remote files && get all need download files - for (const auto& remote_tablet_snapshot : remote_tablet_snapshots) { + // Step 4: Compare local and remote files && get all need download files RETURN_IF_ERROR(_report_every(10, &report_counter, finished_num, total_num, TTaskType::type::DOWNLOAD)); - const auto& remote_path = remote_tablet_snapshot.remote_snapshot_path; - const auto& local_path = remote_tablet_snapshot.local_snapshot_path; - auto& remote_files = remote_files_map[remote_path]; - auto& local_files = local_files_map[local_path]; - auto remote_tablet_id = remote_tablet_snapshot.remote_tablet_id; - // get all need download files std::vector need_download_files; for (const auto& [remote_file, remote_filestat] : remote_files) { @@ -656,6 +650,7 @@ Status SnapshotLoader::remote_http_download( if (total_time_ms > 0) { copy_rate = total_file_size / ((double)total_time_ms) / 1000; } + auto remote_tablet_id = remote_tablet_snapshot.remote_tablet_id; LOG(INFO) << fmt::format( "succeed to copy remote tablet {} to local tablet {}, total file size: {} B, cost: " "{} ms, rate: {} MB/s", @@ -705,6 +700,9 @@ Status SnapshotLoader::remote_http_download( // MUST hold tablet's header lock, push lock, cumulative lock and base compaction lock Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr tablet, bool overwrite) { + // Take a lock to protect the local snapshot path. + auto local_snapshot_guard = LocalSnapshotLock::instance().acquire(snapshot_path); + auto tablet_path = tablet->tablet_path(); auto store_path = tablet->data_dir()->path(); LOG(INFO) << "begin to move snapshot files. 
from: " << snapshot_path << ", to: " << tablet_path From e5361a6839c3369ca80740c262c7a279a824b416 Mon Sep 17 00:00:00 2001 From: huanghaibin Date: Tue, 21 Jan 2025 21:21:11 +0800 Subject: [PATCH 17/31] [fix](cloud-mow)Fe should process KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES when geting delete bitmap lock fail (#47161) Fe should process KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES when geting delete bitmap lock fail --- .../CloudGlobalTransactionMgr.java | 21 ++++++- ...loud_mow_stream_load_with_txn_conflict.out | 5 ++ ...d_mow_stream_load_with_txn_conflict.groovy | 61 +++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.out create mode 100644 regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java index e1e722443e40f4..114dbeaa4e58a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java @@ -54,6 +54,7 @@ import org.apache.doris.cloud.proto.Cloud.GetTxnResponse; import org.apache.doris.cloud.proto.Cloud.LoadJobSourceTypePB; import org.apache.doris.cloud.proto.Cloud.MetaServiceCode; +import org.apache.doris.cloud.proto.Cloud.MetaServiceResponseStatus; import org.apache.doris.cloud.proto.Cloud.PrecommitTxnRequest; import org.apache.doris.cloud.proto.Cloud.PrecommitTxnResponse; import org.apache.doris.cloud.proto.Cloud.RemoveDeleteBitmapUpdateLockRequest; @@ -143,6 +144,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Random; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; @@ -867,6 +869,22 @@ private void getDeleteBitmapUpdateLock(long transactionId, List mowTa LOG.debug("get delete bitmap lock, transactionId={}, Request: {}, Response: {}", transactionId, request, response); } + if (DebugPointUtil.isEnable("CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict")) { + DebugPoint debugPoint = DebugPointUtil.getDebugPoint( + "CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict"); + double percent = debugPoint.param("percent", 0.4); + long timestamp = System.currentTimeMillis(); + Random random = new Random(timestamp); + if (Math.abs(random.nextInt()) % 100 < 100 * percent) { + LOG.info("set kv txn conflict for test"); + GetDeleteBitmapUpdateLockResponse.Builder getLockResponseBuilder + = GetDeleteBitmapUpdateLockResponse.newBuilder(); + getLockResponseBuilder.setStatus(MetaServiceResponseStatus.newBuilder() + .setCode(MetaServiceCode.KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES) + .setMsg("kv txn conflict")); + response = getLockResponseBuilder.build(); + } + } if (response.getStatus().getCode() != MetaServiceCode.LOCK_CONFLICT && response.getStatus().getCode() != MetaServiceCode.KV_TXN_CONFLICT) { break; @@ -892,7 +910,8 @@ private void getDeleteBitmapUpdateLock(long transactionId, List mowTa LOG.warn("get delete bitmap lock failed, transactionId={}, for {} times, response:{}", transactionId, retryTime, response); if (response.getStatus().getCode() == MetaServiceCode.LOCK_CONFLICT - || response.getStatus().getCode() == MetaServiceCode.KV_TXN_CONFLICT) { + || 
response.getStatus().getCode() == MetaServiceCode.KV_TXN_CONFLICT + || response.getStatus().getCode() == MetaServiceCode.KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES) { // DELETE_BITMAP_LOCK_ERR will be retried on be throw new UserException(InternalErrorCode.DELETE_BITMAP_LOCK_ERR, "Failed to get delete bitmap lock due to confilct"); diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.out new file mode 100644 index 00000000000000..9c8bb8cd785f24 --- /dev/null +++ b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +5 e 90 +6 f 100 + diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.groovy new file mode 100644 index 00000000000000..5e4479064f4029 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stream_load_with_txn_conflict.groovy @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_cloud_mow_stream_load_with_txn_conflict", "nonConcurrent") { + GetDebugPoint().clearDebugPointsForAllFEs() + def tableName = "test_cloud_mow_stream_load_with_txn_conflict" + try { + // create table + sql """ drop table if exists ${tableName}; """ + + sql """ + CREATE TABLE `${tableName}` ( + `id` int(11) NOT NULL, + `name` varchar(1100) NULL, + `score` int(11) NULL default "-1" + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "replication_num" = "1" + ); + """ + GetDebugPoint().enableDebugPointForAllFEs('CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict', [percent: 0.4]) + streamLoad { + table "${tableName}" + + set 'column_separator', ',' + set 'columns', 'id, name, score' + file "test_stream_load.csv" + + time 10000 // limit inflight 10s + + check { result, exception, startTime, endTime -> + log.info("Stream load result: ${result}") + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + } + } + qt_sql """ select * from ${tableName} order by id""" + } finally { + GetDebugPoint().disableDebugPointForAllFEs('CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict') + sql "DROP TABLE IF EXISTS ${tableName};" + GetDebugPoint().clearDebugPointsForAllFEs() + } + +} From 55702a16fac1031868869ee3acf7eadb3851564d Mon Sep 17 00:00:00 2001 From: Uniqueyou Date: Tue, 21 Jan 2025 22:35:22 +0800 Subject: [PATCH 18/31] [fix](binlog) Binlog ts not initialized (#47174) https://github.com/selectdb/ccr-syncer/pull/395 --- .../apache/doris/binlog/BinlogManager.java | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java index 69293ea6c00da0..848f408ce0ba67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java @@ -184,7 +184,7 @@ public void addAddPartitionRecord(AddPartitionRecord addPartitionRecord) { List tableIds = Lists.newArrayList(); tableIds.add(addPartitionRecord.getTableId()); long commitSeq = addPartitionRecord.getCommitSeq(); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.ADD_PARTITION; String data = addPartitionRecord.toJson(); @@ -196,7 +196,7 @@ public void addCreateTableRecord(CreateTableRecord createTableRecord) { List tableIds = Lists.newArrayList(); tableIds.add(createTableRecord.getTableId()); long commitSeq = createTableRecord.getCommitSeq(); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.CREATE_TABLE; String data = createTableRecord.toJson(); @@ -207,7 +207,7 @@ public void addDropPartitionRecord(DropPartitionInfo dropPartitionInfo, long com long dbId = dropPartitionInfo.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(dropPartitionInfo.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.DROP_PARTITION; String data = dropPartitionInfo.toJson(); @@ -219,7 +219,7 @@ public void addDropTableRecord(DropTableRecord record) { List tableIds = Lists.newArrayList(); tableIds.add(record.getTableId()); long commitSeq = record.getCommitSeq(); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.DROP_TABLE; String data = record.toJson(); @@ -230,7 +230,7 @@ 
public void addAlterJobV2(AlterJobV2 alterJob, long commitSeq) { long dbId = alterJob.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(alterJob.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.ALTER_JOB; AlterJobRecord alterJobRecord = new AlterJobRecord(alterJob); String data = alterJobRecord.toJson(); @@ -242,7 +242,7 @@ public void addModifyTableAddOrDropColumns(TableAddOrDropColumnsInfo info, long long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.MODIFY_TABLE_ADD_OR_DROP_COLUMNS; String data = info.toJson(); @@ -253,7 +253,7 @@ public void addAlterDatabaseProperty(AlterDatabasePropertyInfo info, long commit long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.ALTER_DATABASE_PROPERTY; String data = info.toJson(); @@ -264,7 +264,7 @@ public void addModifyTableProperty(ModifyTablePropertyOperationLog info, long co long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.MODIFY_TABLE_PROPERTY; String data = info.toJson(); @@ -285,7 +285,7 @@ public void addBarrierLog(BarrierLog barrierLog, long commitSeq) { List tableIds = Lists.newArrayList(); tableIds.add(tableId); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.BARRIER; String data = barrierLog.toJson(); @@ -297,7 +297,7 @@ public void addModifyPartitions(BatchModifyPartitionsInfo info, long commitSeq) long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.MODIFY_PARTITIONS; String data = info.toJson(); @@ -309,7 +309,7 @@ public void addReplacePartitions(ReplacePartitionOperationLog info, long commitS long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTblId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.REPLACE_PARTITIONS; String data = info.toJson(); @@ -321,7 +321,7 @@ public void addTruncateTable(TruncateTableInfo info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTblId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.TRUNCATE_TABLE; TruncateTableRecord record = new TruncateTableRecord(info); String data = record.toJson(); @@ -333,7 +333,7 @@ public void addTableRename(TableInfo info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.RENAME_TABLE; String data = info.toJson(); addBinlog(dbId, tableIds, commitSeq, timestamp, type, data, false, info); @@ -343,7 +343,7 @@ public void addRollupRename(TableInfo info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.RENAME_ROLLUP; String data = info.toJson(); addBinlog(dbId, 
tableIds, commitSeq, timestamp, type, data, false, info); @@ -353,7 +353,7 @@ public void addPartitionRename(TableInfo info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.RENAME_PARTITION; String data = info.toJson(); addBinlog(dbId, tableIds, commitSeq, timestamp, type, data, false, info); @@ -363,7 +363,7 @@ public void addModifyComment(ModifyCommentOperationLog info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTblId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.MODIFY_COMMENT; String data = info.toJson(); @@ -374,7 +374,7 @@ public void addColumnRename(TableRenameColumnInfo info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.RENAME_COLUMN; String data = info.toJson(); @@ -386,7 +386,7 @@ public void addModifyViewDef(AlterViewInfo alterViewInfo, long commitSeq) { long dbId = alterViewInfo.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(alterViewInfo.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.MODIFY_VIEW_DEF; String data = alterViewInfo.toJson(); @@ -402,7 +402,7 @@ public void addReplaceTable(ReplaceTableOperationLog info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getOrigTblId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.REPLACE_TABLE; String data = info.toJson(); @@ -413,7 +413,7 @@ public void addModifyTableAddOrDropInvertedIndices(TableAddOrDropInvertedIndices long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.MODIFY_TABLE_ADD_OR_DROP_INVERTED_INDICES; String data = info.toJson(); @@ -424,7 +424,7 @@ public void addIndexChangeJob(IndexChangeJob indexChangeJob, long commitSeq) { long dbId = indexChangeJob.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(indexChangeJob.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.INDEX_CHANGE_JOB; String data = indexChangeJob.toJson(); @@ -440,7 +440,7 @@ public void addDropRollup(DropInfo info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.DROP_ROLLUP; String data = info.toJson(); @@ -466,7 +466,7 @@ public void addRecoverTableRecord(RecoverInfo info, long commitSeq) { long dbId = info.getDbId(); List tableIds = Lists.newArrayList(); tableIds.add(info.getTableId()); - long timestamp = -1; + long timestamp = System.currentTimeMillis(); TBinlogType type = TBinlogType.RECOVER_INFO; String data = info.toJson(); addBinlog(dbId, tableIds, commitSeq, timestamp, type, data, false, info); From 2e6145105591e8dcff742798654b05baad127cdf Mon Sep 17 00:00:00 2001 From: seawinde Date: Wed, 22 Jan 2025 10:21:10 +0800 Subject: [PATCH 19/31] [test](mtmv) Fix mv regression test fail occasionally and add some log 
(#47103) ### What problem does this PR solve? 1. Optimize the test validation logic to check whether the materialized view is built based on the materialized view name. 2. Wait for the synchronization of the materialized view to be completed, using the new method. --- .../org/apache/doris/qe/SessionVariable.java | 10 +- regression-test/data/mv_p0/await/await.out | 510 ++++++++++++++++++ .../external_table/part_partition_invalid.out | 27 - .../doris/regression/suite/Suite.groovy | 44 +- .../suites/mv_p0/await/await.groovy | 170 ++++++ .../suites/mv_p0/no_await/no_await.groovy | 54 +- .../suites/nereids_p0/hint/test_use_mv.groovy | 4 +- .../part_partition_invalid.groovy | 29 - .../is_in_debug_mode/is_in_debug_mode.groovy | 3 + .../usercase_union_rewrite.groovy | 2 +- 10 files changed, 734 insertions(+), 119 deletions(-) create mode 100644 regression-test/data/mv_p0/await/await.out create mode 100644 regression-test/suites/mv_p0/await/await.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 30f9a010c58c02..c334ad0328f047 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1070,7 +1070,7 @@ public enum IgnoreSplitType { public int maxScanKeyNum = 48; @VariableMgr.VarAttr(name = MAX_PUSHDOWN_CONDITIONS_PER_COLUMN) public int maxPushdownConditionsPerColumn = 1024; - @VariableMgr.VarAttr(name = SHOW_HIDDEN_COLUMNS, flag = VariableMgr.SESSION_ONLY) + @VariableMgr.VarAttr(name = SHOW_HIDDEN_COLUMNS, flag = VariableMgr.SESSION_ONLY, needForward = true) public boolean showHiddenColumns = false; @VariableMgr.VarAttr(name = ALLOW_PARTITION_COLUMN_NULLABLE, description = { @@ -1535,25 +1535,25 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { /** * For debug purpose, don't merge unique key and agg key when reading data. */ - @VariableMgr.VarAttr(name = SKIP_STORAGE_ENGINE_MERGE) + @VariableMgr.VarAttr(name = SKIP_STORAGE_ENGINE_MERGE, needForward = true) public boolean skipStorageEngineMerge = false; /** * For debug purpose, skip delete predicate when reading data. */ - @VariableMgr.VarAttr(name = SKIP_DELETE_PREDICATE) + @VariableMgr.VarAttr(name = SKIP_DELETE_PREDICATE, needForward = true) public boolean skipDeletePredicate = false; /** * For debug purpose, skip delete sign when reading data. */ - @VariableMgr.VarAttr(name = SKIP_DELETE_SIGN) + @VariableMgr.VarAttr(name = SKIP_DELETE_SIGN, needForward = true) public boolean skipDeleteSign = false; /** * For debug purpose, skip delete bitmap when reading data. */ - @VariableMgr.VarAttr(name = SKIP_DELETE_BITMAP) + @VariableMgr.VarAttr(name = SKIP_DELETE_BITMAP, needForward = true) public boolean skipDeleteBitmap = false; // This variable replace the original FE config `recover_with_skip_missing_version`. diff --git a/regression-test/data/mv_p0/await/await.out b/regression-test/data/mv_p0/await/await.out new file mode 100644 index 00000000000000..a123f490bdea6f --- /dev/null +++ b/regression-test/data/mv_p0/await/await.out @@ -0,0 +1,510 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !mv -- +49994996 + +-- !mv -- +49994992 + +-- !mv -- +49994988 + +-- !mv -- +49994984 + +-- !mv -- +49994980 + +-- !mv -- +49994976 + +-- !mv -- +49994972 + +-- !mv -- +49994968 + +-- !mv -- +49994964 + +-- !mv -- +49994960 + +-- !mv -- +49994956 + +-- !mv -- +49994952 + +-- !mv -- +49994948 + +-- !mv -- +49994944 + +-- !mv -- +49994940 + +-- !mv -- +49994936 + +-- !mv -- +49994932 + +-- !mv -- +49994928 + +-- !mv -- +49994924 + +-- !mv -- +49994920 + +-- !mv -- +49994916 + +-- !mv -- +49994912 + +-- !mv -- +49994908 + +-- !mv -- +49994904 + +-- !mv -- +49994900 + +-- !mv -- +49994896 + +-- !mv -- +49994892 + +-- !mv -- +49994888 + +-- !mv -- +49994884 + +-- !mv -- +49994880 + +-- !mv -- +49994876 + +-- !mv -- +49994872 + +-- !mv -- +49994868 + +-- !mv -- +49994864 + +-- !mv -- +49994860 + +-- !mv -- +49994856 + +-- !mv -- +49994852 + +-- !mv -- +49994848 + +-- !mv -- +49994844 + +-- !mv -- +49994840 + +-- !mv -- +49994836 + +-- !mv -- +49994832 + +-- !mv -- +49994828 + +-- !mv -- +49994824 + +-- !mv -- +49994820 + +-- !mv -- +49994816 + +-- !mv -- +49994812 + +-- !mv -- +49994808 + +-- !mv -- +49994804 + +-- !mv -- +49994800 + +-- !mv -- +49994796 + +-- !mv -- +49994792 + +-- !mv -- +49994788 + +-- !mv -- +49994784 + +-- !mv -- +49994780 + +-- !mv -- +49994776 + +-- !mv -- +49994772 + +-- !mv -- +49994768 + +-- !mv -- +49994764 + +-- !mv -- +49994760 + +-- !mv -- +49994756 + +-- !mv -- +49994752 + +-- !mv -- +49994748 + +-- !mv -- +49994744 + +-- !mv -- +49994740 + +-- !mv -- +49994736 + +-- !mv -- +49994732 + +-- !mv -- +49994728 + +-- !mv -- +49994724 + +-- !mv -- +49994720 + +-- !mv -- +49994716 + +-- !mv -- +49994712 + +-- !mv -- +49994708 + +-- !mv -- +49994704 + +-- !mv -- +49994700 + +-- !mv -- +49994696 + +-- !mv -- +49994692 + +-- !mv -- +49994688 + +-- !mv -- +49994684 + +-- !mv -- +49994680 + +-- !mv -- +49994676 + +-- !mv -- +49994672 + +-- !mv -- +49994668 + +-- !mv -- +49994664 + +-- !mv -- +49994660 + +-- !mv -- +49994656 + +-- !mv -- +49994652 + +-- !mv -- +49994648 + +-- !mv -- +49994644 + +-- !mv -- +49994640 + +-- !mv -- +49994636 + +-- !mv -- +49994632 + +-- !mv -- +49994628 + +-- !mv -- +49994624 + +-- !mv -- +49994620 + +-- !mv -- +49994616 + +-- !mv -- +49994612 + +-- !mv -- +49994608 + +-- !mv -- +49994604 + +-- !mv -- +49994600 + +-- !mv -- +49994596 + +-- !mv -- +49994592 + +-- !mv -- +49994588 + +-- !mv -- +49994584 + +-- !mv -- +49994580 + +-- !mv -- +49994576 + +-- !mv -- +49994572 + +-- !mv -- +49994568 + +-- !mv -- +49994564 + +-- !mv -- +49994560 + +-- !mv -- +49994556 + +-- !mv -- +49994552 + +-- !mv -- +49994548 + +-- !mv -- +49994544 + +-- !mv -- +49994540 + +-- !mv -- +49994536 + +-- !mv -- +49994532 + +-- !mv -- +49994528 + +-- !mv -- +49994524 + +-- !mv -- +49994520 + +-- !mv -- +49994516 + +-- !mv -- +49994512 + +-- !mv -- +49994508 + +-- !mv -- +49994504 + +-- !mv -- +49994500 + +-- !mv -- +49994496 + +-- !mv -- +49994492 + +-- !mv -- +49994488 + +-- !mv -- +49994484 + +-- !mv -- +49994480 + +-- !mv -- +49994476 + +-- !mv -- +49994472 + +-- !mv -- +49994468 + +-- !mv -- +49994464 + +-- !mv -- +49994460 + +-- !mv -- +49994456 + +-- !mv -- +49994452 + +-- !mv -- +49994448 + +-- !mv -- +49994444 + +-- !mv -- +49994440 + +-- !mv -- +49994436 + +-- !mv -- +49994432 + +-- !mv -- +49994428 + +-- !mv -- +49994424 + +-- !mv -- +49994420 + +-- !mv -- +49994416 + +-- !mv -- +49994412 + +-- !mv -- +49994408 + +-- !mv -- +49994404 + +-- !mv -- +49994400 + +-- !mv -- +49994396 + +-- !mv -- +49994392 + +-- 
!mv -- +49994388 + +-- !mv -- +49994384 + +-- !mv -- +49994380 + +-- !mv -- +49994376 + +-- !mv -- +49994372 + +-- !mv -- +49994368 + +-- !mv -- +49994364 + +-- !mv -- +49994360 + +-- !mv -- +49994356 + +-- !mv -- +49994352 + +-- !mv -- +49994348 + +-- !mv -- +49994344 + +-- !mv -- +49994340 + +-- !mv -- +49994336 + +-- !mv -- +49994332 + +-- !mv -- +49994328 + +-- !mv -- +49994324 + +-- !mv -- +49994320 \ No newline at end of file diff --git a/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out index d70bd0bbae15c6..eb3d49efc9b7c5 100644 --- a/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out +++ b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out @@ -9,19 +9,6 @@ 2 2 2 2023-10-18 2023-10-18 3 2 3 2023-10-19 2023-10-19 --- !after_modify_data_without_refresh_catalog -- -1 2 1 2023-10-17 2023-10-17 -2 2 2 2023-10-18 2023-10-18 -3 2 3 2023-10-19 2023-10-19 -3 2 3 2023-10-19 2023-10-19 - --- !after_modify_and_without_refresh_catalog_19 -- -3 2 3 2023-10-19 2023-10-19 -3 2 3 2023-10-19 2023-10-19 - --- !after_modify_and_without_refresh_catalog_18 -- -2 2 2 2023-10-18 2023-10-18 - -- !after_modify_data_and_refresh_catalog -- 1 2 1 2023-10-17 2023-10-17 2 2 2 2023-10-18 2023-10-18 @@ -41,20 +28,6 @@ 3 2 3 2023-10-19 2023-10-19 3 2 3 2023-10-19 2023-10-19 --- !after_add_data_without_refresh_catalog -- -\N \N 7 \N 2023-10-20 -1 2 1 2023-10-17 2023-10-17 -2 2 2 2023-10-18 2023-10-18 -3 2 3 2023-10-19 2023-10-19 -3 2 3 2023-10-19 2023-10-19 - --- !after_add_and_without_refresh_catalog_19 -- -3 2 3 2023-10-19 2023-10-19 -3 2 3 2023-10-19 2023-10-19 - --- !after_add_and_without_refresh_catalog_20 -- -\N \N 7 \N 2023-10-20 - -- !after_add_data_with_refresh_catalog -- \N \N 7 \N 2023-10-20 1 2 1 2023-10-17 2023-10-17 diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index 9077223288f0a9..3d616654057efe 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -1501,21 +1501,21 @@ class Suite implements GroovyInterceptable { def waitingMTMVTaskFinishedByMvName = { mvName, dbName = context.dbName -> Thread.sleep(2000); - String showTasks = "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where MvDatabaseName = '${dbName}' and MvName = '${mvName}' order by CreateTime DESC LIMIT 1" + String showTasks = "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where MvDatabaseName = '${dbName}' and MvName = '${mvName}' order by CreateTime ASC" String status = "NULL" List> result long startTime = System.currentTimeMillis() long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min List toCheckTaskRow = new ArrayList<>(); - while (timeoutTimestamp > System.currentTimeMillis() && (status != "SUCCESS")) { + while (timeoutTimestamp > System.currentTimeMillis() && (status == 'PENDING' || status == 'RUNNING' || status == 'NULL')) { result = sql(showTasks) - logger.info("current db is " + dbName + ", showTasks is " + showTasks) + logger.info("current db is " + dbName + ", showTasks is " + result.toString()) if (result.isEmpty()) { logger.info("waitingMTMVTaskFinishedByMvName 
toCheckTaskRow is empty") Thread.sleep(1000); continue; } - toCheckTaskRow = result.get(0); + toCheckTaskRow = result.last(); status = toCheckTaskRow.get(4) logger.info("The state of ${showTasks} is ${status}") Thread.sleep(1000); @@ -1539,14 +1539,14 @@ class Suite implements GroovyInterceptable { def waitingMTMVTaskFinishedByMvNameAllowCancel = {mvName, dbName = context.dbName -> Thread.sleep(2000); - String showTasks = "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where MvDatabaseName = '${dbName}' and MvName = '${mvName}' order by CreateTime DESC LIMIT 1" + String showTasks = "select TaskId,JobId,JobName,MvId,Status,MvName,MvDatabaseName,ErrorMsg from tasks('type'='mv') where MvDatabaseName = '${dbName}' and MvName = '${mvName}' order by CreateTime ASC" String status = "NULL" List> result long startTime = System.currentTimeMillis() long timeoutTimestamp = startTime + 5 * 60 * 1000 // 5 min List toCheckTaskRow = new ArrayList<>(); - while (timeoutTimestamp > System.currentTimeMillis() && (status != "SUCCESS")) { + while (timeoutTimestamp > System.currentTimeMillis() && (status == 'PENDING' || status == 'RUNNING' || status == 'NULL' || status == 'CANCELED')) { result = sql(showTasks) logger.info("current db is " + dbName + ", showTasks result: " + result.toString()) if (result.isEmpty()) { @@ -1554,7 +1554,7 @@ class Suite implements GroovyInterceptable { Thread.sleep(1000); continue; } - toCheckTaskRow = result.get(0) + toCheckTaskRow = result.last() status = toCheckTaskRow.get(4) logger.info("The state of ${showTasks} is ${status}") Thread.sleep(1000); @@ -1687,36 +1687,6 @@ class Suite implements GroovyInterceptable { } } - def getMVJobState = { tableName -> - def jobStateResult = sql """ SHOW ALTER TABLE ROLLUP WHERE TableName='${tableName}' ORDER BY CreateTime DESC limit 1""" - if (jobStateResult == null || jobStateResult.isEmpty()) { - logger.info("show alter table roll is empty" + jobStateResult) - return "NOT_READY" - } - logger.info("getMVJobState jobStateResult is " + jobStateResult.toString()) - if (!jobStateResult[0][8].equals("FINISHED")) { - return "NOT_READY" - } - return "FINISHED"; - } - def waitForRollUpJob = (tbName, timeoutMillisecond) -> { - - long startTime = System.currentTimeMillis() - long timeoutTimestamp = startTime + timeoutMillisecond - - String result - while (timeoutTimestamp > System.currentTimeMillis()){ - result = getMVJobState(tbName) - if (result == "FINISHED") { - sleep(200) - return - } else { - sleep(200) - } - } - Assert.assertEquals("FINISHED", result) - } - void testFoldConst(String foldSql) { String openFoldConstant = "set debug_skip_fold_constant=false"; sql(openFoldConstant) diff --git a/regression-test/suites/mv_p0/await/await.groovy b/regression-test/suites/mv_p0/await/await.groovy new file mode 100644 index 00000000000000..92d83b982a5bff --- /dev/null +++ b/regression-test/suites/mv_p0/await/await.groovy @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite ("await") { + + String db = context.config.getDbNameByFile(context.file) + + def tblName = "agg_have_dup_base_await" + def waitDrop = { + def try_times = 1000 + def result = "null" + sql "sync;" + while (!result.contains("FINISHED")) { + result = (sql "SHOW ALTER TABLE MATERIALIZED VIEW WHERE TableName='${tblName}' ORDER BY CreateTime DESC LIMIT 1;")[0] + if (!result.contains("RUNNING")&&!result.contains("PENDING")&&!result.contains("FINISHED")&&!result.contains("WAITING_TXN")) { + assertTrue(false) + } + log.info("result: ${result}") + Thread.sleep(3000) + try_times -= 1 + assertTrue(try_times > 0) + } + sql "sync;" + sql "drop materialized view k12s3m on ${tblName};" + while (!(sql "show create materialized view k12s3m on ${tblName};").empty) { + sleep(100) + try_times -= 1 + assertTrue(try_times > 0) + } + sql "sync;" + } + + sql "drop table if exists ${tblName} force;" + sql """ + create table ${tblName} ( + k1 int null, + k2 int not null, + k3 bigint null, + k4 varchar(100) null + ) + duplicate key (k1, k2, k3) + distributed by hash(k1) buckets 3 + properties("replication_num" = "1"); + """ + sql "insert into ${tblName} select e1, -4, -4, 'd' from (select 1 k1) as t lateral view explode_numbers(10000) tmp1 as e1;" + // do not await + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from 
${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" + + waitDrop() + create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" + qt_mv "select sum(k1) from ${tblName}" +} diff --git a/regression-test/suites/mv_p0/no_await/no_await.groovy b/regression-test/suites/mv_p0/no_await/no_await.groovy index 3eab03aa7e4adc..b6ea39754ea709 100644 --- a/regression-test/suites/mv_p0/no_await/no_await.groovy +++ b/regression-test/suites/mv_p0/no_await/no_await.groovy @@ -60,93 +60,111 @@ suite ("no_await") { """ sql "insert into ${tblName} select e1, -4, -4, 'd' from (select 1 k1) as t lateral view explode_numbers(10000) tmp1 as e1;" // do not await - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select 
k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql 
"insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" waitDrop() - create_sync_mv(db, tblName, "k12s3m", """select k1,sum(k2),max(k2) from ${tblName} group by k1;""") + sql "create materialized view k12s3m as select k1,sum(k2),max(k2) from ${tblName} group by k1;" sql "insert into ${tblName} select -4, -4, -4, \'d\'" + sql "sync;" qt_mv "select sum(k1) from ${tblName}" } diff --git a/regression-test/suites/nereids_p0/hint/test_use_mv.groovy b/regression-test/suites/nereids_p0/hint/test_use_mv.groovy index 041b3b3a7dacc2..13a3b863f21892 100644 --- a/regression-test/suites/nereids_p0/hint/test_use_mv.groovy +++ b/regression-test/suites/nereids_p0/hint/test_use_mv.groovy @@ -55,9 +55,9 @@ suite("test_use_mv") { """ sql """ insert into t1 values (101, 101, 101, 102);""" sql """ alter table t1 add rollup r1(k2, k1); """ - waitForRollUpJob("t1", 150000) + waitingMVTaskFinishedByMvName("test_use_mv", "t1","r1") sql """ alter table t1 add rollup r2(k2); """ - waitForRollUpJob("t1", 150000) + waitingMVTaskFinishedByMvName("test_use_mv", "t1","r2") createMV("create materialized view k1_k2_sumk3 as select k1, k2, sum(v1) from t1 group by k1, k2;") sql """set ENABLE_SYNC_MV_COST_BASED_REWRITE=false;""" diff --git a/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy index 181891f9dedb8a..3c2ad76c81a5f5 100644 --- a/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy @@ -148,21 +148,6 @@ suite("part_partition_invalid", "p0,external,external_docker") { // data change in external table doesn't influence query rewrite, // if want to use new data in external table should be refresh manually sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');""" - mv_rewrite_success(query_sql, mv_name) - order_qt_after_modify_data_without_refresh_catalog """ ${query_sql}""" - - // query invalid partition data, should hit mv, because not check now. 
- mv_rewrite_fail(""" - ${query_sql} where o_orderdate = '2023-10-19'; - """, mv_name) - order_qt_after_modify_and_without_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" - - // query valid partition data, should hit mv - mv_rewrite_success(""" - ${query_sql} where o_orderdate = '2023-10-18'; - """, mv_name - ) - order_qt_after_modify_and_without_refresh_catalog_18 """ ${query_sql} where o_orderdate = '2023-10-18';""" // refresh catalog cache sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ @@ -190,20 +175,6 @@ suite("part_partition_invalid", "p0,external,external_docker") { // test after hive add partition sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(6, 7, 'ok', 29.5, 'x', 'y', 6, 'ss', '2023-10-20');""" - mv_rewrite_success(query_sql, mv_name) - order_qt_after_add_data_without_refresh_catalog """ ${query_sql}""" - - // query invalid partition data, should hit mv, because not check now. - mv_rewrite_success(query_sql, mv_name) - - order_qt_after_add_and_without_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" - - // query valid partition data, should hit mv, because data not aware - mv_rewrite_fail(""" - ${query_sql} where o_orderdate = '2023-10-20'; - """, mv_name) - - order_qt_after_add_and_without_refresh_catalog_20 """ ${query_sql} where o_orderdate = '2023-10-20';""" // refresh catalog cache sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ diff --git a/regression-test/suites/nereids_rules_p0/mv/is_in_debug_mode/is_in_debug_mode.groovy b/regression-test/suites/nereids_rules_p0/mv/is_in_debug_mode/is_in_debug_mode.groovy index ebc1921c0ab6ae..e4c93660764370 100644 --- a/regression-test/suites/nereids_rules_p0/mv/is_in_debug_mode/is_in_debug_mode.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/is_in_debug_mode/is_in_debug_mode.groovy @@ -75,6 +75,9 @@ suite("is_in_debug_mode") { sql """set skip_delete_sign = true;""" mv_not_part_in("""select * from orders where o_orderkey > 1;""", "basic_mv") logger.info("skip_delete_sign session is " + sql("show variables like '%skip_delete_sign%'")) + + sql """drop materialized view if exists test_create_mv;""" + test { sql """ CREATE MATERIALIZED VIEW test_create_mv diff --git a/regression-test/suites/nereids_rules_p0/mv/union_rewrite/usercase_union_rewrite.groovy b/regression-test/suites/nereids_rules_p0/mv/union_rewrite/usercase_union_rewrite.groovy index a5bc5586e518e6..dbbc6f929ef6b1 100644 --- a/regression-test/suites/nereids_rules_p0/mv/union_rewrite/usercase_union_rewrite.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/union_rewrite/usercase_union_rewrite.groovy @@ -93,7 +93,7 @@ suite ("usercase_union_rewrite") { sql """analyze table orders_user with sync;""" sql """analyze table lineitem_user with sync;""" - sql """alter table orders_user modify column o_comment set stats ('row_count'='4');""" + sql """alter table orders_user modify column o_comment set stats ('row_count'='7');""" sql """alter table lineitem_user modify column l_comment set stats ('row_count'='3');""" def create_mv_orders = { mv_name, mv_sql -> From c9cac8fc9f8a5d31e2a5f3fd6226c965a7008ace Mon Sep 17 00:00:00 2001 From: LiBinfeng Date: Wed, 22 Jan 2025 10:26:26 +0800 Subject: [PATCH 20/31] [fix](Nereids) fix fold constant of time acquired functions (#47288) Problem Summary: explain select substr(current_date, 1, 10); when logicalPlanBuilder build ast from original sql of date acquired functions like 
current_date, it would add an alias above. Which would stop folding constant when fold constant rule traversing expression tree So remove alias when translate to ast --- .../nereids/parser/LogicalPlanBuilder.java | 14 ++++---- .../fold_constant_date_arithmatic.groovy | 32 +++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_date_arithmatic.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 72f427c5095727..a3d215441374f5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -2387,37 +2387,37 @@ public Expression visitArithmeticBinary(ArithmeticBinaryContext ctx) { @Override public Expression visitCurrentDate(DorisParser.CurrentDateContext ctx) { - return new CurrentDate().alias("CURRENT_DATE"); + return new CurrentDate(); } @Override public Expression visitCurrentTime(DorisParser.CurrentTimeContext ctx) { - return new CurrentTime().alias("CURRENT_TIME"); + return new CurrentTime(); } @Override public Expression visitCurrentTimestamp(DorisParser.CurrentTimestampContext ctx) { - return new Now().alias("CURRENT_TIMESTAMP"); + return new Now(); } @Override public Expression visitLocalTime(DorisParser.LocalTimeContext ctx) { - return new CurrentTime().alias("LOCALTIME"); + return new CurrentTime(); } @Override public Expression visitLocalTimestamp(DorisParser.LocalTimestampContext ctx) { - return new Now().alias("LOCALTIMESTAMP"); + return new Now(); } @Override public Expression visitCurrentUser(DorisParser.CurrentUserContext ctx) { - return new CurrentUser().alias("CURRENT_USER"); + return new CurrentUser(); } @Override public Expression visitSessionUser(DorisParser.SessionUserContext ctx) { - return new SessionUser().alias("SESSION_USER"); + return new SessionUser(); } @Override diff --git a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_date_arithmatic.groovy b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_date_arithmatic.groovy new file mode 100644 index 00000000000000..5e0ed2e5d251ca --- /dev/null +++ b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_date_arithmatic.groovy @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("fold_constant_date_arithmatic") { + def db = "fold_constant_date_arithmatic" + sql "create database if not exists ${db}" + + sql "set enable_nereids_planner=true" + sql "set enable_fallback_to_original_planner=false" + sql "set enable_fold_constant_by_be=false" + + testFoldConst("select substr(now(), 1, 10);") + testFoldConst("select substr(now(3), 1, 10);") + testFoldConst("select substr(curdate(), 1, 10);") + testFoldConst("select substr(current_date(), 1, 10);") + testFoldConst("select substr(current_timestamp(), 1, 10);") + testFoldConst("select substr(current_timestamp(3), 1, 10);") +} From 26b5baf9a270b6a3fca9354dbbb27eb40091274e Mon Sep 17 00:00:00 2001 From: James Date: Wed, 22 Jan 2025 11:49:40 +0800 Subject: [PATCH 21/31] [refactor](neredis)Remove dependency of old planner partition prune code in DeleteJobCommand (#47234) ### What problem does this PR solve? Remove dependency of old planner partition prune code in DeleteJobCommand --- .../org/apache/doris/load/DeleteHandler.java | 8 +- .../java/org/apache/doris/load/DeleteJob.java | 57 ++- .../plans/commands/DeleteFromCommand.java | 69 ++- .../data/delete_p0/test_basic_delete_job.out | 435 ++++++++++++++++++ .../delete_p0/test_basic_delete_job.groovy | 190 ++++++++ 5 files changed, 753 insertions(+), 6 deletions(-) create mode 100644 regression-test/data/delete_p0/test_basic_delete_job.out create mode 100644 regression-test/suites/delete_p0/test_basic_delete_job.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java index 299dac295dc909..387b81064c2f9c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; @@ -103,8 +104,8 @@ public void processEmptyRelation(QueryState execState) { /** * used for Nereids planner */ - public void process(Database targetDb, OlapTable targetTbl, List partitionNames, - List deleteConditions, QueryState execState) { + public void process(Database targetDb, OlapTable targetTbl, List selectedPartitions, + List deleteConditions, QueryState execState, List partitionNames) { DeleteJob deleteJob = null; try { targetTbl.readLock(); @@ -114,10 +115,11 @@ public void process(Database targetDb, OlapTable targetTbl, List partiti // just add a comment here to notice. 
} deleteJob = DeleteJob.newBuilder() - .buildWith(new DeleteJob.BuildParams( + .buildWithNereids(new DeleteJob.BuildParams( targetDb, targetTbl, partitionNames, + selectedPartitions, deleteConditions)); long txnId = deleteJob.beginTxn(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteJob.java index 992239d3369cac..ed4b5d99d5dafe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteJob.java @@ -508,6 +508,7 @@ public static class BuildParams { private final Collection partitionNames; private final List deleteConditions; + private final List selectedPartitions; public BuildParams(Database db, OlapTable table, Collection partitionNames, @@ -516,6 +517,18 @@ public BuildParams(Database db, OlapTable table, this.table = table; this.partitionNames = partitionNames; this.deleteConditions = deleteConditions; + this.selectedPartitions = null; + } + + public BuildParams(Database db, OlapTable table, + List partitionNames, + List selectedPartitions, + List deleteConditions) { + this.db = db; + this.table = table; + this.partitionNames = partitionNames; + this.deleteConditions = deleteConditions; + this.selectedPartitions = selectedPartitions; } public OlapTable getTable() { @@ -533,14 +546,54 @@ public Database getDb() { public List getDeleteConditions() { return deleteConditions; } + + public List getSelectedPartitions() { + return selectedPartitions; + } } public static class Builder { + public DeleteJob buildWithNereids(BuildParams params) { + boolean noPartitionSpecified = params.getPartitionNames().isEmpty(); + List partitions = params.getSelectedPartitions(); + Map partitionReplicaNum = partitions.stream() + .collect(Collectors.toMap( + Partition::getId, + partition -> + params.getTable() + .getPartitionInfo() + .getReplicaAllocation(partition.getId()) + .getTotalReplicaNum())); + // generate label + String label = DELETE_PREFIX + UUID.randomUUID(); + //generate jobId + long jobId = Env.getCurrentEnv().getNextId(); + List partitionNames = partitions.stream().map(Partition::getName).collect(Collectors.toList()); + List partitionIds = partitions.stream().map(Partition::getId).collect(Collectors.toList()); + DeleteInfo deleteInfo = new DeleteInfo(params.getDb().getId(), params.getTable().getId(), + params.getTable().getName(), getDeleteCondString(params.getDeleteConditions()), + noPartitionSpecified, partitionIds, partitionNames); + DeleteJob deleteJob = ConnectContext.get() != null && ConnectContext.get().isTxnModel() + ? 
new TxnDeleteJob(jobId, -1, label, partitionReplicaNum, deleteInfo) + : new DeleteJob(jobId, -1, label, partitionReplicaNum, deleteInfo); + long replicaNum = partitions.stream().mapToLong(Partition::getAllReplicaCount).sum(); + deleteJob.setPartitions(partitions); + deleteJob.setDeleteConditions(params.getDeleteConditions()); + deleteJob.setTargetDb(params.getDb()); + deleteJob.setTargetTbl(params.getTable()); + deleteJob.setCountDownLatch(new MarkedCountDownLatch<>((int) replicaNum)); + ConnectContext connectContext = ConnectContext.get(); + if (connectContext != null) { + deleteJob.setTimeoutS(connectContext.getExecTimeout()); + } + return deleteJob; + } + public DeleteJob buildWith(BuildParams params) throws Exception { boolean noPartitionSpecified = params.getPartitionNames().isEmpty(); - List partitions = getSelectedPartitions(params.getTable(), - params.getPartitionNames(), params.getDeleteConditions()); + List partitions = getSelectedPartitions(params.getTable(), params.getPartitionNames(), + params.getDeleteConditions()); Map partitionReplicaNum = partitions.stream() .collect(Collectors.toMap( Partition::getId, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java index ab2a7d3f7414c1..55215323e7015d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java @@ -29,18 +29,28 @@ import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndexMeta; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.PartitionItem; +import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Config; import org.apache.doris.common.ErrorCode; import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.NereidsPlanner; +import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.analyzer.UnboundAlias; import org.apache.doris.nereids.analyzer.UnboundRelation; import org.apache.doris.nereids.analyzer.UnboundSlot; import org.apache.doris.nereids.analyzer.UnboundTableSinkCreator; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.glue.LogicalPlanAdapter; +import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.rules.expression.rules.PartitionPruner; +import org.apache.doris.nereids.rules.expression.rules.PartitionPruner.PartitionTableType; +import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges; import org.apache.doris.nereids.trees.expressions.And; import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; import org.apache.doris.nereids.trees.expressions.Expression; @@ -48,6 +58,7 @@ import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Not; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.literal.Literal; import 
org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; @@ -75,17 +86,23 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.TreeSet; +import java.util.function.Function; import java.util.stream.Collectors; /** * delete from unique key table. */ public class DeleteFromCommand extends Command implements ForwardWithSync, Explainable { + private static final Logger LOG = LogManager.getLogger(DeleteFromCommand.class); protected final List nameParts; protected final String tableAlias; @@ -195,10 +212,14 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { // just throw exception to fallback until storage support true predicate. throw new AnalysisException("delete all rows is forbidden temporary."); } + + ArrayList partitionNames = Lists.newArrayList(relation.getPartNames()); + List selectedPartitions = getSelectedPartitions(olapTable, filter, partitionNames); + Env.getCurrentEnv() .getDeleteHandler() .process((Database) scan.getDatabase(), scan.getTable(), - Lists.newArrayList(relation.getPartNames()), predicates, ctx.getState()); + selectedPartitions, predicates, ctx.getState(), partitionNames); } private void updateSessionVariableForDelete(SessionVariable sessionVariable) { @@ -219,6 +240,52 @@ private void updateSessionVariableForDelete(SessionVariable sessionVariable) { } } + private List getSelectedPartitions(OlapTable olapTable, PhysicalFilter filter, + List partitionNames) { + // For un_partitioned table, return all partitions. + if (olapTable.getPartitionInfo().getType().equals(PartitionType.UNPARTITIONED)) { + return Lists.newArrayList(olapTable.getPartitions()); + } + List partitionSlots = Lists.newArrayList(); + for (Column c : olapTable.getPartitionColumns()) { + Slot partitionSlot = null; + // loop search is faster than build a map + for (Slot slot : filter.getOutput()) { + if (slot.getName().equalsIgnoreCase(c.getName())) { + partitionSlot = slot; + break; + } + } + if (partitionSlot != null) { + partitionSlots.add(partitionSlot); + } + } + PartitionInfo partitionInfo = olapTable.getPartitionInfo(); + Map idToPartitions = partitionInfo.getIdToItem(false); + Optional> sortedPartitionRanges = Optional.empty(); + // User specified partition is not empty. 
+ if (partitionNames != null && !partitionNames.isEmpty()) { + Set partitionIds = partitionNames.stream() + .map(olapTable::getPartition) + .map(Partition::getId) + .collect(Collectors.toSet()); + idToPartitions = idToPartitions.keySet().stream() + .filter(partitionIds::contains) + .collect(Collectors.toMap(Function.identity(), idToPartitions::get)); + } else { + Optional> sortedPartitionRangesOpt + = Env.getCurrentEnv().getSortedPartitionsCacheManager().get(olapTable); + if (sortedPartitionRangesOpt.isPresent()) { + sortedPartitionRanges = (Optional) sortedPartitionRangesOpt; + } + } + List prunedPartitions = PartitionPruner.prune( + partitionSlots, filter.getPredicate(), idToPartitions, + CascadesContext.initContext(new StatementContext(), this, PhysicalProperties.ANY), + PartitionTableType.OLAP, sortedPartitionRanges); + return prunedPartitions.stream().map(olapTable::getPartition).collect(Collectors.toList()); + } + private void checkColumn(Set tableColumns, SlotReference slotReference, OlapTable table) { // 0. must slot from table if (!slotReference.getColumn().isPresent()) { diff --git a/regression-test/data/delete_p0/test_basic_delete_job.out b/regression-test/data/delete_p0/test_basic_delete_job.out new file mode 100644 index 00000000000000..095403a3169a6c --- /dev/null +++ b/regression-test/data/delete_p0/test_basic_delete_job.out @@ -0,0 +1,435 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !unpartition1 -- +1 a +2 b +3 c + +-- !unpartition2 -- +1 a +2 b +3 c + +-- !unpartition3 -- +2 b +3 c + +-- !unpartition4 -- +2 b + +-- !one_range1 -- +1 a +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_range2 -- +1 a +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_range3 -- +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_range4 -- +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_range5 -- +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_range6 -- +12 e +13 f +21 g +22 h +23 i + +-- !one_range7 -- +12 e +13 f +22 h +23 i + +-- !one_range8 -- +12 e +13 f +22 h +23 i + +-- !two_range1 -- +0 0 a +1 100 b +2 3 c +3 3 d +4 6 e +6 7 f +8 8 g +9 1000 h +10 9 i +10 10 j +11 10 k +12 13 l +13 5 m +14 16 n +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +22 3 u +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range2 -- +0 0 a +1 100 b +2 3 c +3 3 d +4 6 e +6 7 f +8 8 g +9 1000 h +10 9 i +10 10 j +11 10 k +12 13 l +13 5 m +14 16 n +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +22 3 u +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range3 -- +0 0 a +1 100 b +2 3 c +3 3 d +4 6 e +6 7 f +8 8 g +9 1000 h +10 9 i +10 10 j +11 10 k +12 13 l +13 5 m +14 16 n +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +22 3 u +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range4 -- +0 0 a +2 3 c +3 3 d +4 6 e +6 7 f +8 8 g +9 1000 h +10 9 i +10 10 j +11 10 k +12 13 l +13 5 m +14 16 n +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +22 3 u +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range5 -- +0 0 a +2 3 c +3 3 d +4 6 e +6 7 f +8 8 g +9 1000 h +11 10 k +12 13 l +13 5 m +14 16 n +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +22 3 u +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range6 -- +0 0 a +2 3 c +3 3 d +4 6 e +6 7 f +8 8 g +9 1000 h +11 10 k +12 13 l +13 5 m +14 16 n +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range7 -- +11 10 k 
+12 13 l +13 5 m +14 16 n +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range8 -- +16 17 o +18 88 p +19 1000 q +20 19 r +20 20 s +21 20 t +23 5 v +24 6 w +26 7 x +28 8 y +29 1000 z +30 29 zz + +-- !two_range9 -- +23 5 v +24 6 w +26 7 x +28 8 y + +-- !one_list1 -- +1 a +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_list2 -- +1 a +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_list3 -- +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_list4 -- +2 b +3 c +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_list5 -- +11 d +12 e +13 f +21 g +22 h +23 i + +-- !one_list6 -- +12 e +13 f +21 g +22 h +23 i + +-- !one_list7 -- +12 e +13 f +22 h +23 i + +-- !one_list8 -- +12 e +13 f +22 h +23 i + +-- !two_list1 -- +0 100 a +1 101 b +2 102 c +10 200 d +11 201 e +12 202 f +20 300 g +21 301 h +22 302 i + +-- !two_list2 -- +0 100 a +1 101 b +2 102 c +10 200 d +11 201 e +12 202 f +20 300 g +21 301 h +22 302 i + +-- !two_list3 -- +0 100 a +1 101 b +2 102 c +10 200 d +11 201 e +12 202 f +20 300 g +21 301 h +22 302 i + +-- !two_list4 -- +0 100 a +2 102 c +10 200 d +11 201 e +12 202 f +20 300 g +21 301 h +22 302 i + +-- !two_list5 -- +0 100 a +2 102 c +11 201 e +12 202 f +20 300 g +21 301 h +22 302 i + +-- !two_list6 -- +0 100 a +2 102 c +11 201 e +12 202 f +20 300 g +22 302 i + +-- !two_list7 -- +11 201 e +12 202 f +20 300 g +22 302 i + +-- !two_list8 -- +12 202 f +20 300 g +22 302 i + +-- !two_list9 -- +12 202 f +20 300 g + diff --git a/regression-test/suites/delete_p0/test_basic_delete_job.groovy b/regression-test/suites/delete_p0/test_basic_delete_job.groovy new file mode 100644 index 00000000000000..de41f4cc15a4d5 --- /dev/null +++ b/regression-test/suites/delete_p0/test_basic_delete_job.groovy @@ -0,0 +1,190 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_basic_delete_job") { + def unpartitionTable = "un_partition_table" + def oneRangeColumnTable = "one_range_column_table" + def twoRangeColumnTable = "two_range_column_table" + def oneListColumnTable = "one_list_column_table" + def twoListColumnTable = "two_list_column_table" + + + // Test no partition + sql """DROP TABLE IF EXISTS ${unpartitionTable} """ + sql """CREATE TABLE ${unpartitionTable} ( + `id` int NOT NULL, + `name` varchar(25) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`name`) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """insert into ${unpartitionTable} values (1, "a"), (2, "b"), (3, "c")""" + qt_unpartition1 """select * from ${unpartitionTable} order by id""" + sql """delete from ${unpartitionTable} where id < 0""" + qt_unpartition2 """select * from ${unpartitionTable} order by id""" + sql """delete from ${unpartitionTable} where id = 1""" + qt_unpartition3 """select * from ${unpartitionTable} order by id""" + sql """delete from ${unpartitionTable} where name = "c" """ + qt_unpartition4 """select * from ${unpartitionTable} order by id""" + + // Test one range partition column + sql """DROP TABLE IF EXISTS ${oneRangeColumnTable} """ + sql """CREATE TABLE ${oneRangeColumnTable} ( + `id` int NOT NULL, + `name` varchar(25) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + PARTITION BY RANGE(`id`) + (PARTITION p0 VALUES [("0"), ("10")), + PARTITION p1 VALUES [("10"), ("20")), + PARTITION p2 VALUES [("20"), ("30"))) + DISTRIBUTED BY HASH(`name`) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """insert into ${oneRangeColumnTable} values (1, "a"), (2, "b"), (3, "c"), (11, "d"), (12, "e"), (13, "f"), (21, "g"), (22, "h"), (23, "i")""" + qt_one_range1 """select * from ${oneRangeColumnTable} order by id""" + sql """delete from ${oneRangeColumnTable} where id < 0""" + qt_one_range2 """select * from ${oneRangeColumnTable} order by id""" + sql """delete from ${oneRangeColumnTable} where id = 1""" + qt_one_range3 """select * from ${oneRangeColumnTable} order by id""" + sql """delete from ${oneRangeColumnTable} partition(p0) where id = 11""" + qt_one_range4 """select * from ${oneRangeColumnTable} order by id""" + sql """delete from ${oneRangeColumnTable} partition(p0) where id < 22""" + qt_one_range5 """select * from ${oneRangeColumnTable} order by id""" + sql """delete from ${oneRangeColumnTable} where name = "d" """ + qt_one_range6 """select * from ${oneRangeColumnTable} order by id""" + sql """delete from ${oneRangeColumnTable} partition(p2) where name = "g" """ + qt_one_range7 """select * from ${oneRangeColumnTable} order by id""" + sql """delete from ${oneRangeColumnTable} partition(p1) where name = "h" """ + qt_one_range8 """select * from ${oneRangeColumnTable} order by id""" + + // Test two range partition columns + sql """DROP TABLE IF EXISTS ${twoRangeColumnTable} """ + sql """CREATE TABLE ${twoRangeColumnTable} ( + `id1` int NOT NULL, + `id2` int NOT NULL, + `name` varchar(25) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id1`) + PARTITION BY RANGE(`id1`, `id2`) + (PARTITION p0 VALUES [("0", "0"), ("10", "10")), + PARTITION p1 VALUES [("10", "10"), ("20", "20")), + PARTITION p2 VALUES [("20", "20"), ("30", "30"))) + DISTRIBUTED BY HASH(`name`) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """insert into ${twoRangeColumnTable} values (0, 0, "a"), (1, 100, "b"), (2, 3, "c"), (3, 3, "d"), (4, 6, 
"e"), (6, 7, "f"), (8, 8, "g"), (9, 1000, "h"), (10, 9, "i")""" + sql """insert into ${twoRangeColumnTable} values (10, 10, "j"), (11, 10, "k"), (12, 13, "l"), (13, 5, "m"), (14, 16, "n"), (16, 17, "o"), (18, 88, "p"), (19, 1000, "q"), (20, 19, "r")""" + sql """insert into ${twoRangeColumnTable} values (20, 20, "s"), (21, 20, "t"), (22, 3, "u"), (23, 5, "v"), (24, 6, "w"), (26, 7, "x"), (28, 8, "y"), (29, 1000, "z"), (30, 29, "zz")""" + + qt_two_range1 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} where id1 < 0""" + qt_two_range2 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} where id2 < 0""" + qt_two_range3 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} where id1 = 1""" + qt_two_range4 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} where id1 = 10""" + qt_two_range5 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} where name = "u" """ + qt_two_range6 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} partition(p0) where id1 < 15""" + qt_two_range7 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} where id1 < 15""" + qt_two_range8 """select * from ${twoRangeColumnTable} order by id1, id2""" + sql """delete from ${twoRangeColumnTable} where id2 > 10""" + qt_two_range9 """select * from ${twoRangeColumnTable} order by id1, id2""" + + // Test one list partition column + sql """DROP TABLE IF EXISTS ${oneListColumnTable} """ + sql """CREATE TABLE ${oneListColumnTable} ( + `id` int NOT NULL, + `name` varchar(25) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + PARTITION BY LIST(`id`) + (PARTITION p0 VALUES IN ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"), + PARTITION p1 VALUES IN ("10", "11", "12", "13", "14", "15", "16", "17", "18", "19"), + PARTITION p2 VALUES IN ("20", "21", "22", "23", "24", "25", "26", "27", "28", "29")) + DISTRIBUTED BY HASH(`name`) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """insert into ${oneListColumnTable} values (1, "a"), (2, "b"), (3, "c"), (11, "d"), (12, "e"), (13, "f"), (21, "g"), (22, "h"), (23, "i")""" + qt_one_list1 """select * from ${oneListColumnTable} order by id""" + sql """delete from ${oneListColumnTable} where id < 0""" + qt_one_list2 """select * from ${oneListColumnTable} order by id""" + sql """delete from ${oneListColumnTable} where id = 1""" + qt_one_list3 """select * from ${oneListColumnTable} order by id""" + sql """delete from ${oneListColumnTable} partition(p0) where id = 11""" + qt_one_list4 """select * from ${oneListColumnTable} order by id""" + sql """delete from ${oneListColumnTable} partition(p0) where id < 22""" + qt_one_list5 """select * from ${oneListColumnTable} order by id""" + sql """delete from ${oneListColumnTable} where name = "d" """ + qt_one_list6 """select * from ${oneListColumnTable} order by id""" + sql """delete from ${oneListColumnTable} partition(p2) where name = "g" """ + qt_one_list7 """select * from ${oneListColumnTable} order by id""" + sql """delete from ${oneListColumnTable} partition(p1) where name = "h" """ + qt_one_list8 """select * from ${oneListColumnTable} order by id""" + + // Test two list partition columns + sql """DROP TABLE IF EXISTS ${twoListColumnTable} """ + 
sql """CREATE TABLE ${twoListColumnTable} ( + `id1` int NOT NULL, + `id2` int NOT NULL, + `name` varchar(25) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id1`) + PARTITION BY LIST(`id1`, `id2`) + (PARTITION p0 VALUES IN (("0", "100"), ("1", "101"), ("2", "102")), + PARTITION p1 VALUES IN (("10", "200"), ("11", "201"), ("12", "202")), + PARTITION p2 VALUES IN (("20", "300"), ("21", "301"), ("22", "302"))) + DISTRIBUTED BY HASH(`name`) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """insert into ${twoListColumnTable} values (0, 100, "a"), (1, 101, "b"), (2, 102, "c")""" + sql """insert into ${twoListColumnTable} values (10, 200, "d"), (11, 201, "e"), (12, 202, "f")""" + sql """insert into ${twoListColumnTable} values (20, 300, "g"), (21, 301, "h"), (22, 302, "i")""" + + qt_two_list1 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} where id1 < 0""" + qt_two_list2 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} where id2 < 0""" + qt_two_list3 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} where id1 = 1""" + qt_two_list4 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} where id2 = 200""" + qt_two_list5 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} where name = "h" """ + qt_two_list6 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} partition(p0) where id1 < 12""" + qt_two_list7 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} where id1 < 12""" + qt_two_list8 """select * from ${twoListColumnTable} order by id1, id2""" + sql """delete from ${twoListColumnTable} where id2 > 300""" + qt_two_list9 """select * from ${twoListColumnTable} order by id1, id2""" +} From e674bb20741bb92e6d856bed7279b62ba7dbdd07 Mon Sep 17 00:00:00 2001 From: Thearas Date: Wed, 22 Jan 2025 13:36:27 +0900 Subject: [PATCH 22/31] [fix](hive docker)Reserve host port for hive2 namenode and datanode (#47262) ### What problem does this PR solve? 
Problem Summary:

The [External hive CI](http://43.132.222.7:8111/buildConfiguration/Doris_External_Regression/612304?buildTab=log&linesState=3650&logView=flowAware) failed because of a `namenode` error (port 50070 already in use), docker logs:

```txt
2025-01-21T04:22:37.955682469Z java.net.BindException: Port in use: 0.0.0.0:50070
2025-01-21T04:22:37.955686106Z at org.apache.hadoop.http.HttpServer2.openListeners(HttpServer2.java:940)
2025-01-21T04:22:37.955689402Z at org.apache.hadoop.http.HttpServer2.start(HttpServer2.java:876)
2025-01-21T04:22:37.955692708Z at org.apache.hadoop.hdfs.server.namenode.NameNodeHttpServer.start(NameNodeHttpServer.java:142)
2025-01-21T04:22:37.955697828Z at org.apache.hadoop.hdfs.server.namenode.NameNode.startHttpServer(NameNode.java:760)
2025-01-21T04:22:37.955701444Z at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:639)
2025-01-21T04:22:37.955704831Z at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:819)
2025-01-21T04:22:37.955708237Z at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:803)
2025-01-21T04:22:37.955711674Z at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1500)
2025-01-21T04:22:37.955715090Z at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1566)
2025-01-21T04:22:37.955718446Z Caused by: java.net.BindException: Address already in use
2025-01-21T04:22:37.955722013Z at sun.nio.ch.Net.bind0(Native Method)
2025-01-21T04:22:37.955725460Z at sun.nio.ch.Net.bind(Net.java:433)
2025-01-21T04:22:37.955729227Z at sun.nio.ch.Net.bind(Net.java:425)
2025-01-21T04:22:37.955733074Z at sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:223)
2025-01-21T04:22:37.955736600Z at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
2025-01-21T04:22:37.955740197Z at org.mortbay.jetty.nio.SelectChannelConnector.open(SelectChannelConnector.java:216)
2025-01-21T04:22:37.955743884Z at org.apache.hadoop.http.HttpServer2.openListeners(HttpServer2.java:934)
2025-01-21T04:22:37.955747391Z ... 8 more
2025-01-21T04:22:37.961686454Z 25/01/21 04:22:37 INFO util.ExitUtil: Exiting with status 1
```

The best fix would be to keep these services off ports inside the kernel's ephemeral range `/proc/sys/net/ipv4/ip_local_port_range` (32768-60999). But since the namenode image [hardcodes port `50070` in its docker image](https://hub.docker.com/layers/bde2020/hadoop-datanode/2.0.0-hadoop2.7.4-java8/images/sha256-5623fca5e36d890983cdc6cfd29744d1d65476528117975b3af6a80d99b3c62f), we instead add the port to `net.ipv4.ip_local_reserved_ports` and introduce a new flag `--reserve-ports` to control this (default false, because not everyone wants to modify the system's reserved ports).
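For reference, the reservation itself boils down to a single sysctl call. Below is a minimal sketch of what `--reserve-ports` triggers and how to verify it on a CI host (assuming `sudo` is available; the invocation path is illustrative and may differ per checkout):

```bash
# Ephemeral port range handed out by the kernel; 50070/50075 fall inside the default 32768-60999.
cat /proc/sys/net/ipv4/ip_local_port_range

# Reserve the namenode/datanode web ports so the kernel never hands them out as
# ephemeral source ports before the containers bind them.
sudo sysctl -w net.ipv4.ip_local_reserved_ports=50070,50075

# Verify the reservation took effect.
cat /proc/sys/net/ipv4/ip_local_reserved_ports

# Start the hive2 components with the new flag enabled.
bash docker/thirdparties/run-thirdparties-docker.sh -c hive2 --reserve-ports
```

Explicit binds to a reserved port are still allowed, so the containers themselves are not affected; only automatic ephemeral allocation skips these ports.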
--- .../docker-compose/hive/hive-2x.yaml.tpl | 9 ++++--- .../docker-compose/hive/hive-2x_settings.env | 2 +- .../docker-compose/hive/hive-3x.yaml.tpl | 9 ++++--- .../thirdparties/run-thirdparties-docker.sh | 24 ++++++++++++++++++- 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl index acc253e70272f7..29a449732843bf 100644 --- a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl +++ b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl @@ -63,8 +63,10 @@ services: expose: - "${HS_PORT}" depends_on: - - datanode - - namenode + datanode: + condition: service_healthy + namenode: + condition: service_healthy healthcheck: test: beeline -u "jdbc:hive2://127.0.0.1:${HS_PORT}/default" -n health_check -e "show databases;" interval: 10s @@ -86,7 +88,8 @@ services: volumes: - ./scripts:/mnt/scripts depends_on: - - hive-metastore-postgresql + hive-metastore-postgresql: + condition: service_healthy healthcheck: test: ["CMD", "sh", "-c", "/mnt/scripts/healthy_check.sh"] interval: 20s diff --git a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env index ddd4625aaab43c..9045bb91683dc5 100644 --- a/docker/thirdparties/docker-compose/hive/hive-2x_settings.env +++ b/docker/thirdparties/docker-compose/hive/hive-2x_settings.env @@ -20,7 +20,7 @@ # Do not use "_" or other sepcial characters, only number and alphabeta. # NOTICE: change this uid will modify hive-*.yaml -export FS_PORT=8020 # should be same as hive3HmsPort in regression-conf.groovy +export FS_PORT=8020 # should be same as hive2HmsPort in regression-conf.groovy export HMS_PORT=9083 # should be same as hive2HmsPort in regression-conf.groovy export HS_PORT=10000 # should be same as hive2ServerPort in regression-conf.groovy export PG_PORT=5432 # should be same as hive2PgPort in regression-conf.groovy diff --git a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl index 1f6eca76d431df..c5b54bae032cef 100644 --- a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl +++ b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl @@ -63,8 +63,10 @@ services: expose: - "${HS_PORT}" depends_on: - - datanode - - namenode + datanode: + condition: service_healthy + namenode: + condition: service_healthy healthcheck: test: beeline -u "jdbc:hive2://127.0.0.1:${HS_PORT}/default" -n health_check -e "show databases;" interval: 10s @@ -86,7 +88,8 @@ services: volumes: - ./scripts:/mnt/scripts depends_on: - - hive-metastore-postgresql + hive-metastore-postgresql: + condition: service_healthy healthcheck: test: ["CMD", "sh", "-c", "/mnt/scripts/healthy_check.sh"] interval: 20s diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index 6e3561706f7e75..cf4f850d49801f 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -33,8 +33,9 @@ Usage: $0 [no option] start all components --help,-h show this usage -c mysql start MySQL - -c mysql,hive3 start MySQL and Hive3 + -c mysql,hive3 start MySQL and Hive3 --stop stop the specified components + --reserve-ports reserve host ports by setting 'net.ipv4.ip_local_reserved_ports' to avoid port already bind error All valid components: 
mysql,pg,oracle,sqlserver,clickhouse,es,hive2,hive3,iceberg,hudi,trino,kafka,mariadb,db2,oceanbase,lakesoul,kerberos @@ -44,6 +45,7 @@ Usage: $0 COMPONENTS=$2 HELP=0 STOP=0 +NEED_RESERVE_PORTS=0 if ! OPTS="$(getopt \ -n "$0" \ @@ -79,6 +81,10 @@ else COMPONENTS=$2 shift 2 ;; + --reserve-ports) + NEED_RESERVE_PORTS=1 + shift + ;; --) shift break @@ -140,6 +146,8 @@ RUN_LAKESOUL=0 RUN_KERBEROS=0 RUN_MINIO=0 +RESERVED_PORTS="" + for element in "${COMPONENTS_ARR[@]}"; do if [[ "${element}"x == "mysql"x ]]; then RUN_MYSQL=1 @@ -155,6 +163,7 @@ for element in "${COMPONENTS_ARR[@]}"; do RUN_ES=1 elif [[ "${element}"x == "hive2"x ]]; then RUN_HIVE2=1 + RESERVED_PORTS="${RESERVED_PORTS},50070,50075" # namenode and datanode ports elif [[ "${element}"x == "hive3"x ]]; then RUN_HIVE3=1 elif [[ "${element}"x == "kafka"x ]]; then @@ -185,6 +194,17 @@ for element in "${COMPONENTS_ARR[@]}"; do fi done +reserve_ports() { + if [[ "${NEED_RESERVE_PORTS}" -eq 0 ]]; then + return + fi + + if [[ "${RESERVED_PORTS}"x != ""x ]]; then + echo "Reserve ports: ${RESERVED_PORTS}" + sudo sysctl -w net.ipv4.ip_local_reserved_ports="${RESERVED_PORTS}" + fi +} + start_es() { # elasticsearch cp "${ROOT}"/docker-compose/elasticsearch/es.yaml.tpl "${ROOT}"/docker-compose/elasticsearch/es.yaml @@ -625,6 +645,8 @@ start_minio() { echo "starting dockers in parallel" +reserve_ports + declare -A pids if [[ "${RUN_ES}" -eq 1 ]]; then From 3479bf615803a9affd4ae8a8df05aae47007ac80 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Wed, 22 Jan 2025 14:30:40 +0800 Subject: [PATCH 23/31] [chore](beut) Remove invalid argument in be-ut and fix wrong path in .gitignore (#47202) already removed in https://github.com/apache/doris/pull/44181 --- .gitignore | 5 +++-- run-be-ut.sh | 7 ------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 1a5a40140694c6..4a399233948df7 100644 --- a/.gitignore +++ b/.gitignore @@ -110,8 +110,9 @@ cloud/cmake-build*/ cloud/ut_build*/ ## tools -tools/ssb-tools/ssb-data/ -tools/ssb-tools/ssb-dbgen/ +tools/ssb-tools/bin/ssb-data/ +tools/ssb-tools/bin/ssb-dbgen/ +tools/ssb-tools/bin/*.tar.gz tools/**/TPC-H_Tools_v*.zip tools/**/TPC-H_Tools_v*/ tools/**/tpc-h_v*.docx diff --git a/run-be-ut.sh b/run-be-ut.sh index 5f73f6f0ee628f..96a97bd34f50e6 100755 --- a/run-be-ut.sh +++ b/run-be-ut.sh @@ -46,7 +46,6 @@ usage() { echo " Usage: $0 Optional options: - --benchmark build benchmark-tool --clean clean and build ut --run build and run all ut --run --filter=xx build and run specified ut @@ -78,7 +77,6 @@ eval set -- "${OPTS}" CLEAN=0 RUN=0 -BUILD_BENCHMARK_TOOL='OFF' DENABLE_CLANG_COVERAGE='OFF' BUILD_AZURE='ON' FILTER="" @@ -93,10 +91,6 @@ if [[ "$#" != 1 ]]; then RUN=1 shift ;; - --benchmark) - BUILD_BENCHMARK_TOOL='ON' - shift - ;; --coverage) DENABLE_CLANG_COVERAGE='ON' shift @@ -239,7 +233,6 @@ cd "${CMAKE_BUILD_DIR}" -DGLIBC_COMPATIBILITY="${GLIBC_COMPATIBILITY}" \ -DUSE_LIBCPP="${USE_LIBCPP}" \ -DBUILD_META_TOOL=OFF \ - -DBUILD_BENCHMARK_TOOL="${BUILD_BENCHMARK_TOOL}" \ -DWITH_MYSQL=ON \ -DUSE_DWARF="${USE_DWARF}" \ -DUSE_UNWIND="${USE_UNWIND}" \ From c878ec9bb43e4a0108e610cc015ee5edc284acc6 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Wed, 22 Jan 2025 15:48:35 +0800 Subject: [PATCH 24/31] [chore](BE) Remove some unnecessary SpinLock and dangerous DCHECK (#47134) Remove some unnecessary SpinLock and dangerous DCHECK --- be/src/runtime/record_batch_queue.cpp | 5 +++-- be/src/runtime/record_batch_queue.h | 5 ++--- be/src/runtime/user_function_cache.cpp | 1 - 
be/src/vec/core/block.h | 10 +++++++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/be/src/runtime/record_batch_queue.cpp b/be/src/runtime/record_batch_queue.cpp index 25db550db3a7f1..0f860664ecce96 100644 --- a/be/src/runtime/record_batch_queue.cpp +++ b/be/src/runtime/record_batch_queue.cpp @@ -17,8 +17,9 @@ #include "runtime/record_batch_queue.h" +#include + #include "pipeline/dependency.h" -#include "util/spinlock.h" namespace doris { @@ -41,7 +42,7 @@ void RecordBatchQueue::update_status(const Status& status) { return; } { - std::lock_guard l(_status_lock); + std::lock_guard l(_status_lock); if (_status.ok()) { _status = status; } diff --git a/be/src/runtime/record_batch_queue.h b/be/src/runtime/record_batch_queue.h index a8e8c80c91cd7b..c61243237eb730 100644 --- a/be/src/runtime/record_batch_queue.h +++ b/be/src/runtime/record_batch_queue.h @@ -18,7 +18,6 @@ #pragma once #include -#include #include #include @@ -46,7 +45,7 @@ class RecordBatchQueue { RecordBatchQueue(u_int32_t max_elements) : _queue(max_elements) {} Status status() { - std::lock_guard l(_status_lock); + std::lock_guard l(_status_lock); return _status; } @@ -66,7 +65,7 @@ class RecordBatchQueue { private: BlockingQueue> _queue; - SpinLock _status_lock; + std::mutex _status_lock; Status _status; std::shared_ptr _dep = nullptr; }; diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp index ab9d90846abbea..f04b508e4723f7 100644 --- a/be/src/runtime/user_function_cache.cpp +++ b/be/src/runtime/user_function_cache.cpp @@ -42,7 +42,6 @@ #include "runtime/exec_env.h" #include "util/dynamic_util.h" #include "util/md5.h" -#include "util/spinlock.h" #include "util/string_util.h" namespace doris { diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 2242db3f9058c2..4696680d7f624e 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -555,9 +555,13 @@ class MutableBlock { dump_names(), dump_types(), block.dump_names(), block.dump_types()); } for (int i = 0; i < _columns.size(); ++i) { - DCHECK(_data_types[i]->equals(*block.get_by_position(i).type)) - << " target type: " << _data_types[i]->get_name() - << " src type: " << block.get_by_position(i).type->get_name(); + if (!_data_types[i]->equals(*block.get_by_position(i).type)) { + throw doris::Exception(doris::ErrorCode::FATAL_ERROR, + "Merge block not match, self:[columns: {}, types: {}], " + "input:[columns: {}, types: {}], ", + dump_names(), dump_types(), block.dump_names(), + block.dump_types()); + } _columns[i]->insert_range_from_ignore_overflow( *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0, block.rows()); From 5bf04af6015d578f272d7d47531e859a96017c7d Mon Sep 17 00:00:00 2001 From: lihangyu Date: Wed, 22 Jan 2025 16:48:57 +0800 Subject: [PATCH 25/31] [Fix](Variant) fix variant may lost schema info when meet TXN_CONFLICT in cloud mode (#47284) schema should be filled when encounter `MetaServiceCode::ALREADY_EXISTED` **I will refactor to remove schema for variant from meta service soon** --- cloud/src/meta-service/meta_service.cpp | 35 ++++++++++++- .../src/meta-service/meta_service_schema.cpp | 14 +++--- cloud/src/meta-service/meta_service_schema.h | 6 ++- .../schema_change_with_txn_conflict.out | 5 ++ .../schema_change_with_txn_conflict.groovy | 49 +++++++++++++++++++ 5 files changed, 100 insertions(+), 9 deletions(-) create mode 100644 regression-test/data/variant_p0/schema_change/schema_change_with_txn_conflict.out create mode 100644 
regression-test/suites/variant_p0/schema_change/schema_change_with_txn_conflict.groovy diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 36322f7a3ea3b9..58e2a6fd4e89f9 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -939,6 +939,30 @@ static void set_schema_in_existed_rowset(MetaServiceCode& code, std::string& msg } } +/** + * Fills schema information into the rowset meta from the dictionary. + * Handles schemas with variant types by retrieving the complete schema from the dictionary. + * + * @param code Result code indicating the operation status. + * @param msg Error description for failed operations. + * @param instance_id Identifier for the specific instance. + * @param txn Pointer to the transaction object for transactional operations. + * @param existed_rowset_meta Rowset meta object to be updated with schema information. + */ +static void fill_schema_from_dict(MetaServiceCode& code, std::string& msg, + const std::string& instance_id, Transaction* txn, + doris::RowsetMetaCloudPB* existed_rowset_meta) { + google::protobuf::RepeatedPtrField metas; + metas.Add()->CopyFrom(*existed_rowset_meta); + // Retrieve schema from the dictionary and update metas. + read_schema_dict(code, msg, instance_id, existed_rowset_meta->index_id(), txn, &metas, nullptr); + if (code != MetaServiceCode::OK) { + return; + } + // Update the original rowset meta with the complete schema from metas. + existed_rowset_meta->CopyFrom(metas.Get(0)); +} + /** * 1. Check and confirm tmp rowset kv does not exist * 2. Construct recycle rowset kv which contains object path @@ -1010,6 +1034,10 @@ void MetaServiceImpl::prepare_rowset(::google::protobuf::RpcController* controll existed_rowset_meta->set_schema_version( existed_rowset_meta->tablet_schema().schema_version()); } + if (existed_rowset_meta->has_variant_type_in_schema()) { + fill_schema_from_dict(code, msg, instance_id, txn.get(), existed_rowset_meta); + if (code != MetaServiceCode::OK) return; + } code = MetaServiceCode::ALREADY_EXISTED; msg = "rowset already exists"; return; @@ -1134,6 +1162,10 @@ void MetaServiceImpl::commit_rowset(::google::protobuf::RpcController* controlle existed_rowset_meta->set_schema_version( existed_rowset_meta->tablet_schema().schema_version()); } + if (existed_rowset_meta->has_variant_type_in_schema()) { + fill_schema_from_dict(code, msg, instance_id, txn.get(), existed_rowset_meta); + if (code != MetaServiceCode::OK) return; + } code = MetaServiceCode::ALREADY_EXISTED; msg = "rowset already exists"; return; @@ -1626,7 +1658,8 @@ void MetaServiceImpl::get_rowset(::google::protobuf::RpcController* controller, } if (need_read_schema_dict) { - read_schema_dict(code, msg, instance_id, idx.index_id(), txn.get(), response, + read_schema_dict(code, msg, instance_id, idx.index_id(), txn.get(), + response->mutable_rowset_meta(), response->mutable_schema_dict(), request->schema_op()); if (code != MetaServiceCode::OK) return; } diff --git a/cloud/src/meta-service/meta_service_schema.cpp b/cloud/src/meta-service/meta_service_schema.cpp index ca0a15d8577b31..ff88e82cf200c8 100644 --- a/cloud/src/meta-service/meta_service_schema.cpp +++ b/cloud/src/meta-service/meta_service_schema.cpp @@ -312,13 +312,15 @@ void write_schema_dict(MetaServiceCode& code, std::string& msg, const std::strin LOG(INFO) << "Dictionary saved, key=" << hex(dict_key) << " txn_id=" << rowset_meta->txn_id() << " Dict size=" << dict.column_dict_size() << ", Current column ID=" 
<< dict.current_column_dict_id() - << ", Current index ID=" << dict.current_index_dict_id(); + << ", Current index ID=" << dict.current_index_dict_id() + << ", Dict bytes=" << dict_val.size(); } } void read_schema_dict(MetaServiceCode& code, std::string& msg, const std::string& instance_id, - int64_t index_id, Transaction* txn, GetRowsetResponse* response, - GetRowsetRequest::SchemaOp schema_op) { + int64_t index_id, Transaction* txn, + google::protobuf::RepeatedPtrField* rsp_metas, + SchemaCloudDictionary* rsp_dict, GetRowsetRequest::SchemaOp schema_op) { std::stringstream ss; // read dict if any rowset has dict key list @@ -341,8 +343,8 @@ void read_schema_dict(MetaServiceCode& code, std::string& msg, const std::string << ", index size=" << dict.index_dict_size(); // Return dict, let backend to fill schema with dict info - if (schema_op == GetRowsetRequest::RETURN_DICT) { - response->mutable_schema_dict()->Swap(&dict); + if (schema_op == GetRowsetRequest::RETURN_DICT && rsp_dict != nullptr) { + rsp_dict->Swap(&dict); return; } @@ -381,7 +383,7 @@ void read_schema_dict(MetaServiceCode& code, std::string& msg, const std::string }; // fill rowsets's schema with dict info - for (auto& rowset_meta : *response->mutable_rowset_meta()) { + for (auto& rowset_meta : *rsp_metas) { if (rowset_meta.has_schema_dict_key_list()) { fill_schema_with_dict(&rowset_meta); } diff --git a/cloud/src/meta-service/meta_service_schema.h b/cloud/src/meta-service/meta_service_schema.h index ec1dcc6731f458..aded32edf09d9a 100644 --- a/cloud/src/meta-service/meta_service_schema.h +++ b/cloud/src/meta-service/meta_service_schema.h @@ -36,7 +36,9 @@ void write_schema_dict(MetaServiceCode& code, std::string& msg, const std::strin // Read schema from dictionary metadata, modified to rowset_metas void read_schema_dict(MetaServiceCode& code, std::string& msg, const std::string& instance_id, - int64_t index_id, Transaction* txn, GetRowsetResponse* response, - GetRowsetRequest::SchemaOp schema_op); + int64_t index_id, Transaction* txn, + google::protobuf::RepeatedPtrField* rsp_metas, + SchemaCloudDictionary* rsp_dict, + GetRowsetRequest::SchemaOp schema_op = GetRowsetRequest::FILL_WITH_DICT); } // namespace doris::cloud diff --git a/regression-test/data/variant_p0/schema_change/schema_change_with_txn_conflict.out b/regression-test/data/variant_p0/schema_change/schema_change_with_txn_conflict.out new file mode 100644 index 00000000000000..dded84050ec441 --- /dev/null +++ b/regression-test/data/variant_p0/schema_change/schema_change_with_txn_conflict.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +0 {"k1":1,"k2":"hello world","k3":[1234],"k4":1.1,"k5":[[123]]} +2 {"a":12345} + diff --git a/regression-test/suites/variant_p0/schema_change/schema_change_with_txn_conflict.groovy b/regression-test/suites/variant_p0/schema_change/schema_change_with_txn_conflict.groovy new file mode 100644 index 00000000000000..5f566cef8854f7 --- /dev/null +++ b/regression-test/suites/variant_p0/schema_change/schema_change_with_txn_conflict.groovy @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_schema_change_txn_conflict", "nonConcurrent") { + def customFeConfig = [ + schema_change_max_retry_time: 10 + ] + def tableName = "variant_txn_conflict" + setFeConfigTemporary(customFeConfig) { + try { + GetDebugPoint().enableDebugPointForAllBEs("CloudSchemaChangeJob::_convert_historical_rowsets.test_conflict") + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + k bigint, + v variant + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 4 + properties("replication_num" = "1"); + """ + sql """INSERT INTO ${tableName} SELECT *, '{"k1":1, "k2": "hello world", "k3" : [1234], "k4" : 1.10000, "k5" : [[123]]}' FROM numbers("number" = "1")""" + sql """ALTER TABLE ${tableName} SET("bloom_filter_columns" = "v")""" + + waitForSchemaChangeDone { + sql """SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1""" + time 600 + } + sql """insert into ${tableName} values (2, '{"a" : 12345}')""" + } catch (Exception e) { + GetDebugPoint().disableDebugPointForAllBEs("CloudSchemaChangeJob::_convert_historical_rowsets.test_conflict") + } + } + qt_sql "select * from ${tableName} order by k limit 5" +} \ No newline at end of file From 0968b99ea3db5a134df9815fbbfc3643a7020488 Mon Sep 17 00:00:00 2001 From: TengJianPing Date: Wed, 22 Jan 2025 16:49:28 +0800 Subject: [PATCH 26/31] [fix](load) fix table sink memory usage counter (#47254) --- be/src/olap/memtable_writer.cpp | 4 ++-- be/src/vec/sink/writer/async_result_writer.cpp | 12 ++++++++++++ be/src/vec/sink/writer/async_result_writer.h | 7 ++++--- .../sink/writer/iceberg/viceberg_table_writer.cpp | 1 - .../vec/sink/writer/iceberg/viceberg_table_writer.h | 1 - be/src/vec/sink/writer/vhive_table_writer.h | 1 - be/src/vec/sink/writer/vtablet_writer.h | 3 +-- be/src/vec/sink/writer/vtablet_writer_v2.h | 3 +-- 8 files changed, 20 insertions(+), 12 deletions(-) diff --git a/be/src/olap/memtable_writer.cpp b/be/src/olap/memtable_writer.cpp index 88532646b66349..52228c3afa299e 100644 --- a/be/src/olap/memtable_writer.cpp +++ b/be/src/olap/memtable_writer.cpp @@ -158,8 +158,8 @@ Status MemTableWriter::flush_async() { } VLOG_NOTICE << "flush memtable to reduce mem consumption. 
memtable size: " - << _mem_table->memory_usage() << ", tablet: " << _req.tablet_id - << ", load id: " << print_id(_req.load_id); + << PrettyPrinter::print_bytes(_mem_table->memory_usage()) + << ", tablet: " << _req.tablet_id << ", load id: " << print_id(_req.load_id); auto s = _flush_memtable_async(); _reset_mem_table(); return s; diff --git a/be/src/vec/sink/writer/async_result_writer.cpp b/be/src/vec/sink/writer/async_result_writer.cpp index ed4f71677f2afe..9a5911140bf95b 100644 --- a/be/src/vec/sink/writer/async_result_writer.cpp +++ b/be/src/vec/sink/writer/async_result_writer.cpp @@ -58,6 +58,7 @@ Status AsyncResultWriter::sink(Block* block, bool eos) { _dependency->set_ready(); } if (rows) { + _memory_used_counter->update(add_block->allocated_bytes()); _data_queue.emplace_back(std::move(add_block)); if (!_data_queue_is_available() && !_is_finished()) { _dependency->block(); @@ -81,10 +82,18 @@ std::unique_ptr AsyncResultWriter::_get_block_from_queue() { if (_data_queue_is_available()) { _dependency->set_ready(); } + _memory_used_counter->update(-block->allocated_bytes()); return block; } Status AsyncResultWriter::start_writer(RuntimeState* state, RuntimeProfile* profile) { + // Attention!!! + // AsyncResultWriter::open is called asynchronously, + // so we need to setupt the profile and memory counter here, + // or else the counter can be nullptr when AsyncResultWriter::sink is called. + _profile = profile; + _memory_used_counter = _profile->get_counter("MemoryUsage"); + // Should set to false here, to DCHECK(_finish_dependency); _finish_dependency->block(); @@ -222,6 +231,7 @@ void AsyncResultWriter::force_close(Status s) { } void AsyncResultWriter::_return_free_block(std::unique_ptr b) { + _memory_used_counter->update(b->allocated_bytes()); _free_blocks.enqueue(std::move(b)); } @@ -230,6 +240,8 @@ std::unique_ptr AsyncResultWriter::_get_free_block(doris::vectorized::Blo std::unique_ptr b; if (!_free_blocks.try_dequeue(b)) { b = block->create_same_struct_block(rows, true); + } else { + _memory_used_counter->update(-b->allocated_bytes()); } b->swap(*block); return b; diff --git a/be/src/vec/sink/writer/async_result_writer.h b/be/src/vec/sink/writer/async_result_writer.h index 2a90dd2dbd04e9..0ceb2a4f3b0bad 100644 --- a/be/src/vec/sink/writer/async_result_writer.h +++ b/be/src/vec/sink/writer/async_result_writer.h @@ -22,13 +22,13 @@ #include // IWYU pragma: keep #include "runtime/result_writer.h" +#include "util/runtime_profile.h" #include "vec/exprs/vexpr_fwd.h" namespace doris { class ObjectPool; class RowDescriptor; class RuntimeState; -class RuntimeProfile; class TDataSink; class TExpr; @@ -75,17 +75,17 @@ class AsyncResultWriter : public ResultWriter { protected: Status _projection_block(Block& input_block, Block* output_block); const VExprContextSPtrs& _vec_output_expr_ctxs; + RuntimeProfile* _profile = nullptr; // not owned, set when open std::unique_ptr _get_free_block(Block*, size_t rows); - void _return_free_block(std::unique_ptr); - private: void process_block(RuntimeState* state, RuntimeProfile* profile); [[nodiscard]] bool _data_queue_is_available() const { return _data_queue.size() < QUEUE_SIZE; } [[nodiscard]] bool _is_finished() const { return !_writer_status.ok() || _eos; } void _set_ready_to_finish(); + void _return_free_block(std::unique_ptr); std::unique_ptr _get_block_from_queue(); static constexpr auto QUEUE_SIZE = 3; @@ -100,6 +100,7 @@ class AsyncResultWriter : public ResultWriter { std::shared_ptr _finish_dependency; moodycamel::ConcurrentQueue> _free_blocks; 
+ RuntimeProfile::Counter* _memory_used_counter = nullptr; }; } // namespace vectorized diff --git a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp index 608afced8d92db..81d97593e870ac 100644 --- a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp +++ b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp @@ -47,7 +47,6 @@ Status VIcebergTableWriter::init_properties(ObjectPool* pool) { Status VIcebergTableWriter::open(RuntimeState* state, RuntimeProfile* profile) { _state = state; - _profile = profile; // add all counter _written_rows_counter = ADD_COUNTER(_profile, "WrittenRows", TUnit::UNIT); diff --git a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.h b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.h index e53c7020a683f9..b3389d94880363 100644 --- a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.h +++ b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.h @@ -116,7 +116,6 @@ class VIcebergTableWriter final : public AsyncResultWriter { // Currently it is a copy, maybe it is better to use move semantics to eliminate it. TDataSink _t_sink; RuntimeState* _state = nullptr; - RuntimeProfile* _profile = nullptr; std::shared_ptr _schema; std::unique_ptr _partition_spec; diff --git a/be/src/vec/sink/writer/vhive_table_writer.h b/be/src/vec/sink/writer/vhive_table_writer.h index 9361fdbc408b9c..1a2cfd3e8d2406 100644 --- a/be/src/vec/sink/writer/vhive_table_writer.h +++ b/be/src/vec/sink/writer/vhive_table_writer.h @@ -71,7 +71,6 @@ class VHiveTableWriter final : public AsyncResultWriter { // Currently it is a copy, maybe it is better to use move semantics to eliminate it. TDataSink _t_sink; RuntimeState* _state = nullptr; - RuntimeProfile* _profile = nullptr; std::vector _partition_columns_input_index; std::set _non_write_columns_indices; std::unordered_map> _partitions_to_writers; diff --git a/be/src/vec/sink/writer/vtablet_writer.h b/be/src/vec/sink/writer/vtablet_writer.h index 52aa0f6b918057..62528a57114286 100644 --- a/be/src/vec/sink/writer/vtablet_writer.h +++ b/be/src/vec/sink/writer/vtablet_writer.h @@ -683,8 +683,7 @@ class VTabletWriter final : public AsyncResultWriter { VOlapTablePartitionParam* _vpartition = nullptr; - RuntimeState* _state = nullptr; // not owned, set when open - RuntimeProfile* _profile = nullptr; // not owned, set when open + RuntimeState* _state = nullptr; // not owned, set when open VRowDistribution _row_distribution; // reuse to avoid frequent memory allocation and release. diff --git a/be/src/vec/sink/writer/vtablet_writer_v2.h b/be/src/vec/sink/writer/vtablet_writer_v2.h index 46a3974bba8aa0..cd0a53d55ff7dc 100644 --- a/be/src/vec/sink/writer/vtablet_writer_v2.h +++ b/be/src/vec/sink/writer/vtablet_writer_v2.h @@ -217,8 +217,7 @@ class VTabletWriterV2 final : public AsyncResultWriter { VOlapTablePartitionParam* _vpartition = nullptr; - RuntimeState* _state = nullptr; // not owned, set when open - RuntimeProfile* _profile = nullptr; // not owned, set when open + RuntimeState* _state = nullptr; // not owned, set when open std::unordered_set _opened_partitions; From 3b8658f0fb500c82e04bdeca7e03e30937d2973c Mon Sep 17 00:00:00 2001 From: Pxl Date: Wed, 22 Jan 2025 17:22:36 +0800 Subject: [PATCH 27/31] [Chore](profile) add RowsExprPredFiltered RowsExprPredInput on profile (#47182) ### What problem does this PR solve? 
add RowsExprPredFiltered RowsExprPredInput on profile ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [x] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [x] Other reason - Behavior changed: - [x] No. - [ ] Yes. - Does this need documentation? - [x] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/src/olap/olap_common.h | 2 ++ be/src/olap/rowset/segment_v2/segment_iterator.cpp | 2 ++ be/src/pipeline/exec/olap_scan_operator.cpp | 3 +++ be/src/pipeline/exec/olap_scan_operator.h | 2 ++ be/src/vec/exec/scan/new_olap_scanner.cpp | 4 +++- 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 8c07245c55c6c4..623d2c83e49a9f 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -327,8 +327,10 @@ struct OlapReaderStatistics { int64_t rows_vec_cond_filtered = 0; int64_t rows_short_circuit_cond_filtered = 0; + int64_t rows_expr_cond_filtered = 0; int64_t vec_cond_input_rows = 0; int64_t short_circuit_cond_input_rows = 0; + int64_t expr_cond_input_rows = 0; int64_t rows_vec_del_cond_filtered = 0; int64_t vec_cond_ns = 0; int64_t short_cond_ns = 0; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index ec0f9104e050e7..f1174d3ff26bfa 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -2299,12 +2299,14 @@ Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& DCHECK(!_remaining_conjunct_roots.empty()); DCHECK(block->rows() != 0); size_t prev_columns = block->columns(); + _opts.stats->expr_cond_input_rows += selected_size; vectorized::IColumn::Filter filter; RETURN_IF_ERROR(vectorized::VExprContext::execute_conjuncts_and_filter_block( _common_expr_ctxs_push_down, block, _columns_to_filter, prev_columns, filter)); selected_size = _evaluate_common_expr_filter(sel_rowid_idx, selected_size, filter); + _opts.stats->rows_expr_cond_filtered += selected_size; return Status::OK(); } diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 6fc65bbbbe8830..b26530d73e23f3 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -81,10 +81,13 @@ Status OlapScanLocalState::_init_profile() { ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT); _rows_short_circuit_cond_filtered_counter = ADD_COUNTER(_segment_profile, "RowsShortCircuitPredFiltered", TUnit::UNIT); + _rows_expr_cond_filtered_counter = + ADD_COUNTER(_segment_profile, "RowsExprPredFiltered", TUnit::UNIT); _rows_vec_cond_input_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredInput", TUnit::UNIT); _rows_short_circuit_cond_input_counter = ADD_COUNTER(_segment_profile, "RowsShortCircuitPredInput", TUnit::UNIT); + _rows_expr_cond_input_counter = ADD_COUNTER(_segment_profile, "RowsExprPredInput", TUnit::UNIT); _vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime"); _short_cond_timer = ADD_TIMER(_segment_profile, "ShortPredEvalTime"); _expr_filter_timer = ADD_TIMER(_segment_profile, "ExprFilterEvalTime"); diff --git 
a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index 0e8e7223d4b8c5..7077abe484740a 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -111,8 +111,10 @@ class OlapScanLocalState final : public ScanLocalState { RuntimeProfile::Counter* _rows_vec_cond_filtered_counter = nullptr; RuntimeProfile::Counter* _rows_short_circuit_cond_filtered_counter = nullptr; + RuntimeProfile::Counter* _rows_expr_cond_filtered_counter = nullptr; RuntimeProfile::Counter* _rows_vec_cond_input_counter = nullptr; RuntimeProfile::Counter* _rows_short_circuit_cond_input_counter = nullptr; + RuntimeProfile::Counter* _rows_expr_cond_input_counter = nullptr; RuntimeProfile::Counter* _vec_cond_timer = nullptr; RuntimeProfile::Counter* _short_cond_timer = nullptr; RuntimeProfile::Counter* _expr_filter_timer = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 38b56cba1176ce..17dedc82368511 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -592,10 +592,12 @@ void NewOlapScanner::_collect_profile_before_close() { COUNTER_UPDATE(local_state->_rows_vec_cond_filtered_counter, stats.rows_vec_cond_filtered); COUNTER_UPDATE(local_state->_rows_short_circuit_cond_filtered_counter, stats.rows_short_circuit_cond_filtered); + COUNTER_UPDATE(local_state->_rows_expr_cond_filtered_counter, stats.rows_expr_cond_filtered); COUNTER_UPDATE(local_state->_rows_vec_cond_input_counter, stats.vec_cond_input_rows); COUNTER_UPDATE(local_state->_rows_short_circuit_cond_input_counter, stats.short_circuit_cond_input_rows); - for (auto& [id, info] : stats.filter_info) { + COUNTER_UPDATE(local_state->_rows_expr_cond_input_counter, stats.expr_cond_input_rows); + for (const auto& [id, info] : stats.filter_info) { local_state->add_filter_info(id, info); } COUNTER_UPDATE(local_state->_stats_filtered_counter, stats.rows_stats_filtered); From 7d49851494e82ca89d16bbd6c81d916beb70c0a0 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Wed, 22 Jan 2025 22:32:36 +0800 Subject: [PATCH 28/31] [fix](hive) Incorrect location conversion for target path (#47162) ### What problem does this PR solve? Related PR: #46362 Problem Summary: For the "target_path" field, when the location is of the S3 type, it should not be converted into the S3 schema. 
--- .../main/java/org/apache/doris/common/util/LocationPath.java | 2 +- .../main/java/org/apache/doris/planner/HiveTableSink.java | 4 ++-- .../java/org/apache/doris/planner/HiveTableSinkTest.java | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java index 2318532cba697f..4ca8f9605a06fa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java @@ -87,7 +87,7 @@ public LocationPath(String location, Map props) { this(location, props, true); } - private LocationPath(String originLocation, Map props, boolean convertPath) { + public LocationPath(String originLocation, Map props, boolean convertPath) { isBindBroker = props.containsKey(HMSExternalCatalog.BIND_BROKER_NAME); String tmpLocation = originLocation; if (!(originLocation.contains(SCHEME_DELIM) || originLocation.contains(NONSTANDARD_SCHEME_DELIM))) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java index 3635d10633f108..a4012d357e55ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java @@ -122,13 +122,13 @@ public void bindDataSink(Optional insertCtx) setSerDeProperties(tSink); THiveLocationParams locationParams = new THiveLocationParams(); - LocationPath locationPath = new LocationPath(sd.getLocation(), targetTable.getHadoopProperties()); + LocationPath locationPath = new LocationPath(sd.getLocation(), targetTable.getHadoopProperties(), false); String location = locationPath.getPath().toString(); String storageLocation = locationPath.toStorageLocation().toString(); TFileType fileType = locationPath.getTFileTypeForBE(); if (fileType == TFileType.FILE_S3) { locationParams.setWritePath(storageLocation); - locationParams.setOriginalWritePath(sd.getLocation()); + locationParams.setOriginalWritePath(location); locationParams.setTargetPath(location); if (insertCtx.isPresent()) { HiveInsertCommandContext context = (HiveInsertCommandContext) insertCtx.get(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/HiveTableSinkTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/HiveTableSinkTest.java index 8794a56eac9138..b57bbcb51a2772 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/HiveTableSinkTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/HiveTableSinkTest.java @@ -32,8 +32,8 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.Assert; import org.junit.Test; -import org.locationtech.jts.util.Assert; import java.util.ArrayList; import java.util.HashMap; @@ -94,7 +94,8 @@ public HMSCachedClient getClient() { HiveTableSink hiveTableSink = new HiveTableSink(tbl); hiveTableSink.bindDataSink(Optional.empty()); - Assert.equals(hiveTableSink.tDataSink.hive_table_sink.location.original_write_path, location); + Assert.assertEquals(hiveTableSink.tDataSink.hive_table_sink.location.original_write_path, location); + Assert.assertEquals(hiveTableSink.tDataSink.hive_table_sink.location.target_path, location); } } From 631aedeb5349a5f0e5f4d8acc6065fa1f344c82d Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Wed, 
22 Jan 2025 22:43:35 +0800 Subject: [PATCH 29/31] [opt](jdbc scan) Add more jdbc scan profile items (#46460) ### What problem does this PR solve? Problem Summary: ``` - GetDataTime: 197.0us (Total time to read data) - CastTime: 2.0us (Convert data from JDBC memory format to Doris BE C++ memory format) - FillBlockTime: 6.0us (Put data into BE Block) - ReadAndFillVectorTableTime: 129.0us (Time to read data and convert to vectorTable) - HasNextTime: 35.0us (The time to obtain each batch of data) - JniSetupTime: 0ns (Start JNI time) - PrepareParamsTime: 24.0us (Preparation parameter time) ``` --- be/src/vec/exec/scan/new_jdbc_scanner.cpp | 14 +++++- be/src/vec/exec/scan/new_jdbc_scanner.h | 6 ++- be/src/vec/exec/vjdbc_connector.cpp | 57 +++++++++++++++++------ be/src/vec/exec/vjdbc_connector.h | 6 ++- 4 files changed, 64 insertions(+), 19 deletions(-) diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.cpp b/be/src/vec/exec/scan/new_jdbc_scanner.cpp index 7eaa9ab3eab788..a470af27179e92 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.cpp +++ b/be/src/vec/exec/scan/new_jdbc_scanner.cpp @@ -163,8 +163,13 @@ void NewJdbcScanner::_init_profile(const std::shared_ptr& profil _init_connector_timer = ADD_TIMER(profile, "InitConnectorTime"); _check_type_timer = ADD_TIMER(profile, "CheckTypeTime"); _get_data_timer = ADD_TIMER(profile, "GetDataTime"); - _get_block_address_timer = ADD_CHILD_TIMER(profile, "GetBlockAddressTime", "GetDataTime"); + _read_and_fill_vector_table_timer = + ADD_CHILD_TIMER(profile, "ReadAndFillVectorTableTime", "GetDataTime"); + _jni_setup_timer = ADD_CHILD_TIMER(profile, "JniSetupTime", "GetDataTime"); + _has_next_timer = ADD_CHILD_TIMER(profile, "HasNextTime", "GetDataTime"); + _prepare_params_timer = ADD_CHILD_TIMER(profile, "PrepareParamsTime", "GetDataTime"); _fill_block_timer = ADD_CHILD_TIMER(profile, "FillBlockTime", "GetDataTime"); + _cast_timer = ADD_CHILD_TIMER(profile, "CastTime", "GetDataTime"); _execte_read_timer = ADD_TIMER(profile, "ExecteReadTime"); _connector_close_timer = ADD_TIMER(profile, "ConnectorCloseTime"); } @@ -175,8 +180,13 @@ void NewJdbcScanner::_update_profile() { COUNTER_UPDATE(_init_connector_timer, jdbc_statistic._init_connector_timer); COUNTER_UPDATE(_check_type_timer, jdbc_statistic._check_type_timer); COUNTER_UPDATE(_get_data_timer, jdbc_statistic._get_data_timer); - COUNTER_UPDATE(_get_block_address_timer, jdbc_statistic._get_block_address_timer); + COUNTER_UPDATE(_jni_setup_timer, jdbc_statistic._jni_setup_timer); + COUNTER_UPDATE(_has_next_timer, jdbc_statistic._has_next_timer); + COUNTER_UPDATE(_prepare_params_timer, jdbc_statistic._prepare_params_timer); + COUNTER_UPDATE(_read_and_fill_vector_table_timer, + jdbc_statistic._read_and_fill_vector_table_timer); COUNTER_UPDATE(_fill_block_timer, jdbc_statistic._fill_block_timer); + COUNTER_UPDATE(_cast_timer, jdbc_statistic._cast_timer); COUNTER_UPDATE(_execte_read_timer, jdbc_statistic._execte_read_timer); COUNTER_UPDATE(_connector_close_timer, jdbc_statistic._connector_close_timer); } diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.h b/be/src/vec/exec/scan/new_jdbc_scanner.h index 92188e43f37f8d..e3e172596efe49 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.h +++ b/be/src/vec/exec/scan/new_jdbc_scanner.h @@ -59,7 +59,11 @@ class NewJdbcScanner : public VScanner { RuntimeProfile::Counter* _load_jar_timer = nullptr; RuntimeProfile::Counter* _init_connector_timer = nullptr; RuntimeProfile::Counter* _get_data_timer = nullptr; - RuntimeProfile::Counter* _get_block_address_timer = nullptr; 
+ RuntimeProfile::Counter* _jni_setup_timer = nullptr; + RuntimeProfile::Counter* _has_next_timer = nullptr; + RuntimeProfile::Counter* _prepare_params_timer = nullptr; + RuntimeProfile::Counter* _cast_timer = nullptr; + RuntimeProfile::Counter* _read_and_fill_vector_table_timer = nullptr; RuntimeProfile::Counter* _fill_block_timer = nullptr; RuntimeProfile::Counter* _check_type_timer = nullptr; RuntimeProfile::Counter* _execte_read_timer = nullptr; diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp index 28baac567cc5d1..e33c8777fa5f9a 100644 --- a/be/src/vec/exec/vjdbc_connector.cpp +++ b/be/src/vec/exec/vjdbc_connector.cpp @@ -234,14 +234,25 @@ Status JdbcConnector::query() { } Status JdbcConnector::get_next(bool* eos, Block* block, int batch_size) { + SCOPED_RAW_TIMER(&_jdbc_statistic._get_data_timer); // Timer for the entire method + if (!_is_open) { return Status::InternalError("get_next before open of jdbc connector."); } - SCOPED_RAW_TIMER(&_jdbc_statistic._get_data_timer); + JNIEnv* env = nullptr; - RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); - jboolean has_next = - env->CallNonvirtualBooleanMethod(_executor_obj, _executor_clazz, _executor_has_next_id); + { + SCOPED_RAW_TIMER(&_jdbc_statistic._jni_setup_timer); // Timer for setting up JNI environment + RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); + } // _jni_setup_timer stops when going out of this scope + + jboolean has_next = JNI_FALSE; + { + SCOPED_RAW_TIMER(&_jdbc_statistic._has_next_timer); // Timer for hasNext check + has_next = env->CallNonvirtualBooleanMethod(_executor_obj, _executor_clazz, + _executor_has_next_id); + } // _has_next_timer stops here + if (has_next != JNI_TRUE) { *eos = true; return Status::OK(); @@ -252,10 +263,21 @@ Status JdbcConnector::get_next(bool* eos, Block* block, int batch_size) { auto column_size = _tuple_desc->slots().size(); auto slots = _tuple_desc->slots(); - jobject map = _get_reader_params(block, env, column_size); - SCOPED_RAW_TIMER(&_jdbc_statistic._get_block_address_timer); - long address = - env->CallLongMethod(_executor_obj, _executor_get_block_address_id, batch_size, map); + jobject map; + { + SCOPED_RAW_TIMER(&_jdbc_statistic._prepare_params_timer); // Timer for preparing params + map = _get_reader_params(block, env, column_size); + } // _prepare_params_timer stops here + + long address = 0; + { + SCOPED_RAW_TIMER( + &_jdbc_statistic + ._read_and_fill_vector_table_timer); // Timer for getBlockAddress call + address = + env->CallLongMethod(_executor_obj, _executor_get_block_address_id, batch_size, map); + } // _get_block_address_timer stops here + RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); env->DeleteLocalRef(map); @@ -263,17 +285,22 @@ Status JdbcConnector::get_next(bool* eos, Block* block, int batch_size) { for (uint32_t i = 0; i < column_size; ++i) { all_columns.push_back(i); } - SCOPED_RAW_TIMER(&_jdbc_statistic._fill_block_timer); - Status fill_block_status = JniConnector::fill_block(block, all_columns, address); + + Status fill_block_status; + { + SCOPED_RAW_TIMER(&_jdbc_statistic._fill_block_timer); // Timer for fill_block + fill_block_status = JniConnector::fill_block(block, all_columns, address); + } // _fill_block_timer stops here + if (!fill_block_status) { return fill_block_status; } - Status cast_status = _cast_string_to_special(block, env, column_size); - - if (!cast_status) { - return cast_status; - } + Status cast_status; + { + SCOPED_RAW_TIMER(&_jdbc_statistic._cast_timer); // Timer for casting process + cast_status = 
_cast_string_to_special(block, env, column_size); + } // _cast_timer stops here return JniUtil::GetJniExceptionMsg(env); } diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h index 954b0abfa78f0c..941ce4331a50fd 100644 --- a/be/src/vec/exec/vjdbc_connector.h +++ b/be/src/vec/exec/vjdbc_connector.h @@ -71,8 +71,12 @@ class JdbcConnector : public TableConnector { int64_t _load_jar_timer = 0; int64_t _init_connector_timer = 0; int64_t _get_data_timer = 0; - int64_t _get_block_address_timer = 0; + int64_t _read_and_fill_vector_table_timer = 0; + int64_t _jni_setup_timer = 0; + int64_t _has_next_timer = 0; + int64_t _prepare_params_timer = 0; int64_t _fill_block_timer = 0; + int64_t _cast_timer = 0; int64_t _check_type_timer = 0; int64_t _execte_read_timer = 0; int64_t _connector_close_timer = 0; From 13e6b972c763411cf835dc2cbc9ec1506fbaa970 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Wed, 22 Jan 2025 22:54:09 +0800 Subject: [PATCH 30/31] [opt](jdbc catalog) Compatible with higher ClickHouse JDBC Driver versions (#46026) ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: 1. Since clickhouse changes the database level in jdbc metadata from schema to catalog in JDBC Driver 0.5.0 and later, we need to be compatible with this change 2. Since clickhouse JDBC Driver supports getting metadata from prepared statements only in Driver version 0.6.2 and later, if you use query tvf to query clickhouse catalog, you need to use a driver later than this version 3. Delete some tests and add them again later --- .../datasource/jdbc/JdbcExternalCatalog.java | 2 +- .../jdbc/client/JdbcClickHouseClient.java | 130 ++++++++++++++++++ .../datasource/jdbc/client/JdbcClient.java | 12 ++ .../jdbc/client/JdbcOceanBaseClient.java | 2 +- .../jdbc/client/JdbcClickHouseClientTest.java | 67 +++++++++ .../{ => client}/JdbcClientExceptionTest.java | 4 +- .../jdbc/test_clickhouse_jdbc_catalog.out | Bin 4381 -> 8056 bytes .../jdbc/test_clickhouse_jdbc_catalog.groovy | 73 ++++++---- 8 files changed, 261 insertions(+), 29 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClientTest.java rename fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/{ => client}/JdbcClientExceptionTest.java (97%) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java index fac322d21eb4da..03554dafbcb940 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java @@ -382,7 +382,7 @@ private void testFeToJdbcConnection() throws DdlException { jdbcClient.testConnection(); } catch (JdbcClientException e) { String errorMessage = "Test FE Connection to JDBC Failed: " + e.getMessage(); - LOG.error(errorMessage, e); + LOG.warn(errorMessage, e); throw new DdlException(errorMessage, e); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClient.java index bdf0cbbc934482..4f340bebed4732 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClient.java @@ -22,12 +22,103 @@ import org.apache.doris.catalog.Type; 
import org.apache.doris.datasource.jdbc.util.JdbcFieldSchema; +import com.google.common.collect.Lists; + +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; import java.util.Optional; +import java.util.function.Consumer; public class JdbcClickHouseClient extends JdbcClient { + private final Boolean databaseTermIsCatalog; + protected JdbcClickHouseClient(JdbcClientConfig jdbcClientConfig) { super(jdbcClientConfig); + try (Connection conn = getConnection()) { + String jdbcUrl = conn.getMetaData().getURL(); + if (!isNewClickHouseDriver(getJdbcDriverVersion())) { + this.databaseTermIsCatalog = false; + } else { + this.databaseTermIsCatalog = "catalog".equalsIgnoreCase(getDatabaseTermFromUrl(jdbcUrl)); + } + } catch (SQLException e) { + throw new JdbcClientException("Failed to initialize JdbcClickHouseClient: %s", e.getMessage()); + } + } + + @Override + public List getDatabaseNameList() { + Connection conn = null; + ResultSet rs = null; + List remoteDatabaseNames = Lists.newArrayList(); + try { + conn = getConnection(); + if (isOnlySpecifiedDatabase && includeDatabaseMap.isEmpty() && excludeDatabaseMap.isEmpty()) { + if (databaseTermIsCatalog) { + remoteDatabaseNames.add(conn.getCatalog()); + } else { + remoteDatabaseNames.add(conn.getSchema()); + } + } else { + if (databaseTermIsCatalog) { + rs = conn.getMetaData().getCatalogs(); + } else { + rs = conn.getMetaData().getSchemas(conn.getCatalog(), null); + } + while (rs.next()) { + remoteDatabaseNames.add(rs.getString(1)); + } + } + } catch (SQLException e) { + throw new JdbcClientException("failed to get database name list from jdbc", e); + } finally { + close(rs, conn); + } + return filterDatabaseNames(remoteDatabaseNames); + } + + @Override + protected void processTable(String remoteDbName, String remoteTableName, String[] tableTypes, + Consumer resultSetConsumer) { + Connection conn = null; + ResultSet rs = null; + try { + conn = super.getConnection(); + DatabaseMetaData databaseMetaData = conn.getMetaData(); + if (databaseTermIsCatalog) { + rs = databaseMetaData.getTables(remoteDbName, null, remoteTableName, tableTypes); + } else { + rs = databaseMetaData.getTables(null, remoteDbName, remoteTableName, tableTypes); + } + resultSetConsumer.accept(rs); + } catch (SQLException e) { + throw new JdbcClientException("Failed to process table", e); + } finally { + close(rs, conn); + } + } + + @Override + protected ResultSet getRemoteColumns(DatabaseMetaData databaseMetaData, String catalogName, String remoteDbName, + String remoteTableName) throws SQLException { + if (databaseTermIsCatalog) { + return databaseMetaData.getColumns(remoteDbName, null, remoteTableName, null); + } else { + return databaseMetaData.getColumns(catalogName, remoteDbName, remoteTableName, null); + } + } + + @Override + protected String getCatalogName(Connection conn) throws SQLException { + if (databaseTermIsCatalog) { + return null; + } else { + return conn.getCatalog(); + } } @Override @@ -121,4 +212,43 @@ protected Type jdbcTypeToDoris(JdbcFieldSchema fieldSchema) { return Type.UNSUPPORTED; } } + + /** + * Determine whether the driver version is greater than or equal to 0.5.0. 
+ */ + private static boolean isNewClickHouseDriver(String driverVersion) { + if (driverVersion == null) { + throw new JdbcClientException("Driver version cannot be null"); + } + try { + String[] versionParts = driverVersion.split("\\."); + int majorVersion = Integer.parseInt(versionParts[0]); + int minorVersion = Integer.parseInt(versionParts[1]); + // Determine whether it is greater than or equal to 0.5.x + return (majorVersion > 0) || (majorVersion == 0 && minorVersion >= 5); + } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) { + throw new JdbcClientException("Invalid clickhouse driver version format: " + driverVersion, e); + } + } + + /** + * Extract databaseterm parameters from the jdbc url. + */ + private String getDatabaseTermFromUrl(String jdbcUrl) { + if (jdbcUrl != null && jdbcUrl.toLowerCase().contains("databaseterm=schema")) { + return "schema"; + } + return "catalog"; + } + + /** + * Get the driver version. + */ + public String getJdbcDriverVersion() { + try (Connection conn = getConnection()) { + return conn.getMetaData().getDriverVersion(); + } catch (SQLException e) { + throw new JdbcClientException("Failed to get jdbc driver version", e); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java index a3dfdcda3193d4..121f7d6ba0499f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java @@ -480,4 +480,16 @@ public void testConnection() { public String getTestQuery() { return "select 1"; } + + public String getJdbcDriverVersion() { + Connection conn = null; + try { + conn = getConnection(); + return conn.getMetaData().getDriverVersion(); + } catch (SQLException e) { + throw new JdbcClientException("Failed to get jdbc driver version", e); + } finally { + close(conn); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcOceanBaseClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcOceanBaseClient.java index 0d3970c774b5bd..f43119875d63c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcOceanBaseClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcOceanBaseClient.java @@ -54,7 +54,7 @@ public JdbcClient createClient(JdbcClientConfig jdbcClientConfig) throws JdbcCli throw new JdbcClientException("Failed to determine OceanBase compatibility mode"); } } catch (SQLException e) { - throw new JdbcClientException("Failed to initialize JdbcOceanBaseClient", e.getMessage()); + throw new JdbcClientException("Failed to initialize JdbcOceanBaseClient: %s", e.getMessage()); } finally { close(rs, stmt, conn); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClientTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClientTest.java new file mode 100644 index 00000000000000..99e4aa62dd574d --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/client/JdbcClickHouseClientTest.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.jdbc.client; + +import org.junit.Assert; +import org.junit.Test; + +import java.lang.reflect.Method; + +public class JdbcClickHouseClientTest { + + @Test + public void testIsNewClickHouseDriver() { + try { + Method method = JdbcClickHouseClient.class.getDeclaredMethod("isNewClickHouseDriver", String.class); + method.setAccessible(true); + + // Valid test cases + Assert.assertTrue((boolean) method.invoke(null, "0.5.0")); // Major version 0, Minor version 5 + Assert.assertTrue((boolean) method.invoke(null, "1.0.0")); // Major version 1 + Assert.assertTrue((boolean) method.invoke(null, "0.6.3 (revision: a6a8a22)")); // Major version 0, Minor version 6 + Assert.assertFalse((boolean) method.invoke(null, "0.4.2 (revision: 1513b27)")); // Major version 0, Minor version 4 + + // Invalid version formats + try { + method.invoke(null, "invalid.version"); // Invalid version format + Assert.fail("Expected JdbcClientException for invalid version 'invalid.version'"); + } catch (Exception e) { + Assert.assertTrue(e.getCause() instanceof JdbcClientException); + Assert.assertTrue(e.getCause().getMessage().contains("Invalid clickhouse driver version format")); + } + + try { + method.invoke(null, ""); // Empty version + Assert.fail("Expected JdbcClientException for empty version"); + } catch (Exception e) { + Assert.assertTrue(e.getCause() instanceof JdbcClientException); + Assert.assertTrue(e.getCause().getMessage().contains("Invalid clickhouse driver version format")); + } + + try { + method.invoke(null, (Object) null); // Null version + Assert.fail("Expected JdbcClientException for null version"); + } catch (Exception e) { + Assert.assertTrue(e.getCause() instanceof JdbcClientException); + Assert.assertTrue(e.getCause().getMessage().contains("Driver version cannot be null")); + } + } catch (Exception e) { + Assert.fail("Exception occurred while testing isNewClickHouseDriver: " + e.getMessage()); + } + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/JdbcClientExceptionTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/client/JdbcClientExceptionTest.java similarity index 97% rename from fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/JdbcClientExceptionTest.java rename to fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/client/JdbcClientExceptionTest.java index 1bbf54e9438512..c99f2bcfe26dbc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/JdbcClientExceptionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/client/JdbcClientExceptionTest.java @@ -15,9 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-package org.apache.doris.datasource.jdbc; - -import org.apache.doris.datasource.jdbc.client.JdbcClientException; +package org.apache.doris.datasource.jdbc.client; import org.junit.Assert; import org.junit.Test; diff --git a/regression-test/data/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.out index 22f85579a83b532e1c615a93e2f79578b23981a9..910338e08622431116bc5636c9f6e8bd649c963e 100644 GIT binary patch delta 282 zcmbQM^uun0te}Lhu7YB6PG)j;Mt*5=YP@-TN@`kSX->(;Rcja}ut~(1l%;_rWpGKx zCl(cL*5^@WoGie_DuSXaIk6-$CqEt2Y^VfwvymiG%oav5yEr)`H8&Atv^W-Vn90&O K Date: Wed, 22 Jan 2025 22:59:58 +0800 Subject: [PATCH 31/31] [Opt] (multi-catalog) opt max scanner thread number in batch split mode. (#44635) ### What problem does this PR solve? Problem Summary: There's only one scan range for each backend in batch split mode. Each backend only starts up one ScanNode instance. However, when calculating the concurrency of scanners in the scan operator in batch split mode, it is not divided by 1, but by `query_parallel_instance_num`, resulting in poor performance of batch split mode. --- be/src/pipeline/exec/file_scan_operator.cpp | 45 ++++++++++++++------- be/src/pipeline/exec/file_scan_operator.h | 6 +++ be/src/pipeline/exec/scan_operator.cpp | 5 ++- be/src/pipeline/exec/scan_operator.h | 6 +++ be/src/vec/exec/scan/scanner_context.cpp | 9 ++--- be/src/vec/exec/scan/scanner_context.h | 4 +- 6 files changed, 54 insertions(+), 21 deletions(-) diff --git a/be/src/pipeline/exec/file_scan_operator.cpp b/be/src/pipeline/exec/file_scan_operator.cpp index 7afbb29134c079..3cb934c1015766 100644 --- a/be/src/pipeline/exec/file_scan_operator.cpp +++ b/be/src/pipeline/exec/file_scan_operator.cpp @@ -26,6 +26,7 @@ #include "pipeline/exec/olap_scan_operator.h" #include "pipeline/exec/scan_operator.h" #include "vec/exec/format/format_common.h" +#include "vec/exec/scan/scanner_context.h" #include "vec/exec/scan/vfile_scanner.h" namespace doris::pipeline { @@ -37,9 +38,10 @@ Status FileScanLocalState::_init_scanners(std::list* s } auto& p = _parent->cast(); - uint32_t shard_num = std::min( - config::doris_scanner_thread_pool_thread_num / state()->query_parallel_instance_num(), - _max_scanners); + // There's only one scan range for each backend in batch split mode. Each backend only starts up one ScanNode instance. + uint32_t shard_num = + std::min(config::doris_scanner_thread_pool_thread_num / p.query_parallel_instance_num(), + _max_scanners); shard_num = std::max(shard_num, 1U); _kv_cache.reset(new vectorized::ShardedKVCache(shard_num)); for (int i = 0; i < _max_scanners; ++i) { @@ -60,28 +62,43 @@ std::string FileScanLocalState::name_suffix() const { void FileScanLocalState::set_scan_ranges(RuntimeState* state, const std::vector& scan_ranges) { - _max_scanners = - config::doris_scanner_thread_pool_thread_num / state->query_parallel_instance_num(); - _max_scanners = std::max(std::max(_max_scanners, state->parallel_scan_max_scanners_count()), 1); - // For select * from table limit 10; should just use one thread. 
-    if (should_run_serial()) {
-        _max_scanners = 1;
-    }
+    auto& p = _parent->cast();
+
+    auto calc_max_scanners = [&](int parallel_instance_num) -> int {
+        int max_scanners = config::doris_scanner_thread_pool_thread_num / parallel_instance_num;
+        max_scanners =
+                std::max(std::max(max_scanners, state->parallel_scan_max_scanners_count()), 1);
+        if (should_run_serial()) {
+            max_scanners = 1;
+        }
+        return max_scanners;
+    };
+
     if (scan_ranges.size() == 1) {
         auto scan_range = scan_ranges[0].scan_range.ext_scan_range.file_scan_range;
         if (scan_range.__isset.split_source) {
+            p._batch_split_mode = true;
             auto split_source = scan_range.split_source;
             RuntimeProfile::Counter* get_split_timer = ADD_TIMER(_runtime_profile, "GetSplitTime");
+
+            _max_scanners = calc_max_scanners(p.query_parallel_instance_num());
             _split_source = std::make_shared(
                     state, get_split_timer, split_source.split_source_id, split_source.num_splits,
                     _max_scanners);
         }
     }
-    if (_split_source == nullptr) {
-        _split_source =
-                std::make_shared(scan_ranges, _max_scanners);
+
+    if (!p._batch_split_mode) {
+        _max_scanners = calc_max_scanners(p.query_parallel_instance_num());
+        if (_split_source == nullptr) {
+            _split_source = std::make_shared(scan_ranges,
+                                             _max_scanners);
+        }
+        // currently the total number of splits in the batch split mode cannot be accurately obtained,
+        // so we don't do it in the batch split mode.
+        _max_scanners = std::min(_max_scanners, _split_source->num_scan_ranges());
     }
-    _max_scanners = std::min(_max_scanners, _split_source->num_scan_ranges());
+
     if (scan_ranges.size() > 0 &&
         scan_ranges[0].scan_range.ext_scan_range.file_scan_range.__isset.params) {
         // for compatibility.
diff --git a/be/src/pipeline/exec/file_scan_operator.h b/be/src/pipeline/exec/file_scan_operator.h
index 87c5bcd2e54de5..8b7b25a025e85e 100644
--- a/be/src/pipeline/exec/file_scan_operator.h
+++ b/be/src/pipeline/exec/file_scan_operator.h
@@ -81,10 +81,16 @@ class FileScanOperatorX final : public ScanOperatorX {
 
     bool is_file_scan_operator() const override { return true; }
 
+    // There's only one scan range for each backend in batch split mode. Each backend only starts up one ScanNode instance.
+    int query_parallel_instance_num() const override {
+        return _batch_split_mode ? 1 : _query_parallel_instance_num;
+    }
+
 private:
     friend class FileScanLocalState;
 
     const std::string _table_name;
+    bool _batch_split_mode = false;
 };
 
 #include "common/compile_check_end.h"
diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp
index c694822adf378d..d437289d6b7d29 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -996,7 +996,8 @@ Status ScanLocalState::_start_scanners(
     auto& p = _parent->cast();
     _scanner_ctx = vectorized::ScannerContext::create_shared(
             state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(),
-            _scan_dependency, p.is_serial_operator(), p.is_file_scan_operator());
+            _scan_dependency, p.is_serial_operator(), p.is_file_scan_operator(),
+            p.query_parallel_instance_num());
     return Status::OK();
 }
 
@@ -1205,6 +1206,8 @@ Status ScanOperatorX::init(const TPlanNode& tnode, RuntimeState*
         }
     }
 
+    _query_parallel_instance_num = state->query_parallel_instance_num();
+
     return Status::OK();
 }
 
diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h
index c6c9cdf405d5a4..f9a0b5dc428cf9 100644
--- a/be/src/pipeline/exec/scan_operator.h
+++ b/be/src/pipeline/exec/scan_operator.h
@@ -372,6 +372,10 @@ class ScanOperatorX : public OperatorX {
 
     [[nodiscard]] virtual bool is_file_scan_operator() const { return false; }
 
+    [[nodiscard]] virtual int query_parallel_instance_num() const {
+        return _query_parallel_instance_num;
+    }
+
     const std::vector& runtime_filter_descs() override {
         return _runtime_filter_descs;
     }
@@ -434,6 +438,8 @@ class ScanOperatorX : public OperatorX {
     int64_t _push_down_count = -1;
     const int _parallel_tasks = 0;
 
+    int _query_parallel_instance_num = 0;
+
     std::vector topn_filter_source_node_ids;
 };
 
diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp
index 688204bdff8ca7..a683e31acda96c 100644
--- a/be/src/vec/exec/scan/scanner_context.cpp
+++ b/be/src/vec/exec/scan/scanner_context.cpp
@@ -47,7 +47,7 @@ ScannerContext::ScannerContext(
         const TupleDescriptor* output_tuple_desc, const RowDescriptor* output_row_descriptor,
         const std::list>& scanners, int64_t limit_,
         std::shared_ptr dependency, bool ignore_data_distribution,
-        bool is_file_scan_operator)
+        bool is_file_scan_operator, int num_parallel_instances)
         : HasTaskExecutionCtx(state),
           _state(state),
           _local_state(local_state),
@@ -60,7 +60,8 @@ ScannerContext::ScannerContext(
           _scanner_scheduler_global(state->exec_env()->scanner_scheduler()),
           _all_scanners(scanners.begin(), scanners.end()),
           _ignore_data_distribution(ignore_data_distribution),
-          _is_file_scan_operator(is_file_scan_operator) {
+          _is_file_scan_operator(is_file_scan_operator),
+          _num_parallel_instances(num_parallel_instances) {
     DCHECK(_output_row_descriptor == nullptr ||
            _output_row_descriptor->tuple_descriptors().size() == 1);
     _query_id = _state->get_query_ctx()->query_id();
@@ -105,8 +106,6 @@ Status ScannerContext::init() {
     _local_state->_runtime_profile->add_info_string("UseSpecificThreadToken",
                                                     thread_token == nullptr ? "False" : "True");
 
-    const int num_parallel_instances = _state->query_parallel_instance_num();
-
     // _max_bytes_in_queue controls the maximum memory that can be used by a single scan instance.
     // scan_queue_mem_limit on FE is 100MB by default, on backend we will make sure its actual value
     // is larger than 10MB.
@@ -176,7 +175,7 @@ Status ScannerContext::init() {
     } else {
         const size_t factor = _is_file_scan_operator ? 1 : 4;
         _max_thread_num = factor * (config::doris_scanner_thread_pool_thread_num /
-                                    num_parallel_instances);
+                                    _num_parallel_instances);
         // In some rare cases, user may set num_parallel_instances to 1 handly to make many query could be executed
         // in parallel. We need to make sure the _max_thread_num is smaller than previous value.
         _max_thread_num =
diff --git a/be/src/vec/exec/scan/scanner_context.h b/be/src/vec/exec/scan/scanner_context.h
index d1cf06d56686ac..b385855d0f2e4b 100644
--- a/be/src/vec/exec/scan/scanner_context.h
+++ b/be/src/vec/exec/scan/scanner_context.h
@@ -107,7 +107,8 @@ class ScannerContext : public std::enable_shared_from_this,
                    const RowDescriptor* output_row_descriptor,
                    const std::list>& scanners, int64_t limit_,
                    std::shared_ptr dependency,
-                   bool ignore_data_distribution, bool is_file_scan_operator);
+                   bool ignore_data_distribution, bool is_file_scan_operator,
+                   int num_parallel_instances);
 
     ~ScannerContext() override {
         SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_query_thread_context.query_mem_tracker);
@@ -213,6 +214,7 @@ class ScannerContext : public std::enable_shared_from_this,
     std::shared_ptr _dependency = nullptr;
     bool _ignore_data_distribution = false;
     bool _is_file_scan_operator = false;
+    int _num_parallel_instances;
    // for scaling up the running scanners
     size_t _estimated_block_size = 0;
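
To make the concurrency math this patch adjusts concrete, here is a minimal, self-contained C++ sketch. The constants below are invented stand-ins for config::doris_scanner_thread_pool_thread_num and query_parallel_instance_num(); this is not Doris code and it ignores the additional caps the real ScannerContext applies, but it shows why dividing by the per-query instance count starved batch split mode, where each backend runs only one ScanNode instance.

```cpp
// Illustrative sketch only: names and numbers are stand-ins, not Doris APIs.
#include <algorithm>
#include <cstdio>

int main() {
    const int scanner_pool_threads = 48; // stand-in for config::doris_scanner_thread_pool_thread_num
    const int parallel_instances = 8;    // stand-in for query_parallel_instance_num()
    const bool is_file_scan = true;
    const int factor = is_file_scan ? 1 : 4;

    // Old behaviour: always divide by the per-query instance count, even though
    // batch split mode starts only one ScanNode instance per backend.
    const int old_max_threads = std::max(factor * (scanner_pool_threads / parallel_instances), 1);

    // New behaviour: in batch split mode the divisor collapses to 1, so the single
    // instance can use the whole scanner-thread budget.
    const bool batch_split_mode = true;
    const int divisor = batch_split_mode ? 1 : parallel_instances;
    const int new_max_threads = std::max(factor * (scanner_pool_threads / divisor), 1);

    std::printf("old max scanner threads: %d, new max scanner threads: %d\n",
                old_max_threads, new_max_threads);
    return 0;
}
```

With these illustrative numbers the old formula leaves the lone batch-split instance with 6 scanner threads, while the new one lets it schedule 48, which is the performance gap the commit message describes.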