diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index c0f16d304a2b72d..a8ab93de455c3b4 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1630,11 +1630,13 @@ void drop_tablet_callback(StorageEngine& engine, const TAgentTaskRequest& req) { dropped_tablet->tablet_uid()); LOG_INFO("successfully drop tablet") .tag("signature", req.signature) - .tag("tablet_id", drop_tablet_req.tablet_id); + .tag("tablet_id", drop_tablet_req.tablet_id) + .tag("replica_id", drop_tablet_req.replica_id); } else { LOG_WARNING("failed to drop tablet") .tag("signature", req.signature) .tag("tablet_id", drop_tablet_req.tablet_id) + .tag("replica_id", drop_tablet_req.replica_id) .error(status); } diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index 4e25cd74209e6ec..f63054563aa18ab 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -33,6 +33,7 @@ #include "util/uuid_generator.h" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; bvar::Adder cumu_output_size("cumu_compaction", "output_size"); @@ -488,8 +489,10 @@ Status CloudCumulativeCompaction::pick_rowsets_to_compact() { } int64_t max_score = config::cumulative_compaction_max_deltas; - auto process_memory_usage = doris::GlobalMemoryArbitrator::process_memory_usage(); - bool memory_usage_high = process_memory_usage > MemInfo::soft_mem_limit() * 0.8; + double process_memory_usage = + cast_set<double>(doris::GlobalMemoryArbitrator::process_memory_usage()); + bool memory_usage_high = + process_memory_usage > cast_set<double>(MemInfo::soft_mem_limit()) * 0.8; if (cloud_tablet()->last_compaction_status.is() || memory_usage_high) { max_score = std::max(config::cumulative_compaction_max_deltas / @@ -619,4 +622,5 @@ void CloudCumulativeCompaction::do_lease() { } } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_cumulative_compaction.h b/be/src/cloud/cloud_cumulative_compaction.h index 1159dcb59ceef1b..87fc0b62c9c389d 100644 --- a/be/src/cloud/cloud_cumulative_compaction.h +++ b/be/src/cloud/cloud_cumulative_compaction.h @@ -24,6 +24,7 @@ #include "olap/compaction.h" namespace doris { +#include "common/compile_check_begin.h" class CloudCumulativeCompaction : public CloudCompactionMixin { public: @@ -60,4 +61,5 @@ class CloudCumulativeCompaction : public CloudCompactionMixin { Version _last_delete_version {-1, -1}; }; +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.cpp b/be/src/cloud/cloud_cumulative_compaction_policy.cpp index 5a9879387b23278..92a47fcc69f8d7d 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.cpp +++ b/be/src/cloud/cloud_cumulative_compaction_policy.cpp @@ -31,6 +31,7 @@ #include "olap/tablet_meta.h" namespace doris { +#include "common/compile_check_begin.h" CloudSizeBasedCumulativeCompactionPolicy::CloudSizeBasedCumulativeCompactionPolicy( int64_t promotion_size, double promotion_ratio, int64_t promotion_min_size, @@ -48,7 +49,7 @@ int64_t CloudSizeBasedCumulativeCompactionPolicy::_level_size(const int64_t size return (int64_t)1 << (sizeof(size) * 8 - 1 - __builtin_clzl(size)); } -int32_t CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( +int64_t CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( CloudTablet* tablet, const std::vector<RowsetSharedPtr>& candidate_rowsets, const int64_t max_compaction_score, const int64_t
min_compaction_score, std::vector* input_rowsets, Version* last_delete_version, @@ -114,8 +115,8 @@ int32_t CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( size_t new_compaction_score = *compaction_score; while (rs_begin != input_rowsets->end()) { auto& rs_meta = (*rs_begin)->rowset_meta(); - int current_level = _level_size(rs_meta->total_disk_size()); - int remain_level = _level_size(total_size - rs_meta->total_disk_size()); + int64_t current_level = _level_size(rs_meta->total_disk_size()); + int64_t remain_level = _level_size(total_size - rs_meta->total_disk_size()); // if current level less then remain level, input rowsets contain current rowset // and process return; otherwise, input rowsets do not contain current rowset. if (current_level <= remain_level) { @@ -185,7 +186,7 @@ int32_t CloudSizeBasedCumulativeCompactionPolicy::pick_input_rowsets( } int64_t CloudSizeBasedCumulativeCompactionPolicy::cloud_promotion_size(CloudTablet* t) const { - int64_t promotion_size = int64_t(t->base_size() * _promotion_ratio); + int64_t promotion_size = int64_t(cast_set(t->base_size()) * _promotion_ratio); // promotion_size is between _size_based_promotion_size and _size_based_promotion_min_size return promotion_size > _promotion_size ? _promotion_size : promotion_size < _promotion_min_size ? _promotion_min_size @@ -215,7 +216,7 @@ int64_t CloudSizeBasedCumulativeCompactionPolicy::new_cumulative_point( : last_cumulative_point; } -int32_t CloudTimeSeriesCumulativeCompactionPolicy::pick_input_rowsets( +int64_t CloudTimeSeriesCumulativeCompactionPolicy::pick_input_rowsets( CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, std::vector* input_rowsets, Version* last_delete_version, @@ -377,4 +378,5 @@ int64_t CloudTimeSeriesCumulativeCompactionPolicy::new_cumulative_point( return output_rowset->end_version() + 1; } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_cumulative_compaction_policy.h b/be/src/cloud/cloud_cumulative_compaction_policy.h index c142a8a6d3dffef..9373728547241ba 100644 --- a/be/src/cloud/cloud_cumulative_compaction_policy.h +++ b/be/src/cloud/cloud_cumulative_compaction_policy.h @@ -30,6 +30,7 @@ #include "olap/rowset/rowset_meta.h" namespace doris { +#include "common/compile_check_begin.h" class Tablet; struct Version; @@ -44,7 +45,7 @@ class CloudCumulativeCompactionPolicy { virtual int64_t new_compaction_level(const std::vector& input_rowsets) = 0; - virtual int32_t pick_input_rowsets(CloudTablet* tablet, + virtual int64_t pick_input_rowsets(CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, @@ -71,7 +72,7 @@ class CloudSizeBasedCumulativeCompactionPolicy : public CloudCumulativeCompactio return 0; } - int32_t pick_input_rowsets(CloudTablet* tablet, + int64_t pick_input_rowsets(CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, @@ -106,7 +107,7 @@ class CloudTimeSeriesCumulativeCompactionPolicy : public CloudCumulativeCompacti int64_t new_compaction_level(const std::vector& input_rowsets) override; - int32_t pick_input_rowsets(CloudTablet* tablet, + int64_t pick_input_rowsets(CloudTablet* tablet, const std::vector& candidate_rowsets, const int64_t max_compaction_score, const int64_t min_compaction_score, @@ -115,4 +116,5 @@ class CloudTimeSeriesCumulativeCompactionPolicy : public 
CloudCumulativeCompacti bool allow_delete = false) override; }; +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_delete_bitmap_action.cpp b/be/src/cloud/cloud_delete_bitmap_action.cpp index 86cc535e1bc88e5..3d834bfe7b373c6 100644 --- a/be/src/cloud/cloud_delete_bitmap_action.cpp +++ b/be/src/cloud/cloud_delete_bitmap_action.cpp @@ -50,6 +50,7 @@ #include "util/stopwatch.hpp" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; namespace { @@ -177,4 +178,5 @@ void CloudDeleteBitmapAction::handle(HttpRequest* req) { } } +#include "common/compile_check_end.h" } // namespace doris \ No newline at end of file diff --git a/be/src/cloud/cloud_delete_bitmap_action.h b/be/src/cloud/cloud_delete_bitmap_action.h index 35739a7373efc87..ce507ee99917570 100644 --- a/be/src/cloud/cloud_delete_bitmap_action.h +++ b/be/src/cloud/cloud_delete_bitmap_action.h @@ -27,6 +27,7 @@ #include "olap/tablet.h" namespace doris { +#include "common/compile_check_begin.h" class HttpRequest; class ExecEnv; @@ -52,4 +53,5 @@ class CloudDeleteBitmapAction : public HttpHandlerWithAuth { CloudStorageEngine& _engine; DeleteBitmapActionType _delete_bitmap_action_type; }; +#include "common/compile_check_end.h" } // namespace doris \ No newline at end of file diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp index 336117d1012d4dd..fbf4b9cf303570c 100644 --- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp +++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp @@ -34,6 +34,7 @@ #include "runtime/memory/mem_tracker_limiter.h" namespace doris { +#include "common/compile_check_begin.h" CloudEngineCalcDeleteBitmapTask::CloudEngineCalcDeleteBitmapTask( CloudStorageEngine& engine, const TCalcDeleteBitmapRequest& cal_delete_bitmap_req, @@ -325,4 +326,5 @@ Status CloudTabletCalcDeleteBitmapTask::_handle_rowset( return status; } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index 8e21498b0d873d7..835e74ca7d5687f 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -64,6 +64,7 @@ #include "util/thrift_rpc_helper.h" namespace doris::cloud { +#include "common/compile_check_begin.h" using namespace ErrorCode; Status bthread_fork_join(const std::vector>& tasks, int concurrency) { @@ -717,7 +718,7 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_ "rowset_ids.size={},segment_ids.size={},vers.size={},delete_bitmaps.size={}", rowset_ids.size(), segment_ids.size(), vers.size(), delete_bitmaps.size()); } - for (size_t i = 0; i < rowset_ids.size(); i++) { + for (int i = 0; i < rowset_ids.size(); i++) { RowsetId rst_id; rst_id.init(rowset_ids[i]); delete_bitmap->merge( @@ -757,10 +758,10 @@ Status CloudMetaMgr::prepare_rowset(const RowsetMeta& rs_meta, Status st = retry_rpc("prepare rowset", req, &resp, &MetaService_Stub::prepare_rowset); if (!st.ok() && resp.status().code() == MetaServiceCode::ALREADY_EXISTED) { if (existed_rs_meta != nullptr && resp.has_existed_rowset_meta()) { - RowsetMetaPB doris_rs_meta = + RowsetMetaPB doris_rs_meta_tmp = cloud_rowset_meta_to_doris(std::move(*resp.mutable_existed_rowset_meta())); *existed_rs_meta = std::make_shared(); - (*existed_rs_meta)->init_from_pb(doris_rs_meta); + (*existed_rs_meta)->init_from_pb(doris_rs_meta_tmp); } return Status::AlreadyExist("failed to prepare rowset: {}", 
resp.status().msg()); } @@ -1286,4 +1287,5 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) { return total_inverted_index_size; } +#include "common/compile_check_end.h" } // namespace doris::cloud diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h index c49b036ad90c151..913ef59489a1b3c 100644 --- a/be/src/cloud/cloud_meta_mgr.h +++ b/be/src/cloud/cloud_meta_mgr.h @@ -27,6 +27,7 @@ #include "util/s3_util.h" namespace doris { +#include "common/compile_check_begin.h" class DeleteBitmap; class StreamLoadContext; @@ -124,4 +125,5 @@ class CloudMetaMgr { }; } // namespace cloud +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_storage_engine.cpp b/be/src/cloud/cloud_storage_engine.cpp index dc6abbac31ba1bf..b66a9cfbdb22451 100644 --- a/be/src/cloud/cloud_storage_engine.cpp +++ b/be/src/cloud/cloud_storage_engine.cpp @@ -52,6 +52,7 @@ #include "util/parse_util.h" namespace doris { +#include "common/compile_check_begin.h" using namespace std::literals; @@ -166,7 +167,8 @@ Status CloudStorageEngine::open() { _memtable_flush_executor = std::make_unique<MemTableFlushExecutor>(); // Use file cache disks number - _memtable_flush_executor->init(io::FileCacheFactory::instance()->get_cache_instance_size()); + _memtable_flush_executor->init( + cast_set<int>(io::FileCacheFactory::instance()->get_cache_instance_size())); _calc_delete_bitmap_executor = std::make_unique<CalcDeleteBitmapExecutor>(); _calc_delete_bitmap_executor->init(); @@ -321,7 +323,7 @@ void CloudStorageEngine::_check_file_cache_ttl_block_valid() { for (const auto& rowset : rowsets) { int64_t ttl_seconds = tablet->tablet_meta()->ttl_seconds(); if (rowset->newest_write_timestamp() + ttl_seconds <= UnixSeconds()) continue; - for (int64_t seg_id = 0; seg_id < rowset->num_segments(); seg_id++) { + for (uint32_t seg_id = 0; seg_id < rowset->num_segments(); seg_id++) { auto hash = Segment::file_cache_key(rowset->rowset_id().to_string(), seg_id); auto* file_cache = io::FileCacheFactory::instance()->get_by_path(hash); file_cache->update_ttl_atime(hash); @@ -350,11 +352,11 @@ void CloudStorageEngine::sync_storage_vault() { for (auto& [id, vault_info, path_format] : vault_infos) { auto fs = get_filesystem(id); - auto st = (fs == nullptr) - ? std::visit(VaultCreateFSVisitor {id, path_format}, vault_info) - : std::visit(RefreshFSVaultVisitor {id, std::move(fs), path_format}, - vault_info); - if (!st.ok()) [[unlikely]] { + auto status = (fs == nullptr) + ? std::visit(VaultCreateFSVisitor {id, path_format}, vault_info) + : std::visit(RefreshFSVaultVisitor {id, std::move(fs), path_format}, + vault_info); + if (!status.ok()) [[unlikely]] { LOG(WARNING) << vault_process_error(id, vault_info, std::move(status)); } } @@ -504,13 +506,13 @@ void CloudStorageEngine::_compaction_tasks_producer_callback() { /// If it is not cleaned up, the reference count of the tablet will always be greater than 1, /// thus cannot be collected by the garbage collector. 
(TabletManager::start_trash_sweep) for (const auto& tablet : tablets_compaction) { - Status st = submit_compaction_task(tablet, compaction_type); - if (st.ok()) continue; - if ((!st.is() && - !st.is()) || + Status status = submit_compaction_task(tablet, compaction_type); + if (status.ok()) continue; + if ((!status.is() && + !status.is()) || VLOG_DEBUG_IS_ON) { LOG(WARNING) << "failed to submit compaction task for tablet: " - << tablet->tablet_id() << ", err: " << st; + << tablet->tablet_id() << ", err: " << status; } } interval = config::generate_compaction_tasks_interval_ms; @@ -544,7 +546,8 @@ std::vector CloudStorageEngine::_generate_cloud_compaction_task int num_cumu = std::accumulate(submitted_cumu_compactions.begin(), submitted_cumu_compactions.end(), 0, [](int a, auto& b) { return a + b.second.size(); }); - int num_base = submitted_base_compactions.size() + submitted_full_compactions.size(); + int num_base = + cast_set(submitted_base_compactions.size() + submitted_full_compactions.size()); int n = thread_per_disk - num_cumu - num_base; if (compaction_type == CompactionType::BASE_COMPACTION) { // We need to reserve at least one thread for cumulative compaction, @@ -822,7 +825,7 @@ Status CloudStorageEngine::get_compaction_status_json(std::string* result) { // cumu std::string_view cumu = "CumulativeCompaction"; rapidjson::Value cumu_key; - cumu_key.SetString(cumu.data(), cumu.length(), root.GetAllocator()); + cumu_key.SetString(cumu.data(), cast_set(cumu.length()), root.GetAllocator()); rapidjson::Document cumu_arr; cumu_arr.SetArray(); for (auto& [tablet_id, v] : _submitted_cumu_compactions) { @@ -834,7 +837,7 @@ Status CloudStorageEngine::get_compaction_status_json(std::string* result) { // base std::string_view base = "BaseCompaction"; rapidjson::Value base_key; - base_key.SetString(base.data(), base.length(), root.GetAllocator()); + base_key.SetString(base.data(), cast_set(base.length()), root.GetAllocator()); rapidjson::Document base_arr; base_arr.SetArray(); for (auto& [tablet_id, _] : _submitted_base_compactions) { @@ -857,4 +860,5 @@ std::shared_ptr CloudStorageEngine::cumu_compac return _cumulative_compaction_policies.at(compaction_policy); } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index c27353be5235d07..d545f827bc4e218 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -50,6 +50,7 @@ #include "vec/common/schema_util.h" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1; @@ -380,7 +381,7 @@ void CloudTablet::delete_rowsets(const std::vector& to_delete, _tablet_meta->modify_rs_metas({}, rs_metas, false); } -int CloudTablet::delete_expired_stale_rowsets() { +uint64_t CloudTablet::delete_expired_stale_rowsets() { std::vector expired_rowsets; int64_t expired_stale_sweep_endtime = ::time(nullptr) - config::tablet_rowset_stale_sweep_time_sec; @@ -539,7 +540,7 @@ Result> CloudTablet::create_transient_rowset_write return RowsetFactory::create_rowset_writer(_engine, context, false) .transform([&](auto&& writer) { - writer->set_segment_start_id(rowset.num_segments()); + writer->set_segment_start_id(cast_set(rowset.num_segments())); return writer; }); } @@ -617,7 +618,8 @@ void CloudTablet::get_compaction_status(std::string* json_result) { } rapidjson::Value value; std::string version_str = rowset->get_rowset_info_str(); - value.SetString(version_str.c_str(), 
version_str.length(), versions_arr.GetAllocator()); + value.SetString(version_str.c_str(), cast_set(version_str.length()), + versions_arr.GetAllocator()); versions_arr.PushBack(value, versions_arr.GetAllocator()); last_version = ver.second; } @@ -630,7 +632,7 @@ void CloudTablet::get_compaction_status(std::string* json_result) { for (auto& rowset : stale_rowsets) { rapidjson::Value value; std::string version_str = rowset->get_rowset_info_str(); - value.SetString(version_str.c_str(), version_str.length(), + value.SetString(version_str.c_str(), cast_set(version_str.length()), stale_versions_arr.GetAllocator()); stale_versions_arr.PushBack(value, stale_versions_arr.GetAllocator()); } @@ -776,7 +778,7 @@ Status CloudTablet::calc_delete_bitmap_for_compaction( std::unique_ptr> location_map; if (config::enable_rowid_conversion_correctness_check && - tablet_schema()->cluster_key_idxes().empty()) { + tablet_schema()->cluster_key_uids().empty()) { location_map = std::make_unique>(); LOG(INFO) << "Location Map inited succ for tablet:" << tablet_id(); } @@ -925,4 +927,5 @@ void CloudTablet::build_tablet_report_info(TTabletInfo* tablet_info) { // but it may be used in the future. } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h index 80038e569ba2fc9..fc0d64a493d316a 100644 --- a/be/src/cloud/cloud_tablet.h +++ b/be/src/cloud/cloud_tablet.h @@ -92,7 +92,7 @@ class CloudTablet final : public BaseTablet { void clear_cache() override; // Return number of deleted stale rowsets - int delete_expired_stale_rowsets(); + uint64_t delete_expired_stale_rowsets(); bool has_stale_rowsets() const { return !_stale_rs_version_map.empty(); } diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp index e95c295ae1daa56..f90bf536f630182 100644 --- a/be/src/cloud/config.cpp +++ b/be/src/cloud/config.cpp @@ -20,6 +20,7 @@ #include "common/status.h" namespace doris::config { +#include "common/compile_check_begin.h" DEFINE_String(deploy_mode, ""); DEFINE_mString(cloud_unique_id, ""); @@ -76,4 +77,5 @@ DEFINE_mInt32(tablet_txn_info_min_expired_seconds, "120"); DEFINE_mBool(enable_use_cloud_unique_id_from_fe, "true"); DEFINE_mBool(enable_cloud_tablet_report, "true"); +#include "common/compile_check_end.h" } // namespace doris::config diff --git a/be/src/cloud/config.h b/be/src/cloud/config.h index b345e6355921a49..a8a7c0c48ec91f9 100644 --- a/be/src/cloud/config.h +++ b/be/src/cloud/config.h @@ -20,6 +20,7 @@ #include "common/config.h" namespace doris::config { +#include "common/compile_check_begin.h" DECLARE_String(deploy_mode); // deprecated do not configure directly @@ -110,4 +111,5 @@ DECLARE_mBool(enable_use_cloud_unique_id_from_fe); DECLARE_Bool(enable_cloud_tablet_report); +#include "common/compile_check_end.h" } // namespace doris::config diff --git a/be/src/cloud/pb_convert.cpp b/be/src/cloud/pb_convert.cpp index bff7d8388d30d81..e655ceacf2f08d4 100644 --- a/be/src/cloud/pb_convert.cpp +++ b/be/src/cloud/pb_convert.cpp @@ -324,7 +324,7 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, const TabletSchemaPB out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->CopyFrom(in.cluster_key_idxes()); + out->mutable_cluster_key_uids()->CopyFrom(in.cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); 
out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); @@ -353,7 +353,7 @@ void doris_tablet_schema_to_cloud(TabletSchemaCloudPB* out, TabletSchemaPB&& in) out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->Swap(in.mutable_cluster_key_idxes()); + out->mutable_cluster_key_uids()->Swap(in.mutable_cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); @@ -395,7 +395,7 @@ void cloud_tablet_schema_to_doris(TabletSchemaPB* out, const TabletSchemaCloudPB out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->CopyFrom(in.cluster_key_idxes()); + out->mutable_cluster_key_uids()->CopyFrom(in.cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->CopyFrom(in.row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); @@ -425,7 +425,7 @@ void cloud_tablet_schema_to_doris(TabletSchemaPB* out, TabletSchemaCloudPB&& in) out->set_store_row_column(in.store_row_column()); out->set_enable_single_replica_compaction(in.enable_single_replica_compaction()); out->set_skip_write_index_on_load(in.skip_write_index_on_load()); - out->mutable_cluster_key_idxes()->Swap(in.mutable_cluster_key_idxes()); + out->mutable_cluster_key_uids()->Swap(in.mutable_cluster_key_uids()); out->set_is_dynamic_schema(in.is_dynamic_schema()); out->mutable_row_store_column_unique_ids()->Swap(in.mutable_row_store_column_unique_ids()); out->set_inverted_index_storage_format(in.inverted_index_storage_format()); diff --git a/be/src/common/compile_check_begin.h b/be/src/common/compile_check_begin.h index 6da403f28948857..4d860d39d1cf72e 100644 --- a/be/src/common/compile_check_begin.h +++ b/be/src/common/compile_check_begin.h @@ -23,8 +23,9 @@ #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic error "-Wconversion" +#pragma clang diagnostic error "-Wshadow" #pragma clang diagnostic ignored "-Wsign-conversion" #pragma clang diagnostic ignored "-Wfloat-conversion" #endif -//#include "common/compile_check_begin.h" \ No newline at end of file +//#include "common/compile_check_begin.h" diff --git a/be/src/common/compile_check_end.h b/be/src/common/compile_check_end.h index 0897965dc74a3dc..40df41b6bdfc6ca 100644 --- a/be/src/common/compile_check_end.h +++ b/be/src/common/compile_check_end.h @@ -20,4 +20,4 @@ #endif #undef COMPILE_CHECK -// #include "common/compile_check_end.h" \ No newline at end of file +// #include "common/compile_check_end.h" diff --git a/be/src/exec/schema_scanner/schema_columns_scanner.cpp b/be/src/exec/schema_scanner/schema_columns_scanner.cpp index 8325a7f5dc4f2d8..b60dfc3d203f898 100644 --- a/be/src/exec/schema_scanner/schema_columns_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_columns_scanner.cpp @@ -450,7 +450,19 @@ Status SchemaColumnsScanner::_fill_block_impl(vectorized::Block* block) { RETURN_IF_ERROR(fill_dest_column_for_range(block, 4, 
datas)); } // COLUMN_DEFAULT - { RETURN_IF_ERROR(fill_dest_column_for_range(block, 5, null_datas)); } + { + std::vector<StringRef> strs(columns_num); + for (int i = 0; i < columns_num; ++i) { + if (_desc_result.columns[i].columnDesc.__isset.defaultValue) { + strs[i] = StringRef(_desc_result.columns[i].columnDesc.defaultValue.c_str(), + _desc_result.columns[i].columnDesc.defaultValue.length()); + datas[i] = strs.data() + i; + } else { + datas[i] = nullptr; + } + } + RETURN_IF_ERROR(fill_dest_column_for_range(block, 5, datas)); + } // IS_NULLABLE { StringRef str_yes = StringRef("YES", 3); diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 104dc44ebec8ae3..82dc122e19f5ef5 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -376,7 +376,7 @@ Status BaseTablet::calc_delete_bitmap_between_segments( seq_col_length = _tablet_meta->tablet_schema()->column(seq_col_idx).length() + 1; } size_t rowid_length = 0; - if (!_tablet_meta->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet_meta->tablet_schema()->cluster_key_uids().empty()) { rowid_length = PrimaryKeyIndexReader::ROW_ID_LENGTH; } @@ -438,7 +438,6 @@ Status BaseTablet::lookup_row_data(const Slice& encoded_key, const RowLocation& StringRef value = string_column->get_data_at(0); values = value.to_string(); if (write_to_cache) { - StringRef value = string_column->get_data_at(0); RowCache::instance()->insert({tablet_id(), encoded_key}, Slice {value.data, value.size}); } return Status::OK(); @@ -461,7 +460,7 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest seq_col_length = schema->column(schema->sequence_col_idx()).length() + 1; } size_t rowid_length = 0; - if (with_rowid && !schema->cluster_key_idxes().empty()) { + if (with_rowid && !schema->cluster_key_uids().empty()) { rowid_length = PrimaryKeyIndexReader::ROW_ID_LENGTH; } Slice key_without_seq = @@ -476,12 +475,12 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest int num_segments = cast_set<int>(rs->num_segments()); DCHECK_EQ(segments_key_bounds.size(), num_segments); std::vector picked_segments; - for (int i = num_segments - 1; i >= 0; i--) { - if (key_without_seq.compare(segments_key_bounds[i].max_key()) > 0 || - key_without_seq.compare(segments_key_bounds[i].min_key()) < 0) { + for (int j = num_segments - 1; j >= 0; j--) { + if (key_without_seq.compare(segments_key_bounds[j].max_key()) > 0 || + key_without_seq.compare(segments_key_bounds[j].min_key()) < 0) { continue; } - picked_segments.emplace_back(i); + picked_segments.emplace_back(j); } if (picked_segments.empty()) { continue; } @@ -654,7 +653,7 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, Slice key = Slice(index_column->get_data_at(i).data, index_column->get_data_at(i).size); RowLocation loc; // calculate row id - if (!_tablet_meta->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet_meta->tablet_schema()->cluster_key_uids().empty()) { size_t seq_col_length = 0; if (_tablet_meta->tablet_schema()->has_sequence_col()) { seq_col_length = @@ -778,11 +777,11 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, if (config::enable_merge_on_write_correctness_check) { RowsetIdUnorderedSet rowsetids; - for (const auto& rowset : specified_rowsets) { - rowsetids.emplace(rowset->rowset_id()); + for (const auto& specified_rowset : specified_rowsets) { + rowsetids.emplace(specified_rowset->rowset_id()); VLOG_NOTICE << "[tabletID:" << tablet_id() << "]" << 
"[add_sentinel_mark_to_delete_bitmap][end_version:" << end_version << "]" - << "add:" << rowset->rowset_id(); + << "add:" << specified_rowset->rowset_id(); } add_sentinel_mark_to_delete_bitmap(delete_bitmap.get(), rowsetids); } @@ -892,11 +891,11 @@ Status BaseTablet::fetch_value_through_row_column(RowsetSharedPtr input_rowset, std::vector default_values; default_values.resize(cids.size()); for (int i = 0; i < cids.size(); ++i) { - const TabletColumn& column = tablet_schema.column(cids[i]); + const TabletColumn& tablet_column = tablet_schema.column(cids[i]); vectorized::DataTypePtr type = - vectorized::DataTypeFactory::instance().create_data_type(column); - col_uid_to_idx[column.unique_id()] = i; - default_values[i] = column.default_value(); + vectorized::DataTypeFactory::instance().create_data_type(tablet_column); + col_uid_to_idx[tablet_column.unique_id()] = i; + default_values[i] = tablet_column.default_value(); serdes[i] = type->get_serde(); } vectorized::JsonbSerializeUtil::jsonb_to_block(serdes, *string_column, col_uid_to_idx, block, @@ -1326,12 +1325,12 @@ Status BaseTablet::check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap required_rowsets_arr.PushBack(value, required_rowsets_arr.GetAllocator()); } } else { - std::vector rowsets; + std::vector tablet_rowsets; { std::shared_lock meta_rlock(_meta_lock); - rowsets = get_rowset_by_ids(&rowset_ids); + tablet_rowsets = get_rowset_by_ids(&rowset_ids); } - for (const auto& rowset : rowsets) { + for (const auto& rowset : tablet_rowsets) { rapidjson::Value value; std::string version_str = rowset->get_rowset_info_str(); value.SetString(version_str.c_str(), @@ -1439,12 +1438,12 @@ Status BaseTablet::update_delete_bitmap(const BaseTabletSPtr& self, TabletTxnInf txn_info->partial_update_info->max_version_in_flush_phase; DCHECK(max_version_in_flush_phase != -1); std::vector remained_rowsets; - for (const auto& rowset : specified_rowsets) { - if (rowset->end_version() <= max_version_in_flush_phase && - rowset->produced_by_compaction()) { - rowsets_skip_alignment.emplace_back(rowset); + for (const auto& specified_rowset : specified_rowsets) { + if (specified_rowset->end_version() <= max_version_in_flush_phase && + specified_rowset->produced_by_compaction()) { + rowsets_skip_alignment.emplace_back(specified_rowset); } else { - remained_rowsets.emplace_back(rowset); + remained_rowsets.emplace_back(specified_rowset); } } if (!rowsets_skip_alignment.empty()) { @@ -1758,7 +1757,7 @@ std::vector BaseTablet::get_snapshot_rowset(bool include_stale_ void BaseTablet::calc_consecutive_empty_rowsets( std::vector* empty_rowsets, - const std::vector& candidate_rowsets, int limit) { + const std::vector& candidate_rowsets, int64_t limit) { int len = cast_set(candidate_rowsets.size()); for (int i = 0; i < len - 1; ++i) { auto rowset = candidate_rowsets[i]; diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index f961f4c49eedd66..bd46cdbbe14854a 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -276,7 +276,7 @@ class BaseTablet { // Find the first consecutive empty rowsets. 
output->size() >= limit void calc_consecutive_empty_rowsets(std::vector* empty_rowsets, const std::vector& candidate_rowsets, - int limit); + int64_t limit); // Return the merged schema of all rowsets virtual TabletSchemaSPtr merged_tablet_schema() const { return _max_version_schema; } diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 98d7787550d197f..8c45c20f7994270 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -191,14 +191,14 @@ Status Compaction::merge_input_rowsets() { SCOPED_TIMER(_merge_rowsets_latency_timer); // 1. Merge segment files and write bkd inverted index if (_is_vertical) { - if (!_tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet->tablet_schema()->cluster_key_uids().empty()) { RETURN_IF_ERROR(update_delete_bitmap()); } res = Merger::vertical_merge_rowsets(_tablet, compaction_type(), *_cur_tablet_schema, input_rs_readers, _output_rs_writer.get(), get_avg_segment_rows(), way_num, &_stats); } else { - if (!_tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet->tablet_schema()->cluster_key_uids().empty()) { return Status::InternalError( "mow table with cluster keys does not support non vertical compaction"); } @@ -966,7 +966,7 @@ Status CompactionMixin::modify_rowsets() { } std::unique_ptr> location_map; if (config::enable_rowid_conversion_correctness_check && - tablet()->tablet_schema()->cluster_key_idxes().empty()) { + tablet()->tablet_schema()->cluster_key_uids().empty()) { location_map = std::make_unique>(); LOG(INFO) << "Location Map inited succ for tablet:" << _tablet->tablet_id(); } @@ -983,7 +983,7 @@ Status CompactionMixin::modify_rowsets() { if (missed_rows) { missed_rows_size = missed_rows->size(); std::size_t merged_missed_rows_size = _stats.merged_rows; - if (!_tablet->tablet_meta()->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet->tablet_meta()->tablet_schema()->cluster_key_uids().empty()) { merged_missed_rows_size += _stats.filtered_rows; } if (_tablet->tablet_state() == TABLET_RUNNING && diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index 5db3d89378bcc28..765f67a07c78845 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -350,7 +350,7 @@ Status MemTable::_sort_by_cluster_keys() { } Tie tie = Tie(0, mutable_block.rows()); - for (auto cid : _tablet_schema->cluster_key_idxes()) { + for (auto cid : _tablet_schema->cluster_key_uids()) { auto index = _tablet_schema->field_index(cid); if (index == -1) { return Status::InternalError("could not find cluster key column with unique_id=" + @@ -619,7 +619,7 @@ Status MemTable::_to_block(std::unique_ptr* res) { (_skip_bitmap_col_idx == -1) ? 
_aggregate() : _aggregate(); } if (_keys_type == KeysType::UNIQUE_KEYS && _enable_unique_key_mow && - !_tablet_schema->cluster_key_idxes().empty()) { + !_tablet_schema->cluster_key_uids().empty()) { if (_partial_update_mode != UniqueKeyUpdateModePB::UPSERT) { return Status::InternalError( "Partial update for mow with cluster keys is not supported"); diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index b207cc4c5ad22b2..975aaa0bca3de5b 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -86,7 +86,7 @@ Status Merger::vmerge_rowsets(BaseTabletSPtr tablet, ReaderType reader_type, merge_tablet_schema->merge_dropped_columns(*del_pred_rs->tablet_schema()); } reader_params.tablet_schema = merge_tablet_schema; - if (!tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!tablet->tablet_schema()->cluster_key_uids().empty()) { reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); } @@ -173,8 +173,8 @@ void Merger::vertical_split_columns(const TabletSchema& tablet_schema, if (delete_sign_idx != -1) { key_columns.emplace_back(delete_sign_idx); } - if (!tablet_schema.cluster_key_idxes().empty()) { - for (const auto& cid : tablet_schema.cluster_key_idxes()) { + if (!tablet_schema.cluster_key_uids().empty()) { + for (const auto& cid : tablet_schema.cluster_key_uids()) { auto idx = tablet_schema.field_index(cid); DCHECK(idx >= 0) << "could not find cluster key column with unique_id=" << cid << " in tablet schema, table_id=" << tablet_schema.table_id(); @@ -186,7 +186,7 @@ void Merger::vertical_split_columns(const TabletSchema& tablet_schema, // cluster key unique ids: [3, 1, 4] // the key_columns should be [0, 1, 3, 5] // the key_group_cluster_key_idxes should be [2, 1, 3] - for (const auto& cid : tablet_schema.cluster_key_idxes()) { + for (const auto& cid : tablet_schema.cluster_key_uids()) { auto idx = tablet_schema.field_index(cid); for (auto i = 0; i < key_columns.size(); ++i) { if (idx == key_columns[i]) { @@ -261,7 +261,7 @@ Status Merger::vertical_compact_one_group( reader_params.tablet_schema = merge_tablet_schema; bool has_cluster_key = false; - if (!tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!tablet->tablet_schema()->cluster_key_uids().empty()) { reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); has_cluster_key = true; } diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 736bdaa99304d37..90d0883984e78b6 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -1071,7 +1071,8 @@ Status StorageEngine::_submit_compaction_task(TabletSharedPtr tablet, if (!tablet->can_do_compaction(tablet->data_dir()->path_hash(), compaction_type)) { LOG(INFO) << "Tablet state has been changed, no need to begin this compaction " "task, tablet_id=" - << tablet->tablet_id() << "tablet_state=" << tablet->tablet_state(); + << tablet->tablet_id() << ", tablet_state=" << tablet->tablet_state(); + _pop_tablet_from_submitted_compaction(tablet, compaction_type); return; } tablet->compaction_stage = CompactionStage::EXECUTING; diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 198b4e8595ed207..ab5bc48db80b00a 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -60,6 +60,7 @@ #include "vec/data_types/data_type_factory.hpp" namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; namespace { @@ -475,15 +476,15 @@ Status 
BetaRowsetWriter::_rename_compacted_segments(int64_t begin, int64_t end) return Status::OK(); } -void BetaRowsetWriter::_clear_statistics_for_deleting_segments_unsafe(uint64_t begin, - uint64_t end) { +void BetaRowsetWriter::_clear_statistics_for_deleting_segments_unsafe(uint32_t begin, + uint32_t end) { VLOG_DEBUG << "_segid_statistics_map clear record segid range from:" << begin << " to:" << end; - for (int i = begin; i <= end; ++i) { + for (uint32_t i = begin; i <= end; ++i) { _segid_statistics_map.erase(i); } } -Status BetaRowsetWriter::_rename_compacted_segment_plain(uint64_t seg_id) { +Status BetaRowsetWriter::_rename_compacted_segment_plain(uint32_t seg_id) { if (seg_id == _num_segcompacted) { ++_num_segcompacted; return Status::OK(); @@ -581,7 +582,7 @@ Status BetaRowsetWriter::_segcompaction_if_necessary() { Status status = Status::OK(); // if not doing segcompaction, just check segment number if (!config::enable_segcompaction || !_context.enable_segcompaction || - !_context.tablet_schema->cluster_key_idxes().empty() || + !_context.tablet_schema->cluster_key_uids().empty() || _context.tablet_schema->num_variant_columns() > 0) { return _check_segment_number_limit(_num_segment); } @@ -653,7 +654,7 @@ Status BaseBetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { _num_rows_written += rowset->num_rows(); _total_data_size += rowset->rowset_meta()->data_disk_size(); _total_index_size += rowset->rowset_meta()->index_disk_size(); - _num_segment += rowset->num_segments(); + _num_segment += cast_set(rowset->num_segments()); // append key_bounds to current rowset RETURN_IF_ERROR(rowset->get_segments_key_bounds(&_segments_encoded_key_bounds)); @@ -1043,7 +1044,7 @@ Status BaseBetaRowsetWriter::add_segment(uint32_t segment_id, const SegmentStati if (segment_id >= _segment_num_rows.size()) { _segment_num_rows.resize(segment_id + 1); } - _segment_num_rows[segid_offset] = segstat.row_num; + _segment_num_rows[segid_offset] = cast_set(segstat.row_num); } VLOG_DEBUG << "_segid_statistics_map add new record. 
segment_id:" << segment_id << " row_num:" << segstat.row_num << " data_size:" << segstat.data_size @@ -1111,4 +1112,5 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction( return Status::OK(); } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index d96301af22630d1..a69d1063a55086e 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -298,9 +298,9 @@ class BetaRowsetWriter : public BaseBetaRowsetWriter { Status _load_noncompacted_segment(segment_v2::SegmentSharedPtr& segment, int32_t segment_id); Status _find_longest_consecutive_small_segment(SegCompactionCandidatesSharedPtr& segments); Status _rename_compacted_segments(int64_t begin, int64_t end); - Status _rename_compacted_segment_plain(uint64_t seg_id); + Status _rename_compacted_segment_plain(uint32_t seg_id); Status _rename_compacted_indices(int64_t begin, int64_t end, uint64_t seg_id); - void _clear_statistics_for_deleting_segments_unsafe(uint64_t begin, uint64_t end); + void _clear_statistics_for_deleting_segments_unsafe(uint32_t begin, uint32_t end); StorageEngine& _engine; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index d2c2920d4e6f74e..513c0be4f8cd14a 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -966,7 +966,7 @@ Status Segment::lookup_row_key(const Slice& key, const TabletSchema* latest_sche std::string* encoded_seq_value, OlapReaderStatistics* stats) { RETURN_IF_ERROR(load_pk_index_and_bf()); bool has_seq_col = latest_schema->has_sequence_col(); - bool has_rowid = !latest_schema->cluster_key_idxes().empty(); + bool has_rowid = !latest_schema->cluster_key_uids().empty(); size_t seq_col_length = 0; if (has_seq_col) { seq_col_length = latest_schema->column(latest_schema->sequence_col_idx()).length() + 1; @@ -1076,7 +1076,7 @@ Status Segment::read_key_by_rowid(uint32_t row_id, std::string* key) { RETURN_IF_ERROR(iter->next_batch(&num_read, index_column)); CHECK(num_read == 1); // trim row id - if (_tablet_schema->cluster_key_idxes().empty()) { + if (_tablet_schema->cluster_key_uids().empty()) { *key = index_column->get_data_at(0).to_string(); } else { Slice sought_key = diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 4ee73547c117e9d..f5f46e938650e12 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -377,7 +377,7 @@ Status SegmentIterator::_lazy_init() { _row_bitmap.addRange(0, _segment->num_rows()); // z-order can not use prefix index if (_segment->_tablet_schema->sort_type() != SortType::ZORDER && - _segment->_tablet_schema->cluster_key_idxes().empty()) { + _segment->_tablet_schema->cluster_key_uids().empty()) { RETURN_IF_ERROR(_get_row_ranges_by_keys()); } RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); @@ -1193,7 +1193,7 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool bool has_seq_col = _segment->_tablet_schema->has_sequence_col(); // Used to get key range from primary key index, // for mow with cluster key table, we should get key range from short key index. 
- DCHECK(_segment->_tablet_schema->cluster_key_idxes().empty()); + DCHECK(_segment->_tablet_schema->cluster_key_uids().empty()); // if full key is exact_match, the primary key without sequence column should also the same if (has_seq_col && !exact_match) { diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index c6c9664be4b2327..fe465f98a2aad2e 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -103,7 +103,7 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, << ", table_id=" << _tablet_schema->table_id() << ", num_key_columns=" << _num_sort_key_columns << ", num_short_key_columns=" << _num_short_key_columns - << ", cluster_key_columns=" << _tablet_schema->cluster_key_idxes().size(); + << ", cluster_key_columns=" << _tablet_schema->cluster_key_uids().size(); } for (size_t cid = 0; cid < _num_sort_key_columns; ++cid) { const auto& column = _tablet_schema->column(cid); @@ -125,8 +125,8 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, // cluster keys _key_coders.clear(); _key_index_size.clear(); - _num_sort_key_columns = _tablet_schema->cluster_key_idxes().size(); - for (auto cid : _tablet_schema->cluster_key_idxes()) { + _num_sort_key_columns = _tablet_schema->cluster_key_uids().size(); + for (auto cid : _tablet_schema->cluster_key_uids()) { const auto& column = _tablet_schema->column_by_uid(cid); _key_coders.push_back(get_key_coder(column.type())); _key_index_size.push_back(column.index_length()); @@ -545,6 +545,39 @@ Status SegmentWriter::probe_key_for_mow( return Status::OK(); } +Status SegmentWriter::partial_update_preconditions_check(size_t row_pos) { + if (!_is_mow()) { + auto msg = fmt::format( + "Can only do partial update on merge-on-write unique table, but found: " + "keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}", + _tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (_opts.rowset_ctx->partial_update_info == nullptr) { + auto msg = + fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (!_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()) { + auto msg = fmt::format( + "in fixed partial update code, but update_mode={}, please check, tablet_id={}", + _opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (row_pos != 0) { + auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + return Status::OK(); +} + // for partial update, we should do following steps to fill content of block: // 1. set block data to data convertor, and get all key_column's converted slice // 2. 
get pk of input block, and read missing columns @@ -562,11 +595,7 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* block->columns(), _tablet_schema->num_key_columns(), _tablet_schema->num_columns())); } - DCHECK(_is_mow()); - - DCHECK(_opts.rowset_ctx->partial_update_info); - DCHECK(_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()); - DCHECK(row_pos == 0); + RETURN_IF_ERROR(partial_update_preconditions_check(row_pos)); // find missing column cids const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids; @@ -788,7 +817,7 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po seq_column, num_rows, true)); // 2. generate short key index (use cluster key) key_columns.clear(); - for (const auto& cid : _tablet_schema->cluster_key_idxes()) { + for (const auto& cid : _tablet_schema->cluster_key_uids()) { // find cluster key index in tablet schema auto cluster_key_index = _tablet_schema->field_index(cid); if (cluster_key_index == -1) { @@ -1290,7 +1319,7 @@ inline bool SegmentWriter::_is_mow() { } inline bool SegmentWriter::_is_mow_with_cluster_key() { - return _is_mow() && !_tablet_schema->cluster_key_idxes().empty(); + return _is_mow() && !_tablet_schema->cluster_key_uids().empty(); } } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index a1b7491a6696731..60300383d7287d1 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -105,6 +105,7 @@ class SegmentWriter { const std::function& found_cb, const std::function& not_found_cb, PartialUpdateStats& stats); + Status partial_update_preconditions_check(size_t row_pos); Status append_block_with_partial_content(const vectorized::Block* block, size_t row_pos, size_t num_rows); Status append_block_with_variant_subcolumns(vectorized::Block& data); diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index ce16e2d502b6225..0846b0fc1186a84 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -109,7 +109,7 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 << ", table_id=" << _tablet_schema->table_id() << ", num_key_columns=" << _num_sort_key_columns << ", num_short_key_columns=" << _num_short_key_columns - << ", cluster_key_columns=" << _tablet_schema->cluster_key_idxes().size(); + << ", cluster_key_columns=" << _tablet_schema->cluster_key_uids().size(); } for (size_t cid = 0; cid < _num_sort_key_columns; ++cid) { const auto& column = _tablet_schema->column(cid); @@ -131,8 +131,8 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 // cluster keys _key_coders.clear(); _key_index_size.clear(); - _num_sort_key_columns = _tablet_schema->cluster_key_idxes().size(); - for (auto cid : _tablet_schema->cluster_key_idxes()) { + _num_sort_key_columns = _tablet_schema->cluster_key_uids().size(); + for (auto cid : _tablet_schema->cluster_key_uids()) { const auto& column = _tablet_schema->column_by_uid(cid); _key_coders.push_back(get_key_coder(column.type())); _key_index_size.push_back(column.index_length()); @@ -418,6 +418,51 @@ Status VerticalSegmentWriter::_probe_key_for_mow( return Status::OK(); } +Status VerticalSegmentWriter::_partial_update_preconditions_check(size_t row_pos, 
+ bool is_flexible_update) { + if (!_is_mow()) { + auto msg = fmt::format( + "Can only do partial update on merge-on-write unique table, but found: " + "keys_type={}, _opts.enable_unique_key_merge_on_write={}, tablet_id={}", + _tablet_schema->keys_type(), _opts.enable_unique_key_merge_on_write, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (_opts.rowset_ctx->partial_update_info == nullptr) { + auto msg = + fmt::format("partial_update_info should not be nullptr, please check, tablet_id={}", + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + if (!is_flexible_update) { + if (!_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()) { + auto msg = fmt::format( + "in fixed partial update code, but update_mode={}, please check, tablet_id={}", + _opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + } else { + if (!_opts.rowset_ctx->partial_update_info->is_flexible_partial_update()) { + auto msg = fmt::format( + "in flexible partial update code, but update_mode={}, please check, " + "tablet_id={}", + _opts.rowset_ctx->partial_update_info->update_mode(), _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + } + if (row_pos != 0) { + auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos, + _tablet->tablet_id()); + DCHECK(false) << msg; + return Status::InternalError(msg); + } + return Status::OK(); +} + // for partial update, we should do following steps to fill content of block: // 1. set block data to data convertor, and get all key_column's converted slice // 2. get pk of input block, and read missing columns @@ -427,11 +472,7 @@ Status VerticalSegmentWriter::_probe_key_for_mow( // 3. 
set columns to data convertor and then write all columns Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& data, vectorized::Block& full_block) { - DCHECK(_is_mow()); - DCHECK(_opts.rowset_ctx->partial_update_info != nullptr); - DCHECK(_opts.rowset_ctx->partial_update_info->is_fixed_partial_update()); - DCHECK(data.row_pos == 0); - + RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos, false)); // create full block and fill with input columns full_block = _tablet_schema->create_block(); const auto& including_cids = _opts.rowset_ctx->partial_update_info->update_cids; @@ -580,10 +621,7 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da Status VerticalSegmentWriter::_append_block_with_flexible_partial_content( RowsInBlock& data, vectorized::Block& full_block) { - DCHECK(_is_mow()); - DCHECK(_opts.rowset_ctx->partial_update_info != nullptr); - DCHECK(_opts.rowset_ctx->partial_update_info->is_flexible_partial_update()); - DCHECK(data.row_pos == 0); + RETURN_IF_ERROR(_partial_update_preconditions_check(data.row_pos, true)); // data.block has the same schema with full_block DCHECK(data.block->columns() == _tablet_schema->num_columns()); @@ -1149,9 +1187,9 @@ Status VerticalSegmentWriter::write_batch() { } auto column_unique_id = _tablet_schema->column(cid).unique_id(); if (_is_mow_with_cluster_key() && - std::find(_tablet_schema->cluster_key_idxes().begin(), - _tablet_schema->cluster_key_idxes().end(), - column_unique_id) != _tablet_schema->cluster_key_idxes().end()) { + std::find(_tablet_schema->cluster_key_uids().begin(), + _tablet_schema->cluster_key_uids().end(), + column_unique_id) != _tablet_schema->cluster_key_uids().end()) { cid_to_column[column_unique_id] = column; } RETURN_IF_ERROR(_column_writers[cid]->append(column->get_nullmap(), column->get_data(), @@ -1213,7 +1251,7 @@ Status VerticalSegmentWriter::_generate_key_index( data.num_rows, true)); // 2. 
generate short key index (use cluster key) std::vector short_key_columns; - for (const auto& cid : _tablet_schema->cluster_key_idxes()) { + for (const auto& cid : _tablet_schema->cluster_key_uids()) { short_key_columns.push_back(cid_to_column[cid]); } RETURN_IF_ERROR(_generate_short_key_index(short_key_columns, data.num_rows, short_key_pos)); @@ -1572,7 +1610,7 @@ inline bool VerticalSegmentWriter::_is_mow() { } inline bool VerticalSegmentWriter::_is_mow_with_cluster_key() { - return _is_mow() && !_tablet_schema->cluster_key_idxes().empty(); + return _is_mow() && !_tablet_schema->cluster_key_uids().empty(); } } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h index 951e9c2e2838c3d..8cec6ed4d1abd66 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h @@ -175,6 +175,7 @@ class VerticalSegmentWriter { const std::function& found_cb, const std::function& not_found_cb, PartialUpdateStats& stats); + Status _partial_update_preconditions_check(size_t row_pos, bool is_flexible_update); Status _append_block_with_partial_content(RowsInBlock& data, vectorized::Block& full_block); Status _append_block_with_flexible_partial_content(RowsInBlock& data, vectorized::Block& full_block); diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index cd1b10d733a88c5..cdb637b1c426470 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -198,9 +198,9 @@ class MultiBlockMerger { pushed_row_refs.push_back(row_refs[i]); } } - if (!_tablet->tablet_schema()->cluster_key_idxes().empty()) { + if (!_tablet->tablet_schema()->cluster_key_uids().empty()) { std::vector ids; - for (const auto& cid : _tablet->tablet_schema()->cluster_key_idxes()) { + for (const auto& cid : _tablet->tablet_schema()->cluster_key_uids()) { auto index = _tablet->tablet_schema()->field_index(cid); if (index == -1) { return Status::InternalError( diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 59f0e1893d84d24..33fee7ca3509005 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -1170,14 +1170,14 @@ bool TabletManager::_move_tablet_to_trash(const TabletSharedPtr& tablet) { if (tablet_in_not_shutdown->tablet_path() != tablet->tablet_path()) { LOG(INFO) << "tablet path not eq shutdown tablet path, move it to trash, tablet_id=" << tablet_in_not_shutdown->tablet_id() - << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() - << " shutdown tablet path=" << tablet->tablet_path(); + << ", mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() + << ", shutdown tablet path=" << tablet->tablet_path(); return tablet->data_dir()->move_to_trash(tablet->tablet_path()); } else { LOG(INFO) << "tablet path eq shutdown tablet path, not move to trash, tablet_id=" << tablet_in_not_shutdown->tablet_id() - << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() - << " shutdown tablet path=" << tablet->tablet_path(); + << ", mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() + << ", shutdown tablet path=" << tablet->tablet_path(); return true; } } @@ -1282,7 +1282,7 @@ Status TabletManager::register_transition_tablet(int64_t tablet_id, std::string // not found shard.tablets_under_transition[tablet_id] = std::make_tuple(reason, thread_id, 1); LOG(INFO) << "add tablet_id= " << tablet_id << " to map, 
reason=" << reason - << " lock times=1 thread_id_in_map=" << thread_id; + << ", lock times=1, thread_id_in_map=" << thread_id; return Status::OK(); } else { // found @@ -1290,15 +1290,15 @@ Status TabletManager::register_transition_tablet(int64_t tablet_id, std::string if (thread_id != thread_id_in_map) { // other thread, failed LOG(INFO) << "tablet_id = " << tablet_id << " is doing " << r - << " thread_id_in_map=" << thread_id_in_map << " , add reason=" << reason - << " thread_id=" << thread_id; + << ", thread_id_in_map=" << thread_id_in_map << " , add reason=" << reason + << ", thread_id=" << thread_id; return Status::InternalError("{} failed try later, tablet_id={}", reason, tablet_id); } // add lock times ++lock_times; LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason - << " lock times=" << lock_times << " thread_id_in_map=" << thread_id_in_map; + << ", lock times=" << lock_times << ", thread_id_in_map=" << thread_id_in_map; return Status::OK(); } } @@ -1322,10 +1322,10 @@ void TabletManager::unregister_transition_tablet(int64_t tablet_id, std::string --lock_times; if (lock_times != 0) { LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason - << " left=" << lock_times << " thread_id_in_map=" << thread_id_in_map; + << ", left=" << lock_times << ", thread_id_in_map=" << thread_id_in_map; } else { LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason - << " thread_id_in_map=" << thread_id_in_map; + << ", thread_id_in_map=" << thread_id_in_map; shard.tablets_under_transition.erase(tablet_id); } } diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index d1746836e231c2c..3247f34656fb5d8 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -57,6 +57,7 @@ using std::unordered_map; using std::vector; namespace doris { +#include "common/compile_check_begin.h" using namespace ErrorCode; TabletMetaSharedPtr TabletMeta::create( @@ -106,7 +107,7 @@ TabletMeta::TabletMeta() _delete_bitmap(new DeleteBitmap(_tablet_id)) {} TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, - int64_t replica_id, int32_t schema_hash, uint64_t shard_id, + int64_t replica_id, int32_t schema_hash, int32_t shard_id, const TTabletSchema& tablet_schema, uint32_t next_unique_id, const std::unordered_map& col_ordinal_to_unique_id, TabletUid tablet_uid, TTabletType::type tabletType, @@ -219,8 +220,8 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id schema->set_sort_type(SortType::LEXICAL); } schema->set_sort_col_num(tablet_schema.sort_col_num); - for (const auto& i : tablet_schema.cluster_key_idxes) { - schema->add_cluster_key_idxes(i); + for (const auto& i : tablet_schema.cluster_key_uids) { + schema->add_cluster_key_uids(i); } tablet_meta_pb.set_in_restore_mode(false); @@ -574,7 +575,8 @@ void TabletMeta::serialize(string* meta_binary) { Status TabletMeta::deserialize(std::string_view meta_binary) { TabletMetaPB tablet_meta_pb; - bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(), meta_binary.size()); + bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(), + static_cast(meta_binary.size())); if (!parsed) { return Status::Error("parse tablet meta failed"); } @@ -667,7 +669,7 @@ void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size(); CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size && seg_maps_size == 
versions_size); - for (size_t i = 0; i < rst_ids_size; ++i) { + for (int i = 0; i < rst_ids_size; ++i) { RowsetId rst_id; rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i)); auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i); @@ -1309,4 +1311,5 @@ std::string tablet_state_name(TabletState state) { } } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index a8c82a4abd29215..25f6bcd569be430 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -51,6 +51,7 @@ #include "util/uid_util.h" namespace json2pb { +#include "common/compile_check_begin.h" struct Pb2JsonOptions; } // namespace json2pb @@ -100,7 +101,7 @@ class TabletMeta : public MetadataAdder { TabletMeta(); TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, int64_t replica_id, - int32_t schema_hash, uint64_t shard_id, const TTabletSchema& tablet_schema, + int32_t schema_hash, int32_t shard_id, const TTabletSchema& tablet_schema, uint32_t next_unique_id, const std::unordered_map& col_ordinal_to_unique_id, TabletUid tablet_uid, TTabletType::type tabletType, @@ -152,7 +153,7 @@ class TabletMeta : public MetadataAdder { int64_t replica_id() const; void set_replica_id(int64_t replica_id) { _replica_id = replica_id; } int32_t schema_hash() const; - int16_t shard_id() const; + int32_t shard_id() const; void set_shard_id(int32_t shard_id); int64_t creation_time() const; void set_creation_time(int64_t creation_time); @@ -610,7 +611,7 @@ inline int32_t TabletMeta::schema_hash() const { return _schema_hash; } -inline int16_t TabletMeta::shard_id() const { +inline int32_t TabletMeta::shard_id() const { return _shard_id; } @@ -774,4 +775,5 @@ std::string tablet_state_name(TabletState state); bool operator==(const TabletMeta& a, const TabletMeta& b); bool operator!=(const TabletMeta& a, const TabletMeta& b); +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/olap/tablet_meta_manager.cpp b/be/src/olap/tablet_meta_manager.cpp index 6f27dd4db4e672f..7c08d7856200f90 100644 --- a/be/src/olap/tablet_meta_manager.cpp +++ b/be/src/olap/tablet_meta_manager.cpp @@ -291,8 +291,7 @@ Status TabletMetaManager::remove_old_version_delete_bitmap(DataDir* store, TTabl return true; }; LOG(INFO) << "remove old version delete bitmap, tablet_id: " << tablet_id - << " version: " << version << " removed keys size: " << remove_keys.size(); - ; + << " version: " << version << ", removed keys size: " << remove_keys.size(); RETURN_IF_ERROR(meta->iterate(META_COLUMN_FAMILY_INDEX, begin_key, get_remove_keys_func)); return meta->remove(META_COLUMN_FAMILY_INDEX, remove_keys); } diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index a78a52e5fd3e318..a83e0bfdbf4c30d 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -464,9 +464,16 @@ Status TabletReader::_init_orderby_keys_param(const ReaderParams& read_params) { // UNIQUE_KEYS will compare all keys as before if (_tablet_schema->keys_type() == DUP_KEYS || (_tablet_schema->keys_type() == UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write())) { - if (!_tablet_schema->cluster_key_idxes().empty()) { + if (!_tablet_schema->cluster_key_uids().empty()) { + if (read_params.read_orderby_key_num_prefix_columns > + _tablet_schema->cluster_key_uids().size()) { + return Status::Error( + "read_orderby_key_num_prefix_columns={} > cluster_keys.size()={}", + read_params.read_orderby_key_num_prefix_columns, + 
_tablet_schema->cluster_key_uids().size()); + } for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { - auto cid = _tablet_schema->cluster_key_idxes()[i]; + auto cid = _tablet_schema->cluster_key_uids()[i]; auto index = _tablet_schema->field_index(cid); if (index < 0) { return Status::Error( diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 47179ce19b23e7a..3ec5d22166477f9 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -973,10 +973,10 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _indexes.clear(); _field_name_to_index.clear(); _field_id_to_index.clear(); - _cluster_key_idxes.clear(); + _cluster_key_uids.clear(); clear_column_cache_handlers(); - for (const auto& i : schema.cluster_key_idxes()) { - _cluster_key_idxes.push_back(i); + for (const auto& i : schema.cluster_key_uids()) { + _cluster_key_uids.push_back(i); } for (auto& column_pb : schema.column()) { TabletColumnPtr column; @@ -1124,10 +1124,10 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _sequence_col_idx = -1; _version_col_idx = -1; _skip_bitmap_col_idx = -1; - _cluster_key_idxes.clear(); + _cluster_key_uids.clear(); clear_column_cache_handlers(); - for (const auto& i : ori_tablet_schema._cluster_key_idxes) { - _cluster_key_idxes.push_back(i); + for (const auto& i : ori_tablet_schema._cluster_key_uids) { + _cluster_key_uids.push_back(i); } for (auto& column : index->columns) { if (column->is_key()) { @@ -1235,8 +1235,8 @@ void TabletSchema::reserve_extracted_columns() { } void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { - for (const auto& i : _cluster_key_idxes) { - tablet_schema_pb->add_cluster_key_idxes(i); + for (const auto& i : _cluster_key_uids) { + tablet_schema_pb->add_cluster_key_uids(i); } tablet_schema_pb->set_keys_type(_keys_type); for (const auto& col : _cols) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 9a0cd53f7b1786c..c813d6f0ef8722b 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -349,7 +349,7 @@ class TabletSchema : public MetadataAdder { const std::vector& columns() const; size_t num_columns() const { return _num_columns; } size_t num_key_columns() const { return _num_key_columns; } - const std::vector& cluster_key_idxes() const { return _cluster_key_idxes; } + const std::vector& cluster_key_uids() const { return _cluster_key_uids; } size_t num_null_columns() const { return _num_null_columns; } size_t num_short_key_columns() const { return _num_short_key_columns; } size_t num_rows_per_row_block() const { return _num_rows_per_row_block; } @@ -548,7 +548,7 @@ class TabletSchema : public MetadataAdder { size_t _num_columns = 0; size_t _num_variant_columns = 0; size_t _num_key_columns = 0; - std::vector _cluster_key_idxes; + std::vector _cluster_key_uids; size_t _num_null_columns = 0; size_t _num_short_key_columns = 0; size_t _num_rows_per_row_block = 0; diff --git a/be/src/pipeline/dependency.cpp b/be/src/pipeline/dependency.cpp index 983429f15e2fd53..5fef018423df25d 100644 --- a/be/src/pipeline/dependency.cpp +++ b/be/src/pipeline/dependency.cpp @@ -92,11 +92,10 @@ std::string Dependency::debug_string(int indentation_level) { std::string CountedFinishDependency::debug_string(int indentation_level) { fmt::memory_buffer debug_string_buffer; - fmt::format_to( - debug_string_buffer, - "{}{}: id={}, block_task={}, ready={}, _always_ready={}, count={}, 
_stack_set_ready={}", - std::string(indentation_level * 2, ' '), _name, _node_id, _blocked_task.size(), _ready, - _always_ready, _counter, _stack_set_ready); + fmt::format_to(debug_string_buffer, + "{}{}: id={}, block_task={}, ready={}, _always_ready={}, count={}", + std::string(indentation_level * 2, ' '), _name, _node_id, _blocked_task.size(), + _ready, _always_ready, _counter); return fmt::to_string(debug_string_buffer); } diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index 71ef2d8b8fd85f6..ad018c8b4f8f3dd 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -173,26 +173,11 @@ struct FakeSharedState final : public BasicSharedState { ENABLE_FACTORY_CREATOR(FakeSharedState) }; -class DependencyWithStack : public Dependency { -public: - using SharedState = FakeSharedState; - DependencyWithStack(int id, int node_id, std::string name, bool ready = false) - : Dependency(id, node_id, name, ready) {} - - void set_ready() override { - _stack_set_ready = get_stack_trace(); - Dependency::set_ready(); - } - -protected: - std::string _stack_set_ready; -}; - -class CountedFinishDependency final : public DependencyWithStack { +class CountedFinishDependency final : public Dependency { public: using SharedState = FakeSharedState; CountedFinishDependency(int id, int node_id, std::string name) - : DependencyWithStack(id, node_id, name, true) {} + : Dependency(id, node_id, name, true) {} void add() { std::unique_lock l(_mtx); diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index ccd8757aba7f281..b2a79a941f79e76 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -170,6 +170,9 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status exec_statu (p._shared_hash_table_context && !p._shared_hash_table_context->complete_build_stage)) { throw Exception(ErrorCode::INTERNAL_ERROR, "build_sink::close meet error state"); + } else { + RETURN_IF_ERROR( + _runtime_filter_slots->copy_from_shared_context(p._shared_hash_table_context)); } SCOPED_TIMER(_publish_runtime_filter_timer); @@ -553,9 +556,6 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, vectorized::Block* return _shared_hash_table_context->status; } - RETURN_IF_ERROR(local_state._runtime_filter_slots->copy_from_shared_context( - _shared_hash_table_context)); - local_state.profile()->add_info_string( "SharedHashTableFrom", print_id( diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp index ab0a43f4a635cf8..5273960a5c1c29b 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp @@ -202,7 +202,7 @@ size_t PartitionedAggSinkOperatorX::revocable_mem_size(RuntimeState* state) cons Status PartitionedAggSinkLocalState::setup_in_memory_agg_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); _runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp 
b/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp index 655a6e19725a9b3..cdc6ef881d436d9 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp +++ b/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp @@ -166,7 +166,7 @@ Status PartitionedAggSourceOperatorX::get_block(RuntimeState* state, vectorized: Status PartitionedAggLocalState::setup_in_memory_agg_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); _runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp index 0e56acc1c574b20..20b25d54ff9f168 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp +++ b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp @@ -606,7 +606,7 @@ Status PartitionedHashJoinProbeOperatorX::_setup_internal_operators( } local_state._runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); local_state._runtime_state->set_task_execution_context( diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp index d221eaeed0faba4..878c3870946f1c4 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp @@ -438,7 +438,7 @@ Status PartitionedHashJoinSinkOperatorX::_setup_internal_operator(RuntimeState* auto& local_state = get_local_state(state); local_state._shared_state->inner_runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); local_state._shared_state->inner_runtime_state->set_task_execution_context( state->get_task_execution_context().lock()); diff --git a/be/src/pipeline/exec/spill_sort_sink_operator.cpp b/be/src/pipeline/exec/spill_sort_sink_operator.cpp index 6e6689d4134deb1..6071301c1d7bccd 100644 --- a/be/src/pipeline/exec/spill_sort_sink_operator.cpp +++ b/be/src/pipeline/exec/spill_sort_sink_operator.cpp @@ -80,7 +80,7 @@ Status SpillSortSinkLocalState::close(RuntimeState* state, Status execsink_statu Status SpillSortSinkLocalState::setup_in_memory_sort_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); _runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/exec/spill_sort_source_operator.cpp b/be/src/pipeline/exec/spill_sort_source_operator.cpp index 
e766cb27168de15..69ed816fa9142dd 100644 --- a/be/src/pipeline/exec/spill_sort_source_operator.cpp +++ b/be/src/pipeline/exec/spill_sort_source_operator.cpp @@ -212,7 +212,7 @@ Status SpillSortLocalState::_create_intermediate_merger( } Status SpillSortLocalState::setup_in_memory_sort_op(RuntimeState* state) { _runtime_state = RuntimeState::create_unique( - nullptr, state->fragment_instance_id(), state->query_id(), state->fragment_id(), + state->fragment_instance_id(), state->query_id(), state->fragment_id(), state->query_options(), TQueryGlobals {}, state->exec_env(), state->get_query_ctx()); _runtime_state->set_task_execution_context(state->get_task_execution_context().lock()); _runtime_state->set_be_number(state->be_number()); diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index e6f257f9da792a1..8ceb63eb99324c6 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -397,9 +397,9 @@ Status PipelineFragmentContext::_build_pipeline_tasks(const doris::TPipelineFrag << print_id(_task_runtime_states[pip_idx][i]->fragment_instance_id()) << " " << pipeline->debug_string(); _task_runtime_states[pip_idx][i] = RuntimeState::create_unique( - this, local_params.fragment_instance_id, request.query_id, - request.fragment_id, request.query_options, _query_ctx->query_globals, - _exec_env, _query_ctx.get()); + local_params.fragment_instance_id, request.query_id, request.fragment_id, + request.query_options, _query_ctx->query_globals, _exec_env, + _query_ctx.get()); auto& task_runtime_state = _task_runtime_states[pip_idx][i]; _runtime_filter_states[i]->set_state(task_runtime_state.get()); { diff --git a/be/src/runtime/buffer_control_block.cpp b/be/src/runtime/buffer_control_block.cpp index 1ed2836f8eb6165..8c1ae79955f317d 100644 --- a/be/src/runtime/buffer_control_block.cpp +++ b/be/src/runtime/buffer_control_block.cpp @@ -292,6 +292,9 @@ Status BufferControlBlock::get_arrow_batch(std::shared_ptr* r _arrow_data_arrival.wait_for(l, std::chrono::milliseconds(20)); } + if (!_status.ok()) { + return _status; + } if (_is_cancelled) { return Status::Cancelled(fmt::format("Cancelled ()", print_id(_fragment_id))); } @@ -311,9 +314,12 @@ Status BufferControlBlock::get_arrow_batch(std::shared_ptr* r // normal path end if (_is_close) { + if (!_status.ok()) { + return _status; + } std::stringstream ss; _profile.pretty_print(&ss); - VLOG_NOTICE << fmt::format( + LOG(INFO) << fmt::format( "BufferControlBlock finished, fragment_id={}, is_close={}, is_cancelled={}, " "packet_num={}, peak_memory_usage={}, profile={}", print_id(_fragment_id), _is_close, _is_cancelled, _packet_num, @@ -321,7 +327,7 @@ Status BufferControlBlock::get_arrow_batch(std::shared_ptr* r return Status::OK(); } return Status::InternalError( - fmt::format("Get Arrow Batch Abnormal Ending ()", print_id(_fragment_id))); + fmt::format("Get Arrow Batch Abnormal Ending (), ()", print_id(_fragment_id), _status)); } void BufferControlBlock::get_arrow_batch(GetArrowResultBatchCtx* ctx) { @@ -354,10 +360,14 @@ void BufferControlBlock::get_arrow_batch(GetArrowResultBatchCtx* ctx) { // normal path end if (_is_close) { + if (!_status.ok()) { + ctx->on_failure(_status); + return; + } ctx->on_close(_packet_num); std::stringstream ss; _profile.pretty_print(&ss); - VLOG_NOTICE << fmt::format( + LOG(INFO) << fmt::format( "BufferControlBlock finished, fragment_id={}, is_close={}, is_cancelled={}, " "packet_num={}, peak_memory_usage={}, profile={}", 
print_id(_fragment_id), _is_close, _is_cancelled, _packet_num, @@ -391,8 +401,8 @@ Status BufferControlBlock::find_arrow_schema(std::shared_ptr* arr if (_is_close) { return Status::RuntimeError(fmt::format("Closed ()", print_id(_fragment_id))); } - return Status::InternalError( - fmt::format("Get Arrow Schema Abnormal Ending ()", print_id(_fragment_id))); + return Status::InternalError(fmt::format("Get Arrow Schema Abnormal Ending (), ()", + print_id(_fragment_id), _status)); } Status BufferControlBlock::close(const TUniqueId& id, Status exec_status) { diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 579329c1082633e..f3376d06858ec01 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -123,37 +123,6 @@ RuntimeState::RuntimeState(const TUniqueId& instance_id, const TUniqueId& query_ DCHECK(_query_mem_tracker != nullptr && _query_mem_tracker->label() != "Orphan"); } -RuntimeState::RuntimeState(pipeline::PipelineFragmentContext*, const TUniqueId& instance_id, - const TUniqueId& query_id, int32_t fragment_id, - const TQueryOptions& query_options, const TQueryGlobals& query_globals, - ExecEnv* exec_env, QueryContext* ctx) - : _profile("Fragment " + print_id(instance_id)), - _load_channel_profile(""), - _obj_pool(new ObjectPool()), - _unreported_error_idx(0), - _query_id(query_id), - _fragment_id(fragment_id), - _per_fragment_instance_idx(0), - _num_rows_load_total(0), - _num_rows_load_filtered(0), - _num_rows_load_unselected(0), - _num_rows_filtered_in_strict_mode_partial_update(0), - _num_print_error_rows(0), - _num_bytes_load_total(0), - _num_finished_scan_range(0), - _error_row_number(0), - _query_ctx(ctx) { - [[maybe_unused]] auto status = init(instance_id, query_options, query_globals, exec_env); - _query_mem_tracker = ctx->query_mem_tracker; -#ifdef BE_TEST - if (_query_mem_tracker == nullptr) { - init_mem_trackers(); - } -#endif - DCHECK(_query_mem_tracker != nullptr && _query_mem_tracker->label() != "Orphan"); - DCHECK(status.ok()); -} - RuntimeState::RuntimeState(const TUniqueId& query_id, int32_t fragment_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx) diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index ad63510e2af82cb..a49567109a3b31b 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -85,12 +85,7 @@ class RuntimeState { const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx); - // for only use in pipelineX - RuntimeState(pipeline::PipelineFragmentContext*, const TUniqueId& instance_id, - const TUniqueId& query_id, int32 fragment_id, const TQueryOptions& query_options, - const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx); - - // Used by pipelineX. This runtime state is only used for setup. + // Used by pipeline. This runtime state is only used for setup. 
RuntimeState(const TUniqueId& query_id, int32 fragment_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx); diff --git a/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp b/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp index e935aff996d55e6..c24fcb733844941 100644 --- a/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp +++ b/be/src/service/arrow_flight/arrow_flight_batch_reader.cpp @@ -56,7 +56,7 @@ arrow::Status ArrowFlightBatchReaderBase::_return_invalid_status(const std::stri } ArrowFlightBatchReaderBase::~ArrowFlightBatchReaderBase() { - VLOG_NOTICE << fmt::format( + LOG(INFO) << fmt::format( "ArrowFlightBatchReader finished, packet_seq={}, result_addr={}:{}, finistId={}, " "convert_arrow_batch_timer={}, deserialize_block_timer={}, peak_memory_usage={}", _packet_seq, _statement->result_addr.hostname, _statement->result_addr.port, diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 57600d1f56aae93..912f9f5ff403e7b 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -80,6 +80,7 @@ #include "util/doris_metrics.h" namespace doris { +#include "common/compile_check_begin.h" namespace { std::shared_ptr get_rate_limit_group(event_base* event_base) { auto rate_limit = config::download_binlog_rate_limit_kbs; @@ -473,4 +474,5 @@ int HttpService::get_real_port() const { return _ev_http_server->get_real_port(); } +#include "common/compile_check_end.h" } // namespace doris diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index bed8569afaca325..fbf10b75ae02c06 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -297,7 +297,7 @@ class HashUtil { #endif } - static uint64_t hash64(const void* data, uint32_t bytes, uint64_t seed) { + static uint64_t hash64(const void* data, uint64_t bytes, uint64_t seed) { #ifdef _SSE4_2_ if (LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) { return crc_hash64(data, bytes, seed); diff --git a/be/src/util/runtime_profile.cpp b/be/src/util/runtime_profile.cpp index 8b9e97bfc9cb549..1df4d8b55c278e9 100644 --- a/be/src/util/runtime_profile.cpp +++ b/be/src/util/runtime_profile.cpp @@ -574,8 +574,6 @@ void RuntimeProfile::to_thrift(TRuntimeProfileTree* tree) { } void RuntimeProfile::to_thrift(std::vector* nodes) { - nodes->reserve(nodes->size() + _children.size()); - int index = nodes->size(); nodes->push_back(TRuntimeProfileNode()); TRuntimeProfileNode& node = (*nodes)[index]; @@ -602,10 +600,13 @@ void RuntimeProfile::to_thrift(std::vector* nodes) { ChildVector children; { + // _children may be modified during to_thrift(), + // so we have to lock and copy _children to avoid race condition std::lock_guard l(_children_lock); children = _children; } node.num_children = children.size(); + nodes->reserve(nodes->size() + children.size()); for (int i = 0; i < children.size(); ++i) { int child_idx = nodes->size(); diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h index dfa9e7e37e53976..1bb6a7a170486cb 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h @@ -99,7 +99,7 @@ struct AggregateFunctionGroupArrayIntersectData { value = std::make_unique(); } - void process_col_data(auto& column_data, size_t offset, size_t arr_size, bool& init, Set& set) { + void 
process_col_data(auto& column_data, size_t offset, size_t arr_size, Set& set) { const bool is_column_data_nullable = column_data.is_nullable(); const ColumnNullable* col_null = nullptr; @@ -175,7 +175,6 @@ class AggregateFunctionGroupArrayIntersect void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { auto& data = this->data(place); - auto& init = data.init; auto& set = data.value; const bool col_is_nullable = (*columns[0]).is_nullable(); @@ -192,7 +191,7 @@ class AggregateFunctionGroupArrayIntersect const auto arr_size = offsets[row_num] - offset; const auto& column_data = column.get_data(); - data.process_col_data(column_data, offset, arr_size, init, set); + data.process_col_data(column_data, offset, arr_size, set); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, diff --git a/be/src/vec/aggregate_functions/aggregate_function_retention.h b/be/src/vec/aggregate_functions/aggregate_function_retention.h index 3318ac96ef36cfd..95b2c8f9bb2e7ab 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_retention.h +++ b/be/src/vec/aggregate_functions/aggregate_function_retention.h @@ -94,18 +94,18 @@ struct RetentionState { } } - void insert_result_into(IColumn& to, size_t events_size, const uint8_t* events) const { + void insert_result_into(IColumn& to, size_t events_size, const uint8_t* arg_events) const { auto& data_to = assert_cast(to).get_data(); ColumnArray::Offset64 current_offset = data_to.size(); data_to.resize(current_offset + events_size); - bool first_flag = events[0]; + bool first_flag = arg_events[0]; data_to[current_offset] = first_flag; ++current_offset; for (size_t i = 1; i < events_size; ++i) { - data_to[current_offset] = (first_flag && events[i]); + data_to[current_offset] = (first_flag && arg_events[i]); ++current_offset; } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h b/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h index 827e506a3b0ed98..8fd0a1f5525cd16 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h +++ b/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h @@ -89,10 +89,10 @@ struct AggregateFunctionSequenceMatchData final { size_t get_arg_count() const { return arg_count; } - void init(const std::string pattern, size_t arg_count) { + void init(const std::string pattern_, size_t arg_count_) { if (!init_flag) { - this->pattern = pattern; - this->arg_count = arg_count; + this->pattern = pattern_; + this->arg_count = arg_count_; parse_pattern(); init_flag = true; } @@ -629,8 +629,7 @@ class AggregateFunctionSequenceBase void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override { const std::string pattern = this->data(rhs).get_pattern(); - size_t arg_count = this->data(rhs).get_arg_count(); - this->data(place).init(pattern, arg_count); + this->data(place).init(pattern, this->data(rhs).get_arg_count()); this->data(place).merge(this->data(rhs)); } @@ -642,8 +641,7 @@ class AggregateFunctionSequenceBase Arena*) const override { this->data(place).read(buf); const std::string pattern = this->data(place).get_pattern(); - size_t arg_count = this->data(place).get_arg_count(); - this->data(place).init(pattern, arg_count); + this->data(place).init(pattern, this->data(place).get_arg_count()); } private: diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 3e8d3722305e8d2..d67a70d2f630f03 100644 --- 
a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -645,8 +645,8 @@ void ColumnObject::resize(size_t n) { num_rows = n; } -bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) { - if (num_rows < config::variant_threshold_rows_to_estimate_sparse_column) { +bool ColumnObject::Subcolumn::check_if_sparse_column(size_t arg_num_rows) { + if (arg_num_rows < config::variant_threshold_rows_to_estimate_sparse_column) { return false; } std::vector defaults_ratio; @@ -1603,7 +1603,7 @@ Status ColumnObject::merge_sparse_to_root_column() { return Status::OK(); } -void ColumnObject::unnest(Subcolumns::NodePtr& entry, Subcolumns& subcolumns) const { +void ColumnObject::unnest(Subcolumns::NodePtr& entry, Subcolumns& arg_subcolumns) const { entry->data.finalize(); auto nested_column = entry->data.get_finalized_column_ptr()->assume_mutable(); auto* nested_column_nullable = assert_cast(nested_column.get()); @@ -1634,7 +1634,7 @@ void ColumnObject::unnest(Subcolumns::NodePtr& entry, Subcolumns& subcolumns) co auto type = make_nullable( std::make_shared(nested_entry->data.least_common_type.get())); Subcolumn subcolumn(nullable_subnested_column->assume_mutable(), type, is_nullable); - subcolumns.add(path_builder.build(), subcolumn); + arg_subcolumns.add(path_builder.build(), subcolumn); } } diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index adc041f511198e6..e597cdba224376e 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -212,9 +212,9 @@ void convert_jsonb_to_rapidjson(const JsonbValue& val, rapidjson::Value& target, } target.Reserve(array.numElem(), allocator); for (auto it = array.begin(); it != array.end(); ++it) { - rapidjson::Value val; - convert_jsonb_to_rapidjson(*static_cast(it), val, allocator); - target.PushBack(val, allocator); + rapidjson::Value array_val; + convert_jsonb_to_rapidjson(*static_cast(it), array_val, allocator); + target.PushBack(array_val, allocator); } break; } @@ -222,9 +222,9 @@ void convert_jsonb_to_rapidjson(const JsonbValue& val, rapidjson::Value& target, target.SetObject(); const ObjectVal& obj = static_cast(val); for (auto it = obj.begin(); it != obj.end(); ++it) { - rapidjson::Value val; - convert_jsonb_to_rapidjson(*it->value(), val, allocator); - target.AddMember(rapidjson::GenericStringRef(it->getKeyStr(), it->klen()), val, + rapidjson::Value obj_val; + convert_jsonb_to_rapidjson(*it->value(), obj_val, allocator); + target.AddMember(rapidjson::GenericStringRef(it->getKeyStr(), it->klen()), obj_val, allocator); } break; diff --git a/be/src/vec/exec/format/table/max_compute_jni_reader.cpp b/be/src/vec/exec/format/table/max_compute_jni_reader.cpp index d1a71fd1a2f9d90..665e19b6bcebd9e 100644 --- a/be/src/vec/exec/format/table/max_compute_jni_reader.cpp +++ b/be/src/vec/exec/format/table/max_compute_jni_reader.cpp @@ -77,7 +77,11 @@ MaxComputeJniReader::MaxComputeJniReader(const MaxComputeTableDescriptor* mc_des {"start_offset", std::to_string(_range.start_offset)}, {"split_size", std::to_string(_range.size)}, {"required_fields", required_fields.str()}, - {"columns_types", columns_types.str()}}; + {"columns_types", columns_types.str()}, + + {"connect_timeout", std::to_string(_max_compute_params.connect_timeout)}, + {"read_timeout", std::to_string(_max_compute_params.read_timeout)}, + {"retry_count", std::to_string(_max_compute_params.retry_times)}}; _jni_connector = 
std::make_unique( "org/apache/doris/maxcompute/MaxComputeJniScanner", params, column_names); } diff --git a/be/src/vec/exec/jni_connector.cpp b/be/src/vec/exec/jni_connector.cpp index a7b0d5144ee623f..4c977b69ad6a426 100644 --- a/be/src/vec/exec/jni_connector.cpp +++ b/be/src/vec/exec/jni_connector.cpp @@ -599,68 +599,79 @@ std::string JniConnector::get_jni_type(const TypeDescriptor& desc) { } } -Status JniConnector::_fill_column_meta(ColumnPtr& doris_column, DataTypePtr& data_type, +Status JniConnector::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, std::vector& meta_data) { TypeIndex logical_type = remove_nullable(data_type)->get_type_id(); + const IColumn* column = nullptr; + // insert const flag + if (is_column_const(*doris_column)) { + meta_data.emplace_back((long)1); + const auto& const_column = assert_cast(*doris_column); + column = &(const_column.get_data_column()); + } else { + meta_data.emplace_back((long)0); + column = &(*doris_column); + } + // insert null map address - MutableColumnPtr data_column; - if (doris_column->is_nullable()) { - auto* nullable_column = - reinterpret_cast(doris_column->assume_mutable().get()); - data_column = nullable_column->get_nested_column_ptr(); - NullMap& null_map = nullable_column->get_null_map_data(); + const IColumn* data_column = nullptr; + if (column->is_nullable()) { + const auto& nullable_column = assert_cast(*column); + data_column = &(nullable_column.get_nested_column()); + const auto& null_map = nullable_column.get_null_map_data(); meta_data.emplace_back((long)null_map.data()); } else { meta_data.emplace_back(0); - data_column = doris_column->assume_mutable(); + data_column = column; } switch (logical_type) { -#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ - case TYPE_INDEX: { \ - meta_data.emplace_back(_get_fixed_length_column_address(data_column)); \ - break; \ +#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ + case TYPE_INDEX: { \ + meta_data.emplace_back(_get_fixed_length_column_address(*data_column)); \ + break; \ } FOR_FIXED_LENGTH_TYPES(DISPATCH) #undef DISPATCH case TypeIndex::String: [[fallthrough]]; case TypeIndex::FixedString: { - auto& string_column = static_cast(*data_column); + const auto& string_column = assert_cast(*data_column); // inert offsets meta_data.emplace_back((long)string_column.get_offsets().data()); meta_data.emplace_back((long)string_column.get_chars().data()); break; } case TypeIndex::Array: { - ColumnPtr& element_column = static_cast(*data_column).get_data_ptr(); - meta_data.emplace_back((long)static_cast(*data_column).get_offsets().data()); - DataTypePtr& element_type = const_cast( - (reinterpret_cast(remove_nullable(data_type).get())) + const auto& element_column = assert_cast(*data_column).get_data_ptr(); + meta_data.emplace_back( + (long)assert_cast(*data_column).get_offsets().data()); + const auto& element_type = assert_cast( + (assert_cast(remove_nullable(data_type).get())) ->get_nested_type()); RETURN_IF_ERROR(_fill_column_meta(element_column, element_type, meta_data)); break; } case TypeIndex::Struct: { - auto& doris_struct = static_cast(*data_column); - const DataTypeStruct* doris_struct_type = - reinterpret_cast(remove_nullable(data_type).get()); + const auto& doris_struct = assert_cast(*data_column); + const auto* doris_struct_type = + assert_cast(remove_nullable(data_type).get()); for (int i = 0; i < doris_struct.tuple_size(); ++i) { - ColumnPtr& struct_field = doris_struct.get_column_ptr(i); - DataTypePtr& field_type = 
const_cast(doris_struct_type->get_element(i)); + const auto& struct_field = doris_struct.get_column_ptr(i); + const auto& field_type = + assert_cast(doris_struct_type->get_element(i)); RETURN_IF_ERROR(_fill_column_meta(struct_field, field_type, meta_data)); } break; } case TypeIndex::Map: { - auto& map = static_cast(*data_column); - DataTypePtr& key_type = const_cast( - reinterpret_cast(remove_nullable(data_type).get()) - ->get_key_type()); - DataTypePtr& value_type = const_cast( - reinterpret_cast(remove_nullable(data_type).get()) + const auto& map = assert_cast(*data_column); + const auto& key_type = assert_cast( + assert_cast(remove_nullable(data_type).get())->get_key_type()); + const auto& value_type = assert_cast( + assert_cast(remove_nullable(data_type).get()) ->get_value_type()); - ColumnPtr& key_column = map.get_keys_ptr(); - ColumnPtr& value_column = map.get_values_ptr(); + const auto& key_column = map.get_keys_ptr(); + const auto& value_column = map.get_values_ptr(); meta_data.emplace_back((long)map.get_offsets().data()); RETURN_IF_ERROR(_fill_column_meta(key_column, key_type, meta_data)); RETURN_IF_ERROR(_fill_column_meta(value_column, value_type, meta_data)); @@ -686,11 +697,6 @@ Status JniConnector::to_java_table(Block* block, size_t num_rows, const ColumnNu // insert number of rows meta_data.emplace_back(num_rows); for (size_t i : arguments) { - if (is_column_const(*(block->get_by_position(i).column))) { - auto doris_column = block->get_by_position(i).column->convert_to_full_column_if_const(); - bool is_nullable = block->get_by_position(i).type->is_nullable(); - block->replace_by_position(i, is_nullable ? make_nullable(doris_column) : doris_column); - } auto& column_with_type_and_name = block->get_by_position(i); RETURN_IF_ERROR(_fill_column_meta(column_with_type_and_name.column, column_with_type_and_name.type, meta_data)); diff --git a/be/src/vec/exec/jni_connector.h b/be/src/vec/exec/jni_connector.h index 52a3fb2e7782ca6..e06654dcfe3ca8a 100644 --- a/be/src/vec/exec/jni_connector.h +++ b/be/src/vec/exec/jni_connector.h @@ -336,13 +336,13 @@ class JniConnector : public ProfileCollector { static Status _fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column, DataTypePtr& data_type, size_t num_rows); - static Status _fill_column_meta(ColumnPtr& doris_column, DataTypePtr& data_type, + static Status _fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, std::vector& meta_data); template static Status _fill_fixed_length_column(MutableColumnPtr& doris_column, CPP_TYPE* ptr, size_t num_rows) { - auto& column_data = static_cast(*doris_column).get_data(); + auto& column_data = assert_cast(*doris_column).get_data(); size_t origin_size = column_data.size(); column_data.resize(origin_size + num_rows); memcpy(column_data.data() + origin_size, ptr, sizeof(CPP_TYPE) * num_rows); @@ -350,8 +350,8 @@ class JniConnector : public ProfileCollector { } template - static long _get_fixed_length_column_address(MutableColumnPtr& doris_column) { - return (long)static_cast(*doris_column).get_data().data(); + static long _get_fixed_length_column_address(const IColumn& doris_column) { + return (long)assert_cast(doris_column).get_data().data(); } void _generate_predicates( diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index d5f2075dffb6d28..92a5dba7b7a4d44 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -685,8 +685,8 @@ ColumnPtr 
handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, continue; } - if (auto* nullable = assert_cast(elem.column.get())) { - const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr(); + if (const auto* nullable_column = assert_cast(elem.column.get())) { + const ColumnPtr& null_map_column = nullable_column->get_null_map_column_ptr(); const NullMap& src_null_map = assert_cast(*null_map_column).get_data(); diff --git a/be/src/vec/functions/function_conv.cpp b/be/src/vec/functions/function_conv.cpp index 085d982a1cbf1ec..38932530c5a2b4e 100644 --- a/be/src/vec/functions/function_conv.cpp +++ b/be/src/vec/functions/function_conv.cpp @@ -223,8 +223,8 @@ struct ConvStringImpl { if (!MathFunctions::handle_parse_result(dst_base, &decimal_num, parse_res)) { result_column->insert_data("0", 1); } else { - StringRef str = MathFunctions::decimal_to_base(context, decimal_num, dst_base); - result_column->insert_data(reinterpret_cast(str.data), str.size); + StringRef str_base = MathFunctions::decimal_to_base(context, decimal_num, dst_base); + result_column->insert_data(reinterpret_cast(str_base.data), str_base.size); } } }; diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 928ce4369726b29..67edad5015aeaf3 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -458,10 +458,10 @@ ColumnPtr convert_to_ipv6(const StringColumnType& string_column, std::reverse(res_value, res_value + IPV6_BINARY_LENGTH); } if constexpr (std::is_same_v) { - auto* column_string = assert_cast(col_res.get()); + auto* column_string_res = assert_cast(col_res.get()); std::copy(res_value, res_value + IPV6_BINARY_LENGTH, - column_string->get_chars().begin() + i * IPV6_BINARY_LENGTH); - column_string->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); + column_string_res->get_chars().begin() + i * IPV6_BINARY_LENGTH); + column_string_res->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); } else { col_res->insert_data(reinterpret_cast(res_value), IPV6_BINARY_LENGTH); } @@ -471,8 +471,8 @@ ColumnPtr convert_to_ipv6(const StringColumnType& string_column, } std::fill_n(&vec_res[out_offset], offset_inc, 0); if constexpr (std::is_same_v) { - auto* column_string = assert_cast(col_res.get()); - column_string->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); + auto* column_string_res = assert_cast(col_res.get()); + column_string_res->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); } if constexpr (exception_mode == IPConvertExceptionMode::Null) { (*vec_null_map_to)[i] = true; diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index 0e78eb894b20c55..463508169aadc61 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -1638,17 +1638,16 @@ class FunctionJsonSearch : public IFunction { LikeState* state, JsonbPath* cur_path, std::unordered_set* matches) const { if (element.isString()) { - const std::string_view str = element.getString(); + const std::string_view element_str = element.getString(); unsigned char res; - RETURN_IF_ERROR(matched(str, state, &res)); + RETURN_IF_ERROR(matched(element_str, state, &res)); if (res) { std::string str; auto valid = cur_path->to_string(&str); if (!valid) { return false; } - auto res = matches->insert(str); - return res.second; + return matches->insert(str).second; } else { return false; } diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 
89737d03453ae34..c434d344daa40a7 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -888,16 +888,17 @@ struct StringSpace { ColumnString::Offsets& res_offsets) { res_offsets.resize(data.size()); size_t input_size = res_offsets.size(); - std::vector> buffer; + // sample to get approximate best reserve size + if (input_size > 4) { + res_data.reserve(((data[0] + data[input_size >> 1] + data[input_size >> 2] + + data[input_size - 1]) >> + 2) * + input_size); + } for (size_t i = 0; i < input_size; ++i) { - buffer.clear(); - if (data[i] > 0) { - buffer.resize(data[i]); - for (size_t j = 0; j < data[i]; ++j) { - buffer[j] = ' '; - } - StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, - res_data, res_offsets); + if (data[i] > 0) [[likely]] { + res_data.resize_fill(res_data.size() + data[i], ' '); + res_offsets[i] = res_data.size(); } else { StringOP::push_empty_string(i, res_data, res_offsets); } diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp index 369bf04459a8f23..8df4e38e4558816 100644 --- a/be/src/vec/olap/vertical_block_reader.cpp +++ b/be/src/vec/olap/vertical_block_reader.cpp @@ -237,7 +237,7 @@ Status VerticalBlockReader::init(const ReaderParams& read_params, _next_block_func = &VerticalBlockReader::_direct_next_block; break; case KeysType::UNIQUE_KEYS: - if (tablet()->tablet_meta()->tablet_schema()->cluster_key_idxes().empty()) { + if (tablet()->tablet_meta()->tablet_schema()->cluster_key_uids().empty()) { _next_block_func = &VerticalBlockReader::_unique_key_next_block; if (_filter_delete) { _delete_filter_column = ColumnUInt8::create(); diff --git a/be/src/vec/sink/load_stream_stub.cpp b/be/src/vec/sink/load_stream_stub.cpp index 979daf6a85e6821..59719765ab39642 100644 --- a/be/src/vec/sink/load_stream_stub.cpp +++ b/be/src/vec/sink/load_stream_stub.cpp @@ -65,13 +65,11 @@ int LoadStreamReplyHandler::on_received_messages(brpc::StreamId id, butil::IOBuf if (response.failed_tablets_size() > 0) { ss << ", failed tablet ids:"; for (auto pb : response.failed_tablets()) { - Status st = Status::create(pb.status()); - ss << " " << pb.id() << ":" << st; + ss << " " << pb.id() << ":" << Status::create(pb.status()); } std::lock_guard lock(stub->_failed_tablets_mutex); for (auto pb : response.failed_tablets()) { - Status st = Status::create(pb.status()); - stub->_failed_tablets.emplace(pb.id(), st); + stub->_failed_tablets.emplace(pb.id(), Status::create(pb.status())); } } if (response.tablet_schemas_size() > 0) { diff --git a/be/test/olap/delta_writer_cluster_key_test.cpp b/be/test/olap/delta_writer_cluster_key_test.cpp index 9c3e64109c4440a..6c4b4d367a3d21a 100644 --- a/be/test/olap/delta_writer_cluster_key_test.cpp +++ b/be/test/olap/delta_writer_cluster_key_test.cpp @@ -116,8 +116,8 @@ static void create_tablet_request_with_sequence_col(int64_t tablet_id, int32_t s request->tablet_schema.__set_sequence_col_idx(4); request->__set_storage_format(TStorageFormat::V2); request->__set_enable_unique_key_merge_on_write(enable_mow); - request->tablet_schema.cluster_key_idxes.push_back(1); - request->tablet_schema.cluster_key_idxes.push_back(0); + request->tablet_schema.cluster_key_uids.push_back(1); + request->tablet_schema.cluster_key_uids.push_back(0); TColumn k1; k1.column_name = "k1"; diff --git a/be/test/vec/data_types/common_data_type_test.h b/be/test/vec/data_types/common_data_type_test.h index 0480137ac7f0e41..36abc3402e21561 100644 --- 
a/be/test/vec/data_types/common_data_type_test.h +++ b/be/test/vec/data_types/common_data_type_test.h @@ -45,7 +45,7 @@ // have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory // get_precision, get_scale // get_field -// is_null_literal, is_value_represented_by_number, is_value_represented_by_integer, is_value_represented_by_unsigned_integer, is_value_unambiguously_represented_in_contiguous_memory_region, is_value_unambiguously_represented_in_fixed_size_contiguous_memory_region +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region, is_value_unambiguously_represented_in_fixed_size_contiguous_memory_region // 2. datatype creation with column: create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) // 3. serde related: get_serde (int nesting_level=1) // to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) @@ -114,7 +114,6 @@ class CommonDataTypeTest : public ::testing::Test { size_t scale = -1; bool is_null_literal = true; bool is_value_represented_by_number = false; - bool is_value_represented_by_unsigned_integer = false; PColumnMeta* pColumnMeta = nullptr; DataTypeSerDeSPtr serde = nullptr; // bool is_value_unambiguously_represented_in_contiguous_memory_region = false; @@ -146,8 +145,6 @@ class CommonDataTypeTest : public ::testing::Test { ASSERT_EQ(data_type->is_null_literal(), meta_info.is_null_literal); ASSERT_EQ(data_type->is_value_represented_by_number(), meta_info.is_value_represented_by_number); - ASSERT_EQ(data_type->is_value_represented_by_unsigned_integer(), - meta_info.is_value_represented_by_unsigned_integer); // ASSERT_EQ(data_type->is_value_unambiguously_represented_in_contiguous_memory_region(), meta_info.is_value_unambiguously_represented_in_contiguous_memory_region); } diff --git a/be/test/vec/data_types/data_type_ip_test.cpp b/be/test/vec/data_types/data_type_ip_test.cpp index 91ae064e4473a34..72a340b9e145b82 100644 --- a/be/test/vec/data_types/data_type_ip_test.cpp +++ b/be/test/vec/data_types/data_type_ip_test.cpp @@ -46,7 +46,7 @@ // text_can_contain_only_valid_utf8 // have_maximum_size_of_value, get_maximum_size_of_value_in_memory, get_size_of_value_in_memory // get_precision, get_scale -// is_null_literal, is_value_represented_by_number, is_value_represented_by_integer, is_value_represented_by_unsigned_integer, is_value_unambiguously_represented_in_contiguous_memory_region +// is_null_literal, is_value_represented_by_number, is_value_unambiguously_represented_in_contiguous_memory_region // 2. datatype creation with column : create_column, create_column_const (size_t size, const Field &field), create_column_const_with_default_value (size_t size), get_uncompressed_serialized_bytes (const IColumn &column, int be_exec_version) // 3. 
serde related: get_serde (int nesting_level=1) // to_string (const IColumn &column, size_t row_num, BufferWritable &ostr), to_string (const IColumn &column, size_t row_num), to_string_batch (const IColumn &column, ColumnString &column_to), from_string (ReadBuffer &rb, IColumn *column) @@ -95,7 +95,6 @@ TEST_F(DataTypeIPTest, MetaInfoTest) { .scale = size_t(-1), .is_null_literal = false, .is_value_represented_by_number = true, - .is_value_represented_by_unsigned_integer = true, .pColumnMeta = col_meta.get() // .is_value_unambiguously_represented_in_contiguous_memory_region = true }; @@ -116,7 +115,6 @@ TEST_F(DataTypeIPTest, MetaInfoTest) { .scale = size_t(-1), .is_null_literal = false, .is_value_represented_by_number = true, - .is_value_represented_by_unsigned_integer = true, .pColumnMeta = col_meta6.get() // .is_value_unambiguously_represented_in_contiguous_memory_region = true }; diff --git a/cloud/src/meta-service/meta_service.h b/cloud/src/meta-service/meta_service.h index 7af96cbc14b8ee8..5374cbea741fb0e 100644 --- a/cloud/src/meta-service/meta_service.h +++ b/cloud/src/meta-service/meta_service.h @@ -729,6 +729,7 @@ class MetaServiceProxy final : public MetaService { int32_t retry_times = 0; uint64_t duration_ms = 0, retry_drift_ms = 0; while (true) { + resp->Clear(); // reset the response message in case it is reused for retry (impl_.get()->*method)(ctrl, req, resp, brpc::DoNothing()); MetaServiceCode code = resp->status().code(); if (code != MetaServiceCode::KV_TXN_STORE_GET_RETRYABLE && diff --git a/cloud/src/recycler/s3_accessor.cpp b/cloud/src/recycler/s3_accessor.cpp index 2c983a5fa0617b5..1aca88d2d1161d9 100644 --- a/cloud/src/recycler/s3_accessor.cpp +++ b/cloud/src/recycler/s3_accessor.cpp @@ -205,6 +205,7 @@ std::optional S3Conf::from_obj_store_info(const ObjectStoreInfoPB& obj_i s3_conf.region = obj_info.region(); s3_conf.bucket = obj_info.bucket(); s3_conf.prefix = obj_info.prefix(); + s3_conf.use_virtual_addressing = !obj_info.use_path_style(); return s3_conf; } @@ -289,7 +290,7 @@ int S3Accessor::init() { auto s3_client = std::make_shared( std::move(aws_cred), std::move(aws_config), Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - true /* useVirtualAddressing */); + conf_.use_virtual_addressing /* useVirtualAddressing */); obj_client_ = std::make_shared(std::move(s3_client), conf_.endpoint); return 0; } diff --git a/cloud/src/recycler/s3_accessor.h b/cloud/src/recycler/s3_accessor.h index 6886ee5e7c56406..e9640b5693a1f59 100644 --- a/cloud/src/recycler/s3_accessor.h +++ b/cloud/src/recycler/s3_accessor.h @@ -69,6 +69,7 @@ struct S3Conf { std::string region; std::string bucket; std::string prefix; + bool use_virtual_addressing {true}; enum Provider : uint8_t { S3, diff --git a/cloud/src/recycler/s3_obj_client.cpp b/cloud/src/recycler/s3_obj_client.cpp index 53fa821c7e55036..c8dcdad18d71153 100644 --- a/cloud/src/recycler/s3_obj_client.cpp +++ b/cloud/src/recycler/s3_obj_client.cpp @@ -284,6 +284,7 @@ ObjectStorageResponse S3ObjClient::delete_object(ObjectStoragePathRef path) { SCOPED_BVAR_LATENCY(s3_bvar::s3_delete_object_latency); return s3_client_->DeleteObject(request); }); + TEST_SYNC_POINT_CALLBACK("S3ObjClient::delete_object", &outcome); if (!outcome.IsSuccess()) { LOG_WARNING("failed to delete object") .tag("endpoint", endpoint_) diff --git a/cloud/test/s3_accessor_test.cpp b/cloud/test/s3_accessor_test.cpp index 0dd51b749d86e2e..c19f5f6a1dfdfb2 100644 --- a/cloud/test/s3_accessor_test.cpp +++ b/cloud/test/s3_accessor_test.cpp @@ -17,8 +17,10 @@ 
#include "recycler/s3_accessor.h" +#include #include #include +#include #include #include @@ -320,4 +322,70 @@ TEST(S3AccessorTest, gcs) { test_s3_accessor(*accessor); } +TEST(S3AccessorTest, path_style_test) { + ObjectStoreInfoPB obj_info; + obj_info.set_prefix("doris-debug-instance-prefix"); + obj_info.set_provider(ObjectStoreInfoPB_Provider_S3); + obj_info.set_ak("dummy_ak"); + obj_info.set_sk("dummy_sk"); + obj_info.set_endpoint("dummy-bucket"); + obj_info.set_region("cn-north-1"); + obj_info.set_bucket("dummy-bucket"); + config::max_s3_client_retry = 0; + + auto* sp = SyncPoint::get_instance(); + sp->enable_processing(); + std::vector guards; + + std::string base_domain = "s3.cn-north-1.amazonaws.com.cn"; + std::string domain_ip = "54.222.51.71"; // first ip of base_domain + // to test custom_domain, add ${domain_ip} ${custom_domain} to /etc/hosts + // otherwise the related cases will fail + std::string custom_domain = "gavin.s3.aws.com"; + // clang-format off + // http code 403 means there is nothing wrong the given domain in objinfo + // domain, use_path_style, http_code + std::vector> inputs { + {base_domain , false , 403}, // works + {base_domain , true , 403}, // works + {"http://" + base_domain , false , 403}, // works + {"http://" + base_domain , true , 403}, // works + {"https://" + base_domain , false , 403}, // works + {"https://" + base_domain , true , 403}, // works + {"http://" + domain_ip , false , 301}, // works, ip with virtual addressing + {"http://" + domain_ip , true , 301}, // works, ip with path style + {custom_domain , false , -1} , // custom_domain could not resolve with virtual addressing + {custom_domain , true , 403}, // custom_domain working with path style + {"http://" + custom_domain , false , -1} , // custom_domain could not resolve with virtual addressing + {"https://" + custom_domain, true , -1}, // certificate issue, custom_domain does not attached with any certs + // {"https://54.222.51.71" , false , -1} , // certificate issue + // {"https://54.222.51.71" , true , -1} , // certificate issue + }; + + int case_idx = 0; + sp->set_call_back("S3ObjClient::delete_object", + [&case_idx, &inputs](auto&& args) { + auto* res = try_any_cast(args[0]); + EXPECT_EQ(std::get<2>(inputs[case_idx]), static_cast(res->GetError().GetResponseCode())) << "<<<<<<<<<<<<<<<<<<<<< " << case_idx; + case_idx++; + }, + &guards.emplace_back()); + // clang-format on + + for (auto& i : inputs) { + obj_info.set_endpoint(std::get<0>(i)); + obj_info.set_use_path_style(std::get<1>(i)); + auto s3_conf = S3Conf::from_obj_store_info(obj_info); + EXPECT_EQ(s3_conf->use_virtual_addressing, !obj_info.use_path_style()) << case_idx; + std::shared_ptr accessor; + int ret = S3Accessor::create(*s3_conf, &accessor); + EXPECT_EQ(ret, 0) << "<<<<<<<<<<<<<<<<<<<<< " << case_idx; + ret = accessor->init(); + EXPECT_EQ(ret, 0) << "<<<<<<<<<<<<<<<<<<<<< " << case_idx; + // this function call will trigger syncpoint callback to increment case_idx + accessor->delete_file("abc"); // try to delete a nonexisted file, ignore the result + // EXPECT_EQ(ret, exp) << "<<<<<<<<<<<<<<<<<<<<< " << case_idx << " domain " << std::get<0>(i); + } +} + } // namespace doris::cloud diff --git a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java index a284c7adcdd1057..bc082e567324760 100644 --- a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java +++ 
b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java @@ -17,11 +17,10 @@ package org.apache.doris.hudi; - import org.apache.doris.common.jni.JniScanner; import org.apache.doris.common.jni.vec.ColumnType; import org.apache.doris.common.security.authentication.AuthenticationConfig; -import org.apache.doris.common.security.authentication.HadoopUGI; +import org.apache.doris.common.security.authentication.HadoopAuthenticator; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.avro.generic.GenericDatumReader; @@ -160,14 +159,15 @@ public void open() throws IOException { cleanResolverLock.readLock().lock(); try { lastUpdateTime.set(System.currentTimeMillis()); + AuthenticationConfig authenticationConfig = AuthenticationConfig.getKerberosConfig(split.hadoopConf()); + HadoopAuthenticator hadoopAuthenticator = HadoopAuthenticator + .getHadoopAuthenticator(authenticationConfig); if (split.incrementalRead()) { - recordIterator = HadoopUGI.ugiDoAs(AuthenticationConfig.getKerberosConfig( - split.hadoopConf()), - () -> new MORIncrementalSplitReader(split).buildScanIterator(new Filter[0])); + recordIterator = hadoopAuthenticator.doAs(() -> new MORIncrementalSplitReader(split) + .buildScanIterator(new Filter[0])); } else { - recordIterator = HadoopUGI.ugiDoAs(AuthenticationConfig.getKerberosConfig( - split.hadoopConf()), - () -> new MORSnapshotSplitReader(split).buildScanIterator(new Filter[0])); + recordIterator = hadoopAuthenticator.doAs(() -> new MORSnapshotSplitReader(split) + .buildScanIterator(new Filter[0])); } if (AVRO_RESOLVER_CACHE != null && AVRO_RESOLVER_CACHE.get() != null) { cachedResolvers.computeIfAbsent(Thread.currentThread().getId(), diff --git a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java index 3e07c8917905a35..c0fbec633e897cf 100644 --- a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java +++ b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/Utils.java @@ -18,7 +18,7 @@ package org.apache.doris.hudi; import org.apache.doris.common.security.authentication.AuthenticationConfig; -import org.apache.doris.common.security.authentication.HadoopUGI; +import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; @@ -77,7 +77,13 @@ public static void killProcess(long pid) { public static HoodieTableMetaClient getMetaClient(Configuration conf, String basePath) { HadoopStorageConfiguration hadoopStorageConfiguration = new HadoopStorageConfiguration(conf); - return HadoopUGI.ugiDoAs(AuthenticationConfig.getKerberosConfig(conf), () -> HoodieTableMetaClient.builder() - .setConf(hadoopStorageConfiguration).setBasePath(basePath).build()); + AuthenticationConfig authenticationConfig = AuthenticationConfig.getKerberosConfig(conf); + HadoopAuthenticator hadoopAuthenticator = HadoopAuthenticator.getHadoopAuthenticator(authenticationConfig); + try { + return hadoopAuthenticator.doAs(() -> HoodieTableMetaClient.builder() + .setConf(hadoopStorageConfiguration).setBasePath(basePath).build()); + } catch (IOException e) { + throw new RuntimeException("Failed to get HoodieTableMetaClient", e); + } } } diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java 
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java index 1a919d846310d00..37a58075978c186 100644 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java +++ b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/ColumnType.java @@ -207,16 +207,16 @@ public int getScale() { public int metaSize() { switch (type) { case UNSUPPORTED: - // set nullMap address as 0. - return 1; + // const flag / set nullMap address as 0. + return 2; case ARRAY: case MAP: case STRUCT: - // array & map : [nullMap | offsets | ... ] - // struct : [nullMap | ... ] - int size = 2; + // array & map : [const | nullMap | offsets | ... ] + // struct : [const | nullMap | ... ] + int size = 3; if (type == Type.STRUCT) { - size = 1; + size = 2; } for (ColumnType c : childTypes) { size += c.metaSize(); @@ -226,11 +226,11 @@ public int metaSize() { case BINARY: case CHAR: case VARCHAR: - // [nullMap | offsets | data ] - return 3; + // [const | nullMap | offsets | data ] + return 4; default: - // [nullMap | data] - return 2; + // [const | nullMap | data] + return 3; } } diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java index 1542174cc1c5873..839027b03b61c51 100644 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java +++ b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java @@ -24,6 +24,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import org.apache.log4j.Logger; import java.math.BigDecimal; import java.math.BigInteger; @@ -41,10 +42,14 @@ * see WritableColumnVector */ public class VectorColumn { + public static final Logger LOG = Logger.getLogger(VectorColumn.class); // String is stored as array // The default string length to initialize the capacity. 
private static final int DEFAULT_STRING_LENGTH = 4; + // a new flag marking a const column + private boolean isConst = false; + // NullMap column address private long nullMap; private boolean[] nulls = null; @@ -75,6 +80,7 @@ public class VectorColumn { // Create writable column private VectorColumn(ColumnType columnType, int capacity) { + this.isConst = false; this.columnType = columnType; this.capacity = 0; this.nullMap = 0; @@ -106,6 +112,7 @@ private VectorColumn(ColumnType columnType, int capacity) { // restore the child of string column & restore meta column private VectorColumn(long address, int capacity, ColumnType columnType) { + this.isConst = false; this.columnType = columnType; this.capacity = capacity; this.nullMap = 0; @@ -122,7 +129,7 @@ private VectorColumn(long address, int capacity, ColumnType columnType) { } } - // Create readable column + // Create readable column; the passed-in column may be a const column private VectorColumn(ColumnType columnType, int numRows, long columnMetaAddress) { if (columnType.isUnsupported()) { throw new RuntimeException("Unsupported type for column: " + columnType.getName()); @@ -130,14 +137,27 @@ private VectorColumn(ColumnType columnType, int numRows, long columnMetaAddress) long address = columnMetaAddress; this.capacity = numRows; this.columnType = columnType; + long constFlag = OffHeap.getLong(null, address); + address += 8; + if (constFlag != 0) { + this.isConst = true; + } + // record the real number of rows even for a const column, + // as a const column stores only one row + int realRows = this.isConst ? 1 : numRows; this.nullMap = OffHeap.getLong(null, address); address += 8; this.numNulls = 0; if (this.nullMap != 0) { - nulls = OffHeap.getBoolean(null, nullMap, numRows); + nulls = OffHeap.getBoolean(null, nullMap, realRows); for (boolean isNull : nulls) { if (isNull) { - this.numNulls++; + if (this.isConst) { + // a null const column means every row is null + this.numNulls += numRows; + } else { + this.numNulls++; + } } } } @@ -149,7 +169,7 @@ private VectorColumn(ColumnType columnType, int numRows, long columnMetaAddress) if (!columnType.isStruct()) { this.offsets = OffHeap.getLong(null, address); address += 8; - childRows = getArrayEndOffset(numRows - 1); + childRows = getArrayEndOffset(realRows - 1); } this.data = 0; List children = columnType.getChildTypes(); @@ -162,7 +182,7 @@ private VectorColumn(ColumnType columnType, int numRows, long columnMetaAddress) this.offsets = OffHeap.getLong(null, address); address += 8; this.data = 0; - int length = OffHeap.getInt(null, this.offsets + (numRows - 1) * 4L); + int length = OffHeap.getInt(null, this.offsets + (realRows - 1) * 4L); childColumns = new VectorColumn[1]; childColumns[0] = new VectorColumn(OffHeap.getLong(null, address), length, new ColumnType("#stringBytes", Type.BYTE)); @@ -221,6 +241,10 @@ public ColumnType getColumnType() { return columnType; } + public boolean isConst() { + return isConst; + } + /** * Release columns and meta information */ @@ -248,6 +272,7 @@ public void close() { capacity = 0; numNulls = 0; appendIndex = 0; + isConst = false; } private void throwReserveException(int requiredCapacity, Throwable cause) { @@ -1450,6 +1475,11 @@ public void appendObjectColumn(Object[] batch, boolean isNullable) { } public Object[] getObjectColumn(int start, int end) { + // a const column stores only one row + if (isConst()) { + start = 0; + end = 1; + } switch (columnType.getType()) { case BOOLEAN: return getBooleanColumn(start, end); @@ -1583,6 +1613,7 @@ public void
appendValue(ColumnValue o) { // for test only. public void dump(StringBuilder sb, int i) { + i = isConst() ? 0 : i; if (isNullAt(i)) { sb.append("NULL"); return; diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java index a8849d1fa2044d1..c94c95b3e8be31b 100644 --- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java +++ b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java @@ -21,6 +21,8 @@ import org.apache.doris.common.jni.utils.OffHeap; import org.apache.doris.common.jni.vec.ColumnType.Type; +import org.apache.log4j.Logger; + import java.util.Collections; import java.util.Map; @@ -28,6 +30,7 @@ * Store a batch of data as vector table. */ public class VectorTable { + public static final Logger LOG = Logger.getLogger(VectorTable.class); private final VectorColumn[] columns; private final ColumnType[] columnTypes; private final String[] fields; @@ -199,6 +202,10 @@ public int getNumColumns() { return columns.length; } + public boolean isConstColumn(int idx) { + return columns[idx].isConst(); + } + public long getMetaAddress() { if (!onlyReadable) { meta.reset(); @@ -229,6 +236,18 @@ public void close() { // for test only. public String dump(int rowLimit) { StringBuilder sb = new StringBuilder(); + for (int col = 0; col < columns.length; col++) { + ColumnType.Type typeValue = columns[col].getColumnPrimitiveType(); + sb.append(typeValue.name()); + sb.append("(rows: " + columns[col].numRows()); + sb.append(")(const: "); + sb.append(columns[col].isConst() ? "true) " : "false) "); + if (col != 0) { + sb.append(", "); + } + } + sb.append("\n"); + for (int i = 0; i < rowLimit && i < getNumRows(); i++) { for (int j = 0; j < columns.length; j++) { if (j != 0) { diff --git a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java index 685e20de843869d..1b5bff1e7c11d1e 100644 --- a/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java +++ b/fe/be-java-extensions/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java @@ -116,7 +116,8 @@ public long evaluate(Map inputParams, Map output Object[] parameters = new Object[numColumns]; for (int i = 0; i < numRows; ++i) { for (int j = 0; j < numColumns; ++j) { - parameters[j] = inputs[j][i]; + int row = inputTable.isConstColumn(j) ? 
0 : i; + parameters[j] = inputs[j][row]; } result[i] = methodAccess.invoke(udf, evaluateIndex, parameters); } diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java index 6cbed70adc7d466..d6325bdae4673aa 100644 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java +++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java @@ -25,6 +25,7 @@ import com.aliyun.odps.account.AliyunAccount; import com.aliyun.odps.table.configuration.CompressionCodec; import com.aliyun.odps.table.configuration.ReaderOptions; +import com.aliyun.odps.table.configuration.RestOptions; import com.aliyun.odps.table.enviroment.Credentials; import com.aliyun.odps.table.enviroment.EnvironmentSettings; import com.aliyun.odps.table.read.SplitReader; @@ -67,6 +68,10 @@ public class MaxComputeJniScanner extends JniScanner { private static final String SCAN_SERIALIZER = "scan_serializer"; private static final String TIME_ZONE = "time_zone"; + private static final String CONNECT_TIMEOUT = "connect_timeout"; + private static final String READ_TIMEOUT = "read_timeout"; + private static final String RETRY_COUNT = "retry_count"; + private enum SplitType { BYTE_SIZE, ROW_OFFSET @@ -136,16 +141,40 @@ public MaxComputeJniScanner(int batchSize, Map params) { Credentials credentials = Credentials.newBuilder().withAccount(odps.getAccount()) .withAppAccount(odps.getAppAccount()).build(); + + int connectTimeout = 10; // 10s + if (!Strings.isNullOrEmpty(params.get(CONNECT_TIMEOUT))) { + connectTimeout = Integer.parseInt(params.get(CONNECT_TIMEOUT)); + } + + int readTimeout = 120; // 120s + if (!Strings.isNullOrEmpty(params.get(READ_TIMEOUT))) { + readTimeout = Integer.parseInt(params.get(READ_TIMEOUT)); + } + + int retryTimes = 4; // 4 times + if (!Strings.isNullOrEmpty(params.get(RETRY_COUNT))) { + retryTimes = Integer.parseInt(params.get(RETRY_COUNT)); + } + + RestOptions restOptions = RestOptions.newBuilder() + .withConnectTimeout(connectTimeout) + .withReadTimeout(readTimeout) + .withRetryTimes(retryTimes).build(); + settings = EnvironmentSettings.newBuilder() .withCredentials(credentials) .withServiceEndpoint(odps.getEndpoint()) .withQuotaName(quota) + .withRestOptions(restOptions) .build(); try { scan = (TableBatchReadSession) deserialize(scanSerializer); } catch (Exception e) { - LOG.info("deserialize TableBatchReadSession failed.", e); + String errorMsg = "Failed to deserialize table batch read session."; + LOG.warn(errorMsg, e); + throw new IllegalArgumentException(errorMsg, e); } } @@ -176,11 +205,11 @@ public void open() throws IOException { .withReuseBatch(true) .build()); - } catch (IOException e) { - LOG.info("createArrowReader failed.", e); } catch (Exception e) { + String errorMsg = "MaxComputeJniScanner Failed to open table batch read session."; + LOG.warn(errorMsg, e); close(); - throw new IOException(e.getMessage(), e); + throw new IOException(errorMsg, e); } } @@ -215,8 +244,9 @@ private int readVectors(int expectedRows) throws IOException { break; } } catch (Exception e) { - LOG.info("currentSplitReader hasNext fail", e); - break; + String errorMsg = "MaxComputeJniScanner readVectors hasNext fail"; + LOG.warn(errorMsg, e); + throw new IOException(e.getMessage(), e); } try { @@ -241,7 +271,10 @@ private int readVectors(int 
expectedRows) throws IOException { } curReadRows += batchRows; } catch (Exception e) { - throw new RuntimeException("Fail to read arrow data, reason: " + e.getMessage(), e); + String errorMsg = String.format("MaxComputeJniScanner Failed to read arrow data. " + + "curReadRows = %d, expectedRows = %d", curReadRows, expectedRows); + LOG.warn(errorMsg, e); + throw new RuntimeException(errorMsg, e); + } } return curReadRows; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index 2ef391bb7ab3905..247c61ecd365ec3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -425,6 +425,10 @@ public class Config extends ConfigBase { + "`/proc/sys/net/core/somaxconn` at the same time"}) public static int mysql_nio_backlog_num = 1024; + @ConfField(description = {"是否启用 mysql 连接中的 TCP keep alive,默认禁用", + "Whether to enable TCP Keep-Alive for MySQL connections, disabled by default"}) + public static boolean mysql_nio_enable_keep_alive = false; + @ConfField(description = {"thrift client 的连接超时时间,单位是毫秒。0 表示不设置超时时间。", "The connection timeout of thrift client, in milliseconds. 0 means no timeout."}) public static int thrift_client_timeout_ms = 0; diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java b/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java deleted file mode 100644 index 2f73440ecfa368c..000000000000000 --- a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
- -package org.apache.doris.common.security.authentication; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; - -@Deprecated -public class HadoopUGI { - private static final Logger LOG = LogManager.getLogger(HadoopUGI.class); - - /** - * login and return hadoop ugi - * @param config auth config - * @return ugi - */ - private static UserGroupInformation loginWithUGI(AuthenticationConfig config) { - if (config == null || !config.isValid()) { - return null; - } - if (config instanceof KerberosAuthenticationConfig) { - try { - // TODO: remove after iceberg and hudi kerberos test case pass - try { - // login hadoop with keytab and try checking TGT - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - LOG.debug("Current login user: {}", ugi.getUserName()); - String principal = ((KerberosAuthenticationConfig) config).getKerberosPrincipal(); - if (ugi.hasKerberosCredentials() && StringUtils.equals(ugi.getUserName(), principal)) { - // if the current user is logged by kerberos and is the same user - // just use checkTGTAndReloginFromKeytab because this method will only relogin - // when the TGT is expired or is close to expiry - ugi.checkTGTAndReloginFromKeytab(); - return ugi; - } - } catch (IOException e) { - LOG.warn("A SecurityException occurs with kerberos, do login immediately.", e); - } - return new HadoopKerberosAuthenticator((KerberosAuthenticationConfig) config).getUGI(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } else { - String hadoopUserName = ((SimpleAuthenticationConfig) config).getUsername(); - if (hadoopUserName == null) { - hadoopUserName = "hadoop"; - ((SimpleAuthenticationConfig) config).setUsername(hadoopUserName); - LOG.debug(AuthenticationConfig.HADOOP_USER_NAME + " is unset, use default user: hadoop"); - } - - UserGroupInformation ugi; - try { - ugi = UserGroupInformation.getLoginUser(); - if (ugi.getUserName().equals(hadoopUserName)) { - return ugi; - } - } catch (IOException e) { - LOG.warn("A SecurityException occurs with simple, do login immediately.", e); - } - - ugi = UserGroupInformation.createRemoteUser(hadoopUserName); - UserGroupInformation.setLoginUser(ugi); - LOG.debug("Login by proxy user, hadoop.username: {}", hadoopUserName); - return ugi; - } - } - - public static T ugiDoAs(AuthenticationConfig authConf, PrivilegedExceptionAction action) { - UserGroupInformation ugi = HadoopUGI.loginWithUGI(authConf); - try { - if (ugi != null) { - if (authConf instanceof KerberosAuthenticationConfig) { - ugi.checkTGTAndReloginFromKeytab(); - } - return ugi.doAs(action); - } else { - return action.run(); - } - } catch (Exception e) { - throw new RuntimeException(e.getMessage(), e); - } - } -} diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index fd40e2c14007177..f229a1b2f4a7b1e 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -216,6 +216,8 @@ supportedDropStatement | DROP FILE name=STRING_LITERAL ((FROM | IN) database=identifier)? properties=propertyClause #dropFile | DROP WORKLOAD POLICY (IF EXISTS)? 
name=identifierOrText #dropWorkloadPolicy + | DROP REPOSITORY name=identifier #dropRepository + ; supportedShowStatement @@ -684,7 +686,6 @@ unsupportedDropStatement functionIdentifier LEFT_PAREN functionArguments? RIGHT_PAREN #dropFunction | DROP TABLE (IF EXISTS)? name=multipartIdentifier FORCE? #dropTable | DROP VIEW (IF EXISTS)? name=multipartIdentifier #dropView - | DROP REPOSITORY name=identifier #dropRepository | DROP INDEX (IF EXISTS)? name=identifier ON tableName=multipartIdentifier #dropIndex | DROP RESOURCE (IF EXISTS)? name=identifierOrText #dropResource | DROP ROW POLICY (IF EXISTS)? policyName=identifier diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java index 92a74e46b922be1..f765dd1bb7fdcab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java @@ -242,9 +242,9 @@ private void createShadowIndexReplicaForPartition(OlapTable tbl) throws Exceptio short shadowShortKeyColumnCount = indexShortKeyMap.get(shadowIdxId); List shadowSchema = indexSchemaMap.get(shadowIdxId); - List clusterKeyIndexes = null; + List clusterKeyUids = null; if (shadowIdxId == tbl.getBaseIndexId() || isShadowIndexOfBase(shadowIdxId, tbl)) { - clusterKeyIndexes = OlapTable.getClusterKeyIndexes(shadowSchema); + clusterKeyUids = OlapTable.getClusterKeyUids(shadowSchema); } int shadowSchemaHash = indexSchemaVersionAndHashMap.get(shadowIdxId).schemaHash; int shadowSchemaVersion = indexSchemaVersionAndHashMap.get(shadowIdxId).schemaVersion; @@ -277,7 +277,7 @@ private void createShadowIndexReplicaForPartition(OlapTable tbl) throws Exceptio tbl.getEnableMowLightDelete(), tbl.getInvertedIndexFileStorageFormat(), tbl.rowStorePageSize(), - tbl.variantEnableFlattenNested(), clusterKeyIndexes, + tbl.variantEnableFlattenNested(), clusterKeyUids, tbl.storagePageSize()); requestBuilder.addTabletMetas(builder); } // end for rollupTablets diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java index 58e3e05e214a9d2..a624dc3c7333748 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -287,9 +287,9 @@ protected void createShadowIndexReplica() throws AlterCancelException { short shadowShortKeyColumnCount = indexShortKeyMap.get(shadowIdxId); List shadowSchema = indexSchemaMap.get(shadowIdxId); - List clusterKeyIndexes = null; + List clusterKeyUids = null; if (shadowIdxId == tbl.getBaseIndexId() || isShadowIndexOfBase(shadowIdxId, tbl)) { - clusterKeyIndexes = OlapTable.getClusterKeyIndexes(shadowSchema); + clusterKeyUids = OlapTable.getClusterKeyUids(shadowSchema); } int shadowSchemaHash = indexSchemaVersionAndHashMap.get(shadowIdxId).schemaHash; long originIndexId = indexIdMap.get(shadowIdxId); @@ -340,10 +340,10 @@ protected void createShadowIndexReplica() throws AlterCancelException { } createReplicaTask.setInvertedIndexFileStorageFormat(tbl .getInvertedIndexFileStorageFormat()); - if (!CollectionUtils.isEmpty(clusterKeyIndexes)) { - createReplicaTask.setClusterKeyIndexes(clusterKeyIndexes); - LOG.info("table: {}, partition: {}, index: {}, tablet: {}, cluster key indexes: {}", - tableId, partitionId, shadowIdxId, shadowTabletId, clusterKeyIndexes); + if 
(!CollectionUtils.isEmpty(clusterKeyUids)) { + createReplicaTask.setClusterKeyUids(clusterKeyUids); + LOG.info("table: {}, partition: {}, index: {}, tablet: {}, cluster key uids: {}", + tableId, partitionId, shadowIdxId, shadowTabletId, clusterKeyUids); } batchTask.addTask(createReplicaTask); } // end for rollupReplicas diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index 7f406d2a7e1365c..f51e63e4fbec0f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -43,12 +43,17 @@ public class IndexDef { private boolean isBuildDeferred = false; private PartitionNames partitionNames; private List columnUniqueIds = Lists.newArrayList(); + private static final int MIN_NGRAM_SIZE = 1; + private static final int MAX_NGRAM_SIZE = 255; + private static final int MIN_BF_SIZE = 64; + private static final int MAX_BF_SIZE = 65535; public static final String NGRAM_SIZE_KEY = "gram_size"; public static final String NGRAM_BF_SIZE_KEY = "bf_size"; public static final String DEFAULT_NGRAM_SIZE = "2"; public static final String DEFAULT_NGRAM_BF_SIZE = "256"; + public IndexDef(String indexName, boolean ifNotExists, List columns, IndexType indexType, Map properties, String comment) { this.indexName = indexName; @@ -238,8 +243,8 @@ public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKe throw new AnalysisException("index should only be used in columns of DUP_KEYS/UNIQUE_KEYS table" + " or key columns of AGG_KEYS table. invalid index: " + indexName); } else if (keysType == KeysType.UNIQUE_KEYS && !enableUniqueKeyMergeOnWrite - && indexType == IndexType.INVERTED && properties != null - && properties.containsKey(InvertedIndexUtil.INVERTED_INDEX_PARSER_KEY)) { + && indexType == IndexType.INVERTED && properties != null + && properties.containsKey(InvertedIndexUtil.INVERTED_INDEX_PARSER_KEY)) { throw new AnalysisException("INVERTED index with parser can NOT be used in value columns of" + " UNIQUE_KEYS table with merge_on_write disable. 
invalid index: " + indexName); } @@ -256,21 +261,29 @@ public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKe if (properties.size() != 2) { throw new AnalysisException("ngram_bf index should have gram_size and bf_size properties"); } - try { - int ngramSize = Integer.parseInt(properties.get(NGRAM_SIZE_KEY)); - int bfSize = Integer.parseInt(properties.get(NGRAM_BF_SIZE_KEY)); - if (ngramSize > 256 || ngramSize < 1) { - throw new AnalysisException("gram_size should be integer and less than 256"); - } - if (bfSize > 65535 || bfSize < 64) { - throw new AnalysisException("bf_size should be integer and between 64 and 65535"); - } - } catch (NumberFormatException e) { - throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); - } + + parseAndValidateProperty(properties, NGRAM_SIZE_KEY, MIN_NGRAM_SIZE, MAX_NGRAM_SIZE); + parseAndValidateProperty(properties, NGRAM_BF_SIZE_KEY, MIN_BF_SIZE, MAX_BF_SIZE); } } else { throw new AnalysisException("Unsupported index type: " + indexType); } } + + private void parseAndValidateProperty(Map properties, String key, int minValue, int maxValue) + throws AnalysisException { + String valueStr = properties.get(key); + if (valueStr == null) { + throw new AnalysisException("Property '" + key + "' is missing."); + } + try { + int value = Integer.parseInt(valueStr); + if (value < minValue || value > maxValue) { + throw new AnalysisException("'" + key + "' should be an integer between " + + minValue + " and " + maxValue + "."); + } + } catch (NumberFormatException e) { + throw new AnalysisException("Invalid value for '" + key + "': " + valueStr, e); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java index d70544add98747d..6a12eee3a78cb38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -281,9 +281,14 @@ public void alterRepository(AlterRepositoryStmt stmt) throws DdlException { // handle drop repository stmt public void dropRepository(DropRepositoryStmt stmt) throws DdlException { + dropRepository(stmt.getRepoName()); + } + + // handle drop repository stmt + public void dropRepository(String repoName) throws DdlException { tryLock(); try { - Repository repo = repoMgr.getRepo(stmt.getRepoName()); + Repository repo = repoMgr.getRepo(repoName); if (repo == null) { ErrorReport.reportDdlException(ErrorCode.ERR_COMMON_ERROR, "Repository does not exist"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index f1abb0c9e632bc1..6dfd02b3a426481 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -1312,9 +1312,9 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc MaterializedIndexMeta indexMeta = localTbl.getIndexMetaByIndexId(restoredIdx.getId()); List indexes = restoredIdx.getId() == localTbl.getBaseIndexId() ? 
localTbl.getCopiedIndexes() : null; - List clusterKeyIndexes = null; + List clusterKeyUids = null; if (indexMeta.getIndexId() == localTbl.getBaseIndexId() || localTbl.isShadowIndex(indexMeta.getIndexId())) { - clusterKeyIndexes = OlapTable.getClusterKeyIndexes(indexMeta.getSchema()); + clusterKeyUids = OlapTable.getClusterKeyUids(indexMeta.getSchema()); } for (Tablet restoreTablet : restoredIdx.getTablets()) { TabletRef baseTabletRef = tabletBases == null ? null : tabletBases.get(restoreTablet.getId()); @@ -1363,11 +1363,11 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc LOG.info("set base tablet {} for replica {} in restore job {}, tablet id={}", baseTabletRef.tabletId, restoreReplica.getId(), jobId, restoreTablet.getId()); } - if (!CollectionUtils.isEmpty(clusterKeyIndexes)) { - task.setClusterKeyIndexes(clusterKeyIndexes); - LOG.info("table: {}, partition: {}, index: {}, tablet: {}, cluster key indexes: {}", + if (!CollectionUtils.isEmpty(clusterKeyUids)) { + task.setClusterKeyUids(clusterKeyUids); + LOG.info("table: {}, partition: {}, index: {}, tablet: {}, cluster key uids: {}", localTbl.getId(), restorePart.getId(), restoredIdx.getId(), restoreTablet.getId(), - clusterKeyIndexes); + clusterKeyUids); } batchTask.addTask(task); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index e4adf1f03963e94..c1d62a4be0d4d55 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -3292,14 +3292,14 @@ private static List getVisibleVersionFromMeta(List dbIds, List } } - public static List getClusterKeyIndexes(List columns) { - Map clusterKeyIndexes = new TreeMap<>(); + public static List getClusterKeyUids(List columns) { + Map clusterKeyUids = new TreeMap<>(); for (Column column : columns) { if (column.isClusterKey()) { - clusterKeyIndexes.put(column.getClusterKeyId(), column.getUniqueId()); + clusterKeyUids.put(column.getClusterKeyId(), column.getUniqueId()); } } - return clusterKeyIndexes.isEmpty() ? null : new ArrayList<>(clusterKeyIndexes.values()); + return clusterKeyUids.isEmpty() ? 
null : new ArrayList<>(clusterKeyUids.values()); } public long getVisibleVersionTime() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index e14b4efb0d590d7..fb0df9e488b3c20 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -156,10 +156,10 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa } else { indexes = Lists.newArrayList(); } - List clusterKeyIndexes = null; + List clusterKeyUids = null; if (indexId == tbl.getBaseIndexId()) { - // only base and shadow index need cluster key indexes - clusterKeyIndexes = OlapTable.getClusterKeyIndexes(columns); + // only base and shadow index need cluster key unique column ids + clusterKeyUids = OlapTable.getClusterKeyUids(columns); } Cloud.CreateTabletsRequest.Builder requestBuilder = Cloud.CreateTabletsRequest.newBuilder(); List rowStoreColumns = @@ -180,7 +180,7 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa tbl.getEnableMowLightDelete(), tbl.getInvertedIndexFileStorageFormat(), tbl.rowStorePageSize(), - tbl.variantEnableFlattenNested(), clusterKeyIndexes, + tbl.variantEnableFlattenNested(), clusterKeyUids, tbl.storagePageSize()); requestBuilder.addTabletMetas(builder); } @@ -231,7 +231,7 @@ public OlapFile.TabletMetaCloudPB.Builder createTabletMetaBuilder(long tableId, Long timeSeriesCompactionLevelThreshold, boolean disableAutoCompaction, List rowStoreColumnUniqueIds, boolean enableMowLightDelete, TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat, long pageSize, - boolean variantEnableFlattenNested, List clusterKeyIdxes, + boolean variantEnableFlattenNested, List clusterKeyUids, long storagePageSize) throws DdlException { OlapFile.TabletMetaCloudPB.Builder builder = OlapFile.TabletMetaCloudPB.newBuilder(); builder.setTableId(tableId); @@ -365,8 +365,8 @@ public OlapFile.TabletMetaCloudPB.Builder createTabletMetaBuilder(long tableId, schemaBuilder.setRowStorePageSize(pageSize); schemaBuilder.setStoragePageSize(storagePageSize); schemaBuilder.setEnableVariantFlattenNested(variantEnableFlattenNested); - if (!CollectionUtils.isEmpty(clusterKeyIdxes)) { - schemaBuilder.addAllClusterKeyIdxes(clusterKeyIdxes); + if (!CollectionUtils.isEmpty(clusterKeyUids)) { + schemaBuilder.addAllClusterKeyUids(clusterKeyUids); } OlapFile.TabletSchemaCloudPB schema = schemaBuilder.build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 915b5d48f013b99..536ca5633bf30a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -637,6 +637,9 @@ public static Set analyzeBloomFilterColumns(Map properti } String[] bfColumnArr = bfColumnsStr.split(COMMA_SEPARATOR); + if (bfColumnArr.length == 0) { + return bfColumns; + } Set bfColumnSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); for (String bfColumn : bfColumnArr) { bfColumn = bfColumn.trim(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java index 9845c0070fc4308..29fdb2b09acfcd6 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java @@ -126,7 +126,6 @@ public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { output.append(prefix); if (isBatchMode()) { output.append("(approximate)"); - splitAssignment.stop(); } output.append("inputSplitNum=").append(selectedSplitNum).append(", totalFileSize=") .append(totalFileSize).append(", scanRanges=").append(scanRangeLocations.size()).append("\n"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 7a645d267b3fe26..85b7eb245f4ecf4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -2174,10 +2174,10 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa short shortKeyColumnCount = indexMeta.getShortKeyColumnCount(); TStorageType storageType = indexMeta.getStorageType(); List schema = indexMeta.getSchema(); - List clusterKeyIndexes = null; + List clusterKeyUids = null; if (indexId == tbl.getBaseIndexId()) { - // only base and shadow index need cluster key indexes - clusterKeyIndexes = OlapTable.getClusterKeyIndexes(schema); + // only base and shadow index need cluster key unique column ids + clusterKeyUids = OlapTable.getClusterKeyUids(schema); } KeysType keysType = indexMeta.getKeysType(); List indexes = indexId == tbl.getBaseIndexId() ? tbl.getCopiedIndexes() : null; @@ -2211,10 +2211,10 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa task.setStorageFormat(tbl.getStorageFormat()); task.setInvertedIndexFileStorageFormat(tbl.getInvertedIndexFileStorageFormat()); - if (!CollectionUtils.isEmpty(clusterKeyIndexes)) { - task.setClusterKeyIndexes(clusterKeyIndexes); - LOG.info("table: {}, partition: {}, index: {}, tablet: {}, cluster key indexes: {}", - tbl.getId(), partitionId, indexId, tabletId, clusterKeyIndexes); + if (!CollectionUtils.isEmpty(clusterKeyUids)) { + task.setClusterKeyUids(clusterKeyUids); + LOG.info("table: {}, partition: {}, index: {}, tablet: {}, cluster key uids: {}", + tbl.getId(), partitionId, indexId, tabletId, clusterKeyUids); } batchTask.addTask(task); // add to AgentTaskQueue for handling finish report. 
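Side note (not part of the patch): the rename from getClusterKeyIndexes to getClusterKeyUids keeps the same contract visible in the OlapTable hunk above -- the returned list holds the unique ids of cluster key columns ordered by cluster key id (collected through a TreeMap), and is null when the schema has no cluster keys; the CreateReplicaTask and TabletSchemaCloudPB callers simply forward that ordered list. A minimal, self-contained sketch of that ordering behavior, using a hypothetical ClusterKeyUidsSketch class and plain int pairs in place of Column objects:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class ClusterKeyUidsSketch {
    // Each int[] is a hypothetical (clusterKeyId, uniqueId) pair standing in for a Column;
    // clusterKeyId < 0 marks a column that is not part of the cluster key.
    static List<Integer> getClusterKeyUids(List<int[]> columns) {
        Map<Integer, Integer> clusterKeyUids = new TreeMap<>();
        for (int[] column : columns) {
            if (column[0] >= 0) {
                clusterKeyUids.put(column[0], column[1]);
            }
        }
        return clusterKeyUids.isEmpty() ? null : new ArrayList<>(clusterKeyUids.values());
    }

    public static void main(String[] args) {
        // Columns declared out of cluster-key order: key ids 1, 0, none, 2 with uids 12, 10, 11, 13.
        List<int[]> columns = Arrays.asList(
                new int[] {1, 12}, new int[] {0, 10}, new int[] {-1, 11}, new int[] {2, 13});
        // Prints [10, 12, 13]: uids follow cluster key id order, not declaration order.
        System.out.println(getClusterKeyUids(columns));
    }
}
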
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 2115f47d777b80c..7584b5b392feb51 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.hive; +import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ListPartitionItem; @@ -31,6 +32,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.SchemaCacheValue; +import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hudi.HudiUtils; import org.apache.doris.datasource.iceberg.IcebergUtils; import org.apache.doris.datasource.mvcc.MvccSnapshot; @@ -41,6 +43,7 @@ import org.apache.doris.mtmv.MTMVSnapshotIf; import org.apache.doris.mtmv.MTMVTimestampSnapshot; import org.apache.doris.nereids.exceptions.NotSupportedException; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.qe.GlobalVariable; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; @@ -302,7 +305,28 @@ public List getPartitionColumns(Optional snapshot) { @Override public boolean supportInternalPartitionPruned() { - return getDlaType() == DLAType.HIVE; + return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI; + } + + public SelectedPartitions initHudiSelectedPartitions(Optional tableSnapshot) { + if (getDlaType() != DLAType.HUDI) { + return SelectedPartitions.NOT_PRUNED; + } + + if (getPartitionColumns().isEmpty()) { + return SelectedPartitions.NOT_PRUNED; + } + TablePartitionValues tablePartitionValues = HudiUtils.getPartitionValues(tableSnapshot, this); + + Map idToPartitionItem = tablePartitionValues.getIdToPartitionItem(); + Map idToNameMap = tablePartitionValues.getPartitionIdToNameMap(); + + Map nameToPartitionItems = Maps.newHashMapWithExpectedSize(idToPartitionItem.size()); + for (Entry entry : idToPartitionItem.entrySet()) { + nameToPartitionItems.put(idToNameMap.get(entry.getKey()), entry.getValue()); + } + + return new SelectedPartitions(nameToPartitionItems.size(), nameToPartitionItems, false); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java index 97f86612a495daf..884cfbee45ba9f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java @@ -40,7 +40,7 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.DdlException; import org.apache.doris.common.security.authentication.AuthenticationConfig; -import org.apache.doris.common.security.authentication.HadoopUGI; +import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.doris.datasource.ExternalCatalog; import org.apache.doris.fs.remote.dfs.DFSFileSystem; import org.apache.doris.thrift.TExprOpcode; @@ -68,6 +68,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.io.IOException; import 
java.security.PrivilegedExceptionAction; import java.time.LocalDateTime; import java.time.ZoneId; @@ -823,19 +824,22 @@ public static T ugiDoAs(long catalogId, PrivilegedExceptionAction action) public static T ugiDoAs(Configuration conf, PrivilegedExceptionAction action) { // if hive config is not ready, then use hadoop kerberos to login - AuthenticationConfig krbConfig = AuthenticationConfig.getKerberosConfig(conf, - AuthenticationConfig.HADOOP_KERBEROS_PRINCIPAL, - AuthenticationConfig.HADOOP_KERBEROS_KEYTAB); - return HadoopUGI.ugiDoAs(krbConfig, action); + AuthenticationConfig authenticationConfig = AuthenticationConfig.getKerberosConfig(conf); + HadoopAuthenticator hadoopAuthenticator = HadoopAuthenticator.getHadoopAuthenticator(authenticationConfig); + try { + return hadoopAuthenticator.doAs(action); + } catch (IOException e) { + LOG.warn("HiveMetaStoreClientHelper ugiDoAs failed.", e); + throw new RuntimeException(e); + } } public static HoodieTableMetaClient getHudiClient(HMSExternalTable table) { String hudiBasePath = table.getRemoteTable().getSd().getLocation(); Configuration conf = getConfiguration(table); HadoopStorageConfiguration hadoopStorageConfiguration = new HadoopStorageConfiguration(conf); - return HadoopUGI.ugiDoAs(AuthenticationConfig.getKerberosConfig(conf), - () -> HoodieTableMetaClient.builder().setConf(hadoopStorageConfiguration).setBasePath(hudiBasePath) - .build()); + return ugiDoAs(conf, () -> HoodieTableMetaClient.builder().setConf(hadoopStorageConfiguration) + .setBasePath(hudiBasePath).build()); } public static Configuration getConfiguration(HMSExternalTable table) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java index c98d994a28a08f6..0f38abafaa4d984 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java @@ -17,24 +17,35 @@ package org.apache.doris.datasource.hudi; +import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; +import org.apache.doris.datasource.TablePartitionValues; +import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; +import org.apache.doris.datasource.hudi.source.HudiCachedPartitionProcessor; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; +import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.Option; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; public class HudiUtils { @@ -231,4 +242,43 @@ private static Type handleUnionType(Schema avroSchema) { } return Type.UNSUPPORTED; } + + public static TablePartitionValues getPartitionValues(Optional 
tableSnapshot, + HMSExternalTable hmsTable) { + TablePartitionValues partitionValues = new TablePartitionValues(); + if (hmsTable.getPartitionColumns().isEmpty()) { + //isn't partition table. + return partitionValues; + } + + HoodieTableMetaClient hudiClient = HiveMetaStoreClientHelper.getHudiClient(hmsTable); + HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() + .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); + boolean useHiveSyncPartition = hmsTable.useHiveSyncPartition(); + + if (tableSnapshot.isPresent()) { + if (tableSnapshot.get().getType() == TableSnapshot.VersionType.VERSION) { + // Hudi does not support `FOR VERSION AS OF`, please use `FOR TIME AS OF`"; + return partitionValues; + } + String queryInstant = tableSnapshot.get().getTime().replaceAll("[-: ]", ""); + + partitionValues = + HiveMetaStoreClientHelper.ugiDoAs( + HiveMetaStoreClientHelper.getConfiguration(hmsTable), + () -> processor.getSnapshotPartitionValues( + hmsTable, hudiClient, queryInstant, useHiveSyncPartition)); + } else { + HoodieTimeline timeline = hudiClient.getCommitsAndCompactionTimeline().filterCompletedInstants(); + Option snapshotInstant = timeline.lastInstant(); + if (!snapshotInstant.isPresent()) { + return partitionValues; + } + partitionValues = + HiveMetaStoreClientHelper.ugiDoAs( + HiveMetaStoreClientHelper.getConfiguration(hmsTable), + () -> processor.getPartitionValues(hmsTable, hudiClient, useHiveSyncPartition)); + } + return partitionValues; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index a73a2065d0ffaf3..b2cad8ab710178f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.ListPartitionItem; import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; @@ -30,12 +31,10 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.TableFormatType; -import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; import org.apache.doris.datasource.hive.HivePartition; import org.apache.doris.datasource.hive.source.HiveScanNode; import org.apache.doris.datasource.hudi.HudiUtils; -import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; @@ -70,7 +69,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -286,50 +284,29 @@ private boolean canUseNativeReader() { return !sessionVariable.isForceJniScanner() && isCowTable; } - private List getPrunedPartitions( - HoodieTableMetaClient metaClient, Option snapshotTimestamp) throws AnalysisException { + private List getPrunedPartitions(HoodieTableMetaClient metaClient) { List partitionColumnTypes = hmsTable.getPartitionColumnTypes(); if (!partitionColumnTypes.isEmpty()) { - 
HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() - .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); - TablePartitionValues partitionValues; - if (snapshotTimestamp.isPresent()) { - partitionValues = processor.getSnapshotPartitionValues( - hmsTable, metaClient, snapshotTimestamp.get(), useHiveSyncPartition); - } else { - partitionValues = processor.getPartitionValues(hmsTable, metaClient, useHiveSyncPartition); - } - if (partitionValues != null) { - // 2. prune partitions by expr - partitionValues.readLock().lock(); - try { - Map idToPartitionItem = partitionValues.getIdToPartitionItem(); - this.totalPartitionNum = idToPartitionItem.size(); - ListPartitionPrunerV2 pruner = new ListPartitionPrunerV2(idToPartitionItem, - hmsTable.getPartitionColumns(), columnNameToRange, - partitionValues.getUidToPartitionRange(), - partitionValues.getRangeToId(), - partitionValues.getSingleColumnRangeMap(), - true); - Collection filteredPartitionIds = pruner.prune(); - this.selectedPartitionNum = filteredPartitionIds.size(); - // 3. get partitions from cache - String dbName = hmsTable.getDbName(); - String tblName = hmsTable.getName(); - String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); - String basePath = metaClient.getBasePathV2().toString(); - Map partitionIdToNameMap = partitionValues.getPartitionIdToNameMap(); - Map> partitionValuesMap = partitionValues.getPartitionValuesMap(); - return filteredPartitionIds.stream().map(id -> { - String path = basePath + "/" + partitionIdToNameMap.get(id); - return new HivePartition( - dbName, tblName, false, inputFormat, path, partitionValuesMap.get(id), - Maps.newHashMap()); - }).collect(Collectors.toList()); - } finally { - partitionValues.readLock().unlock(); - } - } + this.totalPartitionNum = selectedPartitions.totalPartitionNum; + Map prunedPartitions = selectedPartitions.selectedPartitions; + this.selectedPartitionNum = prunedPartitions.size(); + + String dbName = hmsTable.getDbName(); + String tblName = hmsTable.getName(); + String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); + String basePath = metaClient.getBasePathV2().toString(); + + List hivePartitions = Lists.newArrayList(); + prunedPartitions.forEach( + (key, value) -> { + String path = basePath + "/" + key; + hivePartitions.add(new HivePartition( + dbName, tblName, false, inputFormat, path, + ((ListPartitionItem) value).getItems().get(0).getPartitionValuesAsStringList(), + Maps.newHashMap())); + } + ); + return hivePartitions; } // unpartitioned table, create a dummy partition to save location and // inputformat, @@ -420,7 +397,7 @@ public List getSplits() throws UserException { if (!partitionInit) { prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); + () -> getPrunedPartitions(hudiClient)); partitionInit = true; } List splits = Collections.synchronizedList(new ArrayList<>()); @@ -482,7 +459,7 @@ public boolean isBatchMode() { // Non partition table will get one dummy partition prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); + () -> getPrunedPartitions(hudiClient)); partitionInit = true; } int numPartitions = ConnectContext.get().getSessionVariable().getNumPartitionsInBatchMode(); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java index e6cd77103dbc3bf..06c1e55dcf6f4b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalCatalog.java @@ -33,6 +33,7 @@ import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; import com.aliyun.odps.security.SecurityManager; +import com.aliyun.odps.table.configuration.RestOptions; import com.aliyun.odps.table.configuration.SplitOptions; import com.aliyun.odps.table.enviroment.Credentials; import com.aliyun.odps.table.enviroment.EnvironmentSettings; @@ -71,6 +72,10 @@ public class MaxComputeExternalCatalog extends ExternalCatalog { private long splitRowCount; private long splitByteSize; + private int connectTimeout; + private int readTimeout; + private int retryTimes; + private static final Map REGION_ZONE_MAP; private static final List REQUIRED_PROPERTIES = ImmutableList.of( MCProperties.PROJECT, @@ -178,6 +183,17 @@ protected void initLocalObjectsImpl() { .build(); } + connectTimeout = Integer.parseInt( + props.getOrDefault(MCProperties.CONNECT_TIMEOUT, MCProperties.DEFAULT_CONNECT_TIMEOUT)); + readTimeout = Integer.parseInt( + props.getOrDefault(MCProperties.READ_TIMEOUT, MCProperties.DEFAULT_READ_TIMEOUT)); + retryTimes = Integer.parseInt( + props.getOrDefault(MCProperties.RETRY_COUNT, MCProperties.DEFAULT_RETRY_COUNT)); + + RestOptions restOptions = RestOptions.newBuilder() + .withConnectTimeout(connectTimeout) + .withReadTimeout(readTimeout) + .withRetryTimes(retryTimes).build(); CloudCredential credential = MCProperties.getCredential(props); accessKey = credential.getAccessKey(); @@ -196,6 +212,7 @@ protected void initLocalObjectsImpl() { .withCredentials(credentials) .withServiceEndpoint(odps.getEndpoint()) .withQuotaName(quota) + .withRestOptions(restOptions) .build(); } @@ -304,6 +321,21 @@ public String getDefaultProject() { return defaultProject; } + public int getRetryTimes() { + makeSureInitialized(); + return retryTimes; + } + + public int getConnectTimeout() { + makeSureInitialized(); + return connectTimeout; + } + + public int getReadTimeout() { + makeSureInitialized(); + return readTimeout; + } + public ZoneId getProjectDateTimeZone() { makeSureInitialized(); @@ -385,6 +417,31 @@ public void checkProperties() throws DdlException { + MCProperties.SPLIT_ROW_COUNT + "must be an integer"); } + + try { + connectTimeout = Integer.parseInt( + props.getOrDefault(MCProperties.CONNECT_TIMEOUT, MCProperties.DEFAULT_CONNECT_TIMEOUT)); + readTimeout = Integer.parseInt( + props.getOrDefault(MCProperties.READ_TIMEOUT, MCProperties.DEFAULT_READ_TIMEOUT)); + retryTimes = Integer.parseInt( + props.getOrDefault(MCProperties.RETRY_COUNT, MCProperties.DEFAULT_RETRY_COUNT)); + if (connectTimeout <= 0) { + throw new DdlException(MCProperties.CONNECT_TIMEOUT + " must be greater than 0"); + } + + if (readTimeout <= 0) { + throw new DdlException(MCProperties.READ_TIMEOUT + " must be greater than 0"); + } + + if (retryTimes <= 0) { + throw new DdlException(MCProperties.RETRY_COUNT + " must be greater than 0"); + } + + } catch (NumberFormatException e) { + throw new DdlException("property " + MCProperties.CONNECT_TIMEOUT + "/" + + MCProperties.READ_TIMEOUT + "/" + MCProperties.RETRY_COUNT + "must be an integer"); + } + CloudCredential 
credential = MCProperties.getCredential(props); if (!credential.isWhole()) { throw new DdlException("Max-Compute credential properties '" diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java index e177e9d8b7c88c2..4ad971a5c647890 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java @@ -89,6 +89,10 @@ public class MaxComputeScanNode extends FileQueryScanNode { private static final LocationPath ROW_OFFSET_PATH = new LocationPath("/row_offset", Maps.newHashMap()); private static final LocationPath BYTE_SIZE_PATH = new LocationPath("/byte_size", Maps.newHashMap()); + private int connectTimeout; + private int readTimeout; + private int retryTimes; + @Setter private SelectedPartitions selectedPartitions = null; @@ -127,6 +131,11 @@ private void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeS fileDesc.setPartitionSpec("deprecated"); fileDesc.setTableBatchReadSession(maxComputeSplit.scanSerialize); fileDesc.setSessionId(maxComputeSplit.getSessionId()); + + fileDesc.setReadTimeout(readTimeout); + fileDesc.setConnectTimeout(connectTimeout); + fileDesc.setRetryTimes(retryTimes); + tableFormatFileDesc.setMaxComputeParams(fileDesc); rangeDesc.setTableFormatParams(tableFormatFileDesc); rangeDesc.setPath("[ " + maxComputeSplit.getStart() + " , " + maxComputeSplit.getLength() + " ]"); @@ -477,6 +486,10 @@ public List getSplits() throws UserException { MaxComputeExternalCatalog mcCatalog = (MaxComputeExternalCatalog) table.getCatalog(); + readTimeout = mcCatalog.getReadTimeout(); + connectTimeout = mcCatalog.getConnectTimeout(); + retryTimes = mcCatalog.getRetryTimes(); + if (mcCatalog.getSplitStrategy().equals(MCProperties.SPLIT_BY_BYTE_SIZE_STRATEGY)) { for (com.aliyun.odps.table.read.split.InputSplit split : assigner.getAllSplits()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java index 5a9e6feb5ad5b8f..eb25336ab0b0338 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalCatalog.java @@ -19,7 +19,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.security.authentication.AuthenticationConfig; -import org.apache.doris.common.security.authentication.HadoopUGI; +import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.doris.datasource.CatalogProperty; import org.apache.doris.datasource.ExternalCatalog; import org.apache.doris.datasource.InitCatalogLog; @@ -40,6 +40,7 @@ import org.apache.paimon.catalog.Identifier; import org.apache.paimon.options.Options; +import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -53,6 +54,7 @@ public abstract class PaimonExternalCatalog extends ExternalCatalog { protected String catalogType; protected Catalog catalog; protected AuthenticationConfig authConf; + protected HadoopAuthenticator hadoopAuthenticator; private static final List REQUIRED_PROPERTIES = ImmutableList.of( PaimonProperties.WAREHOUSE @@ -71,9 +73,8 @@ protected void initLocalObjectsImpl() { for (Map.Entry 
propEntry : this.catalogProperty.getHadoopProperties().entrySet()) { conf.set(propEntry.getKey(), propEntry.getValue()); } - authConf = AuthenticationConfig.getKerberosConfig(conf, - AuthenticationConfig.HADOOP_KERBEROS_PRINCIPAL, - AuthenticationConfig.HADOOP_KERBEROS_KEYTAB); + authConf = AuthenticationConfig.getKerberosConfig(conf); + hadoopAuthenticator = HadoopAuthenticator.getHadoopAuthenticator(authConf); } public String getCatalogType() { @@ -82,40 +83,57 @@ public String getCatalogType() { } protected List listDatabaseNames() { - return HadoopUGI.ugiDoAs(authConf, () -> new ArrayList<>(catalog.listDatabases())); + try { + return hadoopAuthenticator.doAs(() -> new ArrayList<>(catalog.listDatabases())); + } catch (IOException e) { + throw new RuntimeException("Failed to list databases names, catalog name: " + getName(), e); + } } @Override public boolean tableExist(SessionContext ctx, String dbName, String tblName) { makeSureInitialized(); - return HadoopUGI.ugiDoAs(authConf, () -> catalog.tableExists(Identifier.create(dbName, tblName))); + try { + return hadoopAuthenticator.doAs(() -> catalog.tableExists(Identifier.create(dbName, tblName))); + } catch (IOException e) { + throw new RuntimeException("Failed to check table existence, catalog name: " + getName(), e); + } } @Override public List listTableNames(SessionContext ctx, String dbName) { makeSureInitialized(); - return HadoopUGI.ugiDoAs(authConf, () -> { - List tableNames = null; - try { - tableNames = catalog.listTables(dbName); - } catch (Catalog.DatabaseNotExistException e) { - LOG.warn("DatabaseNotExistException", e); - } - return tableNames; - }); + try { + return hadoopAuthenticator.doAs(() -> { + List tableNames = null; + try { + tableNames = catalog.listTables(dbName); + } catch (Catalog.DatabaseNotExistException e) { + LOG.warn("DatabaseNotExistException", e); + } + return tableNames; + }); + } catch (IOException e) { + throw new RuntimeException("Failed to list table names, catalog name: " + getName(), e); + } } public org.apache.paimon.table.Table getPaimonTable(String dbName, String tblName) { makeSureInitialized(); - return HadoopUGI.ugiDoAs(authConf, () -> { - org.apache.paimon.table.Table table = null; - try { - table = catalog.getTable(Identifier.create(dbName, tblName)); - } catch (Catalog.TableNotExistException e) { - LOG.warn("TableNotExistException", e); - } - return table; - }); + try { + return hadoopAuthenticator.doAs(() -> { + org.apache.paimon.table.Table table = null; + try { + table = catalog.getTable(Identifier.create(dbName, tblName)); + } catch (Catalog.TableNotExistException e) { + LOG.warn("TableNotExistException", e); + } + return table; + }); + } catch (IOException e) { + throw new RuntimeException("Failed to get Paimon table, catalog name: " + getName() + ", db: " + + dbName + ", table: " + tblName, e); + } } protected String getPaimonCatalogType(String catalogType) { @@ -127,15 +145,19 @@ protected String getPaimonCatalogType(String catalogType) { } protected Catalog createCatalog() { - return HadoopUGI.ugiDoAs(authConf, () -> { - Options options = new Options(); - Map paimonOptionsMap = getPaimonOptionsMap(); - for (Map.Entry kv : paimonOptionsMap.entrySet()) { - options.set(kv.getKey(), kv.getValue()); - } - CatalogContext context = CatalogContext.create(options, getConfiguration()); - return createCatalogImpl(context); - }); + try { + return hadoopAuthenticator.doAs(() -> { + Options options = new Options(); + Map paimonOptionsMap = getPaimonOptionsMap(); + for (Map.Entry kv : 
paimonOptionsMap.entrySet()) { + options.set(kv.getKey(), kv.getValue()); + } + CatalogContext context = CatalogContext.create(options, getConfiguration()); + return createCatalogImpl(context); + }); + } catch (IOException e) { + throw new RuntimeException("Failed to create catalog, catalog name: " + getName(), e); + } } protected Catalog createCatalogImpl(CatalogContext context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index bf917804fb7b6d9..5009ec3c9049fb6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -183,9 +183,7 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) fileDesc.setDbId(((PaimonExternalTable) source.getTargetTable()).getDbId()); fileDesc.setTblId(source.getTargetTable().getId()); fileDesc.setLastUpdateTime(source.getTargetTable().getUpdateTime()); - fileDesc.setPaimonTable(encodeObjectToString(source.getPaimonTable())); - // The hadoop conf should be same with - // PaimonExternalCatalog.createCatalog()#getConfiguration() + // The hadoop conf should be same with PaimonExternalCatalog.createCatalog()#getConfiguration() fileDesc.setHadoopConf(source.getCatalog().getCatalogProperty().getHadoopProperties()); Optional optDeletionFile = paimonSplit.getDeletionFile(); if (optDeletionFile.isPresent()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java index 20a77574fc78203..efbd01c14777de2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MCProperties.java @@ -56,6 +56,14 @@ public class MCProperties extends BaseProperties { public static final String SPLIT_ROW_COUNT = "mc.split_row_count"; public static final String DEFAULT_SPLIT_ROW_COUNT = "1048576"; // 256 * 4096 + public static final String CONNECT_TIMEOUT = "mc.connect_timeout"; + public static final String READ_TIMEOUT = "mc.read_timeout"; + public static final String RETRY_COUNT = "mc.retry_count"; + + public static final String DEFAULT_CONNECT_TIMEOUT = "10"; // 10s + public static final String DEFAULT_READ_TIMEOUT = "120"; // 120s + public static final String DEFAULT_RETRY_COUNT = "4"; // 4 times + public static CloudCredential getCredential(Map props) { return getCloudCredential(props, ACCESS_KEY, SECRET_KEY, SESSION_TOKEN); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/executor/TimerJobSchedulerTask.java b/fe/fe-core/src/main/java/org/apache/doris/job/executor/TimerJobSchedulerTask.java index 65a9cf2e0911642..4269fa0d8f3c520 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/executor/TimerJobSchedulerTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/executor/TimerJobSchedulerTask.java @@ -48,6 +48,8 @@ public void run(Timeout timeout) { log.warn("dispatch timer job failed, queue maybe full. 
job id is {}, job name is {}", this.job.getJobId(), this.job.getJobName() + getMsgWhenExecuteQueueFull()); } + log.info("dispatch timer job success, job id is {}, job name is {}", this.job.getJobId(), + this.job.getJobName()); } catch (Exception e) { log.warn("dispatch timer job error, task id is {}", this.job.getJobId(), e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/scheduler/JobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/job/scheduler/JobScheduler.java index 7f8b39f1e66dc7e..921f333791cb444 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/scheduler/JobScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/scheduler/JobScheduler.java @@ -146,6 +146,11 @@ public void close() throws IOException { private void cycleTimerJobScheduler(T job, long startTimeWindowMs) { List delaySeconds = job.getJobConfig().getTriggerDelayTimes(System.currentTimeMillis(), startTimeWindowMs, latestBatchSchedulerTimerTaskTimeMs); + if (CollectionUtils.isEmpty(delaySeconds)) { + log.info("skip job {} scheduler timer job, delay seconds is empty", job.getJobName()); + return; + } + log.info("job {} scheduler timer job, delay seconds size is {}", job.getJobName(), delaySeconds.size()); if (CollectionUtils.isNotEmpty(delaySeconds)) { delaySeconds.forEach(delaySecond -> { TimerJobSchedulerTask timerJobSchedulerTask = new TimerJobSchedulerTask<>(timerJobDisruptor, job); @@ -188,6 +193,8 @@ private void executeTimerJobIdsWithinLastTenMinutesWindow() { this.latestBatchSchedulerTimerTaskTimeMs = System.currentTimeMillis(); } this.latestBatchSchedulerTimerTaskTimeMs += BATCH_SCHEDULER_INTERVAL_MILLI_SECONDS; + log.info("execute timer job ids within last ten minutes window, last time window is {}", + TimeUtils.longToTimeString(lastTimeWindowMs)); if (jobMap.isEmpty()) { return; } @@ -209,6 +216,7 @@ private void clearEndJob(T job) { } try { Env.getCurrentEnv().getJobManager().unregisterJob(job.getJobId()); + log.info("clear finish job, job id is {}, job name is {}", job.getJobId(), job.getJobName()); } catch (JobException e) { log.error("clear finish job error, job id is {}", job.getJobId(), e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/ExportMgr.java b/fe/fe-core/src/main/java/org/apache/doris/load/ExportMgr.java index eddd5fb27eeef11..94ae436ee6d87dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/ExportMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/ExportMgr.java @@ -108,26 +108,24 @@ public void addExportJobAndRegisterTask(ExportJob job) throws Exception { } } unprotectAddJob(job); - // delete existing files - if (Config.enable_delete_existing_files && Boolean.parseBoolean(job.getDeleteExistingFiles())) { - if (job.getBrokerDesc() == null) { - throw new AnalysisException("Local file system does not support delete existing files"); - } - String fullPath = job.getExportPath(); - BrokerUtil.deleteDirectoryWithFileSystem(fullPath.substring(0, fullPath.lastIndexOf('/') + 1), - job.getBrokerDesc()); - } Env.getCurrentEnv().getEditLog().logExportCreate(job); - // ATTN: Must add task after edit log, otherwise the job may finish before adding job. - job.getCopiedTaskExecutors().forEach(executor -> { - Env.getCurrentEnv().getTransientTaskManager().addMemoryTask(executor); - }); - LOG.info("add export job. 
{}", job); - } finally { writeUnlock(); } - + // delete existing files + if (Config.enable_delete_existing_files && Boolean.parseBoolean(job.getDeleteExistingFiles())) { + if (job.getBrokerDesc() == null) { + throw new AnalysisException("Local file system does not support delete existing files"); + } + String fullPath = job.getExportPath(); + BrokerUtil.deleteDirectoryWithFileSystem(fullPath.substring(0, fullPath.lastIndexOf('/') + 1), + job.getBrokerDesc()); + } + // ATTN: Must add task after edit log, otherwise the job may finish before adding job. + for (int i = 0; i < job.getCopiedTaskExecutors().size(); i++) { + Env.getCurrentEnv().getTransientTaskManager().addMemoryTask(job.getCopiedTaskExecutors().get(i)); + } + LOG.info("add export job. {}", job); } public void cancelExportJob(CancelExportStmt stmt) throws DdlException, AnalysisException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskScheduler.java index 8afc35411b55f55..d40a6705626c846 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadTaskScheduler.java @@ -106,7 +106,7 @@ private void process() throws UserException, InterruptedException { if (routineLoadTaskInfo.getIsEof()) { RoutineLoadJob routineLoadJob = routineLoadManager.getJob(routineLoadTaskInfo.getJobId()); if (System.currentTimeMillis() - routineLoadTaskInfo.getLastScheduledTime() - < routineLoadJob.getMaxBatchIntervalS()) { + < routineLoadJob.getMaxBatchIntervalS() * 1000) { needScheduleTasksQueue.addLast(routineLoadTaskInfo); return; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index 06047e2cf16682e..08e4844863a1b71 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -981,13 +981,13 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta createReplicaTask.setInvertedIndexFileStorageFormat(olapTable .getInvertedIndexFileStorageFormat()); if (indexId == olapTable.getBaseIndexId() || olapTable.isShadowIndex(indexId)) { - List clusterKeyIndexes = OlapTable.getClusterKeyIndexes( + List clusterKeyUids = OlapTable.getClusterKeyUids( indexMeta.getSchema()); - if (!CollectionUtils.isEmpty(clusterKeyIndexes)) { - createReplicaTask.setClusterKeyIndexes(clusterKeyIndexes); + if (!CollectionUtils.isEmpty(clusterKeyUids)) { + createReplicaTask.setClusterKeyUids(clusterKeyUids); LOG.info("table: {}, partition: {}, index: {}, tablet: {}, " - + "cluster key indexes: {}", tableId, partitionId, indexId, - tabletId, clusterKeyIndexes); + + "cluster key uids: {}", tableId, partitionId, indexId, + tabletId, clusterKeyUids); } } createReplicaBatchTask.addTask(createReplicaTask); diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlServer.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlServer.java index 5f70e3000b95079..e7a888cdd249b87 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlServer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlServer.java @@ -68,14 +68,17 @@ public MysqlServer(int port, ConnectScheduler connectScheduler) { // return true if success, otherwise false public boolean start() { try { + OptionMap optionMap = OptionMap.builder() + 
.set(Options.TCP_NODELAY, true) + .set(Options.BACKLOG, Config.mysql_nio_backlog_num) + .set(Options.KEEP_ALIVE, Config.mysql_nio_enable_keep_alive) + .getMap(); if (FrontendOptions.isBindIPV6()) { server = xnioWorker.createStreamConnectionServer(new InetSocketAddress("::0", port), acceptListener, - OptionMap.create(Options.TCP_NODELAY, true, Options.BACKLOG, Config.mysql_nio_backlog_num)); - + optionMap); } else { server = xnioWorker.createStreamConnectionServer(new InetSocketAddress(port), acceptListener, - OptionMap.create(Options.TCP_NODELAY, true, Options.BACKLOG, Config.mysql_nio_backlog_num)); - + optionMap); } server.resumeAccepts(); running = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 468a58bd6f68151..bea5eec432b2ab8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -653,6 +653,8 @@ public PlanFragment visitPhysicalHudiScan(PhysicalHudiScan fileScan, PlanTransla if (fileScan.getTableSnapshot().isPresent()) { ((FileQueryScanNode) scanNode).setQueryTableSnapshot(fileScan.getTableSnapshot().get()); } + HudiScanNode hudiScanNode = (HudiScanNode) scanNode; + hudiScanNode.setSelectedPartitions(fileScan.getSelectedPartitions()); return getPlanFragmentForPhysicalFileScan(fileScan, context, scanNode, table, tupleDescriptor); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperElement.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperElement.java new file mode 100644 index 000000000000000..6d8d7c6326c0d9d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/HyperElement.java @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
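+// Both AbstractNode and Edge (changed below in this patch) implement this interface.
+// getReferenceNodes() exposes the bitmap of hyper graph nodes an element touches, which
+// HyperGraphComparator uses to pick the matching view expression when several original
+// expressions share the same shuttled form.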
+ +package org.apache.doris.nereids.jobs.joinorder.hypergraph; + +/** + * This is the common interface implemented by all hyper graph elements (nodes and edges). + */ +public interface HyperElement { + + // Get the referenced nodes + long getReferenceNodes(); +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/Edge.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/Edge.java index 7698d881c661aa7..f75ed83250119a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/Edge.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/edge/Edge.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.jobs.joinorder.hypergraph.edge; import org.apache.doris.common.Pair; +import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperElement; import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmap; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; @@ -32,7 +33,7 @@ /** * Edge in HyperGraph */ -public abstract class Edge { +public abstract class Edge implements HyperElement { private final int index; private final double selectivity; @@ -192,6 +193,7 @@ public boolean isSub(Edge edge) { return LongBitmap.isSubset(getReferenceNodes(), otherBitmap); } + @Override public long getReferenceNodes() { return LongBitmap.newBitmapUnion(leftExtendedNodes, rightExtendedNodes); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/node/AbstractNode.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/node/AbstractNode.java index a4a64e0449deee7..686576de771d94d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/node/AbstractNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/node/AbstractNode.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.jobs.joinorder.hypergraph.node; +import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperElement; import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmap; import org.apache.doris.nereids.jobs.joinorder.hypergraph.edge.Edge; import org.apache.doris.nereids.jobs.joinorder.hypergraph.edge.FilterEdge; @@ -33,7 +34,7 @@ /** * HyperGraph Node.
*/ -public class AbstractNode { +public class AbstractNode implements HyperElement { protected final int index; protected final List joinEdges; protected final List filterEdges; @@ -65,6 +66,11 @@ public List getEdges() { .build(); } + @Override + public long getReferenceNodes() { + return getNodeMap(); + } + public int getIndex() { return index; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 9ed352b65e587a6..71ea972c4ab2a21 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -118,6 +118,7 @@ import org.apache.doris.nereids.DorisParser.DropFileContext; import org.apache.doris.nereids.DorisParser.DropMTMVContext; import org.apache.doris.nereids.DorisParser.DropProcedureContext; +import org.apache.doris.nereids.DorisParser.DropRepositoryContext; import org.apache.doris.nereids.DorisParser.DropRoleContext; import org.apache.doris.nereids.DorisParser.DropSqlBlockRuleContext; import org.apache.doris.nereids.DorisParser.DropUserContext; @@ -508,6 +509,7 @@ import org.apache.doris.nereids.trees.plans.commands.DropJobCommand; import org.apache.doris.nereids.trees.plans.commands.DropMTMVCommand; import org.apache.doris.nereids.trees.plans.commands.DropProcedureCommand; +import org.apache.doris.nereids.trees.plans.commands.DropRepositoryCommand; import org.apache.doris.nereids.trees.plans.commands.DropRoleCommand; import org.apache.doris.nereids.trees.plans.commands.DropSqlBlockRuleCommand; import org.apache.doris.nereids.trees.plans.commands.DropUserCommand; @@ -4867,6 +4869,11 @@ public LogicalPlan visitDropFile(DropFileContext ctx) { return new DropFileCommand(stripQuotes(ctx.name.getText()), dbName, properties); } + @Override + public LogicalPlan visitDropRepository(DropRepositoryContext ctx) { + return new DropRepositoryCommand(stripQuotes(ctx.name.getText())); + } + @Override public LogicalPlan visitDropSqlBlockRule(DropSqlBlockRuleContext ctx) { return new DropSqlBlockRuleCommand(visitIdentifierSeq(ctx.identifierSeq()), ctx.EXISTS() != null); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java index d4594583c314c23..22282a2351627b3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/HyperGraphComparator.java @@ -19,12 +19,14 @@ import org.apache.doris.common.Pair; import org.apache.doris.nereids.jobs.joinorder.hypergraph.ConflictRulesMaker; +import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperElement; import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperGraph; import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmap; import org.apache.doris.nereids.jobs.joinorder.hypergraph.edge.Edge; import org.apache.doris.nereids.jobs.joinorder.hypergraph.edge.FilterEdge; import org.apache.doris.nereids.jobs.joinorder.hypergraph.edge.JoinEdge; import org.apache.doris.nereids.jobs.joinorder.hypergraph.node.StructInfoNode; +import org.apache.doris.nereids.rules.exploration.mv.StructInfo.ExpressionPosition; import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughJoin; import 
org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -51,6 +53,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -79,9 +82,9 @@ public class HyperGraphComparator { private final Map> pullUpViewExprWithEdge = new HashMap<>(); private final LogicalCompatibilityContext logicalCompatibilityContext; // this records the slots which needs to reject null - // the key is the target join which should reject null, the value is a pair, the first value of the pair is the - // join type, the second value is also a pair which left represents the slots in the left of join that should - // reject null, right represents the slots in the right of join that should reject null. + // the key is the view join edge which should reject null, the value is a pair, the first value of the pair is the + // query join type, the second value is also a pair which left represents the slots in the left of view join that + // should reject null, right represents the slots in the right of view join that should reject null. private final Map, Set>>> inferredViewEdgeWithCond = new HashMap<>(); private List viewJoinEdgesAfterInferring; private List viewFilterEdgesAfterInferring; @@ -249,9 +252,17 @@ private boolean compareNodeWithExpr(StructInfoNode query, StructInfoNode view) { } int size = queryExprSetList.size(); for (int i = 0; i < size; i++) { - Set mappingQueryExprSet = queryExprSetList.get(i).stream() - .map(logicalCompatibilityContext::getViewNodeExprFromQuery) - .collect(Collectors.toSet()); + Set queryExpressions = queryExprSetList.get(i); + Set mappingQueryExprSet = new HashSet<>(); + for (Expression queryExpression : queryExpressions) { + Optional mappingViewExprByQueryExpr = getMappingViewExprByQueryExpr(queryExpression, query, + this.logicalCompatibilityContext, + ExpressionPosition.NODE); + if (!mappingViewExprByQueryExpr.isPresent()) { + return false; + } + mappingQueryExprSet.add(mappingViewExprByQueryExpr.get()); + } if (!mappingQueryExprSet.equals(viewExprSetList.get(i))) { return false; } @@ -407,7 +418,10 @@ private Map constructQueryToViewJoinMapWithExpr() { if (edgeMap.containsKey(entry.getValue())) { continue; } - Expression viewExpr = logicalCompatibilityContext.getViewJoinExprFromQuery(entry.getKey()); + Expression viewExpr = getMappingViewExprByQueryExpr(entry.getKey(), + entry.getValue(), + logicalCompatibilityContext, + ExpressionPosition.JOIN_EDGE).orElse(null); if (viewExprToEdge.containsKey(viewExpr)) { edgeMap.put(entry.getValue(), Objects.requireNonNull(viewExprToEdge.get(viewExpr))); } @@ -441,15 +455,19 @@ private Map constructQueryToViewFilterMapWithExpr() { HashMap queryToViewEdgeMap = new HashMap<>(); for (Entry> entry : queryExprToEdge.asMap().entrySet()) { - Expression queryExprViewBased = logicalCompatibilityContext.getViewFilterExprFromQuery(entry.getKey()); - if (queryExprViewBased == null) { - continue; - } - Collection viewEdges = viewExprToEdge.get(queryExprViewBased); - if (viewEdges.isEmpty()) { - continue; - } + Expression queryExprViewBased = null; for (Edge queryEdge : entry.getValue()) { + queryExprViewBased = getMappingViewExprByQueryExpr(entry.getKey(), + queryEdge, + logicalCompatibilityContext, + ExpressionPosition.FILTER_EDGE).orElse(null); + if (queryExprViewBased == null) { + continue; + } + Collection viewEdges = 
viewExprToEdge.get(queryExprViewBased); + if (viewEdges.isEmpty()) { + continue; + } for (Edge viewEdge : viewEdges) { if (!isSubTreeNodesEquals(queryEdge, viewEdge, logicalCompatibilityContext)) { // Such as query filter edge is <{1} --FILTER-- {}> but view filter edge is @@ -514,17 +532,17 @@ private boolean compareEdgeWithNode(Edge query, Edge view) { } private boolean compareFilterEdgeWithNode(FilterEdge query, FilterEdge view) { - return rewriteQueryNodeMap(query.getReferenceNodes()) == view.getReferenceNodes(); + return getViewNodesByQuery(query.getReferenceNodes()) == view.getReferenceNodes(); } private boolean compareJoinEdgeWithNode(JoinEdge query, JoinEdge view) { boolean res = false; if (query.getJoinType().swap() == view.getJoinType()) { - res |= rewriteQueryNodeMap(query.getLeftExtendedNodes()) == view.getRightExtendedNodes() - && rewriteQueryNodeMap(query.getRightExtendedNodes()) == view.getLeftExtendedNodes(); + res |= getViewNodesByQuery(query.getLeftExtendedNodes()) == view.getRightExtendedNodes() + && getViewNodesByQuery(query.getRightExtendedNodes()) == view.getLeftExtendedNodes(); } - res |= rewriteQueryNodeMap(query.getLeftExtendedNodes()) == view.getLeftExtendedNodes() - && rewriteQueryNodeMap(query.getRightExtendedNodes()) == view.getRightExtendedNodes(); + res |= getViewNodesByQuery(query.getLeftExtendedNodes()) == view.getLeftExtendedNodes() + && getViewNodesByQuery(query.getRightExtendedNodes()) == view.getRightExtendedNodes(); return res; } @@ -547,8 +565,8 @@ private boolean compareJoinEdgeOrInfer(JoinEdge query, JoinEdge view) { } private boolean tryInferEdge(JoinEdge query, JoinEdge view) { - if (rewriteQueryNodeMap(query.getLeftRequiredNodes()) != view.getLeftRequiredNodes() - || rewriteQueryNodeMap(query.getRightRequiredNodes()) != view.getRightRequiredNodes()) { + if (getViewNodesByQuery(query.getLeftRequiredNodes()) != view.getLeftRequiredNodes() + || getViewNodesByQuery(query.getRightRequiredNodes()) != view.getRightRequiredNodes()) { return false; } if (!query.getJoinType().equals(view.getJoinType())) { @@ -569,7 +587,7 @@ private boolean tryInferEdge(JoinEdge query, JoinEdge view) { return true; } - private long rewriteQueryNodeMap(long bitmap) { + private long getViewNodesByQuery(long bitmap) { long newBitmap = LongBitmap.newBitmap(); for (int i : LongBitmap.getIterator(bitmap)) { int newIdx = getQueryToViewNodeIdMap().getOrDefault(i, 0); @@ -578,6 +596,35 @@ private long rewriteQueryNodeMap(long bitmap) { return newBitmap; } + private Optional getMappingViewExprByQueryExpr(Expression queryExpression, + HyperElement queryExpressionBelongedHyperElement, + LogicalCompatibilityContext context, + ExpressionPosition expressionPosition) { + Expression queryShuttledExpr; + Collection> viewExpressions; + if (ExpressionPosition.JOIN_EDGE.equals(expressionPosition)) { + queryShuttledExpr = context.getQueryJoinShuttledExpr(queryExpression); + viewExpressions = context.getViewJoinExprFromQuery(queryShuttledExpr); + } else if (ExpressionPosition.FILTER_EDGE.equals(expressionPosition)) { + queryShuttledExpr = context.getQueryFilterShuttledExpr(queryExpression); + viewExpressions = context.getViewFilterExprFromQuery(queryShuttledExpr); + } else { + queryShuttledExpr = context.getQueryNodeShuttledExpr(queryExpression); + viewExpressions = context.getViewNodeExprFromQuery(queryShuttledExpr); + } + if (viewExpressions.size() == 1) { + return Optional.of(viewExpressions.iterator().next().key()); + } + long queryReferenceNodes = 
queryExpressionBelongedHyperElement.getReferenceNodes(); + long viewReferenceNodes = getViewNodesByQuery(queryReferenceNodes); + for (Pair viewExpressionPair : viewExpressions) { + if (viewExpressionPair.value().getReferenceNodes() == viewReferenceNodes) { + return Optional.of(viewExpressionPair.key()); + } + } + return Optional.empty(); + } + private void compareJoinEdgeWithExpr(Edge query, Edge view) { Set queryExprSet = query.getExpressionSet(); Set viewExprSet = view.getExpressionSet(); @@ -585,7 +632,10 @@ private void compareJoinEdgeWithExpr(Edge query, Edge view) { Set exprMappedOfView = new HashSet<>(); List residualQueryExpr = new ArrayList<>(); for (Expression queryExpr : queryExprSet) { - Expression viewExpr = logicalCompatibilityContext.getViewJoinExprFromQuery(queryExpr); + Expression viewExpr = getMappingViewExprByQueryExpr(queryExpr, + query, + logicalCompatibilityContext, + ExpressionPosition.JOIN_EDGE).orElse(null); if (viewExprSet.contains(viewExpr)) { exprMappedOfView.add(viewExpr); } else { @@ -604,7 +654,10 @@ private void compareFilterEdgeWithExpr(Edge query, Edge view) { Set exprMappedOfView = new HashSet<>(); List residualQueryExpr = new ArrayList<>(); for (Expression queryExpr : queryExprSet) { - Expression viewExpr = logicalCompatibilityContext.getViewFilterExprFromQuery(queryExpr); + Expression viewExpr = getMappingViewExprByQueryExpr(queryExpr, + query, + logicalCompatibilityContext, + ExpressionPosition.FILTER_EDGE).orElse(null); if (viewExprSet.contains(viewExpr)) { exprMappedOfView.add(viewExpr); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/LogicalCompatibilityContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/LogicalCompatibilityContext.java index ca13c9701dabc2c..77ab37873d06b40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/LogicalCompatibilityContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/LogicalCompatibilityContext.java @@ -17,6 +17,8 @@ package org.apache.doris.nereids.rules.exploration.mv; +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperElement; import org.apache.doris.nereids.jobs.joinorder.hypergraph.node.StructInfoNode; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.rules.exploration.mv.StructInfo.ExpressionPosition; @@ -36,8 +38,10 @@ import com.google.common.base.Suppliers; import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; -import java.util.HashMap; +import java.util.Collection; import java.util.Map; import java.util.function.Supplier; @@ -48,11 +52,15 @@ public class LogicalCompatibilityContext { private final BiMap queryToViewNodeMapping; private final BiMap queryToViewNodeIDMapping; private final ObjectId planNodeId; - private final Supplier> queryToViewJoinEdgeExpressionMappingSupplier; - private final Supplier> queryToViewNodeExpressionMappingSupplier; - private final Supplier> queryToViewFilterEdgeExpressionMappingSupplier; - @Deprecated - private BiMap queryToViewAllExpressionMapping; + private final Supplier>> + queryToViewJoinEdgeExpressionMappingSupplier; + private final Supplier> queryToQueryShuttledJoinExpressionMappingSupplier; + private final Supplier>> + queryToViewNodeExpressionMappingSupplier; + private final Supplier> 
queryToQueryShuttledNodeExpressionMappingSupplier; + private final Supplier>> + queryToViewFilterEdgeExpressionMappingSupplier; + private final Supplier> queryToQueryShuttledFilterExpressionMappingSupplier; /** * LogicalCompatibilityContext @@ -66,16 +74,25 @@ private LogicalCompatibilityContext(BiMap queryT queryStructInfo.getShuttledExpressionsToExpressionsMap().get(ExpressionPosition.JOIN_EDGE), viewStructInfo.getShuttledExpressionsToExpressionsMap().get(ExpressionPosition.JOIN_EDGE))); + this.queryToQueryShuttledJoinExpressionMappingSupplier = Suppliers.memoize( + () -> queryStructInfo.getExpressionToShuttledExpressionToMap().get(ExpressionPosition.JOIN_EDGE)); + this.queryToViewNodeExpressionMappingSupplier = Suppliers.memoize(() -> generateExpressionMapping(viewToQuerySlotMapping, queryStructInfo.getShuttledExpressionsToExpressionsMap().get(ExpressionPosition.NODE), viewStructInfo.getShuttledExpressionsToExpressionsMap().get(ExpressionPosition.NODE))); + this.queryToQueryShuttledNodeExpressionMappingSupplier = Suppliers.memoize( + () -> queryStructInfo.getExpressionToShuttledExpressionToMap().get(ExpressionPosition.NODE)); + this.queryToViewFilterEdgeExpressionMappingSupplier = Suppliers.memoize(() -> generateExpressionMapping(viewToQuerySlotMapping, queryStructInfo.getShuttledExpressionsToExpressionsMap().get(ExpressionPosition.FILTER_EDGE), viewStructInfo.getShuttledExpressionsToExpressionsMap().get(ExpressionPosition.FILTER_EDGE))); + this.queryToQueryShuttledFilterExpressionMappingSupplier = Suppliers.memoize( + () -> queryStructInfo.getExpressionToShuttledExpressionToMap().get(ExpressionPosition.FILTER_EDGE)); + this.queryToViewNodeMapping = queryToViewNodeMapping; this.queryToViewNodeIDMapping = HashBiMap.create(); queryToViewNodeMapping.forEach((k, v) -> queryToViewNodeIDMapping.put(k.getIndex(), v.getIndex())); @@ -92,18 +109,30 @@ public BiMap getQueryToViewNodeIDMapping() { return queryToViewNodeIDMapping; } - public Expression getViewJoinExprFromQuery(Expression queryJoinExpr) { + public Collection> getViewJoinExprFromQuery(Expression queryJoinExpr) { return queryToViewJoinEdgeExpressionMappingSupplier.get().get(queryJoinExpr); } - public Expression getViewFilterExprFromQuery(Expression queryJoinExpr) { + public Expression getQueryJoinShuttledExpr(Expression queryJoinExpr) { + return queryToQueryShuttledJoinExpressionMappingSupplier.get().get(queryJoinExpr); + } + + public Collection> getViewFilterExprFromQuery(Expression queryJoinExpr) { return queryToViewFilterEdgeExpressionMappingSupplier.get().get(queryJoinExpr); } - public Expression getViewNodeExprFromQuery(Expression queryJoinExpr) { + public Expression getQueryFilterShuttledExpr(Expression queryFilterExpr) { + return queryToQueryShuttledFilterExpressionMappingSupplier.get().get(queryFilterExpr); + } + + public Collection> getViewNodeExprFromQuery(Expression queryJoinExpr) { return queryToViewNodeExpressionMappingSupplier.get().get(queryJoinExpr); } + public Expression getQueryNodeShuttledExpr(Expression queryNodeExpr) { + return queryToQueryShuttledNodeExpressionMappingSupplier.get().get(queryNodeExpr); + } + /** * Generate logical compatibility context, * this make expression mapping between query and view by relation and the slot in relation mapping @@ -134,24 +163,31 @@ public static LogicalCompatibilityContext from(RelationMapping relationMapping, viewStructInfo); } - private static BiMap generateExpressionMapping( + /** + * The result is multimap + * the key is shuttled query expr + * the value is original 
view expr collection + * */ + private static Multimap> generateExpressionMapping( Map viewToQuerySlotMapping, - Map queryShuttledExprToExprMap, - Map viewShuttledExprToExprMap) { - final Map viewEdgeToConjunctsMapQueryBased = new HashMap<>(); - BiMap queryToViewEdgeMapping = HashBiMap.create(); + Multimap> queryShuttledExprToExprMap, + Multimap> viewShuttledExprToExprMap) { + Multimap> queryToViewEdgeMapping = HashMultimap.create(); if (queryShuttledExprToExprMap == null || viewShuttledExprToExprMap == null || queryShuttledExprToExprMap.isEmpty() || viewShuttledExprToExprMap.isEmpty()) { return queryToViewEdgeMapping; } + final Multimap> viewShuttledExprToExprMapQueryBased = + HashMultimap.create(); viewShuttledExprToExprMap.forEach((shuttledExpr, expr) -> { - viewEdgeToConjunctsMapQueryBased.put( + viewShuttledExprToExprMapQueryBased.put( orderSlotAsc(ExpressionUtils.replace(shuttledExpr, viewToQuerySlotMapping)), expr); }); - queryShuttledExprToExprMap.forEach((exprSet, edge) -> { - Expression viewExpr = viewEdgeToConjunctsMapQueryBased.get(orderSlotAsc(exprSet)); - if (viewExpr != null) { - queryToViewEdgeMapping.put(edge, viewExpr); + queryShuttledExprToExprMap.forEach((shuttledExpr, expr) -> { + Collection> viewExpressions = viewShuttledExprToExprMapQueryBased.get( + orderSlotAsc(shuttledExpr)); + if (viewExpressions != null) { + queryToViewEdgeMapping.putAll(shuttledExpr, viewExpressions); } }); return queryToViewEdgeMapping; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java index 3de48dc7ff6abf7..365360e06b096c3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java @@ -23,10 +23,10 @@ import org.apache.doris.mtmv.BaseTableInfo; import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.jobs.executor.Rewriter; +import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperElement; import org.apache.doris.nereids.jobs.joinorder.hypergraph.HyperGraph; import org.apache.doris.nereids.jobs.joinorder.hypergraph.edge.JoinEdge; import org.apache.doris.nereids.jobs.joinorder.hypergraph.node.StructInfoNode; -import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewUtils.TableQueryOperatorChecker; import org.apache.doris.nereids.rules.exploration.mv.Predicates.SplitPredicate; @@ -65,12 +65,15 @@ import org.apache.doris.nereids.trees.plans.visitor.ExpressionLineageReplacer; import org.apache.doris.nereids.util.ExpressionUtils; +import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; +import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import java.util.ArrayList; import java.util.BitSet; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -113,9 +116,23 @@ public class StructInfo { // split predicates is shuttled private SplitPredicate splitPredicate; private EquivalenceClass equivalenceClass; - // Key is the expression shuttled and the value is the origin expression + // For value of Map, the key is the position of expression + // the value is the expressions and the hyper element of expression pair + // Key of pair is 
the expression shuttled and the value is the origin expression and the hyper element it belonged + // Sometimes origin expressions are different and shuttled expression is same + // Such as origin expressions are l_partkey#0 > 1 and l_partkey#10 > 1 and shuttled expression is l_partkey#10 > 1 // this is for building LogicalCompatibilityContext later. - private final Map> shuttledExpressionsToExpressionsMap; + private final Map>> + shuttledExpressionsToExpressionsMap; + // For value of Map, the key is the position of expression + // the value is the original expression and shuttled expression map + // Such as origin expressions are l_partkey#0 > 1 and shuttled expression is l_partkey#10 > 1 + // the map would be {ExpressionPosition.FILTER, { + // l_partkey#0 > 1 : l_partkey#10 > 1 + // }} + // this is for building LogicalCompatibilityContext later. + private final Map> expressionToShuttledExpressionToMap; + // Record the exprId and the corresponding expr map, this is used by expression shuttled private final Map namedExprIdAndExprMapping; private final List planOutputShuttledExpressions; @@ -127,7 +144,9 @@ private StructInfo(Plan originalPlan, ObjectId originalPlanId, HyperGraph hyperG Plan bottomPlan, List relations, Map relationIdStructInfoNodeMap, @Nullable Predicates predicates, - Map> shuttledExpressionsToExpressionsMap, + Map>> + shuttledExpressionsToExpressionsMap, + Map> expressionToShuttledExpressionToMap, Map namedExprIdAndExprMapping, BitSet tableIdSet, SplitPredicate splitPredicate, @@ -146,6 +165,7 @@ private StructInfo(Plan originalPlan, ObjectId originalPlanId, HyperGraph hyperG this.splitPredicate = splitPredicate; this.equivalenceClass = equivalenceClass; this.shuttledExpressionsToExpressionsMap = shuttledExpressionsToExpressionsMap; + this.expressionToShuttledExpressionToMap = expressionToShuttledExpressionToMap; this.namedExprIdAndExprMapping = namedExprIdAndExprMapping; this.planOutputShuttledExpressions = planOutputShuttledExpressions; } @@ -156,7 +176,8 @@ private StructInfo(Plan originalPlan, ObjectId originalPlanId, HyperGraph hyperG public StructInfo withPredicates(Predicates predicates) { return new StructInfo(this.originalPlan, this.originalPlanId, this.hyperGraph, this.valid, this.topPlan, this.bottomPlan, this.relations, this.relationIdStructInfoNodeMap, predicates, - this.shuttledExpressionsToExpressionsMap, this.namedExprIdAndExprMapping, this.tableBitSet, + this.shuttledExpressionsToExpressionsMap, this.expressionToShuttledExpressionToMap, + this.namedExprIdAndExprMapping, this.tableBitSet, null, null, this.planOutputShuttledExpressions); } @@ -166,13 +187,16 @@ public StructInfo withPredicates(Predicates predicates) { public StructInfo withTableBitSet(BitSet tableBitSet) { return new StructInfo(this.originalPlan, this.originalPlanId, this.hyperGraph, this.valid, this.topPlan, this.bottomPlan, this.relations, this.relationIdStructInfoNodeMap, this.predicates, - this.shuttledExpressionsToExpressionsMap, this.namedExprIdAndExprMapping, tableBitSet, + this.shuttledExpressionsToExpressionsMap, this.expressionToShuttledExpressionToMap, + this.namedExprIdAndExprMapping, tableBitSet, this.splitPredicate, this.equivalenceClass, this.planOutputShuttledExpressions); } private static boolean collectStructInfoFromGraph(HyperGraph hyperGraph, Plan topPlan, - Map> shuttledExpressionsToExpressionsMap, + Map>> + shuttledExpressionsToExpressionsMap, + Map> expressionToShuttledExpressionToMap, Map namedExprIdAndExprMapping, List relations, Map relationIdStructInfoNodeMap, @@ 
-200,8 +224,9 @@ private static boolean collectStructInfoFromGraph(HyperGraph hyperGraph, structInfoNode.getPlan().accept(ExpressionLineageReplacer.INSTANCE, replaceContext); // Replace expressions by expression map List replacedExpressions = replaceContext.getReplacedExpressions(); - putShuttledExpressionsToExpressionsMap(shuttledExpressionsToExpressionsMap, - ExpressionPosition.NODE, replacedExpressions.get(0), expression); + putShuttledExpressionToExpressionsMap(shuttledExpressionsToExpressionsMap, + expressionToShuttledExpressionToMap, + ExpressionPosition.NODE, replacedExpressions.get(0), expression, node); // Record this, will be used in top level expression shuttle later, see the method // ExpressionLineageReplacer#visitGroupPlan namedExprIdAndExprMapping.putAll(replaceContext.getExprIdExpressionMap()); @@ -227,8 +252,10 @@ private static boolean collectStructInfoFromGraph(HyperGraph hyperGraph, // Replace expressions by expression map List replacedExpressions = replaceContext.getReplacedExpressions(); for (int i = 0; i < replacedExpressions.size(); i++) { - putShuttledExpressionsToExpressionsMap(shuttledExpressionsToExpressionsMap, - ExpressionPosition.JOIN_EDGE, replacedExpressions.get(i), joinConjunctExpressions.get(i)); + putShuttledExpressionToExpressionsMap(shuttledExpressionsToExpressionsMap, + expressionToShuttledExpressionToMap, + ExpressionPosition.JOIN_EDGE, replacedExpressions.get(i), joinConjunctExpressions.get(i), + edge); } // Record this, will be used in top level expression shuttle later, see the method // ExpressionLineageReplacer#visitGroupPlan @@ -240,10 +267,11 @@ private static boolean collectStructInfoFromGraph(HyperGraph hyperGraph, filterExpressions.forEach(predicate -> { // this is used for LogicalCompatibilityContext ExpressionUtils.extractConjunction(predicate).forEach(expr -> - putShuttledExpressionsToExpressionsMap(shuttledExpressionsToExpressionsMap, + putShuttledExpressionToExpressionsMap(shuttledExpressionsToExpressionsMap, + expressionToShuttledExpressionToMap, ExpressionPosition.FILTER_EDGE, ExpressionUtils.shuttleExpressionWithLineage(predicate, topPlan, new BitSet()), - predicate)); + predicate, filterEdge)); }); }); return true; @@ -315,11 +343,13 @@ public static StructInfo of(Plan originalPlan, @Nullable Plan topPlan, @Nullable // collect struct info fromGraph List relationList = new ArrayList<>(); Map relationIdStructInfoNodeMap = new LinkedHashMap<>(); - Map> shuttledHashConjunctsToConjunctsMap = - new LinkedHashMap<>(); + Map>> + shuttledHashConjunctsToConjunctsMap = new LinkedHashMap<>(); Map namedExprIdAndExprMapping = new LinkedHashMap<>(); BitSet tableBitSet = new BitSet(); + Map> expressionToShuttledExpressionToMap = new HashMap<>(); boolean valid = collectStructInfoFromGraph(hyperGraph, topPlan, shuttledHashConjunctsToConjunctsMap, + expressionToShuttledExpressionToMap, namedExprIdAndExprMapping, relationList, relationIdStructInfoNodeMap, @@ -341,19 +371,11 @@ public static StructInfo of(Plan originalPlan, @Nullable Plan topPlan, @Nullable ExpressionUtils.shuttleExpressionWithLineage(originalPlan.getOutput(), originalPlan, new BitSet()); return new StructInfo(originalPlan, originalPlanId, hyperGraph, valid, topPlan, bottomPlan, relationList, relationIdStructInfoNodeMap, predicates, shuttledHashConjunctsToConjunctsMap, + expressionToShuttledExpressionToMap, namedExprIdAndExprMapping, tableBitSet, null, null, planOutputShuttledExpressions); } - /** - * Build Struct info from group. 
- * Maybe return multi structInfo when original plan already be rewritten by mv - */ - public static StructInfo of(Group group) { - // TODO build graph from original plan and get relations and predicates from graph - return null; - } - public List getRelations() { return relations; } @@ -410,21 +432,36 @@ public Map getRelationIdStructInfoNodeMap() { return relationIdStructInfoNodeMap; } - public Map> getShuttledExpressionsToExpressionsMap() { + public Map>> + getShuttledExpressionsToExpressionsMap() { return shuttledExpressionsToExpressionsMap; } - private static void putShuttledExpressionsToExpressionsMap( - Map> shuttledExpressionsToExpressionsMap, + public Map> getExpressionToShuttledExpressionToMap() { + return expressionToShuttledExpressionToMap; + } + + private static void putShuttledExpressionToExpressionsMap( + Map>> + shuttledExpressionsToExpressionsMap, + Map> expressionPositionToExpressionToMap, ExpressionPosition expressionPosition, - Expression key, Expression value) { - Map expressionExpressionMap = shuttledExpressionsToExpressionsMap.get( - expressionPosition); - if (expressionExpressionMap == null) { - expressionExpressionMap = new LinkedHashMap<>(); - shuttledExpressionsToExpressionsMap.put(expressionPosition, expressionExpressionMap); - } - expressionExpressionMap.put(key, value); + Expression shuttledExpression, Expression originalExpression, HyperElement valueBelongedElement) { + Multimap> shuttledExpressionToExpressionMap = + shuttledExpressionsToExpressionsMap.get(expressionPosition); + if (shuttledExpressionToExpressionMap == null) { + shuttledExpressionToExpressionMap = HashMultimap.create(); + shuttledExpressionsToExpressionsMap.put(expressionPosition, shuttledExpressionToExpressionMap); + } + shuttledExpressionToExpressionMap.put(shuttledExpression, Pair.of(originalExpression, valueBelongedElement)); + + Map originalExprToShuttledExprMap = + expressionPositionToExpressionToMap.get(expressionPosition); + if (originalExprToShuttledExprMap == null) { + originalExprToShuttledExprMap = new HashMap<>(); + expressionPositionToExpressionToMap.put(expressionPosition, originalExprToShuttledExprMap); + } + originalExprToShuttledExprMap.put(originalExpression, shuttledExpression); } public List getExpressions() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index aaa93d70c355aca..bc8ccf83093b258 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -168,6 +168,7 @@ public enum PlanType { CREATE_PROCEDURE_COMMAND, DROP_PROCEDURE_COMMAND, DROP_ROLE_COMMAND, + DROP_REPOSITOORY_COMMAND, SHOW_PROCEDURE_COMMAND, SHOW_CREATE_PROCEDURE_COMMAND, CREATE_VIEW_COMMAND, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterRoleCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterRoleCommand.java index fe8d0fd5db95a1b..9f2d4ba22745eac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterRoleCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterRoleCommand.java @@ -27,14 +27,11 @@ import org.apache.doris.qe.StmtExecutor; import com.google.common.base.Strings; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; /** * alter role command */ public class AlterRoleCommand 
extends AlterCommand { - public static final Logger LOG = LogManager.getLogger(AlterRoleCommand.class); private final String role; private final String comment; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropRepositoryCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropRepositoryCommand.java new file mode 100644 index 000000000000000..4fef49c39937ed1 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropRepositoryCommand.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.StmtExecutor; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +/** + * drop repository command + */ +public class DropRepositoryCommand extends DropCommand { + public static final Logger LOG = LogManager.getLogger(DropRepositoryCommand.class); + private final String repoName; + + /** + * constructor + */ + public DropRepositoryCommand(String repoName) { + super(PlanType.DROP_REPOSITOORY_COMMAND); + this.repoName = repoName; + } + + @Override + public void doRun(ConnectContext ctx, StmtExecutor executor) throws Exception { + // check auth + if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "ADMIN"); + } + Env.getCurrentEnv().getBackupHandler().dropRepository(repoName); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitDropRepositoryCommand(this, context); + } + + @Override + protected void checkSupportedInCloudMode(ConnectContext ctx) throws DdlException { + LOG.info("DropRepositoryCommand not supported in cloud mode"); + throw new DdlException("Unsupported operation"); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java index d4fce906693a680..e3f2f1d732ae5a1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java @@ -27,6 +27,7 @@ import 
org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.planner.ScanNode; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.StmtExecutor; @@ -95,6 +96,9 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { } else { executor.handleExplainStmt(planner.getExplainString(explainOptions), true); } + for (ScanNode scanNode : planner.getScanNodes()) { + scanNode.stop(); + } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowGrantsCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowGrantsCommand.java index f877f42b83599f5..6d52d374fa7ae4f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowGrantsCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowGrantsCommand.java @@ -34,8 +34,6 @@ import org.apache.doris.qe.StmtExecutor; import com.google.common.base.Preconditions; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import java.util.List; @@ -43,7 +41,6 @@ * show grants command */ public class ShowGrantsCommand extends ShowCommand { - public static final Logger LOG = LogManager.getLogger(ShowGrantsCommand.class); private static final ShowResultSetMetaData META_DATA; private final boolean isAll; private UserIdentity userIdent; // if not given will update with self. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowPartitionIdCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowPartitionIdCommand.java index fb7e5cdf9683309..47977a8e896a371 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowPartitionIdCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowPartitionIdCommand.java @@ -35,8 +35,6 @@ import org.apache.doris.qe.StmtExecutor; import com.google.common.collect.Lists; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.List; @@ -45,7 +43,6 @@ * show partition command */ public class ShowPartitionIdCommand extends ShowCommand { - public static final Logger LOG = LogManager.getLogger(ShowPartitionIdCommand.class); private final long partitionId; /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowProcCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowProcCommand.java index da2fb38c4d8d80b..fc865a07457ab62 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowProcCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowProcCommand.java @@ -35,16 +35,12 @@ import org.apache.doris.qe.ShowResultSetMetaData; import org.apache.doris.qe.StmtExecutor; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import java.util.List; /** * show proc command */ public class ShowProcCommand extends ShowCommand { - public static final Logger LOG = LogManager.getLogger(ShowProcCommand.class); private final String path; /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowRolesCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowRolesCommand.java 
index 7cbb5f934cabba6..4ca4c051cdbf8f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowRolesCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowRolesCommand.java @@ -30,16 +30,12 @@ import org.apache.doris.qe.ShowResultSetMetaData; import org.apache.doris.qe.StmtExecutor; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import java.util.List; /** * show roles command */ public class ShowRolesCommand extends ShowCommand { - public static final Logger LOG = LogManager.getLogger(ShowRolesCommand.class); private static final ShowResultSetMetaData META_DATA; static { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableIdCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableIdCommand.java index 3acd6c7992c4dfa..59e1834215a40fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableIdCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableIdCommand.java @@ -33,8 +33,6 @@ import org.apache.doris.qe.StmtExecutor; import com.google.common.collect.Lists; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.List; @@ -43,7 +41,6 @@ * show table id command */ public class ShowTableIdCommand extends ShowCommand { - public static final Logger LOG = LogManager.getLogger(ShowTableIdCommand.class); private final long tableId; /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java index 629690889432b37..51e68eb07631aeb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java @@ -84,7 +84,7 @@ protected LogicalHudiScan(RelationId id, ExternalTable table, List quali public LogicalHudiScan(RelationId id, ExternalTable table, List qualifier, Optional tableSample, Optional tableSnapshot) { this(id, table, qualifier, Optional.empty(), Optional.empty(), - SelectedPartitions.NOT_PRUNED, tableSample, tableSnapshot, + ((HMSExternalTable) table).initHudiSelectedPartitions(tableSnapshot), tableSample, tableSnapshot, Optional.empty(), Optional.empty()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java index 339bb4e4c6a5f0f..77846ff3ad5328c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java @@ -55,6 +55,7 @@ import org.apache.doris.nereids.trees.plans.commands.DropJobCommand; import org.apache.doris.nereids.trees.plans.commands.DropMTMVCommand; import org.apache.doris.nereids.trees.plans.commands.DropProcedureCommand; +import org.apache.doris.nereids.trees.plans.commands.DropRepositoryCommand; import org.apache.doris.nereids.trees.plans.commands.DropRoleCommand; import org.apache.doris.nereids.trees.plans.commands.DropSqlBlockRuleCommand; import org.apache.doris.nereids.trees.plans.commands.DropUserCommand; @@ -506,12 +507,16 @@ default R visitShowLoadProfileCommand(ShowLoadProfileCommand 
showLoadProfileComm return visitCommand(showLoadProfileCommand, context); } - default R visitAlterSqlBlockRuleCommand(AlterSqlBlockRuleCommand dropRoleCommand, C context) { - return visitCommand(dropRoleCommand, context); + default R visitAlterSqlBlockRuleCommand(AlterSqlBlockRuleCommand cmd, C context) { + return visitCommand(cmd, context); } - default R visitCreateSqlBlockRuleCommand(CreateSqlBlockRuleCommand dropRoleCommand, C context) { - return visitCommand(dropRoleCommand, context); + default R visitCreateSqlBlockRuleCommand(CreateSqlBlockRuleCommand cmd, C context) { + return visitCommand(cmd, context); + } + + default R visitDropRepositoryCommand(DropRepositoryCommand cmd, C context) { + return visitCommand(cmd, context); } default R visitCreateRoleCommand(CreateRoleCommand createRoleCommand, C context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/BatchDropInfo.java b/fe/fe-core/src/main/java/org/apache/doris/persist/BatchDropInfo.java index 260ad316d3cc246..8e4e9b9af8e0d90 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/BatchDropInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/BatchDropInfo.java @@ -86,6 +86,10 @@ public Set getIndexIdSet() { return indexIdSet; } + public boolean hasIndexNameMap() { + return indexNameMap != null; + } + public Map getIndexNameMap() { return indexNameMap; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/DropInfo.java b/fe/fe-core/src/main/java/org/apache/doris/persist/DropInfo.java index 69994caf23d5dcd..db0688bd6ad2671 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/DropInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/DropInfo.java @@ -52,7 +52,7 @@ public DropInfo() { public DropInfo(long dbId, long tableId, String tableName, boolean isView, boolean forceDrop, long recycleTime) { - this(dbId, tableId, tableName, -1, "", isView, forceDrop, recycleTime); + this(dbId, tableId, tableName, -1L, "", isView, forceDrop, recycleTime); } public DropInfo(long dbId, long tableId, String tableName, long indexId, String indexName, boolean isView, diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index 7d1f2127eecaafc..f1377e9daebfc44 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -347,13 +347,21 @@ public static void loadJournal(Env env, Long logId, JournalEntity journal) { } case OperationType.OP_BATCH_DROP_ROLLUP: { BatchDropInfo batchDropInfo = (BatchDropInfo) journal.getData(); - for (Map.Entry entry : batchDropInfo.getIndexNameMap().entrySet()) { - long indexId = entry.getKey(); - String indexName = entry.getValue(); - DropInfo info = new DropInfo(batchDropInfo.getDbId(), batchDropInfo.getTableId(), - batchDropInfo.getTableName(), indexId, indexName, false, false, 0); - env.getMaterializedViewHandler().replayDropRollup(info, env); - env.getBinlogManager().addDropRollup(info, logId); + if (batchDropInfo.hasIndexNameMap()) { + for (Map.Entry entry : batchDropInfo.getIndexNameMap().entrySet()) { + long indexId = entry.getKey(); + String indexName = entry.getValue(); + DropInfo info = new DropInfo(batchDropInfo.getDbId(), batchDropInfo.getTableId(), + batchDropInfo.getTableName(), indexId, indexName, false, false, 0); + env.getMaterializedViewHandler().replayDropRollup(info, env); + env.getBinlogManager().addDropRollup(info, logId); + } + } else { + for (Long indexId : 
batchDropInfo.getIndexIdSet()) { + DropInfo info = new DropInfo(batchDropInfo.getDbId(), batchDropInfo.getTableId(), + batchDropInfo.getTableName(), indexId, "", false, false, 0); + env.getMaterializedViewHandler().replayDropRollup(info, env); + } } break; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/scheduler/disruptor/TaskDisruptor.java b/fe/fe-core/src/main/java/org/apache/doris/scheduler/disruptor/TaskDisruptor.java index 345b31d6bc2537c..8144ca22ea22eb5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/scheduler/disruptor/TaskDisruptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/scheduler/disruptor/TaskDisruptor.java @@ -20,6 +20,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.CustomThreadFactory; import org.apache.doris.scheduler.constants.TaskType; +import org.apache.doris.scheduler.exception.JobException; import com.lmax.disruptor.EventTranslatorThreeArg; import com.lmax.disruptor.LiteTimeoutBlockingWaitStrategy; @@ -119,15 +120,17 @@ public void tryPublish(Long jobId, Long taskId, TaskType taskType) { * * @param taskId task id */ - public void tryPublishTask(Long taskId) { + public void tryPublishTask(Long taskId) throws JobException { if (isClosed) { log.info("tryPublish failed, disruptor is closed, taskId: {}", taskId); return; } - try { + // We reserve two slots in the ring buffer + // to prevent it from becoming stuck due to competition between producers and consumers. + if (disruptor.getRingBuffer().hasAvailableCapacity(2)) { disruptor.publishEvent(TRANSLATOR, taskId, 0L, TaskType.TRANSIENT_TASK); - } catch (Exception e) { - log.warn("tryPublish failed, taskId: {}", taskId, e); + } else { + throw new JobException("There is not enough available capacity in the RingBuffer."); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/scheduler/manager/TransientTaskManager.java b/fe/fe-core/src/main/java/org/apache/doris/scheduler/manager/TransientTaskManager.java index 7461399c8eb0c5a..de501d3e0c2ffd0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/scheduler/manager/TransientTaskManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/scheduler/manager/TransientTaskManager.java @@ -21,7 +21,6 @@ import org.apache.doris.scheduler.exception.JobException; import org.apache.doris.scheduler.executor.TransientTaskExecutor; -import lombok.Setter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -41,7 +40,6 @@ public class TransientTaskManager { * disruptor is used to handle task * disruptor will start a thread pool to handle task */ - @Setter private TaskDisruptor disruptor; public TransientTaskManager() { @@ -56,7 +54,7 @@ public TransientTaskExecutor getMemoryTaskExecutor(Long taskId) { return taskExecutorMap.get(taskId); } - public Long addMemoryTask(TransientTaskExecutor executor) { + public Long addMemoryTask(TransientTaskExecutor executor) throws JobException { Long taskId = executor.getId(); taskExecutorMap.put(taskId, executor); disruptor.tryPublishTask(taskId); diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index cf9e8e82ce112f9..1ad8d733ddea070 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -972,6 +972,10 @@ private TColumnDesc getColumnDesc(Column column) { } desc.setChildren(children); } + String defaultValue = 
column.getDefaultValue(); + if (defaultValue != null) { + desc.setDefaultValue(defaultValue); + } return desc; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java index 94b73ef286475ae..90e709c96c3e5a3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -125,7 +125,7 @@ public class CreateReplicaTask extends AgentTask { private boolean storeRowColumn; private BinlogConfig binlogConfig; - private List clusterKeyIndexes; + private List clusterKeyUids; private Map objectPool; private List rowStoreColumnUniqueIds; @@ -276,8 +276,8 @@ public void setInvertedIndexFileStorageFormat(TInvertedIndexFileStorageFormat in this.invertedIndexFileStorageFormat = invertedIndexFileStorageFormat; } - public void setClusterKeyIndexes(List clusterKeyIndexes) { - this.clusterKeyIndexes = clusterKeyIndexes; + public void setClusterKeyUids(List clusterKeyUids) { + this.clusterKeyUids = clusterKeyUids; } public TCreateTabletReq toThrift() { @@ -337,10 +337,10 @@ public TCreateTabletReq toThrift() { tSchema.setSequenceColIdx(sequenceCol); tSchema.setVersionColIdx(versionCol); tSchema.setRowStoreColCids(rowStoreColumnUniqueIds); - if (!CollectionUtils.isEmpty(clusterKeyIndexes)) { - tSchema.setClusterKeyIdxes(clusterKeyIndexes); + if (!CollectionUtils.isEmpty(clusterKeyUids)) { + tSchema.setClusterKeyUids(clusterKeyUids); if (LOG.isDebugEnabled()) { - LOG.debug("cluster key index={}, table_id={}, tablet_id={}", clusterKeyIndexes, tableId, tabletId); + LOG.debug("cluster key uids={}, table_id={}, tablet_id={}", clusterKeyUids, tableId, tabletId); } } if (CollectionUtils.isNotEmpty(indexes)) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableWithBloomFilterIndexTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableWithBloomFilterIndexTest.java new file mode 100644 index 000000000000000..54787ee8eefe920 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableWithBloomFilterIndexTest.java @@ -0,0 +1,905 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.catalog; + +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.ExceptionChecker; +import org.apache.doris.utframe.TestWithFeService; + +import org.junit.jupiter.api.Test; + +import java.util.UUID; + +public class CreateTableWithBloomFilterIndexTest extends TestWithFeService { + private static String runningDir = "fe/mocked/CreateTableWithBloomFilterIndexTest/" + + UUID.randomUUID().toString() + "/"; + + @Override + protected void runBeforeAll() throws Exception { + createDatabase("test"); + } + + @Test + public void testCreateTableWithTinyIntBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "TINYINT is not supported in bloom filter index. invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_tinyint_bf (\n" + + "k1 TINYINT, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithSupportedIntBloomFilterIndex() throws Exception { + // smallint + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_smallint_bf (\n" + + "k1 SMALLINT, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + + // int + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_int_bf (\n" + + "k1 INT, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + + // bigint + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_bigint_bf (\n" + + "k1 BIGINT, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + + // largeint + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_largeint_bf (\n" + + "k1 LARGEINT, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithFloatBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "FLOAT is not supported in bloom filter index. invalid column: k2", + () -> createTable("CREATE TABLE test.tbl_float_bf (\n" + + "k1 INT, \n" + + "k2 FLOAT, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k2\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithDoubleBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "DOUBLE is not supported in bloom filter index. 
invalid column: k2", + () -> createTable("CREATE TABLE test.tbl_double_bf (\n" + + "k1 INT, \n" + + "k2 DOUBLE, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k2\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithDecimalBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_decimal_bf (\n" + + "k1 DECIMAL(10,2), \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithCharBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_char_bf (\n" + + "k1 CHAR(20), \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithVarcharBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_varchar_bf (\n" + + "k1 VARCHAR(20), \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithTextBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_text_bf (\n" + + "k1 INT, \n" + + "k2 TEXT, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k2\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithDecimalV3BloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_decimalv3_bf (\n" + + "k1 DECIMALV3(10,2), \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithIPv4BloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_ipv4_bf (\n" + + "k1 IPV4, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithIPv6BloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_ipv6_bf (\n" + + "k1 IPV6, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithDateBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_date_bf (\n" + + "k1 DATE, \n" 
+ + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithDateTimeBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_datetime_bf (\n" + + "k1 DATETIME, \n" + + "v1 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithCharNgramBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_char_ngram_bf (\n" + + "k1 CHAR(20), \n" + + "v1 INT,\n" + + "INDEX idx_k1_ngram (k1) USING NGRAM_BF PROPERTIES(\"gram_size\"=\"3\", \"bf_size\"=\"1024\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithVarcharNgramBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_varchar_ngram_bf (\n" + + "k1 VARCHAR(50), \n" + + "v1 INT,\n" + + "INDEX idx_k1_ngram (k1) USING NGRAM_BF PROPERTIES(\"gram_size\"=\"3\", \"bf_size\"=\"1024\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithStringNgramBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_string_ngram_bf (\n" + + "k1 INT, \n" + + "k2 STRING, \n" + + "v1 INT,\n" + + "INDEX idx_k2_ngram (k2) USING NGRAM_BF PROPERTIES(\"gram_size\"=\"3\", \"bf_size\"=\"1024\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithArrayNumericBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "ARRAY is not supported in bloom filter index. invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_array_numeric_bf (\n" + + "v1 INT,\n" + + "k1 ARRAY\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(v1)\n" + + "DISTRIBUTED BY HASH(v1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithArrayDateBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "ARRAY is not supported in bloom filter index. invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_array_date_bf (\n" + + "v1 INT,\n" + + "k1 ARRAY\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(v1)\n" + + "DISTRIBUTED BY HASH(v1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithArrayStringNgramBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, + " ARRAY is not supported in ngram_bf index. 
invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_array_string_ngram_bf (\n" + + "v1 INT,\n" + + "k1 ARRAY,\n" + + "INDEX idx_k1_ngram (k1) USING NGRAM_BF PROPERTIES(\"gram_size\"=\"3\", \"bf_size\"=\"1024\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(v1)\n" + + "DISTRIBUTED BY HASH(v1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithMapBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "MAP is not supported in bloom filter index. invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_map_bf (\n" + + "v1 INT,\n" + + "k1 MAP\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(v1)\n" + + "DISTRIBUTED BY HASH(v1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithStructBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "STRUCT is not supported in bloom filter index. invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_struct_bf (\n" + + "v1 INT,\n" + + "k1 STRUCT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(v1)\n" + + "DISTRIBUTED BY HASH(v1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithJsonBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + " JSON is not supported in bloom filter index. invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_json_bf (\n" + + "v1 INT,\n" + + "k1 JSON\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(v1)\n" + + "DISTRIBUTED BY HASH(v1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateTableWithHllBloomFilterIndex() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + " HLL is not supported in bloom filter index. 
invalid column: k1", + () -> createTable("CREATE TABLE test.tbl_hll_bf (\n" + + "v1 INT,\n" + + "k1 HLL\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(v1)\n" + + "DISTRIBUTED BY HASH(v1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateMowTableWithBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_mow_bf (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "UNIQUE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\",\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"enable_unique_key_merge_on_write\" = \"true\"\n" + + ");")); + } + + @Test + public void testCreateDuplicateTableWithBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_duplicate_bf (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testCreateMorTableWithBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_mor_bf (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "UNIQUE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\",\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"enable_unique_key_merge_on_write\" = \"false\"\n" + + ");")); + } + + @Test + public void testCreateAggTableWithBloomFilterIndex() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_agg_bf (\n" + + "k1 INT, \n" + + "v1 INT SUM\n" + + ") ENGINE=OLAP\n" + + "AGGREGATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsValidCharacters() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_bf_valid_chars (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1,v1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsInvalidCharacters() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter column does not exist in table. 
invalid column: k1;v1", + () -> createTable("CREATE TABLE test.tbl_bf_invalid_chars (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1;v1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterFppValidInput() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_bf_fpp_valid (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"bloom_filter_fpp\" = \"0.05\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterFppInvalidInput() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter fpp should in [1.0E-4, 0.05]", + () -> createTable("CREATE TABLE test.tbl_bf_fpp_invalid (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"bloom_filter_fpp\" = \"-0.05\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testNgramBloomFilterGramSizeValidInput() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_ngram_gramsize_valid (\n" + + "k1 INT, \n" + + "k2 STRING, \n" + + "v1 INT,\n" + + "INDEX idx_k2_ngram (k2) USING NGRAM_BF PROPERTIES(\"gram_size\"=\"4\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testNgramBloomFilterGramSizeInvalidInput() { + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, + "'gram_size' should be an integer between 1 and 255", + () -> createTable("CREATE TABLE test.tbl_ngram_gramsize_invalid (\n" + + "k1 INT, \n" + + "k2 STRING, \n" + + "v1 INT,\n" + + "INDEX idx_k2_ngram (k2) USING NGRAM_BF PROPERTIES(\"gram_size\"=\"-1\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testNgramBloomFilterGramSizeInvalidInput256() { + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, + "'gram_size' should be an integer between 1 and 255", + () -> createTable("CREATE TABLE test.tbl_ngram_gram_size_invalid (\n" + + "k1 INT, \n" + + "k2 STRING, \n" + + "v1 INT,\n" + + "INDEX idx_k2_ngram (k2) USING NGRAM_BF PROPERTIES(\"gram_size\"=\"256\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testNgramBloomFilterBfSizeValidInput() throws Exception { + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, + "'bf_size' should be an integer between 64 and 65535", + () -> createTable("CREATE TABLE test.tbl_ngram_bfsize_valid (\n" + + "k1 INT, \n" + + "k2 STRING, \n" + + "v1 INT,\n" + + "INDEX idx_k2_ngram (k2) USING NGRAM_BF PROPERTIES(\"bf_size\"=\"256000000\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void 
testNgramBloomFilterBfSizeInvalidInput() { + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, + "'bf_size' should be an integer between 64 and 65535", + () -> createTable("CREATE TABLE test.tbl_ngram_bfsize_invalid (\n" + + "k1 INT, \n" + + "k2 STRING, \n" + + "v1 INT,\n" + + "INDEX idx_k2_ngram (k2) USING NGRAM_BF PROPERTIES(\"bf_size\"=\"-256000000\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testNgramBloomFilterBfSizeInvalidInput65536() { + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, + "'bf_size' should be an integer between 64 and 65535", + () -> createTable("CREATE TABLE test.tbl_ngram_bf_size_invalid (\n" + + "k1 INT, \n" + + "k2 STRING, \n" + + "v1 INT,\n" + + "INDEX idx_k2_ngram (k2) USING NGRAM_BF PROPERTIES(\"bf_size\"=\"65536\")\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsDuplicated() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Reduplicated bloom filter column: k1", + () -> createTable("CREATE TABLE test.tbl_bf_duplicated_columns (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1,k1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnDoesNotExist() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter column does not exist in table. invalid column: k3", + () -> createTable("CREATE TABLE test.tbl_bf_column_not_exist (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k3\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnInvalidType() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "BOOLEAN is not supported in bloom filter index. invalid column: k2", + () -> createTable("CREATE TABLE test.tbl_bf_invalid_type (\n" + + "k1 INT, \n" + + "k2 BOOLEAN,\n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k2\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnNonKeyInAggKeys() throws Exception { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter index should only be used in columns of UNIQUE_KEYS/DUP_KEYS table or key columns of AGG_KEYS table. 
invalid column: v1", + () -> createTable("CREATE TABLE test.tbl_bf_nonkey_in_agg (\n" + + "k1 INT, \n" + + "v1 INT SUM\n" + + ") ENGINE=OLAP\n" + + "AGGREGATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterFppNotDouble() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter fpp is not Double", + () -> createTable("CREATE TABLE test.tbl_bf_fpp_not_double (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"bloom_filter_fpp\" = \"abc\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterFppOutOfRange() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter fpp should in [1.0E-4, 0.05]", + () -> createTable("CREATE TABLE test.tbl_bf_fpp_out_of_range (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"bloom_filter_fpp\" = \"0.1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterFppBelowMin() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter fpp should in [1.0E-4, 0.05]", + () -> createTable("CREATE TABLE test.tbl_bf_fpp_below_min (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"v1\",\n" + + "\"bloom_filter_fpp\" = \"1e-5\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsEmptyString() throws Exception { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Unknown properties: {bloom_filter_columns=}", + () -> createTable("CREATE TABLE test.tbl_bf_empty_columns (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsOnlyCommas() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Unknown properties: {bloom_filter_columns=,,,}", + () -> createTable("CREATE TABLE test.tbl_bf_only_commas (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \",,,\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsNonExistingColumns() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter column does not exist in table. invalid column: k3", + () -> createTable("CREATE TABLE test.tbl_bf_non_existing_columns (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20),\n" + + "k2 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k2,k3\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsWithSpecialCharacters() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter column does not exist in table. 
invalid column: k1@", + () -> createTable("CREATE TABLE test.tbl_bf_special_chars (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20),\n" + + "k2 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1@,v1#\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsWithDifferentCase() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_bf_different_case (\n" + + "k1 INT, \n" + + "V1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(K1)\n" + + "DISTRIBUTED BY HASH(K1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"k1,v1\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsWithSpaces() throws Exception { + ExceptionChecker.expectThrowsNoException(() -> createTable("CREATE TABLE test.tbl_bf_columns_with_spaces (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \" k1 , v1 \",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsWithLongColumnName() throws Exception { + StringBuilder sb = new StringBuilder("k"); + for (int i = 0; i < 1000; i++) { + sb.append('1'); + } + String longColumnName = sb.toString(); + + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter column does not exist in table. invalid column: " + longColumnName, + () -> createTable("CREATE TABLE test.tbl_bf_long_column_name (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20),\n" + + "k2 INT\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"" + longColumnName + "\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsWithUnicodeCharacters() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter column does not exist in table. invalid column: 名字", + () -> createTable("CREATE TABLE test.tbl_bf_unicode_columns (\n" + + "k1 INT, \n" + + "name VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \"名字\",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } + + @Test + public void testBloomFilterColumnsWithNullOrWhitespace() { + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Bloom filter column does not exist in table. 
invalid column: ", + () -> createTable("CREATE TABLE test.tbl_bf_null_or_whitespace (\n" + + "k1 INT, \n" + + "v1 VARCHAR(20)\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n" + + "PROPERTIES (\n" + + "\"bloom_filter_columns\" = \" , \",\n" + + "\"replication_num\" = \"1\"\n" + + ");")); + } +} diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 41f8727ed1b301d..2c378fe2d458e0a 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -392,7 +392,7 @@ message TabletSchemaPB { repeated string partial_update_input_columns = 21; // deprecated optional bool enable_single_replica_compaction = 22 [default=false]; optional bool skip_write_index_on_load = 23 [default=false]; - repeated int32 cluster_key_idxes = 24; + repeated int32 cluster_key_uids = 24; optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; // column unique ids for row store columns repeated int32 row_store_column_unique_ids = 26; @@ -425,7 +425,7 @@ message TabletSchemaCloudPB { reserved 21; // deprecated partial_update_input_columns optional bool enable_single_replica_compaction = 22 [default=false]; optional bool skip_write_index_on_load = 23 [default=false]; - repeated int32 cluster_key_idxes = 24; + repeated int32 cluster_key_uids = 24; optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; // column unique ids for row store columns repeated int32 row_store_column_unique_ids = 26; diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index fdbf4483bf8c4cf..7bb24ba8d4d9da5 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -44,7 +44,7 @@ struct TTabletSchema { 16: optional bool store_row_column = false 17: optional bool enable_single_replica_compaction = false 18: optional bool skip_write_index_on_load = false - 19: optional list cluster_key_idxes + 19: optional list cluster_key_uids // col unique id for row store column 20: optional list row_store_col_cids 21: optional i64 row_store_page_size = 16384 diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index 246cc9995629343..916885028ad4a0d 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -53,6 +53,7 @@ struct TColumnDesc { 6: optional bool isAllowNull 7: optional string columnKey 8: optional list children + 9: optional string defaultValue } // A column definition; used by CREATE TABLE and DESCRIBE statements. 
A column diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 77d9c0ef672feb8..bd8c43622d1f608 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -353,6 +353,10 @@ struct TMaxComputeFileDesc { 1: optional string partition_spec // deprecated 2: optional string session_id 3: optional string table_batch_read_session + // for mc network configuration + 4: optional i32 connect_timeout + 5: optional i32 read_timeout + 6: optional i32 retry_times } struct THudiFileDesc { diff --git a/regression-test/data/account_p0/test_information_schema.out b/regression-test/data/account_p0/test_information_schema.out index 77d5f6dccd50e85..e4f0ed09d0e1c74 100644 --- a/regression-test/data/account_p0/test_information_schema.out +++ b/regression-test/data/account_p0/test_information_schema.out @@ -17,3 +17,10 @@ -- !sql -- DUP +-- !default -- +id largeint YES \N +name varchar(20) YES 无 +age smallint(6) YES 0 +address varchar(100) YES beijing +date datetime YES 20240101 + diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out new file mode 100644 index 000000000000000..fd3eafa02557220 --- /dev/null +++ b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out @@ -0,0 +1,357 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !one_partition_1_1 -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 + +-- !one_partition_2_1 -- +4 David 2025 +5 Eva 2025 + +-- !one_partition_3_all -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 +4 David 2025 +5 Eva 2025 + +-- !one_partition_4_all -- +5 Eva 2025 + +-- !one_partition_5_1 -- +3 Charlie 2024 + +-- !two_partition_1_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !two_partition_2_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_3_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !two_partition_4_all -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_5_1 -- + +-- !two_partition_6_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !three_partition_1_1 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 + +-- !three_partition_2_1 -- +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 + +-- !three_partition_3_3 -- +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_4_2 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +6 Frank US 2025 Q1 + +-- !three_partition_5_all -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +4 David US 2024 Q2 +5 Eva US 2024 Q2 +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_6_1 -- +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 + +-- !three_partition_7_7 -- +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_8_2 -- +7 Grace US 2025 Q2 + +-- !one_partition_6_0 -- + +-- !two_partition_7_0 -- + +-- !two_partition_8_0 -- + +-- !three_partition_9_0 -- + +-- !three_partition_10_0 -- + +-- !three_partition_11_0 -- + +-- !time_travel_two_partition_1_3 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 
1 + +-- !time_travel_two_partition_2_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_3_1 -- +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_4_0 -- + +-- !time_travel_two_partition_5_0 -- + +-- !time_travel_two_partition_6_1 -- +1 Alice US 1 + +-- !one_partition_boolean -- +1 Alice true +2 Bob true + +-- !one_partition_tinyint -- +1 Alice 1 +2 Bob 1 + +-- !one_partition_smallint -- +1 Alice 10 +2 Bob 10 + +-- !one_partition_int -- +1 Alice 100 +2 Bob 100 + +-- !one_partition_bigint -- +1 Alice 1234567890 +2 Bob 1234567890 + +-- !one_partition_string -- +1 Alice RegionA +2 Bob RegionA + +-- !one_partition_date -- +1 Alice 2023-12-01 +2 Bob 2023-12-01 + +-- !one_partition_1_1 -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 + +-- !one_partition_2_1 -- +4 David 2025 +5 Eva 2025 + +-- !one_partition_3_all -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 +4 David 2025 +5 Eva 2025 + +-- !one_partition_4_all -- +5 Eva 2025 + +-- !one_partition_5_1 -- +3 Charlie 2024 + +-- !two_partition_1_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !two_partition_2_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_3_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !two_partition_4_all -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_5_1 -- + +-- !two_partition_6_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !three_partition_1_1 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 + +-- !three_partition_2_1 -- +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 + +-- !three_partition_3_3 -- +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_4_2 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +6 Frank US 2025 Q1 + +-- !three_partition_5_all -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +4 David US 2024 Q2 +5 Eva US 2024 Q2 +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_6_1 -- +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 + +-- !three_partition_7_7 -- +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_8_2 -- +7 Grace US 2025 Q2 + +-- !one_partition_6_0 -- + +-- !two_partition_7_0 -- + +-- !two_partition_8_0 -- + +-- !three_partition_9_0 -- + +-- !three_partition_10_0 -- + +-- !three_partition_11_0 -- + +-- !time_travel_two_partition_1_3 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 + +-- !time_travel_two_partition_2_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_3_1 -- +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_4_0 -- + +-- !time_travel_two_partition_5_0 -- + +-- !time_travel_two_partition_6_1 -- +1 Alice US 1 + +-- !one_partition_boolean -- +1 Alice true +2 Bob true + +-- !one_partition_tinyint -- +1 Alice 1 +2 Bob 1 + +-- !one_partition_smallint -- +1 Alice 10 +2 Bob 10 + +-- !one_partition_int -- +1 Alice 100 +2 Bob 100 + +-- !one_partition_bigint -- +1 Alice 1234567890 +2 Bob 1234567890 + +-- !one_partition_string -- +1 Alice RegionA +2 Bob RegionA + +-- !one_partition_date -- +1 Alice 2023-12-01 +2 Bob 2023-12-01 + 
diff --git a/regression-test/data/javaudf_p0/test_javaudf_const_test.out b/regression-test/data/javaudf_p0/test_javaudf_const_test.out new file mode 100644 index 000000000000000..984cd976b3ec408 --- /dev/null +++ b/regression-test/data/javaudf_p0/test_javaudf_const_test.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !java_udf_all_types -- +1 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +2 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +3 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +4 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +5 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +6 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +7 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +8 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +9 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} +10 true 2 3 4.4000 5.5 6.6 2020-02-02 2022-02-03T10:10:10 asd ["a", "b"] {"s_id":2, "s_name":"sa2"} {"aa":"bb"} + diff --git a/regression-test/data/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.out b/regression-test/data/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.out index 5c9df6b7f92256a..c400e078daff1a3 100644 --- a/regression-test/data/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.out +++ b/regression-test/data/nereids_rules_p0/mv/agg_without_roll_up/aggregate_without_roll_up.out @@ -323,3 +323,27 @@ c 3 6 c,c,c 5.333333333333333 mi 3 2 1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 \N \N 8 8 1 1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 1 2 8 8 1 +-- !query29_0_before -- +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 \N \N 8 8 1 +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 1 2 8 8 1 + +-- !query29_0_after -- +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 \N \N 8 8 1 +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 1 2 8 8 1 + +-- !query30_0_before -- +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 \N \N 8 8 1 +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 1 2 8 8 1 + +-- !query30_0_after -- +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 \N \N 8 8 1 +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 1 2 8 8 1 + +-- !query31_0_before -- +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 \N \N 8 8 1 +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 1 2 8 8 1 + +-- !query31_0_after -- +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 \N \N 8 8 1 +1 2023-12-09 1 yy 2 2 2 4 3 \N 2 3 1 2 8 8 1 + diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy index aca1596f8637695..246558eaf06d07e 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy @@ -1032,6 +1032,8 @@ class Config { List excludeCases = ["test_table_properties", "test_create_table" , "test_default_hll", "test_default_pi", "test_default_bitmap_empty" , "test_full_compaction", "test_full_compaction_by_table_id" + // schema change + , "test_alter_muti_modify_column" // 
partial update , "txn_insert", "test_update_schema_change", "test_generated_column_update", "test_nested_type_with_rowstore", "test_partial_update_generated_column", "nereids_partial_update_native_insert_stmt" , "partial_update", "nereids_update_on_current_timestamp", "update_on_current_timestamp", "nereids_delete_mow_partial_update", "delete_mow_partial_update", "test_unique_table_auto_inc" diff --git a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/UDFConstTest.java b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/UDFConstTest.java new file mode 100644 index 000000000000000..86370356998fed4 --- /dev/null +++ b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/UDFConstTest.java @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.udf; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.HashMap; + +public class UDFConstTest { + public static class ConstBoolean { + public Boolean evaluate(Integer i, Boolean value) { + return value; + } + } + + public static class ConstInt { + public Integer evaluate(Integer i, Integer value) { + return value; + } + } + + public static class ConstFloat { + public Float evaluate(Integer i, Float value) { + return value; + } + } + + public static class ConstDouble { + public Double evaluate(Integer i, Double value) { + return value; + } + } + + public static class ConstLargeInt { + public BigInteger evaluate(Integer i, BigInteger value) { + return value; + } + } + + public static class ConstDecimal { + public BigDecimal evaluate(Integer i, BigDecimal value) { + return value; + } + } + + public static class ConstDate { + public LocalDate evaluate(Integer i, LocalDate value) { + return value; + } + } + + public static class ConstDateTime { + public LocalDateTime evaluate(Integer i, LocalDateTime value) { + return value; + } + } + + public static class ConstString { + public String evaluate(Integer i, String value) { + return value; + } + } + + public static class ConstArray { + public ArrayList evaluate(Integer i, ArrayList value) { + return value; + } + } + + public static class ConstMap { + public HashMap evaluate(Integer i, HashMap value) { + return value; + } + } + + public static class ConstStruct { + public ArrayList evaluate(Integer i, ArrayList value) { + return value; + } + } +} diff --git a/regression-test/suites/account_p0/test_information_schema.groovy b/regression-test/suites/account_p0/test_information_schema.groovy index dcbc0c3532838e0..fc94ad4b9bc2fa7 100644 --- a/regression-test/suites/account_p0/test_information_schema.groovy +++ 
b/regression-test/suites/account_p0/test_information_schema.groovy @@ -87,4 +87,26 @@ suite("test_information_schema") { def dbName = dbPrefix + i.toString() sql "DROP DATABASE `${dbName}`" } + + def dbName = dbPrefix + "default" + def tableName = tablePrefix + "default" + sql "CREATE DATABASE IF NOT EXISTS `${dbName}`" + sql "USE `${dbName}`" + sql """drop table if exists `${tableName}`""" + sql """ + CREATE TABLE `${tableName}` ( + `id` largeint NULL COMMENT '用户ID', + `name` varchar(20) NULL DEFAULT "无" COMMENT '用户姓名', + `age` smallint NULL DEFAULT "0" COMMENT '用户年龄', + `address` varchar(100) NULL DEFAULT "beijing" COMMENT '用户所在地区', + `date` datetime NULL DEFAULT "20240101" COMMENT '数据导入时间' + ) ENGINE=OLAP + DUPLICATE KEY(`id`, `name`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1") + """ + qt_default "SELECT COLUMN_NAME as field,COLUMN_TYPE as type,IS_NULLABLE as isNullable, COLUMN_DEFAULT as defaultValue FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '${tableName}' AND TABLE_SCHEMA = '${dbName}'" + sql "DROP DATABASE `${dbName}`" } + diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy new file mode 100644 index 000000000000000..eea17b01711b448 --- /dev/null +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy @@ -0,0 +1,333 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hudi_partition_prune", "p2,external,hudi,external_remote,external_remote_hudi") { + String enabled = context.config.otherConfigs.get("enableExternalHudiTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable hudi test") + return + } + + String catalog_name = "test_hudi_partition_prune" + String props = context.config.otherConfigs.get("hudiEmrCatalog") + sql """drop catalog if exists ${catalog_name};""" + + for (String use_hive_sync_partition : ['true','false']) { + + sql """ + create catalog if not exists ${catalog_name} properties ( + ${props} + ,"use_hive_sync_partition"="${use_hive_sync_partition}" + ); + """ + + sql """ switch ${catalog_name};""" + sql """ use regression_hudi;""" + sql """ set enable_fallback_to_original_planner=false """ + + + + def one_partition_1_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 ORDER BY id;""" + def one_partition_2_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2025 ORDER BY id;""" + def one_partition_3_all = """SELECT id,name,part1 FROM one_partition_tb ORDER BY id;""" + def one_partition_4_all = """SELECT id,name,part1 FROM one_partition_tb WHERE id = 5 ORDER BY id;""" + def one_partition_5_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 AND id >= 3 ORDER BY id;""" + + def two_partition_1_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;""" + def two_partition_2_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + def two_partition_3_2 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;""" + def two_partition_4_all = """SELECT id,name,part1,part2 FROM two_partition_tb ORDER BY id;""" + def two_partition_5_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;""" + def two_partition_6_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + + def three_partition_1_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_2_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;""" + def three_partition_3_3 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;""" + def three_partition_4_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;""" + def three_partition_5_all = """SELECT id,name,part1,part2,part3 FROM three_partition_tb ORDER BY id;""" + def three_partition_6_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_7_7 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;""" + def three_partition_8_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;""" + + def one_partition_boolean = """SELECT id,name,part1 FROM boolean_partition_tb WHERE part1 = true ORDER BY id;""" + def one_partition_tinyint = """SELECT id,name,part1 FROM tinyint_partition_tb WHERE part1 = 1 ORDER BY id;""" + def one_partition_smallint = """SELECT id,name,part1 FROM smallint_partition_tb WHERE part1 = 10 ORDER BY id;""" + def
one_partition_int = """SELECT id,name,part1 FROM int_partition_tb WHERE part1 = 100 ORDER BY id;""" + def one_partition_bigint = """SELECT id,name,part1 FROM bigint_partition_tb WHERE part1 = 1234567890 ORDER BY id;""" + def one_partition_string = """SELECT id,name,part1 FROM string_partition_tb WHERE part1 = 'RegionA' ORDER BY id;""" + def one_partition_date = """SELECT id,name,part1 FROM date_partition_tb WHERE part1 = '2023-12-01' ORDER BY id;""" + def one_partition_timestamp = """SELECT id,name,part1 FROM timestamp_partition_tb WHERE part1 = '2023-12-01 08:00:00' ORDER BY id;""" + + + + qt_one_partition_1_1 one_partition_1_1 + explain { + sql("${one_partition_1_1}") + contains "partition=1/2" + } + + qt_one_partition_2_1 one_partition_2_1 + explain { + sql("${one_partition_2_1}") + contains "partition=1/2" + } + + qt_one_partition_3_all one_partition_3_all + explain { + sql("${one_partition_3_all}") + contains "partition=2/2" + } + + qt_one_partition_4_all one_partition_4_all + explain { + sql("${one_partition_4_all}") + contains "partition=2/2" + } + + qt_one_partition_5_1 one_partition_5_1 + explain { + sql("${one_partition_5_1}") + contains "partition=1/2" + } + + + qt_two_partition_1_1 two_partition_1_1 + explain { + sql("${two_partition_1_1}") + contains "partition=1/4" + } + + qt_two_partition_2_1 two_partition_2_1 + explain { + sql("${two_partition_2_1}") + contains "partition=1/4" + } + + qt_two_partition_3_2 two_partition_3_2 + explain { + sql("${two_partition_3_2}") + contains "partition=2/4" + } + + qt_two_partition_4_all two_partition_4_all + explain { + sql("${two_partition_4_all}") + contains "partition=4/4" + } + + qt_two_partition_5_1 two_partition_5_1 + explain { + sql("${two_partition_5_1}") + contains "partition=1/4" + } + + qt_two_partition_6_1 two_partition_6_1 + explain { + sql("${two_partition_6_1}") + contains "partition=1/4" + } + + + + qt_three_partition_1_1 three_partition_1_1 + explain { + sql("${three_partition_1_1}") + contains "partition=1/10" + } + + qt_three_partition_2_1 three_partition_2_1 + explain { + sql("${three_partition_2_1}") + contains "partition=1/10" + } + + qt_three_partition_3_3 three_partition_3_3 + explain { + sql("${three_partition_3_3}") + contains "partition=3/10" + } + + qt_three_partition_4_2 three_partition_4_2 + explain { + sql("${three_partition_4_2}") + contains "partition=2/10" + } + + qt_three_partition_5_all three_partition_5_all + explain { + sql("${three_partition_5_all}") + contains "partition=10/10" + } + + qt_three_partition_6_1 three_partition_6_1 + explain { + sql("${three_partition_6_1}") + contains "partition=1/10" + } + + qt_three_partition_7_7 three_partition_7_7 + explain { + sql("${three_partition_7_7}") + contains "partition=7/10" + } + + qt_three_partition_8_2 three_partition_8_2 + explain { + sql("${three_partition_8_2}") + contains "partition=2/10" + } + + + // 0 partitions + def one_partition_6_0 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;""" + qt_one_partition_6_0 one_partition_6_0 + explain { + sql("${one_partition_6_0}") + contains "partition=0/2" + } + + def two_partition_7_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;""" + qt_two_partition_7_0 two_partition_7_0 + explain { + sql("${two_partition_7_0}") + contains "partition=0/4" + } + + def two_partition_8_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;""" + qt_two_partition_8_0 two_partition_8_0 + explain { + 
sql("${two_partition_8_0}") + contains "partition=0/4" + } + + def three_partition_9_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;""" + qt_three_partition_9_0 three_partition_9_0 + explain { + sql("${three_partition_9_0}") + contains "partition=0/10" + } + + def three_partition_10_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_10_0 three_partition_10_0 + explain { + sql("${three_partition_10_0}") + contains "partition=0/10" + } + + def three_partition_11_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_11_0 three_partition_11_0 + explain { + sql("${three_partition_11_0}") + contains "partition=0/10" + } + + + //time travel + def time_travel_two_partition_1_3 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' order by id;" + def time_travel_two_partition_2_2 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' where part1='US' order by id;" + def time_travel_two_partition_3_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' where part2=2 order by id;" + def time_travel_two_partition_4_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' where part2=10 order by id;" + + qt_time_travel_two_partition_1_3 time_travel_two_partition_1_3 + explain { + sql("${time_travel_two_partition_1_3}") + contains "partition=3/3" + } + + + qt_time_travel_two_partition_2_2 time_travel_two_partition_2_2 + explain { + sql("${time_travel_two_partition_2_2}") + contains "partition=2/3" + } + + qt_time_travel_two_partition_3_1 time_travel_two_partition_3_1 + explain { + sql("${time_travel_two_partition_3_1}") + contains "partition=1/3" + } + + qt_time_travel_two_partition_4_0 time_travel_two_partition_4_0 + explain { + sql("${time_travel_two_partition_4_0}") + contains "partition=0/3" + } + + + + + def time_travel_two_partition_5_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20231126012025218' order by id;" + qt_time_travel_two_partition_5_0 time_travel_two_partition_5_0 + explain { + sql("${time_travel_two_partition_5_0}") + contains "partition=0/0" + } + + def time_travel_two_partition_6_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171214902' order by id;" + qt_time_travel_two_partition_6_1 time_travel_two_partition_6_1 + explain { + sql("${time_travel_two_partition_6_1}") + contains "partition=1/1" + } + + // all types as partition + qt_one_partition_boolean one_partition_boolean + explain { + sql("${one_partition_boolean}") + contains "partition=1/2" + } + qt_one_partition_tinyint one_partition_tinyint + explain { + sql("${one_partition_tinyint}") + contains "partition=1/2" + } + qt_one_partition_smallint one_partition_smallint + explain { + sql("${one_partition_smallint}") + contains "partition=1/2" + } + qt_one_partition_int one_partition_int + explain { + sql("${one_partition_int}") + contains "partition=1/2" + } + qt_one_partition_bigint one_partition_bigint + explain { + sql("${one_partition_bigint}") + contains "partition=1/2" + } + qt_one_partition_string one_partition_string + explain { + sql("${one_partition_string}") + contains "partition=1/2" + } + qt_one_partition_date one_partition_date + explain { + sql("${one_partition_date}") + contains 
"partition=1/2" + } + // qt_one_partition_timestamp one_partition_timestamp + // explain { + // sql("${one_partition_timestamp}") + // contains "partition=1/2" + // } + + sql """drop catalog if exists ${catalog_name};""" + + + } + +} \ No newline at end of file diff --git a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy index 3f0929b59ea96ae..81133270fb6e609 100644 --- a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy +++ b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy @@ -388,7 +388,10 @@ suite("test_external_catalog_maxcompute", "p2,external,maxcompute,external_remot order_qt_multi_partition_q6 """ select max(pt), yy, mm from multi_partitions where yy = '2023' and mm='08' group by yy, mm order by yy, mm; """ order_qt_multi_partition_q7 """ select count(*) from multi_partitions where yy < '2023' or dd < '03'; """ order_qt_multi_partition_q8 """ select count(*) from multi_partitions where pt>=3; """ - order_qt_multi_partition_q9 """ select city,mnt,gender,finished_time,order_rate,cut_date,create_time,pt, yy, mm, dd from multi_partitions where pt >= 2 and pt < 4 and finished_time is not null; """ + + //`finished_time is not null` => com.aliyun.odps.OdpsException: ODPS-0010000:System internal error - fuxi job failed, caused by: timestamp_ntz + // order_qt_multi_partition_q9 """ select city,mnt,gender,finished_time,order_rate,cut_date,create_time,pt, yy, mm, dd from multi_partitions where pt >= 2 and pt < 4 and finished_time is not null; """ + order_qt_multi_partition_q10 """ select pt, yy, mm, dd from multi_partitions where pt >= 2 and create_time > '2023-08-03 03:11:00' order by pt, yy, mm, dd; """ diff --git a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy index e2ab9b9c117f1c6..cce6ed9fd9d014f 100644 --- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy +++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy @@ -104,6 +104,10 @@ suite("test_ngram_bloomfilter_index") { """ test { sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url ngram_bf index'""" - exception "bf_size should be integer and between 64 and 65535" + exception "'bf_size' should be an integer between 64 and 65535" + } + test { + sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="256", "bf_size"="65535") COMMENT 'http_url ngram_bf index'""" + exception "'gram_size' should be an integer between 1 and 255" } } diff --git a/regression-test/suites/javaudf_p0/test_javaudf_const_test.groovy b/regression-test/suites/javaudf_p0/test_javaudf_const_test.groovy new file mode 100644 index 000000000000000..31323bb46ed1009 --- /dev/null +++ b/regression-test/suites/javaudf_p0/test_javaudf_const_test.groovy @@ -0,0 +1,185 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_javaudf_const_test") { + def tableName = "test_javaudf_const_test" + def jarPath = """${context.file.parent}/jars/java-udf-case-jar-with-dependencies.jar""" + // scp_udf_file_to_all_be(jarPath) + + log.info("Jar path: ${jarPath}".toString()) + try { + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS test_javaudf_const_test ( + int_col int, + boolean_col boolean, + tinyint_col tinyint, + smallint_col smallint, + bigint_col bigint, + largeint_col largeint, + decimal_col decimal(15, 4), + float_col float, + double_col double, + date_col date, + datetime_col datetime(6), + string_col string, + array_col array, + struct_col STRUCT, + map_col map + ) + DISTRIBUTED BY HASH(int_col) PROPERTIES("replication_num" = "1"); + """ + StringBuilder sb = new StringBuilder() + int i = 1 + for (; i < 10; i++) { + sb.append(""" + (${i},${i%2},${i},${i}*2,${i}*3,${i}*4,${3.33/i},${(7.77/i).round(3)},${(3.1415/i).round(5)},"2023-10-${i+17}","2023-10-${i+10} 10:1${i}:11.234","row${i}",array(null, "nested${i}"),struct(${i}, "sa${i}"), {"k${i}":null,"k${i+1}":${i}}), + """) + log.info("${sb.toString()}"); + } + sb.append(""" + (${i},${i%2},null,${i}*2,${i}*3,${i}*4,null,${(7.77/i).round(3)},${(3.1415/i).round(5)},null,"2023-10-${i+10} 10:${i}:11.234",null,array(null, "nested${i}"),struct(${i}, "saaaaaa"),{"k${i}":null,"k${i+1}":${i}}) + """) + sql """ INSERT INTO ${tableName} VALUES + ${sb.toString()} + """ + File path = new File(jarPath) + if (!path.exists()) { + throw new IllegalStateException("""${jarPath} doesn't exist! 
""") + } + + sql """DROP FUNCTION IF EXISTS const_boolean(int,boolean);""" + sql """CREATE FUNCTION const_boolean(int,boolean) RETURNS boolean PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstBoolean", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_int(int,int);""" + sql """CREATE FUNCTION const_int(int,int) RETURNS int PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstInt", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_largeint(int,largeint);""" + sql """CREATE FUNCTION const_largeint(int,largeint) RETURNS largeint PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstLargeInt", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_decimal(int,decimal(15, 4));""" + sql """CREATE FUNCTION const_decimal(int,decimal(15, 4)) RETURNS decimal(15, 4) PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstDecimal", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_float(int,float);""" + sql """CREATE FUNCTION const_float(int,float) RETURNS float PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstFloat", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_double(int,double);""" + sql """CREATE FUNCTION const_double(int,double) RETURNS double PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstDouble", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_date(int,date);""" + sql """CREATE FUNCTION const_date(int,date) RETURNS date PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstDate", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_datetime(int,datetime(6));""" + sql """CREATE FUNCTION const_datetime(int,datetime(6)) RETURNS datetime(6) PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstDateTime", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_string(int,string);""" + sql """CREATE FUNCTION const_string(int,string) RETURNS string PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstString", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_array(int,array);""" + sql """CREATE FUNCTION const_array(int,array) RETURNS array PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstArray", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_struct(int,STRUCT);""" + sql """CREATE FUNCTION const_struct(int,STRUCT) RETURNS STRUCT PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstStruct", + "type"="JAVA_UDF" + );""" + + sql """DROP FUNCTION IF EXISTS const_map(int,map);""" + sql """CREATE FUNCTION const_map(int,map) RETURNS map PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.UDFConstTest\$ConstMap", + "type"="JAVA_UDF" + );""" + + qt_java_udf_all_types """select + int_col, + const_boolean(int_col,true), + const_int(int_col,2), + const_largeint(int_col,3), + const_decimal(int_col,4.4), + const_float(int_col,5.5), + const_double(int_col,6.6), + const_date(int_col,'2020-02-02'), + const_datetime(int_col,'2022-02-03 10:10:10'), + const_string(int_col,'asd'), + const_array(int_col,['a','b']), + const_struct(int_col,struct(2, 
'sa2')), + const_map(int_col,{"aa":"bb"}) + from ${tableName} order by int_col;""" + } finally { + try_sql """DROP FUNCTION IF EXISTS const_boolean(int,boolean);""" + try_sql """DROP FUNCTION IF EXISTS const_int(int,int);""" + try_sql """DROP FUNCTION IF EXISTS const_largeint(int,largeint);""" + try_sql """DROP FUNCTION IF EXISTS const_decimal(int,decimal(15, 4));""" + try_sql """DROP FUNCTION IF EXISTS const_float(int,float);""" + try_sql """DROP FUNCTION IF EXISTS const_double(int,double);""" + try_sql """DROP FUNCTION IF EXISTS const_date(int,date);""" + try_sql """DROP FUNCTION IF EXISTS const_datetime(int,datetime(6));""" + try_sql """DROP FUNCTION IF EXISTS const_string(int,string);""" + try_sql """DROP FUNCTION IF EXISTS const_array(int,array);""" + try_sql """DROP FUNCTION IF EXISTS const_struct(int,STRUCT);""" + try_sql """DROP FUNCTION IF EXISTS const_map(int,map);""" + try_sql("""DROP TABLE IF EXISTS ${tableName};""") + } +} diff --git a/regression-test/suites/job_p0/test_base_insert_job.groovy b/regression-test/suites/job_p0/test_base_insert_job.groovy index 8a0bb34ca43fd59..fc97e367483b866 100644 --- a/regression-test/suites/job_p0/test_base_insert_job.groovy +++ b/regression-test/suites/job_p0/test_base_insert_job.groovy @@ -76,6 +76,7 @@ suite("test_base_insert_job") { insert into ${tableName} values ('2023-03-18', 1, 1) """ + // create recurring job sql """ CREATE JOB ${jobName} ON SCHEDULE every 1 second comment 'test' DO INSERT INTO ${tableName} (`timestamp`, `type`, `user_id`) WITH @@ -97,23 +98,25 @@ suite("test_base_insert_job") { """ Awaitility.await().atMost(30, SECONDS).until( { - def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='RECURRING' """ - println(onceJob) - onceJob.size() == 1 && '1' <= onceJob.get(0).get(0) + def jobSuccendCount = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='RECURRING' """ + // check job status and succeed task count larger than 1 + jobSuccendCount.size() == 1 && '1' <= jobSuccendCount.get(0).get(0) } ) sql """ PAUSE JOB where jobname = '${jobName}' """ + def pausedJobStatus = sql """ + select status from jobs("type"="insert") where Name='${jobName}' + """ + assert pausedJobStatus.get(0).get(0) == "PAUSED" def tblDatas = sql """select * from ${tableName}""" - println tblDatas assert tblDatas.size() >= 2 //at least 2 records - def pauseJobId = sql """select id from jobs("type"="insert") where Name='${jobName}'""" - def taskStatus = sql """select status from tasks("type"="insert") where jobid= '${pauseJobId.get(0).get(0)}'""" - println taskStatus + + def taskStatus = sql """select status from tasks("type"="insert") where JobName ='${jobName}'""" for (int i = 0; i < taskStatus.size(); i++) { - assert taskStatus.get(i).get(0) != "FAILED" || taskStatus.get(i).get(0) != "STOPPED" || taskStatus.get(i).get(0) != "STOPPED" + assert taskStatus.get(i).get(0) =="CANCELLED" || taskStatus.get(i).get(0) =="FINISHED" } sql """ CREATE JOB ${jobMixedName} ON SCHEDULE every 1 second DO insert into ${tableName} (timestamp, type, user_id) values ('2023-03-18','1','12213'); @@ -122,6 +125,7 @@ suite("test_base_insert_job") { println mixedNameJobs assert mixedNameJobs.size() == 1 && mixedNameJobs.get(0).get(0) == jobMixedName assert mixedNameJobs.get(0).get(1) == '' + // clean up job and table sql """ DROP JOB IF EXISTS where jobname = '${jobName}' """ @@ -145,23 +149,25 @@ suite("test_base_insert_job") { """ def dataCount = sql 
"""select count(*) from ${tableName}""" assert dataCount.get(0).get(0) == 0 + // create one time job sql """ CREATE JOB ${jobName} ON SCHEDULE at current_timestamp comment 'test for test&68686781jbjbhj//ncsa' DO insert into ${tableName} values ('2023-07-19', 2, 1001); """ - + // wait job finished Awaitility.await("create-one-time-job-test").atMost(30, SECONDS).until( { def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='ONE_TIME' """ onceJob.size() == 1 && '1' == onceJob.get(0).get(0) } ) - def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='ONE_TIME' """ + def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='ONE_TIME' """ assert onceJob.size() == 1 //check succeed task count assert '1' == onceJob.get(0).get(0) def datas = sql """select status,taskid from tasks("type"="insert") where jobName= '${jobName}'""" - println datas + // table should have one record after job finished assert datas.size() == 1 + // one time job only has one task. when job finished, task status should be FINISHED assert datas.get(0).get(0) == "FINISHED" // check table data def dataCount1 = sql """select count(1) from ${tableName} where user_id=1001""" @@ -175,19 +181,20 @@ suite("test_base_insert_job") { sql """ DROP JOB IF EXISTS where jobname = 'press' """ - + // create job with start time is current time and interval is 10 hours sql """ CREATE JOB press ON SCHEDULE every 10 hour starts CURRENT_TIMESTAMP comment 'test for test&68686781jbjbhj//ncsa' DO insert into ${tableName} values ('2023-07-19', 99, 99); """ Awaitility.await("create-immediately-job-test").atMost(60, SECONDS).until({ def pressJob = sql """ select SucceedTaskCount from jobs("type"="insert") where name='press'""" - println pressJob + // check job status and succeed task count is 1 pressJob.size() == 1 && '1' == onceJob.get(0).get(0) }) sql """ DROP JOB IF EXISTS where jobname = 'past_start_time' """ + // create job with start time is past time, job should be running sql """ CREATE JOB past_start_time ON SCHEDULE every 10 hour starts '2023-11-13 14:18:07' comment 'test for test&68686781jbjbhj//ncsa' DO insert into ${tableName} values ('2023-07-19', 99, 99); """ @@ -214,6 +221,10 @@ suite("test_base_insert_job") { sql """ PAUSE JOB where jobname = '${jobName}' """ + pausedJobStatus = sql """ + select status from jobs("type"="insert") where Name='${jobName}' + """ + assert pausedJobStatus.get(0).get(0) == "PAUSED" def tasks = sql """ select status from tasks("type"="insert") where JobName= '${jobName}' """ sql """ RESUME JOB where jobname = '${jobName}' @@ -226,6 +237,11 @@ suite("test_base_insert_job") { //resume tasks size should be greater than before pause afterResumeTasks.size() > tasks.size() }) + // check resume job status + def afterResumeJobStatus = sql """ + select status from jobs("type"="insert") where Name='${jobName}' + """ + assert afterResumeJobStatus.get(0).get(0) == "RUNNING" // assert same job name try { diff --git a/regression-test/suites/manager/test_manager_interface_1.groovy b/regression-test/suites/manager/test_manager_interface_1.groovy index 249ce252ffbf86b..02f6c210f1fd09b 100644 --- a/regression-test/suites/manager/test_manager_interface_1.groovy +++ b/regression-test/suites/manager/test_manager_interface_1.groovy @@ -496,128 +496,6 @@ DISTRIBUTED BY HASH(`k1`) BUCKETS 1""")) test_table_index() - - - -// show proc 
'/current_query_stmts' -// show proc '/current_queries' -// show processlist -// kill query $query_id -// SHOW PROC '/cluster_health/tablet_health' - def test_proc = { - - def futures = [] - - - futures.add( thread { - - try{ - sql """ select sleep(4.809); """ - }catch(Exception e){ - - } - }) - futures.add( thread { - sleep(1000); - List> result = sql """ show proc '/current_query_stmts' """ - logger.info("result = ${result}" ) - def x = 0 - def queryid = "" - logger.info("result = ${result}") - - for( int i = 0;i> result = sql """ show proc '/current_query_stmts' """ + logger.info("result = ${result}" ) + def x = 0 + def queryid = "" + logger.info("result = ${result}") + + for( int i = 0;i 1 + ) lineitem on l_orderkey = o_orderkey + inner join ( + select + ps_partkey, + ps_suppkey, + partsupp.public_col as public_col + from + partsupp + ) partsupp on ps_partkey = o_orderkey + where + lineitem.public_col is null + or lineitem.public_col <> 1 + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14; + """ + def query29_0 = """ + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey, + orders.public_col as col1, + l_orderkey, + l_partkey, + l_suppkey, + lineitem.public_col as col2, + ps_partkey, + ps_suppkey, + partsupp.public_col as col3, + partsupp.public_col * 2 as col4, + o_orderkey + l_orderkey + ps_partkey * 2, + sum( + o_orderkey + l_orderkey + ps_partkey * 2 + ), + count() as count_all + from + ( + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey, + orders.public_col as public_col + from + orders + ) orders + left join ( + select + l_orderkey, + l_partkey, + l_suppkey, + lineitem.public_col as public_col + from + lineitem + where + lineitem.public_col is null + or lineitem.public_col <> 1 + ) lineitem on l_orderkey = o_orderkey + inner join ( + select + ps_partkey, + ps_suppkey, + partsupp.public_col as public_col + from + partsupp + ) partsupp on ps_partkey = o_orderkey + where + lineitem.public_col is null + or lineitem.public_col <> 1 + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14; + """ + order_qt_query29_0_before "${query29_0}" + async_mv_rewrite_success(db, mv29_0, query29_0, "mv29_0") + order_qt_query29_0_after "${query29_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv29_0""" + + + // query and mv has the same filter but position is different, should rewrite successfully + // mv join condition has alias + def mv30_0 = """ + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey_alias, + orders.public_col as col1, + l_orderkey_alias, + l_partkey, + l_suppkey, + lineitem.public_col as col2, + ps_partkey_alias, + ps_suppkey, + partsupp.public_col as col3, + partsupp.public_col * 2 as col4, + o_orderkey_alias + l_orderkey_alias + ps_partkey_alias * 2, + sum( + o_orderkey_alias + l_orderkey_alias + ps_partkey_alias * 2 + ), + count() as count_all + from + ( + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey as o_orderkey_alias, + orders.public_col as public_col + from + orders + ) orders + left join ( + select + l_orderkey as l_orderkey_alias, + l_partkey, + l_suppkey, + lineitem.public_col as public_col + from + lineitem + where + lineitem.public_col is null + or lineitem.public_col <> 1 + ) lineitem on lineitem.l_orderkey_alias = orders.o_orderkey_alias + inner join ( + select + ps_partkey as ps_partkey_alias, + ps_suppkey, + partsupp.public_col as public_col + from + partsupp + ) partsupp on 
partsupp.ps_partkey_alias = orders.o_orderkey_alias + where + lineitem.public_col is null + or lineitem.public_col <> 1 + and o_orderkey_alias = 2 + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14; + """ + def query30_0 = """ + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey, + orders.public_col as col1, + l_orderkey, + l_partkey, + l_suppkey, + lineitem.public_col as col2, + ps_partkey, + ps_suppkey, + partsupp.public_col as col3, + partsupp.public_col * 2 as col4, + o_orderkey + l_orderkey + ps_partkey * 2, + sum( + o_orderkey + l_orderkey + ps_partkey * 2 + ), + count() as count_all + from + ( + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey, + orders.public_col as public_col + from + orders + ) orders + left join ( + select + l_orderkey, + l_partkey, + l_suppkey, + lineitem.public_col as public_col + from + lineitem + where + lineitem.public_col is null + or lineitem.public_col <> 1 + ) lineitem on l_orderkey = o_orderkey + inner join ( + select + ps_partkey, + ps_suppkey, + partsupp.public_col as public_col + from + partsupp + ) partsupp on ps_partkey = o_orderkey + where + lineitem.public_col is null + or lineitem.public_col <> 1 + and o_orderkey = 2 + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14; + """ + order_qt_query30_0_before "${query30_0}" + async_mv_rewrite_success(db, mv30_0, query30_0, "mv30_0") + order_qt_query30_0_after "${query30_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv30_0""" + + + // query and mv has the same filter but position is different, should rewrite successfully + // query join condition has alias + def mv31_0 = """ + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey, + orders.public_col as col1, + l_orderkey, + l_partkey, + l_suppkey, + lineitem.public_col as col2, + ps_partkey, + ps_suppkey, + partsupp.public_col as col3, + partsupp.public_col * 2 as col4, + o_orderkey + l_orderkey + ps_partkey * 2, + sum( + o_orderkey + l_orderkey + ps_partkey * 2 + ), + count() as count_all + from + ( + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey, + orders.public_col as public_col + from + orders + ) orders + left join ( + select + l_orderkey, + l_partkey, + l_suppkey, + lineitem.public_col as public_col + from + lineitem + where + lineitem.public_col is null + or lineitem.public_col <> 1 + ) lineitem on l_orderkey = o_orderkey + inner join ( + select + ps_partkey, + ps_suppkey, + partsupp.public_col as public_col + from + partsupp + ) partsupp on ps_partkey = o_orderkey + where + lineitem.public_col is null + or lineitem.public_col <> 1 + and o_orderkey = 2 + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14; + """ + def query31_0 = """ +select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey_alias, + orders.public_col as col1, + l_orderkey_alias, + l_partkey, + l_suppkey, + lineitem.public_col as col2, + ps_partkey_alias, + ps_suppkey, + partsupp.public_col as col3, + partsupp.public_col * 2 as col4, + o_orderkey_alias + l_orderkey_alias + ps_partkey_alias * 2, + sum( + o_orderkey_alias + l_orderkey_alias + ps_partkey_alias * 2 + ), + count() as count_all + from + ( + select + o_custkey, + o_orderdate, + o_shippriority, + o_comment, + o_orderkey as o_orderkey_alias, + orders.public_col as public_col + from + orders + ) orders + left join ( + select + l_orderkey as l_orderkey_alias, + l_partkey, + l_suppkey, + 
lineitem.public_col as public_col + from + lineitem + where + lineitem.public_col is null + or lineitem.public_col <> 1 + ) lineitem on lineitem.l_orderkey_alias = orders.o_orderkey_alias + inner join ( + select + ps_partkey as ps_partkey_alias, + ps_suppkey, + partsupp.public_col as public_col + from + partsupp + ) partsupp on partsupp.ps_partkey_alias = orders.o_orderkey_alias + where + lineitem.public_col is null + or lineitem.public_col <> 1 + and o_orderkey_alias = 2 + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14; + """ + order_qt_query31_0_before "${query31_0}" + async_mv_rewrite_success(db, mv31_0, query31_0, "mv31_0") + order_qt_query31_0_after "${query31_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv31_0""" } diff --git a/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy index f8e601e64f58199..140a91edd7ca969 100644 --- a/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/create_part_and_up/range_datetime_part_up_rewrite.groovy @@ -168,7 +168,7 @@ suite("mtmv_range_datetime_part_up_rewrite") { for (int i = 0; i < mv_name_list.size(); i++) { def job_name = getJobName(db, mv_name_list[i]) waitingMTMVTaskFinished(job_name) - mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + mv_rewrite_any_success(query_stmt_list[i], mv_name_list) compare_res(query_stmt_list[i] + " order by 1,2,3") } @@ -178,13 +178,15 @@ suite("mtmv_range_datetime_part_up_rewrite") { (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-29 03:00:00')""" sql """alter table lineitem_range_datetime_union modify column l_comment set stats ('row_count'='8');""" for (int i = 0; i < mv_name_list.size(); i++) { - mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + // both mv should rewrite success + mv_rewrite_any_success(query_stmt_list[i], mv_name_list) compare_res(query_stmt_list[i] + " order by 1,2,3") } for (int i = 0; i < mv_name_list.size(); i++) { sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" - mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + // both mv should rewrite success + mv_rewrite_any_success(query_stmt_list[i], mv_name_list) compare_res(query_stmt_list[i] + " order by 1,2,3") } @@ -192,25 +194,29 @@ suite("mtmv_range_datetime_part_up_rewrite") { (3, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-11-29 03:00:00');""" sql """alter table lineitem_range_datetime_union modify column l_comment set stats ('row_count'='9');""" for (int i = 0; i < mv_name_list.size(); i++) { - mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + // both mv should rewrite success + mv_rewrite_any_success(query_stmt_list[i], mv_name_list) compare_res(query_stmt_list[i] + " order by 1,2,3") } for (int i = 0; i < mv_name_list.size(); i++) { sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" - mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + // both mv should rewrite success + mv_rewrite_any_success(query_stmt_list[i], mv_name_list) compare_res(query_stmt_list[i] + " order by 1,2,3") } sql """ALTER TABLE lineitem_range_datetime_union DROP PARTITION IF EXISTS p4 FORCE""" for (int i = 0; i < mv_name_list.size(); i++) { - mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + // both mv 
should rewrite success + mv_rewrite_any_success(query_stmt_list[i], mv_name_list) compare_res(query_stmt_list[i] + " order by 1,2,3") } for (int i = 0; i < mv_name_list.size(); i++) { sql """refresh MATERIALIZED VIEW ${mv_name_list[i]} auto;""" - mv_rewrite_success(query_stmt_list[i], mv_name_list[i]) + // both mv should rewrite success + mv_rewrite_any_success(query_stmt_list[i], mv_name_list) compare_res(query_stmt_list[i] + " order by 1,2,3") } diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/int_as_date_partition_col.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/int_as_date_partition_col.groovy index 5e65e6445ef5e5b..9d4e2a164905120 100644 --- a/regression-test/suites/nereids_rules_p0/partition_prune/int_as_date_partition_col.groovy +++ b/regression-test/suites/nereids_rules_p0/partition_prune/int_as_date_partition_col.groovy @@ -16,6 +16,7 @@ // under the License. suite("int_as_date_partition_col") { + sql "set ENABLE_FOLD_CONSTANT_BY_BE=false" sql "drop table if exists partition_int" sql """CREATE TABLE partition_int(a int, dt int) PARTITION BY range(dt) ( partition p20240101 values less than ("20240101"), diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/one_col_range_partition.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/one_col_range_partition.groovy index e8b7621e6d7ff79..4d1e3eb223b1a7e 100644 --- a/regression-test/suites/nereids_rules_p0/partition_prune/one_col_range_partition.groovy +++ b/regression-test/suites/nereids_rules_p0/partition_prune/one_col_range_partition.groovy @@ -16,6 +16,7 @@ // under the License. suite("one_col_range_partition") { + sql "set ENABLE_FOLD_CONSTANT_BY_BE=false" sql "drop table if exists one_col_range_partition_date" sql """ create table one_col_range_partition_date(a int, dt datetime, d date, c varchar(100)) duplicate key(a) diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/varchar_as_date_partition_col.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/varchar_as_date_partition_col.groovy index 0cfe4963f27b2aa..a5b3a4375c12bc1 100644 --- a/regression-test/suites/nereids_rules_p0/partition_prune/varchar_as_date_partition_col.groovy +++ b/regression-test/suites/nereids_rules_p0/partition_prune/varchar_as_date_partition_col.groovy @@ -16,6 +16,7 @@ // under the License. suite("varchar_as_date_partition_col") { + sql "set ENABLE_FOLD_CONSTANT_BY_BE=false" sql"""drop table if exists partition_varchar;""" sql """CREATE TABLE partition_varchar(a int, dt varchar(10), rdt datetime) PARTITION BY list(dt) ( partition p20240101 values in ("20240101","20240102"), diff --git a/regression-test/suites/query_p0/aggregate/aggregate_count1.groovy b/regression-test/suites/query_p0/aggregate/aggregate_count1.groovy index cf657cc8ef3e3d7..3971f304e386462 100644 --- a/regression-test/suites/query_p0/aggregate/aggregate_count1.groovy +++ b/regression-test/suites/query_p0/aggregate/aggregate_count1.groovy @@ -17,7 +17,7 @@ * under the License. 
*/ -suite("aggregate_count1", "query") { +suite("aggregate_count1", "query,arrow_flight_sql") { sql """ DROP TABLE IF EXISTS aggregate_count1 """ sql """create table if not exists aggregate_count1 ( name varchar(128), diff --git a/regression-test/suites/query_p0/aggregate/select_distinct.groovy b/regression-test/suites/query_p0/aggregate/select_distinct.groovy index 6456158bdadb0d7..2d6a8679d87ed80 100644 --- a/regression-test/suites/query_p0/aggregate/select_distinct.groovy +++ b/regression-test/suites/query_p0/aggregate/select_distinct.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("select_distinct") { +suite("select_distinct", "arrow_flight_sql") { sql """DROP TABLE IF EXISTS decimal_a;""" sql """DROP TABLE IF EXISTS decimal_b;""" sql """DROP TABLE IF EXISTS decimal_c;""" diff --git a/regression-test/suites/query_p0/casesensetive_column/join_with_column_casesensetive.groovy b/regression-test/suites/query_p0/casesensetive_column/join_with_column_casesensetive.groovy index 45499fc6f248a2a..8bd3b19088a4866 100644 --- a/regression-test/suites/query_p0/casesensetive_column/join_with_column_casesensetive.groovy +++ b/regression-test/suites/query_p0/casesensetive_column/join_with_column_casesensetive.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("join_with_column_casesensetive") { +suite("join_with_column_casesensetive", "arrow_flight_sql") { def tables=["ad_order_data_v1","ad_order_data"] for (String table in tables) { diff --git a/regression-test/suites/query_p0/cast/test_cast.groovy b/regression-test/suites/query_p0/cast/test_cast.groovy index 947d61bc828861d..dae669e2965f04a 100644 --- a/regression-test/suites/query_p0/cast/test_cast.groovy +++ b/regression-test/suites/query_p0/cast/test_cast.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite('test_cast') { +suite('test_cast', "arrow_flight_sql") { def date = "date '2020-01-01'" def datev2 = "datev2 '2020-01-01'" def datetime = "timestamp '2020-01-01 12:34:45'" diff --git a/regression-test/suites/query_p0/except/test_query_except.groovy b/regression-test/suites/query_p0/except/test_query_except.groovy index 1a2aa742d2910da..410e24f89b92de0 100644 --- a/regression-test/suites/query_p0/except/test_query_except.groovy +++ b/regression-test/suites/query_p0/except/test_query_except.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_query_except") { +suite("test_query_except", "arrow_flight_sql") { // test query except, depend on query_test_data_load.groovy sql "use test_query_db" qt_select_except1 """ diff --git a/regression-test/suites/query_p0/group_concat/test_group_concat.groovy b/regression-test/suites/query_p0/group_concat/test_group_concat.groovy index 5054dc2ee3a91df..522d66ed64b30b1 100644 --- a/regression-test/suites/query_p0/group_concat/test_group_concat.groovy +++ b/regression-test/suites/query_p0/group_concat/test_group_concat.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_group_concat", "query,p0") { +suite("test_group_concat", "query,p0,arrow_flight_sql") { qt_select """ SELECT group_concat(k6) FROM test_query_db.test where k6='false' """ diff --git a/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy b/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy index 1f12de6628a5eb1..f8180b0ab438464 100644 --- a/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy +++ b/regression-test/suites/query_p0/grouping_sets/test_grouping_sets1.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_grouping_sets1") { +suite("test_grouping_sets1", "arrow_flight_sql") { qt_select """ select col1 diff --git a/regression-test/suites/query_p0/having/having.groovy b/regression-test/suites/query_p0/having/having.groovy index fb32b3834af4d61..bbad236c9738702 100644 --- a/regression-test/suites/query_p0/having/having.groovy +++ b/regression-test/suites/query_p0/having/having.groovy @@ -19,7 +19,7 @@ // /testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate // and modified by Doris. -suite("having", "query,p0") { +suite("having", "query,p0,arrow_flight_sql") { sql """DROP TABLE IF EXISTS supplier""" sql """CREATE TABLE `supplier` ( `s_suppkey` int(11) NOT NULL, diff --git a/regression-test/suites/query_p0/intersect/test_intersect.groovy b/regression-test/suites/query_p0/intersect/test_intersect.groovy index 1c007b95d7d07d2..7919bec324b8767 100644 --- a/regression-test/suites/query_p0/intersect/test_intersect.groovy +++ b/regression-test/suites/query_p0/intersect/test_intersect.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_intersect") { +suite("test_intersect", "arrow_flight_sql") { qt_select """ SELECT * FROM (SELECT k1 FROM test_query_db.baseall INTERSECT SELECT k1 FROM test_query_db.test) a ORDER BY k1 diff --git a/regression-test/suites/query_p0/join/test_join2.groovy b/regression-test/suites/query_p0/join/test_join2.groovy index 6125b9a873f77ec..9158133948f7549 100644 --- a/regression-test/suites/query_p0/join/test_join2.groovy +++ b/regression-test/suites/query_p0/join/test_join2.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_join2", "query,p0") { +suite("test_join2", "query,p0,arrow_flight_sql") { def DBname = "regression_test_join2" def TBname1 = "J1_TBL" def TBname2 = "J2_TBL" diff --git a/regression-test/suites/query_p0/join/test_left_join1.groovy b/regression-test/suites/query_p0/join/test_left_join1.groovy index d4cbeeee65eda23..104adab4a850d0f 100644 --- a/regression-test/suites/query_p0/join/test_left_join1.groovy +++ b/regression-test/suites/query_p0/join/test_left_join1.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_left_join1", "query,p0") { +suite("test_left_join1", "query,p0,arrow_flight_sql") { def tableName = "test_left_join1" sql """drop table if exists ${tableName}""" diff --git a/regression-test/suites/query_p0/join/test_nestedloop_outer_join.groovy b/regression-test/suites/query_p0/join/test_nestedloop_outer_join.groovy index ad19e554690ee77..f99dfa042446e91 100644 --- a/regression-test/suites/query_p0/join/test_nestedloop_outer_join.groovy +++ b/regression-test/suites/query_p0/join/test_nestedloop_outer_join.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_nestedloop_outer_join", "query_p0") { +suite("test_nestedloop_outer_join", "query_p0,arrow_flight_sql") { def tbl1 = "test_nestedloop_outer_join1" def tbl2 = "test_nestedloop_outer_join2" diff --git a/regression-test/suites/query_p0/join/test_partitioned_hash_join.groovy b/regression-test/suites/query_p0/join/test_partitioned_hash_join.groovy index cbe09ec527ffbcc..676cdd06274a686 100644 --- a/regression-test/suites/query_p0/join/test_partitioned_hash_join.groovy +++ b/regression-test/suites/query_p0/join/test_partitioned_hash_join.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_partitioned_hash_join", "query,p0") { +suite("test_partitioned_hash_join", "query,p0,arrow_flight_sql") { sql "drop table if exists test_partitioned_hash_join_l" sql "drop table if exists test_partitioned_hash_join_r" sql """ create table test_partitioned_hash_join_l ( diff --git a/regression-test/suites/query_p0/lateral_view/lateral_view.groovy b/regression-test/suites/query_p0/lateral_view/lateral_view.groovy index a24623590cd0af0..bfe6ca76872ea8c 100644 --- a/regression-test/suites/query_p0/lateral_view/lateral_view.groovy +++ b/regression-test/suites/query_p0/lateral_view/lateral_view.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("lateral_view") { +suite("lateral_view", "arrow_flight_sql") { sql """ DROP TABLE IF EXISTS `test_explode_bitmap` """ sql """ CREATE TABLE `test_explode_bitmap` ( diff --git a/regression-test/suites/query_p0/limit/OffsetInSubqueryWithJoin.groovy b/regression-test/suites/query_p0/limit/OffsetInSubqueryWithJoin.groovy index da0c7231f425d16..caa75ac7be38edc 100644 --- a/regression-test/suites/query_p0/limit/OffsetInSubqueryWithJoin.groovy +++ b/regression-test/suites/query_p0/limit/OffsetInSubqueryWithJoin.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_offset_in_subquery_with_join", "query") { +suite("test_offset_in_subquery_with_join", "query,arrow_flight_sql") { // define a sql table def testTable = "test_offset_in_subquery_with_join" diff --git a/regression-test/suites/query_p0/literal_view/lietral_test.groovy b/regression-test/suites/query_p0/literal_view/lietral_test.groovy index 6e9d51f0a0d6105..27b82c16247a73d 100644 --- a/regression-test/suites/query_p0/literal_view/lietral_test.groovy +++ b/regression-test/suites/query_p0/literal_view/lietral_test.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("literal_view_test") { +suite("literal_view_test", "arrow_flight_sql") { sql """DROP TABLE IF EXISTS table1""" diff --git a/regression-test/suites/query_p0/operator/test_set_operator.groovy b/regression-test/suites/query_p0/operator/test_set_operator.groovy index cb05e18b3e870ba..7d6219585e4c4c6 100644 --- a/regression-test/suites/query_p0/operator/test_set_operator.groovy +++ b/regression-test/suites/query_p0/operator/test_set_operator.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_set_operators", "query,p0") { +suite("test_set_operators", "query,p0,arrow_flight_sql") { sql """ DROP TABLE IF EXISTS t1; diff --git a/regression-test/suites/query_p0/operator/test_sort_operator.groovy b/regression-test/suites/query_p0/operator/test_sort_operator.groovy index 24a2b8ef73a4246..d76daff01f6fcc5 100644 --- a/regression-test/suites/query_p0/operator/test_sort_operator.groovy +++ b/regression-test/suites/query_p0/operator/test_sort_operator.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_sort_operator", "query,p0") { +suite("test_sort_operator", "query,p0,arrow_flight_sql") { sql """ DROP TABLE IF EXISTS dim_org_ful; diff --git a/regression-test/suites/query_p0/session_variable/test_default_limit.groovy b/regression-test/suites/query_p0/session_variable/test_default_limit.groovy index edda5d51790c567..2ce3b647142ae4e 100644 --- a/regression-test/suites/query_p0/session_variable/test_default_limit.groovy +++ b/regression-test/suites/query_p0/session_variable/test_default_limit.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite('test_default_limit') { +suite('test_default_limit', "arrow_flight_sql") { sql 'drop table if exists baseall' sql 'drop table if exists bigtable' diff --git a/regression-test/suites/query_p0/show/test_show_create_materialized_view.groovy b/regression-test/suites/query_p0/show/test_show_create_materialized_view.groovy index 7c5edcc81a2e8ed..9550a7fec3dbd24 100644 --- a/regression-test/suites/query_p0/show/test_show_create_materialized_view.groovy +++ b/regression-test/suites/query_p0/show/test_show_create_materialized_view.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_show_create_materialized_view", "query") { +suite("test_show_create_materialized_view", "query,arrow_flight_sql") { String tableName = "table_for_mv_test"; String mvName = "mv_show_create_materialized_view"; try { diff --git a/regression-test/suites/query_p0/show/test_show_create_table.groovy b/regression-test/suites/query_p0/show/test_show_create_table.groovy index 6325cbe319fd88a..1e3fc7ff5cb527a 100644 --- a/regression-test/suites/query_p0/show/test_show_create_table.groovy +++ b/regression-test/suites/query_p0/show/test_show_create_table.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_show_create_table", "query") { +suite("test_show_create_table", "query,arrow_flight_sql") { String tb_name = "tb_show_create_table"; try { sql """drop table if exists ${tb_name} """ diff --git a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy index 86a951d7ac33c63..cdab9472e27dbd1 100644 --- a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_aggregate_all_functions") { +suite("test_aggregate_all_functions", "arrow_flight_sql") { sql "set batch_size = 4096" diff --git a/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy b/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy index 41ef16250808069..a91c86b5f489089 100644 --- a/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy +++ b/regression-test/suites/query_p0/sql_functions/case_function/test_case_function_null.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_case_function_null", "query,p0") { +suite("test_case_function_null", "query,p0,arrow_flight_sql") { sql """ drop table if exists case_null0 """ sql """ create table case_null0 ( `c0` decimalv3(17, 1) NULL, diff --git a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy index 590ccd10821f616..d547e9fb287d71e 100644 --- a/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/hash_functions/test_hash_function.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -suite("test_hash_function") { +suite("test_hash_function", "arrow_flight_sql") { sql "set batch_size = 4096;" sql "set enable_profile = true;" diff --git a/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy b/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy index 03e9788a58a3b85..5373217503a0189 100644 --- a/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/ip_functions/test_ip_functions.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
-suite("test_ip_functions") { +suite("test_ip_functions", "arrow_flight_sql") { sql "set batch_size = 4096;" qt_sql "SELECT ipv4_num_to_string(-1);" diff --git a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_insert.groovy b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_insert.groovy index c885e3ae3431f36..b5865034538a111 100644 --- a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_insert.groovy +++ b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_insert.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_query_json_insert", "query") { +suite("test_query_json_insert", "query,arrow_flight_sql") { qt_sql "select json_insert('{\"a\": 1, \"b\": [2, 3]}', '\$', null);" qt_sql "select json_insert('{\"k\": [1, 2]}', '\$.k[0]', null, '\$.[1]', null);" def tableName = "test_query_json_insert" diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy index aa0deec96f46a20..4bd88bf131e7277 100644 --- a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -suite("test_json_function") { +suite("test_json_function", "arrow_flight_sql") { sql "set batch_size = 4096;" qt_sql "SELECT get_json_double('{\"k1\":1.3, \"k2\":\"2\"}', \"\$.k1\");" diff --git a/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy b/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy index 6c4867174d11acd..3a74abfe9c8b221 100644 --- a/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy +++ b/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_conv") { +suite("test_conv", "arrow_flight_sql") { qt_select "SELECT CONV(15,10,2)" sql """ drop table if exists test_conv; """ diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy index 061665d3b9da6e8..f1487d283dfcdf2 100644 --- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy +++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_multi_string_search") { +suite("test_multi_string_search", "arrow_flight_sql") { def table_name = "test_multi_string_search_strings" sql """ DROP TABLE IF EXISTS ${table_name} """ diff --git a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy index e98e11ba7e6888f..f76cb44cb4ad4bc 100644 --- a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -suite("test_gis_function") { +suite("test_gis_function", "arrow_flight_sql") { sql "set batch_size = 4096;" qt_sql "SELECT ST_AsText(ST_Point(24.7, 56.7));" diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index f5d32653c818b51..6e18fb57eeb4cfe 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_string_function") { +suite("test_string_function", "arrow_flight_sql") { sql "set batch_size = 4096;" qt_sql "select elt(0, \"hello\", \"doris\");" diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode_split.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode_split.groovy index b7dd4d640799fbc..53db931c03bb034 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode_split.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode_split.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("explode_split") { +suite("explode_split", "arrow_flight_sql") { def tableName = "test_lv_str" sql """ DROP TABLE IF EXISTS ${tableName} """ diff --git a/regression-test/suites/query_p0/sql_functions/test_alias_function.groovy b/regression-test/suites/query_p0/sql_functions/test_alias_function.groovy index 8e0e94fa2df8051..095ec89e220f1b6 100644 --- a/regression-test/suites/query_p0/sql_functions/test_alias_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/test_alias_function.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite('test_alias_function') { +suite('test_alias_function', "arrow_flight_sql") { sql ''' CREATE ALIAS FUNCTION IF NOT EXISTS f1(DATETIMEV2(3), INT) with PARAMETER (datetime1, int1) as date_trunc(days_sub(datetime1, int1), 'day')''' diff --git a/regression-test/suites/query_p0/sql_functions/test_predicate.groovy b/regression-test/suites/query_p0/sql_functions/test_predicate.groovy index 20b3c179ad5c019..6cca6b62c9960ba 100644 --- a/regression-test/suites/query_p0/sql_functions/test_predicate.groovy +++ b/regression-test/suites/query_p0/sql_functions/test_predicate.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_predicate") { +suite("test_predicate", "arrow_flight_sql") { sql """drop table if exists t1;""" sql """ create table t1 ( diff --git a/regression-test/suites/query_p0/sql_functions/width_bucket_fuctions/test_width_bucket_function.groovy b/regression-test/suites/query_p0/sql_functions/width_bucket_fuctions/test_width_bucket_function.groovy index d0862a580ca6004..1a455da92446f8c 100644 --- a/regression-test/suites/query_p0/sql_functions/width_bucket_fuctions/test_width_bucket_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/width_bucket_fuctions/test_width_bucket_function.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_width_bucket_function") { +suite("test_width_bucket_function", "arrow_flight_sql") { qt_sql "select width_bucket(1, 2, 3, 2)" qt_sql "select width_bucket(null, 2, 3, 2)" qt_sql "select width_bucket(6, 2, 6, 4)" diff --git a/regression-test/suites/query_p0/subquery/test_subquery2.groovy b/regression-test/suites/query_p0/subquery/test_subquery2.groovy index e572459cc72fe3d..a14a44fa152b972 100644 --- a/regression-test/suites/query_p0/subquery/test_subquery2.groovy +++ b/regression-test/suites/query_p0/subquery/test_subquery2.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_subquery2") { +suite("test_subquery2", "arrow_flight_sql") { sql """DROP TABLE IF EXISTS subquerytest2""" diff --git a/regression-test/suites/query_p0/test_data_type_marks.groovy b/regression-test/suites/query_p0/test_data_type_marks.groovy index 79803d987233135..51fb7c9614e4889 100644 --- a/regression-test/suites/query_p0/test_data_type_marks.groovy +++ b/regression-test/suites/query_p0/test_data_type_marks.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_data_type_marks") { +suite("test_data_type_marks", "arrow_flight_sql") { def tbName = "org" sql "DROP TABLE IF EXISTS ${tbName}" sql """ diff --git a/regression-test/suites/query_p0/test_dict_with_null.groovy b/regression-test/suites/query_p0/test_dict_with_null.groovy index b3738bb68aa1ba0..83d253fa4d1b04b 100644 --- a/regression-test/suites/query_p0/test_dict_with_null.groovy +++ b/regression-test/suites/query_p0/test_dict_with_null.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("dict_with_null", "query") { +suite("dict_with_null", "query,arrow_flight_sql") { def tableName = "test_dict_with_null" sql "DROP TABLE IF EXISTS ${tableName}" sql """ diff --git a/regression-test/suites/query_p0/test_orderby_nullliteral.groovy b/regression-test/suites/query_p0/test_orderby_nullliteral.groovy index fe11c778af0b988..e806060c8bcb1cf 100644 --- a/regression-test/suites/query_p0/test_orderby_nullliteral.groovy +++ b/regression-test/suites/query_p0/test_orderby_nullliteral.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("orderby_nullliteral", "query") { +suite("orderby_nullliteral", "query,arrow_flight_sql") { def tableName = "test_orderby_nullliteral" sql "DROP TABLE IF EXISTS ${tableName}" diff --git a/regression-test/suites/query_p0/test_select_constant.groovy b/regression-test/suites/query_p0/test_select_constant.groovy index 6015e19576c6904..68f0a28a20e853c 100644 --- a/regression-test/suites/query_p0/test_select_constant.groovy +++ b/regression-test/suites/query_p0/test_select_constant.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_select_constant") { +suite("test_select_constant", "arrow_flight_sql") { qt_select1 'select 100, "test", date("2021-01-02");' qt_select_geo1 'SELECT ST_AsText(ST_Point(123.12345678901234567890,89.1234567890));' } diff --git a/regression-test/suites/query_p0/test_select_with_predicate_like.groovy b/regression-test/suites/query_p0/test_select_with_predicate_like.groovy index 9491c4271ca5308..0d01f1b958a11c6 100644 --- a/regression-test/suites/query_p0/test_select_with_predicate_like.groovy +++ b/regression-test/suites/query_p0/test_select_with_predicate_like.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -suite("test_select_with_predicate_like") { +suite("test_select_with_predicate_like", "arrow_flight_sql") { def tables=["test_basic_agg"] for (String table in tables) { diff --git a/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy b/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy index 768e04b4c327b59..ccd1b9160fb148a 100644 --- a/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy +++ b/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -suite("test_select_with_predicate_prune") { +suite("test_select_with_predicate_prune", "arrow_flight_sql") { sql """ drop table if exists `test_select_with_predicate_prune`; """ diff --git a/regression-test/suites/query_p0/type_inference/test_largeint.groovy b/regression-test/suites/query_p0/type_inference/test_largeint.groovy index d5cbfa4b4798388..161359cfa97e725 100644 --- a/regression-test/suites/query_p0/type_inference/test_largeint.groovy +++ b/regression-test/suites/query_p0/type_inference/test_largeint.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_largeint") { +suite("test_largeint", "arrow_flight_sql") { def tbName = "test_largeint" sql "DROP TABLE IF EXISTS ${tbName}" sql """ diff --git a/regression-test/suites/query_p0/with/test_with_and_two_phase_agg.groovy b/regression-test/suites/query_p0/with/test_with_and_two_phase_agg.groovy index 99164a999c557e9..d563ef1630517da 100644 --- a/regression-test/suites/query_p0/with/test_with_and_two_phase_agg.groovy +++ b/regression-test/suites/query_p0/with/test_with_and_two_phase_agg.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_with_and_two_phase_agg") { +suite("test_with_and_two_phase_agg", "arrow_flight_sql") { def tableName = "test_with_and_two_phase_agg_table" sql """ DROP TABLE IF EXISTS ${tableName} """ sql """