From b72274f5f1f9d15314c5552795331f4fd8403292 Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Tue, 16 Jul 2024 22:53:58 +0800 Subject: [PATCH] refactor: Simplify code on partition_configuration (#2051) There are no functional changes, only refactoring, including: - Use range-based loops - Unify the naming of partition_configuration variables to `pc` (or `pcs`) - Use `fmt::join` to simplify container formatting - Reduce the nesting depth of some if-else statements This patch doesn't change the names in the thrift IDL, since that would require further changes to the client drivers; we can do that in follow-up patches. --- src/client/partition_resolver_simple.cpp | 38 +- src/client/partition_resolver_simple.h | 4 +- src/client/replication_ddl_client.cpp | 91 ++-- src/client/replication_ddl_client.h | 2 +- src/common/json_helper.h | 4 +- src/common/replication_common.cpp | 22 +- src/common/replication_other_types.h | 8 +- src/meta/backup_engine.cpp | 2 +- src/meta/cluster_balance_policy.cpp | 16 +- .../duplication/meta_duplication_service.cpp | 16 +- src/meta/load_balance_policy.cpp | 10 +- src/meta/meta_backup_service.cpp | 4 +- src/meta/meta_bulk_load_ingestion_context.cpp | 16 +- src/meta/meta_bulk_load_ingestion_context.h | 8 +- src/meta/meta_bulk_load_service.cpp | 132 +++-- src/meta/meta_bulk_load_service.h | 20 +- src/meta/meta_data.cpp | 38 +- src/meta/meta_data.h | 58 ++- src/meta/meta_http_service.cpp | 65 ++- src/meta/meta_service.cpp | 6 +- src/meta/meta_split_service.cpp | 41 +- src/meta/partition_guardian.cpp | 23 +- src/meta/server_state.cpp | 455 +++++++++--------- src/meta/server_state.h | 32 +- src/meta/server_state_restore.cpp | 4 +- src/meta/test/backup_test.cpp | 2 +- .../balancer_simulator/balancer_simulator.cpp | 16 +- src/meta/test/balancer_validator.cpp | 10 +- src/meta/test/cluster_balance_policy_test.cpp | 18 +- src/meta/test/ford_fulkerson_test.cpp | 8 +- src/meta/test/meta_app_operation_test.cpp | 36 +- .../test/meta_bulk_load_ingestion_test.cpp | 45 +- 
src/meta/test/meta_bulk_load_service_test.cpp | 62 +-- src/meta/test/meta_data.cpp | 24 +- .../test/meta_duplication_service_test.cpp | 4 +- src/meta/test/meta_mauanl_compaction_test.cpp | 2 +- .../test/meta_partition_guardian_test.cpp | 37 +- src/meta/test/meta_split_service_test.cpp | 58 +-- src/meta/test/misc/misc.cpp | 46 +- src/meta/test/misc/misc.h | 2 +- src/meta/test/state_sync_test.cpp | 8 +- src/meta/test/update_configuration_test.cpp | 25 +- src/replica/backup/replica_backup_manager.cpp | 4 +- src/replica/bulk_load/replica_bulk_loader.cpp | 91 ++-- .../test/replica_bulk_loader_test.cpp | 16 +- src/replica/duplication/replica_follower.cpp | 21 +- src/replica/duplication/replica_follower.h | 9 +- .../test/replica_follower_test.cpp | 32 +- src/replica/replica.cpp | 2 +- src/replica/replica.h | 6 +- src/replica/replica_2pc.cpp | 16 +- src/replica/replica_backup.cpp | 4 +- src/replica/replica_config.cpp | 82 ++-- src/replica/replica_context.cpp | 31 +- src/replica/replica_context.h | 4 +- src/replica/replica_failover.cpp | 4 +- src/replica/replica_stub.cpp | 6 +- src/replica/replica_stub.h | 2 +- src/replica/split/replica_split_manager.cpp | 35 +- src/replica/split/replica_split_manager.h | 2 +- src/replica/split/test/replica_split_test.cpp | 26 +- .../storage/simple_kv/test/checker.cpp | 7 +- src/replica/storage/simple_kv/test/common.cpp | 12 +- src/replica/storage/simple_kv/test/common.h | 2 +- src/replica/test/mock_utils.h | 4 +- src/replica/test/open_replica_test.cpp | 10 +- src/server/available_detector.cpp | 8 +- src/server/available_detector.h | 2 - src/server/hotspot_partition_calculator.cpp | 11 +- src/shell/command_helper.h | 70 ++- src/shell/commands/data_operations.cpp | 35 +- src/shell/commands/node_management.cpp | 14 +- src/shell/commands/recovery.cpp | 9 +- src/shell/commands/table_management.cpp | 53 +- .../detect_hotspot/test_detect_hotspot.cpp | 29 +- src/test/function_test/utils/test_util.cpp | 5 +- src/test/function_test/utils/test_util.h 
| 2 +- src/test/kill_test/kill_testor.cpp | 38 +- src/test/kill_test/kill_testor.h | 2 +- src/test/kill_test/partition_kill_testor.cpp | 14 +- 80 files changed, 1086 insertions(+), 1152 deletions(-) diff --git a/src/client/partition_resolver_simple.cpp b/src/client/partition_resolver_simple.cpp index 6282da64e4..08d98790fb 100644 --- a/src/client/partition_resolver_simple.cpp +++ b/src/client/partition_resolver_simple.cpp @@ -302,26 +302,24 @@ void partition_resolver_simple::query_config_reply(error_code err, _app_partition_count = resp.partition_count; _app_is_stateful = resp.is_stateful; - for (auto it = resp.partitions.begin(); it != resp.partitions.end(); ++it) { - auto &new_config = *it; - + for (const auto &new_pc : resp.partitions) { LOG_DEBUG_PREFIX("query config reply, gpid = {}, ballot = {}, primary = {}", - new_config.pid, - new_config.ballot, - FMT_HOST_PORT_AND_IP(new_config, primary)); + new_pc.pid, + new_pc.ballot, + FMT_HOST_PORT_AND_IP(new_pc, primary)); - auto it2 = _config_cache.find(new_config.pid.get_partition_index()); + auto it2 = _config_cache.find(new_pc.pid.get_partition_index()); if (it2 == _config_cache.end()) { - std::unique_ptr pi(new partition_info); + auto pi = std::make_unique(); pi->timeout_count = 0; - pi->config = new_config; - _config_cache.emplace(new_config.pid.get_partition_index(), std::move(pi)); - } else if (_app_is_stateful && it2->second->config.ballot < new_config.ballot) { + pi->pc = new_pc; + _config_cache.emplace(new_pc.pid.get_partition_index(), std::move(pi)); + } else if (_app_is_stateful && it2->second->pc.ballot < new_pc.ballot) { it2->second->timeout_count = 0; - it2->second->config = new_config; + it2->second->pc = new_pc; } else if (!_app_is_stateful) { it2->second->timeout_count = 0; - it2->second->config = new_config; + it2->second->pc = new_pc; } else { // nothing to do } @@ -413,32 +411,30 @@ void partition_resolver_simple::handle_pending_requests(std::dequesecond->config; - if (it->second->config.ballot 
< 0) { + if (it->second->pc.ballot < 0) { // client query config for splitting app, child partition is not ready return ERR_CHILD_NOT_READY; } - hp = get_host_port(it->second->config); + hp = get_host_port(it->second->pc); if (!hp) { return ERR_IO_PENDING; } else { diff --git a/src/client/partition_resolver_simple.h b/src/client/partition_resolver_simple.h index 41ec74e791..5dc203ae34 100644 --- a/src/client/partition_resolver_simple.h +++ b/src/client/partition_resolver_simple.h @@ -65,7 +65,7 @@ class partition_resolver_simple : public partition_resolver struct partition_info { int timeout_count; - ::dsn::partition_configuration config; + ::dsn::partition_configuration pc; }; mutable dsn::zrwlock_nr _config_lock; std::unordered_map> _config_cache; @@ -107,7 +107,7 @@ class partition_resolver_simple : public partition_resolver private: // local routines - host_port get_host_port(const partition_configuration &config) const; + host_port get_host_port(const partition_configuration &pc) const; error_code get_host_port(int partition_index, /*out*/ host_port &hp); void handle_pending_requests(std::deque &reqs, error_code err); void clear_all_pending_requests(); diff --git a/src/client/replication_ddl_client.cpp b/src/client/replication_ddl_client.cpp index 4aadfd20e0..1844b87d94 100644 --- a/src/client/replication_ddl_client.cpp +++ b/src/client/replication_ddl_client.cpp @@ -166,7 +166,7 @@ dsn::error_code replication_ddl_client::wait_app_ready(const std::string &app_na CHECK_EQ(partition_count, query_resp.partition_count); int ready_count = 0; for (int i = 0; i < partition_count; i++) { - const partition_configuration &pc = query_resp.partitions[i]; + const auto &pc = query_resp.partitions[i]; if (pc.hp_primary && (pc.hp_secondaries.size() + 1 >= max_replica_count)) { ready_count++; } @@ -422,8 +422,8 @@ dsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type st } int32_t app_id; int32_t partition_count; - std::vector partitions; - r = 
list_app(info.app_name, app_id, partition_count, partitions); + std::vector pcs; + r = list_app(info.app_name, app_id, partition_count, pcs); if (r != dsn::ERR_OK) { LOG_ERROR("list app({}) failed, err = {}", info.app_name, r); return r; @@ -433,18 +433,18 @@ dsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type st int fully_healthy = 0; int write_unhealthy = 0; int read_unhealthy = 0; - for (int i = 0; i < partitions.size(); i++) { - const dsn::partition_configuration &p = partitions[i]; + for (const auto &pc : pcs) { int replica_count = 0; - if (p.hp_primary) { + if (pc.hp_primary) { replica_count++; } - replica_count += p.hp_secondaries.size(); - if (p.hp_primary) { - if (replica_count >= p.max_replica_count) + replica_count += pc.hp_secondaries.size(); + if (pc.hp_primary) { + if (replica_count >= pc.max_replica_count) { fully_healthy++; - else if (replica_count < 2) + } else if (replica_count < 2) { write_unhealthy++; + } } else { write_unhealthy++; read_unhealthy++; @@ -566,22 +566,21 @@ dsn::error_code replication_ddl_client::list_nodes(const dsn::replication::node_ for (auto &app : apps) { int32_t app_id; int32_t partition_count; - std::vector partitions; - r = list_app(app.app_name, app_id, partition_count, partitions); + std::vector pcs; + r = list_app(app.app_name, app_id, partition_count, pcs); if (r != dsn::ERR_OK) { return r; } - for (int i = 0; i < partitions.size(); i++) { - const dsn::partition_configuration &p = partitions[i]; - if (p.hp_primary) { - auto find = tmp_map.find(p.hp_primary); + for (const auto &pc : pcs) { + if (pc.hp_primary) { + auto find = tmp_map.find(pc.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (int j = 0; j < p.hp_secondaries.size(); j++) { - auto find = tmp_map.find(p.hp_secondaries[j]); + for (const auto &secondary : pc.hp_secondaries) { + auto find = tmp_map.find(secondary); if (find != tmp_map.end()) { find->second.secondary_count++; } @@ -723,13 +722,13 @@ 
dsn::error_code replication_ddl_client::list_app(const std::string &app_name, int32_t app_id = 0; int32_t partition_count = 0; int32_t max_replica_count = 0; - std::vector partitions; - dsn::error_code err = list_app(app_name, app_id, partition_count, partitions); + std::vector pcs; + dsn::error_code err = list_app(app_name, app_id, partition_count, pcs); if (err != dsn::ERR_OK) { return err; } - if (!partitions.empty()) { - max_replica_count = partitions[0].max_replica_count; + if (!pcs.empty()) { + max_replica_count = pcs[0].max_replica_count; } // print query_cfg_response @@ -765,41 +764,33 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, int fully_healthy = 0; int write_unhealthy = 0; int read_unhealthy = 0; - for (const auto &p : partitions) { + for (const auto &pc : pcs) { int replica_count = 0; - if (p.hp_primary) { + if (pc.hp_primary) { replica_count++; - node_stat[p.hp_primary].first++; + node_stat[pc.hp_primary].first++; total_prim_count++; } - replica_count += p.hp_secondaries.size(); - total_sec_count += p.hp_secondaries.size(); - if (p.hp_primary) { - if (replica_count >= p.max_replica_count) + replica_count += pc.hp_secondaries.size(); + total_sec_count += pc.hp_secondaries.size(); + if (pc.hp_primary) { + if (replica_count >= pc.max_replica_count) { fully_healthy++; - else if (replica_count < 2) + } else if (replica_count < 2) { write_unhealthy++; + } } else { write_unhealthy++; read_unhealthy++; } - tp_details.add_row(p.pid.get_partition_index()); - tp_details.append_data(p.ballot); - std::stringstream oss; - oss << replica_count << "/" << p.max_replica_count; - tp_details.append_data(oss.str()); - tp_details.append_data(p.hp_primary ? 
p.hp_primary.to_string() : "-"); - oss.str(""); - oss << "["; - // TODO (yingchun) join - for (int j = 0; j < p.hp_secondaries.size(); j++) { - if (j != 0) - oss << ","; - oss << p.hp_secondaries[j]; - node_stat[p.hp_secondaries[j]].second++; + for (const auto &secondary : pc.hp_secondaries) { + node_stat[secondary].second++; } - oss << "]"; - tp_details.append_data(oss.str()); + tp_details.add_row(pc.pid.get_partition_index()); + tp_details.append_data(pc.ballot); + tp_details.append_data(fmt::format("{}/{}", replica_count, pc.max_replica_count)); + tp_details.append_data(pc.hp_primary ? pc.hp_primary.to_string() : "-"); + tp_details.append_data(fmt::format("[{}]", fmt::join(pc.hp_secondaries, ","))); } mtp.add(std::move(tp_details)); @@ -837,7 +828,7 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, dsn::error_code replication_ddl_client::list_app(const std::string &app_name, int32_t &app_id, int32_t &partition_count, - std::vector &partitions) + std::vector &pcs) { RETURN_EC_NOT_OK_MSG(validate_app_name(app_name), "invalid app_name: '{}'", app_name); @@ -859,7 +850,7 @@ dsn::error_code replication_ddl_client::list_app(const std::string &app_name, app_id = resp.app_id; partition_count = resp.partition_count; - partitions = resp.partitions; + pcs = resp.partitions; return dsn::ERR_OK; } @@ -1322,8 +1313,8 @@ dsn::error_code replication_ddl_client::query_restore(int32_t restore_app_id, bo ::dsn::unmarshall(resp_task->get_response(), response); if (response.err == ERR_OK) { int overall_progress = 0; - for (const auto &p : response.restore_progress) { - overall_progress += p; + for (const auto &progress : response.restore_progress) { + overall_progress += progress; } overall_progress = overall_progress / response.restore_progress.size(); overall_progress = overall_progress / 10; diff --git a/src/client/replication_ddl_client.h b/src/client/replication_ddl_client.h index 14006b7ab2..a14b9a97d4 100644 --- 
a/src/client/replication_ddl_client.h +++ b/src/client/replication_ddl_client.h @@ -125,7 +125,7 @@ class replication_ddl_client dsn::error_code list_app(const std::string &app_name, int32_t &app_id, int32_t &partition_count, - std::vector &partitions); + std::vector &pcs); dsn::replication::configuration_meta_control_response control_meta_function_level(meta_function_level::type level); diff --git a/src/common/json_helper.h b/src/common/json_helper.h index cbd94af945..feb6a376da 100644 --- a/src/common/json_helper.h +++ b/src/common/json_helper.h @@ -448,8 +448,8 @@ inline bool json_decode(const dsn::json::JsonObject &in, dsn::host_port &hp) return static_cast(hp); } -inline void json_encode(JsonWriter &out, const dsn::partition_configuration &config); -inline bool json_decode(const JsonObject &in, dsn::partition_configuration &config); +inline void json_encode(JsonWriter &out, const dsn::partition_configuration &pc); +inline bool json_decode(const JsonObject &in, dsn::partition_configuration &pc); inline void json_encode(JsonWriter &out, const dsn::app_info &info); inline bool json_decode(const JsonObject &in, dsn::app_info &info); inline void json_encode(JsonWriter &out, const dsn::replication::file_meta &f_meta); diff --git a/src/common/replication_common.cpp b/src/common/replication_common.cpp index 9d0a8f103c..e34bcb5838 100644 --- a/src/common/replication_common.cpp +++ b/src/common/replication_common.cpp @@ -166,26 +166,26 @@ int32_t replication_options::app_mutation_2pc_min_replica_count(int32_t app_max_ } } -/*static*/ bool replica_helper::get_replica_config(const partition_configuration &partition_config, +/*static*/ bool replica_helper::get_replica_config(const partition_configuration &pc, const ::dsn::host_port &node, - /*out*/ replica_configuration &replica_config) + /*out*/ replica_configuration &rc) { - replica_config.pid = partition_config.pid; - replica_config.ballot = partition_config.ballot; - replica_config.learner_signature = 
invalid_signature; - SET_OBJ_IP_AND_HOST_PORT(replica_config, primary, partition_config, primary); + rc.pid = pc.pid; + rc.ballot = pc.ballot; + rc.learner_signature = invalid_signature; + SET_OBJ_IP_AND_HOST_PORT(rc, primary, pc, primary); - if (node == partition_config.hp_primary) { - replica_config.status = partition_status::PS_PRIMARY; + if (node == pc.hp_primary) { + rc.status = partition_status::PS_PRIMARY; return true; } - if (utils::contains(partition_config.hp_secondaries, node)) { - replica_config.status = partition_status::PS_SECONDARY; + if (utils::contains(pc.hp_secondaries, node)) { + rc.status = partition_status::PS_SECONDARY; return true; } - replica_config.status = partition_status::PS_INACTIVE; + rc.status = partition_status::PS_INACTIVE; return false; } diff --git a/src/common/replication_other_types.h b/src/common/replication_other_types.h index a80457b583..ea194a82af 100644 --- a/src/common/replication_other_types.h +++ b/src/common/replication_other_types.h @@ -79,8 +79,8 @@ inline bool is_partition_config_equal(const partition_configuration &pc1, const partition_configuration &pc2) { // secondaries no need to be same order - for (const auto &hp : pc1.hp_secondaries) { - if (!is_secondary(pc2, hp)) { + for (const auto &pc1_secondary : pc1.hp_secondaries) { + if (!is_secondary(pc2, pc1_secondary)) { return false; } } @@ -106,9 +106,9 @@ class replica_helper } return false; } - static bool get_replica_config(const partition_configuration &partition_config, + static bool get_replica_config(const partition_configuration &pc, const ::dsn::host_port &node, - /*out*/ replica_configuration &replica_config); + /*out*/ replica_configuration &rc); // Return true if 'server_list' is a valid comma-separated list of servers, otherwise return // false. The result is filled into 'servers' if success. 
diff --git a/src/meta/backup_engine.cpp b/src/meta/backup_engine.cpp index 600ea4e39a..5183415369 100644 --- a/src/meta/backup_engine.cpp +++ b/src/meta/backup_engine.cpp @@ -182,7 +182,7 @@ void backup_engine::backup_app_partition(const gpid &pid) _is_backup_failed = true; return; } - partition_primary = app->partitions[pid.get_partition_index()].hp_primary; + partition_primary = app->pcs[pid.get_partition_index()].hp_primary; } if (!partition_primary) { diff --git a/src/meta/cluster_balance_policy.cpp b/src/meta/cluster_balance_policy.cpp index 516d782238..2b10e39ece 100644 --- a/src/meta/cluster_balance_policy.cpp +++ b/src/meta/cluster_balance_policy.cpp @@ -223,18 +223,18 @@ bool cluster_balance_policy::get_app_migration_info(std::shared_ptr a { info.app_id = app->app_id; info.app_name = app->app_name; - info.partitions.resize(app->partitions.size()); - for (auto i = 0; i < app->partitions.size(); ++i) { + info.partitions.reserve(app->pcs.size()); + for (const auto &pc : app->pcs) { std::map pstatus_map; - pstatus_map[app->partitions[i].hp_primary] = partition_status::PS_PRIMARY; - if (app->partitions[i].hp_secondaries.size() != app->partitions[i].max_replica_count - 1) { + pstatus_map[pc.hp_primary] = partition_status::PS_PRIMARY; + if (pc.hp_secondaries.size() != pc.max_replica_count - 1) { // partition is unhealthy return false; } - for (const auto &hp : app->partitions[i].hp_secondaries) { - pstatus_map[hp] = partition_status::PS_SECONDARY; + for (const auto &secondary : pc.hp_secondaries) { + pstatus_map[secondary] = partition_status::PS_SECONDARY; } - info.partitions[i] = pstatus_map; + info.partitions.push_back(std::move(pstatus_map)); } for (const auto &it : nodes) { @@ -258,7 +258,7 @@ void cluster_balance_policy::get_node_migration_info(const node_state &ns, if (!context.get_disk_tag(ns.host_port(), disk_tag)) { continue; } - auto pid = context.config_owner->pid; + auto pid = context.pc->pid; if (info.partitions.find(disk_tag) != 
info.partitions.end()) { info.partitions[disk_tag].insert(pid); } else { diff --git a/src/meta/duplication/meta_duplication_service.cpp b/src/meta/duplication/meta_duplication_service.cpp index 45c4bf7db9..02562afeb0 100644 --- a/src/meta/duplication/meta_duplication_service.cpp +++ b/src/meta/duplication/meta_duplication_service.cpp @@ -547,10 +547,10 @@ void meta_duplication_service::check_follower_app_if_create_completed( const host_port secondary1("localhost", 34802); const host_port secondary2("localhost", 34803); - partition_configuration p; - SET_IP_AND_HOST_PORT_BY_DNS(p, primary, primary); - SET_IPS_AND_HOST_PORTS_BY_DNS(p, secondaries, secondary1, secondary2); - resp.partitions.emplace_back(p); + partition_configuration pc; + SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, primary); + SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, secondary1, secondary2); + resp.partitions.emplace_back(pc); } }); @@ -561,18 +561,18 @@ void meta_duplication_service::check_follower_app_if_create_completed( if (resp.partitions.size() != dup->partition_count) { query_err = ERR_INCONSISTENT_STATE; } else { - for (const auto &partition : resp.partitions) { - if (!partition.hp_primary) { + for (const auto &pc : resp.partitions) { + if (!pc.hp_primary) { query_err = ERR_INACTIVE_STATE; break; } - if (partition.hp_secondaries.empty()) { + if (pc.hp_secondaries.empty()) { query_err = ERR_NOT_ENOUGH_MEMBER; break; } - for (const auto &secondary : partition.hp_secondaries) { + for (const auto &secondary : pc.hp_secondaries) { if (!secondary) { query_err = ERR_INACTIVE_STATE; break; diff --git a/src/meta/load_balance_policy.cpp b/src/meta/load_balance_policy.cpp index 7b0bd19d84..9295796746 100644 --- a/src/meta/load_balance_policy.cpp +++ b/src/meta/load_balance_policy.cpp @@ -320,7 +320,7 @@ void load_balance_policy::start_moving_primary(const std::shared_ptr while (plan_moving-- > 0) { dsn::gpid selected = select_moving(potential_moving, prev_load, current_load, from, to); - const 
partition_configuration &pc = app->partitions[selected.get_partition_index()]; + const auto &pc = app->pcs[selected.get_partition_index()]; auto balancer_result = _migration_result->emplace( selected, generate_balancer_request( @@ -338,7 +338,7 @@ std::list load_balance_policy::calc_potential_moving( std::list potential_moving; const node_state &ns = _global_view->nodes->find(from)->second; ns.for_each_primary(app->app_id, [&](const gpid &pid) { - const partition_configuration &pc = app->partitions[pid.get_partition_index()]; + const auto &pc = app->pcs[pid.get_partition_index()]; if (is_secondary(pc, to)) { potential_moving.push_back(pid); } @@ -566,10 +566,10 @@ void ford_fulkerson::add_edge(int node_id, const node_state &ns) void ford_fulkerson::update_decree(int node_id, const node_state &ns) { ns.for_each_primary(_app->app_id, [&, this](const gpid &pid) { - const partition_configuration &pc = _app->partitions[pid.get_partition_index()]; + const auto &pc = _app->pcs[pid.get_partition_index()]; for (const auto &secondary : pc.hp_secondaries) { auto i = _host_port_id.find(secondary); - CHECK(i != _host_port_id.end(), "invalid secondary address, address = {}", secondary); + CHECK(i != _host_port_id.end(), "invalid secondary: {}", secondary); _network[node_id][i->second]++; } return true; @@ -709,7 +709,7 @@ void copy_replica_operation::copy_once(gpid selected_pid, migration_list *result const auto &from = _host_port_vec[*_ordered_host_port_ids.rbegin()]; const auto &to = _host_port_vec[*_ordered_host_port_ids.begin()]; - auto pc = _app->partitions[selected_pid.get_partition_index()]; + auto pc = _app->pcs[selected_pid.get_partition_index()]; auto request = generate_balancer_request(_apps, pc, get_balance_type(), from, to); result->emplace(selected_pid, request); } diff --git a/src/meta/meta_backup_service.cpp b/src/meta/meta_backup_service.cpp index 63e1fac30a..7264089e9e 100644 --- a/src/meta/meta_backup_service.cpp +++ b/src/meta/meta_backup_service.cpp @@ 
-532,7 +532,7 @@ void policy_context::start_backup_partition_unlocked(gpid pid) pid, cold_backup_constant::PROGRESS_FINISHED, dsn::host_port()); return; } - partition_primary = app->partitions[pid.get_partition_index()].hp_primary; + partition_primary = app->pcs[pid.get_partition_index()].hp_primary; } if (!partition_primary) { LOG_WARNING("{}: partition {} doesn't have a primary now, retry to backup it later", @@ -670,7 +670,7 @@ void policy_context::initialize_backup_progress_unlocked() // unfinished_partitions_per_app & partition_progress & app_chkpt_size _progress.unfinished_partitions_per_app[app_id] = app->partition_count; std::map partition_chkpt_size; - for (const partition_configuration &pc : app->partitions) { + for (const auto &pc : app->pcs) { _progress.partition_progress[pc.pid] = 0; partition_chkpt_size[pc.pid.get_app_id()] = 0; } diff --git a/src/meta/meta_bulk_load_ingestion_context.cpp b/src/meta/meta_bulk_load_ingestion_context.cpp index 30989c79f8..6921d4ea86 100644 --- a/src/meta/meta_bulk_load_ingestion_context.cpp +++ b/src/meta/meta_bulk_load_ingestion_context.cpp @@ -44,13 +44,13 @@ ingestion_context::ingestion_context() { reset_all(); } ingestion_context::~ingestion_context() { reset_all(); } -void ingestion_context::partition_node_info::create(const partition_configuration &config, +void ingestion_context::partition_node_info::create(const partition_configuration &pc, const config_context &cc) { - pid = config.pid; + pid = pc.pid; std::unordered_set current_nodes; - current_nodes.insert(config.hp_primary); - for (const auto &secondary : config.hp_secondaries) { + current_nodes.insert(pc.hp_primary); + for (const auto &secondary : pc.hp_secondaries) { current_nodes.insert(secondary); } for (const auto &node : current_nodes) { @@ -120,16 +120,16 @@ void ingestion_context::node_context::decrease(const std::string &disk_tag) disk_ingesting_counts[disk_tag]--; } -bool ingestion_context::try_partition_ingestion(const partition_configuration 
&config, +bool ingestion_context::try_partition_ingestion(const partition_configuration &pc, const config_context &cc) { FAIL_POINT_INJECT_F("ingestion_try_partition_ingestion", [=](absl::string_view) -> bool { auto info = partition_node_info(); - info.pid = config.pid; - _running_partitions[config.pid] = info; + info.pid = pc.pid; + _running_partitions[pc.pid] = info; return true; }); - partition_node_info info(config, cc); + partition_node_info info(pc, cc); for (const auto &kv : info.node_disk) { if (!check_node_ingestion(kv.first, kv.second)) { return false; diff --git a/src/meta/meta_bulk_load_ingestion_context.h b/src/meta/meta_bulk_load_ingestion_context.h index 1675726d66..d791b2a351 100644 --- a/src/meta/meta_bulk_load_ingestion_context.h +++ b/src/meta/meta_bulk_load_ingestion_context.h @@ -49,11 +49,11 @@ class ingestion_context std::unordered_map node_disk; partition_node_info() {} - partition_node_info(const partition_configuration &config, const config_context &cc) + partition_node_info(const partition_configuration &pc, const config_context &cc) { - create(config, cc); + create(pc, cc); } - void create(const partition_configuration &config, const config_context &cc); + void create(const partition_configuration &pc, const config_context &cc); }; struct node_context @@ -77,7 +77,7 @@ class ingestion_context void decrease(const std::string &disk_tag); }; - bool try_partition_ingestion(const partition_configuration &config, const config_context &cc); + bool try_partition_ingestion(const partition_configuration &pc, const config_context &cc); bool check_node_ingestion(const host_port &node, const std::string &disk_tag); void add_partition(const partition_node_info &info); void remove_partition(const gpid &pid); diff --git a/src/meta/meta_bulk_load_service.cpp b/src/meta/meta_bulk_load_service.cpp index 0845f4f208..aa2688ce55 100644 --- a/src/meta/meta_bulk_load_service.cpp +++ b/src/meta/meta_bulk_load_service.cpp @@ -357,7 +357,7 @@ bool 
bulk_load_service::check_partition_status( const gpid &pid, bool always_unhealthy_check, const std::function &retry_function, - /*out*/ partition_configuration &pconfig) + /*out*/ partition_configuration &pc) { std::shared_ptr app = get_app(pid.get_app_id()); if (app == nullptr || app->status != app_status::AS_AVAILABLE) { @@ -370,8 +370,8 @@ bool bulk_load_service::check_partition_status( return false; } - pconfig = app->partitions[pid.get_partition_index()]; - if (!pconfig.hp_primary) { + pc = app->pcs[pid.get_partition_index()]; + if (!pc.hp_primary) { LOG_WARNING("app({}) partition({}) primary is invalid, try it later", app_name, pid); tasking::enqueue( LPC_META_STATE_NORMAL, @@ -382,7 +382,7 @@ bool bulk_load_service::check_partition_status( return false; } - if (pconfig.hp_secondaries.size() < pconfig.max_replica_count - 1) { + if (pc.hp_secondaries.size() < pc.max_replica_count - 1) { bulk_load_status::type p_status; { zauto_read_lock l(_lock); @@ -416,7 +416,7 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g { FAIL_POINT_INJECT_F("meta_bulk_load_partition_bulk_load", [](absl::string_view) {}); - partition_configuration pconfig; + partition_configuration pc; if (!check_partition_status(app_name, pid, false, @@ -424,7 +424,7 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g this, std::placeholders::_1, std::placeholders::_2), - pconfig)) { + pc)) { return; } @@ -434,18 +434,18 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g const app_bulk_load_info &ainfo = _app_bulk_load_info[pid.get_app_id()]; req->pid = pid; req->app_name = app_name; - SET_IP_AND_HOST_PORT(*req, primary, pconfig.primary, pconfig.hp_primary); + SET_IP_AND_HOST_PORT(*req, primary, pc.primary, pc.hp_primary); req->remote_provider_name = ainfo.file_provider_type; req->cluster_name = ainfo.cluster_name; req->meta_bulk_load_status = get_partition_bulk_load_status_unlocked(pid); - req->ballot 
= pconfig.ballot; + req->ballot = pc.ballot; req->query_bulk_load_metadata = is_partition_metadata_not_updated_unlocked(pid); req->remote_root_path = ainfo.remote_root_path; } LOG_INFO("send bulk load request to node({}), app({}), partition({}), partition " "status = {}, remote provider = {}, cluster_name = {}, remote_root_path = {}", - FMT_HOST_PORT_AND_IP(pconfig, primary), + FMT_HOST_PORT_AND_IP(pc, primary), app_name, pid, dsn::enum_to_string(req->meta_bulk_load_status), @@ -454,17 +454,16 @@ void bulk_load_service::partition_bulk_load(const std::string &app_name, const g req->remote_root_path); bulk_load_rpc rpc(std::move(req), RPC_BULK_LOAD, 0_ms, 0, pid.thread_hash()); - rpc.call( - pconfig.primary, _meta_svc->tracker(), [this, pid, rpc, pconfig](error_code err) mutable { - // The remote server may not support FQDN, but do not try to reverse resolve the - // IP addresses because they may be unresolved. Just warning and ignore this. - LOG_WARNING_IF(!rpc.response().__isset.hp_group_bulk_load_state, - "The {} primary {} doesn't support FQDN, the response " - "hp_group_bulk_load_state field is not set", - pid, - FMT_HOST_PORT_AND_IP(pconfig, primary)); - on_partition_bulk_load_reply(err, rpc.request(), rpc.response()); - }); + rpc.call(pc.primary, _meta_svc->tracker(), [this, pid, rpc, pc](error_code err) mutable { + // The remote server may not support FQDN, but do not try to reverse resolve the + // IP addresses because they may be unresolved. Just warning and ignore this. 
+ LOG_WARNING_IF(!rpc.response().__isset.hp_group_bulk_load_state, + "The {} primary {} doesn't support FQDN, the response " + "hp_group_bulk_load_state field is not set", + pid, + FMT_HOST_PORT_AND_IP(pc, primary)); + on_partition_bulk_load_reply(err, rpc.request(), rpc.response()); + }); } // ThreadPool: THREAD_POOL_META_STATE @@ -533,7 +532,7 @@ void bulk_load_service::on_partition_bulk_load_reply(error_code err, handle_app_unavailable(pid.get_app_id(), app_name); return; } - ballot current_ballot = app->partitions[pid.get_partition_index()].ballot; + ballot current_ballot = app->pcs[pid.get_partition_index()].ballot; if (request.ballot < current_ballot) { LOG_WARNING( "receive out-date response from node({}), app({}), partition({}), request ballot = " @@ -597,7 +596,7 @@ void bulk_load_service::try_resend_bulk_load_request(const std::string &app_name // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_downloading(const bulk_load_response &response, - const host_port &primary_addr) + const host_port &primary) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -606,7 +605,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons LOG_WARNING( "receive bulk load response from node({}) app({}), partition({}), primary_status({}), " "but total_download_progress is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status)); @@ -619,7 +618,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons !bulk_load_states.__isset.download_status) { LOG_WARNING("receive bulk load response from node({}) app({}), partition({}), " "primary_status({}), but node({}) progress or status is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -658,7 +657,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons int32_t total_progress = 
response.total_download_progress; LOG_INFO("receive bulk load response from node({}) app({}) partition({}), primary_status({}), " "total_download_progress = {}", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -679,7 +678,7 @@ void bulk_load_service::handle_app_downloading(const bulk_load_response &respons // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, - const host_port &primary_addr) + const host_port &primary) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -687,7 +686,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, if (!response.__isset.is_group_ingestion_finished) { LOG_WARNING("receive bulk load response from node({}) app({}) partition({}), " "primary_status({}), but is_group_ingestion_finished is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status)); @@ -699,7 +698,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, if (!bulk_load_states.__isset.ingest_status) { LOG_WARNING("receive bulk load response from node({}) app({}) partition({}), " "primary_status({}), but node({}) ingestion_status is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -718,7 +717,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, LOG_INFO("receive bulk load response from node({}) app({}) partition({}), primary_status({}), " "is_group_ingestion_finished = {}", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -737,7 +736,7 @@ void bulk_load_service::handle_app_ingestion(const bulk_load_response &response, // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &response, - const host_port &primary_addr) + 
const host_port &primary) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -745,7 +744,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon if (!response.__isset.is_group_bulk_load_context_cleaned_up) { LOG_WARNING("receive bulk load response from node({}) app({}) partition({}), " "primary_status({}), but is_group_bulk_load_context_cleaned_up is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status)); @@ -756,7 +755,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon if (!kv.second.__isset.is_cleaned_up) { LOG_WARNING("receive bulk load response from node({}) app({}), partition({}), " "primary_status({}), but node({}) is_cleaned_up is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -771,7 +770,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon LOG_WARNING( "receive bulk load response from node({}) app({}) partition({}), current partition " "has already been cleaned up", - primary_addr, + primary, app_name, pid); return; @@ -782,7 +781,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon bool group_cleaned_up = response.is_group_bulk_load_context_cleaned_up; LOG_INFO("receive bulk load response from node({}) app({}) partition({}), primary status = {}, " "is_group_bulk_load_context_cleaned_up = {}", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -818,7 +817,7 @@ void bulk_load_service::handle_bulk_load_finish(const bulk_load_response &respon // ThreadPool: THREAD_POOL_META_STATE void bulk_load_service::handle_app_pausing(const bulk_load_response &response, - const host_port &primary_addr) + const host_port &primary) { const std::string &app_name = response.app_name; const gpid &pid = response.pid; @@ -826,7 +825,7 @@ void 
bulk_load_service::handle_app_pausing(const bulk_load_response &response, if (!response.__isset.is_group_bulk_load_paused) { LOG_WARNING("receive bulk load response from node({}) app({}) partition({}), " "primary_status({}), but is_group_bulk_load_paused is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status)); @@ -837,7 +836,7 @@ void bulk_load_service::handle_app_pausing(const bulk_load_response &response, if (!kv.second.__isset.is_paused) { LOG_WARNING("receive bulk load response from node({}) app({}), partition({}), " "primary_status({}), but node({}) is_paused is not set", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -849,7 +848,7 @@ void bulk_load_service::handle_app_pausing(const bulk_load_response &response, bool is_group_paused = response.is_group_bulk_load_paused; LOG_INFO("receive bulk load response from node({}) app({}) partition({}), primary status = {}, " "is_group_bulk_load_paused = {}", - primary_addr, + primary, app_name, pid, dsn::enum_to_string(response.primary_bulk_load_status), @@ -1187,7 +1186,7 @@ void bulk_load_service::update_app_status_on_remote_storage_reply(const app_bulk } // ThreadPool: THREAD_POOL_META_STATE -bool bulk_load_service::check_ever_ingestion_succeed(const partition_configuration &config, +bool bulk_load_service::check_ever_ingestion_succeed(const partition_configuration &pc, const std::string &app_name, const gpid &pid) { @@ -1202,8 +1201,8 @@ bool bulk_load_service::check_ever_ingestion_succeed(const partition_configurati } std::vector current_nodes; - current_nodes.emplace_back(config.hp_primary); - for (const auto &secondary : config.hp_secondaries) { + current_nodes.emplace_back(pc.hp_primary); + for (const auto &secondary : pc.hp_secondaries) { current_nodes.emplace_back(secondary); } @@ -1243,7 +1242,7 @@ void bulk_load_service::partition_ingestion(const std::string &app_name, const g return; } - 
partition_configuration pconfig; + partition_configuration pc; if (!check_partition_status(app_name, pid, true, @@ -1251,16 +1250,16 @@ void bulk_load_service::partition_ingestion(const std::string &app_name, const g this, std::placeholders::_1, std::placeholders::_2), - pconfig)) { + pc)) { return; } - if (check_ever_ingestion_succeed(pconfig, app_name, pid)) { + if (check_ever_ingestion_succeed(pc, app_name, pid)) { return; } auto app = get_app(pid.get_app_id()); - if (!try_partition_ingestion(pconfig, app->helpers->contexts[pid.get_partition_index()])) { + if (!try_partition_ingestion(pc, app->helpers->contexts[pid.get_partition_index()])) { LOG_WARNING( "app({}) partition({}) couldn't execute ingestion, wait and try later", app_name, pid); tasking::enqueue(LPC_META_STATE_NORMAL, @@ -1271,24 +1270,21 @@ void bulk_load_service::partition_ingestion(const std::string &app_name, const g return; } - const auto &primary_addr = pconfig.hp_primary; - ballot meta_ballot = pconfig.ballot; - tasking::enqueue(LPC_BULK_LOAD_INGESTION, - _meta_svc->tracker(), - std::bind(&bulk_load_service::send_ingestion_request, - this, - app_name, - pid, - primary_addr, - meta_ballot), - 0, - std::chrono::seconds(bulk_load_constant::BULK_LOAD_REQUEST_INTERVAL)); + const auto &primary = pc.hp_primary; + ballot meta_ballot = pc.ballot; + tasking::enqueue( + LPC_BULK_LOAD_INGESTION, + _meta_svc->tracker(), + std::bind( + &bulk_load_service::send_ingestion_request, this, app_name, pid, primary, meta_ballot), + 0, + std::chrono::seconds(bulk_load_constant::BULK_LOAD_REQUEST_INTERVAL)); } // ThreadPool: THREAD_POOL_DEFAULT void bulk_load_service::send_ingestion_request(const std::string &app_name, const gpid &pid, - const host_port &primary_addr, + const host_port &primary, const ballot &meta_ballot) { ingestion_request req; @@ -1311,11 +1307,11 @@ void bulk_load_service::send_ingestion_request(const std::string &app_name, dsn::rpc_response_task_ptr rpc_callback = rpc::create_rpc_response_task( 
msg, _meta_svc->tracker(), - [this, app_name, pid, primary_addr](error_code err, ingestion_response &&resp) { - on_partition_ingestion_reply(err, std::move(resp), app_name, pid, primary_addr); + [this, app_name, pid, primary](error_code err, ingestion_response &&resp) { + on_partition_ingestion_reply(err, std::move(resp), app_name, pid, primary); }); - _meta_svc->send_request(msg, primary_addr, rpc_callback); - LOG_INFO("send ingest_request to node({}), app({}) partition({})", primary_addr, app_name, pid); + _meta_svc->send_request(msg, primary, rpc_callback); + LOG_INFO("send ingest_request to node({}), app({}) partition({})", primary, app_name, pid); } // ThreadPool: THREAD_POOL_DEFAULT @@ -1323,7 +1319,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, const ingestion_response &&resp, const std::string &app_name, const gpid &pid, - const host_port &primary_addr) + const host_port &primary) { if (err != ERR_OK || resp.err != ERR_OK || resp.rocksdb_error != ERR_OK) { finish_ingestion(pid); @@ -1335,7 +1331,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, "repeated request", app_name, pid, - primary_addr); + primary); return; } @@ -1344,7 +1340,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, LOG_ERROR("app({}) partition({}) on node({}) ingestion files failed, error = {}", app_name, pid, - primary_addr, + primary, err); tasking::enqueue( LPC_META_STATE_NORMAL, @@ -1359,7 +1355,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, "{}, retry it later", app_name, pid, - primary_addr, + primary, resp.rocksdb_error); tasking::enqueue(LPC_BULK_LOAD_INGESTION, _meta_svc->tracker(), @@ -1377,7 +1373,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, "error = {}", app_name, pid, - primary_addr, + primary, resp.err, resp.rocksdb_error); @@ -1393,7 +1389,7 @@ void bulk_load_service::on_partition_ingestion_reply(error_code err, LOG_INFO("app({}) partition({}) 
receive ingestion response from node({}) succeed", app_name, pid, - primary_addr); + primary); } // ThreadPool: THREAD_POOL_META_STATE diff --git a/src/meta/meta_bulk_load_service.h b/src/meta/meta_bulk_load_service.h index 359b3fa085..50245d9d60 100644 --- a/src/meta/meta_bulk_load_service.h +++ b/src/meta/meta_bulk_load_service.h @@ -189,7 +189,7 @@ class bulk_load_service const gpid &pid, bool always_unhealthy_check, const std::function &retry_function, - /*out*/ partition_configuration &pconfig); + /*out*/ partition_configuration &pc); void partition_bulk_load(const std::string &app_name, const gpid &pid); @@ -200,15 +200,15 @@ class bulk_load_service // if app is still in bulk load, resend bulk_load_request to primary after interval seconds void try_resend_bulk_load_request(const std::string &app_name, const gpid &pid); - void handle_app_downloading(const bulk_load_response &response, const host_port &primary_addr); + void handle_app_downloading(const bulk_load_response &response, const host_port &primary); - void handle_app_ingestion(const bulk_load_response &response, const host_port &primary_addr); + void handle_app_ingestion(const bulk_load_response &response, const host_port &primary); // when app status is `succeed, `failed`, `canceled`, meta and replica should cleanup bulk load // states - void handle_bulk_load_finish(const bulk_load_response &response, const host_port &primary_addr); + void handle_bulk_load_finish(const bulk_load_response &response, const host_port &primary); - void handle_app_pausing(const bulk_load_response &response, const host_port &primary_addr); + void handle_app_pausing(const bulk_load_response &response, const host_port &primary); // app not existed or not available during bulk load void handle_app_unavailable(int32_t app_id, const std::string &app_name); @@ -223,20 +223,20 @@ class bulk_load_service void send_ingestion_request(const std::string &app_name, const gpid &pid, - const host_port &primary_addr, + const host_port 
&primary, const ballot &meta_ballot); void on_partition_ingestion_reply(error_code err, const ingestion_response &&resp, const std::string &app_name, const gpid &pid, - const host_port &primary_addr); + const host_port &primary); // Called by `partition_ingestion` // - true : this partition has ever executed ingestion succeed, no need to send ingestion // request // - false: this partition has not executed ingestion or executed ingestion failed - bool check_ever_ingestion_succeed(const partition_configuration &config, + bool check_ever_ingestion_succeed(const partition_configuration &pc, const std::string &app_name, const gpid &pid); @@ -252,9 +252,9 @@ class bulk_load_service /// /// ingestion_context functions /// - bool try_partition_ingestion(const partition_configuration &config, const config_context &cc) + bool try_partition_ingestion(const partition_configuration &pc, const config_context &cc) { - return _ingestion_context->try_partition_ingestion(config, cc); + return _ingestion_context->try_partition_ingestion(pc, cc); } void finish_ingestion(const gpid &pid) { _ingestion_context->remove_partition(pid); } diff --git a/src/meta/meta_data.cpp b/src/meta/meta_data.cpp index cc6e96c01b..44da02f058 100644 --- a/src/meta/meta_data.cpp +++ b/src/meta/meta_data.cpp @@ -134,8 +134,9 @@ bool construct_replica(meta_view view, const gpid &pid, int max_replica_count) // when add node to pc.last_drops, we don't remove it from our cc.drop_list CHECK(pc.hp_last_drops.empty(), "last_drops of partition({}) must be empty", pid); for (auto iter = drop_list.rbegin(); iter != drop_list.rend(); ++iter) { - if (pc.hp_last_drops.size() + 1 >= max_replica_count) + if (pc.hp_last_drops.size() + 1 >= max_replica_count) { break; + } // similar to cc.drop_list, pc.last_drop is also a stack structure HEAD_INSERT_IP_AND_HOST_PORT_BY_DNS(pc, last_drops, iter->node); LOG_INFO("construct for ({}), select {} into last_drops, ballot({}), " @@ -303,8 +304,8 @@ void config_context::check_size() 
{ // when add learner, it is possible that replica_count > max_replica_count, so we // need to remove things from dropped only when it's not empty. - while (replica_count(*config_owner) + dropped.size() > - config_owner->max_replica_count + FLAGS_max_reserved_dropped_replicas && + while (replica_count(*pc) + dropped.size() > + pc->max_replica_count + FLAGS_max_reserved_dropped_replicas && !dropped.empty()) { dropped.erase(dropped.begin()); prefered_dropped = (int)dropped.size() - 1; @@ -377,7 +378,7 @@ int config_context::collect_drop_replica(const host_port &node, const replica_in CHECK(!in_dropped, "adjust position of existing node({}) failed, this is a bug, partition({})", node, - config_owner->pid); + pc->pid); return -1; } return in_dropped ? 1 : 0; @@ -391,7 +392,7 @@ bool config_context::check_order() if (dropped_cmp(dropped[i], dropped[i + 1]) > 0) { LOG_ERROR("check dropped order for gpid({}) failed, [{},{},{},{},{}@{}] vs " "[{},{},{},{},{}@{}]", - config_owner->pid, + pc->pid, dropped[i].node, dropped[i].time, dropped[i].ballot, @@ -474,9 +475,9 @@ void app_state_helper::on_init_partitions() context.prefered_dropped = -1; contexts.assign(owner->partition_count, context); - std::vector &partitions = owner->partitions; + auto &pcs = owner->pcs; for (unsigned int i = 0; i != owner->partition_count; ++i) { - contexts[i].config_owner = &(partitions[i]); + contexts[i].pc = &(pcs[i]); } partitions_in_progress.store(owner->partition_count); @@ -525,19 +526,20 @@ app_state::app_state(const app_info &info) : app_info(info), helpers(new app_sta log_name = info.app_name + "(" + boost::lexical_cast(info.app_id) + ")"; helpers->owner = this; - partition_configuration config; - config.ballot = 0; - config.pid.set_app_id(app_id); - config.last_committed_decree = 0; - config.max_replica_count = app_info::max_replica_count; + partition_configuration pc; + pc.ballot = 0; + pc.pid.set_app_id(app_id); + pc.last_committed_decree = 0; + pc.max_replica_count = 
app_info::max_replica_count; - RESET_IP_AND_HOST_PORT(config, primary); - CLEAR_IP_AND_HOST_PORT(config, secondaries); - CLEAR_IP_AND_HOST_PORT(config, last_drops); + RESET_IP_AND_HOST_PORT(pc, primary); + CLEAR_IP_AND_HOST_PORT(pc, secondaries); + CLEAR_IP_AND_HOST_PORT(pc, last_drops); - partitions.assign(app_info::partition_count, config); - for (int i = 0; i != app_info::partition_count; ++i) - partitions[i].pid.set_partition_index(i); + pcs.assign(app_info::partition_count, pc); + for (int i = 0; i != app_info::partition_count; ++i) { + pcs[i].pid.set_partition_index(i); + } helpers->on_init_partitions(); } diff --git a/src/meta/meta_data.h b/src/meta/meta_data.h index 06ac12c9a7..dd2533ecfc 100644 --- a/src/meta/meta_data.h +++ b/src/meta/meta_data.h @@ -196,7 +196,7 @@ struct serving_replica class config_context { public: - partition_configuration *config_owner; + partition_configuration *pc; config_status stage; // for server state's update config management //[ @@ -264,18 +264,12 @@ class config_context struct partition_configuration_stateless { - partition_configuration &config; - partition_configuration_stateless(partition_configuration &pc) : config(pc) {} - std::vector &workers() { return config.hp_last_drops; } - std::vector &hosts() { return config.hp_secondaries; } - bool is_host(const host_port &node) const - { - return utils::contains(config.hp_secondaries, node); - } - bool is_worker(const host_port &node) const - { - return utils::contains(config.hp_last_drops, node); - } + partition_configuration &pc; + partition_configuration_stateless(partition_configuration &_pc) : pc(_pc) {} + std::vector &workers() { return pc.hp_last_drops; } + std::vector &hosts() { return pc.hp_secondaries; } + bool is_host(const host_port &node) const { return utils::contains(pc.hp_secondaries, node); } + bool is_worker(const host_port &node) const { return utils::contains(pc.hp_last_drops, node); } bool is_member(const host_port &node) const { return is_host(node) || 
is_worker(node); } }; @@ -362,7 +356,7 @@ class app_state : public app_info public: const char *get_logname() const { return log_name.c_str(); } std::shared_ptr helpers; - std::vector partitions; + std::vector pcs; std::map duplications; static std::shared_ptr create(const app_info &info); @@ -462,7 +456,7 @@ inline const partition_configuration *get_config(const app_mapper &apps, const d auto iter = apps.find(gpid.get_app_id()); if (iter == apps.end() || iter->second->status == app_status::AS_DROPPED) return nullptr; - return &(iter->second->partitions[gpid.get_partition_index()]); + return &(iter->second->pcs[gpid.get_partition_index()]); } inline partition_configuration *get_config(app_mapper &apps, const dsn::gpid &gpid) @@ -470,7 +464,7 @@ inline partition_configuration *get_config(app_mapper &apps, const dsn::gpid &gp auto iter = apps.find(gpid.get_app_id()); if (iter == apps.end() || iter->second->status == app_status::AS_DROPPED) return nullptr; - return &(iter->second->partitions[gpid.get_partition_index()]); + return &(iter->second->pcs[gpid.get_partition_index()]); } inline const config_context *get_config_context(const app_mapper &apps, const dsn::gpid &gpid) @@ -510,29 +504,30 @@ inline health_status partition_health_status(const partition_configuration &pc, int mutation_2pc_min_replica_count) { if (!pc.hp_primary) { - if (pc.hp_secondaries.empty()) + if (pc.hp_secondaries.empty()) { return HS_DEAD; - else - return HS_UNREADABLE; - } else { - int n = pc.hp_secondaries.size() + 1; - if (n < mutation_2pc_min_replica_count) - return HS_UNWRITABLE; - else if (n < pc.max_replica_count) - return HS_WRITABLE_ILL; - else - return HS_HEALTHY; + } + return HS_UNREADABLE; + } + + const auto replica_count = pc.hp_secondaries.size() + 1; + if (replica_count < mutation_2pc_min_replica_count) { + return HS_UNWRITABLE; + } + + if (replica_count < pc.max_replica_count) { + return HS_WRITABLE_ILL; } + return HS_HEALTHY; } inline void for_each_available_app(const 
app_mapper &apps, const std::function &)> &action) { - for (const auto &p : apps) { - if (p.second->status == app_status::AS_AVAILABLE) { - if (!action(p.second)) - break; + for (const auto &[_, as] : apps) { + if (as->status == app_status::AS_AVAILABLE && !action(as)) { + break; } } } @@ -548,6 +543,7 @@ inline int count_partitions(const app_mapper &apps) void when_update_replicas(config_type::type t, const std::function &func); +// TODO(yingchun): refactor to deal both rpc_address and host_port template void maintain_drops(/*inout*/ std::vector &drops, const T &node, config_type::type t) { diff --git a/src/meta/meta_http_service.cpp b/src/meta/meta_http_service.cpp index 8764096441..b1e7af2d94 100644 --- a/src/meta/meta_http_service.cpp +++ b/src/meta/meta_http_service.cpp @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include #include @@ -141,40 +142,33 @@ void meta_http_service::get_app_handler(const http_request &req, http_response & int fully_healthy = 0; int write_unhealthy = 0; int read_unhealthy = 0; - for (const auto &p : response.partitions) { + for (const auto &pc : response.partitions) { int replica_count = 0; - if (p.hp_primary) { + if (pc.hp_primary) { replica_count++; - node_stat[p.hp_primary].first++; + node_stat[pc.hp_primary].first++; total_prim_count++; } - replica_count += p.hp_secondaries.size(); - total_sec_count += p.hp_secondaries.size(); - if (p.hp_primary) { - if (replica_count >= p.max_replica_count) + replica_count += pc.hp_secondaries.size(); + total_sec_count += pc.hp_secondaries.size(); + if (pc.hp_primary) { + if (replica_count >= pc.max_replica_count) { fully_healthy++; - else if (replica_count < 2) + } else if (replica_count < 2) { write_unhealthy++; + } } else { write_unhealthy++; read_unhealthy++; } - tp_details.add_row(p.pid.get_partition_index()); - tp_details.append_data(p.ballot); - std::stringstream oss; - oss << replica_count << "/" << p.max_replica_count; - tp_details.append_data(oss.str()); - 
tp_details.append_data(p.hp_primary ? p.hp_primary.to_string() : "-"); - oss.str(""); - oss << "["; - for (int j = 0; j < p.hp_secondaries.size(); j++) { - if (j != 0) - oss << ","; - oss << p.hp_secondaries[j]; - node_stat[p.hp_secondaries[j]].second++; + tp_details.add_row(pc.pid.get_partition_index()); + tp_details.append_data(pc.ballot); + tp_details.append_data(fmt::format("{}/{}", replica_count, pc.max_replica_count)); + tp_details.append_data(pc.hp_primary ? pc.hp_primary.to_string() : "-"); + tp_details.append_data(fmt::format("[{}]", fmt::join(pc.hp_secondaries, ","))); + for (const auto &secondary : pc.hp_secondaries) { + node_stat[secondary].second++; } - oss << "]"; - tp_details.append_data(oss.str()); } mtp.add(std::move(tp_details)); @@ -322,18 +316,18 @@ void meta_http_service::list_app_handler(const http_request &req, http_response int fully_healthy = 0; int write_unhealthy = 0; int read_unhealthy = 0; - for (int i = 0; i < response.partitions.size(); i++) { - const dsn::partition_configuration &p = response.partitions[i]; + for (const auto &pc : response.partitions) { int replica_count = 0; - if (p.hp_primary) { + if (pc.hp_primary) { replica_count++; } - replica_count += p.hp_secondaries.size(); - if (p.hp_primary) { - if (replica_count >= p.max_replica_count) + replica_count += pc.hp_secondaries.size(); + if (pc.hp_primary) { + if (replica_count >= pc.max_replica_count) { fully_healthy++; - else if (replica_count < 2) + } else if (replica_count < 2) { write_unhealthy++; + } } else { write_unhealthy++; read_unhealthy++; @@ -413,16 +407,15 @@ void meta_http_service::list_node_handler(const http_request &req, http_response CHECK_EQ(app.app_id, response_app.app_id); CHECK_EQ(app.partition_count, response_app.partition_count); - for (int i = 0; i < response_app.partitions.size(); i++) { - const dsn::partition_configuration &p = response_app.partitions[i]; - if (p.hp_primary) { - auto find = tmp_map.find(p.hp_primary); + for (const auto &pc : 
response_app.partitions) { + if (pc.hp_primary) { + auto find = tmp_map.find(pc.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (int j = 0; j < p.hp_secondaries.size(); j++) { - auto find = tmp_map.find(p.hp_secondaries[j]); + for (const auto &secondary : pc.hp_secondaries) { + auto find = tmp_map.find(secondary); if (find != tmp_map.end()) { find->second.secondary_count++; } diff --git a/src/meta/meta_service.cpp b/src/meta/meta_service.cpp index a5c62ea418..1de9069bb9 100644 --- a/src/meta/meta_service.cpp +++ b/src/meta/meta_service.cpp @@ -756,9 +756,9 @@ void meta_service::on_query_configuration_by_index(configuration_query_by_index_ host_port forward_hp; if (!check_status_and_authz(rpc, &forward_hp)) { if (forward_hp) { - partition_configuration config; - SET_IP_AND_HOST_PORT_BY_DNS(config, primary, forward_hp); - response.partitions.push_back(std::move(config)); + partition_configuration pc; + SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, forward_hp); + response.partitions.push_back(std::move(pc)); } return; } diff --git a/src/meta/meta_split_service.cpp b/src/meta/meta_split_service.cpp index ca5072c6be..df0c642a2d 100644 --- a/src/meta/meta_split_service.cpp +++ b/src/meta/meta_split_service.cpp @@ -119,15 +119,15 @@ void meta_split_service::do_start_partition_split(std::shared_ptr app app->helpers->split_states.splitting_count = app->partition_count; app->partition_count *= 2; app->helpers->contexts.resize(app->partition_count); - app->partitions.resize(app->partition_count); + app->pcs.resize(app->partition_count); _state->get_table_metric_entities().resize_partitions(app->app_id, app->partition_count); app->envs[replica_envs::SPLIT_VALIDATE_PARTITION_HASH] = "true"; for (int i = 0; i < app->partition_count; ++i) { - app->helpers->contexts[i].config_owner = &app->partitions[i]; + app->helpers->contexts[i].pc = &app->pcs[i]; if (i >= app->partition_count / 2) { // child partitions - app->partitions[i].ballot = invalid_ballot; 
- app->partitions[i].pid = gpid(app->app_id, i); + app->pcs[i].ballot = invalid_ballot; + app->pcs[i].pid = gpid(app->app_id, i); } else { // parent partitions app->helpers->split_states.status[i] = split_status::SPLITTING; } @@ -162,17 +162,17 @@ void meta_split_service::register_child_on_meta(register_child_rpc rpc) const gpid &parent_gpid = request.parent_config.pid; const gpid &child_gpid = request.child_config.pid; - const auto &parent_config = app->partitions[parent_gpid.get_partition_index()]; - if (request.parent_config.ballot != parent_config.ballot) { + const auto &parent_pc = app->pcs[parent_gpid.get_partition_index()]; + if (request.parent_config.ballot != parent_pc.ballot) { LOG_ERROR("app({}) partition({}) register child({}) failed, request is outdated, request " "parent ballot = {}, local parent ballot = {}", app_name, parent_gpid, child_gpid, request.parent_config.ballot, - parent_config.ballot); + parent_pc.ballot); response.err = ERR_INVALID_VERSION; - response.parent_config = parent_config; + response.parent_config = parent_pc; return; } @@ -192,7 +192,7 @@ void meta_split_service::register_child_on_meta(register_child_rpc rpc) parent_gpid, child_gpid); response.err = ERR_INVALID_STATE; - response.parent_config = parent_config; + response.parent_config = parent_pc; return; } @@ -202,14 +202,14 @@ void meta_split_service::register_child_on_meta(register_child_rpc rpc) "duplicated register request, app({}) child partition({}) has already been registered", app_name, child_gpid); - const auto &child_config = app->partitions[child_gpid.get_partition_index()]; - CHECK_GT_MSG(child_config.ballot, + const auto &child_pc = app->pcs[child_gpid.get_partition_index()]; + CHECK_GT_MSG(child_pc.ballot, 0, "app({}) partition({}) should have been registered", app_name, child_gpid); response.err = ERR_CHILD_REGISTERED; - response.parent_config = parent_config; + response.parent_config = parent_pc; return; } @@ -307,16 +307,17 @@ void 
meta_split_service::on_add_child_on_remote_storage_reply(error_code ec, update_child_request->type = config_type::CT_REGISTER_CHILD; SET_OBJ_IP_AND_HOST_PORT(*update_child_request, node, request, primary); - partition_configuration child_config = app->partitions[child_gpid.get_partition_index()]; - child_config.secondaries = request.child_config.secondaries; - child_config.__set_hp_secondaries(request.child_config.hp_secondaries); + // TODO(yingchun): should use reference? + auto child_pc = app->pcs[child_gpid.get_partition_index()]; + child_pc.secondaries = request.child_config.secondaries; + child_pc.__set_hp_secondaries(request.child_config.hp_secondaries); _state->update_configuration_locally(*app, update_child_request); if (parent_context.msg) { response.err = ERR_OK; response.app = *app; - response.parent_config = app->partitions[parent_gpid.get_partition_index()]; - response.child_config = app->partitions[child_gpid.get_partition_index()]; + response.parent_config = app->pcs[parent_gpid.get_partition_index()]; + response.child_config = app->pcs[child_gpid.get_partition_index()]; parent_context.msg = nullptr; } parent_context.pending_sync_task = nullptr; @@ -562,7 +563,7 @@ void meta_split_service::do_cancel_partition_split(std::shared_ptr ap app->partition_count /= 2; app->helpers->contexts.resize(app->partition_count); - app->partitions.resize(app->partition_count); + app->pcs.resize(app->partition_count); _state->get_table_metric_entities().resize_partitions(app->app_id, app->partition_count); }; @@ -597,7 +598,7 @@ void meta_split_service::query_child_state(query_child_state_rpc rpc) app_name); auto child_pidx = parent_pid.get_partition_index() + request.partition_count; - if (app->partitions[child_pidx].ballot == invalid_ballot) { + if (app->pcs[child_pidx].ballot == invalid_ballot) { response.err = ERR_INVALID_STATE; LOG_ERROR("app({}) parent partition({}) split has been canceled", app_name, parent_pid); return; @@ -606,7 +607,7 @@ void
meta_split_service::query_child_state(query_child_state_rpc rpc) "app({}) child partition({}.{}) is ready", app_name, parent_pid.get_app_id(), child_pidx); response.err = ERR_OK; response.__set_partition_count(app->partition_count); - response.__set_child_config(app->partitions[child_pidx]); + response.__set_child_config(app->pcs[child_pidx]); } } // namespace replication diff --git a/src/meta/partition_guardian.cpp b/src/meta/partition_guardian.cpp index 70b13bae95..2cbf49122e 100644 --- a/src/meta/partition_guardian.cpp +++ b/src/meta/partition_guardian.cpp @@ -86,14 +86,15 @@ pc_status partition_guardian::cure(meta_view view, CHECK(acts.empty(), ""); pc_status status; - if (!pc.hp_primary) + if (!pc.hp_primary) { status = on_missing_primary(view, gpid); - else if (static_cast(pc.hp_secondaries.size()) + 1 < pc.max_replica_count) + } else if (static_cast(pc.hp_secondaries.size()) + 1 < pc.max_replica_count) { status = on_missing_secondary(view, gpid); - else if (static_cast(pc.hp_secondaries.size()) >= pc.max_replica_count) + } else if (static_cast(pc.hp_secondaries.size()) >= pc.max_replica_count) { status = on_redundant_secondary(view, gpid); - else + } else { status = pc_status::healthy; + } if (!acts.empty()) { action = *acts.front(); @@ -125,9 +126,9 @@ void partition_guardian::reconfig(meta_view view, const configuration_update_req if (request.type == config_type::CT_DROP_PARTITION) { cc->serving.clear(); - const std::vector &config_dropped = request.config.hp_last_drops; - for (const auto &drop_node : config_dropped) { - cc->record_drop_history(drop_node); + const auto &last_drops = request.config.hp_last_drops; + for (const auto &last_drop : last_drops) { + cc->record_drop_history(last_drop); } } else { when_update_replicas(request.type, [cc, &request](bool is_adding) { @@ -248,9 +249,9 @@ pc_status partition_guardian::on_missing_primary(meta_view &view, const dsn::gpi // try to upgrade a secondary to primary if the primary is missing if 
(!pc.hp_secondaries.empty()) { RESET_IP_AND_HOST_PORT(action, node); - for (const auto &hp_secondary : pc.hp_secondaries) { - const auto ns = get_node_state(*(view.nodes), hp_secondary, false); - CHECK_NOTNULL(ns, "invalid secondary: {}", hp_secondary); + for (const auto &secondary : pc.hp_secondaries) { + const auto ns = get_node_state(*(view.nodes), secondary, false); + CHECK_NOTNULL(ns, "invalid secondary: {}", secondary); if (dsn_unlikely(!ns->alive())) { continue; } @@ -515,7 +516,7 @@ pc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::g configuration_proposal_action action; bool is_emergency = false; - if (cc.config_owner->max_replica_count > + if (cc.pc->max_replica_count > _svc->get_options().app_mutation_2pc_min_replica_count(pc.max_replica_count) && replica_count(pc) < _svc->get_options().app_mutation_2pc_min_replica_count(pc.max_replica_count)) { diff --git a/src/meta/server_state.cpp b/src/meta/server_state.cpp index 51621b4336..2c5019cb9a 100644 --- a/src/meta/server_state.cpp +++ b/src/meta/server_state.cpp @@ -285,7 +285,7 @@ error_code server_state::dump_app_states(const char *local_path, binary_writer writer; dsn::marshall(writer, *app, DSF_THRIFT_BINARY); file->append_buffer(writer.get_buffer()); - for (const partition_configuration &pc : app->partitions) { + for (const auto &pc : app->pcs) { binary_writer pc_writer; dsn::marshall(pc_writer, pc, DSF_THRIFT_BINARY); file->append_buffer(pc_writer.get_buffer()); @@ -372,8 +372,8 @@ error_code server_state::restore_from_local_storage(const char *local_path) ans = file->read_next_buffer(data); binary_reader pc_reader(data); CHECK_EQ_MSG(ans, 1, "unexpect read buffer"); - unmarshall(pc_reader, app->partitions[i], DSF_THRIFT_BINARY); - CHECK_EQ_MSG(app->partitions[i].pid.get_partition_index(), + unmarshall(pc_reader, app->pcs[i], DSF_THRIFT_BINARY); + CHECK_EQ_MSG(app->pcs[i].pid.get_partition_index(), i, "uncorrect partition data, gpid({}.{}), appname({})", app->app_id, @@ 
-585,9 +585,10 @@ dsn::error_code server_state::sync_apps_from_remote_storage() "invalid partition config"); { zauto_write_lock l(_lock); - app->partitions[partition_id] = pc; - for (const auto &hp : pc.hp_last_drops) { - app->helpers->contexts[partition_id].record_drop_history(hp); + app->pcs[partition_id] = pc; + CHECK(pc.__isset.hp_last_drops, ""); + for (const auto &last_drop : pc.hp_last_drops) { + app->helpers->contexts[partition_id].record_drop_history(last_drop); } if (app->status == app_status::AS_CREATING && @@ -624,8 +625,8 @@ dsn::error_code server_state::sync_apps_from_remote_storage() app->helpers->split_states.status[partition_id - app->partition_count / 2] = split_status::SPLITTING; app->helpers->split_states.splitting_count++; - app->partitions[partition_id].ballot = invalid_ballot; - app->partitions[partition_id].pid = gpid(app->app_id, partition_id); + app->pcs[partition_id].ballot = invalid_ballot; + app->pcs[partition_id].pid = gpid(app->app_id, partition_id); process_one_partition(app); } @@ -730,15 +731,15 @@ void server_state::initialize_node_state() zauto_write_lock l(_lock); for (auto &app_pair : _all_apps) { app_state &app = *(app_pair.second); - for (partition_configuration &pc : app.partitions) { + for (const auto &pc : app.pcs) { if (pc.hp_primary) { node_state *ns = get_node_state(_nodes, pc.hp_primary, true); ns->put_partition(pc.pid, true); } - for (auto &ep : pc.hp_secondaries) { - CHECK(ep, "invalid secondary address, addr = {}", ep); - node_state *ns = get_node_state(_nodes, ep, true); + for (const auto &secondary : pc.hp_secondaries) { + CHECK(secondary, "invalid secondary: {}", secondary); + node_state *ns = get_node_state(_nodes, secondary, true); ns->put_partition(pc.pid, false); } } @@ -748,7 +749,7 @@ void server_state::initialize_node_state() } for (auto &app_pair : _all_apps) { app_state &app = *(app_pair.second); - for (const partition_configuration &pc : app.partitions) { + for (const auto &pc : app.pcs) { 
check_consistency(pc.pid); } } @@ -843,7 +844,7 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) } response.partitions[i].info = *app; - response.partitions[i].config = app->partitions[pid.get_partition_index()]; + response.partitions[i].config = app->pcs[pid.get_partition_index()]; response.partitions[i].host_node = request.node; // set meta_split_status const split_state &app_split_states = app->helpers->split_states; @@ -957,12 +958,12 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc) } bool server_state::query_configuration_by_gpid(dsn::gpid id, - /*out*/ partition_configuration &config) + /*out*/ partition_configuration &pc) { zauto_read_lock l(_lock); - const partition_configuration *pc = get_config(_all_apps, id); - if (pc != nullptr) { - config = *pc; + const auto *ppc = get_config(_all_apps, id); + if (ppc != nullptr) { + pc = *ppc; return true; } return false; @@ -1005,11 +1006,13 @@ void server_state::query_configuration_by_index(const query_cfg_request &request response.is_stateful = app->is_stateful; for (const int32_t &index : request.partition_indices) { - if (index >= 0 && index < app->partitions.size()) - response.partitions.push_back(app->partitions[index]); + if (index >= 0 && index < app->pcs.size()) { + response.partitions.push_back(app->pcs[index]); + } + } + if (response.partitions.empty()) { + response.partitions = app->pcs; } - if (response.partitions.empty()) - response.partitions = app->partitions; } void server_state::init_app_partition_node(std::shared_ptr &app, @@ -1046,8 +1049,7 @@ void server_state::init_app_partition_node(std::shared_ptr &app, }; std::string app_partition_path = get_partition_path(*app, pidx); - dsn::blob value = - dsn::json::json_forwarder::encode(app->partitions[pidx]); + dsn::blob value = dsn::json::json_forwarder::encode(app->pcs[pidx]); _meta_svc->get_remote_storage()->create_node( app_partition_path, LPC_META_STATE_HIGH, on_create_app_partition, value); } @@ 
-1457,65 +1459,66 @@ void server_state::send_proposal(const configuration_proposal_action &action, send_proposal(target, request); } -void server_state::request_check(const partition_configuration &old, +void server_state::request_check(const partition_configuration &old_pc, const configuration_update_request &request) { - const partition_configuration &new_config = request.config; + const auto &new_pc = request.config; switch (request.type) { case config_type::CT_ASSIGN_PRIMARY: if (request.__isset.hp_node) { - CHECK_NE(old.hp_primary, request.hp_node); - CHECK(!utils::contains(old.hp_secondaries, request.hp_node), ""); + CHECK_NE(old_pc.hp_primary, request.hp_node); + CHECK(!utils::contains(old_pc.hp_secondaries, request.hp_node), ""); } else { - CHECK_NE(old.primary, request.node); - CHECK(!utils::contains(old.secondaries, request.node), ""); + CHECK_NE(old_pc.primary, request.node); + CHECK(!utils::contains(old_pc.secondaries, request.node), ""); } break; case config_type::CT_UPGRADE_TO_PRIMARY: if (request.__isset.hp_node) { - CHECK_NE(old.hp_primary, request.hp_node); - CHECK(utils::contains(old.hp_secondaries, request.hp_node), ""); + CHECK_NE(old_pc.hp_primary, request.hp_node); + CHECK(utils::contains(old_pc.hp_secondaries, request.hp_node), ""); } else { - CHECK_NE(old.primary, request.node); - CHECK(utils::contains(old.secondaries, request.node), ""); + CHECK_NE(old_pc.primary, request.node); + CHECK(utils::contains(old_pc.secondaries, request.node), ""); } break; case config_type::CT_DOWNGRADE_TO_SECONDARY: if (request.__isset.hp_node) { - CHECK_EQ(old.hp_primary, request.hp_node); - CHECK(!utils::contains(old.hp_secondaries, request.hp_node), ""); + CHECK_EQ(old_pc.hp_primary, request.hp_node); + CHECK(!utils::contains(old_pc.hp_secondaries, request.hp_node), ""); } else { - CHECK_EQ(old.primary, request.node); - CHECK(!utils::contains(old.secondaries, request.node), ""); + CHECK_EQ(old_pc.primary, request.node); + 
CHECK(!utils::contains(old_pc.secondaries, request.node), ""); } break; case config_type::CT_DOWNGRADE_TO_INACTIVE: case config_type::CT_REMOVE: if (request.__isset.hp_node) { - CHECK(old.hp_primary == request.hp_node || - utils::contains(old.hp_secondaries, request.hp_node), + CHECK(old_pc.hp_primary == request.hp_node || + utils::contains(old_pc.hp_secondaries, request.hp_node), ""); } else { - CHECK(old.primary == request.node || utils::contains(old.secondaries, request.node), + CHECK(old_pc.primary == request.node || + utils::contains(old_pc.secondaries, request.node), ""); } break; case config_type::CT_UPGRADE_TO_SECONDARY: if (request.__isset.hp_node) { - CHECK_NE(old.hp_primary, request.hp_node); - CHECK(!utils::contains(old.hp_secondaries, request.hp_node), ""); + CHECK_NE(old_pc.hp_primary, request.hp_node); + CHECK(!utils::contains(old_pc.hp_secondaries, request.hp_node), ""); } else { - CHECK_NE(old.primary, request.node); - CHECK(!utils::contains(old.secondaries, request.node), ""); + CHECK_NE(old_pc.primary, request.node); + CHECK(!utils::contains(old_pc.secondaries, request.node), ""); } break; case config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT: { if (request.__isset.hp_node) { - CHECK_EQ(old.hp_primary, new_config.hp_primary); - CHECK(old.hp_secondaries == new_config.hp_secondaries, ""); + CHECK_EQ(old_pc.hp_primary, new_pc.hp_primary); + CHECK(old_pc.hp_secondaries == new_pc.hp_secondaries, ""); } else { - CHECK_EQ(old.primary, new_config.primary); - CHECK(old.secondaries == new_config.secondaries, ""); + CHECK_EQ(old_pc.primary, new_pc.primary); + CHECK(old_pc.secondaries == new_pc.secondaries, ""); } break; } @@ -1528,22 +1531,22 @@ void server_state::update_configuration_locally( app_state &app, std::shared_ptr &config_request) { dsn::gpid &gpid = config_request->config.pid; - partition_configuration &old_cfg = app.partitions[gpid.get_partition_index()]; - partition_configuration &new_cfg = config_request->config; + partition_configuration &old_pc 
= app.pcs[gpid.get_partition_index()]; + partition_configuration &new_pc = config_request->config; int min_2pc_count = _meta_svc->get_options().app_mutation_2pc_min_replica_count(app.max_replica_count); - health_status old_health_status = partition_health_status(old_cfg, min_2pc_count); - health_status new_health_status = partition_health_status(new_cfg, min_2pc_count); + health_status old_health_status = partition_health_status(old_pc, min_2pc_count); + health_status new_health_status = partition_health_status(new_pc, min_2pc_count); host_port node; GET_HOST_PORT(*config_request, node, node); if (app.is_stateful) { - CHECK(old_cfg.ballot == invalid_ballot || old_cfg.ballot + 1 == new_cfg.ballot, + CHECK(old_pc.ballot == invalid_ballot || old_pc.ballot + 1 == new_pc.ballot, "invalid configuration update request, old ballot {}, new ballot {}", - old_cfg.ballot, - new_cfg.ballot); + old_pc.ballot, + new_pc.ballot); node_state *ns = nullptr; if (config_request->type != config_type::CT_DROP_PARTITION) { @@ -1551,7 +1554,7 @@ void server_state::update_configuration_locally( CHECK_NOTNULL(ns, "invalid node: {}", node); } #ifndef NDEBUG - request_check(old_cfg, *config_request); + request_check(old_pc, *config_request); #endif switch (config_request->type) { case config_type::CT_ASSIGN_PRIMARY: @@ -1575,14 +1578,15 @@ void server_state::update_configuration_locally( case config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT: break; - case config_type::CT_DROP_PARTITION: - for (const auto &node : new_cfg.hp_last_drops) { - ns = get_node_state(_nodes, node, false); - if (ns != nullptr) + case config_type::CT_DROP_PARTITION: { + for (const auto &last_drop : new_pc.hp_last_drops) { + ns = get_node_state(_nodes, last_drop, false); + if (ns != nullptr) { ns->remove_partition(gpid, false); + } } break; - + } case config_type::CT_ADD_SECONDARY: case config_type::CT_ADD_SECONDARY_FOR_LB: CHECK(false, "invalid execution work flow"); @@ -1592,7 +1596,7 @@ void 
server_state::update_configuration_locally( // TODO(yingchun): optimize the duplicate loops. if (config_request->config.__isset.hp_secondaries) { for (const auto &secondary : config_request->config.hp_secondaries) { - auto secondary_node = get_node_state(_nodes, secondary, false); + auto *secondary_node = get_node_state(_nodes, secondary, false); secondary_node->put_partition(gpid, false); } } else { @@ -1617,12 +1621,12 @@ void server_state::update_configuration_locally( break; } } else { - CHECK_EQ(old_cfg.ballot, new_cfg.ballot); + CHECK_EQ(old_pc.ballot, new_pc.ballot); const auto host_node = host_port::from_address(config_request->host_node); // The non-stateful app is just for testing, so just check the host_node is resolvable. CHECK(host_node, "'{}' can not be reverse resolved", config_request->host_node); - new_cfg = old_cfg; - partition_configuration_stateless pcs(new_cfg); + new_pc = old_pc; + partition_configuration_stateless pcs(new_pc); if (config_request->type == config_type::type::CT_ADD_SECONDARY) { pcs.hosts().emplace_back(host_node); pcs.workers().emplace_back(node); @@ -1645,8 +1649,8 @@ void server_state::update_configuration_locally( // we assume config in config_request stores the proper new config // as we sync to remote storage according to it - std::string old_config_str = boost::lexical_cast(old_cfg); - old_cfg = config_request->config; + std::string old_config_str = boost::lexical_cast(old_pc); + old_pc = config_request->config; auto find_name = _config_type_VALUES_TO_NAMES.find(config_request->type); if (find_name != _config_type_VALUES_TO_NAMES.end()) { LOG_INFO("meta update config ok: type({}), old_config={}, {}", @@ -1777,7 +1781,7 @@ void server_state::recall_partition(std::shared_ptr &app, int pidx) auto on_recall_partition = [this, app, pidx](dsn::error_code error) mutable { if (error == dsn::ERR_OK) { zauto_write_lock l(_lock); - app->partitions[pidx].partition_flags &= (~pc_flags::dropped); + app->pcs[pidx].partition_flags &= 
(~pc_flags::dropped); process_one_partition(app); } else if (error == dsn::ERR_TIMEOUT) { tasking::enqueue(LPC_META_STATE_HIGH, @@ -1790,7 +1794,7 @@ void server_state::recall_partition(std::shared_ptr &app, int pidx) } }; - partition_configuration &pc = app->partitions[pidx]; + partition_configuration &pc = app->pcs[pidx]; CHECK((pc.partition_flags & pc_flags::dropped), ""); pc.partition_flags = 0; @@ -1802,7 +1806,7 @@ void server_state::recall_partition(std::shared_ptr &app, int pidx) void server_state::drop_partition(std::shared_ptr &app, int pidx) { - partition_configuration &pc = app->partitions[pidx]; + partition_configuration &pc = app->pcs[pidx]; config_context &cc = app->helpers->contexts[pidx]; std::shared_ptr req = @@ -1814,11 +1818,11 @@ void server_state::drop_partition(std::shared_ptr &app, int pidx) SET_OBJ_IP_AND_HOST_PORT(request, node, pc, primary); request.config = pc; - for (auto &node : pc.hp_secondaries) { - maintain_drops(request.config.hp_last_drops, node, request.type); + for (const auto &secondary : pc.hp_secondaries) { + maintain_drops(request.config.hp_last_drops, secondary, request.type); } - for (auto &node : pc.secondaries) { - maintain_drops(request.config.last_drops, node, request.type); + for (const auto &secondary : pc.secondaries) { + maintain_drops(request.config.last_drops, secondary, request.type); } if (pc.hp_primary) { maintain_drops(request.config.hp_last_drops, pc.hp_primary, request.type); @@ -1855,7 +1859,7 @@ void server_state::drop_partition(std::shared_ptr &app, int pidx) void server_state::downgrade_primary_to_inactive(std::shared_ptr &app, int pidx) { - partition_configuration &pc = app->partitions[pidx]; + partition_configuration &pc = app->pcs[pidx]; config_context &cc = app->helpers->contexts[pidx]; if (config_status::pending_remote_sync == cc.stage) { @@ -1900,7 +1904,7 @@ void server_state::downgrade_secondary_to_inactive(std::shared_ptr &a int pidx, const host_port &node) { - partition_configuration &pc = 
app->partitions[pidx]; + partition_configuration &pc = app->pcs[pidx]; config_context &cc = app->helpers->contexts[pidx]; CHECK(pc.hp_primary, "this shouldn't be called if the primary is invalid"); @@ -1923,31 +1927,31 @@ void server_state::downgrade_secondary_to_inactive(std::shared_ptr &a void server_state::downgrade_stateless_nodes(std::shared_ptr &app, int pidx, - const host_port &address) + const host_port &node) { - std::shared_ptr req = - std::make_shared(); + auto req = std::make_shared(); req->info = *app; req->type = config_type::CT_REMOVE; - req->host_node = dsn::dns_resolver::instance().resolve_address(address); + req->host_node = dsn::dns_resolver::instance().resolve_address(node); RESET_IP_AND_HOST_PORT(*req, node); - req->config = app->partitions[pidx]; + req->config = app->pcs[pidx]; config_context &cc = app->helpers->contexts[pidx]; partition_configuration &pc = req->config; unsigned i = 0; for (; i < pc.hp_secondaries.size(); ++i) { - if (pc.hp_secondaries[i] == address) { + if (pc.hp_secondaries[i] == node) { SET_OBJ_IP_AND_HOST_PORT(*req, node, pc, last_drops[i]); break; } } - host_port node; - GET_HOST_PORT(*req, node, node); - CHECK(node, "invalid node: {}", node); + host_port req_node; + GET_HOST_PORT(*req, node, req_node); + CHECK(req_node, "invalid node: {}", req_node); // remove host_node & node from secondaries/last_drops, as it will be sync to remote // storage + CHECK(pc.__isset.hp_secondaries, "hp_secondaries not set"); for (++i; i < pc.hp_secondaries.size(); ++i) { pc.secondaries[i - 1] = pc.secondaries[i]; pc.last_drops[i - 1] = pc.last_drops[i]; @@ -1964,7 +1968,7 @@ void server_state::downgrade_stateless_nodes(std::shared_ptr &app, "removing host({}) worker({})", pc.pid, req->host_node, - node); + req_node); cc.cancel_sync(); } cc.stage = config_status::pending_remote_sync; @@ -1980,7 +1984,7 @@ void server_state::on_update_configuration( zauto_write_lock l(_lock); dsn::gpid &gpid = cfg_request->config.pid; std::shared_ptr app = 
get_app(gpid.get_app_id()); - partition_configuration &pc = app->partitions[gpid.get_partition_index()]; + partition_configuration &pc = app->pcs[gpid.get_partition_index()]; config_context &cc = app->helpers->contexts[gpid.get_partition_index()]; configuration_update_response response; response.err = ERR_IO_PENDING; @@ -2044,53 +2048,61 @@ void server_state::on_update_configuration( void server_state::on_partition_node_dead(std::shared_ptr &app, int pidx, - const dsn::host_port &address) -{ - partition_configuration &pc = app->partitions[pidx]; - if (app->is_stateful) { - if (is_primary(pc, address)) - downgrade_primary_to_inactive(app, pidx); - else if (is_secondary(pc, address)) { - if (pc.hp_primary) - downgrade_secondary_to_inactive(app, pidx, address); - else if (is_secondary(pc, address)) { - LOG_INFO("gpid({}): secondary({}) is down, ignored it due to no primary for this " - "partition available", - pc.pid, - address); - } else { - CHECK(false, "no primary/secondary on this node, node address = {}", address); - } - } - } else { - downgrade_stateless_nodes(app, pidx, address); + const dsn::host_port &node) +{ + const auto &pc = app->pcs[pidx]; + if (!app->is_stateful) { + downgrade_stateless_nodes(app, pidx, node); + return; + } + + if (is_primary(pc, node)) { + downgrade_primary_to_inactive(app, pidx); + return; + } + + if (!is_secondary(pc, node)) { + return; } + + if (pc.hp_primary) { + downgrade_secondary_to_inactive(app, pidx, node); + return; + } + + CHECK(is_secondary(pc, node), ""); + LOG_INFO("gpid({}): secondary({}) is down, ignored it due to no primary for this partition " + "available", + pc.pid, + node); } void server_state::on_change_node_state(const host_port &node, bool is_alive) { LOG_DEBUG("change node({}) state to {}", node, is_alive ? 
"alive" : "dead"); zauto_write_lock l(_lock); - if (!is_alive) { - auto iter = _nodes.find(node); - if (iter == _nodes.end()) { - LOG_INFO("node({}) doesn't exist in the node state, just ignore", node); - } else { - node_state &ns = iter->second; - ns.set_alive(false); - ns.set_replicas_collect_flag(false); - ns.for_each_partition([&, this](const dsn::gpid &pid) { - std::shared_ptr app = get_app(pid.get_app_id()); - CHECK(app != nullptr && app->status != app_status::AS_DROPPED, - "invalid app, app_id = {}", - pid.get_app_id()); - on_partition_node_dead(app, pid.get_partition_index(), node); - return true; - }); - } - } else { + if (is_alive) { get_node_state(_nodes, node, true)->set_alive(true); + return; } + + auto iter = _nodes.find(node); + if (iter == _nodes.end()) { + LOG_INFO("node({}) doesn't exist in the node state, just ignore", node); + return; + } + + node_state &ns = iter->second; + ns.set_alive(false); + ns.set_replicas_collect_flag(false); + ns.for_each_partition([&, this](const dsn::gpid &pid) { + std::shared_ptr app = get_app(pid.get_app_id()); + CHECK(app != nullptr && app->status != app_status::AS_DROPPED, + "invalid app, app_id = {}", + pid.get_app_id()); + on_partition_node_dead(app, pid.get_partition_index(), node); + return true; + }); } void server_state::on_propose_balancer(const configuration_balancer_request &request, @@ -2100,19 +2112,21 @@ void server_state::on_propose_balancer(const configuration_balancer_request &req std::shared_ptr app = get_app(request.gpid.get_app_id()); if (app == nullptr || app->status != app_status::AS_AVAILABLE || request.gpid.get_partition_index() < 0 || - request.gpid.get_partition_index() >= app->partition_count) + request.gpid.get_partition_index() >= app->partition_count) { response.err = ERR_INVALID_PARAMETERS; - else { - if (request.force) { - partition_configuration &pc = *get_config(_all_apps, request.gpid); - for (const configuration_proposal_action &act : request.action_list) { - send_proposal(act, 
pc, *app); - } - response.err = ERR_OK; - } else { - _meta_svc->get_balancer()->register_proposals({&_all_apps, &_nodes}, request, response); + return; + } + + if (request.force) { + const auto &pc = *get_config(_all_apps, request.gpid); + for (const auto &act : request.action_list) { + send_proposal(act, pc, *app); } + response.err = ERR_OK; + return; } + + _meta_svc->get_balancer()->register_proposals({&_all_apps, &_nodes}, request, response); } error_code @@ -2245,7 +2259,7 @@ error_code server_state::construct_partitions( if (app->status == app_status::AS_DROPPING) { LOG_INFO("ignore constructing partitions for dropping app({})", app->app_id); } else { - for (partition_configuration &pc : app->partitions) { + for (const auto &pc : app->pcs) { bool is_succeed = construct_replica({&_all_apps, &_nodes}, pc.pid, app->max_replica_count); if (is_succeed) { @@ -2254,12 +2268,12 @@ error_code server_state::construct_partitions( pc.pid.get_partition_index(), boost::lexical_cast(pc)); if (pc.hp_last_drops.size() + 1 < pc.max_replica_count) { - std::ostringstream oss; - oss << "WARNING: partition(" << app->app_id << "." 
- << pc.pid.get_partition_index() << ") only collects " - << (pc.hp_last_drops.size() + 1) << "/" << pc.max_replica_count - << " of replicas, may lost data" << std::endl; - hint_message += oss.str(); + hint_message += fmt::format("WARNING: partition({}.{}) only collects {}/{} " + "of replicas, may lost data", + app->app_id, + pc.pid.get_partition_index(), + pc.hp_last_drops.size() + 1, + pc.max_replica_count); } succeed_count++; } else { @@ -2496,8 +2510,9 @@ void server_state::update_partition_metrics() int min_2pc_count = _meta_svc->get_options().app_mutation_2pc_min_replica_count(app->max_replica_count); - for (unsigned int i = 0; i != app->partition_count; ++i) { - health_status st = partition_health_status(app->partitions[i], min_2pc_count); + CHECK_EQ(app->partition_count, app->pcs.size()); + for (const auto &pc : app->pcs) { + health_status st = partition_health_status(pc, min_2pc_count); counters[st]++; } @@ -2551,8 +2566,8 @@ bool server_state::check_all_partitions() continue; } for (unsigned int i = 0; i != app->partition_count; ++i) { - partition_configuration &pc = app->partitions[i]; - config_context &cc = app->helpers->contexts[i]; + const auto &pc = app->pcs[i]; + const auto &cc = app->helpers->contexts[i]; // partition is under re-configuration or is child partition if (cc.stage != config_status::pending_remote_sync && pc.ballot != invalid_ballot) { configuration_proposal_action action; @@ -2584,8 +2599,8 @@ bool server_state::check_all_partitions() // assign secondary for urgent for (int i = 0; i < add_secondary_actions.size(); ++i) { gpid &pid = add_secondary_gpids[i]; - partition_configuration &pc = *get_config(_all_apps, pid); - if (!add_secondary_proposed[i] && pc.hp_secondaries.empty()) { + const auto *pc = get_config(_all_apps, pid); + if (!add_secondary_proposed[i] && pc->hp_secondaries.empty()) { const auto &action = add_secondary_actions[i]; CHECK(action.hp_node, ""); if (_add_secondary_enable_flow_control && 
add_secondary_running_nodes[action.hp_node] >= @@ -2594,7 +2609,7 @@ bool server_state::check_all_partitions() continue; } std::shared_ptr app = get_app(pid.get_app_id()); - send_proposal(action, pc, *app); + send_proposal(action, *pc, *app); send_proposal_count++; add_secondary_proposed[i] = true; add_secondary_running_nodes[action.hp_node]++; @@ -2607,19 +2622,19 @@ bool server_state::check_all_partitions() const auto &action = add_secondary_actions[i]; CHECK(action.hp_node, ""); gpid pid = add_secondary_gpids[i]; - partition_configuration &pc = *get_config(_all_apps, pid); + const auto *pc = get_config(_all_apps, pid); if (_add_secondary_enable_flow_control && add_secondary_running_nodes[action.hp_node] >= _add_secondary_max_count_for_one_node) { LOG_INFO("do not send {} proposal for gpid({}) for flow control reason, target = " "{}, node = {}", ::dsn::enum_to_string(action.type), - pc.pid, + pc->pid, FMT_HOST_PORT_AND_IP(action, target), FMT_HOST_PORT_AND_IP(action, node)); continue; } std::shared_ptr app = get_app(pid.get_app_id()); - send_proposal(action, pc, *app); + send_proposal(action, *pc, *app); send_proposal_count++; add_secondary_proposed[i] = true; add_secondary_running_nodes[action.hp_node]++; @@ -2704,33 +2719,33 @@ void server_state::check_consistency(const dsn::gpid &gpid) auto iter = _all_apps.find(gpid.get_app_id()); CHECK(iter != _all_apps.end(), "invalid gpid({})", gpid); - app_state &app = *(iter->second); - partition_configuration &config = app.partitions[gpid.get_partition_index()]; + auto &app = *(iter->second); + auto &pc = app.pcs[gpid.get_partition_index()]; if (app.is_stateful) { - if (config.hp_primary) { - auto it = _nodes.find(config.hp_primary); - CHECK(it != _nodes.end(), "invalid primary address, address = {}", config.hp_primary); + if (pc.hp_primary) { + const auto it = _nodes.find(pc.hp_primary); + CHECK(it != _nodes.end(), "invalid primary: {}", pc.hp_primary); CHECK_EQ(it->second.served_as(gpid), 
partition_status::PS_PRIMARY); - CHECK(!utils::contains(config.hp_last_drops, config.hp_primary), - "primary shouldn't appear in last_drops, address = {}", - config.hp_primary); + CHECK(!utils::contains(pc.hp_last_drops, pc.hp_primary), + "primary({}) shouldn't appear in last_drops", + pc.hp_primary); } - for (auto &ep : config.hp_secondaries) { - auto it = _nodes.find(ep); - CHECK(it != _nodes.end(), "invalid secondary address, address = {}", ep); + for (const auto &secondary : pc.hp_secondaries) { + const auto it = _nodes.find(secondary); + CHECK(it != _nodes.end(), "invalid secondary: {}", secondary); CHECK_EQ(it->second.served_as(gpid), partition_status::PS_SECONDARY); - CHECK(!utils::contains(config.hp_last_drops, ep), - "secondary shouldn't appear in last_drops, address = {}", - ep); + CHECK(!utils::contains(pc.hp_last_drops, secondary), + "secondary({}) shouldn't appear in last_drops", + secondary); } } else { - partition_configuration_stateless pcs(config); + partition_configuration_stateless pcs(pc); CHECK_EQ(pcs.hosts().size(), pcs.workers().size()); - for (auto &ep : pcs.hosts()) { - auto it = _nodes.find(ep); - CHECK(it != _nodes.end(), "invalid host, address = {}", ep); + for (const auto &secondary : pcs.hosts()) { + auto it = _nodes.find(secondary); + CHECK(it != _nodes.end(), "invalid secondary: {}", secondary); CHECK_EQ(it->second.served_as(gpid), partition_status::PS_SECONDARY); } } @@ -3244,18 +3259,17 @@ template bool server_state::check_max_replica_count_consistent(const std::shared_ptr &app, Response &response) const { - for (int i = 0; i < static_cast(app->partitions.size()); ++i) { - const auto &partition_config = app->partitions[i]; - if (partition_config.max_replica_count == app->max_replica_count) { + for (const auto &pc : app->pcs) { + if (pc.max_replica_count == app->max_replica_count) { continue; } response.err = ERR_INCONSISTENT_STATE; response.hint_message = fmt::format("partition_max_replica_count({}) != " "app_max_replica_count({}) 
for partition {}", - partition_config.max_replica_count, + pc.max_replica_count, app->max_replica_count, - i); + pc.pid); return false; } @@ -3627,8 +3641,8 @@ void server_state::update_partition_max_replica_count(std::shared_ptr app->partition_count, new_max_replica_count); - const auto &old_partition_config = app->partitions[partition_index]; - const auto old_max_replica_count = old_partition_config.max_replica_count; + const auto &old_pc = app->pcs[partition_index]; + const auto old_max_replica_count = old_pc.max_replica_count; if (new_max_replica_count == old_max_replica_count) { LOG_WARNING("partition-level max_replica_count has been updated: app_name={}, " @@ -3675,23 +3689,23 @@ void server_state::update_partition_max_replica_count(std::shared_ptr context.pending_sync_request.reset(); context.msg = nullptr; - auto new_partition_config = old_partition_config; - new_partition_config.max_replica_count = new_max_replica_count; - ++(new_partition_config.ballot); - context.pending_sync_task = update_partition_max_replica_count_on_remote( - app, new_partition_config, on_partition_updated); + auto new_pc = old_pc; + new_pc.max_replica_count = new_max_replica_count; + ++(new_pc.ballot); + context.pending_sync_task = + update_partition_max_replica_count_on_remote(app, new_pc, on_partition_updated); } // ThreadPool: THREAD_POOL_META_STATE -task_ptr server_state::update_partition_max_replica_count_on_remote( - std::shared_ptr &app, - const partition_configuration &new_partition_config, - partition_callback on_partition_updated) +task_ptr +server_state::update_partition_max_replica_count_on_remote(std::shared_ptr &app, + const partition_configuration &new_pc, + partition_callback on_partition_updated) { - const auto &gpid = new_partition_config.pid; + const auto &gpid = new_pc.pid; const auto partition_index = gpid.get_partition_index(); - const auto new_max_replica_count = new_partition_config.max_replica_count; - const auto new_ballot = new_partition_config.ballot; + 
const auto new_max_replica_count = new_pc.max_replica_count; + const auto new_ballot = new_pc.ballot; const auto level = _meta_svc->get_function_level(); if (level <= meta_function_level::fl_blind) { @@ -3710,15 +3724,15 @@ task_ptr server_state::update_partition_max_replica_count_on_remote( return tasking::enqueue( LPC_META_STATE_HIGH, tracker(), - [this, app, new_partition_config, on_partition_updated]() mutable { - const auto &gpid = new_partition_config.pid; + [this, app, new_pc, on_partition_updated]() mutable { + const auto &gpid = new_pc.pid; const auto partition_index = gpid.get_partition_index(); zauto_write_lock l(_lock); auto &context = app->helpers->contexts[partition_index]; - context.pending_sync_task = update_partition_max_replica_count_on_remote( - app, new_partition_config, on_partition_updated); + context.pending_sync_task = + update_partition_max_replica_count_on_remote(app, new_pc, on_partition_updated); }, server_state::sStateHash, std::chrono::seconds(1)); @@ -3733,8 +3747,7 @@ task_ptr server_state::update_partition_max_replica_count_on_remote( new_ballot); auto partition_path = get_partition_path(gpid); - auto json_config = - dsn::json::json_forwarder::encode(new_partition_config); + auto json_config = dsn::json::json_forwarder::encode(new_pc); return _meta_svc->get_remote_storage()->set_data( partition_path, json_config, @@ -3743,7 +3756,7 @@ task_ptr server_state::update_partition_max_replica_count_on_remote( this, std::placeholders::_1, app, - new_partition_config, + new_pc, on_partition_updated), tracker()); } @@ -3752,13 +3765,13 @@ task_ptr server_state::update_partition_max_replica_count_on_remote( void server_state::on_update_partition_max_replica_count_on_remote_reply( error_code ec, std::shared_ptr &app, - const partition_configuration &new_partition_config, + const partition_configuration &new_pc, partition_callback on_partition_updated) { - const auto &gpid = new_partition_config.pid; + const auto &gpid = new_pc.pid; const auto 
partition_index = gpid.get_partition_index(); - const auto new_max_replica_count = new_partition_config.max_replica_count; - const auto new_ballot = new_partition_config.ballot; + const auto new_max_replica_count = new_pc.max_replica_count; + const auto new_ballot = new_pc.ballot; zauto_write_lock l(_lock); @@ -3778,15 +3791,15 @@ void server_state::on_update_partition_max_replica_count_on_remote_reply( context.pending_sync_task = tasking::enqueue( LPC_META_STATE_HIGH, tracker(), - [this, app, new_partition_config, on_partition_updated]() mutable { - const auto &gpid = new_partition_config.pid; + [this, app, new_pc, on_partition_updated]() mutable { + const auto &gpid = new_pc.pid; const auto partition_index = gpid.get_partition_index(); zauto_write_lock l(_lock); auto &context = app->helpers->contexts[partition_index]; - context.pending_sync_task = update_partition_max_replica_count_on_remote( - app, new_partition_config, on_partition_updated); + context.pending_sync_task = + update_partition_max_replica_count_on_remote(app, new_pc, on_partition_updated); }, server_state::sStateHash, std::chrono::seconds(1)); @@ -3798,7 +3811,7 @@ void server_state::on_update_partition_max_replica_count_on_remote_reply( return; } - update_partition_max_replica_count_locally(app, new_partition_config); + update_partition_max_replica_count_locally(app, new_pc); context.pending_sync_task = nullptr; context.pending_sync_request.reset(); @@ -3809,17 +3822,17 @@ void server_state::on_update_partition_max_replica_count_on_remote_reply( } // ThreadPool: THREAD_POOL_META_STATE -void server_state::update_partition_max_replica_count_locally( - std::shared_ptr &app, const partition_configuration &new_partition_config) +void server_state::update_partition_max_replica_count_locally(std::shared_ptr &app, + const partition_configuration &new_pc) { - const auto &gpid = new_partition_config.pid; + const auto &gpid = new_pc.pid; const auto partition_index = gpid.get_partition_index(); - const auto 
new_max_replica_count = new_partition_config.max_replica_count; - const auto new_ballot = new_partition_config.ballot; + const auto new_max_replica_count = new_pc.max_replica_count; + const auto new_ballot = new_pc.ballot; - auto &old_partition_config = app->partitions[gpid.get_partition_index()]; - const auto old_max_replica_count = old_partition_config.max_replica_count; - const auto old_ballot = old_partition_config.ballot; + auto &old_pc = app->pcs[gpid.get_partition_index()]; + const auto old_max_replica_count = old_pc.max_replica_count; + const auto old_ballot = old_pc.ballot; CHECK_EQ_MSG(old_ballot + 1, new_ballot, @@ -3834,14 +3847,14 @@ void server_state::update_partition_max_replica_count_locally( old_ballot, new_ballot); - std::string old_config_str(boost::lexical_cast(old_partition_config)); - std::string new_config_str(boost::lexical_cast(new_partition_config)); + std::string old_config_str(boost::lexical_cast(old_pc)); + std::string new_config_str(boost::lexical_cast(new_pc)); - old_partition_config = new_partition_config; + old_pc = new_pc; LOG_INFO("local partition-level max_replica_count has been changed successfully: ", - "app_name={}, app_id={}, partition_id={}, old_partition_config={}, " - "new_partition_config={}", + "app_name={}, app_id={}, partition_id={}, old_pc={}, " + "new_pc={}", app->app_name, app->app_id, partition_index, @@ -3910,7 +3923,7 @@ void server_state::recover_all_partitions_max_replica_count(std::shared_ptrpartition_count; ++i) { zauto_read_lock l(_lock); - auto new_pc = app->partitions[i]; + auto new_pc = app->pcs[i]; if (new_pc.max_replica_count == new_max_replica_count) { LOG_WARNING("no need to recover partition-level max_replica_count since it has been " "updated before: app_name={}, app_id={}, partition_index={}, " @@ -3944,7 +3957,7 @@ void server_state::recover_all_partitions_max_replica_count(std::shared_ptrpartitions[i]; + auto &old_pc = app->pcs[i]; std::string old_pc_str(boost::lexical_cast(old_pc)); std::string 
new_pc_str(boost::lexical_cast(new_pc)); diff --git a/src/meta/server_state.h b/src/meta/server_state.h index ad3664cf15..ccace992ba 100644 --- a/src/meta/server_state.h +++ b/src/meta/server_state.h @@ -157,7 +157,7 @@ class server_state void query_configuration_by_index(const query_cfg_request &request, /*out*/ query_cfg_response &response); - bool query_configuration_by_gpid(const dsn::gpid id, /*out*/ partition_configuration &config); + bool query_configuration_by_gpid(const dsn::gpid id, /*out*/ partition_configuration &pc); // app options void create_app(dsn::message_ex *msg); @@ -276,7 +276,7 @@ class server_state void update_configuration_locally(app_state &app, std::shared_ptr &config_request); - void request_check(const partition_configuration &old, + void request_check(const partition_configuration &old_pc, const configuration_update_request &request); void recall_partition(std::shared_ptr &app, int pidx); void drop_partition(std::shared_ptr &app, int pidx); @@ -285,11 +285,9 @@ class server_state int pidx, const host_port &node); void - downgrade_stateless_nodes(std::shared_ptr &app, int pidx, const host_port &address); - - void on_partition_node_dead(std::shared_ptr &app, - int pidx, - const dsn::host_port &address); + downgrade_stateless_nodes(std::shared_ptr &app, int pidx, const host_port &node); + void + on_partition_node_dead(std::shared_ptr &app, int pidx, const dsn::host_port &node); void send_proposal(const host_port &target, const configuration_update_request &proposal); void send_proposal(const configuration_proposal_action &action, const partition_configuration &pc, @@ -347,18 +345,16 @@ class server_state int32_t partition_index, int32_t new_max_replica_count, partition_callback on_partition_updated); - task_ptr update_partition_max_replica_count_on_remote( - std::shared_ptr &app, - const partition_configuration &new_partition_config, - partition_callback on_partition_updated); - void on_update_partition_max_replica_count_on_remote_reply( - 
error_code ec, - std::shared_ptr &app, - const partition_configuration &new_partition_config, - partition_callback on_partition_updated); + task_ptr update_partition_max_replica_count_on_remote(std::shared_ptr &app, + const partition_configuration &new_pc, + partition_callback on_partition_updated); void - update_partition_max_replica_count_locally(std::shared_ptr &app, - const partition_configuration &new_partition_config); + on_update_partition_max_replica_count_on_remote_reply(error_code ec, + std::shared_ptr &app, + const partition_configuration &new_pc, + partition_callback on_partition_updated); + void update_partition_max_replica_count_locally(std::shared_ptr &app, + const partition_configuration &new_pc); void recover_all_partitions_max_replica_count(std::shared_ptr &app, int32_t max_replica_count, diff --git a/src/meta/server_state_restore.cpp b/src/meta/server_state_restore.cpp index ef99113bef..59dfe2dff5 100644 --- a/src/meta/server_state_restore.cpp +++ b/src/meta/server_state_restore.cpp @@ -250,8 +250,8 @@ void server_state::on_query_restore_status(configuration_query_restore_rpc rpc) response.restore_status.resize(app->partition_count, ERR_OK); for (int32_t i = 0; i < app->partition_count; i++) { const auto &r_state = app->helpers->restore_states[i]; - const auto &p = app->partitions[i]; - if (p.hp_primary || !p.hp_secondaries.empty()) { + const auto &pc = app->pcs[i]; + if (pc.hp_primary || !pc.hp_secondaries.empty()) { // already have primary, restore succeed continue; } diff --git a/src/meta/test/backup_test.cpp b/src/meta/test/backup_test.cpp index f436514b60..f3ec80d140 100644 --- a/src/meta/test/backup_test.cpp +++ b/src/meta/test/backup_test.cpp @@ -504,7 +504,7 @@ TEST_F(policy_context_test, test_app_dropped_during_backup) app_state *app = state->_all_apps[3].get(); app->status = dsn::app_status::AS_AVAILABLE; - for (partition_configuration &pc : app->partitions) { + for (auto &pc : app->pcs) { SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, 
node_list[0]); SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, node_list[1], node_list[2]); } diff --git a/src/meta/test/balancer_simulator/balancer_simulator.cpp b/src/meta/test/balancer_simulator/balancer_simulator.cpp index f5ab9d8aeb..4648577a03 100644 --- a/src/meta/test/balancer_simulator/balancer_simulator.cpp +++ b/src/meta/test/balancer_simulator/balancer_simulator.cpp @@ -96,11 +96,11 @@ void generate_balanced_apps(/*out*/ app_mapper &apps, info.partition_count = partitions_per_node * node_list.size(); info.max_replica_count = 3; - std::shared_ptr the_app = app_state::create(info); + std::shared_ptr app = app_state::create(info); simple_priority_queue pq1(node_list, server_load_balancer::primary_comparator(nodes)); // generate balanced primary - for (dsn::partition_configuration &pc : the_app->partitions) { + for (auto &pc : app->pcs) { const auto &n = pq1.pop(); nodes[n].put_partition(pc.pid, true); SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, n); @@ -111,7 +111,7 @@ void generate_balanced_apps(/*out*/ app_mapper &apps, simple_priority_queue pq2(node_list, server_load_balancer::partition_comparator(nodes)); std::vector temp; - for (dsn::partition_configuration &pc : the_app->partitions) { + for (auto &pc : app->pcs) { temp.clear(); while (pc.hp_secondaries.size() + 1 < pc.max_replica_count) { const auto &n = pq2.pop(); @@ -127,7 +127,7 @@ void generate_balanced_apps(/*out*/ app_mapper &apps, // check if balanced int pri_min, part_min; - pri_min = part_min = the_app->partition_count + 1; + pri_min = part_min = app->partition_count + 1; int pri_max, part_max; pri_max = part_max = -1; @@ -142,7 +142,7 @@ void generate_balanced_apps(/*out*/ app_mapper &apps, part_min = kv.second.partition_count(); } - apps.emplace(the_app->app_id, the_app); + apps.emplace(app->app_id, app); CHECK_LE(pri_max - pri_min, 1); CHECK_LE(part_max - part_min, 1); @@ -150,9 +150,9 @@ void generate_balanced_apps(/*out*/ app_mapper &apps, void random_move_primary(app_mapper &apps, 
node_mapper &nodes, int primary_move_ratio) { - app_state &the_app = *(apps[0]); - int space_size = the_app.partition_count * 100; - for (dsn::partition_configuration &pc : the_app.partitions) { + app_state &app = *(apps[0]); + int space_size = app.partition_count * 100; + for (auto &pc : app.pcs) { int n = random32(1, space_size) / 100; if (n < primary_move_ratio) { int indice = random32(0, 1); diff --git a/src/meta/test/balancer_validator.cpp b/src/meta/test/balancer_validator.cpp index 9b610f003a..3671a90bd3 100644 --- a/src/meta/test/balancer_validator.cpp +++ b/src/meta/test/balancer_validator.cpp @@ -165,14 +165,14 @@ void meta_service_test_app::balancer_validator() iter.second.partition_count()); } - std::shared_ptr &the_app = apps[1]; - for (::dsn::partition_configuration &pc : the_app->partitions) { + const auto &app = apps[1]; + for (const auto &pc : app->pcs) { CHECK(pc.hp_primary, ""); CHECK_GE(pc.secondaries.size(), pc.max_replica_count - 1); } // now test the cure - ::dsn::partition_configuration &pc = the_app->partitions[0]; + auto &pc = app->pcs[0]; nodes[pc.hp_primary].remove_partition(pc.pid, false); for (const auto &hp : pc.hp_secondaries) { nodes[hp].remove_partition(pc.pid, false); @@ -218,11 +218,11 @@ static void load_apps_and_nodes(const char *file, app_mapper &apps, node_mapper infile >> n; infile >> ip_port; const auto primary = host_port::from_string(ip_port); - SET_IP_AND_HOST_PORT_BY_DNS(app->partitions[j], primary, primary); + SET_IP_AND_HOST_PORT_BY_DNS(app->pcs[j], primary, primary); for (int k = 1; k < n; ++k) { infile >> ip_port; const auto secondary = host_port::from_string(ip_port); - ADD_IP_AND_HOST_PORT_BY_DNS(app->partitions[j], secondaries, secondary); + ADD_IP_AND_HOST_PORT_BY_DNS(app->pcs[j], secondaries, secondary); } } } diff --git a/src/meta/test/cluster_balance_policy_test.cpp b/src/meta/test/cluster_balance_policy_test.cpp index 14ae55632b..f8da5a3a14 100644 --- a/src/meta/test/cluster_balance_policy_test.cpp +++ 
b/src/meta/test/cluster_balance_policy_test.cpp @@ -119,7 +119,7 @@ TEST(cluster_balance_policy, get_app_migration_info) info.app_name = appname; info.partition_count = 1; auto app = std::make_shared(info); - SET_IP_AND_HOST_PORT_BY_DNS(app->partitions[0], primary, hp); + SET_IP_AND_HOST_PORT_BY_DNS(app->pcs[0], primary, hp); node_state ns; ns.set_hp(hp); @@ -129,14 +129,14 @@ TEST(cluster_balance_policy, get_app_migration_info) cluster_balance_policy::app_migration_info migration_info; { - app->partitions[0].max_replica_count = 100; + app->pcs[0].max_replica_count = 100; auto res = policy.get_app_migration_info(app, nodes, balance_type::COPY_PRIMARY, migration_info); ASSERT_FALSE(res); } { - app->partitions[0].max_replica_count = 1; + app->pcs[0].max_replica_count = 1; auto res = policy.get_app_migration_info(app, nodes, balance_type::COPY_PRIMARY, migration_info); ASSERT_TRUE(res); @@ -162,15 +162,15 @@ TEST(cluster_balance_policy, get_node_migration_info) info.app_name = appname; info.partition_count = 1; auto app = std::make_shared(info); - SET_IP_AND_HOST_PORT_BY_DNS(app->partitions[0], primary, hp); + SET_IP_AND_HOST_PORT_BY_DNS(app->pcs[0], primary, hp); serving_replica sr; sr.node = hp; std::string disk_tag = "disk1"; sr.disk_tag = disk_tag; config_context context; - context.config_owner = new partition_configuration(); - auto cleanup = dsn::defer([&context]() { delete context.config_owner; }); - context.config_owner->pid = gpid(appid, 0); + context.pc = new partition_configuration(); + auto cleanup = dsn::defer([&context]() { delete context.pc; }); + context.pc->pid = gpid(appid, 0); context.serving.emplace_back(std::move(sr)); app->helpers->contexts.emplace_back(std::move(context)); @@ -517,8 +517,8 @@ TEST(cluster_balance_policy, calc_potential_moving) partition_configuration pc; SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, hp1); SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, hp2, hp3); - app->partitions[0] = pc; - app->partitions[1] = pc; + app->pcs[0] = 
pc; + app->pcs[1] = pc; app_mapper apps; apps[app_id] = app; diff --git a/src/meta/test/ford_fulkerson_test.cpp b/src/meta/test/ford_fulkerson_test.cpp index bb291c8bce..f7d97628ff 100644 --- a/src/meta/test/ford_fulkerson_test.cpp +++ b/src/meta/test/ford_fulkerson_test.cpp @@ -99,8 +99,8 @@ TEST(ford_fulkerson, update_decree) std::shared_ptr app = app_state::create(info); partition_configuration pc; SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, hp2, hp3); - app->partitions.push_back(pc); - app->partitions.push_back(pc); + app->pcs.push_back(pc); + app->pcs.push_back(pc); node_mapper nodes; node_state ns; @@ -137,8 +137,8 @@ TEST(ford_fulkerson, find_shortest_path) partition_configuration pc; SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, hp1); SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, hp2, hp3); - app->partitions[0] = pc; - app->partitions[1] = pc; + app->pcs[0] = pc; + app->pcs[1] = pc; node_mapper nodes; node_state ns1; diff --git a/src/meta/test/meta_app_operation_test.cpp b/src/meta/test/meta_app_operation_test.cpp index 2fbce6477d..11d458e059 100644 --- a/src/meta/test/meta_app_operation_test.cpp +++ b/src/meta/test/meta_app_operation_test.cpp @@ -154,8 +154,8 @@ class meta_app_operation_test : public meta_test_base auto app = find_app(app_name); CHECK(app, "app({}) does not exist", app_name); - auto &partition_config = app->partitions[partition_index]; - partition_config.max_replica_count = max_replica_count; + auto &pc = app->pcs[partition_index]; + pc.max_replica_count = max_replica_count; } void set_max_replica_count_env(const std::string &app_name, const std::string &env) @@ -217,16 +217,15 @@ class meta_app_operation_test : public meta_test_base auto app = find_app(app_name); CHECK(app, "app({}) does not exist", app_name); - auto partition_size = static_cast(app->partitions.size()); + auto partition_size = static_cast(app->pcs.size()); for (int i = 0; i < partition_size; ++i) { // set local max_replica_count of each partition - auto &partition_config = 
app->partitions[i]; - partition_config.max_replica_count = max_replica_count; + auto &pc = app->pcs[i]; + pc.max_replica_count = max_replica_count; // set remote max_replica_count of each partition - auto partition_path = _ss->get_partition_path(partition_config.pid); - auto json_config = - dsn::json::json_forwarder::encode(partition_config); + auto partition_path = _ss->get_partition_path(pc.pid); + auto json_config = dsn::json::json_forwarder::encode(pc); dsn::task_tracker tracker; _ms->get_remote_storage()->set_data( partition_path, @@ -260,28 +259,27 @@ class meta_app_operation_test : public meta_test_base auto app = find_app(app_name); CHECK(app, "app({}) does not exist", app_name); - auto partition_size = static_cast(app->partitions.size()); + auto partition_size = static_cast(app->pcs.size()); for (int i = 0; i < partition_size; ++i) { // verify local max_replica_count of each partition - auto &partition_config = app->partitions[i]; - ASSERT_EQ(partition_config.max_replica_count, expected_max_replica_count); + auto &pc = app->pcs[i]; + ASSERT_EQ(pc.max_replica_count, expected_max_replica_count); // verify remote max_replica_count of each partition - auto partition_path = _ss->get_partition_path(partition_config.pid); + auto partition_path = _ss->get_partition_path(pc.pid); dsn::task_tracker tracker; _ms->get_remote_storage()->get_data( partition_path, LPC_META_CALLBACK, - [expected_pid = partition_config.pid, - expected_max_replica_count](error_code ec, const blob &value) { + [expected_pid = pc.pid, expected_max_replica_count](error_code ec, + const blob &value) { ASSERT_EQ(ec, ERR_OK); - partition_configuration partition_config; - dsn::json::json_forwarder::decode(value, - partition_config); + partition_configuration pc; + dsn::json::json_forwarder::decode(value, pc); - ASSERT_EQ(partition_config.pid, expected_pid); - ASSERT_EQ(partition_config.max_replica_count, expected_max_replica_count); + ASSERT_EQ(pc.pid, expected_pid); + 
ASSERT_EQ(pc.max_replica_count, expected_max_replica_count); }, &tracker); tracker.wait_outstanding_tasks(); diff --git a/src/meta/test/meta_bulk_load_ingestion_test.cpp b/src/meta/test/meta_bulk_load_ingestion_test.cpp index 8d5e434dc8..f6ee7e9fc6 100644 --- a/src/meta/test/meta_bulk_load_ingestion_test.cpp +++ b/src/meta/test/meta_bulk_load_ingestion_test.cpp @@ -199,39 +199,27 @@ class ingestion_context_test : public meta_test_base ainfo.app_id = APP_ID; ainfo.partition_count = PARTITION_COUNT; _app = std::make_shared(ainfo); - _app->partitions.reserve(PARTITION_COUNT); + _app->pcs.reserve(PARTITION_COUNT); _app->helpers->contexts.reserve(PARTITION_COUNT); - mock_partition(0, - {NODE1, NODE2, NODE3}, - {TAG1, TAG1, TAG2}, - _app->partitions[0], - _app->helpers->contexts[0]); - mock_partition(1, - {NODE4, NODE1, NODE2}, - {TAG2, TAG1, TAG2}, - _app->partitions[1], - _app->helpers->contexts[1]); - mock_partition(2, - {NODE3, NODE1, NODE4}, - {TAG1, TAG2, TAG1}, - _app->partitions[2], - _app->helpers->contexts[2]); - mock_partition(3, - {NODE2, NODE3, NODE4}, - {TAG1, TAG1, TAG2}, - _app->partitions[3], - _app->helpers->contexts[3]); + mock_partition( + 0, {NODE1, NODE2, NODE3}, {TAG1, TAG1, TAG2}, _app->pcs[0], _app->helpers->contexts[0]); + mock_partition( + 1, {NODE4, NODE1, NODE2}, {TAG2, TAG1, TAG2}, _app->pcs[1], _app->helpers->contexts[1]); + mock_partition( + 2, {NODE3, NODE1, NODE4}, {TAG1, TAG2, TAG1}, _app->pcs[2], _app->helpers->contexts[2]); + mock_partition( + 3, {NODE2, NODE3, NODE4}, {TAG1, TAG1, TAG2}, _app->pcs[3], _app->helpers->contexts[3]); } void mock_partition(const uint32_t pidx, std::vector nodes, const std::vector tags, - partition_configuration &config, + partition_configuration &pc, config_context &cc) { - config.pid = gpid(APP_ID, pidx); - SET_IP_AND_HOST_PORT_BY_DNS(config, primary, nodes[0]); - SET_IPS_AND_HOST_PORTS_BY_DNS(config, secondaries, nodes[1], nodes[2]); + pc.pid = gpid(APP_ID, pidx); + SET_IP_AND_HOST_PORT_BY_DNS(pc, 
primary, nodes[0]); + SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, nodes[1], nodes[2]); auto count = nodes.size(); for (auto i = 0; i < count; i++) { @@ -253,14 +241,13 @@ class ingestion_context_test : public meta_test_base bool try_partition_ingestion(const uint32_t pidx) { - return _context->try_partition_ingestion(_app->partitions[pidx], - _app->helpers->contexts[pidx]); + return _context->try_partition_ingestion(_app->pcs[pidx], _app->helpers->contexts[pidx]); } void add_partition(const uint32_t pidx) { - auto pinfo = ingestion_context::partition_node_info(_app->partitions[pidx], - _app->helpers->contexts[pidx]); + const auto pinfo = + ingestion_context::partition_node_info(_app->pcs[pidx], _app->helpers->contexts[pidx]); _context->add_partition(pinfo); } diff --git a/src/meta/test/meta_bulk_load_service_test.cpp b/src/meta/test/meta_bulk_load_service_test.cpp index e9868312ef..48477b361c 100644 --- a/src/meta/test/meta_bulk_load_service_test.cpp +++ b/src/meta/test/meta_bulk_load_service_test.cpp @@ -174,16 +174,16 @@ class bulk_load_service_test : public meta_test_base gpid before_check_partition_status(bulk_load_status::type status) { std::shared_ptr app = find_app(APP_NAME); - partition_configuration config; - config.pid = gpid(app->app_id, 0); - config.max_replica_count = 3; - config.ballot = BALLOT; - SET_IP_AND_HOST_PORT_BY_DNS(config, primary, PRIMARY_HP); - SET_IPS_AND_HOST_PORTS_BY_DNS(config, secondaries, SECONDARY1_HP, SECONDARY2_HP); - app->partitions.clear(); - app->partitions.emplace_back(config); + partition_configuration pc; + pc.pid = gpid(app->app_id, 0); + pc.max_replica_count = 3; + pc.ballot = BALLOT; + SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, PRIMARY_HP); + SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, SECONDARY1_HP, SECONDARY2_HP); + app->pcs.clear(); + app->pcs.emplace_back(pc); mock_meta_bulk_load_context(app->app_id, app->partition_count, status); - return config.pid; + return pc.pid; } bool check_partition_status(const 
std::string name, @@ -194,18 +194,18 @@ class bulk_load_service_test : public meta_test_base { std::shared_ptr app = find_app(name); if (mock_primary_invalid) { - RESET_IP_AND_HOST_PORT(app->partitions[pid.get_partition_index()], primary); + RESET_IP_AND_HOST_PORT(app->pcs[pid.get_partition_index()], primary); } if (mock_lack_secondary) { - CLEAR_IP_AND_HOST_PORT(app->partitions[pid.get_partition_index()], secondaries); + CLEAR_IP_AND_HOST_PORT(app->pcs[pid.get_partition_index()], secondaries); } - partition_configuration pconfig; + partition_configuration pc; bool flag = bulk_svc().check_partition_status( name, pid, always_unhealthy_check, std::bind(&bulk_load_service_test::mock_partition_bulk_load, this, name, pid), - pconfig); + pc); wait_all(); return flag; } @@ -233,22 +233,22 @@ class bulk_load_service_test : public meta_test_base bool same) { set_partition_bulk_load_info(pid, ever_ingest_succeed); - partition_configuration config; - config.pid = pid; - SET_IP_AND_HOST_PORT_BY_DNS(config, primary, PRIMARY_HP); + partition_configuration pc; + pc.pid = pid; + SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, PRIMARY_HP); if (same) { - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY1_HP); - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY2_HP); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY1_HP); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY2_HP); } else { - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY1_HP); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY1_HP); if (secondary_count == 2) { - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY3_HP); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY3_HP); } else if (secondary_count >= 3) { - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY2_HP); - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY3_HP); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY2_HP); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY3_HP); } 
} - auto flag = bulk_svc().check_ever_ingestion_succeed(config, APP_NAME, pid); + auto flag = bulk_svc().check_ever_ingestion_succeed(pc, APP_NAME, pid); wait_all(); return flag; } @@ -315,9 +315,9 @@ class bulk_load_service_test : public meta_test_base fail::cfg("ingestion_try_partition_ingestion", "return()"); config_context cc; for (auto i = 0; i < count; i++) { - partition_configuration config; - config.pid = gpid(app_id, i); - bulk_svc().try_partition_ingestion(config, cc); + partition_configuration pc; + pc.pid = gpid(app_id, i); + bulk_svc().try_partition_ingestion(pc, cc); } } @@ -452,11 +452,11 @@ class bulk_load_service_test : public meta_test_base [this, &info]() { LOG_INFO("create app({}) app_id={}, dir succeed", info.app_name, info.app_id); for (int i = 0; i < info.partition_count; ++i) { - partition_configuration config; - config.max_replica_count = 3; - config.pid = gpid(info.app_id, i); - config.ballot = BALLOT; - blob v = json::json_forwarder::encode(config); + partition_configuration pc; + pc.max_replica_count = 3; + pc.pid = gpid(info.app_id, i); + pc.ballot = BALLOT; + blob v = json::json_forwarder::encode(pc); _ms->get_meta_storage()->create_node( _app_root + "/" + boost::lexical_cast(info.app_id) + "/" + boost::lexical_cast(i), diff --git a/src/meta/test/meta_data.cpp b/src/meta/test/meta_data.cpp index c32f33e866..07b568cd5b 100644 --- a/src/meta/test/meta_data.cpp +++ b/src/meta/test/meta_data.cpp @@ -109,7 +109,7 @@ static bool vec_equal(const std::vector &vec1, TEST(meta_data, collect_replica) { - app_mapper app; + app_mapper apps; node_mapper nodes; dsn::app_info info; @@ -120,16 +120,16 @@ TEST(meta_data, collect_replica) info.app_type = "test"; info.max_replica_count = 3; info.partition_count = 1024; - std::shared_ptr the_app = app_state::create(info); - app.emplace(the_app->app_id, the_app); - meta_view view = {&app, &nodes}; + std::shared_ptr app = app_state::create(info); + apps.emplace(app->app_id, app); + meta_view view = {&apps, 
&nodes}; replica_info rep; rep.app_type = "test"; rep.pid = dsn::gpid(1, 0); - dsn::partition_configuration &pc = *get_config(app, rep.pid); - config_context &cc = *get_config_context(app, rep.pid); + auto &pc = *get_config(apps, rep.pid); + auto &cc = *get_config_context(apps, rep.pid); std::vector node_list; generate_node_list(node_list, 10, 10); @@ -352,7 +352,7 @@ TEST(meta_data, collect_replica) TEST(meta_data, construct_replica) { - app_mapper app; + app_mapper apps; node_mapper nodes; dsn::app_info info; @@ -363,16 +363,16 @@ TEST(meta_data, construct_replica) info.app_type = "test"; info.max_replica_count = 3; info.partition_count = 1024; - std::shared_ptr the_app = app_state::create(info); - app.emplace(the_app->app_id, the_app); - meta_view view = {&app, &nodes}; + std::shared_ptr app = app_state::create(info); + apps.emplace(app->app_id, app); + meta_view view = {&apps, &nodes}; replica_info rep; rep.app_type = "test"; rep.pid = dsn::gpid(1, 0); - dsn::partition_configuration &pc = *get_config(app, rep.pid); - config_context &cc = *get_config_context(app, rep.pid); + dsn::partition_configuration &pc = *get_config(apps, rep.pid); + config_context &cc = *get_config_context(apps, rep.pid); std::vector node_list; generate_node_list(node_list, 10, 10); diff --git a/src/meta/test/meta_duplication_service_test.cpp b/src/meta/test/meta_duplication_service_test.cpp index 621a34d9cc..54a58af87d 100644 --- a/src/meta/test/meta_duplication_service_test.cpp +++ b/src/meta/test/meta_duplication_service_test.cpp @@ -674,7 +674,7 @@ TEST_F(meta_duplication_service_test, duplication_sync) auto app = find_app(test_app); // generate all primaries on node[0] - for (partition_configuration &pc : app->partitions) { + for (auto &pc : app->pcs) { pc.ballot = random32(1, 10000); SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, server_nodes[0]); SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, server_nodes[1], server_nodes[2]); @@ -897,7 +897,7 @@ TEST_F(meta_duplication_service_test, 
fail_mode) // ensure dup_sync will synchronize fail_mode const auto hp = generate_node_list(3)[0]; - for (partition_configuration &pc : app->partitions) { + for (auto &pc : app->pcs) { SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, hp); } initialize_node_state(); diff --git a/src/meta/test/meta_mauanl_compaction_test.cpp b/src/meta/test/meta_mauanl_compaction_test.cpp index 6853d34e4f..02c7c82b1e 100644 --- a/src/meta/test/meta_mauanl_compaction_test.cpp +++ b/src/meta/test/meta_mauanl_compaction_test.cpp @@ -51,7 +51,7 @@ class meta_app_compaction_test : public meta_test_base { create_app(APP_NAME, PARTITION_COUNT); auto app = find_app(APP_NAME); - app->partitions.resize(PARTITION_COUNT); + app->pcs.resize(PARTITION_COUNT); app->helpers->contexts.resize(PARTITION_COUNT); for (auto i = 0; i < PARTITION_COUNT; ++i) { serving_replica rep; diff --git a/src/meta/test/meta_partition_guardian_test.cpp b/src/meta/test/meta_partition_guardian_test.cpp index dbc3bf08e1..927d26ea09 100644 --- a/src/meta/test/meta_partition_guardian_test.cpp +++ b/src/meta/test/meta_partition_guardian_test.cpp @@ -24,6 +24,7 @@ * THE SOFTWARE. 
*/ +// IWYU pragma: no_include #include #include #include @@ -207,8 +208,8 @@ void meta_partition_guardian_test::cure_test() std::vector nodes; generate_node_list(nodes, 4, 4); - dsn::partition_configuration &pc = app->partitions[0]; - config_context &cc = *get_config_context(state->_all_apps, dsn::gpid(1, 0)); + auto &pc = app->pcs[0]; + auto &cc = *get_config_context(state->_all_apps, dsn::gpid(1, 0)); #define PROPOSAL_FLAG_CHECK \ ASSERT_TRUE(proposal_sent); \ @@ -797,7 +798,7 @@ void meta_partition_guardian_test::cure() std::vector nodes_list; generate_node_list(nodes_list, 20, 100); - app_mapper app; + app_mapper apps; node_mapper nodes; meta_service svc; partition_guardian guardian(&svc); @@ -810,9 +811,9 @@ void meta_partition_guardian_test::cure() info.app_type = "test"; info.max_replica_count = 3; info.partition_count = 1024; - std::shared_ptr the_app = app_state::create(info); + std::shared_ptr app = app_state::create(info); - app.emplace(the_app->app_id, the_app); + apps.emplace(app->app_id, app); for (const auto &hp : nodes_list) { get_node_state(nodes, hp, true)->set_alive(true); } @@ -823,21 +824,21 @@ void meta_partition_guardian_test::cure() pc_status status; all_partitions_healthy = true; - for (int i = 0; i != the_app->partition_count; ++i) { - dsn::gpid &pid = the_app->partitions[i].pid; - status = guardian.cure({&app, &nodes}, pid, action); + CHECK_EQ(app->partition_count, app->pcs.size()); + for (const auto &pc : app->pcs) { + status = guardian.cure({&apps, &nodes}, pc.pid, action); if (status != pc_status::healthy) { all_partitions_healthy = false; - proposal_action_check_and_apply(action, pid, app, nodes, nullptr); + proposal_action_check_and_apply(action, pc.pid, apps, nodes, nullptr); configuration_update_request fake_request; - fake_request.info = *the_app; - fake_request.config = the_app->partitions[i]; + fake_request.info = *app; + fake_request.config = pc; fake_request.type = action.type; SET_OBJ_IP_AND_HOST_PORT(fake_request, node, 
action, node); fake_request.host_node = action.node; - guardian.reconfig({&app, &nodes}, fake_request); + guardian.reconfig({&apps, &nodes}, fake_request); check_nodes_loads(nodes); } } @@ -849,7 +850,7 @@ void meta_partition_guardian_test::from_proposal_test() std::vector nodes_list; generate_node_list(nodes_list, 3, 3); - app_mapper app; + app_mapper apps; node_mapper nodes; meta_service svc; @@ -863,20 +864,20 @@ void meta_partition_guardian_test::from_proposal_test() info.app_type = "test"; info.max_replica_count = 3; info.partition_count = 1; - std::shared_ptr the_app = app_state::create(info); + std::shared_ptr app = app_state::create(info); - app.emplace(the_app->app_id, the_app); + apps.emplace(app->app_id, app); for (const auto &hp : nodes_list) { get_node_state(nodes, hp, true)->set_alive(true); } - meta_view mv{&app, &nodes}; + meta_view mv{&apps, &nodes}; dsn::gpid p(1, 0); configuration_proposal_action cpa; configuration_proposal_action cpa2; - dsn::partition_configuration &pc = *get_config(app, p); - config_context &cc = *get_config_context(app, p); + dsn::partition_configuration &pc = *get_config(apps, p); + config_context &cc = *get_config_context(apps, p); std::cerr << "Case 1: test no proposals in config_context" << std::endl; ASSERT_FALSE(guardian.from_proposals(mv, p, cpa)); diff --git a/src/meta/test/meta_split_service_test.cpp b/src/meta/test/meta_split_service_test.cpp index f881500d44..5eb3940a7b 100644 --- a/src/meta/test/meta_split_service_test.cpp +++ b/src/meta/test/meta_split_service_test.cpp @@ -128,16 +128,16 @@ class meta_split_service_test : public meta_test_base error_code register_child(int32_t parent_index, ballot req_parent_ballot, bool wait_zk) { - partition_configuration parent_config; - parent_config.ballot = req_parent_ballot; - parent_config.last_committed_decree = 5; - parent_config.max_replica_count = 3; - parent_config.pid = gpid(app->app_id, parent_index); + partition_configuration parent_pc; + parent_pc.ballot = 
req_parent_ballot; + parent_pc.last_committed_decree = 5; + parent_pc.max_replica_count = 3; + parent_pc.pid = gpid(app->app_id, parent_index); - partition_configuration child_config; - child_config.ballot = PARENT_BALLOT + 1; - child_config.last_committed_decree = 5; - child_config.pid = gpid(app->app_id, parent_index + PARTITION_COUNT); + partition_configuration child_pc; + child_pc.ballot = PARENT_BALLOT + 1; + child_pc.last_committed_decree = 5; + child_pc.pid = gpid(app->app_id, parent_index + PARTITION_COUNT); // mock node state node_state node; @@ -147,8 +147,8 @@ class meta_split_service_test : public meta_test_base auto request = std::make_unique(); request->app.app_name = app->app_name; request->app.app_id = app->app_id; - request->parent_config = parent_config; - request->child_config = child_config; + request->parent_config = parent_pc; + request->child_config = child_pc; SET_IP_AND_HOST_PORT_BY_DNS(*request, primary, NODE); register_child_rpc rpc(std::move(request), RPC_CM_REGISTER_CHILD_REPLICA); @@ -207,17 +207,17 @@ class meta_split_service_test : public meta_test_base void mock_app_partition_split_context() { app->partition_count = NEW_PARTITION_COUNT; - app->partitions.resize(app->partition_count); + app->pcs.resize(app->partition_count); _ss->get_table_metric_entities().resize_partitions(app->app_id, app->partition_count); app->helpers->contexts.resize(app->partition_count); app->helpers->split_states.splitting_count = app->partition_count / 2; for (int i = 0; i < app->partition_count; ++i) { - app->helpers->contexts[i].config_owner = &app->partitions[i]; - app->partitions[i].pid = gpid(app->app_id, i); + app->helpers->contexts[i].pc = &app->pcs[i]; + app->pcs[i].pid = gpid(app->app_id, i); if (i >= app->partition_count / 2) { - app->partitions[i].ballot = invalid_ballot; + app->pcs[i].ballot = invalid_ballot; } else { - app->partitions[i].ballot = PARENT_BALLOT; + app->pcs[i].ballot = PARENT_BALLOT; app->helpers->contexts[i].stage = 
config_status::not_pending; app->helpers->split_states.status[i] = split_status::SPLITTING; } @@ -227,7 +227,7 @@ class meta_split_service_test : public meta_test_base void clear_app_partition_split_context() { app->partition_count = PARTITION_COUNT; - app->partitions.resize(app->partition_count); + app->pcs.resize(app->partition_count); _ss->get_table_metric_entities().resize_partitions(app->app_id, app->partition_count); app->helpers->contexts.resize(app->partition_count); app->helpers->split_states.splitting_count = 0; @@ -237,16 +237,16 @@ class meta_split_service_test : public meta_test_base void mock_only_one_partition_split(split_status::type split_status) { app->partition_count = NEW_PARTITION_COUNT; - app->partitions.resize(app->partition_count); + app->pcs.resize(app->partition_count); _ss->get_table_metric_entities().resize_partitions(app->app_id, app->partition_count); app->helpers->contexts.resize(app->partition_count); for (int i = 0; i < app->partition_count; ++i) { - app->helpers->contexts[i].config_owner = &app->partitions[i]; - app->partitions[i].pid = dsn::gpid(app->app_id, i); + app->helpers->contexts[i].pc = &app->pcs[i]; + app->pcs[i].pid = dsn::gpid(app->app_id, i); if (i >= app->partition_count / 2) { - app->partitions[i].ballot = invalid_ballot; + app->pcs[i].ballot = invalid_ballot; } else { - app->partitions[i].ballot = PARENT_BALLOT; + app->pcs[i].ballot = PARENT_BALLOT; app->helpers->contexts[i].stage = config_status::not_pending; } } @@ -256,7 +256,7 @@ class meta_split_service_test : public meta_test_base void mock_child_registered() { - app->partitions[CHILD_INDEX].ballot = PARENT_BALLOT; + app->pcs[CHILD_INDEX].ballot = PARENT_BALLOT; app->helpers->split_states.splitting_count--; app->helpers->split_states.status.erase(PARENT_INDEX); } @@ -358,11 +358,11 @@ class meta_split_service_test : public meta_test_base const int32_t app_id, const int32_t pidx) { - partition_configuration config; - config.max_replica_count = 3; - config.pid = 
gpid(app_id, pidx); - config.ballot = PARENT_BALLOT; - blob value = json::json_forwarder::encode(config); + partition_configuration pc; + pc.max_replica_count = 3; + pc.pid = gpid(app_id, pidx); + pc.ballot = PARENT_BALLOT; + blob value = json::json_forwarder::encode(pc); _ms->get_meta_storage()->create_node( app_root + "/" + boost::lexical_cast(app_id) + "/" + boost::lexical_cast(pidx), @@ -846,7 +846,7 @@ TEST_F(meta_split_service_failover_test, half_split_test) ASSERT_EQ(split_states.splitting_count, PARTITION_COUNT - 1); ASSERT_EQ(split_states.status.find(PARENT_INDEX), split_states.status.end()); ASSERT_EQ(app->partition_count, NEW_PARTITION_COUNT); - ASSERT_EQ(app->partitions.size(), NEW_PARTITION_COUNT); + ASSERT_EQ(app->pcs.size(), NEW_PARTITION_COUNT); } } // namespace replication diff --git a/src/meta/test/misc/misc.cpp b/src/meta/test/misc/misc.cpp index ef85d7e998..d4bfa25a0f 100644 --- a/src/meta/test/misc/misc.cpp +++ b/src/meta/test/misc/misc.cpp @@ -78,11 +78,11 @@ void verbose_apps(const app_mapper &input_apps) for (const auto &apps : input_apps) { const std::shared_ptr &app = apps.second; std::cout << apps.first << " " << app->partition_count << std::endl; - for (int i = 0; i < app->partition_count; ++i) { - std::cout << app->partitions[i].hp_secondaries.size() + 1 << " " - << app->partitions[i].hp_primary; - for (int j = 0; j < app->partitions[i].hp_secondaries.size(); ++j) { - std::cout << " " << app->partitions[i].hp_secondaries[j]; + CHECK_EQ(app->partition_count, app->pcs.size()); + for (const auto &pc : app->pcs) { + std::cout << pc.hp_secondaries.size() + 1 << " " << pc.hp_primary; + for (const auto &secondary : pc.hp_secondaries) { + std::cout << " " << secondary; } std::cout << std::endl; } @@ -101,7 +101,7 @@ void generate_node_mapper( for (auto &kv : input_apps) { const std::shared_ptr &app = kv.second; - for (const dsn::partition_configuration &pc : app->partitions) { + for (const auto &pc : app->pcs) { node_state *ns; if 
(pc.hp_primary) { ns = get_node_state(output_nodes, pc.hp_primary, true); @@ -119,7 +119,7 @@ void generate_node_mapper( void generate_app(/*out*/ std::shared_ptr &app, const std::vector &node_list) { - for (dsn::partition_configuration &pc : app->partitions) { + for (auto &pc : app->pcs) { pc.ballot = random32(1, 10000); std::vector indices(3, 0); indices[0] = random32(0, node_list.size() - 3); @@ -147,18 +147,18 @@ void generate_app_serving_replica_info(/*out*/ std::shared_ptrpartition_count; ++i) { - config_context &cc = app->helpers->contexts[i]; - dsn::partition_configuration &pc = app->partitions[i]; + auto &cc = app->helpers->contexts[i]; + auto &pc = app->pcs[i]; replica_info ri; snprintf(buffer, 256, "disk%u", dsn::rand::next_u32(1, total_disks)); ri.disk_tag = buffer; cc.collect_serving_replica(pc.hp_primary, ri); - for (const auto &hp : pc.hp_secondaries) { + for (const auto &secondary : pc.hp_secondaries) { snprintf(buffer, 256, "disk%u", dsn::rand::next_u32(1, total_disks)); ri.disk_tag = buffer; - cc.collect_serving_replica(hp, ri); + cc.collect_serving_replica(secondary, ri); } } } @@ -180,14 +180,14 @@ void generate_apps(/*out*/ dsn::replication::app_mapper &mapper, info.app_type = "test"; info.max_replica_count = 3; info.partition_count = random32(partitions_range.first, partitions_range.second); - std::shared_ptr the_app = app_state::create(info); - generate_app(the_app, node_list); + std::shared_ptr app = app_state::create(info); + generate_app(app, node_list); if (generate_serving_info) { - generate_app_serving_replica_info(the_app, disks_per_node); + generate_app_serving_replica_info(app, disks_per_node); } LOG_DEBUG("generated app, partitions({})", info.partition_count); - mapper.emplace(the_app->app_id, the_app); + mapper.emplace(app->app_id, app); } } @@ -408,19 +408,17 @@ void proposal_action_check_and_apply(const configuration_proposal_action &act, void migration_check_and_apply(app_mapper &apps, node_mapper &nodes, - migration_list &ml, + 
const migration_list &ml, nodes_fs_manager *manager) { int i = 0; - for (auto kv = ml.begin(); kv != ml.end(); ++kv) { - std::shared_ptr &proposal = kv->second; + for (const auto &[_, proposal] : ml) { LOG_DEBUG("the {}th round of proposal, gpid({})", i++, proposal->gpid); - std::shared_ptr &the_app = apps.find(proposal->gpid.get_app_id())->second; + const auto &app = apps.find(proposal->gpid.get_app_id())->second; - CHECK_EQ(proposal->gpid.get_app_id(), the_app->app_id); - CHECK_LT(proposal->gpid.get_partition_index(), the_app->partition_count); - dsn::partition_configuration &pc = - the_app->partitions[proposal->gpid.get_partition_index()]; + CHECK_EQ(proposal->gpid.get_app_id(), app->app_id); + CHECK_LT(proposal->gpid.get_partition_index(), app->partition_count); + const auto &pc = app->pcs[proposal->gpid.get_partition_index()]; CHECK(pc.hp_primary, ""); CHECK_EQ(pc.hp_secondaries.size(), 2); @@ -460,7 +458,7 @@ void app_mapper_compare(const app_mapper &mapper1, const app_mapper &mapper2) if (app1->status == dsn::app_status::AS_AVAILABLE) { CHECK_EQ(app1->partition_count, app2->partition_count); for (unsigned int i = 0; i < app1->partition_count; ++i) { - CHECK(is_partition_config_equal(app1->partitions[i], app2->partitions[i]), ""); + CHECK(is_partition_config_equal(app1->pcs[i], app2->pcs[i]), ""); } } } diff --git a/src/meta/test/misc/misc.h b/src/meta/test/misc/misc.h index 45ab38ac1f..41a583df43 100644 --- a/src/meta/test/misc/misc.h +++ b/src/meta/test/misc/misc.h @@ -111,7 +111,7 @@ void generate_apps(/*out*/ dsn::replication::app_mapper &apps, void migration_check_and_apply( /*in-out*/ dsn::replication::app_mapper &apps, /*in-out*/ dsn::replication::node_mapper &nodes, - /*in-out*/ dsn::replication::migration_list &ml, + /*in*/ const dsn::replication::migration_list &ml, /*in-out*/ nodes_fs_manager *manager); // when the test need to track the disk info, please input the fs_manager of all disks, diff --git a/src/meta/test/state_sync_test.cpp 
b/src/meta/test/state_sync_test.cpp index f0429317d9..1a7901560f 100644 --- a/src/meta/test/state_sync_test.cpp +++ b/src/meta/test/state_sync_test.cpp @@ -75,7 +75,7 @@ static void random_assign_partition_config(std::shared_ptr &app, }; int max_servers = (server_list.size() - 1) * 2 - 1; - for (dsn::partition_configuration &pc : app->partitions) { + for (auto &pc : app->pcs) { int start = 0; std::vector indices; for (int i = 0; i < max_replica_count && start <= max_servers; ++i) { @@ -169,7 +169,7 @@ void meta_service_test_app::state_sync_test() random_assign_partition_config(app, server_list, 3); if (app->status == dsn::app_status::AS_DROPPING) { for (int j = 0; j < app->partition_count; ++j) { - app->partitions[j].partition_flags = pc_flags::dropped; + app->pcs[j].partition_flags = pc_flags::dropped; } } } @@ -281,7 +281,7 @@ void meta_service_test_app::state_sync_test() dsn::gpid gpid = {15, 0}; dsn::partition_configuration pc; ASSERT_TRUE(ss2->query_configuration_by_gpid(gpid, pc)); - ASSERT_EQ(ss1->_all_apps[15]->partitions[0], pc); + ASSERT_EQ(ss1->_all_apps[15]->pcs[0], pc); // 1.2 dropped app if (!drop_set.empty()) { gpid.set_app_id(drop_set[0]); @@ -301,7 +301,7 @@ void meta_service_test_app::state_sync_test() ASSERT_EQ(app_created->partition_count, resp.partition_count); ASSERT_EQ(resp.partitions.size(), 3); for (int i = 1; i <= 3; ++i) - ASSERT_EQ(resp.partitions[i - 1], app_created->partitions[i]); + ASSERT_EQ(resp.partitions[i - 1], app_created->pcs[i]); // 2.2 no exist app req.app_name = "make_no_sense"; diff --git a/src/meta/test/update_configuration_test.cpp b/src/meta/test/update_configuration_test.cpp index 8f6223dec2..f4c8e9702e 100644 --- a/src/meta/test/update_configuration_test.cpp +++ b/src/meta/test/update_configuration_test.cpp @@ -161,8 +161,9 @@ class dummy_partition_guardian : public partition_guardian { action.type = config_type::CT_INVALID; const dsn::partition_configuration &pc = *get_config(*view.apps, gpid); - if (pc.hp_primary && 
pc.hp_secondaries.size() == 2) + if (pc.hp_primary && pc.hp_secondaries.size() == 2) { return pc_status::healthy; + } return pc_status::ill; } }; @@ -248,12 +249,12 @@ void meta_service_test_app::update_configuration_test() std::vector nodes; generate_node_list(nodes, 4, 4); - dsn::partition_configuration &pc0 = app->partitions[0]; + auto &pc0 = app->pcs[0]; SET_IP_AND_HOST_PORT_BY_DNS(pc0, primary, nodes[0]); SET_IPS_AND_HOST_PORTS_BY_DNS(pc0, secondaries, nodes[1], nodes[2]); pc0.ballot = 3; - dsn::partition_configuration &pc1 = app->partitions[1]; + auto &pc1 = app->pcs[1]; SET_IP_AND_HOST_PORT_BY_DNS(pc1, primary, nodes[1]); SET_IPS_AND_HOST_PORTS_BY_DNS(pc1, secondaries, nodes[0], nodes[2]); pc1.ballot = 3; @@ -326,7 +327,7 @@ void meta_service_test_app::adjust_dropped_size() generate_node_list(nodes, 10, 10); // first, the replica is healthy, and there are 2 dropped - dsn::partition_configuration &pc = app->partitions[0]; + auto &pc = app->pcs[0]; SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, nodes[0]); SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, nodes[1], nodes[2]); pc.ballot = 10; @@ -394,8 +395,8 @@ static void clone_app_mapper(app_mapper &output, const app_mapper &input) const std::shared_ptr &old_app = iter.second; dsn::app_info info = *old_app; std::shared_ptr new_app = app_state::create(info); - for (unsigned int i = 0; i != old_app->partition_count; ++i) - new_app->partitions[i] = old_app->partitions[i]; + CHECK_EQ(old_app->partition_count, old_app->pcs.size()); + new_app->pcs = old_app->pcs; output.emplace(new_app->app_id, new_app); } } @@ -498,12 +499,12 @@ void meta_service_test_app::cannot_run_balancer_test() info.partition_count = 1; info.status = dsn::app_status::AS_AVAILABLE; - std::shared_ptr the_app = app_state::create(info); - svc->_state->_all_apps.emplace(info.app_id, the_app); - svc->_state->_exist_apps.emplace(info.app_name, the_app); + std::shared_ptr app = app_state::create(info); + svc->_state->_all_apps.emplace(info.app_id, app); + 
svc->_state->_exist_apps.emplace(info.app_name, app); svc->_state->_table_metric_entities.create_entity(info.app_id, info.partition_count); - dsn::partition_configuration &pc = the_app->partitions[0]; + auto &pc = app->pcs[0]; SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, nodes[0]); SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, nodes[1], nodes[2]); @@ -535,11 +536,11 @@ void meta_service_test_app::cannot_run_balancer_test() // some apps are staging REGENERATE_NODE_MAPPER; - the_app->status = dsn::app_status::AS_DROPPING; + app->status = dsn::app_status::AS_DROPPING; ASSERT_FALSE(svc->_state->check_all_partitions()); // call function can run balancer - the_app->status = dsn::app_status::AS_AVAILABLE; + app->status = dsn::app_status::AS_AVAILABLE; ASSERT_TRUE(svc->_state->can_run_balancer()); // recover original FLAGS_min_live_node_count_for_unfreeze diff --git a/src/replica/backup/replica_backup_manager.cpp b/src/replica/backup/replica_backup_manager.cpp index c9349441ac..42882a2597 100644 --- a/src/replica/backup/replica_backup_manager.cpp +++ b/src/replica/backup/replica_backup_manager.cpp @@ -235,9 +235,9 @@ void replica_backup_manager::send_clear_request_to_secondaries(const gpid &pid, request.__set_pid(pid); request.__set_policy_name(policy_name); - for (const auto &target_address : _replica->_primary_states.membership.secondaries) { + for (const auto &secondary : _replica->_primary_states.pc.secondaries) { rpc::call_one_way_typed( - target_address, RPC_CLEAR_COLD_BACKUP, request, get_gpid().thread_hash()); + secondary, RPC_CLEAR_COLD_BACKUP, request, get_gpid().thread_hash()); } } diff --git a/src/replica/bulk_load/replica_bulk_loader.cpp b/src/replica/bulk_load/replica_bulk_loader.cpp index a3c27e99e4..e8eb03987e 100644 --- a/src/replica/bulk_load/replica_bulk_loader.cpp +++ b/src/replica/bulk_load/replica_bulk_loader.cpp @@ -188,14 +188,15 @@ void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &met LOG_INFO_PREFIX("start to broadcast 
group bulk load"); - for (const auto &hp : _replica->_primary_states.membership.hp_secondaries) { - if (hp == _stub->primary_host_port()) + for (const auto &secondary : _replica->_primary_states.pc.hp_secondaries) { + if (secondary == _stub->primary_host_port()) { continue; + } auto request = std::make_unique(); request->app_name = _replica->_app_info.app_name; - const auto &addr = dsn::dns_resolver::instance().resolve_address(hp); - SET_IP_AND_HOST_PORT(*request, target, addr, hp); + const auto &addr = dsn::dns_resolver::instance().resolve_address(secondary); + SET_IP_AND_HOST_PORT(*request, target, addr, secondary); _replica->_primary_states.get_replica_config(partition_status::PS_SECONDARY, request->config); request->cluster_name = meta_req.cluster_name; @@ -203,14 +204,14 @@ void replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &met request->meta_bulk_load_status = meta_req.meta_bulk_load_status; request->remote_root_path = meta_req.remote_root_path; - LOG_INFO_PREFIX("send group_bulk_load_request to {}({})", hp, addr); + LOG_INFO_PREFIX("send group_bulk_load_request to {}({})", secondary, addr); group_bulk_load_rpc rpc( std::move(request), RPC_GROUP_BULK_LOAD, 0_ms, 0, get_gpid().thread_hash()); auto callback_task = rpc.call(addr, tracker(), [this, rpc](error_code err) mutable { on_group_bulk_load_reply(err, rpc.request(), rpc.response()); }); - _replica->_primary_states.group_bulk_load_pending_replies[hp] = callback_task; + _replica->_primary_states.group_bulk_load_pending_replies[secondary] = callback_task; } } @@ -740,8 +741,8 @@ void replica_bulk_loader::handle_bulk_load_finish(bulk_load_status::type new_sta } if (status() == partition_status::PS_PRIMARY) { - for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { - _replica->_primary_states.reset_node_bulk_load_states(target_hp); + for (const auto &secondary : _replica->_primary_states.pc.hp_secondaries) { + 
_replica->_primary_states.reset_node_bulk_load_states(secondary); } } @@ -929,29 +930,29 @@ void replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_respo } SET_VALUE_FROM_IP_AND_HOST_PORT(response, group_bulk_load_state, - _replica->_primary_states.membership.primary, - _replica->_primary_states.membership.hp_primary, + _replica->_primary_states.pc.primary, + _replica->_primary_states.pc.hp_primary, primary_state); LOG_INFO_PREFIX("primary = {}, download progress = {}%, status = {}", - FMT_HOST_PORT_AND_IP(_replica->_primary_states.membership, primary), + FMT_HOST_PORT_AND_IP(_replica->_primary_states.pc, primary), primary_state.download_progress, primary_state.download_status); int32_t total_progress = primary_state.download_progress; - for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { + for (const auto &secondary : _replica->_primary_states.pc.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_hp]; + _replica->_primary_states.secondary_bulk_load_states[secondary]; int32_t s_progress = secondary_state.__isset.download_progress ? secondary_state.download_progress : 0; error_code s_status = secondary_state.__isset.download_status ? 
secondary_state.download_status : ERR_OK; LOG_INFO_PREFIX( - "secondary = {}, download progress = {}%, status={}", target_hp, s_progress, s_status); - SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, target_hp, secondary_state); + "secondary = {}, download progress = {}%, status={}", secondary, s_progress, s_status); + SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, secondary, secondary_state); total_progress += s_progress; } - total_progress /= _replica->_primary_states.membership.max_replica_count; + total_progress /= _replica->_primary_states.pc.max_replica_count; LOG_INFO_PREFIX("total download progress = {}%", total_progress); response.__set_total_download_progress(total_progress); } @@ -971,26 +972,26 @@ void replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_respon primary_state.__set_ingest_status(_replica->_app->get_ingestion_status()); SET_VALUE_FROM_IP_AND_HOST_PORT(response, group_bulk_load_state, - _replica->_primary_states.membership.primary, - _replica->_primary_states.membership.hp_primary, + _replica->_primary_states.pc.primary, + _replica->_primary_states.pc.hp_primary, primary_state); LOG_INFO_PREFIX("primary = {}, ingestion status = {}", - FMT_HOST_PORT_AND_IP(_replica->_primary_states.membership, primary), + FMT_HOST_PORT_AND_IP(_replica->_primary_states.pc, primary), enum_to_string(primary_state.ingest_status)); bool is_group_ingestion_finish = (primary_state.ingest_status == ingestion_status::IS_SUCCEED) && - (_replica->_primary_states.membership.hp_secondaries.size() + 1 == - _replica->_primary_states.membership.max_replica_count); - for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { + (_replica->_primary_states.pc.hp_secondaries.size() + 1 == + _replica->_primary_states.pc.max_replica_count); + for (const auto &secondary : _replica->_primary_states.pc.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_hp]; + 
_replica->_primary_states.secondary_bulk_load_states[secondary]; ingestion_status::type ingest_status = secondary_state.__isset.ingest_status ? secondary_state.ingest_status : ingestion_status::IS_INVALID; LOG_INFO_PREFIX( - "secondary = {}, ingestion status={}", target_hp, enum_to_string(ingest_status)); - SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, target_hp, secondary_state); + "secondary = {}, ingestion status={}", secondary, enum_to_string(ingest_status)); + SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, secondary, secondary_state); is_group_ingestion_finish &= (ingest_status == ingestion_status::IS_SUCCEED); } response.__set_is_group_ingestion_finished(is_group_ingestion_finish); @@ -1018,24 +1019,24 @@ void replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response) primary_state.__set_is_cleaned_up(is_cleaned_up()); SET_VALUE_FROM_IP_AND_HOST_PORT(response, group_bulk_load_state, - _replica->_primary_states.membership.primary, - _replica->_primary_states.membership.hp_primary, + _replica->_primary_states.pc.primary, + _replica->_primary_states.pc.hp_primary, primary_state); LOG_INFO_PREFIX("primary = {}, bulk load states cleaned_up = {}", - FMT_HOST_PORT_AND_IP(_replica->_primary_states.membership, primary), + FMT_HOST_PORT_AND_IP(_replica->_primary_states.pc, primary), primary_state.is_cleaned_up); - bool group_flag = (primary_state.is_cleaned_up) && - (_replica->_primary_states.membership.hp_secondaries.size() + 1 == - _replica->_primary_states.membership.max_replica_count); - for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { + bool group_flag = + (primary_state.is_cleaned_up) && (_replica->_primary_states.pc.hp_secondaries.size() + 1 == + _replica->_primary_states.pc.max_replica_count); + for (const auto &secondary : _replica->_primary_states.pc.hp_secondaries) { const auto &secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_hp]; + 
_replica->_primary_states.secondary_bulk_load_states[secondary]; bool is_cleaned_up = secondary_state.__isset.is_cleaned_up ? secondary_state.is_cleaned_up : false; LOG_INFO_PREFIX( - "secondary = {}, bulk load states cleaned_up = {}", target_hp, is_cleaned_up); - SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, target_hp, secondary_state); + "secondary = {}, bulk load states cleaned_up = {}", secondary, is_cleaned_up); + SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, secondary, secondary_state); group_flag &= is_cleaned_up; } LOG_INFO_PREFIX("group bulk load states cleaned_up = {}", group_flag); @@ -1057,22 +1058,22 @@ void replica_bulk_loader::report_group_is_paused(bulk_load_response &response) primary_state.__set_is_paused(_status == bulk_load_status::BLS_PAUSED); SET_VALUE_FROM_IP_AND_HOST_PORT(response, group_bulk_load_state, - _replica->_primary_states.membership.primary, - _replica->_primary_states.membership.hp_primary, + _replica->_primary_states.pc.primary, + _replica->_primary_states.pc.hp_primary, primary_state); LOG_INFO_PREFIX("primary = {}, bulk_load is_paused = {}", - FMT_HOST_PORT_AND_IP(_replica->_primary_states.membership, primary), + FMT_HOST_PORT_AND_IP(_replica->_primary_states.pc, primary), primary_state.is_paused); - bool group_is_paused = primary_state.is_paused && - (_replica->_primary_states.membership.hp_secondaries.size() + 1 == - _replica->_primary_states.membership.max_replica_count); - for (const auto &target_hp : _replica->_primary_states.membership.hp_secondaries) { + bool group_is_paused = + primary_state.is_paused && (_replica->_primary_states.pc.hp_secondaries.size() + 1 == + _replica->_primary_states.pc.max_replica_count); + for (const auto &secondary : _replica->_primary_states.pc.hp_secondaries) { partition_bulk_load_state secondary_state = - _replica->_primary_states.secondary_bulk_load_states[target_hp]; + _replica->_primary_states.secondary_bulk_load_states[secondary]; bool is_paused = 
secondary_state.__isset.is_paused ? secondary_state.is_paused : false; - LOG_INFO_PREFIX("secondary = {}, bulk_load is_paused = {}", target_hp, is_paused); - SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, target_hp, secondary_state); + LOG_INFO_PREFIX("secondary = {}, bulk_load is_paused = {}", secondary, is_paused); + SET_VALUE_FROM_HOST_PORT(response, group_bulk_load_state, secondary, secondary_state); group_is_paused &= is_paused; } LOG_INFO_PREFIX("group bulk load is_paused = {}", group_is_paused); diff --git a/src/replica/bulk_load/test/replica_bulk_loader_test.cpp b/src/replica/bulk_load/test/replica_bulk_loader_test.cpp index 6deeda9e4e..9c8bf3046f 100644 --- a/src/replica/bulk_load/test/replica_bulk_loader_test.cpp +++ b/src/replica/bulk_load/test/replica_bulk_loader_test.cpp @@ -236,13 +236,13 @@ class replica_bulk_loader_test : public replica_test_base void mock_primary_states() { mock_replica_config(partition_status::PS_PRIMARY); - partition_configuration config; - config.max_replica_count = 3; - config.pid = PID; - config.ballot = BALLOT; - SET_IP_AND_HOST_PORT_BY_DNS(config, primary, PRIMARY_HP); - SET_IPS_AND_HOST_PORTS_BY_DNS(config, secondaries, SECONDARY_HP, SECONDARY_HP2); - _replica->set_primary_partition_configuration(config); + partition_configuration pc; + pc.max_replica_count = 3; + pc.pid = PID; + pc.ballot = BALLOT; + SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, PRIMARY_HP); + SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, SECONDARY_HP, SECONDARY_HP2); + _replica->set_primary_partition_configuration(pc); } void create_local_metadata_file() @@ -773,7 +773,7 @@ TEST_P(replica_bulk_loader_test, report_group_ingestion_status_test) // report_group_context_clean_flag unit tests TEST_P(replica_bulk_loader_test, report_group_cleanup_flag_in_unhealthy_state) { - // _primary_states.membership.secondaries is empty + // _primary_states.pc.secondaries is empty mock_replica_config(partition_status::PS_PRIMARY); 
ASSERT_FALSE(test_report_group_cleaned_up()); } diff --git a/src/replica/duplication/replica_follower.cpp b/src/replica/duplication/replica_follower.cpp index 5d0c29e91f..4918f60e81 100644 --- a/src/replica/duplication/replica_follower.cpp +++ b/src/replica/duplication/replica_follower.cpp @@ -184,12 +184,12 @@ error_code replica_follower::update_master_replica_config(error_code err, query_ } // since the request just specify one partition, the result size is single - _master_replica_config = resp.partitions[0]; + _pc = resp.partitions[0]; LOG_INFO_PREFIX( "query master[{}] config successfully and update local config: remote={}, gpid={}", master_replica_name(), - FMT_HOST_PORT_AND_IP(_master_replica_config, primary), - _master_replica_config.pid); + FMT_HOST_PORT_AND_IP(_pc, primary), + _pc.pid); return ERR_OK; } @@ -199,16 +199,13 @@ void replica_follower::copy_master_replica_checkpoint() LOG_INFO_PREFIX("query master[{}] replica checkpoint info and start use nfs copy the data", master_replica_name()); learn_request request; - request.pid = _master_replica_config.pid; - dsn::message_ex *msg = dsn::message_ex::create_request( - RPC_QUERY_LAST_CHECKPOINT_INFO, 0, _master_replica_config.pid.thread_hash()); + request.pid = _pc.pid; + dsn::message_ex *msg = + dsn::message_ex::create_request(RPC_QUERY_LAST_CHECKPOINT_INFO, 0, _pc.pid.thread_hash()); dsn::marshall(msg, request); - rpc::call(_master_replica_config.primary, - msg, - &_tracker, - [&](error_code err, learn_response &&resp) mutable { - nfs_copy_checkpoint(err, std::move(resp)); - }); + rpc::call(_pc.primary, msg, &_tracker, [&](error_code err, learn_response &&resp) mutable { + nfs_copy_checkpoint(err, std::move(resp)); + }); } // ThreadPool: THREAD_POOL_DEFAULT diff --git a/src/replica/duplication/replica_follower.h b/src/replica/duplication/replica_follower.h index 70c5a7bfcc..acf978c7ad 100644 --- a/src/replica/duplication/replica_follower.h +++ b/src/replica/duplication/replica_follower.h @@ -60,7 +60,7 
@@ class replica_follower : replica_base std::string _master_cluster_name; std::string _master_app_name; std::vector _master_meta_list; - partition_configuration _master_replica_config; + partition_configuration _pc; bool need_duplicate{false}; @@ -78,11 +78,8 @@ class replica_follower : replica_base std::string master_replica_name() { std::string app_info = fmt::format("{}.{}", _master_cluster_name, _master_app_name); - if (_master_replica_config.hp_primary) { - return fmt::format("{}({}|{})", - app_info, - FMT_HOST_PORT_AND_IP(_master_replica_config, primary), - _master_replica_config.pid); + if (_pc.hp_primary) { + return fmt::format("{}({}|{})", app_info, FMT_HOST_PORT_AND_IP(_pc, primary), _pc.pid); } return app_info; } diff --git a/src/replica/duplication/test/replica_follower_test.cpp b/src/replica/duplication/test/replica_follower_test.cpp index 15728386ef..effcb7e3c1 100644 --- a/src/replica/duplication/test/replica_follower_test.cpp +++ b/src/replica/duplication/test/replica_follower_test.cpp @@ -99,7 +99,7 @@ class replica_follower_test : public duplication_test_base const partition_configuration &master_replica_config(replica_follower *follower) const { - return follower->_master_replica_config; + return follower->_pc; } error_code nfs_copy_checkpoint(replica_follower *follower, error_code err, learn_response resp) @@ -225,42 +225,42 @@ TEST_P(replica_follower_test, test_update_master_replica_config) ASSERT_FALSE(master_replica_config(follower).hp_primary); resp.partition_count = _app_info.partition_count; - partition_configuration p; - resp.partitions.emplace_back(p); - resp.partitions.emplace_back(p); + partition_configuration pc; + resp.partitions.emplace_back(pc); + resp.partitions.emplace_back(pc); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_DATA); ASSERT_FALSE(master_replica_config(follower).primary); ASSERT_FALSE(master_replica_config(follower).hp_primary); resp.partitions.clear(); - p.pid = gpid(2, 100); - 
resp.partitions.emplace_back(p); + pc.pid = gpid(2, 100); + resp.partitions.emplace_back(pc); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE); ASSERT_FALSE(master_replica_config(follower).primary); ASSERT_FALSE(master_replica_config(follower).hp_primary); resp.partitions.clear(); - RESET_IP_AND_HOST_PORT(p, primary); - p.pid = gpid(2, 1); - resp.partitions.emplace_back(p); + RESET_IP_AND_HOST_PORT(pc, primary); + pc.pid = gpid(2, 1); + resp.partitions.emplace_back(pc); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_STATE); ASSERT_FALSE(master_replica_config(follower).primary); ASSERT_FALSE(master_replica_config(follower).hp_primary); resp.partitions.clear(); - p.pid = gpid(2, 1); + pc.pid = gpid(2, 1); const host_port primary("localhost", 34801); const host_port secondary1("localhost", 34802); const host_port secondary2("localhost", 34803); - SET_IP_AND_HOST_PORT_BY_DNS(p, primary, primary); - SET_IPS_AND_HOST_PORTS_BY_DNS(p, secondaries, secondary1, secondary2); - resp.partitions.emplace_back(p); + SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, primary); + SET_IPS_AND_HOST_PORTS_BY_DNS(pc, secondaries, secondary1, secondary2); + resp.partitions.emplace_back(pc); ASSERT_EQ(update_master_replica_config(follower, resp), ERR_OK); - ASSERT_EQ(master_replica_config(follower).primary, p.primary); - ASSERT_EQ(master_replica_config(follower).hp_primary, p.hp_primary); - ASSERT_EQ(master_replica_config(follower).pid, p.pid); + ASSERT_EQ(master_replica_config(follower).primary, pc.primary); + ASSERT_EQ(master_replica_config(follower).hp_primary, pc.hp_primary); + ASSERT_EQ(master_replica_config(follower).pid, pc.pid); } TEST_P(replica_follower_test, test_nfs_copy_checkpoint) diff --git a/src/replica/replica.cpp b/src/replica/replica.cpp index be31df14fd..24d5cf77ef 100644 --- a/src/replica/replica.cpp +++ b/src/replica/replica.cpp @@ -369,7 +369,7 @@ void replica::init_state() _config.pid.set_app_id(0); 
_config.pid.set_partition_index(0); _config.status = partition_status::PS_INACTIVE; - _primary_states.membership.ballot = 0; + _primary_states.pc.ballot = 0; _create_time_ms = dsn_now_ms(); _last_config_change_time_ms = _create_time_ms; update_last_checkpoint_generate_time(); diff --git a/src/replica/replica.h b/src/replica/replica.h index 802d07ef36..4bb2ca5892 100644 --- a/src/replica/replica.h +++ b/src/replica/replica.h @@ -188,7 +188,7 @@ class replica : public serverlet, public ref_counter, public replica_ba // void on_config_proposal(configuration_update_request &proposal); void on_config_sync(const app_info &info, - const partition_configuration &config, + const partition_configuration &pc, split_status::type meta_split_status); void on_cold_backup(const backup_request &request, /*out*/ backup_response &response); @@ -444,7 +444,7 @@ class replica : public serverlet, public ref_counter, public replica_ba void remove(configuration_update_request &proposal); void update_configuration_on_meta_server(config_type::type type, const host_port &node, - partition_configuration &new_config); + partition_configuration &new_pc); void on_update_configuration_on_meta_server_reply(error_code err, dsn::message_ex *request, @@ -458,7 +458,7 @@ class replica : public serverlet, public ref_counter, public replica_ba void update_app_envs_internal(const std::map &envs); void query_app_envs(/*out*/ std::map &envs); - bool update_configuration(const partition_configuration &config); + bool update_configuration(const partition_configuration &pc); bool update_local_configuration(const replica_configuration &config, bool same_ballot = false); error_code update_init_info_ballot_and_decree(); diff --git a/src/replica/replica_2pc.cpp b/src/replica/replica_2pc.cpp index 5e7943a6b1..8f48438d1a 100644 --- a/src/replica/replica_2pc.cpp +++ b/src/replica/replica_2pc.cpp @@ -200,8 +200,8 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) 
LOG_INFO_PREFIX("receive bulk load ingestion request"); // bulk load ingestion request requires that all secondaries should be alive - if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < - _primary_states.membership.max_replica_count) { + if (static_cast(_primary_states.pc.hp_secondaries.size()) + 1 < + _primary_states.pc.max_replica_count) { response_client_write(request, ERR_NOT_ENOUGH_MEMBER); return; } @@ -209,7 +209,7 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling) _bulk_load_ingestion_start_time_ms = dsn_now_ms(); } - if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < + if (static_cast(_primary_states.pc.hp_secondaries.size()) + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count)) { response_client_write(request, ERR_NOT_ENOUGH_MEMBER); return; @@ -269,8 +269,8 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c break; } LOG_INFO_PREFIX("try to prepare bulk load mutation({})", mu->name()); - if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < - _primary_states.membership.max_replica_count) { + if (static_cast(_primary_states.pc.hp_secondaries.size()) + 1 < + _primary_states.pc.max_replica_count) { err = ERR_NOT_ENOUGH_MEMBER; break; } @@ -282,7 +282,7 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c // stop prepare if there are too few replicas unless it's a reconciliation // for reconciliation, we should ensure every prepared mutation to be committed // please refer to PacificA paper - if (static_cast(_primary_states.membership.hp_secondaries.size()) + 1 < + if (static_cast(_primary_states.pc.hp_secondaries.size()) + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count) && !reconciliation) { err = ERR_NOT_ENOUGH_MEMBER; @@ -300,8 +300,8 @@ void replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_c // remote prepare 
mu->set_prepare_ts(); mu->set_left_secondary_ack_count( - (unsigned int)_primary_states.membership.hp_secondaries.size()); - for (const auto &secondary : _primary_states.membership.hp_secondaries) { + static_cast(_primary_states.pc.hp_secondaries.size())); + for (const auto &secondary : _primary_states.pc.hp_secondaries) { send_prepare_message(secondary, partition_status::PS_SECONDARY, mu, diff --git a/src/replica/replica_backup.cpp b/src/replica/replica_backup.cpp index 2afae09a6c..79ec2cd0a0 100644 --- a/src/replica/replica_backup.cpp +++ b/src/replica/replica_backup.cpp @@ -256,10 +256,10 @@ void replica::on_cold_backup(const backup_request &request, /*out*/ backup_respo void replica::send_backup_request_to_secondary(const backup_request &request) { - for (const auto &target_address : _primary_states.membership.secondaries) { + for (const auto &secondary : _primary_states.pc.secondaries) { // primary will send backup_request to secondary periodically // so, we shouldn't handle the response - rpc::call_one_way_typed(target_address, RPC_COLD_BACKUP, request, get_gpid().thread_hash()); + rpc::call_one_way_typed(secondary, RPC_COLD_BACKUP, request, get_gpid().thread_hash()); } } diff --git a/src/replica/replica_config.cpp b/src/replica/replica_config.cpp index ab03c4c4ee..23767986b6 100644 --- a/src/replica/replica_config.cpp +++ b/src/replica/replica_config.cpp @@ -189,9 +189,9 @@ void replica::add_potential_secondary(const configuration_update_request &propos } CHECK_EQ(proposal.config.ballot, get_ballot()); - CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); - CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); + CHECK_EQ(proposal.config.pid, _primary_states.pc.pid); + CHECK_EQ(proposal.config.hp_primary, _primary_states.pc.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.pc.hp_secondaries, ""); host_port node; 
GET_HOST_PORT(proposal, node, node); @@ -199,8 +199,8 @@ void replica::add_potential_secondary(const configuration_update_request &propos CHECK(!_primary_states.check_exist(node, partition_status::PS_SECONDARY), "node = {}", node); int potential_secondaries_count = - _primary_states.membership.hp_secondaries.size() + _primary_states.learners.size(); - if (potential_secondaries_count >= _primary_states.membership.max_replica_count - 1) { + _primary_states.pc.hp_secondaries.size() + _primary_states.learners.size(); + if (potential_secondaries_count >= _primary_states.pc.max_replica_count - 1) { if (proposal.type == config_type::CT_ADD_SECONDARY) { if (_primary_states.learners.find(node) == _primary_states.learners.end()) { LOG_INFO_PREFIX( @@ -209,7 +209,7 @@ void replica::add_potential_secondary(const configuration_update_request &propos return; } } else if (proposal.type == config_type::CT_ADD_SECONDARY_FOR_LB) { - if (potential_secondaries_count >= _primary_states.membership.max_replica_count) { + if (potential_secondaries_count >= _primary_states.pc.max_replica_count) { LOG_INFO_PREFIX("only allow one extra (potential) secondary, ingnore new potential " "secondary proposal"); return; @@ -255,12 +255,12 @@ void replica::upgrade_to_secondary_on_primary(const ::dsn::host_port &node) { LOG_INFO_PREFIX("upgrade potential secondary {} to secondary", node); - partition_configuration new_config = _primary_states.membership; + partition_configuration new_pc = _primary_states.pc; // add secondary - ADD_IP_AND_HOST_PORT_BY_DNS(new_config, secondaries, node); + ADD_IP_AND_HOST_PORT_BY_DNS(new_pc, secondaries, node); - update_configuration_on_meta_server(config_type::CT_UPGRADE_TO_SECONDARY, node, new_config); + update_configuration_on_meta_server(config_type::CT_UPGRADE_TO_SECONDARY, node, new_pc); } void replica::downgrade_to_secondary_on_primary(configuration_update_request &proposal) @@ -269,9 +269,9 @@ void 
replica::downgrade_to_secondary_on_primary(configuration_update_request &pr return; } - CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); - CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); + CHECK_EQ(proposal.config.pid, _primary_states.pc.pid); + CHECK_EQ(proposal.config.hp_primary, _primary_states.pc.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.pc.hp_secondaries, ""); CHECK_EQ(proposal.hp_node, proposal.config.hp_primary); CHECK_EQ(proposal.node, proposal.config.primary); @@ -286,9 +286,9 @@ void replica::downgrade_to_inactive_on_primary(configuration_update_request &pro if (proposal.config.ballot != get_ballot() || status() != partition_status::PS_PRIMARY) return; - CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); - CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); + CHECK_EQ(proposal.config.pid, _primary_states.pc.pid); + CHECK_EQ(proposal.config.hp_primary, _primary_states.pc.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.pc.hp_secondaries, ""); host_port node; GET_HOST_PORT(proposal, node, node); @@ -314,9 +314,9 @@ void replica::remove(configuration_update_request &proposal) if (proposal.config.ballot != get_ballot() || status() != partition_status::PS_PRIMARY) return; - CHECK_EQ(proposal.config.pid, _primary_states.membership.pid); - CHECK_EQ(proposal.config.hp_primary, _primary_states.membership.hp_primary); - CHECK(proposal.config.hp_secondaries == _primary_states.membership.hp_secondaries, ""); + CHECK_EQ(proposal.config.pid, _primary_states.pc.pid); + CHECK_EQ(proposal.config.hp_primary, _primary_states.pc.hp_primary); + CHECK(proposal.config.hp_secondaries == _primary_states.pc.hp_secondaries, ""); host_port node; GET_HOST_PORT(proposal, node, 
node); @@ -375,24 +375,24 @@ void replica::on_remove(const replica_configuration &request) void replica::update_configuration_on_meta_server(config_type::type type, const host_port &node, - partition_configuration &new_config) + partition_configuration &new_pc) { // type should never be `CT_REGISTER_CHILD` // if this happens, it means serious mistake happened during partition split // assert here to stop split and avoid splitting wrong CHECK_NE_PREFIX(type, config_type::CT_REGISTER_CHILD); - new_config.last_committed_decree = last_committed_decree(); + new_pc.last_committed_decree = last_committed_decree(); if (type == config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT) { CHECK(status() == partition_status::PS_INACTIVE && _inactive_is_transient && _is_initializing, ""); - CHECK_EQ(new_config.hp_primary, node); + CHECK_EQ(new_pc.hp_primary, node); } else if (type != config_type::CT_ASSIGN_PRIMARY && type != config_type::CT_UPGRADE_TO_PRIMARY) { CHECK_EQ(status(), partition_status::PS_PRIMARY); - CHECK_EQ(new_config.ballot, _primary_states.membership.ballot); + CHECK_EQ(new_pc.ballot, _primary_states.pc.ballot); } // disable 2pc during reconfiguration @@ -406,7 +406,7 @@ void replica::update_configuration_on_meta_server(config_type::type type, std::shared_ptr request(new configuration_update_request); request->info = _app_info; - request->config = new_config; + request->config = new_pc; request->config.ballot++; request->type = type; SET_IP_AND_HOST_PORT_BY_DNS(*request, node, node); @@ -645,23 +645,24 @@ void replica::query_app_envs(/*out*/ std::map &envs) } } -bool replica::update_configuration(const partition_configuration &config) +bool replica::update_configuration(const partition_configuration &pc) { - CHECK_GE(config.ballot, get_ballot()); + CHECK_GE(pc.ballot, get_ballot()); replica_configuration rconfig; - replica_helper::get_replica_config(config, _stub->primary_host_port(), rconfig); + replica_helper::get_replica_config(pc, _stub->primary_host_port(), rconfig); 
if (rconfig.status == partition_status::PS_PRIMARY && (rconfig.ballot > get_ballot() || status() != partition_status::PS_PRIMARY)) { - _primary_states.reset_membership(config, config.hp_primary != _stub->primary_host_port()); + _primary_states.reset_membership(pc, pc.hp_primary != _stub->primary_host_port()); } - if (config.ballot > get_ballot() || + if (pc.ballot > get_ballot() || is_same_ballot_status_change_allowed(status(), rconfig.status)) { return update_local_configuration(rconfig, true); - } else + } else { return false; + } } bool replica::is_same_ballot_status_change_allowed(partition_status::type olds, @@ -1041,7 +1042,8 @@ bool replica::update_local_configuration(const replica_configuration &config, init_prepare(next, false); } - if (_primary_states.membership.hp_secondaries.size() + 1 < + CHECK(_primary_states.pc.__isset.hp_secondaries, ""); + if (_primary_states.pc.hp_secondaries.size() + 1 < _options->app_mutation_2pc_min_replica_count(_app_info.max_replica_count)) { std::vector queued; _primary_states.write_queue.clear(queued); @@ -1069,12 +1071,12 @@ bool replica::update_local_configuration_with_no_ballot_change(partition_status: // ThreadPool: THREAD_POOL_REPLICATION void replica::on_config_sync(const app_info &info, - const partition_configuration &config, + const partition_configuration &pc, split_status::type meta_split_status) { LOG_DEBUG_PREFIX("configuration sync"); // no outdated update - if (config.ballot < get_ballot()) + if (pc.ballot < get_ballot()) return; update_app_max_replica_count(info.max_replica_count); @@ -1091,25 +1093,25 @@ void replica::on_config_sync(const app_info &info, } else { if (_is_initializing) { // in initializing, when replica still primary, need to inc ballot - if (config.hp_primary == _stub->primary_host_port() && + if (pc.hp_primary == _stub->primary_host_port() && status() == partition_status::PS_INACTIVE && _inactive_is_transient) { 
update_configuration_on_meta_server(config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT, - config.hp_primary, - const_cast(config)); + pc.hp_primary, + const_cast(pc)); return; } _is_initializing = false; } - update_configuration(config); + update_configuration(pc); if (status() == partition_status::PS_INACTIVE && !_inactive_is_transient) { - if (config.hp_primary == _stub->primary_host_port() // dead primary - || !config.hp_primary // primary is dead (otherwise let primary remove this) + if (pc.hp_primary == _stub->primary_host_port() // dead primary + || !pc.hp_primary // primary is dead (otherwise let primary remove this) ) { LOG_INFO_PREFIX("downgrade myself as inactive is not transient, remote_config({})", - boost::lexical_cast(config)); - _stub->remove_replica_on_meta_server(_app_info, config); + boost::lexical_cast(pc)); + _stub->remove_replica_on_meta_server(_app_info, pc); } else { LOG_INFO_PREFIX("state is non-transient inactive, waiting primary to remove me"); } diff --git a/src/replica/replica_context.cpp b/src/replica/replica_context.cpp index a1fda93147..1d483d7015 100644 --- a/src/replica/replica_context.cpp +++ b/src/replica/replica_context.cpp @@ -67,7 +67,7 @@ void primary_context::cleanup(bool clean_pending_mutations) } group_bulk_load_pending_replies.clear(); - membership.ballot = 0; + pc.ballot = 0; cleanup_bulk_load_states(); @@ -91,25 +91,26 @@ void primary_context::do_cleanup_pending_mutations(bool clean_pending_mutations) } } -void primary_context::reset_membership(const partition_configuration &config, bool clear_learners) +void primary_context::reset_membership(const partition_configuration &new_pc, bool clear_learners) { statuses.clear(); if (clear_learners) { learners.clear(); } - if (config.ballot > membership.ballot) - next_learning_version = (((uint64_t)config.ballot) << 32) + 1; - else + if (new_pc.ballot > pc.ballot) { + next_learning_version = (((uint64_t)new_pc.ballot) << 32) + 1; + } else { ++next_learning_version; + } - membership = 
config; + pc = new_pc; - if (membership.hp_primary) { - statuses[membership.hp_primary] = partition_status::PS_PRIMARY; + if (pc.hp_primary) { + statuses[pc.hp_primary] = partition_status::PS_PRIMARY; } - for (auto it = config.hp_secondaries.begin(); it != config.hp_secondaries.end(); ++it) { + for (auto it = new_pc.hp_secondaries.begin(); it != new_pc.hp_secondaries.end(); ++it) { statuses[*it] = partition_status::PS_SECONDARY; learners.erase(*it); } @@ -123,9 +124,9 @@ void primary_context::get_replica_config(partition_status::type st, /*out*/ replica_configuration &config, uint64_t learner_signature /*= invalid_signature*/) { - config.pid = membership.pid; - SET_OBJ_IP_AND_HOST_PORT(config, primary, membership, primary); - config.ballot = membership.ballot; + config.pid = pc.pid; + SET_OBJ_IP_AND_HOST_PORT(config, primary, pc, primary); + config.ballot = pc.ballot; config.status = st; config.learner_signature = learner_signature; } @@ -134,9 +135,9 @@ bool primary_context::check_exist(const ::dsn::host_port &node, partition_status { switch (st) { case partition_status::PS_PRIMARY: - return membership.hp_primary == node; + return pc.hp_primary == node; case partition_status::PS_SECONDARY: - return utils::contains(membership.hp_secondaries, node); + return utils::contains(pc.hp_secondaries, node); case partition_status::PS_POTENTIAL_SECONDARY: return learners.find(node) != learners.end(); default: @@ -176,7 +177,7 @@ bool primary_context::secondary_disk_abnormal() const for (const auto &kv : secondary_disk_status) { if (kv.second != disk_status::NORMAL) { LOG_INFO("partition[{}] secondary[{}] disk space is {}", - membership.pid, + pc.pid, kv.first, enum_to_string(kv.second)); return true; diff --git a/src/replica/replica_context.h b/src/replica/replica_context.h index 0a3b499243..8fba146646 100644 --- a/src/replica/replica_context.h +++ b/src/replica/replica_context.h @@ -100,7 +100,7 @@ class primary_context void cleanup(bool clean_pending_mutations = true); bool 
is_cleaned(); - void reset_membership(const partition_configuration &config, bool clear_learners); + void reset_membership(const partition_configuration &new_pc, bool clear_learners); void get_replica_config(partition_status::type status, /*out*/ replica_configuration &config, uint64_t learner_signature = invalid_signature); @@ -120,7 +120,7 @@ class primary_context public: // membership mgr, including learners - partition_configuration membership; + partition_configuration pc; node_statuses statuses; learner_map learners; uint64_t next_learning_version; diff --git a/src/replica/replica_failover.cpp b/src/replica/replica_failover.cpp index 994b38fb93..e563336f46 100644 --- a/src/replica/replica_failover.cpp +++ b/src/replica/replica_failover.cpp @@ -58,7 +58,7 @@ void replica::handle_local_failure(error_code error) } if (status() == partition_status::PS_PRIMARY) { - _stub->remove_replica_on_meta_server(_app_info, _primary_states.membership); + _stub->remove_replica_on_meta_server(_app_info, _primary_states.pc); } update_local_configuration_with_no_ballot_change(partition_status::PS_ERROR); @@ -88,7 +88,7 @@ void replica::handle_remote_failure(partition_status::type st, configuration_update_request request; SET_IP_AND_HOST_PORT_BY_DNS(request, node, node); request.type = config_type::CT_DOWNGRADE_TO_INACTIVE; - request.config = _primary_states.membership; + request.config = _primary_states.pc; downgrade_to_inactive_on_primary(request); } break; diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp index 2abd52b443..32f5141e17 100644 --- a/src/replica/replica_stub.cpp +++ b/src/replica/replica_stub.cpp @@ -1454,7 +1454,7 @@ void replica_stub::on_node_query_reply_scatter2(replica_stub_ptr this_, gpid id) } void replica_stub::remove_replica_on_meta_server(const app_info &info, - const partition_configuration &config) + const partition_configuration &pc) { if (FLAGS_fd_disabled) { return; @@ -1464,12 +1464,12 @@ void 
replica_stub::remove_replica_on_meta_server(const app_info &info, std::shared_ptr request(new configuration_update_request); request->info = info; - request->config = config; + request->config = pc; request->config.ballot++; SET_IP_AND_HOST_PORT(*request, node, primary_address(), _primary_host_port); request->type = config_type::CT_DOWNGRADE_TO_INACTIVE; - if (_primary_host_port == config.hp_primary) { + if (_primary_host_port == pc.hp_primary) { RESET_IP_AND_HOST_PORT(request->config, primary); } else if (replica_helper::remove_node(primary_address(), request->config.secondaries) && replica_helper::remove_node(_primary_host_port, request->config.hp_secondaries)) { diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h index 516bc2303a..116ab350fc 100644 --- a/src/replica/replica_stub.h +++ b/src/replica/replica_stub.h @@ -341,7 +341,7 @@ class replica_stub : public serverlet, public ref_counter void on_node_query_reply_scatter(replica_stub_ptr this_, const configuration_update_request &config); void on_node_query_reply_scatter2(replica_stub_ptr this_, gpid id); - void remove_replica_on_meta_server(const app_info &info, const partition_configuration &config); + void remove_replica_on_meta_server(const app_info &info, const partition_configuration &pc); task_ptr begin_open_replica(const app_info &app, gpid id, const std::shared_ptr &req, diff --git a/src/replica/split/replica_split_manager.cpp b/src/replica/split/replica_split_manager.cpp index e0b193692f..e59096e4d4 100644 --- a/src/replica/split/replica_split_manager.cpp +++ b/src/replica/split/replica_split_manager.cpp @@ -776,11 +776,11 @@ void replica_split_manager::update_child_group_partition_count( } if (!_replica->_primary_states.learners.empty() || - _replica->_primary_states.membership.hp_secondaries.size() + 1 < - _replica->_primary_states.membership.max_replica_count) { + _replica->_primary_states.pc.hp_secondaries.size() + 1 < + _replica->_primary_states.pc.max_replica_count) { 
LOG_ERROR_PREFIX("there are {} learners or not have enough secondaries(count is {})", _replica->_primary_states.learners.size(), - _replica->_primary_states.membership.hp_secondaries.size()); + _replica->_primary_states.pc.hp_secondaries.size()); parent_handle_split_error( "update_child_group_partition_count failed, have learner or lack of secondary", true); return; @@ -987,17 +987,17 @@ void replica_split_manager::register_child_on_meta(ballot b) // on primary paren return; } - partition_configuration child_config = _replica->_primary_states.membership; - child_config.ballot++; - child_config.last_committed_decree = 0; - CLEAR_IP_AND_HOST_PORT(child_config, last_drops); - child_config.pid.set_partition_index(_replica->_app_info.partition_count + - get_gpid().get_partition_index()); + auto child_pc = _replica->_primary_states.pc; + child_pc.ballot++; + child_pc.last_committed_decree = 0; + CLEAR_IP_AND_HOST_PORT(child_pc, last_drops); + child_pc.pid.set_partition_index(_replica->_app_info.partition_count + + get_gpid().get_partition_index()); register_child_request request; request.app = _replica->_app_info; - request.child_config = child_config; - request.parent_config = _replica->_primary_states.membership; + request.child_config = child_pc; + request.parent_config = _replica->_primary_states.pc; SET_IP_AND_HOST_PORT(request, primary, _stub->primary_address(), _stub->primary_host_port()); // reject client request @@ -1141,8 +1141,7 @@ void replica_split_manager::on_register_child_on_meta_reply( } // ThreadPool: THREAD_POOL_REPLICATION -void replica_split_manager::child_partition_active( - const partition_configuration &config) // on child +void replica_split_manager::child_partition_active(const partition_configuration &pc) // on child { if (status() != partition_status::PS_PARTITION_SPLIT) { LOG_WARNING_PREFIX("child partition has been active, status={}", enum_to_string(status())); @@ -1151,7 +1150,7 @@ void replica_split_manager::child_partition_active( 
_replica->_primary_states.last_prepare_decree_on_new_primary = _replica->_prepare_list->max_decree(); - _replica->update_configuration(config); + _replica->update_configuration(pc); METRIC_VAR_INCREMENT(splitting_successful_count); LOG_INFO_PREFIX("child partition is active, status={}", enum_to_string(status())); } @@ -1223,13 +1222,13 @@ void replica_split_manager::trigger_primary_parent_split( _meta_split_status = meta_split_status; if (meta_split_status == split_status::SPLITTING) { if (!_replica->_primary_states.learners.empty() || - _replica->_primary_states.membership.hp_secondaries.size() + 1 < - _replica->_primary_states.membership.max_replica_count) { + _replica->_primary_states.pc.hp_secondaries.size() + 1 < + _replica->_primary_states.pc.max_replica_count) { LOG_WARNING_PREFIX( "there are {} learners or not have enough secondaries(count is {}), wait for " "next round", _replica->_primary_states.learners.size(), - _replica->_primary_states.membership.hp_secondaries.size()); + _replica->_primary_states.pc.hp_secondaries.size()); return; } @@ -1508,7 +1507,7 @@ void replica_split_manager::primary_parent_handle_stop_split( } } // all secondaries have already stop split succeed - if (count == _replica->_primary_states.membership.max_replica_count - 1) { + if (count == _replica->_primary_states.pc.max_replica_count - 1) { _replica->_primary_states.cleanup_split_states(); parent_send_notify_stop_request(req->meta_split_status); } diff --git a/src/replica/split/replica_split_manager.h b/src/replica/split/replica_split_manager.h index 982e1c2cb3..09c04c3a37 100644 --- a/src/replica/split/replica_split_manager.h +++ b/src/replica/split/replica_split_manager.h @@ -150,7 +150,7 @@ class replica_split_manager : replica_base void parent_send_register_request(const register_child_request &request); // child partition has been registered on meta_server, could be active - void child_partition_active(const partition_configuration &config); + void 
child_partition_active(const partition_configuration &pc); // return true if parent status is valid bool parent_check_states(); diff --git a/src/replica/split/test/replica_split_test.cpp b/src/replica/split/test/replica_split_test.cpp index 61f8010788..d7d7cdec1c 100644 --- a/src/replica/split/test/replica_split_test.cpp +++ b/src/replica/split/test/replica_split_test.cpp @@ -186,16 +186,16 @@ class replica_split_test : public replica_test_base void mock_parent_primary_configuration(bool lack_of_secondary = false) { - partition_configuration config; - config.max_replica_count = 3; - config.pid = PARENT_GPID; - config.ballot = INIT_BALLOT; - SET_IP_AND_HOST_PORT_BY_DNS(config, primary, PRIMARY); - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY); + partition_configuration pc; + pc.max_replica_count = 3; + pc.pid = PARENT_GPID; + pc.ballot = INIT_BALLOT; + SET_IP_AND_HOST_PORT_BY_DNS(pc, primary, PRIMARY); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY); if (!lack_of_secondary) { - ADD_IP_AND_HOST_PORT_BY_DNS(config, secondaries, SECONDARY2); + ADD_IP_AND_HOST_PORT_BY_DNS(pc, secondaries, SECONDARY2); } - _parent_replica->set_primary_partition_configuration(config); + _parent_replica->set_primary_partition_configuration(pc); } void mock_update_child_partition_count_request(update_child_group_partition_count_request &req, @@ -453,15 +453,15 @@ class replica_split_test : public replica_test_base req.partition_count = OLD_PARTITION_COUNT; req.pid = PARENT_GPID; - partition_configuration child_config; - child_config.pid = CHILD_GPID; - child_config.ballot = INIT_BALLOT + 1; - child_config.last_committed_decree = 0; + partition_configuration child_pc; + child_pc.pid = CHILD_GPID; + child_pc.ballot = INIT_BALLOT + 1; + child_pc.last_committed_decree = 0; query_child_state_response resp; resp.err = ERR_OK; resp.__set_partition_count(NEW_PARTITION_COUNT); - resp.__set_child_config(child_config); + resp.__set_child_config(child_pc); 
_parent_split_mgr->on_query_child_state_reply(ERR_OK, req, resp); _parent_split_mgr->tracker()->wait_outstanding_tasks(); diff --git a/src/replica/storage/simple_kv/test/checker.cpp b/src/replica/storage/simple_kv/test/checker.cpp index ae59ec4958..de6e52c552 100644 --- a/src/replica/storage/simple_kv/test/checker.cpp +++ b/src/replica/storage/simple_kv/test/checker.cpp @@ -323,7 +323,7 @@ bool test_checker::get_current_config(parti_config &config) meta_service_app *meta = meta_leader(); if (meta == nullptr) return false; - partition_configuration c; + partition_configuration pc; // we should never try to acquire lock when we are in checker. Because we are the only // thread that is running. @@ -332,11 +332,8 @@ bool test_checker::get_current_config(parti_config &config) // the rDSN's //"enqueue,dequeue and lock..." - // meta->_service->_state->query_configuration_by_gpid(g_default_gpid, c); const meta_view view = meta->_service->_state->get_meta_view(); - const partition_configuration *pc = get_config(*(view.apps), g_default_gpid); - c = *pc; - config.convert_from(c); + config.convert_from(*get_config(*(view.apps), g_default_gpid)); return true; } diff --git a/src/replica/storage/simple_kv/test/common.cpp b/src/replica/storage/simple_kv/test/common.cpp index 6564e7fb72..61dd9a5131 100644 --- a/src/replica/storage/simple_kv/test/common.cpp +++ b/src/replica/storage/simple_kv/test/common.cpp @@ -315,13 +315,13 @@ bool parti_config::from_string(const std::string &str) return true; } -void parti_config::convert_from(const partition_configuration &c) +void parti_config::convert_from(const partition_configuration &pc) { - pid = c.pid; - ballot = c.ballot; - primary = address_to_node(c.hp_primary); - for (auto &s : c.hp_secondaries) { - secondaries.push_back(address_to_node(s)); + pid = pc.pid; + ballot = pc.ballot; + primary = address_to_node(pc.hp_primary); + for (const auto &secondary : pc.hp_secondaries) { + secondaries.push_back(address_to_node(secondary)); } 
std::sort(secondaries.begin(), secondaries.end()); } diff --git a/src/replica/storage/simple_kv/test/common.h b/src/replica/storage/simple_kv/test/common.h index 2a0acf3b89..633a8b7e33 100644 --- a/src/replica/storage/simple_kv/test/common.h +++ b/src/replica/storage/simple_kv/test/common.h @@ -198,7 +198,7 @@ struct parti_config bool operator<(const parti_config &o) const { return pid == o.pid && ballot < o.ballot; } std::string to_string() const; bool from_string(const std::string &str); - void convert_from(const partition_configuration &c); + void convert_from(const partition_configuration &pc); friend std::ostream &operator<<(std::ostream &os, const parti_config &pc) { diff --git a/src/replica/test/mock_utils.h b/src/replica/test/mock_utils.h index 01c3bc2ae8..e89e6c7f98 100644 --- a/src/replica/test/mock_utils.h +++ b/src/replica/test/mock_utils.h @@ -180,9 +180,9 @@ class mock_replica : public replica void prepare_list_commit_hard(decree d) { _prepare_list->commit(d, COMMIT_TO_DECREE_HARD); } decree get_app_last_committed_decree() { return _app->last_committed_decree(); } void set_app_last_committed_decree(decree d) { _app->_last_committed_decree = d; } - void set_primary_partition_configuration(partition_configuration &pconfig) + void set_primary_partition_configuration(partition_configuration &pc) { - _primary_states.membership = pconfig; + _primary_states.pc = pc; } partition_bulk_load_state get_secondary_bulk_load_state(const host_port &node) { diff --git a/src/replica/test/open_replica_test.cpp b/src/replica/test/open_replica_test.cpp index 4df92dda5a..75094de5e9 100644 --- a/src/replica/test/open_replica_test.cpp +++ b/src/replica/test/open_replica_test.cpp @@ -72,15 +72,15 @@ TEST_P(open_replica_test, open_replica_add_decree_and_ballot_check) _replica->register_service(); - partition_configuration config; - config.pid = pid; - config.ballot = test.b; - config.last_committed_decree = test.last_committed_decree; + partition_configuration pc; + pc.pid = 
pid; + pc.ballot = test.b; + pc.last_committed_decree = test.last_committed_decree; auto as = app_state::create(ai); auto req = std::make_shared(); req->info = *as; - req->config = config; + req->config = pc; req->type = config_type::CT_ASSIGN_PRIMARY; SET_IP_AND_HOST_PORT_BY_DNS(*req, node, node); if (test.expect_crash) { diff --git a/src/server/available_detector.cpp b/src/server/available_detector.cpp index b61b2f078d..1052b0ff7a 100644 --- a/src/server/available_detector.cpp +++ b/src/server/available_detector.cpp @@ -20,6 +20,7 @@ #include "available_detector.h" #include +#include // IWYU pragma: keep // IWYU pragma: no_include #include #include @@ -31,12 +32,11 @@ #include #include -#include // IWYU pragma: keep - #include "base/pegasus_key_schema.h" #include "client/replication_ddl_client.h" #include "common/common.h" #include "common/replication_other_types.h" +#include "dsn.layer2_types.h" #include "pegasus/client.h" #include "perf_counter/perf_counter.h" #include "result_writer.h" @@ -266,8 +266,8 @@ void available_detector::report_availability_info() bool available_detector::generate_hash_keys() { // get app_id and partition_count. 
- auto err = - _ddl_client->list_app(FLAGS_available_detect_app, _app_id, _partition_count, partitions); + std::vector<::dsn::partition_configuration> pcs; + auto err = _ddl_client->list_app(FLAGS_available_detect_app, _app_id, _partition_count, pcs); if (err == ::dsn::ERR_OK && _app_id >= 0) { _hash_keys.clear(); for (auto pidx = 0; pidx < _partition_count; pidx++) { diff --git a/src/server/available_detector.h b/src/server/available_detector.h index 075c271dd2..5de60d150e 100644 --- a/src/server/available_detector.h +++ b/src/server/available_detector.h @@ -25,7 +25,6 @@ #include #include -#include "dsn.layer2_types.h" #include "perf_counter/perf_counter_wrapper.h" #include "runtime/rpc/rpc_host_port.h" #include "runtime/task/task.h" @@ -86,7 +85,6 @@ class available_detector std::vector<::dsn::task_ptr> _detect_tasks; int32_t _app_id; int32_t _partition_count; - std::vector<::dsn::partition_configuration> partitions; std::string _send_alert_email_cmd; std::string _send_availability_info_email_cmd; diff --git a/src/server/hotspot_partition_calculator.cpp b/src/server/hotspot_partition_calculator.cpp index 93ab165d22..b2b71e6987 100644 --- a/src/server/hotspot_partition_calculator.cpp +++ b/src/server/hotspot_partition_calculator.cpp @@ -216,24 +216,23 @@ void hotspot_partition_calculator::send_detect_hotkey_request( int app_id = -1; int partition_count = -1; - std::vector partitions; - _shell_context->ddl_client->list_app(app_name, app_id, partition_count, partitions); + std::vector pcs; + _shell_context->ddl_client->list_app(app_name, app_id, partition_count, pcs); dsn::replication::detect_hotkey_response resp; dsn::replication::detect_hotkey_request req; req.type = hotkey_type; req.action = action; req.pid = dsn::gpid(app_id, partition_index); - auto error = _shell_context->ddl_client->detect_hotkey( - partitions[partition_index].hp_primary, req, resp); + auto error = + _shell_context->ddl_client->detect_hotkey(pcs[partition_index].hp_primary, req, resp); 
LOG_INFO("{} {} hotkey detection in {}.{}, server: {}", (action == dsn::replication::detect_action::STOP) ? "Stop" : "Start", (hotkey_type == dsn::replication::hotkey_type::WRITE) ? "write" : "read", app_name, partition_index, - FMT_HOST_PORT_AND_IP(partitions[partition_index], primary)); - + FMT_HOST_PORT_AND_IP(pcs[partition_index], primary)); if (error != dsn::ERR_OK) { LOG_ERROR("Hotkey detect rpc sending failed, in {}.{}, error_hint:{}", app_name, diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 0411f5c170..0a5440b400 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -1562,7 +1562,7 @@ inline stat_var_map create_rates(row_data &row) // Given all tables, create all aggregations needed for the table-level stats. All selected // partitions should have their primary replicas on this node. inline std::unique_ptr create_table_aggregate_stats_calcs( - const std::map> &table_partitions, + const std::map> &pcs_by_appid, const dsn::host_port &node, const std::string &entity_type, std::vector &rows) @@ -1584,18 +1584,18 @@ inline std::unique_ptr create_table_aggregate_stats_calcs processor.first->emplace(row.app_id, processor.second(row)); } - const auto &table = table_partitions.find(row.app_id); - CHECK(table != table_partitions.end(), - "table could not be found in table_partitions: table_id={}", + const auto &iter = pcs_by_appid.find(row.app_id); + CHECK(iter != pcs_by_appid.end(), + "table could not be found in pcs_by_appid: table_id={}", row.app_id); - for (const auto &partition : table->second) { - if (partition.hp_primary != node) { + for (const auto &pc : iter->second) { + if (pc.hp_primary != node) { // Ignore once the replica of the metrics is not the primary of the partition. continue; } - partitions.insert(partition.pid); + partitions.insert(pc.pid); } } @@ -1610,18 +1610,18 @@ inline std::unique_ptr create_table_aggregate_stats_calcs // stats. 
All selected partitions should have their primary replicas on this node. inline std::unique_ptr create_partition_aggregate_stats_calcs(const int32_t table_id, - const std::vector &partitions, + const std::vector &pcs, const dsn::host_port &node, const std::string &entity_type, std::vector &rows) { - CHECK_EQ(rows.size(), partitions.size()); + CHECK_EQ(rows.size(), pcs.size()); partition_stat_map sums; partition_stat_map increases; partition_stat_map rates; for (size_t i = 0; i < rows.size(); ++i) { - if (partitions[i].hp_primary != node) { + if (pcs[i].hp_primary != node) { // Ignore once the replica of the metrics is not the primary of the partition. continue; } @@ -1791,13 +1791,13 @@ inline bool get_apps_and_nodes(shell_context *sc, inline bool get_app_partitions(shell_context *sc, const std::vector<::dsn::app_info> &apps, - std::map> &app_partitions) + std::map> &pcs_by_appid) { for (const ::dsn::app_info &app : apps) { int32_t app_id = 0; int32_t partition_count = 0; dsn::error_code err = sc->ddl_client->list_app( - app.app_name, app_id, partition_count, app_partitions[app.app_id]); + app.app_name, app_id, partition_count, pcs_by_appid[app.app_id]); if (err != ::dsn::ERR_OK) { LOG_ERROR("list app {} failed, error = {}", app.app_name, err); return false; @@ -1850,8 +1850,8 @@ inline bool get_app_partition_stat(shell_context *sc, } // get app_id --> partitions - std::map> app_partitions; - if (!get_app_partitions(sc, apps, app_partitions)) { + std::map> pcs_by_appid; + if (!get_app_partitions(sc, apps, pcs_by_appid)) { return false; } @@ -1875,8 +1875,8 @@ inline bool get_app_partition_stat(shell_context *sc, if (parse_app_pegasus_perf_counter_name( m.name, app_id_x, partition_index_x, counter_name)) { // only primary partition will be counted - auto find = app_partitions.find(app_id_x); - if (find != app_partitions.end() && + const auto find = pcs_by_appid.find(app_id_x); + if (find != pcs_by_appid.end() && find->second[partition_index_x].hp_primary == 
nodes[i].hp) { row_data &row = rows[app_id_name[app_id_x]][partition_index_x]; row.row_name = std::to_string(partition_index_x); @@ -1914,8 +1914,8 @@ get_table_stats(shell_context *sc, uint32_t sample_interval_ms, std::vector> table_partitions; - if (!get_app_partitions(sc, apps, table_partitions)) { + std::map> pcs_by_appid; + if (!get_app_partitions(sc, apps, pcs_by_appid)) { return false; } @@ -1929,15 +1929,14 @@ get_table_stats(shell_context *sc, uint32_t sample_interval_ms, std::vectoraggregate_metrics(results_start[i].body(), results_end[i].body()), nodes[i], @@ -1961,13 +1960,13 @@ inline bool get_partition_stats(shell_context *sc, int32_t table_id = 0; int32_t partition_count = 0; - std::vector partitions; - const auto &err = sc->ddl_client->list_app(table_name, table_id, partition_count, partitions); + std::vector pcs; + const auto &err = sc->ddl_client->list_app(table_name, table_id, partition_count, pcs); if (err != ::dsn::ERR_OK) { LOG_ERROR("list app {} failed, error = {}", table_name, err); return false; } - CHECK_EQ(partitions.size(), partition_count); + CHECK_EQ(pcs.size(), partition_count); const auto &query_string = row_data_filters(table_id).to_query_string(); const auto &results_start = get_metrics(nodes, query_string); @@ -1986,8 +1985,8 @@ inline bool get_partition_stats(shell_context *sc, RETURN_SHELL_IF_GET_METRICS_FAILED( results_end[i], nodes[i], "ending row data requests for table(id={})", table_id); - auto calcs = create_partition_aggregate_stats_calcs( - table_id, partitions, nodes[i].hp, "replica", rows); + auto calcs = + create_partition_aggregate_stats_calcs(table_id, pcs, nodes[i].hp, "replica", rows); RETURN_SHELL_IF_PARSE_METRICS_FAILED( calcs->aggregate_metrics(results_start[i].body(), results_end[i].body()), nodes[i], @@ -2097,17 +2096,16 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s return false; } - std::map> app_partitions; - if (!get_app_partitions(sc, apps, app_partitions)) { + 
std::map> pcs_by_appid; + if (!get_app_partitions(sc, apps, pcs_by_appid)) { LOG_ERROR("get app partitions failed"); return false; } - for (auto &kv : app_partitions) { - auto &v = kv.second; - for (auto &c : v) { + for (auto &[_, pcs] : pcs_by_appid) { + for (auto &pc : pcs) { // use partition_flags to record if this partition's storage size is calculated, - // because `app_partitions' is a temporary variable, so we can re-use partition_flags. - c.partition_flags = 0; + // because `pcs_by_appid' is a temporary variable, so we can re-use partition_flags. + pc.partition_flags = 0; } } @@ -2128,10 +2126,10 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s CHECK(parse_ret, "name = {}", m.name); if (counter_name != "disk.storage.sst(MB)") continue; - auto find = app_partitions.find(app_id_x); - if (find == app_partitions.end()) // app id not found + auto find = pcs_by_appid.find(app_id_x); + if (find == pcs_by_appid.end()) // app id not found continue; - dsn::partition_configuration &pc = find->second[partition_index_x]; + auto &pc = find->second[partition_index_x]; if (pc.hp_primary != nodes[i].hp) // not primary replica continue; if (pc.partition_flags != 0) // already calculated diff --git a/src/shell/commands/data_operations.cpp b/src/shell/commands/data_operations.cpp index c8be633a78..150f33bede 100644 --- a/src/shell/commands/data_operations.cpp +++ b/src/shell/commands/data_operations.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2231,16 +2232,16 @@ inline dsn::metric_filters rdb_estimated_keys_filters(int32_t table_id) // All selected partitions should have their primary replicas on this node. 
std::unique_ptr create_rdb_estimated_keys_stats_calcs(const int32_t table_id, - const std::vector &partitions, + const std::vector &pcs, const dsn::host_port &node, const std::string &entity_type, std::vector &rows) { - CHECK_EQ(rows.size(), partitions.size()); + CHECK_EQ(rows.size(), pcs.size()); partition_stat_map sums; for (size_t i = 0; i < rows.size(); ++i) { - if (partitions[i].hp_primary != node) { + if (pcs[i].hp_primary != node) { // Ignore once the replica of the metrics is not the primary of the partition. continue; } @@ -2268,13 +2269,13 @@ bool get_rdb_estimated_keys_stats(shell_context *sc, int32_t table_id = 0; int32_t partition_count = 0; - std::vector partitions; - const auto &err = sc->ddl_client->list_app(table_name, table_id, partition_count, partitions); + std::vector pcs; + const auto &err = sc->ddl_client->list_app(table_name, table_id, partition_count, pcs); if (err != ::dsn::ERR_OK) { LOG_ERROR("list app {} failed, error = {}", table_name, err); return false; } - CHECK_EQ(partitions.size(), partition_count); + CHECK_EQ(pcs.size(), partition_count); const auto &results = get_metrics(nodes, rdb_estimated_keys_filters(table_id).to_query_string()); @@ -2289,8 +2290,8 @@ bool get_rdb_estimated_keys_stats(shell_context *sc, RETURN_SHELL_IF_GET_METRICS_FAILED( results[i], nodes[i], "rdb_estimated_keys for table(id={})", table_id); - auto calcs = create_rdb_estimated_keys_stats_calcs( - table_id, partitions, nodes[i].hp, "replica", rows); + auto calcs = + create_rdb_estimated_keys_stats_calcs(table_id, pcs, nodes[i].hp, "replica", rows); RETURN_SHELL_IF_PARSE_METRICS_FAILED(calcs->aggregate_metrics(results[i].body()), nodes[i], "rdb_estimated_keys for table(id={})", @@ -2870,9 +2871,9 @@ bool calculate_hash_value(command_executor *e, shell_context *sc, arguments args if (!sc->current_app_name.empty()) { int32_t app_id; int32_t partition_count; - std::vector<::dsn::partition_configuration> partitions; + std::vector<::dsn::partition_configuration> 
pcs; ::dsn::error_code err = - sc->ddl_client->list_app(sc->current_app_name, app_id, partition_count, partitions); + sc->ddl_client->list_app(sc->current_app_name, app_id, partition_count, pcs); if (err != ::dsn::ERR_OK) { std::cout << "list app [" << sc->current_app_name << "] failed, error=" << err << std::endl; @@ -2883,17 +2884,11 @@ bool calculate_hash_value(command_executor *e, shell_context *sc, arguments args tp.add_row_name_and_data("app_id", app_id); tp.add_row_name_and_data("partition_count", partition_count); tp.add_row_name_and_data("partition_index", partition_index); - if (partitions.size() > partition_index) { - ::dsn::partition_configuration &pc = partitions[partition_index]; + if (pcs.size() > partition_index) { + const auto &pc = pcs[partition_index]; tp.add_row_name_and_data("primary", pc.hp_primary.to_string()); - - std::ostringstream oss; - for (int i = 0; i < pc.hp_secondaries.size(); ++i) { - if (i != 0) - oss << ","; - oss << pc.hp_secondaries[i]; - } - tp.add_row_name_and_data("secondaries", oss.str()); + tp.add_row_name_and_data("secondaries", + fmt::format("{}", fmt::join(pc.hp_secondaries, ","))); } } tp.output(std::cout); diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index a18b9ef8d6..803d2ee2af 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -340,22 +340,22 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) for (auto &app : apps) { int32_t app_id; int32_t partition_count; - std::vector partitions; - r = sc->ddl_client->list_app(app.app_name, app_id, partition_count, partitions); + std::vector pcs; + r = sc->ddl_client->list_app(app.app_name, app_id, partition_count, pcs); if (r != dsn::ERR_OK) { std::cout << "list app " << app.app_name << " failed, error=" << r << std::endl; return true; } - for (const dsn::partition_configuration &p : partitions) { - if (p.hp_primary) { - auto find = tmp_map.find(p.hp_primary); + 
for (const auto &pc : pcs) { + if (pc.hp_primary) { + auto find = tmp_map.find(pc.hp_primary); if (find != tmp_map.end()) { find->second.primary_count++; } } - for (const auto &hp : p.hp_secondaries) { - auto find = tmp_map.find(hp); + for (const auto &secondary : pc.hp_secondaries) { + auto find = tmp_map.find(secondary); if (find != tmp_map.end()) { find->second.secondary_count++; } diff --git a/src/shell/commands/recovery.cpp b/src/shell/commands/recovery.cpp index dfd82122b9..7793142c68 100644 --- a/src/shell/commands/recovery.cpp +++ b/src/shell/commands/recovery.cpp @@ -165,8 +165,9 @@ bool recover(command_executor *e, shell_context *sc, arguments args) dsn::host_port diagnose_recommend(const ddd_partition_info &pinfo) { - if (pinfo.config.hp_last_drops.size() < 2) + if (pinfo.config.hp_last_drops.size() < 2) { return dsn::host_port(); + } std::vector last_two_nodes(pinfo.config.hp_last_drops.end() - 2, pinfo.config.hp_last_drops.end()); @@ -290,11 +291,13 @@ bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args) << "last_committed(" << pinfo.config.last_committed_decree << ")" << std::endl; out << " ----" << std::endl; dsn::host_port latest_dropped, secondary_latest_dropped; - if (pinfo.config.hp_last_drops.size() > 0) + if (pinfo.config.hp_last_drops.size() > 0) { latest_dropped = pinfo.config.hp_last_drops[pinfo.config.hp_last_drops.size() - 1]; - if (pinfo.config.hp_last_drops.size() > 1) + } + if (pinfo.config.hp_last_drops.size() > 1) { secondary_latest_dropped = pinfo.config.hp_last_drops[pinfo.config.hp_last_drops.size() - 2]; + } int j = 0; for (const ddd_node_info &n : pinfo.dropped) { dsn::host_port hp_node; diff --git a/src/shell/commands/table_management.cpp b/src/shell/commands/table_management.cpp index dd7995480b..b75e27f56a 100644 --- a/src/shell/commands/table_management.cpp +++ b/src/shell/commands/table_management.cpp @@ -285,15 +285,15 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) int32_t 
app_id = 0; int32_t partition_count = 0; int32_t max_replica_count = 0; - std::vector partitions; + std::vector pcs; - dsn::error_code err = sc->ddl_client->list_app(app_name, app_id, partition_count, partitions); + dsn::error_code err = sc->ddl_client->list_app(app_name, app_id, partition_count, pcs); if (err != ::dsn::ERR_OK) { std::cout << "ERROR: list app " << app_name << " failed, error=" << err << std::endl; return true; } - if (!partitions.empty()) { - max_replica_count = partitions[0].max_replica_count; + if (!pcs.empty()) { + max_replica_count = pcs[0].max_replica_count; } std::vector nodes; @@ -333,27 +333,15 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) int primary_replicas_count = 0; double disk_used_for_all_replicas = 0; int all_replicas_count = 0; - for (int i = 0; i < partitions.size(); i++) { - const dsn::partition_configuration &p = partitions[i]; - int replica_count = 0; - if (p.hp_primary) { - replica_count++; - } - replica_count += p.hp_secondaries.size(); - std::string replica_count_str; - { - std::stringstream oss; - oss << replica_count << "/" << p.max_replica_count; - replica_count_str = oss.str(); - } + for (const auto &pc : pcs) { std::string primary_str("-"); - if (p.hp_primary) { + if (pc.hp_primary) { bool disk_found = false; double disk_value = 0; - auto f1 = disk_map.find(p.hp_primary); + auto f1 = disk_map.find(pc.hp_primary); if (f1 != disk_map.end()) { auto &sub_map = f1->second; - auto f2 = sub_map.find(p.pid.get_partition_index()); + auto f2 = sub_map.find(pc.pid.get_partition_index()); if (f2 != sub_map.end()) { disk_found = true; disk_value = f2->second; @@ -365,17 +353,17 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } bool count_found = false; double count_value = 0; - auto f3 = count_map.find(p.hp_primary); + auto f3 = count_map.find(pc.hp_primary); if (f3 != count_map.end()) { auto &sub_map = f3->second; - auto f4 = sub_map.find(p.pid.get_partition_index()); + auto f4 = 
sub_map.find(pc.pid.get_partition_index()); if (f4 != sub_map.end()) { count_found = true; count_value = f4->second; } } std::stringstream oss; - oss << replication_ddl_client::node_name(p.hp_primary, resolve_ip) << "("; + oss << replication_ddl_client::node_name(pc.hp_primary, resolve_ip) << "("; if (disk_found) oss << disk_value; else @@ -392,15 +380,15 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) { std::stringstream oss; oss << "["; - for (int j = 0; j < p.hp_secondaries.size(); j++) { + for (int j = 0; j < pc.hp_secondaries.size(); j++) { if (j != 0) oss << ","; bool found = false; double value = 0; - auto f1 = disk_map.find(p.hp_secondaries[j]); + auto f1 = disk_map.find(pc.hp_secondaries[j]); if (f1 != disk_map.end()) { auto &sub_map = f1->second; - auto f2 = sub_map.find(p.pid.get_partition_index()); + auto f2 = sub_map.find(pc.pid.get_partition_index()); if (f2 != sub_map.end()) { found = true; value = f2->second; @@ -410,17 +398,17 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } bool count_found = false; double count_value = 0; - auto f3 = count_map.find(p.hp_secondaries[j]); + auto f3 = count_map.find(pc.hp_secondaries[j]); if (f3 != count_map.end()) { auto &sub_map = f3->second; - auto f3 = sub_map.find(p.pid.get_partition_index()); + auto f3 = sub_map.find(pc.pid.get_partition_index()); if (f3 != sub_map.end()) { count_found = true; count_value = f3->second; } } - oss << replication_ddl_client::node_name(p.hp_secondaries[j], resolve_ip) << "("; + oss << replication_ddl_client::node_name(pc.hp_secondaries[j], resolve_ip) << "("; if (found) oss << value; else @@ -437,9 +425,10 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } if (detailed) { - tp_details.add_row(std::to_string(p.pid.get_partition_index())); - tp_details.append_data(p.ballot); - tp_details.append_data(replica_count_str); + tp_details.add_row(std::to_string(pc.pid.get_partition_index())); + 
tp_details.append_data(pc.ballot); + tp_details.append_data(fmt::format( + "{}/{}", pc.hp_secondaries.size() + (pc.hp_primary ? 1 : 0), pc.max_replica_count)); tp_details.append_data(primary_str); tp_details.append_data(secondary_str); } diff --git a/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp b/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp index 56fbc75391..3c549d4cf6 100644 --- a/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp +++ b/src/test/function_test/detect_hotspot/test_detect_hotspot.cpp @@ -26,6 +26,7 @@ #include "client/replication_ddl_client.h" #include "common/gpid.h" +#include "dsn.layer2_types.h" #include "gtest/gtest.h" #include "include/pegasus/client.h" #include "include/pegasus/error.h" @@ -96,11 +97,9 @@ class detect_hotspot_test : public test_util bool find_hotkey = false; dsn::replication::detect_hotkey_response resp; - for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { - req.pid = dsn::gpid(table_id_, partition_index); - ASSERT_EQ( - dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); + for (const auto &pc : pcs_) { + req.pid = pc.pid; + ASSERT_EQ(dsn::ERR_OK, ddl_client_->detect_hotkey(pc.hp_primary, req, resp)); if (!resp.hotkey_result.empty()) { find_hotkey = true; break; @@ -118,19 +117,15 @@ class detect_hotspot_test : public test_util sleep(15); req.action = dsn::replication::detect_action::STOP; - for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { - ASSERT_EQ( - dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); + for (const auto &pc : pcs_) { + ASSERT_EQ(dsn::ERR_OK, ddl_client_->detect_hotkey(pc.hp_primary, req, resp)); ASSERT_EQ(dsn::ERR_OK, resp.err); } req.action = dsn::replication::detect_action::QUERY; - for (int partition_index = 0; partition_index < partitions_.size(); partition_index++) { - req.pid = 
dsn::gpid(table_id_, partition_index); - ASSERT_EQ( - dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[partition_index].hp_primary, req, resp)); + for (const auto &pc : pcs_) { + req.pid = pc.pid; + ASSERT_EQ(dsn::ERR_OK, ddl_client_->detect_hotkey(pc.hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::STOPPED", resp.err_hint); } @@ -162,12 +157,12 @@ class detect_hotspot_test : public test_util dsn::replication::detect_hotkey_response resp; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); + ddl_client_->detect_hotkey(pcs_[target_partition].hp_primary, req, resp)); ASSERT_EQ(dsn::ERR_OK, resp.err); req.action = dsn::replication::detect_action::QUERY; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); + ddl_client_->detect_hotkey(pcs_[target_partition].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::COARSE_DETECTING", resp.err_hint); @@ -178,7 +173,7 @@ class detect_hotspot_test : public test_util req.action = dsn::replication::detect_action::QUERY; ASSERT_EQ(dsn::ERR_OK, - ddl_client_->detect_hotkey(partitions_[target_partition].hp_primary, req, resp)); + ddl_client_->detect_hotkey(pcs_[target_partition].hp_primary, req, resp)); ASSERT_EQ("Can't get hotkey now, now state: hotkey_collector_state::STOPPED", resp.err_hint); } diff --git a/src/test/function_test/utils/test_util.cpp b/src/test/function_test/utils/test_util.cpp index e574eb7046..7a41f18822 100644 --- a/src/test/function_test/utils/test_util.cpp +++ b/src/test/function_test/utils/test_util.cpp @@ -108,11 +108,10 @@ void test_util::SetUp() ASSERT_TRUE(client_ != nullptr); int32_t partition_count; - ASSERT_EQ(dsn::ERR_OK, - ddl_client_->list_app(table_name_, table_id_, partition_count, partitions_)); + ASSERT_EQ(dsn::ERR_OK, ddl_client_->list_app(table_name_, table_id_, partition_count, pcs_)); 
ASSERT_NE(0, table_id_); ASSERT_EQ(partition_count_, partition_count); - ASSERT_EQ(partition_count_, partitions_.size()); + ASSERT_EQ(partition_count_, pcs_.size()); } void test_util::run_cmd_from_project_root(const string &cmd) diff --git a/src/test/function_test/utils/test_util.h b/src/test/function_test/utils/test_util.h index e00397a82e..519da64718 100644 --- a/src/test/function_test/utils/test_util.h +++ b/src/test/function_test/utils/test_util.h @@ -115,7 +115,7 @@ class test_util : public ::testing::Test std::string table_name_; int32_t table_id_; int32_t partition_count_ = 8; - std::vector partitions_; + std::vector pcs_; pegasus_client *client_ = nullptr; std::vector meta_list_; std::shared_ptr ddl_client_; diff --git a/src/test/kill_test/kill_testor.cpp b/src/test/kill_test/kill_testor.cpp index fbafa349b2..50570bf1b4 100644 --- a/src/test/kill_test/kill_testor.cpp +++ b/src/test/kill_test/kill_testor.cpp @@ -17,6 +17,8 @@ * under the License. */ +#include +#include #include #include #include @@ -102,38 +104,30 @@ dsn::error_code kill_testor::get_partition_info(bool debug_unhealthy, healthy_partition_cnt = 0, unhealthy_partition_cnt = 0; int32_t app_id; int32_t partition_count; - partitions.clear(); - dsn::error_code err = - ddl_client->list_app(FLAGS_verify_app_name, app_id, partition_count, partitions); + pcs.clear(); + dsn::error_code err = ddl_client->list_app(FLAGS_verify_app_name, app_id, partition_count, pcs); if (err == ::dsn::ERR_OK) { LOG_DEBUG("access meta and query partition status success"); - for (int i = 0; i < partitions.size(); i++) { - const dsn::partition_configuration &p = partitions[i]; + for (const auto &pc : pcs) { int replica_count = 0; - if (p.hp_primary) { + if (pc.hp_primary) { replica_count++; } - replica_count += p.hp_secondaries.size(); - if (replica_count == p.max_replica_count) { + replica_count += pc.hp_secondaries.size(); + if (replica_count == pc.max_replica_count) { healthy_partition_cnt++; } else { - std::stringstream 
info; - info << "gpid=" << p.pid.get_app_id() << "." << p.pid.get_partition_index() << ", "; - info << "primay=" << p.hp_primary << ", "; - info << "secondaries=["; - for (int idx = 0; idx < p.hp_secondaries.size(); idx++) { - if (idx != 0) - info << "," << p.hp_secondaries[idx]; - else - info << p.hp_secondaries[idx]; - } - info << "], "; - info << "last_committed_decree=" << p.last_committed_decree; + const auto &info = + fmt::format("gpid={}, primary={}, secondaries=[{}], last_committed_decree={}", + pc.pid, + pc.hp_primary, + fmt::join(pc.hp_secondaries, ", "), + pc.last_committed_decree); if (debug_unhealthy) { - LOG_INFO("found unhealthy partition, {}", info.str()); + LOG_INFO("found unhealthy partition, {}", info); } else { - LOG_DEBUG("found unhealthy partition, {}", info.str()); + LOG_DEBUG("found unhealthy partition, {}", info); } } } diff --git a/src/test/kill_test/kill_testor.h b/src/test/kill_test/kill_testor.h index f66f31540f..7c3f105719 100644 --- a/src/test/kill_test/kill_testor.h +++ b/src/test/kill_test/kill_testor.h @@ -66,7 +66,7 @@ class kill_testor shared_ptr ddl_client; vector meta_list; - std::vector partitions; + std::vector pcs; }; } // namespace test } // namespace pegasus diff --git a/src/test/kill_test/partition_kill_testor.cpp b/src/test/kill_test/partition_kill_testor.cpp index d6f0054755..d1dad5d3e2 100644 --- a/src/test/kill_test/partition_kill_testor.cpp +++ b/src/test/kill_test/partition_kill_testor.cpp @@ -59,14 +59,14 @@ void partition_kill_testor::Run() void partition_kill_testor::run() { - if (partitions.size() == 0) { + if (pcs.empty()) { LOG_INFO("partitions empty"); return; } - int random_num = generate_one_number(0, partitions.size() - 1); + int random_num = generate_one_number(0, pcs.size() - 1); std::vector random_indexs; - generate_random(random_indexs, random_num, 0, partitions.size() - 1); + generate_random(random_indexs, random_num, 0, pcs.size() - 1); std::vector tasks(random_num); std::vector> results(random_num); 
@@ -74,10 +74,10 @@ void partition_kill_testor::run() std::vector arguments(2); for (int i = 0; i < random_indexs.size(); ++i) { int index = random_indexs[i]; - const auto &p = partitions[index]; + const auto &pc = pcs[index]; - arguments[0] = to_string(p.pid.get_app_id()); - arguments[1] = to_string(p.pid.get_partition_index()); + arguments[0] = to_string(pc.pid.get_app_id()); + arguments[1] = to_string(pc.pid.get_partition_index()); auto callback = [&results, i](::dsn::error_code err, const std::string &resp) { if (err == ::dsn::ERR_OK) { @@ -88,7 +88,7 @@ void partition_kill_testor::run() results[i].second = err.to_string(); } }; - tasks[i] = dsn::dist::cmd::async_call_remote(p.primary, + tasks[i] = dsn::dist::cmd::async_call_remote(pc.primary, "replica.kill_partition", arguments, callback,