Skip to content

Commit

Permalink
Merge pull request redpanda-data#23915 from IoannisRP/ik-sanction-par…
Browse files Browse the repository at this point in the history
…tition-autobalancer

cluster: Implement sanctions on autobalancer features on invalid license
  • Loading branch information
IoannisRP authored Nov 8, 2024
2 parents 6a181f7 + 7d3588f commit 53e70a8
Show file tree
Hide file tree
Showing 14 changed files with 419 additions and 35 deletions.
3 changes: 1 addition & 2 deletions src/v/cluster/controller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,6 @@ ss::future<> controller::start(
std::ref(_tp_state),
std::ref(_backend),
config::shard_local_cfg().core_balancing_on_core_count_change.bind(),
config::shard_local_cfg().core_balancing_continuous.bind(),
config::shard_local_cfg().core_balancing_debounce_timeout.bind(),
config::shard_local_cfg().topic_partitions_per_shard.bind(),
config::shard_local_cfg().topic_partitions_reserve_shard0.bind());
Expand Down Expand Up @@ -724,12 +723,12 @@ ss::future<> controller::start(
co_await _partition_balancer.start_single(
_raft0,
std::ref(_stm),
std::ref(_feature_table),
std::ref(_partition_balancer_state),
std::ref(_hm_backend),
std::ref(_partition_allocator),
std::ref(_tp_frontend),
std::ref(_members_frontend),
config::shard_local_cfg().partition_autobalancing_mode.bind(),
config::shard_local_cfg()
.partition_autobalancing_node_availability_timeout_sec.bind(),
config::shard_local_cfg()
Expand Down
25 changes: 22 additions & 3 deletions src/v/cluster/partition_balancer_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#include "cluster/types.h"
#include "config/configuration.h"
#include "config/property.h"
#include "features/enterprise_features.h"
#include "features/feature_table.h"
#include "model/metadata.h"
#include "random/generators.h"
#include "utils/stable_iterator_adaptor.h"

Expand All @@ -43,12 +46,12 @@ static constexpr std::chrono::seconds add_move_cmd_timeout = 10s;
partition_balancer_backend::partition_balancer_backend(
consensus_ptr raft0,
ss::sharded<controller_stm>& controller_stm,
ss::sharded<features::feature_table>& feature_table,
ss::sharded<partition_balancer_state>& state,
ss::sharded<health_monitor_backend>& health_monitor,
ss::sharded<partition_allocator>& partition_allocator,
ss::sharded<topics_frontend>& topics_frontend,
ss::sharded<members_frontend>& members_frontend,
config::binding<model::partition_autobalancing_mode>&& mode,
config::binding<std::chrono::seconds>&& availability_timeout,
config::binding<unsigned>&& max_disk_usage_percent,
config::binding<unsigned>&& storage_space_alert_free_threshold_percent,
Expand All @@ -62,12 +65,15 @@ partition_balancer_backend::partition_balancer_backend(
config::binding<bool> topic_aware)
: _raft0(std::move(raft0))
, _controller_stm(controller_stm.local())
, _feature_table(feature_table.local())
, _state(state.local())
, _health_monitor(health_monitor.local())
, _partition_allocator(partition_allocator.local())
, _topics_frontend(topics_frontend.local())
, _members_frontend(members_frontend.local())
, _mode(std::move(mode))
, _mode(features::make_sanctioning_binding<
features::license_required_feature::
partition_auto_balancing_continuous>())
, _availability_timeout(std::move(availability_timeout))
, _max_disk_usage_percent(std::move(max_disk_usage_percent))
, _storage_space_alert_free_threshold_percent(
Expand Down Expand Up @@ -358,9 +364,22 @@ ss::future<> partition_balancer_backend::do_tick() {
// status requests by default 700ms
const auto node_responsiveness_timeout = _node_status_interval() * 7;

const bool should_sanction = _feature_table.should_sanction();

const auto [mode, is_sanctioned] = _mode(should_sanction);
if (is_sanctioned) {
vlog(
clusterlog.warn,
"A Redpanda Enterprise Edition license is required to use enterprise "
"feature \"partition_autobalancing_mode\" with value \"{}\". "
"Behavior is being restricted to \"{}\".",
_mode(false).first,
mode);
}

partition_balancer_planner planner(
planner_config{
.mode = _mode(),
.mode = mode,
.soft_max_disk_usage_ratio = soft_max_disk_usage_ratio,
.hard_max_disk_usage_ratio = hard_max_disk_usage_ratio,
.max_concurrent_actions = _max_concurrent_actions(),
Expand Down
8 changes: 6 additions & 2 deletions src/v/cluster/partition_balancer_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "cluster/partition_balancer_types.h"
#include "cluster/types.h"
#include "config/property.h"
#include "features/enterprise_features.h"
#include "model/fundamental.h"
#include "raft/consensus.h"
#include "utils/mutex.h"
Expand All @@ -35,12 +36,12 @@ class partition_balancer_backend {
partition_balancer_backend(
consensus_ptr raft0,
ss::sharded<controller_stm>&,
ss::sharded<features::feature_table>&,
ss::sharded<partition_balancer_state>&,
ss::sharded<health_monitor_backend>&,
ss::sharded<partition_allocator>&,
ss::sharded<topics_frontend>&,
ss::sharded<members_frontend>&,
config::binding<model::partition_autobalancing_mode>&& mode,
config::binding<std::chrono::seconds>&& availability_timeout,
config::binding<unsigned>&& max_disk_usage_percent,
config::binding<unsigned>&& storage_space_alert_free_threshold_percent,
Expand Down Expand Up @@ -91,13 +92,16 @@ class partition_balancer_backend {
consensus_ptr _raft0;

controller_stm& _controller_stm;
features::feature_table& _feature_table;
partition_balancer_state& _state;
health_monitor_backend& _health_monitor;
partition_allocator& _partition_allocator;
topics_frontend& _topics_frontend;
members_frontend& _members_frontend;

config::binding<model::partition_autobalancing_mode> _mode;
features::sanctioning_binding<
config::enum_property<model::partition_autobalancing_mode>>
_mode;
config::binding<std::chrono::seconds> _availability_timeout;
config::binding<unsigned> _max_disk_usage_percent;
config::binding<unsigned> _storage_space_alert_free_threshold_percent;
Expand Down
19 changes: 16 additions & 3 deletions src/v/cluster/shard_balancer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "cluster/cluster_utils.h"
#include "cluster/logger.h"
#include "config/node_config.h"
#include "features/enterprise_features.h"
#include "random/generators.h"
#include "ssx/async_algorithm.h"
#include "types.h"
Expand Down Expand Up @@ -49,7 +50,6 @@ shard_balancer::shard_balancer(
ss::sharded<topic_table>& topics,
ss::sharded<controller_backend>& cb,
config::binding<bool> balancing_on_core_count_change,
config::binding<bool> balancing_continuous,
config::binding<std::chrono::milliseconds> debounce_timeout,
config::binding<uint32_t> partitions_per_shard,
config::binding<uint32_t> partitions_reserve_shard0)
Expand All @@ -60,7 +60,9 @@ shard_balancer::shard_balancer(
, _controller_backend(cb)
, _self(*config::node().node_id())
, _balancing_on_core_count_change(std::move(balancing_on_core_count_change))
, _balancing_continuous(std::move(balancing_continuous))
, _balancing_continuous(
features::make_sanctioning_binding<
features::license_required_feature::core_balancing_continuous>())
, _debounce_timeout(std::move(debounce_timeout))
, _debounce_jitter(_debounce_timeout())
, _partitions_per_shard(std::move(partitions_per_shard))
Expand Down Expand Up @@ -461,9 +463,20 @@ void shard_balancer::maybe_assign(
// partition is removed from this node, this will likely disrupt the
// counts balance, so we set up the balancing timer.

const bool should_sanction = _features.should_sanction();
const auto [balancing_continuous, is_sanctioned]
= _balancing_continuous(should_sanction);
if (is_sanctioned) {
vlog(
clusterlog.warn,
"A Redpanda Enterprise Edition license is required to use "
"enterprise feature \"core_balancing_continuous\". "
"This property is being ignored.");
}

if (
_features.is_active(features::feature::node_local_core_assignment)
&& _balancing_continuous() && !_balance_timer.armed()) {
&& balancing_continuous && !_balance_timer.armed()) {
// Add jitter so that different nodes don't move replicas of the
// same partition in unison.
auto debounce_interval = _debounce_jitter.next_duration();
Expand Down
5 changes: 3 additions & 2 deletions src/v/cluster/shard_balancer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

#include "cluster/controller_backend.h"
#include "cluster/shard_placement_table.h"
#include "config/property.h"
#include "container/chunked_hash_map.h"
#include "features/enterprise_features.h"
#include "random/simple_time_jitter.h"
#include "ssx/event.h"
#include "utils/mutex.h"
Expand Down Expand Up @@ -41,7 +43,6 @@ class shard_balancer {
ss::sharded<topic_table>&,
ss::sharded<controller_backend>&,
config::binding<bool> balancing_on_core_count_change,
config::binding<bool> balancing_continuous,
config::binding<std::chrono::milliseconds> debounce_timeout,
config::binding<uint32_t> partitions_per_shard,
config::binding<uint32_t> partitions_reserve_shard0);
Expand Down Expand Up @@ -116,7 +117,7 @@ class shard_balancer {
model::node_id _self;

config::binding<bool> _balancing_on_core_count_change;
config::binding<bool> _balancing_continuous;
features::sanctioning_binding<config::property<bool>> _balancing_continuous;
config::binding<std::chrono::milliseconds> _debounce_timeout;
simple_time_jitter<ss::lowres_clock> _debounce_jitter;
config::binding<uint32_t> _partitions_per_shard;
Expand Down
30 changes: 21 additions & 9 deletions src/v/config/property.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ class property : public base_property {
return std::nullopt;
}

bool check_restricted(const T& value) const {
return do_check_restricted(value);
}

void reset() override {
auto v = default_value();
update_value(std::move(v));
Expand Down Expand Up @@ -301,6 +305,11 @@ class property : public base_property {
const std::optional<legacy_default<value_type>> _legacy_default;

private:
virtual bool do_check_restricted(const T&) const {
// Config properties are unrestricted by default
return false;
}

validator _validator;

friend class binding_base<value_type>;
Expand Down Expand Up @@ -1062,14 +1071,17 @@ class enterprise : public P {
assert_no_default_conflict();
}

// Needed because the following override shadows the rest of the overloads
using P::check_restricted;

/**
* Decodes the given YAML node into the underlying property's value_type and
* checks whether that value should be restricted to enterprise clusters
* based on the unwrapped value restrictions described at construction.
*/
std::optional<validation_error> check_restricted(YAML::Node n) const final {
auto v = std::move(n.as<T>());
if (check_restricted(v)) {
if (do_check_restricted(v)) {
return std::make_optional<validation_error>(
P::name().data(),
ssx::sformat(
Expand All @@ -1079,14 +1091,7 @@ class enterprise : public P {
}

private:
void assert_no_default_conflict() const {
vassert(
!check_restricted(this->default_value()),
"Enterprise properties must not restrict the default value of the "
"underlying property!");
}

bool check_restricted(const T& setting) const {
bool do_check_restricted(const T& setting) const final {
// depending on how the restriction was defined, construct an applicable
// check function for bare instances of the underlying value type
auto restriction_check = [this](const val_t& v) -> bool {
Expand All @@ -1112,6 +1117,13 @@ class enterprise : public P {
}
}

void assert_no_default_conflict() const {
vassert(
!do_check_restricted(this->default_value()),
"Enterprise properties must not restrict the default value of the "
"underlying property!");
}

restrict_variant_t _restriction;
};

Expand Down
14 changes: 9 additions & 5 deletions src/v/config/tests/enterprise_property_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0

#include "config/base_property.h"
#include "config/config_store.h"
#include "config/property.h"
#include "config/types.h"

#include <gtest/gtest-typed-test.h>
#include <gtest/gtest.h>
#include <yaml-cpp/yaml.h>

#include <concepts>
#include <iostream>

namespace config {
Expand All @@ -34,7 +37,7 @@ struct test_config : public config_store {
true,
"enterprise_bool",
"An enterprise-only bool config",
meta{},
meta{.needs_restart = needs_restart::no},
false,
property<bool>::noop_validator,
std::nullopt)
Expand All @@ -43,27 +46,28 @@ struct test_config : public config_store {
std::vector<ss::sstring>{"bar"},
"enterprise_str_enum",
"An enterprise-only enum property",
meta{},
meta{.needs_restart = needs_restart::no},
"foo",
std::vector<ss::sstring>{"foo", "bar", "baz"})
, enterprise_str_vec(
*this,
std::vector<ss::sstring>{"GSSAPI"},
"enterprise_str_vec",
"An enterprise-only vector of strings")
"An enterprise-only vector of strings",
meta{.needs_restart = needs_restart::no})
, enterprise_opt_int(
*this,
[](const int& v) -> bool { return v > 1000; },
"enterprise_opt_int",
"An enterprise-only optional int",
meta{},
meta{.needs_restart = needs_restart::no},
0)
, enterprise_enum(
*this,
std::vector<tls_version>{tls_version::v1_3},
"enterprise_str_enum",
"An enterprise-only enum property",
meta{},
meta{.needs_restart = needs_restart::no},
tls_version::v1_1,
std::vector<tls_version>{
tls_version::v1_0,
Expand Down
1 change: 1 addition & 0 deletions src/v/features/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ redpanda_cc_library(
visibility = ["//visibility:public"],
deps = [
"//src/v/base",
"//src/v/config",
"@abseil-cpp//absl/container:flat_hash_set",
"@boost//:range",
],
Expand Down
Loading

0 comments on commit 53e70a8

Please sign in to comment.