Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

config: add cluster configuration validation to redpanda preload process #25123

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
24 changes: 0 additions & 24 deletions src/v/cloud_storage/cache_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "cloud_storage/access_time_tracker.h"
#include "cloud_storage/logger.h"
#include "cloud_storage/recursive_directory_walker.h"
#include "config/configuration.h"
#include "seastar/util/file.hh"
#include "ssx/future-util.h"
#include "ssx/sformat.h"
Expand Down Expand Up @@ -2005,27 +2004,4 @@ ss::future<> cache::sync_access_time_tracker(
}
}

std::optional<ss::sstring>
cache::validate_cache_config(const config::configuration& conf) {
const auto& cloud_storage_cache_size = conf.cloud_storage_cache_size;
const auto& cloud_storage_cache_size_pct
= conf.cloud_storage_cache_size_percent;

// If not set, cloud cache uses default value of 0.0
auto cache_size_pct = cloud_storage_cache_size_pct().value_or(0.0);

using cache_size_pct_type = double;
static constexpr auto epsilon
= std::numeric_limits<cache_size_pct_type>::epsilon();

if ((cache_size_pct < epsilon) && (cloud_storage_cache_size() == 0)) {
return ss::format(
"Cannot set both {} and {} to 0.",
cloud_storage_cache_size.name(),
cloud_storage_cache_size_pct.name());
}

return std::nullopt;
}

} // namespace cloud_storage
10 changes: 0 additions & 10 deletions src/v/cloud_storage/cache_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,16 +167,6 @@ class cache
return _cache_dir / key;
}

// Checks if a cluster configuration is valid for the properties
// `cloud_storage_cache_size` and `cloud_storage_cache_size_percent`.
// Two cases are invalid: 1. the case in which both are 0, 2. the case in
// which `cache_size` is 0 while `cache_size_percent` is `std::nullopt`.
//
// Returns `std::nullopt` if the passed configuration is valid, or an
// `ss::sstring` explaining the misconfiguration otherwise.
static std::optional<ss::sstring>
validate_cache_config(const config::configuration& conf);

private:
/// Load access time tracker from file
ss::future<> load_access_time_tracker();
Expand Down
21 changes: 21 additions & 0 deletions src/v/cluster/config_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,27 @@ config_manager::preload(const YAML::Node& legacy_config) {
}
}

// Validate the local config on this shard. We will have to propagate
// changes to all shards in case of errors.
auto cfg_validation_result = config::configuration::validate_config(
config::shard_local_cfg());

for (const auto& [prop_name, error] : cfg_validation_result.errors) {
vlog(
clusterlog.warn,
"Misconfigured preload property {}: {}",
prop_name,
error);
}

for (const auto& property_to_unset :
cfg_validation_result.properties_to_unset) {
co_await ss::smp::invoke_on_all([&property_to_unset]() {
config::shard_local_cfg().get(property_to_unset).reset();
});
result.raw_values.erase(property_to_unset);
}

co_return result;
}

Expand Down
1 change: 1 addition & 0 deletions src/v/cluster/config_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "cluster/commands.h"
#include "cluster/fwd.h"
#include "cluster/notification.h"
#include "config/configuration.h"
#include "features/feature_table.h"
#include "model/metadata.h"
#include "model/record.h"
Expand Down
189 changes: 189 additions & 0 deletions src/v/config/configuration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,41 @@
#include <cstdint>
#include <optional>

namespace {
ss::sstring
join_properties(const std::vector<std::reference_wrapper<
const config::property<std::optional<ss::sstring>>>>& props) {
return ssx::sformat("{}", fmt::join(props, ", "));
}

// Checks if a cluster configuration is valid for the properties
// `cloud_storage_cache_size` and `cloud_storage_cache_size_percent`.
// Two cases are invalid: 1. the case in which both are 0, 2. the case in
// which `cache_size` is 0 while `cache_size_percent` is `std::nullopt`.
//
// Returns `std::nullopt` if the passed configuration is valid, or an
// `ss::sstring` explaining the misconfiguration otherwise.
std::optional<ss::sstring>
validate_cloud_storage_cache_config(const config::configuration& conf) {
const auto& cloud_storage_cache_size = conf.cloud_storage_cache_size;
const auto& cloud_storage_cache_size_pct
= conf.cloud_storage_cache_size_percent;

// If not set, cloud cache uses default value of 0.0
auto cache_size_pct = cloud_storage_cache_size_pct().value_or(0.0);

if ((cache_size_pct == 0.0) && (cloud_storage_cache_size() == 0)) {
return ss::format(
"Cannot set both {} and {} to 0.",
cloud_storage_cache_size.name(),
cloud_storage_cache_size_pct.name());
}

return std::nullopt;
}

} // namespace

namespace config {
using namespace std::chrono_literals;

Expand Down Expand Up @@ -3919,6 +3954,160 @@ configuration::error_map_t configuration::load(const YAML::Node& root_node) {
return config_store::read_yaml(root_node["redpanda"], std::move(ignore));
}

configuration::config_validation_result
configuration::validate_config(const config::configuration& updated_config) {
config_validation_result result;
if (updated_config.cloud_storage_enabled()) {
bool cloud_storage_enabled_is_valid = true;
// The properties that cloud_storage::configuration requires
// to be set if cloud storage is enabled.
using config_properties_seq = std::vector<std::reference_wrapper<
const config::property<std::optional<ss::sstring>>>>;

switch (updated_config.cloud_storage_credentials_source.value()) {
case model::cloud_credentials_source::config_file: {
config_properties_seq s3_properties = {
std::ref(updated_config.cloud_storage_region),
std::ref(updated_config.cloud_storage_bucket),
std::ref(updated_config.cloud_storage_access_key),
std::ref(updated_config.cloud_storage_secret_key),
};

config_properties_seq abs_properties = {
std::ref(updated_config.cloud_storage_azure_storage_account),
std::ref(updated_config.cloud_storage_azure_container),
std::ref(updated_config.cloud_storage_azure_shared_key),
};

std::array<config_properties_seq, 2> valid_configurations = {
s3_properties, abs_properties};

bool is_valid_configuration = std::any_of(
valid_configurations.begin(),
valid_configurations.end(),
[](const auto& config) {
return std::none_of(
config.begin(), config.end(), [](const auto& prop) {
return prop() == std::nullopt;
});
});

if (!is_valid_configuration) {
result.errors[ss::sstring(
updated_config.cloud_storage_enabled.name())]
= ssx::sformat(
"To enable cloud storage you need to configure S3 or Azure "
"Blob Storage access. For S3 {} must be set. For ABS {} "
"must be set",
join_properties(s3_properties),
join_properties(abs_properties));
cloud_storage_enabled_is_valid = false;
}
} break;
case model::cloud_credentials_source::aws_instance_metadata:
case model::cloud_credentials_source::gcp_instance_metadata:
case model::cloud_credentials_source::sts: {
// basic config checks for cloud_storage. for sts it is expected
// to receive part of the configuration via env variables, while
// aws_instance_metadata and gcp_instance_metadata do not
// require extra configuration
config_properties_seq properties = {
std::ref(updated_config.cloud_storage_region),
std::ref(updated_config.cloud_storage_bucket),
};

for (auto& p : properties) {
if (p() == std::nullopt) {
result.errors[ss::sstring(p.get().name())] = ssx::sformat(
"Must be set when cloud storage enabled with "
"cloud_storage_credentials_source = {}",
updated_config.cloud_storage_credentials_source.value());
cloud_storage_enabled_is_valid = false;
}
}
} break;
case model::cloud_credentials_source::azure_aks_oidc_federation: {
// for azure_aks_oidc_federation it is expected to receive part
// of the configuration via env variables. this check is just
// for related cluster properties
config_properties_seq properties = {
std::ref(updated_config.cloud_storage_azure_storage_account),
std::ref(updated_config.cloud_storage_azure_container),
};

for (auto& p : properties) {
if (p() == std::nullopt) {
result.errors[ss::sstring(p.get().name())]
= "Must be set when cloud storage enabled with "
"cloud_storage_credentials_source = "
"azure_aks_oidc_federation";
cloud_storage_enabled_is_valid = false;
}
}
} break;
case model::cloud_credentials_source::azure_vm_instance_metadata: {
// azure_vm_instance_metadata requires an client_id to work
// correctly
config_properties_seq properties = {
std::ref(updated_config.cloud_storage_azure_storage_account),
std::ref(updated_config.cloud_storage_azure_container),
std::ref(updated_config.cloud_storage_azure_managed_identity_id),
};

for (auto& p : properties) {
if (p() == std::nullopt) {
result.errors[ss::sstring(p.get().name())]
= "Must be set when cloud storage enabled with "
"cloud_storage_credentials_source = "
"azure_vm_instance_metadata";
cloud_storage_enabled_is_valid = false;
}
}
} break;
}

if (!cloud_storage_enabled_is_valid) {
auto name = ss::sstring(
updated_config.cloud_storage_enabled.name());
result.properties_to_unset.insert(name);
}
}

// cloud_storage_cache_size/size_percent validation
if (auto invalid_cache = validate_cloud_storage_cache_config(
updated_config);
invalid_cache.has_value()) {
auto cache_size_name = ss::sstring(
updated_config.cloud_storage_cache_size.name());
auto cache_size_pct_name = ss::sstring(
updated_config.cloud_storage_cache_size_percent.name());
result.errors[cache_size_name] = invalid_cache.value();
result.properties_to_unset.insert(cache_size_name);
result.properties_to_unset.insert(cache_size_pct_name);
}

// For simplicity's sake, cloud storage read/write permissions cannot be
// enabled at the same time as tombstone_retention_ms at the cluster
// level, to avoid the case in which topics are created with TS
// read/write permissions and bugs are encountered later with tombstone
// removal.
if (updated_config.tombstone_retention_ms().has_value() &&
(updated_config.cloud_storage_enabled()
|| updated_config.cloud_storage_enable_remote_read()
|| updated_config.cloud_storage_enable_remote_write())) {
auto name = ss::sstring(updated_config.tombstone_retention_ms.name());
result.errors[name] = ssx::sformat(
"cannot set {} if any of ({}, {}, {}) are enabled at the cluster "
"level",
updated_config.tombstone_retention_ms.name(),
updated_config.cloud_storage_enabled.name(),
updated_config.cloud_storage_enable_remote_read.name(),
updated_config.cloud_storage_enable_remote_write.name());
result.properties_to_unset.insert(name);
}
return result;
}

std::unique_ptr<configuration> make_config() {
// Constructing `configuration` requires about 90KB of stack space in debug.
// In some tests this makes us run out of stack space/into stackoverflows as
Expand Down
24 changes: 24 additions & 0 deletions src/v/config/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@

#include <cctype>
#include <chrono>
#include <map>
#include <unordered_set>
#include <vector>

class monitor_unsafe;
Expand Down Expand Up @@ -731,6 +733,28 @@ struct configuration final : public config_store {

error_map_t load(const YAML::Node& root_node);

// Performs validation of the provided config after all updates have been
// applied. This allows for multi-property validation checks to be
// implemented. For example, certain properties may be mutually exclusive in
// terms of being toggled, and standard property validation cannot take this
// into account.
//
// Returns a config_validation_result, which contains both a set of
// properties which would need to be unset for the config to be valid, and a
// map of related errors. It is not guaranteed that every key in
// `properties_to_unset` has a key in `errors`. However, there should be
// related properties which describe the error encountered (e.g
// `cloud_storage_bucket` being `null` when `cloud_storage_enabled` is set
// to `true` causes `cloud_storage_enabled` to become unset in
// `properties_to_unset`, but the associated error is under the key
// `cloud_storage_bucket` in `errors`).
struct config_validation_result {
std::unordered_set<ss::sstring> properties_to_unset;
std::map<ss::sstring, ss::sstring> errors;
};
static config_validation_result
validate_config(const config::configuration& config);

public:
development_feature_property<bool> development_enable_cloud_topics;

Expand Down
Loading