diff --git a/src/v/cloud_storage/cache_service.cc b/src/v/cloud_storage/cache_service.cc index 6cb0614200e1..1e5b1e50f8c6 100644 --- a/src/v/cloud_storage/cache_service.cc +++ b/src/v/cloud_storage/cache_service.cc @@ -14,7 +14,6 @@ #include "cloud_storage/access_time_tracker.h" #include "cloud_storage/logger.h" #include "cloud_storage/recursive_directory_walker.h" -#include "config/configuration.h" #include "seastar/util/file.hh" #include "ssx/future-util.h" #include "ssx/sformat.h" @@ -2005,27 +2004,4 @@ ss::future<> cache::sync_access_time_tracker( } } -std::optional -cache::validate_cache_config(const config::configuration& conf) { - const auto& cloud_storage_cache_size = conf.cloud_storage_cache_size; - const auto& cloud_storage_cache_size_pct - = conf.cloud_storage_cache_size_percent; - - // If not set, cloud cache uses default value of 0.0 - auto cache_size_pct = cloud_storage_cache_size_pct().value_or(0.0); - - using cache_size_pct_type = double; - static constexpr auto epsilon - = std::numeric_limits::epsilon(); - - if ((cache_size_pct < epsilon) && (cloud_storage_cache_size() == 0)) { - return ss::format( - "Cannot set both {} and {} to 0.", - cloud_storage_cache_size.name(), - cloud_storage_cache_size_pct.name()); - } - - return std::nullopt; -} - } // namespace cloud_storage diff --git a/src/v/cloud_storage/cache_service.h b/src/v/cloud_storage/cache_service.h index 386aac358fc1..779f4af142cc 100644 --- a/src/v/cloud_storage/cache_service.h +++ b/src/v/cloud_storage/cache_service.h @@ -167,16 +167,6 @@ class cache return _cache_dir / key; } - // Checks if a cluster configuration is valid for the properties - // `cloud_storage_cache_size` and `cloud_storage_cache_size_percent`. - // Two cases are invalid: 1. the case in which both are 0, 2. the case in - // which `cache_size` is 0 while `cache_size_percent` is `std::nullopt`. - // - // Returns `std::nullopt` if the passed configuration is valid, or an - // `ss::sstring` explaining the misconfiguration otherwise. - static std::optional - validate_cache_config(const config::configuration& conf); - private: /// Load access time tracker from file ss::future<> load_access_time_tracker(); diff --git a/src/v/cluster/config_manager.cc b/src/v/cluster/config_manager.cc index fd73b8890088..9630e6c14b12 100644 --- a/src/v/cluster/config_manager.cc +++ b/src/v/cluster/config_manager.cc @@ -429,6 +429,27 @@ config_manager::preload(const YAML::Node& legacy_config) { } } + // Validate the local config on this shard. We will have to propagate + // changes to all shards in case of errors. + auto cfg_validation_result = config::configuration::validate_config( + config::shard_local_cfg()); + + for (const auto& [prop_name, error] : cfg_validation_result.errors) { + vlog( + clusterlog.warn, + "Misconfigured preload property {}: {}", + prop_name, + error); + } + + for (const auto& property_to_unset : + cfg_validation_result.properties_to_unset) { + co_await ss::smp::invoke_on_all([&property_to_unset]() { + config::shard_local_cfg().get(property_to_unset).reset(); + }); + result.raw_values.erase(property_to_unset); + } + co_return result; } diff --git a/src/v/cluster/config_manager.h b/src/v/cluster/config_manager.h index d3ea9ea9eabe..f9dc46273bf6 100644 --- a/src/v/cluster/config_manager.h +++ b/src/v/cluster/config_manager.h @@ -14,6 +14,7 @@ #include "cluster/commands.h" #include "cluster/fwd.h" #include "cluster/notification.h" +#include "config/configuration.h" #include "features/feature_table.h" #include "model/metadata.h" #include "model/record.h" diff --git a/src/v/config/configuration.cc b/src/v/config/configuration.cc index 176c901f8e2d..57f29589fa95 100644 --- a/src/v/config/configuration.cc +++ b/src/v/config/configuration.cc @@ -30,6 +30,41 @@ #include #include +namespace { +ss::sstring +join_properties(const std::vector>>>& props) { + return ssx::sformat("{}", fmt::join(props, ", ")); +} + +// Checks if a cluster configuration is valid for the properties +// `cloud_storage_cache_size` and `cloud_storage_cache_size_percent`. +// Two cases are invalid: 1. the case in which both are 0, 2. the case in +// which `cache_size` is 0 while `cache_size_percent` is `std::nullopt`. +// +// Returns `std::nullopt` if the passed configuration is valid, or an +// `ss::sstring` explaining the misconfiguration otherwise. +std::optional +validate_cloud_storage_cache_config(const config::configuration& conf) { + const auto& cloud_storage_cache_size = conf.cloud_storage_cache_size; + const auto& cloud_storage_cache_size_pct + = conf.cloud_storage_cache_size_percent; + + // If not set, cloud cache uses default value of 0.0 + auto cache_size_pct = cloud_storage_cache_size_pct().value_or(0.0); + + if ((cache_size_pct == 0.0) && (cloud_storage_cache_size() == 0)) { + return ss::format( + "Cannot set both {} and {} to 0.", + cloud_storage_cache_size.name(), + cloud_storage_cache_size_pct.name()); + } + + return std::nullopt; +} + +} // namespace + namespace config { using namespace std::chrono_literals; @@ -3919,6 +3954,160 @@ configuration::error_map_t configuration::load(const YAML::Node& root_node) { return config_store::read_yaml(root_node["redpanda"], std::move(ignore)); } +configuration::config_validation_result +configuration::validate_config(const config::configuration& updated_config) { + config_validation_result result; + if (updated_config.cloud_storage_enabled()) { + bool cloud_storage_enabled_is_valid = true; + // The properties that cloud_storage::configuration requires + // to be set if cloud storage is enabled. + using config_properties_seq = std::vector>>>; + + switch (updated_config.cloud_storage_credentials_source.value()) { + case model::cloud_credentials_source::config_file: { + config_properties_seq s3_properties = { + std::ref(updated_config.cloud_storage_region), + std::ref(updated_config.cloud_storage_bucket), + std::ref(updated_config.cloud_storage_access_key), + std::ref(updated_config.cloud_storage_secret_key), + }; + + config_properties_seq abs_properties = { + std::ref(updated_config.cloud_storage_azure_storage_account), + std::ref(updated_config.cloud_storage_azure_container), + std::ref(updated_config.cloud_storage_azure_shared_key), + }; + + std::array valid_configurations = { + s3_properties, abs_properties}; + + bool is_valid_configuration = std::any_of( + valid_configurations.begin(), + valid_configurations.end(), + [](const auto& config) { + return std::none_of( + config.begin(), config.end(), [](const auto& prop) { + return prop() == std::nullopt; + }); + }); + + if (!is_valid_configuration) { + result.errors[ss::sstring( + updated_config.cloud_storage_enabled.name())] + = ssx::sformat( + "To enable cloud storage you need to configure S3 or Azure " + "Blob Storage access. For S3 {} must be set. For ABS {} " + "must be set", + join_properties(s3_properties), + join_properties(abs_properties)); + cloud_storage_enabled_is_valid = false; + } + } break; + case model::cloud_credentials_source::aws_instance_metadata: + case model::cloud_credentials_source::gcp_instance_metadata: + case model::cloud_credentials_source::sts: { + // basic config checks for cloud_storage. for sts it is expected + // to receive part of the configuration via env variables, while + // aws_instance_metadata and gcp_instance_metadata do not + // require extra configuration + config_properties_seq properties = { + std::ref(updated_config.cloud_storage_region), + std::ref(updated_config.cloud_storage_bucket), + }; + + for (auto& p : properties) { + if (p() == std::nullopt) { + result.errors[ss::sstring(p.get().name())] = ssx::sformat( + "Must be set when cloud storage enabled with " + "cloud_storage_credentials_source = {}", + updated_config.cloud_storage_credentials_source.value()); + cloud_storage_enabled_is_valid = false; + } + } + } break; + case model::cloud_credentials_source::azure_aks_oidc_federation: { + // for azure_aks_oidc_federation it is expected to receive part + // of the configuration via env variables. this check is just + // for related cluster properties + config_properties_seq properties = { + std::ref(updated_config.cloud_storage_azure_storage_account), + std::ref(updated_config.cloud_storage_azure_container), + }; + + for (auto& p : properties) { + if (p() == std::nullopt) { + result.errors[ss::sstring(p.get().name())] + = "Must be set when cloud storage enabled with " + "cloud_storage_credentials_source = " + "azure_aks_oidc_federation"; + cloud_storage_enabled_is_valid = false; + } + } + } break; + case model::cloud_credentials_source::azure_vm_instance_metadata: { + // azure_vm_instance_metadata requires an client_id to work + // correctly + config_properties_seq properties = { + std::ref(updated_config.cloud_storage_azure_storage_account), + std::ref(updated_config.cloud_storage_azure_container), + std::ref(updated_config.cloud_storage_azure_managed_identity_id), + }; + + for (auto& p : properties) { + if (p() == std::nullopt) { + result.errors[ss::sstring(p.get().name())] + = "Must be set when cloud storage enabled with " + "cloud_storage_credentials_source = " + "azure_vm_instance_metadata"; + cloud_storage_enabled_is_valid = false; + } + } + } break; + } + + if (!cloud_storage_enabled_is_valid) { + auto name = ss::sstring( + updated_config.cloud_storage_enabled.name()); + result.properties_to_unset.insert(name); + } + } + + // cloud_storage_cache_size/size_percent validation + if (auto invalid_cache = validate_cloud_storage_cache_config( + updated_config); + invalid_cache.has_value()) { + auto cache_size_name = ss::sstring( + updated_config.cloud_storage_cache_size.name()); + auto cache_size_pct_name = ss::sstring( + updated_config.cloud_storage_cache_size_percent.name()); + result.errors[cache_size_name] = invalid_cache.value(); + result.properties_to_unset.insert(cache_size_name); + result.properties_to_unset.insert(cache_size_pct_name); + } + + // For simplicity's sake, cloud storage read/write permissions cannot be + // enabled at the same time as tombstone_retention_ms at the cluster + // level, to avoid the case in which topics are created with TS + // read/write permissions and bugs are encountered later with tombstone + // removal. + if (updated_config.tombstone_retention_ms().has_value() && + (updated_config.cloud_storage_enabled() + || updated_config.cloud_storage_enable_remote_read() + || updated_config.cloud_storage_enable_remote_write())) { + auto name = ss::sstring(updated_config.tombstone_retention_ms.name()); + result.errors[name] = ssx::sformat( + "cannot set {} if any of ({}, {}, {}) are enabled at the cluster " + "level", + updated_config.tombstone_retention_ms.name(), + updated_config.cloud_storage_enabled.name(), + updated_config.cloud_storage_enable_remote_read.name(), + updated_config.cloud_storage_enable_remote_write.name()); + result.properties_to_unset.insert(name); + } + return result; +} + std::unique_ptr make_config() { // Constructing `configuration` requires about 90KB of stack space in debug. // In some tests this makes us run out of stack space/into stackoverflows as diff --git a/src/v/config/configuration.h b/src/v/config/configuration.h index 69f96ea32e26..2c2539f7be5b 100644 --- a/src/v/config/configuration.h +++ b/src/v/config/configuration.h @@ -36,6 +36,8 @@ #include #include +#include +#include #include class monitor_unsafe; @@ -731,6 +733,28 @@ struct configuration final : public config_store { error_map_t load(const YAML::Node& root_node); + // Performs validation of the provided config after all updates have been + // applied. This allows for multi-property validation checks to be + // implemented. For example, certain properties may be mutually exclusive in + // terms of being toggled, and standard property validation cannot take this + // into account. + // + // Returns a config_validation_result, which contains both a set of + // properties which would need to be unset for the config to be valid, and a + // map of related errors. It is not guaranteed that every key in + // `properties_to_unset` has a key in `errors`. However, there should be + // related properties which describe the error encountered (e.g + // `cloud_storage_bucket` being `null` when `cloud_storage_enabled` is set + // to `true` causes `cloud_storage_enabled` to become unset in + // `properties_to_unset`, but the associated error is under the key + // `cloud_storage_bucket` in `errors`). + struct config_validation_result { + std::unordered_set properties_to_unset; + std::map errors; + }; + static config_validation_result + validate_config(const config::configuration& config); + public: development_feature_property development_enable_cloud_topics; diff --git a/src/v/redpanda/admin/server.cc b/src/v/redpanda/admin/server.cc index c42ecfbadf8f..a737c212a515 100644 --- a/src/v/redpanda/admin/server.cc +++ b/src/v/redpanda/admin/server.cc @@ -1546,23 +1546,6 @@ json::validator make_cluster_config_validator() { return json::validator(schema); } -ss::sstring -join_properties(const std::vector>>>& props) { - ss::sstring result = ""; - for (size_t idx = 0; const auto& prop : props) { - if (idx == props.size() - 1) { - result += ss::sstring{prop.get().name()}; - } else { - result += ssx::sformat("{}, ", prop.get().name()); - } - - ++idx; - }; - - return result; -} - /** * This function provides special case validation for configuration * properties that need to check other properties' values as well @@ -1607,110 +1590,6 @@ void config_multi_property_validation( } } - if (updated_config.cloud_storage_enabled()) { - // The properties that cloud_storage::configuration requires - // to be set if cloud storage is enabled. - using config_properties_seq = std::vector>>>; - - switch (updated_config.cloud_storage_credentials_source.value()) { - case model::cloud_credentials_source::config_file: { - config_properties_seq s3_properties = { - std::ref(updated_config.cloud_storage_region), - std::ref(updated_config.cloud_storage_bucket), - std::ref(updated_config.cloud_storage_access_key), - std::ref(updated_config.cloud_storage_secret_key), - }; - - config_properties_seq abs_properties = { - std::ref(updated_config.cloud_storage_azure_storage_account), - std::ref(updated_config.cloud_storage_azure_container), - std::ref(updated_config.cloud_storage_azure_shared_key), - }; - - std::array valid_configurations = { - s3_properties, abs_properties}; - - bool is_valid_configuration = std::any_of( - valid_configurations.begin(), - valid_configurations.end(), - [](const auto& config) { - return std::none_of( - config.begin(), config.end(), [](const auto& prop) { - return prop() == std::nullopt; - }); - }); - - if (!is_valid_configuration) { - errors["cloud_storage_enabled"] = ssx::sformat( - "To enable cloud storage you need to configure S3 or " - "Azure " - "Blob Storage access. For S3 {} must be set. For ABS {} " - "must be set", - join_properties(s3_properties), - join_properties(abs_properties)); - } - } break; - case model::cloud_credentials_source::aws_instance_metadata: - case model::cloud_credentials_source::gcp_instance_metadata: - case model::cloud_credentials_source::sts: { - // basic config checks for cloud_storage. for sts it is expected to - // receive part of the configuration via env variables, while - // aws_instance_metadata and gcp_instance_metadata do not require - // extra configuration - config_properties_seq properties = { - std::ref(updated_config.cloud_storage_region), - std::ref(updated_config.cloud_storage_bucket), - }; - - for (auto& p : properties) { - if (p() == std::nullopt) { - errors[ss::sstring(p.get().name())] = ssx::sformat( - "Must be set when cloud storage enabled with " - "cloud_storage_credentials_source = {}", - updated_config.cloud_storage_credentials_source.value()); - } - } - } break; - case model::cloud_credentials_source::azure_aks_oidc_federation: { - // for azure_aks_oidc_federation it is expected to receive part of - // the configuration via env variables. this check is just for - // related cluster properties - config_properties_seq properties = { - std::ref(updated_config.cloud_storage_azure_storage_account), - std::ref(updated_config.cloud_storage_azure_container), - }; - - for (auto& p : properties) { - if (p() == std::nullopt) { - errors[ss::sstring(p.get().name())] - = "Must be set when cloud storage enabled with " - "cloud_storage_credentials_source = " - "azure_aks_oidc_federation"; - } - } - } break; - case model::cloud_credentials_source::azure_vm_instance_metadata: { - // azure_vm_instance_metadata requires an client_id to work - // correctly - config_properties_seq properties = { - std::ref(updated_config.cloud_storage_azure_storage_account), - std::ref(updated_config.cloud_storage_azure_container), - std::ref(updated_config.cloud_storage_azure_managed_identity_id), - }; - - for (auto& p : properties) { - if (p() == std::nullopt) { - errors[ss::sstring(p.get().name())] - = "Must be set when cloud storage enabled with " - "cloud_storage_credentials_source = " - "azure_vm_instance_metadata"; - } - } - } break; - } - } - if ( updated_config.enable_schema_id_validation != pandaproxy::schema_registry::schema_id_validation_mode::none @@ -1720,30 +1599,9 @@ void config_multi_property_validation( "{} requires schema_registry to be enabled in redpanda.yaml", name); } - // cloud_storage_cache_size/size_percent validation - if (auto invalid_cache = cloud_storage::cache::validate_cache_config( - updated_config); - invalid_cache.has_value()) { - auto name = ss::sstring(updated_config.cloud_storage_cache_size.name()); - errors[name] = invalid_cache.value(); - } - - // For simplicity's sake, cloud storage read/write permissions cannot be - // enabled at the same time as tombstone_retention_ms at the cluster level, - // to avoid the case in which topics are created with TS read/write - // permissions and bugs are encountered later with tombstone removal. - if (updated_config.tombstone_retention_ms().has_value() && - (updated_config.cloud_storage_enabled() - || updated_config.cloud_storage_enable_remote_read() - || updated_config.cloud_storage_enable_remote_write())) { - errors["cloud_storage_enabled"] = ssx::sformat( - "cannot set {} if any of ({}, {}, {}) are enabled at the cluster " - "level", - updated_config.tombstone_retention_ms.name(), - updated_config.cloud_storage_enabled.name(), - updated_config.cloud_storage_enable_remote_read.name(), - updated_config.cloud_storage_enable_remote_write.name()); - } + auto cfg_validation_result = config::configuration::validate_config( + updated_config); + errors.merge(cfg_validation_result.errors); } } // namespace diff --git a/tests/rptest/tests/cluster_config_test.py b/tests/rptest/tests/cluster_config_test.py index dc783da092bd..c58ce7a8c5e2 100644 --- a/tests/rptest/tests/cluster_config_test.py +++ b/tests/rptest/tests/cluster_config_test.py @@ -2453,3 +2453,44 @@ def test_development_property_cannot_be_set(self): config = self.admin.get_cluster_config() value = config[self._property_name] assert int(value) == set_value, f"{value} != {set_value}" + + +class ClusterInvalidConfigPreloadTest(RedpandaTest): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.admin = Admin(self.redpanda) + self.rpk = RpkTool(self.redpanda) + + @cluster(num_nodes=3) + def test_invalid_config(self): + invalid_config_cache_yaml = """# Auto-generated, do not edit this file unless Redpanda will not start +_version: 1 +tombstone_retention_ms: 1200 +cloud_storage_enabled: true + """ + + for node in self.redpanda.nodes: + self.redpanda.stop_node(node) + cache_path = f"{self.redpanda.DATA_DIR}/config_cache.yaml" + assert node.account.exists(cache_path) + node.account.create_file(cache_path, invalid_config_cache_yaml) + self.redpanda.start_node(node) + + wait_until( + lambda: self.redpanda.search_log_all( + "Misconfigured preload property cloud_storage_enabled") and + self.redpanda.search_log_all( + "Misconfigured preload property tombstone_retention_ms"), + timeout_sec=30, + backoff_sec=1, + err_msg="Did not see \"misconfigured preload property \" message") + + # These properties should be reset to their defaults, + # instead of the being set to the ones provided in invalid_config_cache_yaml. + cluster_cfg = self.admin.get_cluster_config() + tombstone_retention_ms = cluster_cfg["tombstone_retention_ms"] + assert tombstone_retention_ms == None, f"Expected {None} for tombstone_retention_ms, got {tombstone_retention_ms}" + + cloud_storage_enabled = cluster_cfg["cloud_storage_enabled"] + assert cloud_storage_enabled == False, f"Expected {False} for cloud_storage_enabled, got {cloud_storage_enabled}"