From 4c05b7ab0140ca707fed2cfcc4d0c1656b13fe7d Mon Sep 17 00:00:00 2001 From: "Zhanxiang (Patrick) Huang" Date: Tue, 29 Oct 2024 16:50:12 +0800 Subject: [PATCH] fix: reject invalid storage cache capacities configs (#19016) --- src/common/src/config.rs | 25 ++++- src/compute/src/memory/config.rs | 106 +++++++++++++----- src/config/docs.md | 62 +++++++++- .../regress/data/expected/create_table.out | 2 +- src/tests/regress/data/sql/create_table.sql | 2 +- 5 files changed, 157 insertions(+), 40 deletions(-) diff --git a/src/common/src/config.rs b/src/common/src/config.rs index 60824468cce0d..b2020b2ac0542 100644 --- a/src/common/src/config.rs +++ b/src/common/src/config.rs @@ -318,6 +318,7 @@ pub struct MetaConfig { pub cut_table_size_limit: u64, #[serde(default, flatten)] + #[config_doc(omitted)] pub unrecognized: Unrecognized, /// Whether config object storage bucket lifecycle to purge stale data. @@ -651,22 +652,36 @@ pub use risingwave_common_metrics::MetricLevel; /// the section `[storage.cache]` in `risingwave.toml`. #[derive(Clone, Debug, Serialize, Deserialize, DefaultFromSerde, ConfigDoc)] pub struct CacheConfig { + /// Configure the capacity of the block cache in MB explicitly. + /// The overridden value will only be effective if: + /// 1. `meta_cache_capacity_mb` and `shared_buffer_capacity_mb` are also configured explicitly. + /// 2. `block_cache_capacity_mb` + `meta_cache_capacity_mb` + `meta_cache_capacity_mb` doesn't exceed 0.3 * non-reserved memory. #[serde(default)] pub block_cache_capacity_mb: Option, + /// Configure the number of shards in the block cache explicitly. + /// If not set, the shard number will be determined automatically based on cache capacity. #[serde(default)] pub block_cache_shard_num: Option, #[serde(default)] + #[config_doc(omitted)] pub block_cache_eviction: CacheEvictionConfig, + /// Configure the capacity of the block cache in MB explicitly. + /// The overridden value will only be effective if: + /// 1. `block_cache_capacity_mb` and `shared_buffer_capacity_mb` are also configured explicitly. + /// 2. `block_cache_capacity_mb` + `meta_cache_capacity_mb` + `meta_cache_capacity_mb` doesn't exceed 0.3 * non-reserved memory. #[serde(default)] pub meta_cache_capacity_mb: Option, + /// Configure the number of shards in the meta cache explicitly. + /// If not set, the shard number will be determined automatically based on cache capacity. #[serde(default)] pub meta_cache_shard_num: Option, #[serde(default)] + #[config_doc(omitted)] pub meta_cache_eviction: CacheEvictionConfig, } @@ -710,8 +725,10 @@ pub struct StorageConfig { #[serde(default = "default::storage::share_buffer_compaction_worker_threads_number")] pub share_buffer_compaction_worker_threads_number: u32, - /// Maximum shared buffer size, writes attempting to exceed the capacity will stall until there - /// is enough space. + /// Configure the maximum shared buffer size in MB explicitly. Writes attempting to exceed the capacity + /// will stall until there is enough space. The overridden value will only be effective if: + /// 1. `block_cache_capacity_mb` and `meta_cache_capacity_mb` are also configured explicitly. + /// 2. `block_cache_capacity_mb` + `meta_cache_capacity_mb` + `meta_cache_capacity_mb` doesn't exceed 0.3 * non-reserved memory. #[serde(default)] pub shared_buffer_capacity_mb: Option, @@ -735,6 +752,7 @@ pub struct StorageConfig { pub write_conflict_detection_enabled: bool, #[serde(default)] + #[config_doc(nested)] pub cache: CacheConfig, /// DEPRECATED: This config will be deprecated in the future version, use `storage.cache.block_cache_capacity_mb` instead. @@ -789,12 +807,15 @@ pub struct StorageConfig { pub min_sstable_size_mb: u32, #[serde(default)] + #[config_doc(nested)] pub data_file_cache: FileCacheConfig, #[serde(default)] + #[config_doc(nested)] pub meta_file_cache: FileCacheConfig, #[serde(default)] + #[config_doc(nested)] pub cache_refill: CacheRefillConfig, /// Whether to enable streaming upload for sstable. diff --git a/src/compute/src/memory/config.rs b/src/compute/src/memory/config.rs index fdba52884f47e..b74da7d9c481f 100644 --- a/src/compute/src/memory/config.rs +++ b/src/compute/src/memory/config.rs @@ -37,7 +37,6 @@ const COMPACTOR_MEMORY_PROPORTION: f64 = 0.1; const STORAGE_BLOCK_CACHE_MEMORY_PROPORTION: f64 = 0.3; -const STORAGE_META_CACHE_MAX_MEMORY_MB: usize = 4096; const STORAGE_SHARED_BUFFER_MAX_MEMORY_MB: usize = 4096; const STORAGE_META_CACHE_MEMORY_PROPORTION: f64 = 0.35; const STORAGE_SHARED_BUFFER_MEMORY_PROPORTION: f64 = 0.3; @@ -66,6 +65,14 @@ pub fn reserve_memory_bytes(opts: &ComputeNodeOpts) -> (usize, usize) { // Should have at least `MIN_SYSTEM_RESERVED_MEMORY_MB` for reserved memory. let reserved = std::cmp::max(reserved, MIN_SYSTEM_RESERVED_MEMORY_MB << 20); + if reserved >= opts.total_memory_bytes { + panic!( + "reserved memory ({}) >= total memory ({}).", + convert(reserved as _), + convert(opts.total_memory_bytes as _) + ); + } + (reserved, opts.total_memory_bytes - reserved) } @@ -107,24 +114,60 @@ pub fn storage_memory_config( } else { (STORAGE_MEMORY_PROPORTION + COMPACTOR_MEMORY_PROPORTION, 0.0) }; - let mut default_block_cache_capacity_mb = ((non_reserved_memory_bytes as f64 - * storage_memory_proportion - * STORAGE_BLOCK_CACHE_MEMORY_PROPORTION) - .ceil() as usize) - >> 20; - let default_meta_cache_capacity_mb = ((non_reserved_memory_bytes as f64 - * storage_memory_proportion - * STORAGE_META_CACHE_MEMORY_PROPORTION) - .ceil() as usize) - >> 20; - let meta_cache_capacity_mb = storage_config.cache.meta_cache_capacity_mb.unwrap_or( + + let storage_memory_bytes = non_reserved_memory_bytes as f64 * storage_memory_proportion; + + // Only if the all cache capacities are specified and their sum doesn't exceed the max allowed storage_memory_bytes, will we use them. + // Other invalid combination of the cache capacities config will be ignored. + let ( + config_block_cache_capacity_mb, + config_meta_cache_capacity_mb, + config_shared_buffer_capacity_mb, + ) = match ( + storage_config.cache.block_cache_capacity_mb, + storage_config.cache.meta_cache_capacity_mb, + storage_config.shared_buffer_capacity_mb, + ) { + ( + Some(block_cache_capacity_mb), + Some(meta_cache_capacity_mb), + Some(shared_buffer_capacity_mb), + ) => { + let config_storage_memory_bytes = + (block_cache_capacity_mb + meta_cache_capacity_mb + shared_buffer_capacity_mb) + << 20; + if config_storage_memory_bytes as f64 > storage_memory_bytes { + tracing::warn!( + "config block_cache_capacity_mb {} + meta_cache_capacity_mb {} + shared_buffer_capacity_mb {} = {} exceeds allowed storage_memory_bytes {}. These configs will be ignored.", + block_cache_capacity_mb, meta_cache_capacity_mb, shared_buffer_capacity_mb, convert(config_storage_memory_bytes as _), convert(storage_memory_bytes as _) + ); + (None, None, None) + } else { + ( + Some(block_cache_capacity_mb), + Some(meta_cache_capacity_mb), + Some(shared_buffer_capacity_mb), + ) + } + } + c => { + tracing::warn!( + "config (block_cache_capacity_mb, meta_cache_capacity_mb, shared_buffer_capacity_mb): {:?} should be set altogether. These configs will be ignored.", + c + ); + (None, None, None) + } + }; + + let mut default_block_cache_capacity_mb = + ((storage_memory_bytes * STORAGE_BLOCK_CACHE_MEMORY_PROPORTION).ceil() as usize) >> 20; + let default_meta_cache_capacity_mb = + ((storage_memory_bytes * STORAGE_META_CACHE_MEMORY_PROPORTION).ceil() as usize) >> 20; + let meta_cache_capacity_mb = config_meta_cache_capacity_mb.unwrap_or( // adapt to old version storage_config .meta_cache_capacity_mb - .unwrap_or(std::cmp::min( - default_meta_cache_capacity_mb, - STORAGE_META_CACHE_MAX_MEMORY_MB, - )), + .unwrap_or(default_meta_cache_capacity_mb), ); let prefetch_buffer_capacity_mb = storage_config @@ -137,18 +180,12 @@ pub fn storage_memory_config( default_block_cache_capacity_mb.saturating_sub(meta_cache_capacity_mb); } - let default_shared_buffer_capacity_mb = ((non_reserved_memory_bytes as f64 - * storage_memory_proportion - * STORAGE_SHARED_BUFFER_MEMORY_PROPORTION) - .ceil() as usize) - >> 20; - let mut shared_buffer_capacity_mb = - storage_config - .shared_buffer_capacity_mb - .unwrap_or(std::cmp::min( - default_shared_buffer_capacity_mb, - STORAGE_SHARED_BUFFER_MAX_MEMORY_MB, - )); + let default_shared_buffer_capacity_mb = + ((storage_memory_bytes * STORAGE_SHARED_BUFFER_MEMORY_PROPORTION).ceil() as usize) >> 20; + let mut shared_buffer_capacity_mb = config_shared_buffer_capacity_mb.unwrap_or(std::cmp::min( + default_shared_buffer_capacity_mb, + STORAGE_SHARED_BUFFER_MAX_MEMORY_MB, + )); if is_serving { default_block_cache_capacity_mb += default_shared_buffer_capacity_mb; // set 1 to pass internal check @@ -158,7 +195,7 @@ pub fn storage_memory_config( default_block_cache_capacity_mb = default_block_cache_capacity_mb.saturating_sub(shared_buffer_capacity_mb); } - let block_cache_capacity_mb = storage_config.cache.block_cache_capacity_mb.unwrap_or( + let block_cache_capacity_mb = config_block_cache_capacity_mb.unwrap_or( // adapt to old version storage_config .block_cache_capacity_mb @@ -353,6 +390,7 @@ mod tests { let total_non_reserved_memory_bytes = 8 << 30; + // Embedded compactor enabled, streaming node let memory_config = storage_memory_config( total_non_reserved_memory_bytes, true, @@ -364,6 +402,7 @@ mod tests { assert_eq!(memory_config.shared_buffer_capacity_mb, 737); assert_eq!(memory_config.compactor_memory_limit_mb, 819); + // Embedded compactor disabled, serving node let memory_config = storage_memory_config( total_non_reserved_memory_bytes, false, @@ -375,6 +414,7 @@ mod tests { assert_eq!(memory_config.shared_buffer_capacity_mb, 1); assert_eq!(memory_config.compactor_memory_limit_mb, 0); + // Embedded compactor enabled, streaming node, file cache storage_config.data_file_cache.dir = "data".to_string(); storage_config.meta_file_cache.dir = "meta".to_string(); let memory_config = storage_memory_config( @@ -388,11 +428,17 @@ mod tests { assert_eq!(memory_config.shared_buffer_capacity_mb, 737); assert_eq!(memory_config.compactor_memory_limit_mb, 819); + // Embedded compactor enabled, streaming node, file cache, cache capacities specified storage_config.cache.block_cache_capacity_mb = Some(512); storage_config.cache.meta_cache_capacity_mb = Some(128); storage_config.shared_buffer_capacity_mb = Some(1024); storage_config.compactor_memory_limit_mb = Some(512); - let memory_config = storage_memory_config(0, true, &storage_config, false); + let memory_config = storage_memory_config( + total_non_reserved_memory_bytes, + true, + &storage_config, + false, + ); assert_eq!(memory_config.block_cache_capacity_mb, 512); assert_eq!(memory_config.meta_cache_capacity_mb, 128); assert_eq!(memory_config.shared_buffer_capacity_mb, 1024); diff --git a/src/config/docs.md b/src/config/docs.md index 4f2bfe69ff60e..4a25867ed63c1 100644 --- a/src/config/docs.md +++ b/src/config/docs.md @@ -68,7 +68,6 @@ This page is automatically generated by `./risedev generate-example-config` | table_stat_low_write_throughput_ratio_for_merge | To merge the compaction group when the low throughput statistics of the group exceeds the threshold. | 0.7 | | table_stat_throuput_window_seconds_for_merge | The window seconds of table throughput statistic history for merge compaction group. | 240 | | table_stat_throuput_window_seconds_for_split | The window seconds of table throughput statistic history for split compaction group. | 60 | -| unrecognized | | | | vacuum_interval_sec | Interval of invoking a vacuum job, to remove stale metadata from meta store and objects from object store. | 30 | | vacuum_spin_interval_ms | The spin interval inside a vacuum job. It avoids the vacuum job monopolizing resources of meta node. | 200 | @@ -119,8 +118,6 @@ This page is automatically generated by `./risedev generate-example-config` | Config | Description | Default | |--------|-------------|---------| | block_cache_capacity_mb | DEPRECATED: This config will be deprecated in the future version, use `storage.cache.block_cache_capacity_mb` instead. | | -| cache | | | -| cache_refill | | | | check_compaction_result | | false | | compact_iter_recreate_timeout_ms | | 600000 | | compactor_concurrent_uploading_sst_count | The concurrent uploading number of `SSTables` of buidler | | @@ -133,7 +130,6 @@ This page is automatically generated by `./risedev generate-example-config` | compactor_max_task_multiplier | Compactor calculates the maximum number of tasks that can be executed on the node based on `worker_num` and `compactor_max_task_multiplier`. `max_pull_task_count` = `worker_num` * `compactor_max_task_multiplier` | 3.0 | | compactor_memory_available_proportion | The percentage of memory available when compactor is deployed separately. `non_reserved_memory_bytes` = `system_memory_available_bytes` * `compactor_memory_available_proportion` | 0.8 | | compactor_memory_limit_mb | | | -| data_file_cache | | | | disable_remote_compactor | | false | | enable_fast_compaction | | true | | high_priority_ratio_in_percent | DEPRECATED: This config will be deprecated in the future version, use `storage.cache.block_cache_eviction.high_priority_ratio_in_percent` with `storage.cache.block_cache_eviction.algorithm = "Lru"` instead. | | @@ -146,7 +142,6 @@ This page is automatically generated by `./risedev generate-example-config` | max_version_pinning_duration_sec | | 10800 | | mem_table_spill_threshold | The spill threshold for mem table. | 4194304 | | meta_cache_capacity_mb | DEPRECATED: This config will be deprecated in the future version, use `storage.cache.meta_cache_capacity_mb` instead. | | -| meta_file_cache | | | | min_sst_size_for_streaming_upload | Whether to enable streaming upload for sstable. | 33554432 | | min_sstable_size_mb | | 32 | | object_store | Object storage configuration 1. General configuration 2. Some special configuration of Backend 3. Retry and timeout configuration | | @@ -154,7 +149,7 @@ This page is automatically generated by `./risedev generate-example-config` | share_buffer_compaction_worker_threads_number | Worker threads number of dedicated tokio runtime for share buffer compaction. 0 means use tokio's default value (number of CPU core). | 4 | | share_buffer_upload_concurrency | Number of tasks shared buffer can upload in parallel. | 8 | | share_buffers_sync_parallelism | parallelism while syncing share buffers into L0 SST. Should NOT be 0. | 1 | -| shared_buffer_capacity_mb | Maximum shared buffer size, writes attempting to exceed the capacity will stall until there is enough space. | | +| shared_buffer_capacity_mb | Configure the maximum shared buffer size in MB explicitly. Writes attempting to exceed the capacity will stall until there is enough space. The overridden value will only be effective if: 1. `block_cache_capacity_mb` and `meta_cache_capacity_mb` are also configured explicitly. 2. `block_cache_capacity_mb` + `meta_cache_capacity_mb` + `meta_cache_capacity_mb` doesn't exceed 0.3 * non-reserved memory. | | | shared_buffer_flush_ratio | The shared buffer will start flushing data to object when the ratio of memory usage to the shared buffer capacity exceed such ratio. | 0.800000011920929 | | shared_buffer_min_batch_flush_size_mb | The minimum total flush size of shared buffer spill. When a shared buffer spilled is trigger, the total flush size across multiple epochs should be at least higher than this size. | 800 | | sstable_id_remote_fetch_number | Number of SST ids fetched from meta per RPC | 10 | @@ -162,6 +157,61 @@ This page is automatically generated by `./risedev generate-example-config` | time_travel_version_cache_capacity | | 32 | | write_conflict_detection_enabled | Whether to enable write conflict detection | true | +## storage.cache + +| Config | Description | Default | +|--------|-------------|---------| +| block_cache_capacity_mb | Configure the capacity of the block cache in MB explicitly. The overridden value will only be effective if: 1. `meta_cache_capacity_mb` and `shared_buffer_capacity_mb` are also configured explicitly. 2. `block_cache_capacity_mb` + `meta_cache_capacity_mb` + `meta_cache_capacity_mb` doesn't exceed 0.3 * non-reserved memory. | | +| block_cache_shard_num | Configure the number of shards in the block cache explicitly. If not set, the shard number will be determined automatically based on cache capacity. | | +| meta_cache_capacity_mb | Configure the capacity of the block cache in MB explicitly. The overridden value will only be effective if: 1. `block_cache_capacity_mb` and `shared_buffer_capacity_mb` are also configured explicitly. 2. `block_cache_capacity_mb` + `meta_cache_capacity_mb` + `meta_cache_capacity_mb` doesn't exceed 0.3 * non-reserved memory. | | +| meta_cache_shard_num | Configure the number of shards in the meta cache explicitly. If not set, the shard number will be determined automatically based on cache capacity. | | + +## storage.cache_refill + +| Config | Description | Default | +|--------|-------------|---------| +| concurrency | Inflight data cache refill tasks. | 10 | +| data_refill_levels | `SSTable` levels to refill. | [] | +| recent_filter_layers | Recent filter layer count. | 6 | +| recent_filter_rotate_interval_ms | Recent filter layer rotate interval. | 10000 | +| threshold | Data cache refill unit admission ratio. Only unit whose blocks are admitted above the ratio will be refilled. | 0.5 | +| timeout_ms | Cache refill maximum timeout to apply version delta. | 6000 | +| unit | Block count that a data cache refill request fetches. | 64 | + +## storage.data_file_cache + +| Config | Description | Default | +|--------|-------------|---------| +| capacity_mb | | 1024 | +| compression | | "None" | +| dir | | "" | +| file_capacity_mb | | 64 | +| flush_buffer_threshold_mb | | | +| flushers | | 4 | +| indexer_shards | | 64 | +| insert_rate_limit_mb | | 0 | +| reclaimers | | 4 | +| recover_concurrency | | 8 | +| recover_mode | Recover mode. Options: - "None": Do not recover disk cache. - "Quiet": Recover disk cache and skip errors. - "Strict": Recover disk cache and panic on errors. More details, see [`RecoverMode::None`], [`RecoverMode::Quiet`] and [`RecoverMode::Strict`], | "None" | +| runtime_config | | | + +## storage.meta_file_cache + +| Config | Description | Default | +|--------|-------------|---------| +| capacity_mb | | 1024 | +| compression | | "None" | +| dir | | "" | +| file_capacity_mb | | 64 | +| flush_buffer_threshold_mb | | | +| flushers | | 4 | +| indexer_shards | | 64 | +| insert_rate_limit_mb | | 0 | +| reclaimers | | 4 | +| recover_concurrency | | 8 | +| recover_mode | Recover mode. Options: - "None": Do not recover disk cache. - "Quiet": Recover disk cache and skip errors. - "Strict": Recover disk cache and panic on errors. More details, see [`RecoverMode::None`], [`RecoverMode::Quiet`] and [`RecoverMode::Strict`], | "None" | +| runtime_config | | | + ## streaming | Config | Description | Default | diff --git a/src/tests/regress/data/expected/create_table.out b/src/tests/regress/data/expected/create_table.out index a958b84979992..6248ca2c21222 100644 --- a/src/tests/regress/data/expected/create_table.out +++ b/src/tests/regress/data/expected/create_table.out @@ -1016,7 +1016,7 @@ create table parted_notnull_inh_test1 partition of parted_notnull_inh_test (a no insert into parted_notnull_inh_test (b) values (null); ERROR: null value in column "b" of relation "parted_notnull_inh_test1" violates not-null constraint DETAIL: Failing row contains (1, null). --- note that while b's default is overriden, a's default is preserved +-- note that while b's default is overridden, a's default is preserved \d parted_notnull_inh_test1 Table "public.parted_notnull_inh_test1" Column | Type | Collation | Nullable | Default diff --git a/src/tests/regress/data/sql/create_table.sql b/src/tests/regress/data/sql/create_table.sql index cc41f58ba22a7..7f549e966a8f0 100644 --- a/src/tests/regress/data/sql/create_table.sql +++ b/src/tests/regress/data/sql/create_table.sql @@ -801,7 +801,7 @@ CREATE TABLE part_c_1_10 PARTITION OF part_c FOR VALUES FROM (1) TO (10); create table parted_notnull_inh_test (a int default 1, b int not null default 0) partition by list (a); create table parted_notnull_inh_test1 partition of parted_notnull_inh_test (a not null, b default 1) for values in (1); insert into parted_notnull_inh_test (b) values (null); --- note that while b's default is overriden, a's default is preserved +-- note that while b's default is overridden, a's default is preserved \d parted_notnull_inh_test1 drop table parted_notnull_inh_test;