From f3535397ebcdb37b783abdfb5207f3a11a86ec70 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 3 Dec 2024 12:33:24 +0000 Subject: [PATCH] Add schema to allow further controls of tictacaae (#46) (#48) * Add schema to allow further controls of tictacaae This allows: - setting of log_level for AAE Backend; - setting of log_level for TictacAAE overall (hidden); - setting of compression method (support switch to better performing zstd). Previously all AAE backends had the database ID 2 ^ 16 - 1. They now have an ID 2 ^ 16 + Partition Number (e.g. with a ring size of 1024 the partition numbers will be 0 to 1023). This allows leveled logs in parallel mode AAE to be distinguished from leveled vnode logs, and also distinguished between vnodes. * Revert to openriak-3.2 after merge of dependent PR --- priv/riak_kv.schema | 37 ++++++++++++++++++--- src/riak_kv_leveled_backend.erl | 6 ++-- src/riak_kv_tictacaae_repairs.erl | 19 +++++++++-- src/riak_kv_vnode.erl | 54 +++++++++++++++++++++++++------ 4 files changed, 97 insertions(+), 19 deletions(-) diff --git a/priv/riak_kv.schema b/priv/riak_kv.schema index 0563b3715..ebb54764b 100644 --- a/priv/riak_kv.schema +++ b/priv/riak_kv.schema @@ -129,21 +129,21 @@ ]}. %% @doc Minimum Rebuild Wait -%% The minimum number of hours to wait between rebuilds. Default value is 2 +%% The minimum number of hours to wait between rebuilds. Default value is 12 %% weeks {mapping, "tictacaae_rebuildwait", "riak_kv.tictacaae_rebuildwait", [ {datatype, integer}, - {default, 336} + {default, 2016} ]}. %% @doc Maximum Rebuild Delay %% The number of seconds which represents the length of the period in which the %% next rebuild will be scheduled. So if all vnodes are scheduled to rebuild %% at the same time, they will actually rebuild randomly between 0 an this -%% value (in seconds) after the rebuild time. Default value is 4 days +%% value (in seconds) after the rebuild time. 
Default value is 30 days {mapping, "tictacaae_rebuilddelay", "riak_kv.tictacaae_rebuilddelay", [ {datatype, integer}, - {default, 345600} + {default, 2592000} ]}. %% @doc Store heads in parallel key stores @@ -256,6 +256,35 @@ hidden ]}. +%% @doc Tictacaae - change log level of parallel keystore +%% Backend logging within leveled is relatively verbose, so in some cases +%% there may be a preference of running leveled at warning not info. This will +%% only change logging in parallel-mode aae backends, not of leveled when +%% running as a vnode backend +{mapping, "tictacaae_backendloglevel", "riak_kv.tictacaae_backendloglevel", [ + {datatype, {enum, [debug, info, warning]}}, + {default, info} +]}. + +%% @doc Tictacaae - change log level of tictacaae +%% Set the log level for each aae_controller, and also for aae exchanges. It +%% is strongly recommended to keep this at info. +{mapping, "tictacaae_loglevel", "riak_kv.tictacaae_loglevel", [ + {datatype, {enum, [info, warning]}}, + {default, info}, + hidden +]}. + +%% @doc Tictacaae - change compression method of parallel keystore +%% Better performance is expected when using the NIF-based zstd compression. +%% Actual compression achieved is similar to native (zlib). Native will +%% compress using the built-in compression option for Erlang term_to_binary. +%% This will only change compression in parallel-mode aae backends, not of +%% leveled when running as a vnode backend +{mapping, "tictacaae_backendcompression", "riak_kv.tictacaae_backendcompression", [ + {datatype, {enum, [native, zstd]}}, + {default, native} +]}. %% @doc Pool Strategy - should a single node_worker_pool or multiple pools be %% used for queueing potentially longer-running "background" queries diff --git a/src/riak_kv_leveled_backend.erl b/src/riak_kv_leveled_backend.erl index aade73f39..e43ac7e8a 100644 --- a/src/riak_kv_leveled_backend.erl +++ b/src/riak_kv_leveled_backend.erl @@ -37,9 +37,9 @@ callback/3]). 
%% Extended KV Backend API --export([head/3, - fold_heads/4, - return_self/1]). +-export([head/3, fold_heads/4, return_self/1]). + +-export([generate_partition_identity/1]). -include("riak_kv_index.hrl"). diff --git a/src/riak_kv_tictacaae_repairs.erl b/src/riak_kv_tictacaae_repairs.erl index 4a444a39f..d699fe15c 100644 --- a/src/riak_kv_tictacaae_repairs.erl +++ b/src/riak_kv_tictacaae_repairs.erl @@ -22,7 +22,7 @@ %% @doc Various functions that are useful for repairing entropy via tictac aae -module(riak_kv_tictacaae_repairs). --export([prompt_tictac_exchange/7, log_tictac_result/4]). +-export([prompt_tictac_exchange/7, log_tictac_result/4, aae_loglevels/0]). -include_lib("kernel/include/logger.hrl"). @@ -49,6 +49,19 @@ %% Public API %% =================================================================== +-spec aae_loglevels() -> aae_util:log_levels(). +aae_loglevels() -> + case app_helper:get_env(riak_kv, tictacaae_loglevel) of + warning -> + % Peculiarity of tictacaae logging is that a list of levels + % at which logging is required is passed in, not just the log + % level. Note also use of warn not warning. + % TODO: warn -> warning in 3.4 + % TODO: should fix the log_levels not log_level issue too + [warn, error, critical]; + info -> + [info, warn, error, critical] + end. 
-spec prompt_tictac_exchange({riak_core_ring:partition_id(), node()}, {riak_core_ring:partition_id(), node()}, @@ -83,7 +96,9 @@ prompt_tictac_exchange(LocalVnode, RemoteVnode, IndexN, [{scan_timeout, ScanTimeout}, {transition_pause_ms, ExchangePause}, {purpose, kv_aae}, - {max_results, MaxResults}], + {max_results, MaxResults}, + {log_levels, aae_loglevels()} + ], BlueList = [{riak_kv_vnode:aae_send(LocalVnode), [IndexN]}], diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 6cbf33845..85f75e723 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -343,13 +343,47 @@ maybe_start_aaecontroller(active, State=#state{mod=Mod, StoreHead = app_helper:get_env(riak_kv, tictacaae_storeheads), ObjSplitFun = riak_object:aae_from_object_binary(StoreHead), + LeveledOpts = aae_keystore:store_generate_backendoptions(), + Compression = app_helper:get_env(riak_kv, tictacaae_backendcompression), + BackendLogLevel = + case app_helper:get_env(riak_kv, tictacaae_backendloglevel) of + warning -> + % Leveled used warn, but future releases will use logger + % based logging, where warning is used, so future proofing + % configuration option here. 
+ % TODO: Case statement not required in 3.4 + warn; + Other -> + Other + end, + DatabaseID = + 65536 + riak_kv_leveled_backend:generate_partition_identity(Partition), + UpdLeveledOpts = + aae_keystore:store_setbackendoption( + database_id, + DatabaseID, + aae_keystore:store_setbackendoption( + compression_method, + Compression, + aae_keystore:store_setbackendoption( + log_level, + BackendLogLevel, + LeveledOpts) + ) + ), + AAELogLevels = riak_kv_tictacaae_repairs:aae_loglevels(), + {ok, AAECntrl} = - aae_controller:aae_start(KeyStoreType, - IsEmpty, - {RW, RD}, - Preflists, - RootPath, - ObjSplitFun), + aae_controller:aae_start( + KeyStoreType, + IsEmpty, + {RW, RD}, + Preflists, + RootPath, + ObjSplitFun, + AAELogLevels, + UpdLeveledOpts + ), ?LOG_INFO("AAE Controller started with pid=~w", [AAECntrl]), InitD = erlang:phash2(Partition, 256), @@ -358,10 +392,10 @@ maybe_start_aaecontroller(active, State=#state{mod=Mod, % the points wrapping every 256 vnodes (assuming coordinated restart) FirstRebuildDelay = RTick + ((RTick div 256) * InitD), FirstExchangeDelay = XTick + ((XTick div 256) * InitD), - riak_core_vnode:send_command_after(FirstRebuildDelay, - tictacaae_rebuildpoke), - riak_core_vnode:send_command_after(FirstExchangeDelay, - tictacaae_exchangepoke), + riak_core_vnode:send_command_after( + FirstRebuildDelay, tictacaae_rebuildpoke), + riak_core_vnode:send_command_after( + FirstExchangeDelay, tictacaae_exchangepoke), InitalStep = case StepInitialTick of