From c9495d1ecc6dc8c988e52d455a23ded662b4985f Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 9 Jun 2023 13:51:06 +0100 Subject: [PATCH] Mas i1861 batch (#1377) * Improve reliability of test * Need to wait delay after last TS Otherwise the erase keys may start before the upper timestamp rolls over to the next second --- tests/nextgenrepl_deletemodes.erl | 38 ++++++++++++++++++------- tests/nextgenrepl_deletewithfailure.erl | 23 +++++++++++---- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/tests/nextgenrepl_deletemodes.erl b/tests/nextgenrepl_deletemodes.erl index 2c55aa389..8724c5628 100644 --- a/tests/nextgenrepl_deletemodes.erl +++ b/tests/nextgenrepl_deletemodes.erl @@ -19,11 +19,15 @@ -define(C_NVAL, 2). -define(KEY_COUNT, 10000). --define(LOOP_COUNT, 10). +-define(LOOP_COUNT, 4). -define(SNK_WORKERS, 4). -define(DELETE_WAIT, 8000). +%% This must be increased, otherwise tombstones may be reaped before their +%% presence can be checked in the test + +-define(TOMB_PAUSE, 2). -define(COMMMON_VAL_INIT, <<"CommonValueToWriteForAllObjects">>). -define(COMMMON_VAL_MOD, <<"CommonValueToWriteForAllModifiedObjects">>). @@ -50,9 +54,10 @@ {tictacaae_storeheads, true}, {tictacaae_rebuildwait, 4}, {tictacaae_rebuilddelay, 3600}, - {tictacaae_exchangetick, 120 * 1000}, + {tictacaae_exchangetick, 3600 * 1000}, % don't exchange during test {tictacaae_rebuildtick, 3600000}, % don't tick for an hour! {ttaaefs_maxresults, 128}, + {tombstone_pause, ?TOMB_PAUSE}, {delete_mode, DeleteMode} ]} ]). @@ -225,6 +230,8 @@ test_repl(Protocol, [ClusterA, ClusterB, ClusterC]) -> {NodeB1, ?B_NVAL, cluster_c}, {NodeC1ip, NodeC1port, ?C_NVAL}), + lager:info( + "*** Re-write and re-delete after initial tombstones reaped ***"), write_then_delete(NodeA1, NodeA2, NodeB1, NodeB2, NodeC1, NodeC2), lager:info("Find all tombstones in cluster A"), {ok, BKdhL1} = find_tombs(NodeA1, all, all), @@ -257,6 +264,8 @@ test_repl(Protocol, [ClusterA, ClusterB, ClusterC]) -> {ok, BKdhLC} = find_tombs(NodeC1, all, all), ?assertMatch(0, length(BKdhLC)), + lager:info( + "*** Re-re-write and re-re-delete after initial tombstones reaped ***"), write_then_delete(NodeA1, NodeA2, NodeB1, NodeB2, NodeC1, NodeC2), reap_from_cluster(NodeA1, {job, 1}), lager:info("Immediate reap count ~w after fsm managed reap", @@ -311,7 +320,7 @@ write_to_cluster(Node, Start, End, CommonValBin) -> {ok, C} = riak:client_connect(Node), F = fun(N, Acc) -> - Key = list_to_binary(io_lib:format("~8..0B~n", [N])), + Key = list_to_binary(io_lib:format("~8..0B", [N])), Obj = case CommonValBin of new_obj -> @@ -344,7 +353,7 @@ delete_from_cluster(Node, Start, End) -> {ok, C} = riak:client_connect(Node), F = fun(N, Acc) -> - Key = list_to_binary(io_lib:format("~8..0B~n", [N])), + Key = list_to_binary(io_lib:format("~8..0B", [N])), try riak_client:delete(?TEST_BUCKET, Key, C) of ok -> Acc; @@ -364,7 +373,7 @@ reap_from_cluster(Node, Start, End) -> {ok, C} = riak:client_connect(Node), F = fun(N, Acc) -> - Key = list_to_binary(io_lib:format("~8..0B~n", [N])), + Key = list_to_binary(io_lib:format("~8..0B", [N])), try riak_client:reap(?TEST_BUCKET, Key, C) of true -> Acc; @@ -414,14 +423,18 @@ read_from_cluster(Node, Start, End, CommonValBin, Errors) -> {ok, C} = riak:client_connect(Node), F = fun(N, Acc) -> - Key = list_to_binary(io_lib:format("~8..0B~n", [N])), + Key = list_to_binary(io_lib:format("~8..0B", [N])), case riak_client:get(?TEST_BUCKET, Key, C) of {ok, Obj} -> ExpectedVal = <>, - case riak_object:get_value(Obj) of - ExpectedVal -> + case riak_object:get_values(Obj) of + [ExpectedVal] -> Acc; - UnexpectedVal -> + Siblings when length(Siblings) > 1 -> + lager:info( + "Siblings for Key ~s:~n ~w", [Key, Obj]), + [{wrong_value, Key, siblings}|Acc]; + [UnexpectedVal] -> [{wrong_value, Key, UnexpectedVal}|Acc] end; {error, Error} -> @@ -514,13 +527,18 @@ write_then_delete(NodeA1, NodeA2, NodeB1, NodeB2, NodeC1, NodeC2) -> [NodeA2, 1, ?KEY_COUNT, ?COMMMON_VAL_INIT, undefined], ?KEY_COUNT, ?LOOP_COUNT), + lager:info( + "Waiting for delete wait before reading from delete_mode ~w cluster " + "as otherwise read may overlap with reap and prompt a repair", + [?DELETE_WAIT]), + timer:sleep(?DELETE_WAIT), ?KEY_COUNT = wait_for_outcome(?MODULE, read_from_cluster, [NodeC2, 1, ?KEY_COUNT, ?COMMMON_VAL_INIT, undefined], ?KEY_COUNT, ?LOOP_COUNT), - lager:info("Write and delete cylcle confirmed"). + lager:info("Write and delete cycle confirmed"). root_compare( diff --git a/tests/nextgenrepl_deletewithfailure.erl b/tests/nextgenrepl_deletewithfailure.erl index b8f0dbe08..4ea74904f 100644 --- a/tests/nextgenrepl_deletewithfailure.erl +++ b/tests/nextgenrepl_deletewithfailure.erl @@ -22,7 +22,7 @@ -define(KEY_COUNT, 10000). -define(UPDATE_COUNT, 2000). --define(LOOP_COUNT, 20). +-define(LOOP_COUNT, 16). -define(ACTION_DELAY, 2000). -define(SNK_WORKERS, 4). @@ -159,6 +159,7 @@ test_repl(Protocol, [ClusterA, ClusterB]) -> write_then_delete(NodeA1, NodeB1, ?KEY_COUNT + 1, ?KEY_COUNT * 2), timer:sleep(?ACTION_DELAY), SW1B = os:timestamp(), + timer:sleep(?ACTION_DELAY), {ok, K0} = aae_fold(NodeA1, Protocol, @@ -266,7 +267,9 @@ test_repl(Protocol, [ClusterA, ClusterB]) -> {EK0, 2} = lists:last(SKLA0), [{SK1, 2}|_RestA1] = SKLA1, {EK1, 2} = lists:last(SKLA1), - lager:info("Erasing partial delete siblings from Node"), + lager:info( + "Erasing partial delete siblings from Node ~p ~p - local", + [SK0, EK0]), {ok, EraseCount0} = aae_fold(NodeA1, Protocol, @@ -275,6 +278,9 @@ test_repl(Protocol, [ClusterA, ClusterB]) -> all, {ts_epoch(SW0A), ts_epoch(SW0B)}, local}), + lager:info( + "Erasing partial delete siblings from Node ~p ~p - job", + [SK1, EK1]), {ok, EraseCount1} = aae_fold(NodeA1, Protocol, @@ -319,8 +325,13 @@ test_repl(Protocol, [ClusterA, ClusterB]) -> ?TEST_BUCKET, all, all, {ts_epoch(SW1A), ts_epoch(SW1B)}, local}), + lager:info("Reaped ~w tombs from first time range", [TombCount0]), + lager:info("Reaped ~w tombs from second time range", [TombCount1]), ExpectedEC = EraseCount0 + EraseCount1, + lager:info( + "EraseCount0 ~w EraseCount1 ~w expected tombs ~w", + [EraseCount0, EraseCount1, ExpectedEC]), {ok, ExpectedEC} = wait_for_outcome(?MODULE, aae_fold, @@ -489,7 +500,7 @@ write_to_cluster(Node, Start, End, CommonValBin) -> B = ?TEST_BUCKET, F = fun(N, Acc) -> - Key = list_to_binary(io_lib:format("~8..0B~n", [N])), + Key = list_to_binary(io_lib:format("~8..0B", [N])), Obj = case CommonValBin of new_obj -> @@ -528,7 +539,7 @@ delete_from_cluster(Node, Start, End) -> {ok, C} = riak:client_connect(Node), F = fun(N, Acc) -> - Key = list_to_binary(io_lib:format("~8..0B~n", [N])), + Key = list_to_binary(io_lib:format("~8..0B", [N])), try riak_client:delete(?TEST_BUCKET, Key, C) of ok -> Acc; @@ -548,7 +559,7 @@ read_from_cluster(Node, Start, End, CommonValBin, Errors) -> {ok, C} = riak:client_connect(Node), F = fun(N, Acc) -> - Key = list_to_binary(io_lib:format("~8..0B~n", [N])), + Key = list_to_binary(io_lib:format("~8..0B", [N])), case riak_client:get(?TEST_BUCKET, Key, C) of {ok, Obj} -> ExpectedVal = <>, @@ -619,7 +630,7 @@ write_then_delete(NodeA1, NodeB1, Start, End) -> 0, ?LOOP_COUNT), - lager:info("Deleting ~w objects from B and read not_found from A and C", + lager:info("Deleting ~w objects from B and read not_found from A", [?KEY_COUNT]), delete_from_cluster(NodeB1, Start, End), lager:info("Waiting for missing sample"),