Skip to content

Commit c4c9447

Browse files
committed
Improve many testsuites to make them work with mixed versions of Khepri
1 parent 20b5c37 commit c4c9447

File tree

7 files changed

+301
-147
lines changed

7 files changed

+301
-147
lines changed

deps/rabbit/test/cluster_minority_SUITE.erl

Lines changed: 169 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@
99

1010
-include_lib("amqp_client/include/amqp_client.hrl").
1111
-include_lib("eunit/include/eunit.hrl").
12+
-include_lib("rabbitmq_ct_helpers/include/rabbit_assert.hrl").
1213

1314
-compile([export_all, nowarn_export_all]).
1415

1516
all() ->
1617
[
1718
{group, client_operations},
18-
{group, cluster_operation_add},
19-
{group, cluster_operation_remove}
19+
{group, cluster_operation}
2020
].
2121

2222
groups() ->
@@ -42,8 +42,10 @@ groups() ->
4242
delete_policy,
4343
export_definitions
4444
]},
45-
{cluster_operation_add, [], [add_node]},
46-
{cluster_operation_remove, [], [remove_node]},
45+
{cluster_operation, [], [add_node_when_seed_node_is_leader,
46+
add_node_when_seed_node_is_follower,
47+
remove_node_when_seed_node_is_leader,
48+
remove_node_when_seed_node_is_follower]},
4749
{feature_flags, [], [enable_feature_flag]}
4850
].
4951

@@ -127,26 +129,49 @@ init_per_group(Group, Config0) when Group == client_operations;
127129
partition_5_node_cluster(Config1),
128130
Config1
129131
end;
130-
init_per_group(Group, Config0) ->
132+
init_per_group(_Group, Config0) ->
131133
Config = rabbit_ct_helpers:set_config(Config0, [{rmq_nodes_count, 5},
132-
{rmq_nodename_suffix, Group},
133134
{rmq_nodes_clustered, false},
134135
{tcp_ports_base},
135136
{net_ticktime, 5}]),
136137
Config1 = rabbit_ct_helpers:merge_app_env(
137-
Config, {rabbit, [{forced_feature_flags_on_init, []}]}),
138-
rabbit_ct_helpers:run_steps(Config1,
139-
rabbit_ct_broker_helpers:setup_steps() ++
140-
rabbit_ct_client_helpers:setup_steps()).
138+
Config, {rabbit, [{forced_feature_flags_on_init, []},
139+
{khepri_leader_wait_retry_timeout, 30000}]}),
140+
Config1.
141141

142-
end_per_group(_, Config) ->
142+
end_per_group(Group, Config) when Group == client_operations;
143+
Group == feature_flags ->
143144
rabbit_ct_helpers:run_steps(Config,
144145
rabbit_ct_client_helpers:teardown_steps() ++
145-
rabbit_ct_broker_helpers:teardown_steps()).
146-
146+
rabbit_ct_broker_helpers:teardown_steps());
147+
end_per_group(_Group, Config) ->
148+
Config.
149+
150+
init_per_testcase(Testcase, Config)
151+
when Testcase =:= add_node_when_seed_node_is_leader orelse
152+
Testcase =:= add_node_when_seed_node_is_follower orelse
153+
Testcase =:= remove_node_when_seed_node_is_leader orelse
154+
Testcase =:= remove_node_when_seed_node_is_follower ->
155+
rabbit_ct_helpers:testcase_started(Config, Testcase),
156+
Config1 = rabbit_ct_helpers:set_config(
157+
Config, [{rmq_nodename_suffix, Testcase}]),
158+
rabbit_ct_helpers:run_steps(
159+
Config1,
160+
rabbit_ct_broker_helpers:setup_steps() ++
161+
rabbit_ct_client_helpers:setup_steps());
147162
init_per_testcase(Testcase, Config) ->
148163
rabbit_ct_helpers:testcase_started(Config, Testcase).
149164

165+
end_per_testcase(Testcase, Config)
166+
when Testcase =:= add_node_when_seed_node_is_leader orelse
167+
Testcase =:= add_node_when_seed_node_is_follower orelse
168+
Testcase =:= remove_node_when_seed_node_is_leader orelse
169+
Testcase =:= remove_node_when_seed_node_is_follower ->
170+
rabbit_ct_helpers:run_steps(
171+
Config,
172+
rabbit_ct_client_helpers:teardown_steps() ++
173+
rabbit_ct_broker_helpers:teardown_steps()),
174+
rabbit_ct_helpers:testcase_finished(Config, Testcase);
150175
end_per_testcase(Testcase, Config) ->
151176
rabbit_ct_helpers:testcase_finished(Config, Testcase).
152177

@@ -271,53 +296,153 @@ set_policy(Config) ->
271296
delete_policy(Config) ->
272297
?assertError(_, rabbit_ct_broker_helpers:clear_policy(Config, 0, <<"policy-to-delete">>)).
273298

274-
add_node(Config) ->
275-
[A, B, C, D, _E] = rabbit_ct_broker_helpers:get_node_configs(
299+
add_node_when_seed_node_is_leader(Config) ->
300+
[A, B, C, _D, E] = rabbit_ct_broker_helpers:get_node_configs(
276301
Config, nodename),
277302

278303
%% Three node cluster: A, B, C
279-
ok = rabbit_control_helper:command(stop_app, B),
280-
ok = rabbit_control_helper:command(join_cluster, B, [atom_to_list(A)], []),
281-
rabbit_control_helper:command(start_app, B),
304+
Cluster = [A, B, C],
305+
Config1 = rabbit_ct_broker_helpers:cluster_nodes(Config, Cluster),
282306

283-
ok = rabbit_control_helper:command(stop_app, C),
284-
ok = rabbit_control_helper:command(join_cluster, C, [atom_to_list(A)], []),
285-
rabbit_control_helper:command(start_app, C),
307+
AMember = {rabbit_khepri:get_store_id(), A},
308+
_ = ra:transfer_leadership(AMember, AMember),
309+
clustering_utils:assert_cluster_status({Cluster, Cluster}, Cluster),
286310

287311
%% Minority partition: A
312+
partition_3_node_cluster(Config1),
313+
314+
Pong = ra:ping(AMember, 10000),
315+
ct:pal("Member A state: ~0p", [Pong]),
316+
case Pong of
317+
{pong, State} when State =/= follower andalso State =/= candidate ->
318+
Ret = rabbit_control_helper:command(
319+
join_cluster, E, [atom_to_list(A)], []),
320+
?assertMatch({error, _, _}, Ret),
321+
{error, _, Msg} = Ret,
322+
?assertEqual(
323+
match,
324+
re:run(
325+
Msg, "(Khepri cluster could be in minority|\\{:rabbit, \\{\\{:error, :timeout\\})",
326+
[{capture, none}]));
327+
Ret ->
328+
ct:pal("A is not the expected leader: ~p", [Ret]),
329+
{skip, "Node A was not elected leader"}
330+
end.
331+
332+
add_node_when_seed_node_is_follower(Config) ->
333+
[A, B, C, _D, E] = rabbit_ct_broker_helpers:get_node_configs(
334+
Config, nodename),
335+
336+
%% Three node cluster: A, B, C
288337
Cluster = [A, B, C],
289-
partition_3_node_cluster(Config),
290-
291-
ok = rabbit_control_helper:command(stop_app, D),
292-
%% The command is appended to the log, but it will be dropped once the connectivity
293-
%% is restored
294-
?assertMatch(ok,
295-
rabbit_control_helper:command(join_cluster, D, [atom_to_list(A)], [])),
296-
timer:sleep(10000),
297-
join_3_node_cluster(Config),
298-
clustering_utils:assert_cluster_status({Cluster, Cluster}, Cluster).
299-
300-
remove_node(Config) ->
338+
Config1 = rabbit_ct_broker_helpers:cluster_nodes(Config, Cluster),
339+
340+
CMember = {rabbit_khepri:get_store_id(), C},
341+
ra:transfer_leadership(CMember, CMember),
342+
clustering_utils:assert_cluster_status({Cluster, Cluster}, Cluster),
343+
344+
%% Minority partition: A
345+
partition_3_node_cluster(Config1),
346+
347+
AMember = {rabbit_khepri:get_store_id(), A},
348+
Pong = ra:ping(AMember, 10000),
349+
ct:pal("Member A state: ~0p", [Pong]),
350+
case Pong of
351+
{pong, State}
352+
when State =:= follower orelse State =:= pre_vote ->
353+
Ret = rabbit_control_helper:command(
354+
join_cluster, E, [atom_to_list(A)], []),
355+
?assertMatch({error, _, _}, Ret),
356+
{error, _, Msg} = Ret,
357+
?assertEqual(
358+
match,
359+
re:run(
360+
Msg, "Khepri cluster could be in minority",
361+
[{capture, none}]));
362+
{pong, await_condition} ->
363+
Ret = rabbit_control_helper:command(
364+
join_cluster, E, [atom_to_list(A)], []),
365+
?assertMatch({error, _, _}, Ret),
366+
{error, _, Msg} = Ret,
367+
?assertEqual(
368+
match,
369+
re:run(
370+
Msg, "\\{:rabbit, \\{\\{:error, :timeout\\}",
371+
[{capture, none}])),
372+
clustering_utils:assert_cluster_status(
373+
{Cluster, Cluster}, Cluster);
374+
Ret ->
375+
ct:pal("A is not the expected follower: ~p", [Ret]),
376+
{skip, "Node A was not a follower"}
377+
end.
378+
379+
remove_node_when_seed_node_is_leader(Config) ->
301380
[A, B, C | _] = rabbit_ct_broker_helpers:get_node_configs(
302381
Config, nodename),
303382

304383
%% Three node cluster: A, B, C
305-
ok = rabbit_control_helper:command(stop_app, B),
306-
ok = rabbit_control_helper:command(join_cluster, B, [atom_to_list(A)], []),
307-
rabbit_control_helper:command(start_app, B),
384+
Cluster = [A, B, C],
385+
Config1 = rabbit_ct_broker_helpers:cluster_nodes(Config, Cluster),
308386

309-
ok = rabbit_control_helper:command(stop_app, C),
310-
ok = rabbit_control_helper:command(join_cluster, C, [atom_to_list(A)], []),
311-
rabbit_control_helper:command(start_app, C),
387+
AMember = {rabbit_khepri:get_store_id(), A},
388+
ra:transfer_leadership(AMember, AMember),
389+
clustering_utils:assert_cluster_status({Cluster, Cluster}, Cluster),
312390

313391
%% Minority partition: A
314-
partition_3_node_cluster(Config),
392+
partition_3_node_cluster(Config1),
393+
394+
Pong = ra:ping(AMember, 10000),
395+
ct:pal("Member A state: ~0p", [Pong]),
396+
case Pong of
397+
{pong, leader} ->
398+
?awaitMatch(
399+
ok,
400+
rabbit_control_helper:command(
401+
forget_cluster_node, A, [atom_to_list(B)], []),
402+
60000);
403+
Ret ->
404+
ct:pal("A is not the expected leader: ~p", [Ret]),
405+
{skip, "Node A was not a leader"}
406+
end.
407+
408+
remove_node_when_seed_node_is_follower(Config) ->
409+
[A, B, C | _] = rabbit_ct_broker_helpers:get_node_configs(
410+
Config, nodename),
411+
412+
%% Three node cluster: A, B, C
315413
Cluster = [A, B, C],
414+
Config1 = rabbit_ct_broker_helpers:cluster_nodes(Config, Cluster),
415+
416+
CMember = {rabbit_khepri:get_store_id(), C},
417+
ra:transfer_leadership(CMember, CMember),
418+
clustering_utils:assert_cluster_status({Cluster, Cluster}, Cluster),
316419

317-
ok = rabbit_control_helper:command(forget_cluster_node, A, [atom_to_list(B)], []),
318-
timer:sleep(10000),
319-
join_3_node_cluster(Config),
320-
clustering_utils:assert_cluster_status({Cluster, Cluster}, Cluster).
420+
%% Minority partition: A
421+
partition_3_node_cluster(Config1),
422+
423+
AMember = {rabbit_khepri:get_store_id(), A},
424+
Pong = ra:ping(AMember, 10000),
425+
ct:pal("Member A state: ~0p", [Pong]),
426+
case Pong of
427+
{pong, State}
428+
when State =:= follower orelse State =:= pre_vote ->
429+
Ret = rabbit_control_helper:command(
430+
forget_cluster_node, A, [atom_to_list(B)], []),
431+
?assertMatch({error, _, _}, Ret),
432+
{error, _, Msg} = Ret,
433+
?assertEqual(
434+
match,
435+
re:run(
436+
Msg, "Khepri cluster could be in minority",
437+
[{capture, none}]));
438+
{pong, await_condition} ->
439+
Ret = rabbit_control_helper:command(
440+
forget_cluster_node, A, [atom_to_list(B)], []),
441+
?assertMatch(ok, Ret);
442+
Ret ->
443+
ct:pal("A is not the expected leader: ~p", [Ret]),
444+
{skip, "Node A was not a leader"}
445+
end.
321446

322447
enable_feature_flag(Config) ->
323448
[A | _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),

deps/rabbit/test/clustering_management_SUITE.erl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -745,13 +745,13 @@ is_in_minority(Ret) ->
745745
?assertMatch(match, re:run(Msg, ".*timed out.*minority.*", [{capture, none}])).
746746

747747
reset_last_disc_node(Config) ->
748-
Servers = [Rabbit, Hare | _] = cluster_members(Config),
748+
[Rabbit, Hare | _] = cluster_members(Config),
749749

750750
stop_app(Config, Hare),
751751
?assertEqual(ok, change_cluster_node_type(Config, Hare, ram)),
752752
start_app(Config, Hare),
753753

754-
case rabbit_ct_broker_helpers:enable_feature_flag(Config, Servers, khepri_db) of
754+
case rabbit_ct_broker_helpers:enable_feature_flag(Config, [Rabbit], khepri_db) of
755755
ok ->
756756
%% The reset works after the switch to Khepri because the RAM node was
757757
%% implicitly converted to a disc one as Khepri always writes data on disc.

deps/rabbit/test/peer_discovery_classic_config_SUITE.erl

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,28 +21,32 @@
2121
all() ->
2222
[
2323
{group, non_parallel},
24-
{group, cluster_size_3},
25-
{group, cluster_size_5},
26-
{group, cluster_size_7}
24+
{group, discovery}
2725
].
2826

2927
groups() ->
3028
[
3129
{non_parallel, [], [
3230
no_nodes_configured
3331
]},
34-
{cluster_size_3, [], [
35-
successful_discovery,
36-
successful_discovery_with_a_subset_of_nodes_coming_online
37-
]},
38-
{cluster_size_5, [], [
39-
successful_discovery,
40-
successful_discovery_with_a_subset_of_nodes_coming_online
41-
]},
42-
{cluster_size_7, [], [
43-
successful_discovery,
44-
successful_discovery_with_a_subset_of_nodes_coming_online
45-
]}
32+
{discovery, [],
33+
[
34+
{cluster_size_3, [],
35+
[
36+
successful_discovery,
37+
successful_discovery_with_a_subset_of_nodes_coming_online
38+
]},
39+
{cluster_size_5, [],
40+
[
41+
successful_discovery,
42+
successful_discovery_with_a_subset_of_nodes_coming_online
43+
]},
44+
{cluster_size_7, [],
45+
[
46+
successful_discovery,
47+
successful_discovery_with_a_subset_of_nodes_coming_online
48+
]}
49+
]}
4650
].
4751

4852
suite() ->
@@ -63,6 +67,24 @@ init_per_suite(Config) ->
6367
end_per_suite(Config) ->
6468
rabbit_ct_helpers:run_teardown_steps(Config).
6569

70+
init_per_group(discovery, Config) ->
71+
case rabbit_ct_helpers:is_mixed_versions(Config) of
72+
false ->
73+
Config;
74+
true ->
75+
%% We can't support the creation of a cluster because peer
76+
%% discovery might select a newer node as the seed node and ask an
77+
%% older node to join it. The creation of the cluster may fail or
78+
%% the cluster might be degraded. Examples:
79+
%% - a feature flag is enabled by the newer node but the older
80+
%% node doesn't know it
81+
%% - the newer node uses a newer Khepri machine version and the
82+
%% older node can join but won't be able to apply Khepri
83+
%% commands and progress.
84+
{skip,
85+
"Peer discovery is unsupported with a mix of old and new "
86+
"RabbitMQ versions"}
87+
end;
6688
init_per_group(cluster_size_3 = Group, Config) ->
6789
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_count, 3}, {group, Group}]);
6890
init_per_group(cluster_size_5 = Group, Config) ->

0 commit comments

Comments
 (0)