Skip to content

Commit 8efd8ca

Browse files
dumbbell authored and the-mikedavis committed
rabbitmq_ct_broker_helpers: Use node 2 as the cluster seed node
[Why] When running mixed-version tests, nodes 1/3/5/... are using the primary umbrella, so usually the newest version. Nodes 2/4/6/... are using the secondary umbrella, thus the old version. When clustering, we used to use node 1 (running a new version) as the seed node, meaning other nodes would join it. This complicates things with feature flags because we have to make sure that we start node 1 with new stable feature flags disabled to allow old nodes to join. This is also a problem with Khepri machine versions because the cluster would start with the latest version, which old nodes might not have. [How] This patch changes the logic to use a node running the secondary umbrella as the seed node instead. If there is no node running it, we pick the first node as before. V2: Revert part of "rabbitmq_ct_helpers: Fix how we set `$RABBITMQ_FEATURE_FLAGS` in tests" (commit 57ed962). These changes are no longer needed with the new logic. V3: The check that verifies that the correct metadata store is used has a special case for nodes that use the secondary umbrella: if Khepri is supposed to be used but it's not, the feature flag is enabled. The reason is that the `v4.0.x` branch doesn't know about the `rel` configuration of `forced_feature_flags_on_init`. The nodes will have ignored this parameter and booted with the stable feature flags only. Many testsuites are adapted to the new clustering order. If they manage which node joins which node, either the order is changed in the testcases, or nodes are started with only required feature flags. For testsuites that rely on peer discovery where the order is unknown, nodes are started with only required feature flags.
1 parent 861f744 commit 8efd8ca

12 files changed

+213
-127
lines changed

deps/rabbit/test/cluster_minority_SUITE.erl

+3-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,9 @@ init_per_group(Group, Config0) ->
133133
{rmq_nodes_clustered, false},
134134
{tcp_ports_base},
135135
{net_ticktime, 5}]),
136-
rabbit_ct_helpers:run_steps(Config,
136+
Config1 = rabbit_ct_helpers:merge_app_env(
137+
Config, {rabbit, [{forced_feature_flags_on_init, []}]}),
138+
rabbit_ct_helpers:run_steps(Config1,
137139
rabbit_ct_broker_helpers:setup_steps() ++
138140
rabbit_ct_client_helpers:setup_steps()).
139141

deps/rabbit/test/clustering_events_SUITE.erl

+4-4
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,10 @@ configure_cluster_essentials(Config, Group, Clustered) ->
8787

8888
node_added_event(Config) ->
8989
[Server1, Server2, _Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
90-
ok = event_recorder:start(Config),
91-
join_cluster(Server2, Server1),
92-
E = event_recorder:get_events(Config),
93-
ok = event_recorder:stop(Config),
90+
ok = event_recorder:start(Config, Server2),
91+
join_cluster(Server1, Server2),
92+
E = event_recorder:get_events(Config, Server2),
93+
ok = event_recorder:stop(Config, Server2),
9494
?assert(lists:any(fun(#event{type = node_added}) ->
9595
true;
9696
(_) ->

deps/rabbit/test/clustering_management_SUITE.erl

+8-2
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,15 @@ init_per_group(mnesia_store, Config) ->
144144
Config
145145
end;
146146
init_per_group(unclustered_2_nodes, Config) ->
147-
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]);
147+
Config1 = rabbit_ct_helpers:set_config(
148+
Config, [{rmq_nodes_clustered, false}]),
149+
rabbit_ct_helpers:merge_app_env(
150+
Config1, {rabbit, [{forced_feature_flags_on_init, []}]});
148151
init_per_group(unclustered_3_nodes, Config) ->
149-
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]);
152+
Config1 = rabbit_ct_helpers:set_config(
153+
Config, [{rmq_nodes_clustered, false}]),
154+
rabbit_ct_helpers:merge_app_env(
155+
Config1, {rabbit, [{forced_feature_flags_on_init, []}]});
150156
init_per_group(clustered_2_nodes, Config) ->
151157
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, true}]);
152158
init_per_group(clustered_3_nodes, Config) ->

deps/rabbit/test/direct_exchange_routing_v2_SUITE.erl

+16-16
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ join_cluster(Config) ->
337337
Servers0 = [Server1, Server2] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
338338
Servers = lists:sort(Servers0),
339339

340-
{_Conn1, Ch1} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server1),
340+
{_Conn1, Ch1} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server2),
341341
DirectX = <<"amq.direct">>,
342342
Q = <<"q">>,
343343
RKey = <<"k">>,
@@ -346,35 +346,35 @@ join_cluster(Config) ->
346346
bind_queue(Ch1, Q, DirectX, RKey),
347347

348348
%% Server1 and Server2 are not clustered yet.
349-
%% Hence, every node has their own table (copy) and only Server1's table contains the binding.
350-
?assertEqual([Server1], index_table_ram_copies(Config, Server1)),
349+
%% Hence, every node has their own table (copy) and only Server2's table contains the binding.
351350
?assertEqual([Server2], index_table_ram_copies(Config, Server2)),
352-
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server1)),
353-
?assertEqual(0, table_size(Config, ?INDEX_TABLE_NAME, Server2)),
351+
?assertEqual([Server1], index_table_ram_copies(Config, Server1)),
352+
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server2)),
353+
?assertEqual(0, table_size(Config, ?INDEX_TABLE_NAME, Server1)),
354354

355-
ok = rabbit_control_helper:command(stop_app, Server2),
356-
%% For the purpose of this test it shouldn't matter whether Server2 is reset. Both should work.
355+
ok = rabbit_control_helper:command(stop_app, Server1),
356+
%% For the purpose of this test it shouldn't matter whether Server1 is reset. Both should work.
357357
case erlang:system_time() rem 2 of
358358
0 ->
359-
ok = rabbit_control_helper:command(reset, Server2);
359+
ok = rabbit_control_helper:command(reset, Server1);
360360
1 ->
361361
ok
362362
end,
363-
ok = rabbit_control_helper:command(join_cluster, Server2, [atom_to_list(Server1)], []),
364-
ok = rabbit_control_helper:command(start_app, Server2),
363+
ok = rabbit_control_helper:command(join_cluster, Server1, [atom_to_list(Server2)], []),
364+
ok = rabbit_control_helper:command(start_app, Server1),
365365

366-
%% After Server2 joined Server1, the table should be clustered.
367-
?assertEqual(Servers, index_table_ram_copies(Config, Server2)),
368-
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server2)),
366+
%% After Server1 joined Server2, the table should be clustered.
367+
?assertEqual(Servers, index_table_ram_copies(Config, Server1)),
368+
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server1)),
369369

370-
%% Publishing via Server1 via "direct exchange routing v2" should work.
370+
%% Publishing via Server2 via "direct exchange routing v2" should work.
371371
amqp_channel:call(Ch1, #'confirm.select'{}),
372372
amqp_channel:register_confirm_handler(Ch1, self()),
373373
publish(Ch1, DirectX, RKey),
374374
assert_confirm(),
375375

376-
%% Publishing via Server2 via "direct exchange routing v2" should work.
377-
{_Conn2, Ch2} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server2),
376+
%% Publishing via Server1 via "direct exchange routing v2" should work.
377+
{_Conn2, Ch2} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server1),
378378
amqp_channel:call(Ch2, #'confirm.select'{}),
379379
amqp_channel:register_confirm_handler(Ch2, self()),
380380
publish(Ch2, DirectX, RKey),

deps/rabbit/test/event_recorder.erl

+18-9
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
handle_event/2,
1717
handle_call/2]).
1818
%% client API
19-
-export([start/1,
20-
stop/1,
21-
get_events/1]).
19+
-export([start/1, start/2,
20+
stop/1, stop/2,
21+
get_events/1, get_events/2]).
2222
-export([assert_event_type/2,
2323
assert_event_prop/2]).
2424

@@ -42,22 +42,31 @@ handle_call(take_state, State) ->
4242
{ok, lists:reverse(State), ?INIT_STATE}.
4343

4444
start(Config) ->
45+
start(Config, 0).
46+
47+
start(Config, Node) ->
4548
ok = rabbit_ct_broker_helpers:add_code_path_to_all_nodes(Config, ?MODULE),
46-
ok = gen_event:add_handler(event_manager_ref(Config), ?MODULE, []).
49+
ok = gen_event:add_handler(event_manager_ref(Config, Node), ?MODULE, []).
4750

4851
stop(Config) ->
49-
ok = gen_event:delete_handler(event_manager_ref(Config), ?MODULE, []).
52+
stop(Config, 0).
53+
54+
stop(Config, Node) ->
55+
ok = gen_event:delete_handler(event_manager_ref(Config, Node), ?MODULE, []).
5056

5157
get_events(Config) ->
58+
get_events(Config, 0).
59+
60+
get_events(Config, Node) ->
5261
%% events are sent and processed asynchronously
5362
timer:sleep(500),
54-
Result = gen_event:call(event_manager_ref(Config), ?MODULE, take_state),
63+
Result = gen_event:call(event_manager_ref(Config, Node), ?MODULE, take_state),
5564
?assert(is_list(Result)),
5665
Result.
5766

58-
event_manager_ref(Config) ->
59-
Node = get_node_config(Config, 0, nodename),
60-
{rabbit_event, Node}.
67+
event_manager_ref(Config, Node) ->
68+
Node1 = get_node_config(Config, Node, nodename),
69+
{rabbit_event, Node1}.
6170

6271
assert_event_type(ExpectedType, #event{type = ActualType}) ->
6372
?assertEqual(ExpectedType, ActualType).

deps/rabbit/test/feature_flags_SUITE.erl

+7-7
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,7 @@ groups() ->
119119

120120
init_per_suite(Config) ->
121121
rabbit_ct_helpers:log_environment(),
122-
Config1 = rabbit_ct_helpers:set_config(
123-
Config, {skip_metadata_store_configuration, true}),
124-
rabbit_ct_helpers:run_setup_steps(Config1, [
122+
rabbit_ct_helpers:run_setup_steps(Config, [
125123
fun rabbit_ct_broker_helpers:configure_dist_proxy/1
126124
]).
127125

@@ -198,7 +196,9 @@ init_per_group(clustering, Config) ->
198196
[{rmq_nodes_count, 2},
199197
{rmq_nodes_clustered, false},
200198
{start_rmq_with_plugins_disabled, true}]),
201-
rabbit_ct_helpers:run_setup_steps(Config1, [fun prepare_my_plugin/1]);
199+
Config2 = rabbit_ct_helpers:merge_app_env(
200+
Config1, {rabbit, [{forced_feature_flags_on_init, []}]}),
201+
rabbit_ct_helpers:run_setup_steps(Config2, [fun prepare_my_plugin/1]);
202202
init_per_group(activating_plugin, Config) ->
203203
Config1 = rabbit_ct_helpers:set_config(
204204
Config,
@@ -891,7 +891,7 @@ clustering_ok_with_ff_enabled_on_some_nodes(Config) ->
891891
ok
892892
end,
893893

894-
?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config)),
894+
?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config, 0)),
895895

896896
log_feature_flags_of_all_nodes(Config),
897897
case FFSubsysOk of
@@ -987,7 +987,7 @@ clustering_denied_with_new_ff_enabled(Config) ->
987987
false -> ok
988988
end,
989989

990-
?assertMatch({skip, _}, rabbit_ct_broker_helpers:cluster_nodes(Config)),
990+
?assertMatch({skip, _}, rabbit_ct_broker_helpers:cluster_nodes(Config, 0)),
991991

992992
log_feature_flags_of_all_nodes(Config),
993993
case FFSubsysOk of
@@ -1049,7 +1049,7 @@ clustering_ok_with_new_ff_enabled_from_plugin_on_some_nodes(Config) ->
10491049
false -> ok
10501050
end,
10511051

1052-
?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config)),
1052+
?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config, 0)),
10531053

10541054
log_feature_flags_of_all_nodes(Config),
10551055
case FFSubsysOk of

deps/rabbit/test/peer_discovery_classic_config_SUITE.erl

+3
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ init_per_testcase(successful_discovery = Testcase, Config) ->
9191
NodeNamesWithHostname = [rabbit_nodes:make({Name, "localhost"}) || Name <- NodeNames],
9292
Config3 = rabbit_ct_helpers:merge_app_env(Config2,
9393
{rabbit, [
94+
{forced_feature_flags_on_init, []},
9495
{cluster_nodes, {NodeNamesWithHostname, disc}},
9596
{cluster_formation, [
9697
{internal_lock_retries, 10}
@@ -124,6 +125,7 @@ init_per_testcase(successful_discovery_with_a_subset_of_nodes_coming_online = Te
124125
%% unreachable nodes vs ~6min without them
125126
Config3 = rabbit_ct_helpers:merge_app_env(Config2,
126127
{rabbit, [
128+
{forced_feature_flags_on_init, []},
127129
{cluster_nodes, {NodeNamesWithHostname, disc}},
128130
{cluster_formation, [
129131
{internal_lock_retries, 10}
@@ -141,6 +143,7 @@ init_per_testcase(no_nodes_configured = Testcase, Config) ->
141143
]),
142144
Config3 = rabbit_ct_helpers:merge_app_env(Config2,
143145
{rabbit, [
146+
{forced_feature_flags_on_init, []},
144147
{cluster_nodes, {[], disc}},
145148
{cluster_formation, [
146149
{internal_lock_retries, 10}

deps/rabbit/test/quorum_queue_member_reconciliation_SUITE.erl

+23-23
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ merge_app_env(Config) ->
8181
end_per_testcase(Testcase, Config) ->
8282
[Server0, Server1, Server2] =
8383
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
84-
reset_nodes([Server1, Server2], Server0),
84+
reset_nodes([Server2, Server0], Server1),
8585
Config1 = rabbit_ct_helpers:run_steps(
8686
Config,
8787
rabbit_ct_client_helpers:teardown_steps()),
@@ -107,83 +107,83 @@ reset_nodes([Node| Nodes], Leader) ->
107107
auto_grow(Config) ->
108108
[Server0, Server1, Server2] =
109109
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
110-
Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
110+
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
111111

112112
QQ = ?config(queue_name, Config),
113113
?assertEqual({'queue.declare_ok', QQ, 0, 0},
114114
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
115115

116116
%% There is only one node in the cluster at the moment
117-
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
117+
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
118118
?assertEqual(1, length(Members)),
119119

120-
add_server_to_cluster(Server1, Server0),
120+
add_server_to_cluster(Server0, Server1),
121121
%% With 2 nodes in the cluster, target group size is not reached, so no
122122
%% new members should be available. We sleep a while so the periodic check
123123
%% runs
124124
timer:sleep(4000),
125-
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
125+
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
126126
?assertEqual(1, length(Members)),
127127

128-
add_server_to_cluster(Server2, Server0),
128+
add_server_to_cluster(Server2, Server1),
129129
%% With 3 nodes in the cluster, target size is met so eventually it should
130130
%% be 3 members
131131
wait_until(fun() ->
132-
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
132+
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
133133
3 =:= length(M)
134134
end).
135135

136136
auto_grow_drained_node(Config) ->
137137
[Server0, Server1, Server2] =
138138
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
139-
Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
139+
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
140140

141141
QQ = ?config(queue_name, Config),
142142
?assertEqual({'queue.declare_ok', QQ, 0, 0},
143143
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
144144

145145
%% There is only one node in the cluster at the moment
146-
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
146+
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
147147
?assertEqual(1, length(Members)),
148148

149-
add_server_to_cluster(Server1, Server0),
150-
%% mark server1 as drained, which should mean the node is not a candiate
149+
add_server_to_cluster(Server0, Server1),
150+
%% mark Server0 as drained, which should mean the node is not a candiate
151151
%% for qq membership
152-
rabbit_ct_broker_helpers:mark_as_being_drained(Config, Server1),
152+
rabbit_ct_broker_helpers:mark_as_being_drained(Config, Server0),
153153
rabbit_ct_helpers:await_condition(
154-
fun () -> rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server1) end,
154+
fun () -> rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server0) end,
155155
10000),
156-
add_server_to_cluster(Server2, Server0),
156+
add_server_to_cluster(Server2, Server1),
157157
timer:sleep(5000),
158158
%% We have 3 nodes, but one is drained, so it will not be concidered.
159-
{ok, Members1, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
159+
{ok, Members1, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
160160
?assertEqual(1, length(Members1)),
161161

162-
rabbit_ct_broker_helpers:unmark_as_being_drained(Config, Server1),
162+
rabbit_ct_broker_helpers:unmark_as_being_drained(Config, Server0),
163163
rabbit_ct_helpers:await_condition(
164-
fun () -> not rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server1) end,
164+
fun () -> not rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server0) end,
165165
10000),
166166
%% We have 3 nodes, none is being drained, so we should grow membership to 3
167167
wait_until(fun() ->
168-
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
168+
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
169169
3 =:= length(M)
170170
end).
171171

172172

173173
auto_shrink(Config) ->
174174
[Server0, Server1, Server2] =
175175
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
176-
Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
177-
add_server_to_cluster(Server1, Server0),
178-
add_server_to_cluster(Server2, Server0),
176+
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
177+
add_server_to_cluster(Server0, Server1),
178+
add_server_to_cluster(Server2, Server1),
179179

180180
QQ = ?config(queue_name, Config),
181181
?assertEqual({'queue.declare_ok', QQ, 0, 0},
182182
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),
183183

184184
wait_until(fun() ->
185185
{ok, M, _} = ra:members({queue_utils:ra_name(QQ),
186-
Server0}),
186+
Server1}),
187187
3 =:= length(M)
188188
end),
189189
ok = rabbit_control_helper:command(stop_app, Server2),
@@ -192,7 +192,7 @@ auto_shrink(Config) ->
192192
%% with one node 'forgotten', eventually the membership will shrink to 2
193193
wait_until(fun() ->
194194
{ok, M, _} = ra:members({queue_utils:ra_name(QQ),
195-
Server0}),
195+
Server1}),
196196
2 =:= length(M)
197197
end).
198198

deps/rabbit/test/rabbit_stream_queue_SUITE.erl

+3-1
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,9 @@ init_per_group1(Group, Config) ->
237237
_ ->
238238
Config1
239239
end,
240-
Ret = rabbit_ct_helpers:run_steps(Config1b,
240+
Config1c = rabbit_ct_helpers:merge_app_env(
241+
Config1b, {rabbit, [{forced_feature_flags_on_init, []}]}),
242+
Ret = rabbit_ct_helpers:run_steps(Config1c,
241243
[fun merge_app_env/1 ] ++
242244
rabbit_ct_broker_helpers:setup_steps()),
243245
case Ret of

0 commit comments

Comments
 (0)