Skip to content

Commit 5896170

Browse files
acogoluegnesmergify[bot]
authored andcommitted
Unblock group of consumers on super stream partition
A group of consumers on a super stream can end up blocked without an active consumer. This can happen with consumer churn: one consumer gets removed, which makes the active consumer passive, but the former active consumer never gets to know because it has been removed itself. This commit changes the structure of the messages the SAC coordinator sends to consumer connections, to embed enough information to look up the group and to instruct it to choose a new active consumer when the race condition mentioned above comes up. Because of the changes in the structure of messages, a feature flag is required to make sure the SAC coordinator starts sending the new messages only when all the nodes have been upgraded. References #7743 (cherry picked from commit 70538c5) (cherry picked from commit 221f10d) # Conflicts: # deps/rabbit/src/rabbit_core_ff.erl # deps/rabbit/src/rabbit_stream_sac_coordinator.erl # deps/rabbitmq_stream/src/rabbit_stream_reader.erl
1 parent 8296864 commit 5896170

File tree

3 files changed

+238
-74
lines changed

3 files changed

+238
-74
lines changed

deps/rabbit/src/rabbit_core_ff.erl

+11
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
depends_on => [stream_queue]
112112
}}).
113113

114+
<<<<<<< HEAD
114115
%% -------------------------------------------------------------------
115116
%% Direct exchange routing v2.
116117
%% -------------------------------------------------------------------
@@ -230,3 +231,13 @@ delete_table(FeatureName, Tab) ->
230231
%% adheres to the callback interface
231232
ok
232233
end.
234+
=======
235+
236+
-rabbit_feature_flag(
237+
{stream_sac_coordinator_unblock_group,
238+
#{desc => "Bug fix to unblock a group of consumers in a super stream partition",
239+
doc_url => "https://github.com/rabbitmq/rabbitmq-server/issues/7743",
240+
stability => stable,
241+
depends_on => [stream_single_active_consumer]
242+
}}).
243+
>>>>>>> 221f10d2d9 (Unblock group of consumers on super stream partition)

deps/rabbit/src/rabbit_stream_sac_coordinator.erl

+87-20
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ apply(#command_unregister_consumer{vhost = VirtualHost,
253253
of
254254
{value, Consumer} ->
255255
G1 = remove_from_group(Consumer, Group0),
256-
handle_consumer_removal(G1, Consumer);
256+
handle_consumer_removal(G1, Consumer, Stream, ConsumerName);
257257
false ->
258258
{Group0, []}
259259
end,
@@ -269,19 +269,28 @@ apply(#command_activate_consumer{vhost = VirtualHost,
269269
stream = Stream,
270270
consumer_name = ConsumerName},
271271
#?MODULE{groups = StreamGroups0} = State0) ->
272+
rabbit_log:debug("Activating consumer on ~tp, group ~p",
273+
[Stream, ConsumerName]),
272274
{G, Eff} =
273275
case lookup_group(VirtualHost, Stream, ConsumerName, StreamGroups0) of
274276
undefined ->
277+
<<<<<<< HEAD
275278
rabbit_log:warning("trying to activate consumer in group ~p, but "
279+
=======
280+
rabbit_log:warning("Trying to activate consumer in group ~tp, but "
281+
>>>>>>> 221f10d2d9 (Unblock group of consumers on super stream partition)
276282
"the group does not longer exist",
277283
[{VirtualHost, Stream, ConsumerName}]),
278284
{undefined, []};
279285
Group ->
280286
#consumer{pid = Pid, subscription_id = SubId} =
281287
evaluate_active_consumer(Group),
288+
rabbit_log:debug("New active consumer on ~tp, group ~tp " ++
289+
"is ~tp from ~tp",
290+
[Stream, ConsumerName, SubId, Pid]),
282291
Group1 =
283292
update_consumer_state_in_group(Group, Pid, SubId, true),
284-
{Group1, [notify_consumer_effect(Pid, SubId, true)]}
293+
{Group1, [notify_consumer_effect(Pid, SubId, Stream, ConsumerName, true)]}
285294
end,
286295
StreamGroups1 =
287296
update_groups(VirtualHost, Stream, ConsumerName, G, StreamGroups0),
@@ -521,7 +530,8 @@ do_register_consumer(VirtualHost,
521530
Effects =
522531
case Active of
523532
true ->
524-
[notify_consumer_effect(ConnectionPid, SubscriptionId, Active)];
533+
[notify_consumer_effect(ConnectionPid, SubscriptionId,
534+
Stream, ConsumerName, Active)];
525535
_ ->
526536
[]
527537
end,
@@ -549,7 +559,8 @@ do_register_consumer(VirtualHost,
549559
active = true},
550560
G1 = add_to_group(Consumer0, Group0),
551561
{G1,
552-
[notify_consumer_effect(ConnectionPid, SubscriptionId, true)]};
562+
[notify_consumer_effect(ConnectionPid, SubscriptionId,
563+
Stream, ConsumerName, true)]};
553564
_G ->
554565
%% whatever the current state is, the newcomer will be passive
555566
Consumer0 =
@@ -568,18 +579,28 @@ do_register_consumer(VirtualHost,
568579
%% the current active stays the same
569580
{G1, []};
570581
_ ->
582+
rabbit_log:debug("SAC consumer registration: " ++
583+
"active consumer change on stream ~tp, group ~tp. " ++
584+
"Notifying ~tp from ~tp it is no longer active.",
585+
[Stream, ConsumerName, ActSubId, ActPid]),
571586
%% there's a change, telling the active it's not longer active
572587
{update_consumer_state_in_group(G1,
573588
ActPid,
574589
ActSubId,
575590
false),
576591
[notify_consumer_effect(ActPid,
577592
ActSubId,
593+
Stream,
594+
ConsumerName,
578595
false,
579596
true)]}
580597
end;
581598
false ->
582-
%% no active consumer in the (non-empty) group, we are waiting for the reply of a former active
599+
rabbit_log:debug("SAC consumer registration: no active consumer on stream ~tp, group ~tp. " ++
600+
"Likely waiting for a response from former active consumer.",
601+
[Stream, ConsumerName]),
602+
%% no active consumer in the (non-empty) group,
603+
%% we are waiting for the reply of a former active
583604
{G1, []}
584605
end
585606
end,
@@ -593,27 +614,27 @@ do_register_consumer(VirtualHost,
593614
lookup_consumer(ConnectionPid, SubscriptionId, Group1),
594615
{State#?MODULE{groups = StreamGroups1}, {ok, Active}, Effects}.
595616

596-
handle_consumer_removal(#group{consumers = []} = G, _) ->
617+
handle_consumer_removal(#group{consumers = []} = G, _, _, _) ->
597618
{G, []};
598619
handle_consumer_removal(#group{partition_index = -1} = Group0,
599-
Consumer) ->
620+
Consumer, Stream, ConsumerName) ->
600621
case Consumer of
601622
#consumer{active = true} ->
602623
%% this is the active consumer we remove, computing the new one
603624
Group1 = compute_active_consumer(Group0),
604625
case lookup_active_consumer(Group1) of
605626
{value, #consumer{pid = Pid, subscription_id = SubId}} ->
606627
%% creating the side effect to notify the new active consumer
607-
{Group1, [notify_consumer_effect(Pid, SubId, true)]};
628+
{Group1, [notify_consumer_effect(Pid, SubId, Stream, ConsumerName, true)]};
608629
_ ->
609630
%% no active consumer found in the group, nothing to do
610631
{Group1, []}
611632
end;
612633
#consumer{active = false} ->
613-
%% not the active consumer, nothing to do."),
634+
%% not the active consumer, nothing to do.
614635
{Group0, []}
615636
end;
616-
handle_consumer_removal(Group0, Consumer) ->
637+
handle_consumer_removal(Group0, Consumer, Stream, ConsumerName) ->
617638
case lookup_active_consumer(Group0) of
618639
{value,
619640
#consumer{pid = ActPid, subscription_id = ActSubId} =
@@ -623,40 +644,81 @@ handle_consumer_removal(Group0, Consumer) ->
623644
%% the current active stays the same
624645
{Group0, []};
625646
_ ->
647+
rabbit_log:debug("SAC consumer removal: " ++
648+
"active consumer change on stream ~tp, group ~tp. " ++
649+
"Notifying ~tp from ~tp it is no longer active.",
650+
[Stream, ConsumerName, ActSubId, ActPid]),
651+
626652
%% there's a change, telling the active it's not longer active
627653
{update_consumer_state_in_group(Group0,
628654
ActPid,
629655
ActSubId,
630656
false),
631-
[notify_consumer_effect(ActPid, ActSubId, false, true)]}
657+
[notify_consumer_effect(ActPid, ActSubId,
658+
Stream, ConsumerName, false, true)]}
632659
end;
633660
false ->
634661
case Consumer#consumer.active of
635662
true ->
636663
%% the active one is going away, picking a new one
637664
#consumer{pid = P, subscription_id = SID} =
638665
evaluate_active_consumer(Group0),
666+
rabbit_log:debug("SAC consumer removal: " ++
667+
"active consumer change on stream ~tp, group ~tp. " ++
668+
"Notifying ~tp from ~tp it is the new active consumer.",
669+
[Stream, ConsumerName, SID, P]),
639670
{update_consumer_state_in_group(Group0, P, SID, true),
640-
[notify_consumer_effect(P, SID, true)]};
671+
[notify_consumer_effect(P, SID,
672+
Stream, ConsumerName, true)]};
641673
false ->
642-
%% no active consumer in the (non-empty) group, we are waiting for the reply of a former active
674+
rabbit_log:debug("SAC consumer removal: no active consumer on stream ~tp, group ~tp. " ++
675+
"Likely waiting for a response from former active consumer.",
676+
[Stream, ConsumerName]),
677+
%% no active consumer in the (non-empty) group,
678+
%% we are waiting for the reply of a former active
643679
{Group0, []}
644680
end
645681
end.
646682

647-
notify_consumer_effect(Pid, SubId, Active) ->
648-
notify_consumer_effect(Pid, SubId, Active, false).
683+
message_type() ->
684+
case has_unblock_group_support() of
685+
true ->
686+
map;
687+
false ->
688+
tuple
689+
end.
690+
691+
notify_consumer_effect(Pid, SubId, Stream, Name, Active) ->
692+
notify_consumer_effect(Pid, SubId, Stream, Name, Active, false).
693+
694+
notify_consumer_effect(Pid, SubId, Stream, Name, Active, SteppingDown) ->
695+
notify_consumer_effect(Pid, SubId, Stream, Name, Active, SteppingDown, message_type()).
649696

650-
notify_consumer_effect(Pid, SubId, Active, false = _SteppingDown) ->
697+
notify_consumer_effect(Pid, SubId, _Stream, _Name, Active, false = _SteppingDown, tuple) ->
651698
mod_call_effect(Pid,
652699
{sac,
653-
{{subscription_id, SubId}, {active, Active},
700+
{{subscription_id, SubId},
701+
{active, Active},
654702
{extra, []}}});
655-
notify_consumer_effect(Pid, SubId, Active, true = _SteppingDown) ->
703+
notify_consumer_effect(Pid, SubId, _Stream, _Name, Active, true = _SteppingDown, tuple) ->
656704
mod_call_effect(Pid,
657705
{sac,
658-
{{subscription_id, SubId}, {active, Active},
659-
{extra, [{stepping_down, true}]}}}).
706+
{{subscription_id, SubId},
707+
{active, Active},
708+
{extra, [{stepping_down, true}]}}});
709+
notify_consumer_effect(Pid, SubId, Stream, Name, Active, false = _SteppingDown, map) ->
710+
mod_call_effect(Pid,
711+
{sac, #{subscription_id => SubId,
712+
stream => Stream,
713+
consumer_name => Name,
714+
active => Active}});
715+
notify_consumer_effect(Pid, SubId, Stream, Name, Active, true = _SteppingDown, map) ->
716+
mod_call_effect(Pid,
717+
{sac, #{subscription_id => SubId,
718+
stream => Stream,
719+
consumer_name => Name,
720+
active => Active,
721+
stepping_down => true}}).
660722

661723
maybe_create_group(VirtualHost,
662724
Stream,
@@ -766,5 +828,10 @@ send_message(ConnectionPid, Msg) ->
766828
ConnectionPid ! Msg,
767829
ok.
768830

831+
<<<<<<< HEAD
769832
is_ff_enabled() ->
770833
rabbit_feature_flags:is_enabled(stream_single_active_consumer).
834+
=======
835+
has_unblock_group_support() ->
836+
rabbit_feature_flags:is_enabled(stream_sac_coordinator_unblock_group).
837+
>>>>>>> 221f10d2d9 (Unblock group of consumers on super stream partition)

0 commit comments

Comments
 (0)