Skip to content

Commit 62d016d

Browse files
acogoluegnesmergify[bot]
authored andcommitted
Introduce timeout for stream server-side requests
The stream plugin can send frames to a client connection and expect a response from it. This is used currently for the consumer_update frame (single active consumer feature). There was no timeout mechanism so far, so a slow or blocked application could prevent a group of consumers to move on. This commit introduces a timeout mechanism: if the expected response takes too long to arrive, the server assumes the connection is blocked and closes it. The default timeout is 60 seconds but it can be changed by setting the request_timeout parameter of the rabbitmq_stream application. Note the mechanism does not enforce the exact duration of the timeout, as a timer is set for the first request and re-used for other requests. With bad timing, a request can time out after twice as long as the set-up timeout. References #7743 (cherry picked from commit 763acc2)
1 parent 40de329 commit 62d016d

File tree

2 files changed

+156
-28
lines changed

2 files changed

+156
-28
lines changed

deps/rabbitmq_stream/src/rabbit_stream_reader.erl

+93-26
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
send_limit :: non_neg_integer(),
4848
log :: undefined | osiris_log:state(),
4949
last_listener_offset = undefined :: undefined | osiris:offset()}).
50+
-record(request,
51+
{start :: integer(),
52+
content :: term()}).
5053
-record(stream_connection_state,
5154
{data :: rabbit_stream_core:state(), blocked :: boolean(),
5255
consumers :: #{subscription_id() => #consumer{}}}).
@@ -89,8 +92,10 @@
8992
transport :: tcp | ssl,
9093
proxy_socket :: undefined | ranch_transport:socket(),
9194
correlation_id_sequence :: integer(),
92-
outstanding_requests :: #{integer() => term()},
93-
deliver_version :: rabbit_stream_core:command_version()}).
95+
outstanding_requests :: #{integer() => #request{}},
96+
deliver_version :: rabbit_stream_core:command_version(),
97+
request_timeout :: pos_integer(),
98+
outstanding_requests_timer :: undefined | timer:tref()}).
9499
-record(configuration,
95100
{initial_credits :: integer(),
96101
credits_required_for_unblocking :: integer(),
@@ -223,6 +228,8 @@ init([KeepaliveSup,
223228
socket_op(Sock,
224229
fun(S) -> rabbit_net:socket_ends(S, inbound) end),
225230
DeliverVersion = ?VERSION_1,
231+
RequestTimeout = application:get_env(rabbitmq_stream,
232+
request_timeout, 60_000),
226233
Connection =
227234
#stream_connection{name =
228235
rabbit_data_coercion:to_binary(ConnStr),
@@ -249,6 +256,7 @@ init([KeepaliveSup,
249256
rabbit_net:maybe_get_proxy_socket(Sock),
250257
correlation_id_sequence = 0,
251258
outstanding_requests = #{},
259+
request_timeout = RequestTimeout,
252260
deliver_version = DeliverVersion},
253261
State =
254262
#stream_connection_state{consumers = #{},
@@ -962,6 +970,37 @@ open(info, emit_stats,
962970
StatemData) ->
963971
Connection1 = emit_stats(Connection, State),
964972
{keep_state, StatemData#statem_data{connection = Connection1}};
973+
open(info, check_outstanding_requests,
974+
#statem_data{connection = #stream_connection{outstanding_requests = Requests,
975+
request_timeout = Timeout} = Connection0} =
976+
StatemData) ->
977+
Time = erlang:monotonic_time(millisecond),
978+
rabbit_log:debug("Checking outstanding requests at ~tp: ~tp", [Time, Requests]),
979+
HasTimedOut = maps:fold(fun(_, #request{}, true) ->
980+
true;
981+
(K, #request{content = Ctnt, start = Start}, false) ->
982+
case (Time - Start) > Timeout of
983+
true ->
984+
rabbit_log:debug("Request ~tp with content ~tp has timed out",
985+
[K, Ctnt]),
986+
987+
true;
988+
false ->
989+
false
990+
end
991+
end, false, Requests),
992+
case HasTimedOut of
993+
true ->
994+
rabbit_log_connection:info("Forcing stream connection ~tp closing: request to client timed out",
995+
[self()]),
996+
_ = demonitor_all_streams(Connection0),
997+
{stop, {request_timeout, <<"Request timeout">>}};
998+
false ->
999+
Connection1 = ensure_outstanding_requests_timer(
1000+
Connection0#stream_connection{outstanding_requests_timer = undefined}
1001+
),
1002+
{keep_state, StatemData#statem_data{connection = Connection1}}
1003+
end;
9651004
open(info, {shutdown, Explanation} = Reason,
9661005
#statem_data{connection = Connection}) ->
9671006
%% rabbitmq_management or rabbitmq_stream_management plugin
@@ -2531,7 +2570,7 @@ handle_frame_post_auth(Transport,
25312570
[RC])
25322571
end,
25332572
case maps:take(CorrelationId, Requests0) of
2534-
{#{subscription_id := SubscriptionId} = Msg, Rs} ->
2573+
{#request{content = #{subscription_id := SubscriptionId} = Msg}, Rs} ->
25352574
Stream = stream_from_consumers(SubscriptionId, Consumers),
25362575
rabbit_log:debug("Received consumer update response for subscription "
25372576
"~tp on stream ~tp, correlation ID ~tp",
@@ -2904,31 +2943,58 @@ maybe_register_consumer(VirtualHost,
29042943
Active.
29052944

29062945
maybe_send_consumer_update(Transport,
2907-
Connection =
2908-
#stream_connection{socket = S,
2909-
correlation_id_sequence = CorrIdSeq,
2910-
outstanding_requests = OutstandingRequests0},
2946+
Connection = #stream_connection{
2947+
socket = S,
2948+
correlation_id_sequence = CorrIdSeq},
29112949
Consumer,
29122950
Active,
29132951
Msg) ->
2914-
#consumer{configuration = #consumer_configuration{
2915-
stream = Stream,
2916-
subscription_id = SubscriptionId
2917-
}} = Consumer,
2918-
rabbit_log:debug("SAC subscription ~tp on ~tp, active = ~tp, " ++
2919-
"sending consumer update with correlation ID ~tp",
2920-
[SubscriptionId, Stream, Active, CorrIdSeq]),
2921-
Frame =
2922-
rabbit_stream_core:frame({request, CorrIdSeq,
2923-
{consumer_update, SubscriptionId, Active}}),
2952+
#consumer{configuration =
2953+
#consumer_configuration{subscription_id = SubscriptionId}} = Consumer,
2954+
Frame = rabbit_stream_core:frame({request, CorrIdSeq,
2955+
{consumer_update, SubscriptionId, Active}}),
2956+
2957+
Connection1 = register_request(Connection, Msg),
29242958

2925-
OutstandingRequests1 =
2926-
maps:put(CorrIdSeq,
2927-
Msg,
2928-
OutstandingRequests0),
29292959
send(Transport, S, Frame),
2930-
Connection#stream_connection{correlation_id_sequence = CorrIdSeq + 1,
2931-
outstanding_requests = OutstandingRequests1}.
2960+
Connection1.
2961+
2962+
register_request(#stream_connection{outstanding_requests = Requests0,
2963+
correlation_id_sequence = CorrIdSeq} = C,
2964+
RequestContent) ->
2965+
rabbit_log:debug("Registering RPC request ~tp with correlation ID ~tp",
2966+
[RequestContent, CorrIdSeq]),
2967+
2968+
Requests1 = maps:put(CorrIdSeq, request(RequestContent), Requests0),
2969+
2970+
ensure_outstanding_requests_timer(
2971+
C#stream_connection{correlation_id_sequence = CorrIdSeq + 1,
2972+
outstanding_requests = Requests1}).
2973+
2974+
request(Content) ->
2975+
#request{start = erlang:monotonic_time(millisecond),
2976+
content = Content}.
2977+
2978+
ensure_outstanding_requests_timer(#stream_connection{
2979+
outstanding_requests = Requests,
2980+
outstanding_requests_timer = undefined
2981+
} = C) when map_size(Requests) =:= 0 ->
2982+
C;
2983+
ensure_outstanding_requests_timer(#stream_connection{
2984+
outstanding_requests = Requests,
2985+
outstanding_requests_timer = TRef
2986+
} = C) when map_size(Requests) =:= 0 ->
2987+
erlang:cancel_timer(TRef, [{async, true}, {info, false}]),
2988+
C#stream_connection{outstanding_requests_timer = undefined};
2989+
ensure_outstanding_requests_timer(#stream_connection{
2990+
outstanding_requests = Requests,
2991+
outstanding_requests_timer = undefined,
2992+
request_timeout = Timeout
2993+
} = C) when map_size(Requests) > 0 ->
2994+
TRef = erlang:send_after(Timeout, self(), check_outstanding_requests),
2995+
C#stream_connection{outstanding_requests_timer = TRef};
2996+
ensure_outstanding_requests_timer(C) ->
2997+
C.
29322998

29332999
maybe_unregister_consumer(_, _, false = _Sac, Requests) ->
29343000
Requests;
@@ -2945,9 +3011,10 @@ maybe_unregister_consumer(VirtualHost,
29453011
ConsumerName = consumer_name(Properties),
29463012

29473013
Requests1 = maps:fold(
2948-
fun(_, #{active := false,
2949-
subscription_id := SubId,
2950-
stepping_down := true}, Acc) when SubId =:= SubscriptionId ->
3014+
fun(_, #request{content =
3015+
#{active := false,
3016+
subscription_id := SubId,
3017+
stepping_down := true}}, Acc) when SubId =:= SubscriptionId ->
29513018
_ = rabbit_stream_sac_coordinator:activate_consumer(VirtualHost,
29523019
Stream,
29533020
ConsumerName),

deps/rabbitmq_stream/test/rabbit_stream_SUITE.erl

+63-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ groups() ->
4444
unauthenticated_client_rejected_authenticating,
4545
timeout_authenticating,
4646
timeout_close_sent,
47-
max_segment_size_bytes_validation]},
47+
max_segment_size_bytes_validation,
48+
close_connection_on_consumer_update_timeout]},
4849
%% Run `test_global_counters` on its own so the global metrics are
4950
%% initialised to 0 for each testcase
5051
{single_node_1, [], [test_global_counters]},
@@ -110,6 +111,26 @@ end_per_group(_, Config) ->
110111
rabbit_ct_helpers:run_steps(Config,
111112
rabbit_ct_broker_helpers:teardown_steps()).
112113

114+
init_per_testcase(close_connection_on_consumer_update_timeout = TestCase, Config) ->
115+
ok = rabbit_ct_broker_helpers:rpc(Config,
116+
0,
117+
application,
118+
set_env,
119+
[rabbitmq_stream, request_timeout, 2000]),
120+
rabbit_ct_helpers:testcase_started(Config, TestCase);
121+
init_per_testcase(TestCase, Config) ->
122+
rabbit_ct_helpers:testcase_started(Config, TestCase).
123+
124+
end_per_testcase(close_connection_on_consumer_update_timeout = TestCase, Config) ->
125+
ok = rabbit_ct_broker_helpers:rpc(Config,
126+
0,
127+
application,
128+
set_env,
129+
[rabbitmq_stream, request_timeout, 60000]),
130+
rabbit_ct_helpers:testcase_finished(Config, TestCase);
131+
end_per_testcase(TestCase, Config) ->
132+
rabbit_ct_helpers:testcase_finished(Config, TestCase).
133+
113134
test_global_counters(Config) ->
114135
Stream = atom_to_binary(?FUNCTION_NAME, utf8),
115136
test_server(gen_tcp, Stream, Config),
@@ -350,6 +371,37 @@ max_segment_size_bytes_validation(Config) ->
350371
test_close(Transport, S, C3),
351372
ok.
352373

374+
close_connection_on_consumer_update_timeout(Config) ->
375+
Transport = gen_tcp,
376+
Port = get_stream_port(Config),
377+
{ok, S} =
378+
Transport:connect("localhost", Port,
379+
[{active, false}, {mode, binary}]),
380+
C0 = rabbit_stream_core:init(0),
381+
C1 = test_peer_properties(Transport, S, C0),
382+
C2 = test_authenticate(Transport, S, C1),
383+
Stream = atom_to_binary(?FUNCTION_NAME, utf8),
384+
C3 = test_create_stream(Transport, S, Stream, C2),
385+
386+
SubId = 42,
387+
C4 = test_subscribe(Transport, S, SubId, Stream,
388+
#{<<"single-active-consumer">> => <<"true">>,
389+
<<"name">> => <<"foo">>},
390+
C3),
391+
{Cmd, _C5} = receive_commands(Transport, S, C4),
392+
?assertMatch({request, _, {consumer_update, SubId, true}}, Cmd),
393+
closed = wait_for_socket_close(Transport, S, 10),
394+
{ok, Sb} =
395+
Transport:connect("localhost", Port,
396+
[{active, false}, {mode, binary}]),
397+
Cb0 = rabbit_stream_core:init(0),
398+
Cb1 = test_peer_properties(Transport, Sb, Cb0),
399+
Cb2 = test_authenticate(Transport, Sb, Cb1),
400+
Cb3 = test_delete_stream(Transport, Sb, Stream, Cb2, false),
401+
_Cb4 = test_close(Transport, Sb, Cb3),
402+
closed = wait_for_socket_close(Transport, Sb, 10),
403+
ok.
404+
353405
consumer_count(Config) ->
354406
ets_count(Config, ?TABLE_CONSUMER).
355407

@@ -525,12 +577,21 @@ test_create_stream(Transport, S, Stream, C0) ->
525577
C.
526578

527579
test_delete_stream(Transport, S, Stream, C0) ->
580+
test_delete_stream(Transport, S, Stream, C0, true).
581+
582+
test_delete_stream(Transport, S, Stream, C0, false) ->
583+
do_test_delete_stream(Transport, S, Stream, C0);
584+
test_delete_stream(Transport, S, Stream, C0, true) ->
585+
C1 = do_test_delete_stream(Transport, S, Stream, C0),
586+
test_metadata_update_stream_deleted(Transport, S, Stream, C1).
587+
588+
do_test_delete_stream(Transport, S, Stream, C0) ->
528589
DeleteStreamFrame =
529590
rabbit_stream_core:frame({request, 1, {delete_stream, Stream}}),
530591
ok = Transport:send(S, DeleteStreamFrame),
531592
{Cmd, C1} = receive_commands(Transport, S, C0),
532593
?assertMatch({response, 1, {delete_stream, ?RESPONSE_CODE_OK}}, Cmd),
533-
test_metadata_update_stream_deleted(Transport, S, Stream, C1).
594+
C1.
534595

535596
test_metadata_update_stream_deleted(Transport, S, Stream, C0) ->
536597
{Meta, C1} = receive_commands(Transport, S, C0),

0 commit comments

Comments
 (0)