From 51e3c384854e2bc27fd82bbd016ea52d4556f03d Mon Sep 17 00:00:00 2001 From: Jakub Witczak Date: Wed, 11 Sep 2024 16:55:27 +0200 Subject: [PATCH 1/2] stdlib: customize unlink_flush for noproc scenario - handle termination race in supervisor - race happens when supervisor shutdown procedure - collides with child terminating from other reason - - in ssh tests this happens when client closes connection - which propagates to server over network - and results with termination of connection processes on server side - in parallel, server being is shutdown during test cleanup --- lib/stdlib/src/supervisor.erl | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index 58b943d874f8..bb1b2456349a 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -968,16 +968,25 @@ shutdown(#child{pid=Pid, shutdown=Time} = Child) -> end end. -unlink_flush(Pid, DefaultReason) -> - %% We call unlink in order to guarantee that the 'EXIT' has arrived - %% from the dead process. See the unlink docs for details. +unlink_flush(Pid, noproc) -> + {links, Ls} = process_info(self(),links), + Timeout = case lists:member(Pid, Ls) of + true -> infinity; + false -> 0 + end, + receive + {'EXIT', Pid, ExitReason} -> + ExitReason + after Timeout -> + naughty_child + end; +unlink_flush(Pid, ExitReason) -> unlink(Pid), receive - {'EXIT', Pid, Reason} -> - Reason - after 0 -> - DefaultReason - end. + {'EXIT', Pid, _} -> ok + after 0 -> ok + end, + ExitReason. %%----------------------------------------------------------------- %% Func: terminate_dynamic_children/1 From 18cba2366be1abd901de0261b219497771f8e817 Mon Sep 17 00:00:00 2001 From: Jakub Witczak Date: Thu, 26 Sep 2024 09:03:38 +0200 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Rickard Green --- lib/stdlib/src/supervisor.erl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/stdlib/src/supervisor.erl b/lib/stdlib/src/supervisor.erl index bb1b2456349a..2cfa8a542559 100644 --- a/lib/stdlib/src/supervisor.erl +++ b/lib/stdlib/src/supervisor.erl @@ -970,6 +970,13 @@ shutdown(#child{pid=Pid, shutdown=Time} = Child) -> unlink_flush(Pid, noproc) -> {links, Ls} = process_info(self(),links), + %% We know that the process has terminated. If we still have a link, we are + %% guaranteed to eventually receive the 'EXIT' message containing the + %% actual exit reason (or a 'noconnection' exit reason if the connection is + %% lost). If we do not have a link, the 'EXIT' message is already present + %% in the message queue unless the child process behaved badly (unlinked + %% itself from us). If it behaved badly, we may or may not receive an 'EXIT' + %% message. Timeout = case lists:member(Pid, Ls) of true -> infinity; false -> 0 @@ -978,9 +985,10 @@ unlink_flush(Pid, noproc) -> {'EXIT', Pid, ExitReason} -> ExitReason after Timeout -> - naughty_child + child_process_unlinked end; unlink_flush(Pid, ExitReason) -> + %% Leave no 'EXIT' message from this process in the message queue. unlink(Pid), receive {'EXIT', Pid, _} -> ok