From 224ff57eafd0f34b00648597bfa5a28455a50b23 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 10 Oct 2024 09:57:47 -0400 Subject: [PATCH] stackwalk: fix jl_thread_suspend_and_get_state race (#56047) There was a missing re-assignment of old = -1; at the end of that loop which means in the ABA case, we accidentally actually acquire the lock on the thread despite not actually having stopped the thread; or in the counter-case, we try to run through this logic with old==-1 on the next iteration, and that isn't valid either (jl_thread_suspend_and_get_state should return failure and the loop will abort too early). Fix #56046 --- src/stackwalk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6aa36fa8b499c..7fb4de0372738 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1196,8 +1196,8 @@ JL_DLLEXPORT size_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, } bt_context_t *context = NULL; bt_context_t c; - int16_t old = -1; - while (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) { + int16_t old; + for (old = -1; !jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid; old = -1) { int lockret = jl_lock_stackwalk(); // if this task is already running somewhere, we need to stop the thread it is running on and query its state if (!jl_thread_suspend_and_get_state(old, 1, &c)) {