From b42687329a3dfd774804b9ca2b2866ac770b40cd Mon Sep 17 00:00:00 2001 From: Nathan Brei Date: Tue, 10 Dec 2024 17:01:20 -0500 Subject: [PATCH 1/5] Fix helgrind complaints about notifying outside of lock --- src/libraries/JANA/Engine/JExecutionEngine.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/libraries/JANA/Engine/JExecutionEngine.cc b/src/libraries/JANA/Engine/JExecutionEngine.cc index 189684ba2..e843f6aae 100644 --- a/src/libraries/JANA/Engine/JExecutionEngine.cc +++ b/src/libraries/JANA/Engine/JExecutionEngine.cc @@ -138,9 +138,8 @@ void JExecutionEngine::ScaleWorkers(size_t nthreads) { LOG_DEBUG(GetLogger()) << "Stopping worker " << worker_id << LOG_END; m_worker_states[worker_id]->is_stop_requested = true; } - lock.unlock(); - m_condvar.notify_all(); // Wake up all threads so that they can exit the condvar wait loop + lock.unlock(); // We join all (eligible) threads _outside_ of the mutex for (int worker_id=prev_nthreads-1; worker_id >= (int) nthreads; --worker_id) { @@ -450,7 +449,6 @@ void JExecutionEngine::ExchangeTask(Task& task, size_t worker_id, bool nonblocki } m_total_idle_duration += (worker.last_checkout_time - idle_time_start); - lock.unlock(); // Notify one worker, who will notify the next, etc, as long as FindNextReadyTaskUnsafe() succeeds. // After FindNextReadyTaskUnsafe fails, all threads block until the next returning worker reactivates the // notification chain. From ae66da49cc00434a4f351070f027d9dfdb787431 Mon Sep 17 00:00:00 2001 From: Nathan Brei Date: Tue, 10 Dec 2024 17:01:47 -0500 Subject: [PATCH 2/5] Fix helgrind complaint about unprotected streaming to cout --- src/libraries/JANA/JLogger.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/JANA/JLogger.h b/src/libraries/JANA/JLogger.h index b027dd0f7..ebbc2138c 100644 --- a/src/libraries/JANA/JLogger.h +++ b/src/libraries/JANA/JLogger.h @@ -12,6 +12,7 @@ #include #include #include +#include struct JLogger { @@ -99,6 +100,9 @@ class JLogMessage : public std::stringstream { } virtual ~JLogMessage() { + static std::mutex cout_mutex; + std::lock_guard lock(cout_mutex); + std::string line; std::ostringstream oss; while (std::getline(*this, line)) { @@ -106,8 +110,6 @@ class JLogMessage : public std::stringstream { } std::cout << oss.str(); std::cout.flush(); - this->str(""); - this->clear(); } }; From e8b5e368e11b0f23b7b32f305b99d6609387c176 Mon Sep 17 00:00:00 2001 From: Nathan Brei Date: Wed, 11 Dec 2024 01:21:27 -0500 Subject: [PATCH 3/5] Fix timeout logic --- src/libraries/JANA/Engine/JExecutionEngine.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/JANA/Engine/JExecutionEngine.cc b/src/libraries/JANA/Engine/JExecutionEngine.cc index e843f6aae..65d525faf 100644 --- a/src/libraries/JANA/Engine/JExecutionEngine.cc +++ b/src/libraries/JANA/Engine/JExecutionEngine.cc @@ -212,7 +212,7 @@ void JExecutionEngine::RunSupervisor() { Perf perf; while (true) { - if (m_enable_timeout) { + if (m_enable_timeout && m_timeout_s > 0) { CheckTimeout(); } From 7cb865527709cea8d7ebdd8e74f5e873fd132062 Mon Sep 17 00:00:00 2001 From: Nathan Brei Date: Thu, 12 Dec 2024 00:04:44 -0500 Subject: [PATCH 4/5] Warm up backtrace() --- src/libraries/JANA/CLI/JSignalHandler.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/libraries/JANA/CLI/JSignalHandler.cc b/src/libraries/JANA/CLI/JSignalHandler.cc index 2ef87f4d8..dfb019b5c 100644 --- a/src/libraries/JANA/CLI/JSignalHandler.cc +++ b/src/libraries/JANA/CLI/JSignalHandler.cc @@ -71,6 +71,13 @@ void register_handlers(JApplication* app) { // It would be nice to do this in a less unexpected place, and hopefully that will naturally // emerge from future refactorings. + // We capture a dummy backtrace to warm it up before it gets called inside a signal handler. + // Because backtrace() dynamically loads libgcc, calling malloc() in the process, it is not + // async-signal-safe until this warmup has happened. Thus we prevent a rare deadlock and several + // TSAN and Helgrind warnings. + JBacktrace backtrace; + backtrace.Capture(); + //Define signal action struct sigaction sSignalAction; sSignalAction.sa_sigaction = handle_sigsegv; From 04bc4bfcbf30aaad103f0809a1eecbce57095afc Mon Sep 17 00:00:00 2001 From: Nathan Brei Date: Thu, 12 Dec 2024 14:13:24 -0500 Subject: [PATCH 5/5] JException has a JBacktrace instead of a string Previously, JException would stringify the JBacktrace in the constructor. This is fine as long as JException represents an unrecoverable error. If used for flow control, however, this imposes a steep performance penalty. The penalty will become even steeper when we enable `addr2line`. Now the JException only stringifies the backtrace upon printing, and only if the backtrace is requested. --- src/libraries/JANA/Engine/JExecutionEngine.cc | 2 +- src/libraries/JANA/JException.h | 18 ++++------- src/libraries/JANA/Utils/JBacktrace.cc | 32 ++++++++++++++++--- src/libraries/JANA/Utils/JBacktrace.h | 16 +++++++--- src/plugins/JTest/JTestTracker.h | 29 +++++++++-------- 5 files changed, 63 insertions(+), 34 deletions(-) diff --git a/src/libraries/JANA/Engine/JExecutionEngine.cc b/src/libraries/JANA/Engine/JExecutionEngine.cc index 65d525faf..f485dc5b0 100644 --- a/src/libraries/JANA/Engine/JExecutionEngine.cc +++ b/src/libraries/JANA/Engine/JExecutionEngine.cc @@ -317,7 +317,7 @@ void JExecutionEngine::HandleFailures() { if (worker->is_timed_out) { GetApplication()->SetExitCode((int) JApplication::ExitCode::Timeout); auto ex = JException("Timeout in worker thread"); - ex.stacktrace = worker->backtrace.ToString(); + ex.backtrace = worker->backtrace; throw ex; } } diff --git a/src/libraries/JANA/JException.h b/src/libraries/JANA/JException.h index 92d81663a..9c38db844 100644 --- a/src/libraries/JANA/JException.h +++ b/src/libraries/JANA/JException.h @@ -15,11 +15,8 @@ struct JException : public std::exception { public: /// Basic constructor - explicit JException(std::string message = "Unknown exception") : message(std::move(message)) - { - JBacktrace backtrace; + explicit JException(std::string message = "Unknown exception") : message(std::move(message)) { backtrace.Capture(2); - stacktrace = backtrace.ToString(); } virtual ~JException() = default; @@ -35,7 +32,7 @@ struct JException : public std::exception { } std::string GetStackTrace() { - return stacktrace; + return backtrace.ToString(); } const char* what() const noexcept { @@ -63,8 +60,10 @@ struct JException : public std::exception { if (ex.plugin_name.length() != 0) { os << " Plugin: " << ex.plugin_name << std::endl; } - if (ex.stacktrace.length() != 0 && ex.show_stacktrace) { - os << " Backtrace:" << std::endl << std::endl << ex.stacktrace; + if (ex.show_stacktrace) { + ex.backtrace.WaitForCapture(); + ex.backtrace.ToString(); + os << " Backtrace:" << std::endl << std::endl << ex.backtrace.ToString(); } return os; } @@ -75,7 +74,7 @@ struct JException : public std::exception { std::string type_name; std::string function_name; std::string instance_name; - std::string stacktrace; + JBacktrace backtrace; std::exception_ptr nested_exception; bool show_stacktrace=true; @@ -89,10 +88,7 @@ JException::JException(std::string format_str, Args... args) { char cmess[1024]; snprintf(cmess, 1024, format_str.c_str(), args...); message = cmess; - - JBacktrace backtrace; backtrace.Capture(2); - stacktrace = backtrace.ToString(); } diff --git a/src/libraries/JANA/Utils/JBacktrace.cc b/src/libraries/JANA/Utils/JBacktrace.cc index 7a76be559..6ccbdb6e4 100644 --- a/src/libraries/JANA/Utils/JBacktrace.cc +++ b/src/libraries/JANA/Utils/JBacktrace.cc @@ -15,6 +15,28 @@ #include +JBacktrace::JBacktrace(const JBacktrace& other) { + m_ready = other.m_ready.load(); + m_frame_count = other.m_frame_count; + m_frames_to_omit = other.m_frames_to_omit; + m_buffer = other.m_buffer; +} + +JBacktrace& JBacktrace::operator=(const JBacktrace& other) { + m_ready = other.m_ready.load(); + m_frame_count = other.m_frame_count; + m_frames_to_omit = other.m_frames_to_omit; + m_buffer = other.m_buffer; + return *this; +} + +JBacktrace::JBacktrace(JBacktrace&& other) { + m_ready = other.m_ready.load(); + m_frame_count = other.m_frame_count; + m_frames_to_omit = other.m_frames_to_omit; + m_buffer = std::move(other.m_buffer); +} + void JBacktrace::Reset() { m_ready = false; } @@ -26,13 +48,13 @@ void JBacktrace::WaitForCapture() const { } void JBacktrace::Capture(int frames_to_omit) { - m_frame_count = backtrace(m_buffer, MAX_FRAMES); + m_frame_count = backtrace(m_buffer.data(), MAX_FRAMES); m_frames_to_omit = frames_to_omit; m_ready.store(true, std::memory_order_release); } -void JBacktrace::Format(std::ostream& os) { - char** symbols = backtrace_symbols(m_buffer, m_frame_count); +void JBacktrace::Format(std::ostream& os) const { + char** symbols = backtrace_symbols(m_buffer.data(), m_frame_count); // Skip the first few frames, which are inevitably signal handlers, JBacktrace ctors, or JException ctors for (int i=m_frames_to_omit; i #include #include +#include class JBacktrace { static const int MAX_FRAMES = 100; - void* m_buffer[MAX_FRAMES]; + std::array m_buffer; int m_frame_count = 0; int m_frames_to_omit = 0; std::atomic_bool m_ready {false}; public: + + JBacktrace() = default; + ~JBacktrace() = default; + JBacktrace(const JBacktrace& other); + JBacktrace(JBacktrace&& other); + JBacktrace& operator=(const JBacktrace&); + void WaitForCapture() const; void Capture(int frames_to_omit=0); void Reset(); - std::string ToString(); - void Format(std::ostream& os); - std::string AddrToLineInfo(const char* filename, size_t offset); + std::string ToString() const; + void Format(std::ostream& os) const; + std::string AddrToLineInfo(const char* filename, size_t offset) const; }; diff --git a/src/plugins/JTest/JTestTracker.h b/src/plugins/JTest/JTestTracker.h index 99cddc1cc..2c5a9520f 100644 --- a/src/plugins/JTest/JTestTracker.h +++ b/src/plugins/JTest/JTestTracker.h @@ -13,8 +13,9 @@ class JTestTracker : public JFactoryT { + Parameter m_except {this, "except", false, "Event #7 always excepts" }; Parameter m_segfault {this, "segfault", false, "Event #7 always segfaults" }; - Parameter m_timeout {this, "timeout", false, "Event #22 always times out" }; + Parameter m_timeout {this, "timeout", false, "Event #7 always times out" }; Parameter m_cputime_ms {this, "cputime_ms", 200, "Time spent during tracking" }; Parameter m_write_bytes {this, "bytes", 1000, "Bytes written during tracking"}; Parameter m_cputime_spread {this, "cputime_spread", 0.25, "Spread of time spent during tracking"}; @@ -36,21 +37,23 @@ class JTestTracker : public JFactoryT { m_bench_utils.read_memory(ed->buffer); // Do lots of computation - if (*m_timeout) { - if (aEvent->GetEventNumber() == 22) { + m_bench_utils.consume_cpu_ms(*m_cputime_ms, *m_cputime_spread); + + // Optionally trigger failure scenarios + if(aEvent->GetEventNumber() == 7) { + // Only one of these can happen, so in principle the param should be an enum + if (*m_except) { + throw std::runtime_error("Something went wrong"); + } + if (*m_segfault) { + // Trigger a segfault on purpose + JTestTrackData* d = nullptr; + d->buffer[0] = 22; + } + if (*m_timeout) { m_bench_utils.consume_cpu_ms(1000000); } } - else { - m_bench_utils.consume_cpu_ms(*m_cputime_ms, *m_cputime_spread); - } - - - if(*m_segfault && aEvent->GetEventNumber() == 7) { - // Trigger a segfault on purpose - JTestTrackData* d = nullptr; - d->buffer[0] = 22; - } // Write (small) track data auto td = new JTestTrackData;