diff --git a/source/lib/omnitrace/library/components/roctracer.cpp b/source/lib/omnitrace/library/components/roctracer.cpp index 748a7b16f..381508955 100644 --- a/source/lib/omnitrace/library/components/roctracer.cpp +++ b/source/lib/omnitrace/library/components/roctracer.cpp @@ -306,6 +306,8 @@ roctracer::flush() // make sure all async operations are executed for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) hip_exec_activity_callbacks(i); + + OMNITRACE_VERBOSE_F(2, "roctracer flush completed\n"); } void diff --git a/source/lib/omnitrace/library/roctracer.cpp b/source/lib/omnitrace/library/roctracer.cpp index f4459c2b5..abb96eefb 100644 --- a/source/lib/omnitrace/library/roctracer.cpp +++ b/source/lib/omnitrace/library/roctracer.cpp @@ -163,6 +163,14 @@ get_hip_activity_callbacks(int64_t _tid = threading::get_id()) return thread_data_t::instance(construct_on_thread{ _tid }); } +size_t +get_hip_activity_callbacks_size() +{ + using thread_data_t = + thread_data>, category::roctracer>; + return thread_data_t::size(); +} + using hip_activity_mutex_t = std::decay_t; using key_data_mutex_t = std::decay_t; @@ -430,6 +438,9 @@ hsa_activity_callback(uint32_t op, const void* vrecord, void* arg) void hip_exec_activity_callbacks(int64_t _tid) { + // guard against initialization of structure when trying to exec + if(static_cast(_tid) >= get_hip_activity_callbacks_size()) return; + // OMNITRACE_ROCTRACER_CALL(roctracer_flush_activity()); locking::atomic_lock _lk{ get_hip_activity_mutex(_tid) }; auto& _async_ops = get_hip_activity_callbacks(_tid);