diff --git a/source/bin/omnitrace-causal/impl.cpp b/source/bin/omnitrace-causal/impl.cpp index 2f613f88a..5e901fa4c 100644 --- a/source/bin/omnitrace-causal/impl.cpp +++ b/source/bin/omnitrace-causal/impl.cpp @@ -200,7 +200,6 @@ get_initial_environment() update_env(_env, "OMNITRACE_TRACE", false); update_env(_env, "OMNITRACE_PROFILE", false); update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", false); - update_env(_env, "OMNITRACE_CRITICAL_TRACE", false); update_env(_env, "OMNITRACE_THREAD_POOL_SIZE", get_env("OMNITRACE_THREAD_POOL_SIZE", 0)); update_env(_env, "OMNITRACE_LAUNCHER", "omnitrace-causal"); diff --git a/source/lib/core/argparse.cpp b/source/lib/core/argparse.cpp index 91e33fbbc..b69a4c5de 100644 --- a/source/lib/core/argparse.cpp +++ b/source/lib/core/argparse.cpp @@ -1225,7 +1225,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) add_group_arguments(_parser, "perfetto", _data, true); add_group_arguments(_parser, "timemory", _data, true); add_group_arguments(_parser, "rocm", _data, true); - add_group_arguments(_parser, "critical_trace", _data, true); _parser.start_group("MISCELLANEOUS OPTIONS", ""); diff --git a/source/lib/core/categories.hpp b/source/lib/core/categories.hpp index ce1106823..ce3f66356 100644 --- a/source/lib/core/categories.hpp +++ b/source/lib/core/categories.hpp @@ -110,9 +110,6 @@ OMNITRACE_DEFINE_CATEGORY(category, mpi, OMNITRACE_CATEGORY_MPI, "mpi", "MPI reg OMNITRACE_DEFINE_CATEGORY(category, ompt, OMNITRACE_CATEGORY_OMPT, "ompt", "OpenMP tools regions") OMNITRACE_DEFINE_CATEGORY(category, process_sampling, OMNITRACE_CATEGORY_PROCESS_SAMPLING, "process_sampling", "Process-level data") OMNITRACE_DEFINE_CATEGORY(category, comm_data, OMNITRACE_CATEGORY_COMM_DATA, "comm_data", "MPI/RCCL counters for tracking amount of data sent or received") -OMNITRACE_DEFINE_CATEGORY(category, critical_trace, OMNITRACE_CATEGORY_CRITICAL_TRACE, "critical-trace", "Critical trace data") -OMNITRACE_DEFINE_CATEGORY(category, host_critical_trace, OMNITRACE_CATEGORY_HOST_CRITICAL_TRACE, "host-critical-trace", "Host-side critical trace data") -OMNITRACE_DEFINE_CATEGORY(category, device_critical_trace, OMNITRACE_CATEGORY_DEVICE_CRITICAL_TRACE, "device-critical-trace", "Device-side critical trace data") OMNITRACE_DEFINE_CATEGORY(category, causal, OMNITRACE_CATEGORY_CAUSAL, "causal", "Causal profiling data") OMNITRACE_DEFINE_CATEGORY(category, cpu_freq, OMNITRACE_CATEGORY_CPU_FREQ, "cpu_frequency", "CPU frequency (collected in background thread)") OMNITRACE_DEFINE_CATEGORY(category, process_page, OMNITRACE_CATEGORY_PROCESS_PAGE, "process_page_fault", "Memory page faults in process (collected in background thread)") @@ -174,9 +171,6 @@ using name = perfetto_category; OMNITRACE_PERFETTO_CATEGORY(category::sampling), \ OMNITRACE_PERFETTO_CATEGORY(category::process_sampling), \ OMNITRACE_PERFETTO_CATEGORY(category::comm_data), \ - OMNITRACE_PERFETTO_CATEGORY(category::critical_trace), \ - OMNITRACE_PERFETTO_CATEGORY(category::host_critical_trace), \ - OMNITRACE_PERFETTO_CATEGORY(category::device_critical_trace), \ OMNITRACE_PERFETTO_CATEGORY(category::causal), \ OMNITRACE_PERFETTO_CATEGORY(category::cpu_freq), \ OMNITRACE_PERFETTO_CATEGORY(category::process_page), \ diff --git a/source/lib/core/config.cpp b/source/lib/core/config.cpp index 6ca5f8065..cdecf7a2f 100644 --- a/source/lib/core/config.cpp +++ b/source/lib/core/config.cpp @@ -520,10 +520,6 @@ configure_settings(bool _init) _backend, "perfetto") ->set_choices({ "inprocess", "system", "all" }); - OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CRITICAL_TRACE", - "Enable generation of the critical trace", false, "backend", - "critical_trace"); - OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_LOCKS", "Enable tracing calls to pthread_mutex_lock, " "pthread_mutex_unlock, pthread_mutex_trylock", @@ -647,15 +643,6 @@ configure_settings(bool _init) "is collected on every available device", "", "rocprofiler", "rocm", "hardware_counters"); - OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CRITICAL_TRACE_DEBUG", - "Enable debugging for critical trace", _omnitrace_debug, - "debugging", "critical_trace", "advanced"); - - OMNITRACE_CONFIG_SETTING( - bool, "OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES", - "Include names in serialization of critical trace (mainly for debugging)", - _omnitrace_debug, "debugging", "critical_trace", "advanced"); - OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB", "Hint for shared-memory buffer size in perfetto (in KB)", size_t{ 4096 }, "perfetto", "data", "advanced"); @@ -721,21 +708,6 @@ configure_settings(bool _init) 1), "parallelism", "advanced"); - OMNITRACE_CONFIG_EXT_SETTING(int64_t, "OMNITRACE_CRITICAL_TRACE_COUNT", - "Number of critical trace to export (0 == all)", - int64_t{ 0 }, "critical_trace", - "omnitrace-critical-trace", "advanced"); - - OMNITRACE_CONFIG_SETTING(uint64_t, "OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT", - "Number of critical trace records to store in thread-local " - "memory before submitting to shared buffer", - uint64_t{ 2000 }, "critical_trace", "advanced"); - - OMNITRACE_CONFIG_EXT_SETTING( - int64_t, "OMNITRACE_CRITICAL_TRACE_PER_ROW", - "How many critical traces per row in perfetto (0 == all in one row)", - int64_t{ 0 }, "critical_trace", "omnitrace-critical-trace", "advanced"); - OMNITRACE_CONFIG_SETTING( std::string, "OMNITRACE_TIMEMORY_COMPONENTS", "List of components to collect via timemory (see `omnitrace-avail -C`)", @@ -1157,14 +1129,12 @@ configure_mode_settings(const std::shared_ptr& _config) _set("OMNITRACE_USE_OMPT", false); _set("OMNITRACE_USE_SAMPLING", false); _set("OMNITRACE_USE_PROCESS_SAMPLING", false); - _set("OMNITRACE_CRITICAL_TRACE", false); } else if(get_mode() == Mode::Causal) { _set("OMNITRACE_USE_CAUSAL", true); _set("OMNITRACE_TRACE", false); _set("OMNITRACE_PROFILE", false); - _set("OMNITRACE_CRITICAL_TRACE", false); _set("OMNITRACE_USE_SAMPLING", false); _set("OMNITRACE_USE_PROCESS_SAMPLING", false); } @@ -1223,7 +1193,6 @@ configure_mode_settings(const std::shared_ptr& _config) _set("OMNITRACE_USE_SAMPLING", false); _set("OMNITRACE_USE_PROCESS_SAMPLING", false); _set("OMNITRACE_USE_CODE_COVERAGE", false); - _set("OMNITRACE_CRITICAL_TRACE", false); set_setting_value("OMNITRACE_TIMEMORY_COMPONENTS", std::string{}); set_setting_value("OMNITRACE_PAPI_EVENTS", std::string{}); } @@ -1404,7 +1373,6 @@ configure_disabled_settings(const std::shared_ptr& _config) _handle_use_option("OMNITRACE_USE_ROCM_SMI", "rocm_smi"); _handle_use_option("OMNITRACE_USE_ROCTRACER", "roctracer"); _handle_use_option("OMNITRACE_USE_ROCPROFILER", "rocprofiler"); - _handle_use_option("OMNITRACE_CRITICAL_TRACE", "critical_trace"); #if !defined(OMNITRACE_USE_ROCTRACER) || OMNITRACE_USE_ROCTRACER == 0 _config->find("OMNITRACE_USE_ROCTRACER")->second->set_hidden(true); @@ -1971,13 +1939,6 @@ get_use_mpip() return static_cast&>(*_v->second).get(); } -bool& -get_use_critical_trace() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE"); - return static_cast&>(*_v->second).get(); -} - bool get_use_kokkosp() { @@ -2024,20 +1985,6 @@ get_num_threads_hint() return static_cast&>(*_v->second).get(); } -bool -get_critical_trace_debug() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_DEBUG"); - return static_cast&>(*_v->second).get(); -} - -bool -get_critical_trace_serialize_names() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES"); - return static_cast&>(*_v->second).get(); -} - bool get_sampling_keep_internal() { @@ -2094,13 +2041,6 @@ get_trace_hsa_activity() return static_cast&>(*_v->second).get(); } -int64_t -get_critical_trace_per_row() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_PER_ROW"); - return static_cast&>(*_v->second).get(); -} - size_t get_perfetto_shmem_size_hint() { @@ -2210,14 +2150,6 @@ get_perfetto_annotations() return static_cast&>(*_v->second).get(); } -uint64_t -get_critical_trace_update_freq() -{ - static uint64_t _v = - get_config()->get("OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT"); - return _v; -} - uint64_t get_thread_pool_size() { @@ -2389,13 +2321,6 @@ get_sampling_allocator_size() return std::max(static_cast&>(*_v->second).get(), 1); } -int64_t -get_critical_trace_count() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_COUNT"); - return static_cast&>(*_v->second).get(); -} - double get_process_sampling_freq() { diff --git a/source/lib/core/config.hpp b/source/lib/core/config.hpp index 1a90d9d87..dac62fc3f 100644 --- a/source/lib/core/config.hpp +++ b/source/lib/core/config.hpp @@ -218,9 +218,6 @@ get_use_pid(); bool& get_use_mpip(); -bool& -get_use_critical_trace() OMNITRACE_HOT; - bool get_use_kokkosp(); @@ -251,12 +248,6 @@ get_trace_hsa_api(); bool get_trace_hsa_activity(); -bool -get_critical_trace_debug(); - -bool -get_critical_trace_serialize_names(); - size_t get_perfetto_shmem_size_hint(); @@ -278,9 +269,6 @@ get_disabled_categories(); bool get_perfetto_annotations() OMNITRACE_HOT; -uint64_t -get_critical_trace_update_freq(); - uint64_t get_thread_pool_size(); @@ -297,9 +285,6 @@ get_perfetto_output_filename(); bool get_perfetto_roctracer_per_stream() OMNITRACE_HOT; -int64_t -get_critical_trace_count(); - double get_trace_delay(); @@ -360,9 +345,6 @@ get_process_sampling_duration(); std::string get_sampling_gpus(); -int64_t -get_critical_trace_per_row(); - bool get_trace_thread_locks(); diff --git a/source/lib/core/debug.hpp b/source/lib/core/debug.hpp index e869e261d..7f8141634 100644 --- a/source/lib/core/debug.hpp +++ b/source/lib/core/debug.hpp @@ -67,9 +67,6 @@ get_debug_tid() OMNITRACE_HOT; bool get_debug_pid() OMNITRACE_HOT; - -bool -get_critical_trace_debug() OMNITRACE_HOT; } // namespace config namespace debug @@ -560,12 +557,6 @@ as_hex(void*, size_t); #define OMNITRACE_BASIC_DEBUG_F(...) \ OMNITRACE_CONDITIONAL_BASIC_PRINT_F(::omnitrace::get_debug_env(), __VA_ARGS__) -#define OMNITRACE_CT_DEBUG(...) \ - OMNITRACE_CONDITIONAL_PRINT(::omnitrace::get_critical_trace_debug(), __VA_ARGS__) - -#define OMNITRACE_CT_DEBUG_F(...) \ - OMNITRACE_CONDITIONAL_PRINT_F(::omnitrace::get_critical_trace_debug(), __VA_ARGS__) - //--------------------------------------------------------------------------------------// // // Verbose macros diff --git a/source/lib/core/exception.cpp b/source/lib/core/exception.cpp index 43aebda47..e95d43cb2 100644 --- a/source/lib/core/exception.cpp +++ b/source/lib/core/exception.cpp @@ -50,6 +50,7 @@ template auto get_backtrace(Args... _arg) { + consume_args(_arg...); auto _bt = std::stringstream{}; if constexpr(sizeof...(Args) > 0) { @@ -57,7 +58,6 @@ get_backtrace(Args... _arg) } tim::unwind::detailed_backtrace<2>(_bt, true); return strdup(_bt.str().c_str()); - consume_args(_arg...); } } // namespace diff --git a/source/lib/omnitrace-user/omnitrace/categories.h b/source/lib/omnitrace-user/omnitrace/categories.h index 480a828ce..dd435e502 100644 --- a/source/lib/omnitrace-user/omnitrace/categories.h +++ b/source/lib/omnitrace-user/omnitrace/categories.h @@ -63,9 +63,6 @@ extern "C" OMNITRACE_CATEGORY_OMPT, OMNITRACE_CATEGORY_PROCESS_SAMPLING, OMNITRACE_CATEGORY_COMM_DATA, - OMNITRACE_CATEGORY_CRITICAL_TRACE, - OMNITRACE_CATEGORY_HOST_CRITICAL_TRACE, - OMNITRACE_CATEGORY_DEVICE_CRITICAL_TRACE, OMNITRACE_CATEGORY_CAUSAL, OMNITRACE_CATEGORY_CPU_FREQ, OMNITRACE_CATEGORY_PROCESS_PAGE, diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 63b6b2aff..1086989c0 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -48,7 +48,6 @@ #include "library/components/pthread_gotcha.hpp" #include "library/components/rocprofiler.hpp" #include "library/coverage.hpp" -#include "library/critical_trace.hpp" #include "library/ompt.hpp" #include "library/process_sampler.hpp" #include "library/ptl.hpp" @@ -203,9 +202,6 @@ ensure_finalization(bool _static_init = false) return scope::destructor{ []() { omnitrace_finalize_hidden(); } }; } -using Device = critical_trace::Device; -using Phase = critical_trace::Phase; - template struct fini_bundle { @@ -402,11 +398,6 @@ omnitrace_init_library_hidden() if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", _debug_value); } }; - // below will effectively do: - // get_cpu_cid_stack(0)->emplace_back(-1); - // plus query some env variables - add_critical_trace(0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0); - tim::trait::runtime_enabled::set(get_use_roctracer()); tim::trait::runtime_enabled::set(get_use_roctracer() && get_use_timemory()); @@ -920,55 +911,12 @@ omnitrace_finalize_hidden(void) causal::finish_experimenting(); } - if(get_use_critical_trace() || (get_use_rocm_smi() && get_use_roctracer())) - { - OMNITRACE_VERBOSE_F(1, "Generating the critical trace...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - using critical_trace_hash_data = - thread_data; - - if(i < critical_trace_hash_data::get()->size() && - critical_trace_hash_data::get()->at(i)) - { - OMNITRACE_DEBUG_F("Copying the hash id data for thread %zu...\n", i); - critical_trace::add_hash_id(*critical_trace_hash_data::get()->at(i)); - } - } - - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - using critical_trace_chain_data = thread_data; - - if(i < critical_trace_chain_data::get()->size() && - critical_trace_chain_data::get()->at(i)) - { - OMNITRACE_DEBUG_F( - "Updating the critical trace call-chains for thread %zu...\n", i); - critical_trace::update(i); // launch update task - } - } - - OMNITRACE_VERBOSE_F(1, "Waiting on critical trace updates...\n"); - tasking::join(); - } - if(get_use_process_sampling()) { OMNITRACE_VERBOSE_F(1, "Post-processing the system-level samples...\n"); process_sampler::post_process(); } - if(get_use_critical_trace()) - { - // launch compute task - OMNITRACE_VERBOSE_F(1, "Launching critical trace compute task...\n"); - critical_trace::compute(); - - OMNITRACE_VERBOSE_F(1, "Waiting on critical trace computation...\n"); - tasking::join(); - } - // shutdown tasking before timemory is finalized, especially the roctracer thread-pool OMNITRACE_VERBOSE_F(1, "Shutting down thread-pools...\n"); tasking::shutdown(); diff --git a/source/lib/omnitrace/library/components/category_region.hpp b/source/lib/omnitrace/library/components/category_region.hpp index 2b39f21da..b0348f84d 100644 --- a/source/lib/omnitrace/library/components/category_region.hpp +++ b/source/lib/omnitrace/library/components/category_region.hpp @@ -27,7 +27,6 @@ #include "core/state.hpp" #include "core/timemory.hpp" #include "library/causal/data.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/tracing.hpp" #include "library/tracing/annotation.hpp" @@ -68,12 +67,6 @@ using tracing_count_categories_t = type_list; -// these categories are added to the critical trace -using critical_trace_categories_t = - type_list; - // convert these categories to throughput points using causal_throughput_categories_t = type_list::start(std::string_view name, Args&&... args) tracing::push_perfetto(CategoryT{}, name.data(), std::forward(args)...); } } - - if constexpr(is_one_of::value) - { - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - - if(get_use_critical_trace()) - { - uint64_t _cid = 0; - uint64_t _parent_cid = 0; - uint32_t _depth = 0; - std::tie(_cid, _parent_cid, _depth) = create_cpu_cid_entry(); - auto _ts = comp::wall_clock::record(); - add_critical_trace( - threading::get_id(), _cid, 0, _parent_cid, _ts, 0, 0, 0, - critical_trace::add_hash_id(name.data()), _depth); - } - } } template @@ -278,30 +253,6 @@ category_region::stop(std::string_view name, Args&&... args) if(get_use_causal()) causal::pop_progress_point(name); } } - - if constexpr(is_one_of::value) - { - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - - if(get_use_critical_trace()) - { - if(get_cpu_cid_stack() && !get_cpu_cid_stack()->empty()) - { - auto _cid = get_cpu_cid_stack()->back(); - if(get_cpu_cid_parents()->find(_cid) != get_cpu_cid_parents()->end()) - { - uint64_t _parent_cid = 0; - uint32_t _depth = 0; - auto _ts = comp::wall_clock::record(); - std::tie(_parent_cid, _depth) = get_cpu_cid_parents()->at(_cid); - add_critical_trace( - threading::get_id(), _cid, 0, _parent_cid, _ts, _ts, 0, 0, - critical_trace::add_hash_id(name.data()), _depth); - } - } - } - } } else { diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp index 279e4b4c1..ed9650a4e 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp @@ -25,7 +25,6 @@ #include "core/debug.hpp" #include "core/utility.hpp" #include "library/components/category_region.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" @@ -41,9 +40,6 @@ namespace omnitrace { namespace component { -using Device = critical_trace::Device; -using Phase = critical_trace::Phase; - pthread_mutex_gotcha::hash_array_t& pthread_mutex_gotcha::get_hashes() { @@ -76,7 +72,7 @@ pthread_mutex_gotcha::get_hashes() { auto&& _id = _data.at(i).tool_id; if(!_id.empty()) - _init.at(i) = critical_trace::add_hash_id(_id.c_str()); + _init.at(i) = tim::add_hash_id(_id.c_str()); else { if(_skip.count(i) > 0) continue; @@ -176,7 +172,7 @@ pthread_mutex_gotcha::pthread_mutex_gotcha(const gotcha_data_t& _data) template auto -pthread_mutex_gotcha::operator()(uintptr_t&& _id, int (*_callee)(Args...), +pthread_mutex_gotcha::operator()(uintptr_t&&, int (*_callee)(Args...), Args... _args) const { using bundle_t = category_region; @@ -203,30 +199,10 @@ pthread_mutex_gotcha::operator()(uintptr_t&& _id, int (*_callee)(Args...), bool& _protect; } _dtor{ m_protect = true }; - uint64_t _cid = 0; - uint64_t _parent_cid = 0; - uint32_t _depth = 0; - int64_t _ts = 0; - - if(_id < std::numeric_limits::max() && get_use_critical_trace()) - { - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - std::tie(_cid, _parent_cid, _depth) = create_cpu_cid_entry(); - _ts = comp::wall_clock::record(); - } - bundle_t::audit(std::string_view{ m_data->tool_id }, audit::incoming{}, _args...); auto _ret = (*_callee)(_args...); bundle_t::audit(std::string_view{ m_data->tool_id }, audit::outgoing{}, _ret); - if(_id < std::numeric_limits::max() && get_use_critical_trace()) - { - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - add_critical_trace( - threading::get_id(), _cid, 0, _parent_cid, _ts, comp::wall_clock::record(), 0, - _id, get_hashes().at(m_data->index), _depth); - } - tim::consume_parameters(_id, _cid, _parent_cid, _depth, _ts); return _ret; } diff --git a/source/lib/omnitrace/library/perf.cpp b/source/lib/omnitrace/library/perf.cpp index 8d71440b6..476515b6d 100644 --- a/source/lib/omnitrace/library/perf.cpp +++ b/source/lib/omnitrace/library/perf.cpp @@ -633,6 +633,11 @@ perf_event::record::locate_field() const if constexpr(SampleT == sample::last) return reinterpret_cast(p); OMNITRACE_FATAL << "Unsupported sample field requested!"; + + if constexpr(std::is_pointer::value) + return nullptr; + else + return Tp{}; } namespace diff --git a/source/lib/omnitrace/library/ptl.cpp b/source/lib/omnitrace/library/ptl.cpp index 45b29ccdc..5b9638138 100644 --- a/source/lib/omnitrace/library/ptl.cpp +++ b/source/lib/omnitrace/library/ptl.cpp @@ -129,19 +129,6 @@ get_thread_pool_state() } // namespace } // namespace roctracer -namespace critical_trace -{ -namespace -{ -auto& -get_thread_pool_state() -{ - static auto _v = State::PreInit; - return _v; -} -} // namespace -} // namespace critical_trace - void setup() { @@ -164,17 +151,6 @@ join() OMNITRACE_DEBUG_F("roctracer thread-pool is not active...\n"); } - if(critical_trace::get_thread_pool_state() == State::Active) - { - OMNITRACE_DEBUG_F("waiting for all critical trace tasks to complete...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - critical_trace::get_task_group(i).join(); - } - else - { - OMNITRACE_DEBUG_F("critical-trace thread-pool is not active...\n"); - } - if(general::get_thread_pool_state() == State::Active) { OMNITRACE_DEBUG_F("waiting for all general tasks to complete...\n"); @@ -202,22 +178,6 @@ shutdown() OMNITRACE_DEBUG_F("roctracer thread-pool is not active...\n"); } - if(critical_trace::get_thread_pool_state() == State::Active) - { - OMNITRACE_DEBUG_F("Waiting on completion of critical trace tasks...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - critical_trace::get_task_group(i).join(); - critical_trace::get_task_group(i).clear(); - critical_trace::get_task_group(i).set_pool(nullptr); - } - critical_trace::get_thread_pool_state() = State::Finalized; - } - else - { - OMNITRACE_DEBUG_F("critical-trace thread-pool is not active...\n"); - } - if(general::get_thread_pool_state() == State::Active) { OMNITRACE_DEBUG_F("Waiting on completion of general tasks...\n"); @@ -270,18 +230,5 @@ roctracer::get_task_group(int64_t _tid) &tasking::get_thread_pool())); return *_v; } - -PTL::TaskGroup& -critical_trace::get_task_group(int64_t _tid) -{ - struct local - {}; - using thread_data_t = thread_data, local>; - static thread_local auto& _v = - (critical_trace::get_thread_pool_state() = State::Active, - thread_data_t::instance(construct_on_thread{ _tid }, - &tasking::get_thread_pool())); - return *_v; -} } // namespace tasking } // namespace omnitrace diff --git a/source/lib/omnitrace/library/ptl.hpp b/source/lib/omnitrace/library/ptl.hpp index b3630216b..4980673d8 100644 --- a/source/lib/omnitrace/library/ptl.hpp +++ b/source/lib/omnitrace/library/ptl.hpp @@ -67,17 +67,5 @@ namespace roctracer PTL::TaskGroup& get_task_group(int64_t _tid = utility::get_thread_index()); } // namespace roctracer - -//--------------------------------------------------------------------------------------// -// -// critical trace -// -//--------------------------------------------------------------------------------------// - -namespace critical_trace -{ -PTL::TaskGroup& -get_task_group(int64_t _tid = utility::get_thread_index()); -} // namespace critical_trace } // namespace tasking } // namespace omnitrace diff --git a/source/lib/omnitrace/library/rocm_smi.cpp b/source/lib/omnitrace/library/rocm_smi.cpp index c77454476..d86a77bc3 100644 --- a/source/lib/omnitrace/library/rocm_smi.cpp +++ b/source/lib/omnitrace/library/rocm_smi.cpp @@ -38,7 +38,6 @@ #include "core/gpu.hpp" #include "core/perfetto.hpp" #include "core/state.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" @@ -280,50 +279,6 @@ data::post_process(uint32_t _dev_id) }; if(get_use_perfetto()) _process_perfetto(); - - if(!get_use_timemory()) return; - -#if !defined(TIMEMORY_USE_MPI) - // timemory + MPI here causes hangs for some reason. it is unclear why - using samp_bundle_t = tim::lightweight_tuple; - - using entry_t = critical_trace::entry; - auto _gpu_entries = critical_trace::get_entries( - [](const entry_t& _e) { return (_e.device == critical_trace::Device::GPU); }); - - for(auto& itr : _rocm_smi) - { - auto _ts = itr.m_ts; - if(!_thread_info->is_valid_time(_ts)) continue; - - auto _entries = std::vector>{}; - for(const auto& eitr : _gpu_entries) - { - if(_ts >= eitr.second.begin_ns && _ts <= eitr.second.end_ns) - _entries.emplace_back(std::string_view{ eitr.first }, &eitr.second); - } - - std::vector _tc{}; - _tc.reserve(_entries.size()); - for(auto& eitr : _entries) - { - auto& _v = _tc.emplace_back(eitr.first); - _v.push(); - _v.start(); - _v.stop(); - - GPU_METRIC(sampling_gpu_busy, m_busy_perc) - GPU_METRIC(sampling_gpu_temp, m_temp / 1.0e3) // provided in milli-degree C - GPU_METRIC(sampling_gpu_power, - m_power * units::microwatt / static_cast(units::watt)) - GPU_METRIC(sampling_gpu_memory, - m_mem_usage / static_cast(units::megabyte)) - - _v.pop(); - } - } -#endif } //--------------------------------------------------------------------------------------// diff --git a/source/lib/omnitrace/library/roctracer.cpp b/source/lib/omnitrace/library/roctracer.cpp index abb96eefb..224f59728 100644 --- a/source/lib/omnitrace/library/roctracer.cpp +++ b/source/lib/omnitrace/library/roctracer.cpp @@ -28,7 +28,6 @@ #include "core/debug.hpp" #include "core/locking.hpp" #include "library/components/category_region.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/sampling.hpp" #include "library/thread_data.hpp" @@ -129,32 +128,6 @@ get_roctracer_tid_data() return _v; } -using cid_tuple_t = std::tuple; -struct cid_data : cid_tuple_t -{ - using cid_tuple_t::cid_tuple_t; - - OMNITRACE_DEFAULT_OBJECT(cid_data) - - auto& cid() { return std::get<0>(*this); } - auto& pcid() { return std::get<1>(*this); } - auto& depth() { return std::get<2>(*this); } - auto& queue() { return std::get<3>(*this); } - - auto cid() const { return std::get<0>(*this); } - auto pcid() const { return std::get<1>(*this); } - auto depth() const { return std::get<2>(*this); } - auto queue() const { return std::get<3>(*this); } -}; - -auto& -get_roctracer_cid_data(int64_t _tid = threading::get_id()) -{ - using thread_data_t = - thread_data, category::roctracer>; - return thread_data_t::instance(construct_on_thread{ _tid }); -} - auto& get_hip_activity_callbacks(int64_t _tid = threading::get_id()) { @@ -562,9 +535,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - assert(domain == ACTIVITY_DOMAIN_HIP_API); const char* op_name = roctracer_op_string(domain, cid, 0); if(op_name == nullptr) op_name = hip_api_name(cid); @@ -591,88 +561,12 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* op_name, cid, data->correlation_id, (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); - int64_t _ts = comp::wall_clock::record(); - auto _tid = threading::get_id(); - uint64_t _crit_cid = 0; - uint64_t _parent_crit_cid = 0; - uint32_t _depth = 0; - uintptr_t _queue = 0; - auto _roct_cid = data->correlation_id; - -#define OMNITRACE_HIP_API_QUEUE_CASE(API_FUNC, VARIABLE) \ - case HIP_API_ID_##API_FUNC: \ - _queue = reinterpret_cast(data->args.API_FUNC.VARIABLE); \ - break; - -#define OMNITRACE_HIP_API_QUEUE_CASE_ALT(API_FUNC, UNION, VARIABLE) \ - case HIP_API_ID_##API_FUNC: \ - _queue = reinterpret_cast(data->args.UNION.VARIABLE); \ - break; - - switch(cid) - { - OMNITRACE_HIP_API_QUEUE_CASE(hipLaunchKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipModuleLaunchKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipHccModuleLaunchKernel, hStream) - OMNITRACE_HIP_API_QUEUE_CASE(hipLaunchCooperativeKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtLaunchKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtModuleLaunchKernel, hStream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtStreamCreateWithCUMask, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtStreamGetCUMask, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamSynchronize, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipConfigureCall, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipDrvMemcpy3DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipEventRecord, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemPrefetchAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy2DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy2DFromArrayAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy3DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyDtoDAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyDtoHAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyFromSymbolAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyHtoDAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyParam2DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyPeerAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyToSymbolAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyWithStream, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemset2DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemset3DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetD16Async, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetD32Async, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetD8Async, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamAddCallback, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamAttachMemAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamDestroy, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetFlags, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetPriority, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamQuery, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWaitEvent, stream) -#if OMNITRACE_HIP_VERSION >= 40300 - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy2DToArrayAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWaitValue32, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWaitValue64, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWriteValue32, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWriteValue64, stream) -#endif -#if OMNITRACE_HIP_VERSION >= 40500 - OMNITRACE_HIP_API_QUEUE_CASE(hipGraphLaunch, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipGraphicsMapResources, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipGraphicsUnmapResources, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipSignalExternalSemaphoresAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamBeginCapture, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamEndCapture, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipWaitExternalSemaphoresAsync, stream) -#endif -#if OMNITRACE_HIP_VERSION >= 50000 - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamIsCapturing, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetCaptureInfo, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetCaptureInfo_v2, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamUpdateCaptureDependencies, stream) -#endif - default: break; - } + int64_t _ts = comp::wall_clock::record(); + auto _tid = threading::get_id(); + uint64_t _crit_cid = 0; + uint64_t _parent_crit_cid = 0; + uint32_t _depth = 0; + auto _roct_cid = data->correlation_id; auto& _device_id = get_current_device(); @@ -863,15 +757,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* get_roctracer_hip_data()->erase(itr.first); } } - if(get_use_critical_trace() || get_use_rocm_smi()) - { - add_critical_trace( - _tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, 0, _device_id, _queue, - critical_trace::add_hash_id(op_name), _depth); - } - - get_roctracer_cid_data(_tid)->emplace( - _roct_cid, cid_data{ _crit_cid, _parent_crit_cid, _depth, _queue }); hip_exec_activity_callbacks(_tid); } @@ -879,9 +764,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* { hip_exec_activity_callbacks(_tid); - std::tie(_crit_cid, _parent_crit_cid, _depth, std::ignore) = - get_roctracer_cid_data(_tid)->at(_roct_cid); - if(get_use_perfetto()) { tracing::pop_perfetto_ts( @@ -913,12 +795,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* } } } - if(get_use_critical_trace() || get_use_rocm_smi()) - { - add_critical_trace( - _tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, _ts, _device_id, - _queue, critical_trace::add_hash_id(op_name), _depth); - } } tim::consume_parameters(arg); } @@ -935,9 +811,6 @@ hip_activity_callback(const char* begin, const char* end, void* arg) auto&& _protect = comp::roctracer::protect_flush_activity(); (void) _protect; - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - if(!trait::runtime_enabled::get()) return; static auto _kernel_names = std::unordered_map{}; static auto _indexes = std::unordered_map{}; @@ -982,17 +855,12 @@ hip_activity_callback(const char* begin, const char* end, void* arg) auto& _keys = get_roctracer_key_data(); auto& _tids = get_roctracer_tid_data(); - int16_t _depth = 0; // depth of kernel launch - int64_t _tid = 0; // thread id - uint64_t _crit_cid = 0; // correlation id - uint64_t _pcid = 0; // parent corr_id - int32_t _devid = record->device_id; // device id - int64_t _queid = record->queue_id; // queue id - uintptr_t _queue = 0; // Host queue (stream) - auto _laps = _indexes[_roct_cid]++; // see note #1 - const char* _name = nullptr; - bool _found = false; - bool _critical_trace = get_use_critical_trace() || get_use_rocm_smi(); + int64_t _tid = 0; // thread id + int32_t _devid = record->device_id; // device id + int64_t _queid = record->queue_id; // queue id + uintptr_t _queue = 0; // Host queue (stream) + const char* _name = nullptr; + bool _found = false; { locking::atomic_lock _lk{ roctracer_type_mutex() }; @@ -1008,21 +876,6 @@ hip_activity_callback(const char* begin, const char* end, void* arg) if(_name == nullptr && op_name == nullptr) continue; if(_name == nullptr) _name = op_name; - if(_critical_trace) - { - auto& _crit_cids = get_roctracer_cid_data(_tid); - if(_crit_cids->find(_roct_cid) != _crit_cids->end()) - std::tie(_crit_cid, _pcid, _depth, _queue) = _crit_cids->at(_roct_cid); - else - { - OMNITRACE_VERBOSE_F(3, - "No critical trace entry generated for \"%s\" :: " - "unknown correlation id...\n", - _name); - _critical_trace = false; - } - } - static auto _op_id_names = std::array{ "DISPATCH", "COPY", "BARRIER" }; @@ -1094,15 +947,6 @@ hip_activity_callback(const char* begin, const char* end, void* arg) tracing::pop_perfetto_track(category::device_hip{}, "", _track, _end_ns); } - if(_critical_trace) - { - auto _hash = critical_trace::add_hash_id(_name); - uint16_t _prio = _laps + 1; // priority - add_critical_trace( - _tid, _crit_cid, _roct_cid, _crit_cid, _beg_ns, _end_ns, _devid, _queid, - _hash, _depth + 1, _prio); - } - if(_found && _name != nullptr && get_use_timemory()) { auto _func = [_beg_ns, _end_ns, _name]() {