diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index a3c37d8cd..d62311197 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -107,8 +107,6 @@ jobs: ldd $(which omnitrace-avail) omnitrace-avail --help omnitrace-avail -a - which omnitrace-critical-trace - ldd $(which omnitrace-critical-trace) which omnitrace ldd $(which omnitrace) omnitrace-instrument --help diff --git a/.github/workflows/redhat.yml b/.github/workflows/redhat.yml index c94d08157..a3e95191d 100644 --- a/.github/workflows/redhat.yml +++ b/.github/workflows/redhat.yml @@ -125,7 +125,7 @@ jobs: run: | set -v source /opt/omnitrace/share/omnitrace/setup-env.sh - ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,rewrite,runtime,critical-trace,python}=1 + ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,rewrite,runtime,python}=1 - name: Test User API timeout-minutes: 10 diff --git a/.github/workflows/ubuntu-bionic.yml b/.github/workflows/ubuntu-bionic.yml index 0630cd384..48c14ffd7 100644 --- a/.github/workflows/ubuntu-bionic.yml +++ b/.github/workflows/ubuntu-bionic.yml @@ -138,8 +138,6 @@ jobs: ldd $(which omnitrace-avail) omnitrace-avail --help omnitrace-avail -a - which omnitrace-critical-trace - ldd $(which omnitrace-critical-trace) which omnitrace ldd $(which omnitrace) omnitrace-instrument --help diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index e626d32c1..fec0afc0d 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -196,7 +196,7 @@ jobs: module use /opt/omnitrace/share/modulefiles module avail module load omnitrace - ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,rewrite,runtime,critical-trace}=1 --test-omnitrace-python=${{ matrix.python }} + ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,rewrite,runtime}=1 --test-omnitrace-python=${{ matrix.python }} - name: Test User API timeout-minutes: 10 @@ -362,7 +362,7 @@ jobs: shell: bash run: | source /opt/omnitrace/share/omnitrace/setup-env.sh - ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime,critical-trace}=1 + ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime}=1 - name: Test User API timeout-minutes: 10 @@ -525,7 +525,7 @@ jobs: run: | set -v source /opt/omnitrace/share/omnitrace/setup-env.sh - ${{ github.workspace }}/scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime,critical-trace}=1 + ${{ github.workspace }}/scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime}=1 - name: Test Install with Modulefile timeout-minutes: 15 @@ -534,7 +534,7 @@ jobs: source /usr/share/modules/init/$(basename ${SHELL}) module use /opt/omnitrace/share/modulefiles module load omnitrace - ${{ github.workspace }}/scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime,critical-trace}=1 + ${{ github.workspace }}/scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime}=1 - name: Test User API timeout-minutes: 10 diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml index d37f3fcaa..c04952042 100644 --- a/.github/workflows/ubuntu-jammy.yml +++ b/.github/workflows/ubuntu-jammy.yml @@ -216,7 +216,7 @@ jobs: module use /opt/omnitrace/share/modulefiles module avail module load omnitrace - ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime,critical-trace}=1 + ./scripts/test-install.sh --test-omnitrace-{instrument,avail,sample,python,rewrite,runtime}=1 - name: Test User API timeout-minutes: 10 diff --git a/README.md b/README.md index c391e0a2c..a938f0af1 100755 --- a/README.md +++ b/README.md @@ -35,7 +35,6 @@ such as the memory usage, page-faults, and context-switches, and thread-level me - Background thread records process-, system- and device-level metrics while the application executes - Causal profiling - Quantifies the potential impact of optimizations in parallel codes -- Critical trace generation ### Data Analysis @@ -45,7 +44,6 @@ such as the memory usage, page-faults, and context-switches, and thread-level me - Comprehensive traces - Every individual event/measurement - Application speedup predictions resulting from potential optimizations in functions and lines of code (causal profiling) -- Critical trace analysis (alpha) ### Parallelism API Support diff --git a/scripts/test-install.sh b/scripts/test-install.sh index 66d81f954..943603c08 100755 --- a/scripts/test-install.sh +++ b/scripts/test-install.sh @@ -42,7 +42,6 @@ fi : ${ENABLE_OMNITRACE_PYTHON:=0} : ${ENABLE_OMNITRACE_REWRITE:=1} : ${ENABLE_OMNITRACE_RUNTIME:=1} -: ${ENABLE_OMNITRACE_CRITICAL_TRACE:=1} usage() { @@ -55,7 +54,6 @@ usage() print_option test-omnitrace-python "0|1" "Enable testing omnitrace-python" "${ENABLE_OMNITRACE_PYTHON}" print_option test-omnitrace-rewrite "0|1" "Enable testing omnitrace-instrument binary rewrite" "${ENABLE_OMNITRACE_REWRITE}" print_option test-omnitrace-runtime "0|1" "Enable testing omnitrace-instrument runtime instrumentation" "${ENABLE_OMNITRACE_RUNTIME}" - print_option test-omnitrace-critial-trace "0|1" "Enable testing omnitrace-instrument critical trace" "${ENABLE_OMNITRACE_CRITICAL_TRACE}" } cat << EOF > ${CONFIG_DIR}/omnitrace.cfg @@ -126,10 +124,6 @@ do ENABLE_OMNITRACE_RUNTIME=${VAL} continue ;; - --test-omnitrace-critical-trace) - ENABLE_OMNITRACE_CRITICAL_TRACE=${VAL} - continue - ;; --source-dir) SOURCE_DIR=${VAL} continue @@ -204,16 +198,9 @@ test-omnitrace-runtime() verbose-run omnitrace-instrument -e -v 1 -- ${LS_NAME} ${LS_ARGS} } -test-omnitrace-critical-trace() -{ - which omnitrace-critical-trace - ldd $(which omnitrace-critical-trace) -} - if [ "${ENABLE_OMNITRACE_INSTRUMENT}" -ne 0 ]; then verbose-run test-omnitrace; fi if [ "${ENABLE_OMNITRACE_AVAIL}" -ne 0 ]; then verbose-run test-omnitrace-avail; fi if [ "${ENABLE_OMNITRACE_SAMPLE}" -ne 0 ]; then verbose-run test-omnitrace-sample; fi if [ "${ENABLE_OMNITRACE_PYTHON}" -ne 0 ]; then verbose-run test-omnitrace-python; fi if [ "${ENABLE_OMNITRACE_REWRITE}" -ne 0 ]; then verbose-run test-omnitrace-rewrite; fi if [ "${ENABLE_OMNITRACE_RUNTIME}" -ne 0 ]; then verbose-run test-omnitrace-runtime; fi -if [ "${ENABLE_OMNITRACE_CRITICAL_TRACE}" -ne 0 ]; then verbose-run test-omnitrace-critical-trace; fi diff --git a/source/bin/CMakeLists.txt b/source/bin/CMakeLists.txt index afdc31be9..859b729fc 100644 --- a/source/bin/CMakeLists.txt +++ b/source/bin/CMakeLists.txt @@ -15,7 +15,6 @@ endif() # executables add_subdirectory(omnitrace-avail) -add_subdirectory(omnitrace-critical-trace) add_subdirectory(omnitrace-causal) add_subdirectory(omnitrace-sample) add_subdirectory(omnitrace-instrument) diff --git a/source/bin/omnitrace-avail/avail.cpp b/source/bin/omnitrace-avail/avail.cpp index e4e474b1f..53031f2f4 100644 --- a/source/bin/omnitrace-avail/avail.cpp +++ b/source/bin/omnitrace-avail/avail.cpp @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include #include #include @@ -118,6 +120,11 @@ namespace { // initialize HIP before main so that libomnitrace is not HSA_TOOLS_LIB int gpu_count = omnitrace::gpu::hip_device_count(); + +// statically allocated shared_ptrs to prevent use after free errors +auto timemory_manager = tim::manager::master_instance(); +auto timemory_hash_ids = tim::hash::get_main_hash_ids(); +auto timemory_hash_aliases = tim::hash::get_main_hash_aliases(); } // namespace //--------------------------------------------------------------------------------------// @@ -125,6 +132,10 @@ int gpu_count = omnitrace::gpu::hip_device_count(); int main(int argc, char** argv) { + (void) timemory_manager; // suppress unused variables + (void) timemory_hash_ids; // + (void) timemory_hash_aliases; // + tim::unwind::set_bfd_verbose(3); tim::set_env("OMNITRACE_INIT_TOOLING", "OFF", 1); omnitrace_init_library(); diff --git a/source/bin/omnitrace-causal/impl.cpp b/source/bin/omnitrace-causal/impl.cpp index 2f613f88a..5e901fa4c 100644 --- a/source/bin/omnitrace-causal/impl.cpp +++ b/source/bin/omnitrace-causal/impl.cpp @@ -200,7 +200,6 @@ get_initial_environment() update_env(_env, "OMNITRACE_TRACE", false); update_env(_env, "OMNITRACE_PROFILE", false); update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", false); - update_env(_env, "OMNITRACE_CRITICAL_TRACE", false); update_env(_env, "OMNITRACE_THREAD_POOL_SIZE", get_env("OMNITRACE_THREAD_POOL_SIZE", 0)); update_env(_env, "OMNITRACE_LAUNCHER", "omnitrace-causal"); diff --git a/source/bin/omnitrace-critical-trace/CMakeLists.txt b/source/bin/omnitrace-critical-trace/CMakeLists.txt deleted file mode 100644 index df08edcbc..000000000 --- a/source/bin/omnitrace-critical-trace/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# ------------------------------------------------------------------------------# -# -# omnitrace-critical-trace target -# -# ------------------------------------------------------------------------------# - -add_executable(omnitrace-critical-trace ${CMAKE_CURRENT_LIST_DIR}/critical-trace.cpp - ${CMAKE_CURRENT_LIST_DIR}/critical-trace.hpp) - -target_include_directories(omnitrace-critical-trace PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_compile_definitions(omnitrace-critical-trace PRIVATE OMNITRACE_EXTERN_COMPONENTS=0) -target_link_libraries( - omnitrace-critical-trace - PRIVATE omnitrace::omnitrace-compile-definitions - omnitrace::omnitrace-interface-library omnitrace::omnitrace-headers - omnitrace::omnitrace-timemory omnitrace::libomnitrace-static) -set_target_properties( - omnitrace-critical-trace - PROPERTIES BUILD_RPATH "\$ORIGIN:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" - INSTALL_RPATH "${OMNITRACE_EXE_INSTALL_RPATH}") - -install( - TARGETS omnitrace-critical-trace - DESTINATION ${CMAKE_INSTALL_BINDIR} - OPTIONAL) diff --git a/source/bin/omnitrace-critical-trace/critical-trace.cpp b/source/bin/omnitrace-critical-trace/critical-trace.cpp deleted file mode 100644 index f59777dc0..000000000 --- a/source/bin/omnitrace-critical-trace/critical-trace.cpp +++ /dev/null @@ -1,1006 +0,0 @@ -// MIT License -// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "critical-trace.hpp" - -#include "api.hpp" -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/perfetto.hpp" -#include "library/tracing.hpp" - -#include - -#include -#include - -namespace config = omnitrace::config; -namespace critical_trace = omnitrace::critical_trace; - -namespace -{ -std::unique_ptr tracing_session = {}; - -void -init_perfetto(); - -void -fini_perfetto(); -} // namespace - -int -main(int argc, char** argv) -{ - omnitrace_init_library(); - - // config::set_setting_value("OMNITRACE_TRACE", true); - config::set_setting_value("OMNITRACE_CRITICAL_TRACE", true); - // config::set_setting_value("OMNITRACE_CRITICAL_TRACE_DEBUG", true); - config::set_setting_value("OMNITRACE_CRITICAL_TRACE_COUNT", 500); - config::set_setting_value("OMNITRACE_CRITICAL_TRACE_PER_ROW", 100); - config::set_setting_value("OMNITRACE_THREAD_POOL_SIZE", - std::thread::hardware_concurrency()); - config::set_setting_value("OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES", true); - config::set_setting_value("OMNITRACE_USE_PID", false); - config::set_setting_value("OMNITRACE_TIME_OUTPUT", false); - - if(config::get_verbose() >= 0) - { - config::print_banner(); - config::print_settings(false); - } - - if(config::get_use_perfetto()) init_perfetto(); - - for(int i = 1; i < argc; ++i) - { - critical_trace::complete_call_chain = {}; - OMNITRACE_BASIC_PRINT_F("Loading call-chain %s...\n", argv[i]); - if(!critical_trace::load_call_chain(argv[i], "call_chain", - critical_trace::complete_call_chain)) - { - OMNITRACE_THROW("Error loading '%s'. Data size: %zu\n", argv[i], - critical_trace::complete_call_chain.size()); - } - for(const auto& itr : *tim::get_hash_ids()) - critical_trace::complete_hash_ids.emplace(itr.second); - OMNITRACE_BASIC_PRINT_F("Computing critical trace for %s...\n", argv[i]); - critical_trace::compute_critical_trace(); - } - - if(config::get_use_perfetto()) fini_perfetto(); - - return EXIT_SUCCESS; -} - -namespace -{ -void -init_perfetto() -{ - ::perfetto::TracingInitArgs args{}; - ::perfetto::TraceConfig cfg{}; - ::perfetto::protos::gen::TrackEventConfig track_event_cfg{}; - - auto shmem_size_hint = config::get_perfetto_shmem_size_hint(); - auto buffer_size = config::get_perfetto_buffer_size(); - auto* buffer_config = cfg.add_buffers(); - - buffer_config->set_size_kb(buffer_size); - buffer_config->set_fill_policy( - ::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD); - - auto* ds_cfg = cfg.add_data_sources()->mutable_config(); - ds_cfg->set_name("track_event"); - ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString()); - - args.backends |= ::perfetto::kInProcessBackend; - args.shmem_size_hint_kb = shmem_size_hint; - - ::perfetto::Tracing::Initialize(args); - ::perfetto::TrackEvent::Register(); - - tracing_session = ::perfetto::Tracing::NewTrace(); - tracing_session->Setup(cfg); - tracing_session->StartBlocking(); -} - -void -fini_perfetto() -{ - // Make sure the last event is closed for this example. - ::perfetto::TrackEvent::Flush(); - - OMNITRACE_DEBUG_F("Stopping the blocking perfetto trace sessions...\n"); - tracing_session->StopBlocking(); - - OMNITRACE_DEBUG_F("Getting the trace data...\n"); - std::vector trace_data{ tracing_session->ReadTraceBlocking() }; - - if(trace_data.empty()) - { - OMNITRACE_BASIC_PRINT_F( - "> trace data is empty. File '%s' will not be written...\n", - config::get_perfetto_output_filename().c_str()); - } - else - { - // Write the trace into a file. - OMNITRACE_BASIC_VERBOSE(0, "> Outputting '%s' (%.2f KB / %.2f MB / %.2f GB)... ", - config::get_perfetto_output_filename().c_str(), - static_cast(trace_data.size()) / tim::units::KB, - static_cast(trace_data.size()) / tim::units::MB, - static_cast(trace_data.size()) / tim::units::GB); - - std::ofstream ofs{}; - if(!tim::filepath::open(ofs, config::get_perfetto_output_filename(), - std::ios::out | std::ios::binary)) - { - OMNITRACE_BASIC_PRINT_F("> Error opening '%s'...\n", - config::get_perfetto_output_filename().c_str()); - std::exit(EXIT_FAILURE); - } - else - { - // Write the trace into a file. - if(config::get_verbose() >= 0) fprintf(stderr, "Done\n"); - ofs.write(&trace_data[0], trace_data.size()); - } - ofs.close(); - } -} -} // namespace - -namespace omnitrace -{ -namespace critical_trace -{ -namespace -{ -//--------------------------------------------------------------------------------------// - -std::string -get_perf_name(std::string _func) -{ - const auto _npos = std::string::npos; - auto _pos = std::string::npos; - while((_pos = _func.find('_')) != _npos) - _func = _func.replace(_pos, 1, " "); - if(_func.length() > 0) _func.at(0) = std::toupper(_func.at(0)); - return _func; -} - -//--------------------------------------------------------------------------------------// - -void -save_call_graph(const std::string& _fname, const std::string& _label, - const call_graph_t& _call_graph, bool _msg = false, - std::string _func = {}) -{ - OMNITRACE_CT_DEBUG("\n"); - - using perfstats_t = - tim::lightweight_tuple; - perfstats_t _perf{ get_perf_name(__FUNCTION__) }; - _perf.start(); - - std::stringstream oss{}; - { - namespace cereal = tim::cereal; - auto ar = tim::policy::output_archive::get(oss); - - auto _hash_map = *tim::hash::get_hash_ids(); - for(auto& itr : _hash_map) - itr.second = tim::demangle(itr.second); - ar->setNextName("omnitrace"); - ar->startNode(); - (*ar)(cereal::make_nvp("hash_map", _hash_map)); - ar->setNextName(_label.c_str()); - ar->startNode(); - serialize_graph(*ar, _call_graph); - ar->finishNode(); - ar->finishNode(); - } - - std::ofstream ofs{}; - if(tim::filepath::open(ofs, _fname)) - { - if(_msg) - { - if(_func.empty()) _func = __FUNCTION__; - OMNITRACE_BASIC_VERBOSE(0, "[%s] Outputting '%s'...\n", _func.c_str(), - _fname.c_str()); - } - ofs << oss.str() << std::endl; - } - - _perf.stop(); - if(_msg) - { - OMNITRACE_CT_DEBUG("%s\n", JOIN("", _perf).c_str()); - } -} - -void -save_critical_trace(const std::string& _fname, const std::string& _label, - const std::vector& _cchain, bool _msg = false, - std::string _func = {}) -{ - OMNITRACE_CT_DEBUG("\n"); - - using perfstats_t = - tim::lightweight_tuple; - perfstats_t _perf{ get_perf_name(__FUNCTION__) }; - _perf.start(); - - auto _save = [&](std::ostream& _os) { - namespace cereal = tim::cereal; - auto ar = tim::policy::output_archive::get(_os); - - auto _hash_map = *tim::hash::get_hash_ids(); - for(auto& itr : _hash_map) - itr.second = tim::demangle(itr.second); - ar->setNextName("omnitrace"); - ar->startNode(); - (*ar)(cereal::make_nvp("hash_map", _hash_map), - cereal::make_nvp(_label.c_str(), _cchain)); - ar->finishNode(); - }; - - std::ofstream ofs{}; - if(tim::filepath::open(ofs, _fname)) - { - if(_msg) - { - if(_func.empty()) _func = __FUNCTION__; - OMNITRACE_BASIC_VERBOSE(0, "[%s] Outputting '%s'...\n", _func.c_str(), - _fname.c_str()); - } - std::stringstream oss{}; - if(_cchain.size() > 1000) - { - _save(ofs); - } - else - { - _save(oss); - ofs << oss.str() << std::endl; - } - } - - _perf.stop(); - if(_msg) - { - OMNITRACE_CT_DEBUG("%s\n", JOIN("", _perf).c_str()); - } -} - -void -save_call_chain_text(const std::string& _fname, const call_chain& _call_chain, - bool _msg = false, std::string _func = {}) -{ - OMNITRACE_CT_DEBUG("\n"); - - using perfstats_t = - tim::lightweight_tuple; - perfstats_t _perf{ get_perf_name(__FUNCTION__) }; - _perf.start(); - - std::ofstream ofs{}; - if(tim::filepath::open(ofs, _fname)) - { - if(_msg) - { - if(_func.empty()) _func = __FUNCTION__; - OMNITRACE_BASIC_VERBOSE(0, "[%s] Outputting '%s'...\n", _func.c_str(), - _fname.c_str()); - } - ofs << _call_chain << "\n"; - } - - _perf.stop(); - if(_msg) - { - OMNITRACE_CT_DEBUG("%s\n", JOIN("", _perf).c_str()); - } -} - -void -save_call_chain_json(const std::string& _fname, const std::string& _label, - const call_chain& _call_chain, bool _msg = false, - std::string _func = {}) -{ - OMNITRACE_CT_DEBUG("\n"); - - using perfstats_t = - tim::lightweight_tuple; - perfstats_t _perf{ get_perf_name(__FUNCTION__) }; - _perf.start(); - - auto _save = [&](std::ostream& _os) { - namespace cereal = tim::cereal; - auto ar = tim::policy::output_archive::get(_os); - - auto _hash_map = *tim::hash::get_hash_ids(); - for(auto& itr : _hash_map) - itr.second = tim::demangle(itr.second); - ar->setNextName("omnitrace"); - ar->startNode(); - (*ar)(cereal::make_nvp("hash_map", _hash_map), - cereal::make_nvp(_label.c_str(), _call_chain)); - ar->finishNode(); - }; - - std::ofstream ofs{}; - if(tim::filepath::open(ofs, _fname)) - { - if(_msg) - { - if(_func.empty()) _func = __FUNCTION__; - OMNITRACE_BASIC_VERBOSE(0, "[%s] Outputting '%s'...\n", _func.c_str(), - _fname.c_str()); - } - std::stringstream oss{}; - if(_call_chain.size() > 100000) - { - _save(ofs); - } - else - { - _save(oss); - ofs << oss.str() << std::endl; - } - } - - _perf.stop(); - if(_msg) - { - OMNITRACE_CT_DEBUG("%s\n", JOIN("", _perf).c_str()); - } -} - -bool -load_call_chain(const std::string& _fname, const std::string& _label, - call_chain& _call_chain) -{ - namespace cereal = tim::cereal; - - std::ifstream ifs{}; - ifs.open(_fname); - - OMNITRACE_CONDITIONAL_THROW(!ifs || !ifs.is_open(), - "Error! call-chain file '%s' could not be opened", - _fname.c_str()); - - auto ar = tim::policy::input_archive::get(ifs); - auto _val = call_chain{}; - ar->setNextName("omnitrace"); - ar->startNode(); - (*ar)(cereal::make_nvp(_label.c_str(), _val)); - ar->finishNode(); - auto _success = (_val.empty() == false); - if(_success) std::swap(_call_chain, _val); - return _success; -} - -auto -get_indexed(const call_chain& _chain) -{ - OMNITRACE_CT_DEBUG("\n"); - std::map> _indexed = {}; - - // allocate for all cpu correlation ids - for(const auto& itr : _chain) - { - _indexed.emplace(static_cast(itr.cpu_cid), std::vector{}); - _indexed.emplace(static_cast(itr.parent_cid), std::vector{}); - } - - // index based on parent correlation id - for(const auto& itr : _chain) - { - if(itr.depth < 1 && itr.phase == Phase::BEGIN) continue; - _indexed[static_cast(itr.parent_cid)].emplace_back(itr); - } - - for(auto& itr : _indexed) - std::sort(itr.second.begin(), itr.second.end(), - [](const entry& lhs, const entry& rhs) { - // return lhs.cpu_cid < rhs.cpu_cid; - return lhs.begin_ns < rhs.begin_ns; - }); - - return _indexed; -} - -void -find_children(PTL::ThreadPool& _tp, call_graph_t& _graph, const call_chain& _chain) -{ - OMNITRACE_CT_DEBUG("\n"); - - using iterator_t = call_graph_sibling_itr_t; - using itr_entry_vec_t = std::vector>; - using task_group_t = PTL::TaskGroup; - - auto _indexed = get_indexed(_chain); - std::map> _entry_map{}; - - // allocate all entries - OMNITRACE_CT_DEBUG_F("Allocating...\n"); - for(const auto& itr : _chain) - { - auto _ins = _entry_map.emplace(itr, std::vector{}); - if(!_ins.second) - { - auto _existing = _ins.first->first; - OMNITRACE_BASIC_PRINT("Warning! Duplicate entry for [%s] :: [%s]\n", - JOIN("", _existing).c_str(), JOIN("", itr).c_str()); - } - } - - task_group_t _tg{ &_tp }; - OMNITRACE_CT_DEBUG_F("Parallel mapping...\n"); - for(const auto& itr : _chain) - { - _tg.run([&]() { _entry_map[itr] = _indexed.at(itr.cpu_cid); }); - } - _tg.join(); - - std::function _recursive_func; - _recursive_func = [&](iterator_t itr, const entry& _v) { - auto _child = _graph.append_child(itr, _v); - auto _children = std::move(_entry_map[_v]); - _entry_map[_v].clear(); - for(auto&& vitr : _children) - { - _recursive_func(_child, vitr); - } - }; - - // the recursive version of _func + _loop_func has a tendency to overflow the stack - auto _func = [&](iterator_t itr, const entry& _v) { - auto _child = _graph.append_child(itr, _v); - auto _children = std::move(_entry_map[_v]); - _entry_map[_v].clear(); - itr_entry_vec_t _data{}; - for(auto&& vitr : _children) - _data.emplace_back(_child, vitr); - return _data; - }; - - auto _loop_func = [&_func](itr_entry_vec_t& _data) { - auto _inp = _data; - _data.clear(); - for(auto itr : _inp) - { - for(auto&& fitr : _func(itr.first, itr.second)) - _data.emplace_back(std::move(fitr)); - } - // if data is empty return false so we can break out of while loop - return !_data.empty(); - }; - - OMNITRACE_CT_DEBUG_F("Checking index at -1...\n"); - if(!_indexed.at(-1).empty()) - { - OMNITRACE_CT_DEBUG_F("Setting root (line %i)...\n", __LINE__); - _graph.set_head(_indexed.at(-1).front()); - } - else - { - OMNITRACE_CT_DEBUG_F("Setting root (line %i)...\n", __LINE__); - uint32_t _depth = -1; - uint64_t _cpu_cid = -1; - entry _root{ Device::NONE, Phase::NONE, 0, _depth, 0, 0, 0, _cpu_cid, 0, 0, 0 }; - _graph.set_head(_root); - } - - iterator_t _root = _graph.begin(); - for(auto&& itr : _entry_map) - { - if(itr.first.depth == _root->depth + 1) - { - OMNITRACE_CT_DEBUG_F("Generating call-graph...\n"); - // _recursive_func(_root, itr.first); - itr_entry_vec_t _data = _func(_root, itr.first); - while(_loop_func(_data)) - {} - } - } -} - -void -find_sequences(PTL::ThreadPool& _tp, call_graph_t& _graph, - std::vector& _chain) -{ - OMNITRACE_CT_DEBUG("\n"); - /* - using sibling_itr_t = call_graph_sibling_itr_t; - using sibling_vec_t = std::vector; - using sibling_map_t = std::map; - - std::function _no_overlap{}; - _no_overlap = [&](sibling_map_t& _v, sibling_itr_t root) { - sibling_map_t _l{}; - int64_t n = _graph.number_of_children(root); - if(n == 0) return; - - //_graph.sort(sibling_itr_t{ root }, - // [](auto lhs, auto rhs) { return lhs.get_cost() > rhs.get_cost(); }); - - for(int64_t i = 0; i < n; ++i) - { - if(_l.empty()) - { - auto itr = _graph.child(root, i); - _l[itr->tid].emplace_back(itr); - } - else - { - auto itr = _graph.child(root, i); - bool _overlaps = false; - for(auto& litr : _l[itr->tid]) - { - if(litr->device == itr->device && litr->get_overlap(*itr) > 0) - { - _overlaps = true; - break; - } - } - if(!_overlaps) _l[itr->tid].emplace_back(itr); - } - } - for(auto& iitr : _l) - { - for(auto itr : iitr.second) - { - _v[iitr.first].emplace_back(itr); - _no_overlap(_v, itr); - } - } - }; - - std::map _tot{}; - for(sibling_itr_t itr = _graph.begin(); itr != _graph.end(); ++itr) - { - _no_overlap(_tot, itr); - } - - for(const auto& iitr : _tot) - { - call_chain _cc{}; - _cc.emplace_back(*_graph.begin()); - for(const auto& itr : iitr.second) - _cc.emplace_back(*itr); - _chain.emplace_back(_cc); - } - - (void) _tp; - */ - - using iterator_t = call_graph_preorder_itr_t; - std::vector _end_nodes{}; - size_t _n = 0; - for(iterator_t itr = _graph.begin(); itr != _graph.end(); ++itr, ++_n) - { - auto _nchild = _graph.number_of_children(itr); - if(_nchild > 0) - { - // OMNITRACE_CT_DEBUG("Skipping node #%zu with %u children :: %s\n", _n, - // _nchild, JOIN("", *itr).c_str()); - continue; - } - _end_nodes.emplace_back(itr); - } - OMNITRACE_CT_DEBUG("Number of end nodes: %zu\n", _end_nodes.size()); - _chain.resize(_end_nodes.size()); - - auto _construct = [&](size_t i) { - auto itr = _end_nodes.at(i); - while(itr != nullptr && _graph.is_valid(itr)) - { - _chain.at(i).emplace_back(*itr); - itr = _graph.parent(itr); - } - std::reverse(_chain.at(i).begin(), _chain.at(i).end()); - std::sort( - _chain.at(i).begin(), _chain.at(i).end(), - [](const entry& lhs, const entry& rhs) { return lhs.begin_ns > rhs.end_ns; }); - }; - - PTL::TaskGroup _tg{ &_tp }; - for(size_t i = 0; i < _end_nodes.size(); ++i) - _tg.run(_construct, i); - _tg.join(); - - std::sort(_chain.begin(), _chain.end(), - [](const call_chain& lhs, const call_chain& rhs) { - return lhs.get_cost() > rhs.get_cost(); - }); - - /* - std::vector _new_chain{}; - for(auto& itr : _chain) - { - if(itr.empty()) continue; - if(_new_chain.empty()) - { - _new_chain.emplace_back(std::move(itr)); - continue; - } - std::sort(itr.begin(), itr.end(), [](const entry& lhs, const entry& rhs) { - return lhs.get_cost() > rhs.get_cost(); - }); - - call_chain* _append_chain = nullptr; - for(auto& nitr : _new_chain) - { - if(nitr.at(0).tid == itr.at(0).tid && nitr.at(0).get_overlap(itr.at(0)) <= 0) - { - _append_chain = &nitr; - break; - } - } - - if(_append_chain) - { - for(auto& oitr : itr) - _append_chain->emplace_back(oitr); - std::sort(_append_chain->begin(), _append_chain->end(), - [](const entry& lhs, const entry& rhs) { - return lhs.get_cost() > rhs.get_cost(); - }); - } - else - { - _new_chain.emplace_back(std::move(itr)); - } - itr.clear(); - } - - _chain = _new_chain;*/ -} - -template -void -serialize_graph(ArchiveT& ar, const tim::graph& t) -{ - OMNITRACE_CT_DEBUG("\n"); - - namespace cereal = tim::cereal; - using iterator_t = typename tim::graph::sibling_iterator; - - ar(cereal::make_nvp("graph_nodes", t.size())); - ar.setNextName("graph"); - ar.startNode(); - ar.makeArray(); - for(iterator_t itr = t.begin(); itr != t.end(); ++itr) - serialize_subgraph(ar, t, itr); - ar.finishNode(); -} - -template -void -serialize_subgraph(ArchiveT& ar, const tim::graph& _graph, - typename tim::graph::iterator _root) -{ - using iterator_t = typename tim::graph::sibling_iterator; - - if(_graph.empty()) return; - - ar.setNextName("node"); - ar.startNode(); - ar(*_root); - { - ar.setNextName("children"); - ar.startNode(); - ar.makeArray(); - for(iterator_t itr = _graph.begin(_root); itr != _graph.end(_root); ++itr) - serialize_subgraph(ar, _graph, itr); - ar.finishNode(); - } - ar.finishNode(); -} - -template -std::vector -get_top(const std::vector& _chain, size_t _count) -{ - OMNITRACE_CT_DEBUG("\n"); - std::vector _data{}; - _data.reserve(_count); - for(const auto& itr : _chain) - { - if(_data.size() >= _count) break; - if(itr.query<>([](const entry& _v) { - return (DevT == Device::ANY) ? true : (_v.device == DevT); - })) - { - _data.emplace_back(itr); - } - } - return _data; -} - -template -void -generate_perfetto(const std::vector& _data) -{ - OMNITRACE_CT_DEBUG("\n"); - - auto _nrows = std::min(get_critical_trace_per_row(), _data.size()); - - // run in separate thread(s) so that it ends up in unique row - if(_nrows < 1) _nrows = _data.size(); - - std::string _dev = (DevT == Device::NONE) ? "" - : (DevT == Device::ANY) ? "CPU + GPU " - : (DevT == Device::CPU) ? "CPU " - : "GPU "; - - using category_t = std::conditional_t< - DevT == Device::ANY, omnitrace::category::critical_trace, - std::conditional_t>; - - // ensure all hash ids exist - copy_hash_ids(); - std::set _used{}; - - auto _func = [&](size_t _idx, size_t _beg, size_t _end) { - auto&& _name_generator = [](auto _dev_type, auto _rows, auto _idx_v) { - return (_rows < 2) - ? TIMEMORY_JOIN(" ", std::to_string(_dev_type), "Critical Path") - : TIMEMORY_JOIN(" ", std::to_string(_dev_type), "Critical Path", - _idx_v); - }; - auto _track = - (DevT == Device::NONE) - ? ::perfetto::ProcessTrack::Current() - : omnitrace::tracing::get_perfetto_track( - category_t{}, std::move(_name_generator), DevT, _nrows, _idx); - - for(size_t i = _beg; i < _end; ++i) - { - if(i >= _data.size()) break; - _data.at(i).generate_perfetto(_track, _used); - } - }; - - for(size_t i = 0; i < _data.size(); i += _nrows) - _func(i, i, i + _nrows); -} - -template class ContainerT, typename... Args, - typename FuncT = bool (*)(const Tp&, const Tp&)> -inline Tp* -find( - const Tp& _v, ContainerT& _vec, - FuncT&& _func = [](const Tp& _lhs, const Tp& _rhs) { return (_lhs == _rhs); }) -{ - for(auto& itr : _vec) - { - if(std::forward(_func)(_v, itr)) return &itr; - } - return nullptr; -} - -template -inline entry* -find( - const entry& _v, call_chain& _vec, - FuncT&& _func = [](const entry& _lhs, const entry& _rhs) { return (_lhs == _rhs); }) -{ - return find(_v, reinterpret_cast&>(_vec), - std::forward(_func)); -} - -void -squash_critical_path(call_chain& _targ) -{ - OMNITRACE_CT_DEBUG("\n"); - static auto _strict_equal = [](const entry& _lhs, const entry& _rhs) { - auto _same_phase = (_lhs.phase == _rhs.phase); - bool _phase_check = true; - if(_same_phase) _phase_check = (_lhs.get_timestamp() == _rhs.get_timestamp()); - return (_lhs == _rhs && _lhs.parent_cid == _rhs.parent_cid && _phase_check); - }; - - std::sort(_targ.begin(), _targ.end()); - - call_chain _squashed{}; - for(auto& itr : _targ) - { - if(itr.phase == Phase::DELTA) - { - _squashed.emplace_back(itr); - } - else if(itr.phase == Phase::BEGIN) - { - if(!find(itr, _squashed, _strict_equal)) _squashed.emplace_back(itr); - } - else - { - entry* _match = nullptr; - if((_match = find(itr, _squashed)) != nullptr) - *_match += itr; - else - _squashed.emplace_back(itr); - } - } - - std::swap(_targ, _squashed); - std::sort(_targ.begin(), _targ.end()); -} - -void -compute_critical_trace() -{ - OMNITRACE_CT_DEBUG_F("Generating critical trace...\n"); - - // ensure all hash ids exist - copy_hash_ids(); - - using perfstats_t = - tim::lightweight_tuple; - - perfstats_t _ct_perf{}; - _ct_perf.start(); - - auto _report_perf = [](auto& _perf_v, const char* _func, const std::string& _label) { - _perf_v.stop().rekey(_label); - auto _str = JOIN("", _perf_v); - if(_str.length() > 5) _str = _str.substr(5); - OMNITRACE_BASIC_PRINT("[%s] %s\n", _func, _str.c_str()); - OMNITRACE_BASIC_PRINT("\n"); - _perf_v.reset().start(); - }; - - OMNITRACE_BASIC_PRINT("\n"); - - // try - { - PTL::ThreadPool _tp{ get_thread_pool_size(), []() { copy_hash_ids(); }, []() {} }; - _tp.set_verbose(-1); - PTL::TaskGroup _tg{ &_tp }; - - perfstats_t _perf{}; - _perf.start(); - - OMNITRACE_BASIC_PRINT_F("sorting %zu call chain entries\n", - complete_call_chain.size()); - - // sort the complete call chain - std::sort(complete_call_chain.begin(), complete_call_chain.end()); - _report_perf(_perf, __FUNCTION__, "sorting call chain"); - - OMNITRACE_BASIC_PRINT_F("squashing call chain...\n"); - - // squash the critical path (combine start/stop into delta) - squash_critical_path(complete_call_chain); - _report_perf(_perf, __FUNCTION__, "squashing critical path"); - - // generate the perfetto - if(config::get_use_perfetto()) - { - OMNITRACE_BASIC_PRINT_F("generating perfetto for call chain...\n"); - generate_perfetto({ complete_call_chain }); - generate_perfetto({ complete_call_chain }); - generate_perfetto({ complete_call_chain }); - _report_perf(_perf, __FUNCTION__, "perfetto generation"); - } - - OMNITRACE_BASIC_PRINT_F("finding children...\n"); - call_graph_t _graph{}; - find_children(_tp, _graph, complete_call_chain); - _report_perf(_perf, __FUNCTION__, "finding children"); - - // sort the call-graph based on cost - OMNITRACE_BASIC_PRINT_F("sorting %zu call-graph entries...\n", _graph.size() - 1); - _graph.sort([](auto lhs, auto rhs) { return lhs.get_cost() > rhs.get_cost(); }, - [&_tg](auto _f) { _tg.run(_f); }, [&_tg]() { _tg.join(); }); - _report_perf(_perf, __FUNCTION__, "call-graph sort"); - - OMNITRACE_BASIC_PRINT_F("saving call-graph...\n"); - save_call_graph(tim::settings::compose_output_filename("call-graph", ".json"), - "call_graph", _graph, true, __FUNCTION__); - _report_perf(_perf, __FUNCTION__, "saving call-graph"); - - OMNITRACE_BASIC_PRINT_F("finding sequences...\n"); - std::vector _top{}; - find_sequences(_tp, _graph, _top); - _report_perf(_perf, __FUNCTION__, "call-graph sequence search"); - - OMNITRACE_BASIC_PRINT_F("number of sequences found: %zu (%zu)...\n", _top.size(), - (_top.empty()) ? 0 : _top.at(0).size()); - - if(get_critical_trace_count() == 0) - { - OMNITRACE_CT_DEBUG_F("saving critical trace...\n"); - save_critical_trace( - tim::settings::compose_output_filename("critical-trace", ".json"), - "critical_trace", _top, true, __FUNCTION__); - } - else - { - // get the top CPU critical traces - OMNITRACE_BASIC_PRINT_F("getting top CPU functions...\n"); - auto _top_cpu = get_top(_top, get_critical_trace_count()); - - // get the top GPU critical traces - OMNITRACE_BASIC_PRINT_F("getting top GPU functions...\n"); - auto _top_gpu = get_top(_top, get_critical_trace_count()); - - // get the top CPU + GPU critical traces - OMNITRACE_BASIC_PRINT_F("getting top CPU + GPU functions...\n"); - auto _top_any = get_top(_top, get_critical_trace_count()); - - if(!_top_cpu.empty()) - { - OMNITRACE_BASIC_PRINT_F( - "generating %zu perfetto CPU critical traces...\n", _top_cpu.size()); - if(config::get_use_perfetto()) generate_perfetto(_top_cpu); - OMNITRACE_CT_DEBUG_F("saving CPU critical traces...\n"); - save_critical_trace( - tim::settings::compose_output_filename("critical-trace-cpu", ".json"), - "critical_trace", _top_cpu, true, __FUNCTION__); - } - - if(!_top_gpu.empty()) - { - OMNITRACE_BASIC_PRINT_F( - "generating %zu perfetto GPU critical traces...\n", _top_gpu.size()); - if(config::get_use_perfetto()) generate_perfetto(_top_gpu); - OMNITRACE_CT_DEBUG_F("saving GPU critical traces...\n"); - save_critical_trace( - tim::settings::compose_output_filename("critical-trace-gpu", ".json"), - "critical_trace", _top_gpu, true, __FUNCTION__); - } - - if(!_top_any.empty()) - { - OMNITRACE_BASIC_PRINT_F( - "generating %zu perfetto CPU + GPU critical traces...\n", - _top_gpu.size()); - if(config::get_use_perfetto()) generate_perfetto(_top_gpu); - OMNITRACE_CT_DEBUG_F("saving CPU + GPU critical traces...\n"); - save_critical_trace( - tim::settings::compose_output_filename("critical-trace-any", ".json"), - "critical_trace", _top_any, true, __FUNCTION__); - } - } - - _tg.join(); - _tp.destroy_threadpool(); - } // catch(std::exception& e) - { - // OMNITRACE_BASIC_PRINT("Thread exited '%s' with exception: %s\n", __FUNCTION__, - // e.what()); - // TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(true, 32); - } - - _report_perf(_ct_perf, __FUNCTION__, "critical trace computation"); -} -} // namespace -} // namespace critical_trace -} // namespace omnitrace diff --git a/source/bin/omnitrace-critical-trace/critical-trace.hpp b/source/bin/omnitrace-critical-trace/critical-trace.hpp deleted file mode 100644 index 0cbe03331..000000000 --- a/source/bin/omnitrace-critical-trace/critical-trace.hpp +++ /dev/null @@ -1,113 +0,0 @@ -// MIT License -// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/defines.hpp" -#include "core/perfetto.hpp" -#include "library/critical_trace.hpp" -#include "library/ptl.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace omnitrace -{ -namespace critical_trace -{ -namespace -{ -using call_graph_t = tim::graph; -using call_graph_itr_t = typename call_graph_t::iterator; -using call_graph_sibling_itr_t = typename call_graph_t::sibling_iterator; -using call_graph_preorder_itr_t = typename call_graph_t::pre_order_iterator; - -hash_ids complete_hash_ids{}; -call_chain complete_call_chain{}; -std::mutex complete_call_mutex{}; - -void -update_critical_path(call_chain _chain, int64_t _tid); - -bool -load_call_chain(const std::string& _fname, const std::string& _label, - call_chain& _call_chain); - -void -compute_critical_trace(); - -void -find_children(PTL::ThreadPool& _tp, call_graph_t& _graph, const call_chain& _chain); - -void -find_sequences(PTL::ThreadPool& _tp, call_graph_t& _graph, - std::vector& _chain); - -void -find_sequences(PTL::ThreadPool& _tp, call_graph_t& _graph, call_graph_itr_t _root, - std::vector& _chain); - -template -void -serialize_graph(ArchiveT& ar, const tim::graph& _graph); - -template -void -serialize_subgraph(ArchiveT& ar, const tim::graph& _graph, - typename tim::graph::iterator _root); - -void -compute_critical_trace(); - -template -void -generate_perfetto(const std::vector& _data); - -inline void -copy_hash_ids() -{ - // make copy to avoid parallel iteration issues - auto _hash_ids = complete_hash_ids; - // ensure all hash ids exist - for(const auto& itr : _hash_ids) - tim::hash::add_hash_id(itr); -} -} // namespace -} // namespace critical_trace -} // namespace omnitrace diff --git a/source/bin/tests/CMakeLists.txt b/source/bin/tests/CMakeLists.txt index 5a421d97a..24345625c 100644 --- a/source/bin/tests/CMakeLists.txt +++ b/source/bin/tests/CMakeLists.txt @@ -301,7 +301,6 @@ omnitrace_add_bin_test( ARGS -R omnitrace ~timemory - ~critical_trace -r _P ~PERFETTO diff --git a/source/docs/critical_trace.md b/source/docs/critical_trace.md index f6777a814..f6b725f4b 100644 --- a/source/docs/critical_trace.md +++ b/source/docs/critical_trace.md @@ -1,4 +1,4 @@ -# Generating a Critical Trace +# Critical Trace Support ```eval_rst .. toctree:: @@ -6,24 +6,5 @@ :maxdepth: 4 ``` -## Overview - -A critical trace is defined in omnitrace as the most time-consuming path through a parallelized code. -The steps for generating a critical trace are: - -1. Enable the `OMNITRACE_CRITICAL_TRACE` setting -2. Configure any other relevant critical-trace settings, as needed - - `omnitrace-avail --categories settings::critical_trace` -3. Execute application -4. Locate the JSON files with `call-chain` in their name -5. Provide these files to the `omnitrace-critical-trace` executable -6. Open generated perfetto file in [ui.perfetto.dev](https://ui.perfetto.dev/) - -## omnitrace-critical-trace Executable - -The `omnitrace-critical-trace` executable post-processes one or more `call-chain` JSON files and generates a perfetto output -for visualizing the critical trace. - -**INCOMPLETE** - -This executable is still under-development. +Critical trace support has been superseded by causal profiling support. +Critical trace support was removed in Omnitrace v1.11.0 due to incomplete implementation. diff --git a/source/docs/development.md b/source/docs/development.md index a1a672ca9..45f76a2ae 100644 --- a/source/docs/development.md +++ b/source/docs/development.md @@ -50,10 +50,6 @@ for each variant: - For a binary rewrite: outputs new instrumented binary and exits - For runtime instrumentation or attaching to a process: instructs the application to resume executing and then waits for the application to exit -### omnitrace-critical-trace: [source/bin/omnitrace-critical-trace](https://github.com/ROCm/omnitrace/tree/main/source/bin/omnitrace-critical-trace) - -Post-processing tool for critical-trace data output by omnitrace. - ## Libraries ### Common Library: [source/lib/common](https://github.com/ROCm/omnitrace/tree/main/source/lib/common) diff --git a/source/docs/features.md b/source/docs/features.md index 8761c5228..79151a01f 100644 --- a/source/docs/features.md +++ b/source/docs/features.md @@ -25,7 +25,6 @@ manage extensions, resources, data, etc. - Background thread records process-, system- and device-level metrics while the application executes - Causal profiling - Quantifies the potential impact of optimizations in parallel codes -- Critical trace generation ### Data Analysis @@ -35,7 +34,6 @@ manage extensions, resources, data, etc. - Comprehensive traces - Every individual event/measurement - Application speedup predictions resulting from potential optimizations in functions and lines of code (causal profiling) -- Critical trace analysis (alpha) ### Parallelism API Support diff --git a/source/docs/runtime.md b/source/docs/runtime.md index 7f5667a3d..12791c3df 100644 --- a/source/docs/runtime.md +++ b/source/docs/runtime.md @@ -191,13 +191,7 @@ OMNITRACE_USE_PID = true OMNITRACE_OUTPUT_PATH = omnitrace-%tag%-output OMNITRACE_OUTPUT_PREFIX = OMNITRACE_CI = false -OMNITRACE_CRITICAL_TRACE = false -OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT = 2000 -OMNITRACE_CRITICAL_TRACE_COUNT = 0 -OMNITRACE_CRITICAL_TRACE_DEBUG = false OMNITRACE_THREAD_POOL_SIZE = 8 -OMNITRACE_CRITICAL_TRACE_PER_ROW = 0 -OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES = false OMNITRACE_DEBUG = false OMNITRACE_DL_VERBOSE = 0 OMNITRACE_INSTRUMENTATION_INTERVAL = 1 @@ -283,13 +277,7 @@ $ omnitrace-avail -S -bd | OMNITRACE_CONFIG_FILE | Configuration file for omnitrace | | OMNITRACE_COUT_OUTPUT | Write output to stdout | | OMNITRACE_CPU_AFFINITY | Enable pinning threads to CPUs (Linu... | -| OMNITRACE_CRITICAL_TRACE | Enable generation of the critical trace | -| OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT | Number of critical trace records to ... | -| OMNITRACE_CRITICAL_TRACE_COUNT | Number of critical trace to export (... | -| OMNITRACE_CRITICAL_TRACE_DEBUG | Enable debugging for critical trace | | OMNITRACE_THREAD_POOL_SIZE | Number of threads to use when genera... | -| OMNITRACE_CRITICAL_TRACE_PER_ROW | How many critical traces per row in ... | -| OMNITRACE_CRITICAL_TRACE_SERIALIZE_N... | Include names in serialization of cr... | | OMNITRACE_DEBUG | Enable debug output | | OMNITRACE_DIFF_OUTPUT | Generate a difference output vs. a p... | | OMNITRACE_DL_VERBOSE | Verbosity within the omnitrace-dl li... | diff --git a/source/docs/sampling.md b/source/docs/sampling.md index 1a040bb9f..caa4bcfd8 100644 --- a/source/docs/sampling.md +++ b/source/docs/sampling.md @@ -209,7 +209,6 @@ $ omnitrace-sample -- ./parallel-overhead-locks 30 4 100 HSA_TOOLS_LIB=/opt/omnitrace/lib/libomnitrace-dl.so.1.7.1 HSA_TOOLS_REPORT_LOAD_FAILURE=1 LD_PRELOAD=/opt/omnitrace/lib/libomnitrace-dl.so.1.7.1 -OMNITRACE_CRITICAL_TRACE=false OMNITRACE_USE_PROCESS_SAMPLING=false OMNITRACE_USE_SAMPLING=true OMP_TOOL_LIBRARIES=/opt/omnitrace/lib/libomnitrace-dl.so.1.7.1 @@ -228,7 +227,6 @@ HSA_TOOLS_REPORT_LOAD_FAILURE=1 KOKKOS_PROFILE_LIBRARY=/opt/omnitrace/lib/libomnitrace.so.1.7.1 LD_PRELOAD=/opt/omnitrace/lib/libomnitrace-dl.so.1.7.1 OMNITRACE_CPU_FREQ_ENABLED=true -OMNITRACE_CRITICAL_TRACE=false OMNITRACE_TRACE_THREAD_LOCKS=true OMNITRACE_TRACE_THREAD_RW_LOCKS=true OMNITRACE_TRACE_THREAD_SPIN_LOCKS=true @@ -258,7 +256,6 @@ $ omnitrace-sample -PTDH -E all -o omnitrace-output %tag% -- ./parallel-overhead LD_PRELOAD=/opt/omnitrace/lib/libomnitrace-dl.so.1.7.1 OMNITRACE_CPU_FREQ_ENABLED=true -OMNITRACE_CRITICAL_TRACE=false OMNITRACE_OUTPUT_PATH=omnitrace-output OMNITRACE_OUTPUT_PREFIX=%tag% OMNITRACE_TRACE_THREAD_LOCKS=false @@ -288,7 +285,6 @@ $ omnitrace-sample -PTDH -E all -o omnitrace-output %tag% -c -- ./parallel-overh LD_PRELOAD=/opt/omnitrace/lib/libomnitrace-dl.so.1.7.1 OMNITRACE_CONFIG_FILE= OMNITRACE_CPU_FREQ_ENABLED=true -OMNITRACE_CRITICAL_TRACE=false OMNITRACE_OUTPUT_PATH=omnitrace-output OMNITRACE_OUTPUT_PREFIX=%tag% OMNITRACE_TRACE_THREAD_LOCKS=false diff --git a/source/lib/core/argparse.cpp b/source/lib/core/argparse.cpp index 91e33fbbc..b69a4c5de 100644 --- a/source/lib/core/argparse.cpp +++ b/source/lib/core/argparse.cpp @@ -1225,7 +1225,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) add_group_arguments(_parser, "perfetto", _data, true); add_group_arguments(_parser, "timemory", _data, true); add_group_arguments(_parser, "rocm", _data, true); - add_group_arguments(_parser, "critical_trace", _data, true); _parser.start_group("MISCELLANEOUS OPTIONS", ""); diff --git a/source/lib/core/categories.hpp b/source/lib/core/categories.hpp index ce1106823..ce3f66356 100644 --- a/source/lib/core/categories.hpp +++ b/source/lib/core/categories.hpp @@ -110,9 +110,6 @@ OMNITRACE_DEFINE_CATEGORY(category, mpi, OMNITRACE_CATEGORY_MPI, "mpi", "MPI reg OMNITRACE_DEFINE_CATEGORY(category, ompt, OMNITRACE_CATEGORY_OMPT, "ompt", "OpenMP tools regions") OMNITRACE_DEFINE_CATEGORY(category, process_sampling, OMNITRACE_CATEGORY_PROCESS_SAMPLING, "process_sampling", "Process-level data") OMNITRACE_DEFINE_CATEGORY(category, comm_data, OMNITRACE_CATEGORY_COMM_DATA, "comm_data", "MPI/RCCL counters for tracking amount of data sent or received") -OMNITRACE_DEFINE_CATEGORY(category, critical_trace, OMNITRACE_CATEGORY_CRITICAL_TRACE, "critical-trace", "Critical trace data") -OMNITRACE_DEFINE_CATEGORY(category, host_critical_trace, OMNITRACE_CATEGORY_HOST_CRITICAL_TRACE, "host-critical-trace", "Host-side critical trace data") -OMNITRACE_DEFINE_CATEGORY(category, device_critical_trace, OMNITRACE_CATEGORY_DEVICE_CRITICAL_TRACE, "device-critical-trace", "Device-side critical trace data") OMNITRACE_DEFINE_CATEGORY(category, causal, OMNITRACE_CATEGORY_CAUSAL, "causal", "Causal profiling data") OMNITRACE_DEFINE_CATEGORY(category, cpu_freq, OMNITRACE_CATEGORY_CPU_FREQ, "cpu_frequency", "CPU frequency (collected in background thread)") OMNITRACE_DEFINE_CATEGORY(category, process_page, OMNITRACE_CATEGORY_PROCESS_PAGE, "process_page_fault", "Memory page faults in process (collected in background thread)") @@ -174,9 +171,6 @@ using name = perfetto_category; OMNITRACE_PERFETTO_CATEGORY(category::sampling), \ OMNITRACE_PERFETTO_CATEGORY(category::process_sampling), \ OMNITRACE_PERFETTO_CATEGORY(category::comm_data), \ - OMNITRACE_PERFETTO_CATEGORY(category::critical_trace), \ - OMNITRACE_PERFETTO_CATEGORY(category::host_critical_trace), \ - OMNITRACE_PERFETTO_CATEGORY(category::device_critical_trace), \ OMNITRACE_PERFETTO_CATEGORY(category::causal), \ OMNITRACE_PERFETTO_CATEGORY(category::cpu_freq), \ OMNITRACE_PERFETTO_CATEGORY(category::process_page), \ diff --git a/source/lib/core/config.cpp b/source/lib/core/config.cpp index abcc8fca5..d7bb5d959 100644 --- a/source/lib/core/config.cpp +++ b/source/lib/core/config.cpp @@ -520,10 +520,6 @@ configure_settings(bool _init) _backend, "perfetto") ->set_choices({ "inprocess", "system", "all" }); - OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CRITICAL_TRACE", - "Enable generation of the critical trace", false, "backend", - "critical_trace"); - OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_LOCKS", "Enable tracing calls to pthread_mutex_lock, " "pthread_mutex_unlock, pthread_mutex_trylock", @@ -652,15 +648,6 @@ configure_settings(bool _init) "busy,temp,power,mem_usage", "backend", "rocm_smi", "rocm", "process_sampling", "advanced"); - OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CRITICAL_TRACE_DEBUG", - "Enable debugging for critical trace", _omnitrace_debug, - "debugging", "critical_trace", "advanced"); - - OMNITRACE_CONFIG_SETTING( - bool, "OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES", - "Include names in serialization of critical trace (mainly for debugging)", - _omnitrace_debug, "debugging", "critical_trace", "advanced"); - OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB", "Hint for shared-memory buffer size in perfetto (in KB)", size_t{ 4096 }, "perfetto", "data", "advanced"); @@ -726,21 +713,6 @@ configure_settings(bool _init) 1), "parallelism", "advanced"); - OMNITRACE_CONFIG_EXT_SETTING(int64_t, "OMNITRACE_CRITICAL_TRACE_COUNT", - "Number of critical trace to export (0 == all)", - int64_t{ 0 }, "critical_trace", - "omnitrace-critical-trace", "advanced"); - - OMNITRACE_CONFIG_SETTING(uint64_t, "OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT", - "Number of critical trace records to store in thread-local " - "memory before submitting to shared buffer", - uint64_t{ 2000 }, "critical_trace", "advanced"); - - OMNITRACE_CONFIG_EXT_SETTING( - int64_t, "OMNITRACE_CRITICAL_TRACE_PER_ROW", - "How many critical traces per row in perfetto (0 == all in one row)", - int64_t{ 0 }, "critical_trace", "omnitrace-critical-trace", "advanced"); - OMNITRACE_CONFIG_SETTING( std::string, "OMNITRACE_TIMEMORY_COMPONENTS", "List of components to collect via timemory (see `omnitrace-avail -C`)", @@ -1162,14 +1134,12 @@ configure_mode_settings(const std::shared_ptr& _config) _set("OMNITRACE_USE_OMPT", false); _set("OMNITRACE_USE_SAMPLING", false); _set("OMNITRACE_USE_PROCESS_SAMPLING", false); - _set("OMNITRACE_CRITICAL_TRACE", false); } else if(get_mode() == Mode::Causal) { _set("OMNITRACE_USE_CAUSAL", true); _set("OMNITRACE_TRACE", false); _set("OMNITRACE_PROFILE", false); - _set("OMNITRACE_CRITICAL_TRACE", false); _set("OMNITRACE_USE_SAMPLING", false); _set("OMNITRACE_USE_PROCESS_SAMPLING", false); } @@ -1228,7 +1198,6 @@ configure_mode_settings(const std::shared_ptr& _config) _set("OMNITRACE_USE_SAMPLING", false); _set("OMNITRACE_USE_PROCESS_SAMPLING", false); _set("OMNITRACE_USE_CODE_COVERAGE", false); - _set("OMNITRACE_CRITICAL_TRACE", false); set_setting_value("OMNITRACE_TIMEMORY_COMPONENTS", std::string{}); set_setting_value("OMNITRACE_PAPI_EVENTS", std::string{}); } @@ -1409,7 +1378,6 @@ configure_disabled_settings(const std::shared_ptr& _config) _handle_use_option("OMNITRACE_USE_ROCM_SMI", "rocm_smi"); _handle_use_option("OMNITRACE_USE_ROCTRACER", "roctracer"); _handle_use_option("OMNITRACE_USE_ROCPROFILER", "rocprofiler"); - _handle_use_option("OMNITRACE_CRITICAL_TRACE", "critical_trace"); #if !defined(OMNITRACE_USE_ROCTRACER) || OMNITRACE_USE_ROCTRACER == 0 _config->find("OMNITRACE_USE_ROCTRACER")->second->set_hidden(true); @@ -1976,13 +1944,6 @@ get_use_mpip() return static_cast&>(*_v->second).get(); } -bool& -get_use_critical_trace() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE"); - return static_cast&>(*_v->second).get(); -} - bool get_use_kokkosp() { @@ -2029,20 +1990,6 @@ get_num_threads_hint() return static_cast&>(*_v->second).get(); } -bool -get_critical_trace_debug() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_DEBUG"); - return static_cast&>(*_v->second).get(); -} - -bool -get_critical_trace_serialize_names() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_SERIALIZE_NAMES"); - return static_cast&>(*_v->second).get(); -} - bool get_sampling_keep_internal() { @@ -2099,13 +2046,6 @@ get_trace_hsa_activity() return static_cast&>(*_v->second).get(); } -int64_t -get_critical_trace_per_row() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_PER_ROW"); - return static_cast&>(*_v->second).get(); -} - size_t get_perfetto_shmem_size_hint() { @@ -2215,14 +2155,6 @@ get_perfetto_annotations() return static_cast&>(*_v->second).get(); } -uint64_t -get_critical_trace_update_freq() -{ - static uint64_t _v = - get_config()->get("OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT"); - return _v; -} - uint64_t get_thread_pool_size() { @@ -2394,13 +2326,6 @@ get_sampling_allocator_size() return std::max(static_cast&>(*_v->second).get(), 1); } -int64_t -get_critical_trace_count() -{ - static auto _v = get_config()->find("OMNITRACE_CRITICAL_TRACE_COUNT"); - return static_cast&>(*_v->second).get(); -} - double get_process_sampling_freq() { diff --git a/source/lib/core/config.hpp b/source/lib/core/config.hpp index 1a90d9d87..dac62fc3f 100644 --- a/source/lib/core/config.hpp +++ b/source/lib/core/config.hpp @@ -218,9 +218,6 @@ get_use_pid(); bool& get_use_mpip(); -bool& -get_use_critical_trace() OMNITRACE_HOT; - bool get_use_kokkosp(); @@ -251,12 +248,6 @@ get_trace_hsa_api(); bool get_trace_hsa_activity(); -bool -get_critical_trace_debug(); - -bool -get_critical_trace_serialize_names(); - size_t get_perfetto_shmem_size_hint(); @@ -278,9 +269,6 @@ get_disabled_categories(); bool get_perfetto_annotations() OMNITRACE_HOT; -uint64_t -get_critical_trace_update_freq(); - uint64_t get_thread_pool_size(); @@ -297,9 +285,6 @@ get_perfetto_output_filename(); bool get_perfetto_roctracer_per_stream() OMNITRACE_HOT; -int64_t -get_critical_trace_count(); - double get_trace_delay(); @@ -360,9 +345,6 @@ get_process_sampling_duration(); std::string get_sampling_gpus(); -int64_t -get_critical_trace_per_row(); - bool get_trace_thread_locks(); diff --git a/source/lib/core/debug.hpp b/source/lib/core/debug.hpp index e869e261d..7f8141634 100644 --- a/source/lib/core/debug.hpp +++ b/source/lib/core/debug.hpp @@ -67,9 +67,6 @@ get_debug_tid() OMNITRACE_HOT; bool get_debug_pid() OMNITRACE_HOT; - -bool -get_critical_trace_debug() OMNITRACE_HOT; } // namespace config namespace debug @@ -560,12 +557,6 @@ as_hex(void*, size_t); #define OMNITRACE_BASIC_DEBUG_F(...) \ OMNITRACE_CONDITIONAL_BASIC_PRINT_F(::omnitrace::get_debug_env(), __VA_ARGS__) -#define OMNITRACE_CT_DEBUG(...) \ - OMNITRACE_CONDITIONAL_PRINT(::omnitrace::get_critical_trace_debug(), __VA_ARGS__) - -#define OMNITRACE_CT_DEBUG_F(...) \ - OMNITRACE_CONDITIONAL_PRINT_F(::omnitrace::get_critical_trace_debug(), __VA_ARGS__) - //--------------------------------------------------------------------------------------// // // Verbose macros diff --git a/source/lib/core/exception.cpp b/source/lib/core/exception.cpp index 43aebda47..e95d43cb2 100644 --- a/source/lib/core/exception.cpp +++ b/source/lib/core/exception.cpp @@ -50,6 +50,7 @@ template auto get_backtrace(Args... _arg) { + consume_args(_arg...); auto _bt = std::stringstream{}; if constexpr(sizeof...(Args) > 0) { @@ -57,7 +58,6 @@ get_backtrace(Args... _arg) } tim::unwind::detailed_backtrace<2>(_bt, true); return strdup(_bt.str().c_str()); - consume_args(_arg...); } } // namespace diff --git a/source/lib/omnitrace-user/omnitrace/categories.h b/source/lib/omnitrace-user/omnitrace/categories.h index 480a828ce..dd435e502 100644 --- a/source/lib/omnitrace-user/omnitrace/categories.h +++ b/source/lib/omnitrace-user/omnitrace/categories.h @@ -63,9 +63,6 @@ extern "C" OMNITRACE_CATEGORY_OMPT, OMNITRACE_CATEGORY_PROCESS_SAMPLING, OMNITRACE_CATEGORY_COMM_DATA, - OMNITRACE_CATEGORY_CRITICAL_TRACE, - OMNITRACE_CATEGORY_HOST_CRITICAL_TRACE, - OMNITRACE_CATEGORY_DEVICE_CRITICAL_TRACE, OMNITRACE_CATEGORY_CAUSAL, OMNITRACE_CATEGORY_CPU_FREQ, OMNITRACE_CATEGORY_PROCESS_PAGE, diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 63b6b2aff..1086989c0 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -48,7 +48,6 @@ #include "library/components/pthread_gotcha.hpp" #include "library/components/rocprofiler.hpp" #include "library/coverage.hpp" -#include "library/critical_trace.hpp" #include "library/ompt.hpp" #include "library/process_sampler.hpp" #include "library/ptl.hpp" @@ -203,9 +202,6 @@ ensure_finalization(bool _static_init = false) return scope::destructor{ []() { omnitrace_finalize_hidden(); } }; } -using Device = critical_trace::Device; -using Phase = critical_trace::Phase; - template struct fini_bundle { @@ -402,11 +398,6 @@ omnitrace_init_library_hidden() if(_debug_init) config::set_setting_value("OMNITRACE_DEBUG", _debug_value); } }; - // below will effectively do: - // get_cpu_cid_stack(0)->emplace_back(-1); - // plus query some env variables - add_critical_trace(0, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0); - tim::trait::runtime_enabled::set(get_use_roctracer()); tim::trait::runtime_enabled::set(get_use_roctracer() && get_use_timemory()); @@ -920,55 +911,12 @@ omnitrace_finalize_hidden(void) causal::finish_experimenting(); } - if(get_use_critical_trace() || (get_use_rocm_smi() && get_use_roctracer())) - { - OMNITRACE_VERBOSE_F(1, "Generating the critical trace...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - using critical_trace_hash_data = - thread_data; - - if(i < critical_trace_hash_data::get()->size() && - critical_trace_hash_data::get()->at(i)) - { - OMNITRACE_DEBUG_F("Copying the hash id data for thread %zu...\n", i); - critical_trace::add_hash_id(*critical_trace_hash_data::get()->at(i)); - } - } - - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - using critical_trace_chain_data = thread_data; - - if(i < critical_trace_chain_data::get()->size() && - critical_trace_chain_data::get()->at(i)) - { - OMNITRACE_DEBUG_F( - "Updating the critical trace call-chains for thread %zu...\n", i); - critical_trace::update(i); // launch update task - } - } - - OMNITRACE_VERBOSE_F(1, "Waiting on critical trace updates...\n"); - tasking::join(); - } - if(get_use_process_sampling()) { OMNITRACE_VERBOSE_F(1, "Post-processing the system-level samples...\n"); process_sampler::post_process(); } - if(get_use_critical_trace()) - { - // launch compute task - OMNITRACE_VERBOSE_F(1, "Launching critical trace compute task...\n"); - critical_trace::compute(); - - OMNITRACE_VERBOSE_F(1, "Waiting on critical trace computation...\n"); - tasking::join(); - } - // shutdown tasking before timemory is finalized, especially the roctracer thread-pool OMNITRACE_VERBOSE_F(1, "Shutting down thread-pools...\n"); tasking::shutdown(); diff --git a/source/lib/omnitrace/library/CMakeLists.txt b/source/lib/omnitrace/library/CMakeLists.txt index a69101193..60f6b4f61 100644 --- a/source/lib/omnitrace/library/CMakeLists.txt +++ b/source/lib/omnitrace/library/CMakeLists.txt @@ -2,7 +2,6 @@ set(library_sources ${CMAKE_CURRENT_LIST_DIR}/coverage.cpp ${CMAKE_CURRENT_LIST_DIR}/cpu_freq.cpp - ${CMAKE_CURRENT_LIST_DIR}/critical_trace.cpp ${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp ${CMAKE_CURRENT_LIST_DIR}/ompt.cpp ${CMAKE_CURRENT_LIST_DIR}/perf.cpp @@ -17,7 +16,6 @@ set(library_sources set(library_headers ${CMAKE_CURRENT_LIST_DIR}/coverage.hpp ${CMAKE_CURRENT_LIST_DIR}/cpu_freq.hpp - ${CMAKE_CURRENT_LIST_DIR}/critical_trace.hpp ${CMAKE_CURRENT_LIST_DIR}/ompt.hpp ${CMAKE_CURRENT_LIST_DIR}/process_sampler.hpp ${CMAKE_CURRENT_LIST_DIR}/perf.hpp diff --git a/source/lib/omnitrace/library/components/category_region.hpp b/source/lib/omnitrace/library/components/category_region.hpp index 2b39f21da..b0348f84d 100644 --- a/source/lib/omnitrace/library/components/category_region.hpp +++ b/source/lib/omnitrace/library/components/category_region.hpp @@ -27,7 +27,6 @@ #include "core/state.hpp" #include "core/timemory.hpp" #include "library/causal/data.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/tracing.hpp" #include "library/tracing/annotation.hpp" @@ -68,12 +67,6 @@ using tracing_count_categories_t = type_list; -// these categories are added to the critical trace -using critical_trace_categories_t = - type_list; - // convert these categories to throughput points using causal_throughput_categories_t = type_list::start(std::string_view name, Args&&... args) tracing::push_perfetto(CategoryT{}, name.data(), std::forward(args)...); } } - - if constexpr(is_one_of::value) - { - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - - if(get_use_critical_trace()) - { - uint64_t _cid = 0; - uint64_t _parent_cid = 0; - uint32_t _depth = 0; - std::tie(_cid, _parent_cid, _depth) = create_cpu_cid_entry(); - auto _ts = comp::wall_clock::record(); - add_critical_trace( - threading::get_id(), _cid, 0, _parent_cid, _ts, 0, 0, 0, - critical_trace::add_hash_id(name.data()), _depth); - } - } } template @@ -278,30 +253,6 @@ category_region::stop(std::string_view name, Args&&... args) if(get_use_causal()) causal::pop_progress_point(name); } } - - if constexpr(is_one_of::value) - { - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - - if(get_use_critical_trace()) - { - if(get_cpu_cid_stack() && !get_cpu_cid_stack()->empty()) - { - auto _cid = get_cpu_cid_stack()->back(); - if(get_cpu_cid_parents()->find(_cid) != get_cpu_cid_parents()->end()) - { - uint64_t _parent_cid = 0; - uint32_t _depth = 0; - auto _ts = comp::wall_clock::record(); - std::tie(_parent_cid, _depth) = get_cpu_cid_parents()->at(_cid); - add_critical_trace( - threading::get_id(), _cid, 0, _parent_cid, _ts, _ts, 0, 0, - critical_trace::add_hash_id(name.data()), _depth); - } - } - } - } } else { diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp index 279e4b4c1..ed9650a4e 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp @@ -25,7 +25,6 @@ #include "core/debug.hpp" #include "core/utility.hpp" #include "library/components/category_region.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" @@ -41,9 +40,6 @@ namespace omnitrace { namespace component { -using Device = critical_trace::Device; -using Phase = critical_trace::Phase; - pthread_mutex_gotcha::hash_array_t& pthread_mutex_gotcha::get_hashes() { @@ -76,7 +72,7 @@ pthread_mutex_gotcha::get_hashes() { auto&& _id = _data.at(i).tool_id; if(!_id.empty()) - _init.at(i) = critical_trace::add_hash_id(_id.c_str()); + _init.at(i) = tim::add_hash_id(_id.c_str()); else { if(_skip.count(i) > 0) continue; @@ -176,7 +172,7 @@ pthread_mutex_gotcha::pthread_mutex_gotcha(const gotcha_data_t& _data) template auto -pthread_mutex_gotcha::operator()(uintptr_t&& _id, int (*_callee)(Args...), +pthread_mutex_gotcha::operator()(uintptr_t&&, int (*_callee)(Args...), Args... _args) const { using bundle_t = category_region; @@ -203,30 +199,10 @@ pthread_mutex_gotcha::operator()(uintptr_t&& _id, int (*_callee)(Args...), bool& _protect; } _dtor{ m_protect = true }; - uint64_t _cid = 0; - uint64_t _parent_cid = 0; - uint32_t _depth = 0; - int64_t _ts = 0; - - if(_id < std::numeric_limits::max() && get_use_critical_trace()) - { - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - std::tie(_cid, _parent_cid, _depth) = create_cpu_cid_entry(); - _ts = comp::wall_clock::record(); - } - bundle_t::audit(std::string_view{ m_data->tool_id }, audit::incoming{}, _args...); auto _ret = (*_callee)(_args...); bundle_t::audit(std::string_view{ m_data->tool_id }, audit::outgoing{}, _ret); - if(_id < std::numeric_limits::max() && get_use_critical_trace()) - { - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - add_critical_trace( - threading::get_id(), _cid, 0, _parent_cid, _ts, comp::wall_clock::record(), 0, - _id, get_hashes().at(m_data->index), _depth); - } - tim::consume_parameters(_id, _cid, _parent_cid, _depth, _ts); return _ret; } diff --git a/source/lib/omnitrace/library/critical_trace.cpp b/source/lib/omnitrace/library/critical_trace.cpp deleted file mode 100644 index 1c037d44c..000000000 --- a/source/lib/omnitrace/library/critical_trace.cpp +++ /dev/null @@ -1,753 +0,0 @@ -// MIT License -// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "library/critical_trace.hpp" -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/defines.hpp" -#include "core/perfetto.hpp" -#include "library/ptl.hpp" -#include "library/runtime.hpp" -#include "library/thread_data.hpp" -#include "library/tracing.hpp" -#include "library/tracing/annotation.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace omnitrace -{ -namespace critical_trace -{ -namespace -{ -using call_graph_t = tim::graph; -using call_graph_itr_t = typename call_graph_t::iterator; -using call_graph_sibling_itr_t = typename call_graph_t::sibling_iterator; -using call_graph_preorder_itr_t = typename call_graph_t::pre_order_iterator; - -hash_ids complete_hash_ids{}; -call_chain complete_call_chain{}; -std::mutex complete_call_mutex{}; -std::mutex tasking_mutex{}; - -void -update_critical_path(call_chain _chain, int64_t _tid); - -void -compute_critical_trace(); - -void -copy_hash_ids() -{ - // make copy to avoid parallel iteration issues - auto _hash_ids = complete_hash_ids; - // ensure all hash ids exist - for(const auto& itr : _hash_ids) - tim::hash::add_hash_id(itr); -} -} // namespace -} // namespace critical_trace - -namespace critical_trace -{ -namespace -{ -template -size_t -get_combined_hash(Arg0&& _zero, Arg1&& _one, Args&&... _args) -{ - return tim::hash::get_hash_id(std::forward(_zero), std::forward(_one), - std::forward(_args)...); -} -} // namespace - -//--------------------------------------------------------------------------------------// -// -// ENTRY -// -//--------------------------------------------------------------------------------------// - -bool -entry::operator==(const entry& rhs) const -{ - if(device != rhs.device) return false; - if(cpu_cid != rhs.cpu_cid) return false; - if(gpu_cid != rhs.gpu_cid) return false; - if(hash != rhs.hash) return false; - if(tid != rhs.tid) return false; - if(devid != rhs.devid) return false; - if(queue_id != rhs.queue_id) return false; - if(depth != rhs.depth) return false; - if(priority != rhs.priority) return false; - if(pid != rhs.pid) return false; - return true; - /* - return std::tie(device, depth, priority, devid, pid, tid, cpu_cid, gpu_cid, queue_id, - hash) == std::tie(rhs.device, rhs.depth, rhs.priority, rhs.devid, - rhs.pid, rhs.tid, rhs.cpu_cid, rhs.gpu_cid, - rhs.queue_id, rhs.hash); - */ -} - -bool -entry::operator<(const entry& rhs) const -{ - // sort by process ids - auto _pid_eq = (pid == rhs.pid); - if(!_pid_eq) return (pid < rhs.pid); - - // sort by device ids - auto _devid_eq = (devid == rhs.devid); - if(!_devid_eq) return (devid < rhs.devid); - - // sort by cpu ids - auto _cpu_eq = (cpu_cid == rhs.cpu_cid); - if(!_cpu_eq) return (cpu_cid < rhs.cpu_cid); - - // sort by gpu ids - if(gpu_cid > 0 && rhs.gpu_cid > 0) - { - auto _gpu_eq = (gpu_cid == rhs.gpu_cid); - if(!_gpu_eq) return (gpu_cid < rhs.gpu_cid); - } - - // sort by parent ids - auto _par_eq = (parent_cid == rhs.parent_cid); - if(!_par_eq) return (parent_cid < rhs.parent_cid); - - // sort by queue ids - auto _queue_eq = (queue_id == rhs.queue_id); - if(!_queue_eq) return (queue_id < rhs.queue_id); - - // sort by priority - auto _prio_eq = (priority == rhs.priority); - if(!_prio_eq) return (priority < rhs.priority); - - // sort by timestamp (last resort) - return (begin_ns < rhs.begin_ns); -} - -bool -entry::operator>(const entry& rhs) const -{ - return (!(*this < rhs) && std::tie(begin_ns, cpu_cid, gpu_cid) != - std::tie(rhs.begin_ns, rhs.cpu_cid, rhs.gpu_cid)); -} - -entry& -entry::operator+=(const entry& rhs) -{ - if(phase == Phase::BEGIN && rhs.phase == Phase::END) - { - assert(rhs.end_ns >= begin_ns); - end_ns = rhs.end_ns; - phase = Phase::DELTA; - return *this; - } - else - { - OMNITRACE_VERBOSE( - 2, "Warning! Incorrect phase. entry::operator+=(entry) is only valid for " - "Phase::BEGIN += Phase::END\n"); - } - return *this; -} - -size_t -entry::get_hash() const -{ - return get_combined_hash(hash, static_cast(device), static_cast(phase), - devid, pid, tid, cpu_cid, gpu_cid, queue_id, priority); -} - -int64_t -entry::get_timestamp() const -{ - switch(phase) - { - case Phase::BEGIN: return begin_ns; - case Phase::END: return end_ns; - case Phase::DELTA: return (end_ns - begin_ns); - case Phase::NONE: break; - } - return 0; -} - -int64_t -entry::get_cost() const -{ - switch(phase) - { - case Phase::DELTA: return (end_ns - begin_ns); - default: break; - } - return 0; -} - -void -entry::write(std::ostream& _os) const -{ - if(device == Device::GPU) - _os << "[GPU][" << cpu_cid << "][" << gpu_cid << "]"; - else - _os << "[CPU][" << cpu_cid << "]"; - _os << " parent: " << static_cast(parent_cid); - _os << ", device: " << devid; - _os << ", pid: " << pid; - _os << ", tid: " << tid; - _os << ", depth: " << depth; - _os << ", queue: " << queue_id; - _os << ", priority: " << priority; - if(phase == Phase::DELTA) - { - std::stringstream _cost{}; - _cost << std::setprecision(4) << std::scientific << (get_timestamp() / 1.0e9); - _os << ", cost: [" << std::setw(8) << _cost.str() << " sec]"; - } - else - { - _os << ", phase: "; - if(phase == Phase::BEGIN) - _os << "begin "; - else if(phase == Phase::END) - _os << "end "; - _os << "[" << begin_ns << ":" << end_ns << "]"; - } - _os << ", hash: " << hash << " :: " << tim::demangle(tim::get_hash_identifier(hash)); -} - -//--------------------------------------------------------------------------------------// -// -// CALL CHAIN -// -//--------------------------------------------------------------------------------------// - -bool -call_chain::operator==(const call_chain& rhs) const -{ - if(size() != rhs.size()) return false; - for(size_t i = 0; i < size(); ++i) - if(at(i) != rhs.at(i)) return false; - return true; -} - -int64_t -call_chain::get_cost(int64_t _tid) const -{ - int64_t _cost = 0; - if(_tid < 0) - { - for(const auto& itr : *this) - _cost += itr.get_cost(); - } - else - { - for(const auto& itr : *this) - { - if(itr.tid == _tid) _cost += itr.get_cost(); - } - } - return _cost; -} - -template -void -call_chain::generate_perfetto(::perfetto::Track _track, std::set& _used) const -{ - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - - static std::set _static_strings{}; - static std::mutex _static_mutex{}; - - for(const auto& itr : *this) - { - if(!_used.emplace(itr).second) continue; - - auto&& _annotater = [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "begin_ns", itr.begin_ns); - tracing::add_perfetto_annotation(ctx, "end_ns", itr.end_ns); - } - }; - - if constexpr(DevT == Device::NONE) - { - if(itr.device == Device::CPU) - { - tracing::push_perfetto_track(category::host_critical_trace{}, "CPU", - _track, itr.begin_ns, std::move(_annotater)); - tracing::pop_perfetto_track(category::host_critical_trace{}, "CPU", - _track, itr.end_ns); - } - else if(itr.device == Device::GPU) - { - tracing::push_perfetto_track(category::device_critical_trace{}, "GPU", - _track, itr.begin_ns, std::move(_annotater)); - tracing::pop_perfetto_track(category::device_critical_trace{}, "GPU", - _track, itr.end_ns); - } - } - else - { - using category_t = std::conditional_t< - DevT == Device::ANY, omnitrace::category::critical_trace, - std::conditional_t>; - - if constexpr(DevT != Device::ANY) - { - if(itr.device != DevT) continue; - } - - std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash)); - _static_mutex.lock(); - auto sitr = _static_strings.emplace(_name); - _static_mutex.unlock(); - - tracing::push_perfetto_track(category_t{}, sitr.first->c_str(), _track, - itr.begin_ns, std::move(_annotater)); - tracing::pop_perfetto_track(category_t{}, sitr.first->c_str(), _track, - itr.end_ns); - } - } -} - -// explicit instantiations -template void -call_chain::generate_perfetto(::perfetto::Track, std::set&) const; - -template void -call_chain::generate_perfetto(::perfetto::Track, std::set&) const; - -template void -call_chain::generate_perfetto(::perfetto::Track, std::set&) const; - -template void -call_chain::generate_perfetto(::perfetto::Track, std::set&) const; - -//--------------------------------------------------------------------------------------// -// -// FREE FUNCTIONS -// -//--------------------------------------------------------------------------------------// - -uint64_t -get_update_frequency() -{ - return get_critical_trace_update_freq(); -} - -unique_ptr_t& -get(int64_t _tid) -{ - static auto* _v = thread_data::get(); - static thread_local auto _once = [_tid]() { - if(!_v->at(0)) _v->at(0) = unique_ptr_t{ new call_chain{} }; - if(!_v->at(_tid)) _v->at(_tid) = unique_ptr_t{ new call_chain{} }; - if(_tid > 0) *_v->at(_tid) = *_v->at(0); - return true; - }(); - (void) _once; - return _v->at(_tid); -} - -void -add_hash_id(const hash_ids& _labels) -{ - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - if(!tasking::critical_trace::get_task_group().pool()) return; - std::unique_lock _lk{ tasking_mutex }; - tasking::critical_trace::get_task_group().exec([_labels]() { - static std::mutex _mtx{}; - _mtx.lock(); - for(auto itr : _labels) - complete_hash_ids.emplace(std::move(itr)); - _mtx.unlock(); - }); -} - -size_t -add_hash_id(const std::string& _label) -{ - using critical_trace_hash_data = - thread_data; - - auto _hash = tim::hash::add_hash_id(_label); - if(get_use_critical_trace() || get_use_rocm_smi()) - { - critical_trace_hash_data::construct(); - critical_trace_hash_data::instance()->emplace(_label); - } - return _hash; -} - -void -update(int64_t _tid) -{ - if(!get_use_critical_trace() && !get_use_rocm_smi()) return; - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - if(!tasking::critical_trace::get_task_group().pool()) return; - std::unique_lock _lk{ tasking_mutex }; - call_chain _data{}; - std::swap(_data, *critical_trace::get(_tid)); - tasking::critical_trace::get_task_group().exec(update_critical_path, _data, _tid); -} - -void -compute(int64_t _tid) -{ - update(_tid); - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - if(!tasking::critical_trace::get_task_group().pool()) return; - std::unique_lock _lk{ tasking_mutex }; - tasking::critical_trace::get_task_group().exec(compute_critical_trace); -} - -//--------------------------------------------------------------------------------------// -// -// HELPER FUNCTIONS -// -//--------------------------------------------------------------------------------------// - -namespace -{ -std::string -get_perf_name(std::string _func) -{ - const auto _npos = std::string::npos; - auto _pos = std::string::npos; - while((_pos = _func.find('_')) != _npos) - _func = _func.replace(_pos, 1, " "); - if(_func.length() > 0) _func.at(0) = std::toupper(_func.at(0)); - return _func; -} - -void -save_call_chain_json(const std::string& _fname, const std::string& _label, - const call_chain& _call_chain, bool _msg = false, - std::string _func = {}) -{ - OMNITRACE_CT_DEBUG("[%s][%s] saving %zu call chain entries to '%s'\n", __FUNCTION__, - _label.c_str(), _call_chain.size(), _fname.c_str()); - - using perfstats_t = - tim::lightweight_tuple; - perfstats_t _perf{ get_perf_name(__FUNCTION__) }; - _perf.start(); - - auto _save = [&](std::ostream& _os) { - namespace cereal = tim::cereal; - auto ar = tim::policy::output_archive::get(_os); - - auto _hash_map = *tim::hash::get_hash_ids(); - for(auto& itr : _hash_map) - itr.second = tim::demangle(itr.second); - ar->setNextName("omnitrace"); - ar->startNode(); - (*ar)(cereal::make_nvp("hash_map", _hash_map), - cereal::make_nvp(_label.c_str(), _call_chain)); - ar->finishNode(); - }; - - std::ofstream ofs{}; - if(tim::filepath::open(ofs, _fname)) - { - if(_msg) - { - if(_func.empty()) _func = __FUNCTION__; - if(get_verbose() >= 0) - operation::file_output_message{}( - _fname, std::string{ _func }); - } - std::stringstream oss{}; - if(_call_chain.size() > 100000) - { - _save(ofs); - } - else - { - _save(oss); - ofs << oss.str() << std::endl; - } - } - - _perf.stop(); - if(_msg) - { - OMNITRACE_CT_DEBUG("%s\n", JOIN("", _perf).c_str()); - } -} - -template class ContainerT, typename... Args, - typename FuncT = bool (*)(const Tp&, const Tp&)> -inline auto -find( - const Tp& _v, ContainerT& _vec, - FuncT&& _func = [](const Tp& _lhs, const Tp& _rhs) { return (_lhs == _rhs); }) -{ - for(auto itr = _vec.begin(); itr != _vec.end(); ++itr) - { - if(std::forward(_func)(_v, *itr)) - { - return itr; - } - } - OMNITRACE_CT_DEBUG("[%s] no match found in %zu entries...\n", __FUNCTION__, - _vec.size()); - return _vec.end(); -} - -template -inline auto -find( - const entry& _v, call_chain& _vec, - FuncT&& _func = [](const entry& _lhs, const entry& _rhs) { return (_lhs == _rhs); }) -{ - return find(_v, reinterpret_cast&>(_vec), - std::forward(_func)); -} - -void -squash_critical_path(call_chain& _targ) -{ - OMNITRACE_CT_DEBUG("[%s]\n", __FUNCTION__); - static auto _strict_equal = [](const entry& _lhs, const entry& _rhs) { - auto _same_phase = (_lhs.phase == _rhs.phase); - bool _phase_check = true; - if(_same_phase) _phase_check = (_lhs.get_timestamp() == _rhs.get_timestamp()); - return (_lhs == _rhs && _lhs.parent_cid == _rhs.parent_cid && _phase_check); - }; - - std::sort(_targ.begin(), _targ.end()); - - call_chain _squashed{}; - for(auto& itr : _targ) - { - if(itr.phase == Phase::DELTA) - { - _squashed.emplace_back(itr); - } - else if(itr.phase == Phase::BEGIN) - { - if(find(itr, _squashed, _strict_equal) == _squashed.end()) - _squashed.emplace_back(itr); - } - else - { - auto mitr = find(itr, _squashed); - if(mitr != _squashed.end()) - *mitr += itr; - else - _squashed.emplace_back(itr); - } - } - - std::swap(_targ, _squashed); - std::sort(_targ.begin(), _targ.end()); -} - -void -combine_critical_path(call_chain& _targ, call_chain _chain) -{ - OMNITRACE_CT_DEBUG("[%s]\n", __FUNCTION__); - OMNITRACE_CT_DEBUG("[%s] adding %zu entries to existing call-chain of %zu...\n", - __FUNCTION__, _chain.size(), _targ.size()); - - // use a deque here because when combining _begin and _end, you end - // up erasing entries from the front of _begin. When _begin is large, it - // takes a lot of time to move all the elements each iteration - std::deque _begin{}; - std::deque _end{}; - - call_chain _delta{}; - _delta.reserve(_chain.size() / 2); // estimated total deltas - - for(auto& itr : _chain) - { - if(itr.phase == Phase::DELTA) - _delta.emplace_back(itr); - else if(itr.phase == Phase::BEGIN) - _begin.emplace_back(itr); - else if(itr.phase == Phase::END) - _end.emplace_back(itr); - } - - OMNITRACE_CT_DEBUG("[%s] sorting %zu begin and %zu end call-chain entries...\n", - __FUNCTION__, _begin.size(), _end.size()); - - std::sort(_begin.begin(), _begin.end()); - std::sort(_end.begin(), _end.end()); - - std::deque _tmp{}; - std::swap(_end, _tmp); - for(auto& eitr : _tmp) - { - auto mitr = find(eitr, _begin); - if(mitr == _begin.end()) - _end.emplace_back(eitr); - else - { - *mitr += eitr; - _delta.emplace_back(*mitr); - _begin.erase(mitr); - } - } - _tmp.clear(); - - OMNITRACE_CT_DEBUG( - "[%s] %zu begin and %zu end call-chain entries were not matched...\n", - __FUNCTION__, _begin.size(), _end.size()); - - call_chain _combined{}; - _combined.reserve(_delta.size() + _begin.size() + _end.size()); - for(auto& itr : _delta) - _combined.emplace_back(itr); - for(auto& itr : _begin) - _combined.emplace_back(itr); - for(auto& itr : _end) - _combined.emplace_back(itr); - - OMNITRACE_CT_DEBUG("[%s] sorting %zu combined call-chain entries...\n", __FUNCTION__, - _combined.size()); - - std::sort(_combined.begin(), _combined.end()); - - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - std::unique_lock _lk{ complete_call_mutex }; - for(auto& itr : _combined) - _targ.emplace_back(itr); - - // squash_critical_path(_targ); -} - -void -update_critical_path(call_chain _chain, int64_t) -{ - OMNITRACE_CT_DEBUG("[%s] updating critical path with %zu entries...\n", __FUNCTION__, - _chain.size()); - try - { - // remove any data not - // auto _diff_tid = [_tid](const entry& _v) { return _v.tid != _tid; }; - //_chain.erase(std::remove_if(_chain.begin(), _chain.end(), _diff_tid), - // _chain.end()); - combine_critical_path(complete_call_chain, std::move(_chain)); - } catch(const std::exception& e) - { - std::cerr << "Thread exited with exception: " << e.what() << std::endl; - TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(true, 32); - } -} - -void -compute_critical_trace() -{ - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - - static bool _computed = false; - std::unique_lock _lk{ complete_call_mutex }; - - if(_computed) return; - - OMNITRACE_CONDITIONAL_PRINT(get_critical_trace_debug() || get_verbose() >= 0, - "[%s] Generating critical trace...\n", __FUNCTION__); - - // ensure all hash ids exist - copy_hash_ids(); - - using perfstats_t = - tim::lightweight_tuple; - - perfstats_t _ct_perf{}; - _ct_perf.start(); - - try - { - OMNITRACE_VERBOSE_F(1, "[%s] initial call chain: %zu entries\n", __FUNCTION__, - complete_call_chain.size()); - - perfstats_t _perf{ get_perf_name(__FUNCTION__) }; - _perf.start(); - - std::sort(complete_call_chain.begin(), complete_call_chain.end()); - - _perf.stop().rekey("Sorting critical trace"); - OMNITRACE_VERBOSE_F(1, "%s\n", JOIN("", _perf).c_str()); - - _perf.reset().start(); - save_call_chain_json( - tim::settings::compose_output_filename("call-chain", ".json"), "call_chain", - complete_call_chain, true, __FUNCTION__); - - _perf.stop().rekey("Save call-chain"); - OMNITRACE_VERBOSE_F(1, "%s\n", JOIN("", _perf).c_str()); - - } catch(std::exception& e) - { - OMNITRACE_PRINT_F("Thread exited '%s' with exception: %s\n", __FUNCTION__, - e.what()); - TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(true, 32); - } - - OMNITRACE_PRINT_F("%s\n", _ct_perf.stop().as_string().c_str()); -} -} // namespace - -std::vector> -get_entries(const std::function& _eval) -{ - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - tasking::join(); - copy_hash_ids(); - squash_critical_path(complete_call_chain); - std::sort(complete_call_chain.begin(), complete_call_chain.end()); - - auto _v = std::vector>{}; - for(const auto& itr : complete_call_chain) - { - if(itr.phase != Phase::DELTA) continue; - if(_eval(itr)) _v.emplace_back(tim::get_hash_identifier(itr.hash), itr); - } - return _v; -} - -} // namespace critical_trace -} // namespace omnitrace diff --git a/source/lib/omnitrace/library/critical_trace.hpp b/source/lib/omnitrace/library/critical_trace.hpp deleted file mode 100644 index b0f72d6f4..000000000 --- a/source/lib/omnitrace/library/critical_trace.hpp +++ /dev/null @@ -1,370 +0,0 @@ -// MIT License -// -// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include "core/common.hpp" -#include "core/config.hpp" -#include "core/defines.hpp" -#include "core/perfetto.hpp" -#include "library/runtime.hpp" -#include "library/thread_data.hpp" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace omnitrace -{ -namespace critical_trace -{ -enum class Device : uint8_t -{ - NONE = 0, - CPU, - GPU, - ANY, -}; - -enum class Phase : uint8_t -{ - NONE = 0, - BEGIN, - END, - DELTA, -}; - -struct OMNITRACE_ATTRIBUTE(packed) entry -{ - entry() = default; - ~entry() = default; - entry(const entry&) = default; - entry(entry&&) noexcept = default; - entry& operator=(const entry&) = default; - entry& operator=(entry&&) noexcept = default; - - Device device = Device::CPU; /// which device it executed on - Phase phase = Phase::NONE; /// start / stop / unspecified - uint16_t priority = 0; /// priority value (for sorting) - uint32_t depth = 0; /// call-stack depth - int32_t devid = 0; /// device id - int32_t pid = 0; /// process id - int32_t tid = 0; /// thread id it was registered on - uint64_t cpu_cid = 0; /// CPU correlation id - uint64_t gpu_cid = 0; /// GPU correlation id - uint64_t parent_cid = 0; /// parent CPU correlation id - int64_t begin_ns = 0; /// timestamp of start - int64_t end_ns = 0; /// timestamp of end - uintptr_t queue_id = 0; /// stream id (GPU) or mutex id - size_t hash = 0; /// hash for name - - bool operator==(const entry& rhs) const; - bool operator!=(const entry& rhs) const { return !(*this == rhs); } - bool operator<(const entry& rhs) const; - bool operator>(const entry& rhs) const; - bool operator<=(const entry& rhs) const { return !(*this > rhs); } - bool operator>=(const entry& rhs) const { return !(*this < rhs); } - - entry& operator+=(const entry& rhs); - - size_t get_hash() const; - int64_t get_timestamp() const; - - int64_t get_cost() const; - - void write(std::ostream& _os) const; - - friend std::ostream& operator<<(std::ostream& _os, const entry& _v) - { - _v.write(_os); - return _os; - } - template - void save(Archive& ar, unsigned int) const; - - template - void load(Archive& ar, unsigned int); -}; - -template -void -entry::save(Archive& ar, unsigned int) const -{ - namespace cereal = tim::cereal; - -#define SAVE_PACKED_ENTRY_FIELD(VAR) \ - { \ - auto _val = VAR; \ - ar(cereal::make_nvp(#VAR, _val)); \ - } - SAVE_PACKED_ENTRY_FIELD(priority); - SAVE_PACKED_ENTRY_FIELD(device); - SAVE_PACKED_ENTRY_FIELD(phase); - SAVE_PACKED_ENTRY_FIELD(depth); - SAVE_PACKED_ENTRY_FIELD(devid); - SAVE_PACKED_ENTRY_FIELD(pid); - SAVE_PACKED_ENTRY_FIELD(tid); - SAVE_PACKED_ENTRY_FIELD(cpu_cid); - SAVE_PACKED_ENTRY_FIELD(gpu_cid); - SAVE_PACKED_ENTRY_FIELD(parent_cid); - SAVE_PACKED_ENTRY_FIELD(begin_ns); - SAVE_PACKED_ENTRY_FIELD(end_ns); - SAVE_PACKED_ENTRY_FIELD(queue_id); - SAVE_PACKED_ENTRY_FIELD(hash); -#undef SAVE_PACKED_ENTRY_FIELD - - std::string _name{}; - auto _hash = hash; - if(_hash > 0) _name = tim::get_hash_identifier(_hash); - - ar(cereal::make_nvp("name", _name), - cereal::make_nvp("demangled_name", tim::demangle(_name))); -} - -template -void -entry::load(Archive& ar, unsigned int) -{ - namespace cereal = tim::cereal; - -#define LOAD_PACKED_ENTRY_FIELD(VAR) \ - { \ - auto _val = VAR; \ - ar(cereal::make_nvp(#VAR, _val)); \ - VAR = _val; \ - } - LOAD_PACKED_ENTRY_FIELD(priority); - LOAD_PACKED_ENTRY_FIELD(device); - LOAD_PACKED_ENTRY_FIELD(phase); - LOAD_PACKED_ENTRY_FIELD(depth); - LOAD_PACKED_ENTRY_FIELD(devid); - LOAD_PACKED_ENTRY_FIELD(pid); - LOAD_PACKED_ENTRY_FIELD(tid); - LOAD_PACKED_ENTRY_FIELD(cpu_cid); - LOAD_PACKED_ENTRY_FIELD(gpu_cid); - LOAD_PACKED_ENTRY_FIELD(parent_cid); - LOAD_PACKED_ENTRY_FIELD(begin_ns); - LOAD_PACKED_ENTRY_FIELD(end_ns); - LOAD_PACKED_ENTRY_FIELD(queue_id); - LOAD_PACKED_ENTRY_FIELD(hash); -#undef LOAD_PACKED_ENTRY_FIELD - - std::string _name{}; - std::string _demangled_name{}; - ar(cereal::make_nvp("name", _name), - cereal::make_nvp("demangled_name", _demangled_name)); - - auto _hash = hash; - tim::get_hash_ids()->emplace(_hash, _name); -} - -struct call_chain : private std::vector -{ - using base_type = std::vector; - - using base_type::at; - using base_type::back; - using base_type::begin; - using base_type::cbegin; - using base_type::cend; - using base_type::clear; - using base_type::emplace_back; - using base_type::empty; - using base_type::end; - using base_type::erase; - using base_type::front; - using base_type::pop_back; - using base_type::push_back; - using base_type::rbegin; - using base_type::rend; - using base_type::reserve; - using base_type::size; - - int64_t get_cost(int64_t _tid = -1) const; - - bool operator==(const call_chain& rhs) const; - bool operator!=(const call_chain& rhs) const { return !(*this == rhs); } - friend std::ostream& operator<<(std::ostream& _os, const call_chain& _v) - { - size_t _n = 0; - for(const auto& itr : _v) - _os << " [" << _n++ << "] " << itr << "\n"; - return _os; - } - - template - void serialize(Archive& ar, unsigned int) - { - namespace cereal = tim::cereal; - ar(cereal::make_nvp("call_chain", static_cast(*this))); - } - - template - void generate_perfetto(::perfetto::Track, std::set& _used) const; - - template - bool query(FuncT&&) const; -}; - -template -bool -call_chain::query(FuncT&& _func) const -{ - for(const auto& itr : *this) - { - if(std::forward(_func)(itr)) return BoolV; - } - return !BoolV; -} - -using hash_ids = std::unordered_set; - -uint64_t -get_update_frequency(); - -unique_ptr_t& -get(int64_t _tid = threading::get_id()); - -size_t -add_hash_id(const std::string& _label); - -void -add_hash_id(const hash_ids&); - -void -update(int64_t _tid = threading::get_id()); - -void -compute(int64_t _tid = threading::get_id()); - -std::vector> -get_entries(const std::function& _eval = [](const entry&) { - return true; -}); - -struct id -{}; - -} // namespace critical_trace - -template -inline void -add_critical_trace(int32_t _targ_tid, size_t _cpu_cid, size_t _gpu_cid, - size_t _parent_cid, int64_t _ts_beg, int64_t _ts_val, int32_t _devid, - uintptr_t _queue, size_t _hash, uint32_t _depth, uint16_t _prio = 0) -{ - // clang-format off - // these are used to create unique type mutexes - struct critical_insert {}; - struct cpu_cid_stack {}; - // clang-format on - - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - - static constexpr auto num_mutexes = max_supported_threads; - static auto _update_freq = critical_trace::get_update_frequency(); - static auto _pid = process::get_id(); - auto _self_tid = threading::get_id(); - - if constexpr(PhaseID != critical_trace::Phase::NONE) - { - auto& _self_mtx = - type_mutex(_self_tid); - - auto_lock_t _self_lk{ _self_mtx, std::defer_lock }; - - // unique lock per thread - if(!_self_lk.owns_lock()) _self_lk.lock(); - - auto& _critical_trace = critical_trace::get(_self_tid); - _critical_trace->emplace_back(critical_trace::entry{ - DevID, PhaseID, _prio, _depth, _devid, _pid, _targ_tid, _cpu_cid, _gpu_cid, - _parent_cid, _ts_beg, _ts_val, _queue, _hash }); - } - - if constexpr(UpdateStack) - { - auto& _self_mtx = get_cpu_cid_stack_lock(_self_tid); - auto& _targ_mtx = get_cpu_cid_stack_lock(_targ_tid); - - auto_lock_t _self_lk{ _self_mtx, std::defer_lock }; - auto_lock_t _targ_lk{ _targ_mtx, std::defer_lock }; - - // unique lock per thread - auto _lock = [&_self_lk, &_targ_lk, _self_tid, _targ_tid]() { - if(!_self_lk.owns_lock() && _self_tid != _targ_tid) _self_lk.lock(); - if(!_targ_lk.owns_lock()) _targ_lk.lock(); - }; - - if constexpr(PhaseID == critical_trace::Phase::NONE) - { - _lock(); - get_cpu_cid_stack(_targ_tid)->emplace_back(_cpu_cid); - } - else if constexpr(PhaseID == critical_trace::Phase::BEGIN) - { - _lock(); - get_cpu_cid_stack(_targ_tid)->emplace_back(_cpu_cid); - } - else if constexpr(PhaseID == critical_trace::Phase::END) - { - _lock(); - get_cpu_cid_stack(_targ_tid)->pop_back(); - if(_gpu_cid == 0 && _cpu_cid % _update_freq == (_update_freq - 1)) - critical_trace::update(_targ_tid); - } - tim::consume_parameters(_lock); - } - - tim::consume_parameters(_pid, _targ_tid, _cpu_cid, _gpu_cid, _parent_cid, _ts_beg, - _ts_val, _devid, _queue, _hash, _depth, _prio, num_mutexes); -} -} // namespace omnitrace - -namespace std -{ -inline std::string -to_string(::omnitrace::critical_trace::Device _v) -{ - using Device = ::omnitrace::critical_trace::Device; - switch(_v) - { - case Device::NONE: return std::string{}; - case Device::CPU: return std::string{ "CPU" }; - case Device::GPU: return std::string{ "GPU" }; - case Device::ANY: return std::string{ "CPU + GPU" }; - } - return std::string{ "Unknown Device" }; -} -} // namespace std diff --git a/source/lib/omnitrace/library/perf.cpp b/source/lib/omnitrace/library/perf.cpp index 8d71440b6..476515b6d 100644 --- a/source/lib/omnitrace/library/perf.cpp +++ b/source/lib/omnitrace/library/perf.cpp @@ -633,6 +633,11 @@ perf_event::record::locate_field() const if constexpr(SampleT == sample::last) return reinterpret_cast(p); OMNITRACE_FATAL << "Unsupported sample field requested!"; + + if constexpr(std::is_pointer::value) + return nullptr; + else + return Tp{}; } namespace diff --git a/source/lib/omnitrace/library/ptl.cpp b/source/lib/omnitrace/library/ptl.cpp index 45b29ccdc..5b9638138 100644 --- a/source/lib/omnitrace/library/ptl.cpp +++ b/source/lib/omnitrace/library/ptl.cpp @@ -129,19 +129,6 @@ get_thread_pool_state() } // namespace } // namespace roctracer -namespace critical_trace -{ -namespace -{ -auto& -get_thread_pool_state() -{ - static auto _v = State::PreInit; - return _v; -} -} // namespace -} // namespace critical_trace - void setup() { @@ -164,17 +151,6 @@ join() OMNITRACE_DEBUG_F("roctracer thread-pool is not active...\n"); } - if(critical_trace::get_thread_pool_state() == State::Active) - { - OMNITRACE_DEBUG_F("waiting for all critical trace tasks to complete...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - critical_trace::get_task_group(i).join(); - } - else - { - OMNITRACE_DEBUG_F("critical-trace thread-pool is not active...\n"); - } - if(general::get_thread_pool_state() == State::Active) { OMNITRACE_DEBUG_F("waiting for all general tasks to complete...\n"); @@ -202,22 +178,6 @@ shutdown() OMNITRACE_DEBUG_F("roctracer thread-pool is not active...\n"); } - if(critical_trace::get_thread_pool_state() == State::Active) - { - OMNITRACE_DEBUG_F("Waiting on completion of critical trace tasks...\n"); - for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) - { - critical_trace::get_task_group(i).join(); - critical_trace::get_task_group(i).clear(); - critical_trace::get_task_group(i).set_pool(nullptr); - } - critical_trace::get_thread_pool_state() = State::Finalized; - } - else - { - OMNITRACE_DEBUG_F("critical-trace thread-pool is not active...\n"); - } - if(general::get_thread_pool_state() == State::Active) { OMNITRACE_DEBUG_F("Waiting on completion of general tasks...\n"); @@ -270,18 +230,5 @@ roctracer::get_task_group(int64_t _tid) &tasking::get_thread_pool())); return *_v; } - -PTL::TaskGroup& -critical_trace::get_task_group(int64_t _tid) -{ - struct local - {}; - using thread_data_t = thread_data, local>; - static thread_local auto& _v = - (critical_trace::get_thread_pool_state() = State::Active, - thread_data_t::instance(construct_on_thread{ _tid }, - &tasking::get_thread_pool())); - return *_v; -} } // namespace tasking } // namespace omnitrace diff --git a/source/lib/omnitrace/library/ptl.hpp b/source/lib/omnitrace/library/ptl.hpp index b3630216b..4980673d8 100644 --- a/source/lib/omnitrace/library/ptl.hpp +++ b/source/lib/omnitrace/library/ptl.hpp @@ -67,17 +67,5 @@ namespace roctracer PTL::TaskGroup& get_task_group(int64_t _tid = utility::get_thread_index()); } // namespace roctracer - -//--------------------------------------------------------------------------------------// -// -// critical trace -// -//--------------------------------------------------------------------------------------// - -namespace critical_trace -{ -PTL::TaskGroup& -get_task_group(int64_t _tid = utility::get_thread_index()); -} // namespace critical_trace } // namespace tasking } // namespace omnitrace diff --git a/source/lib/omnitrace/library/rocm_smi.cpp b/source/lib/omnitrace/library/rocm_smi.cpp index 7170a250c..b65a7731c 100644 --- a/source/lib/omnitrace/library/rocm_smi.cpp +++ b/source/lib/omnitrace/library/rocm_smi.cpp @@ -38,7 +38,6 @@ #include "core/gpu.hpp" #include "core/perfetto.hpp" #include "core/state.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" @@ -326,55 +325,6 @@ data::post_process(uint32_t _dev_id) }; if(get_use_perfetto()) _process_perfetto(); - - if(!get_use_timemory()) return; - -#if !defined(TIMEMORY_USE_MPI) - // timemory + MPI here causes hangs for some reason. it is unclear why - using samp_bundle_t = tim::lightweight_tuple; - - trait::runtime_enabled::set(_settings.busy); - trait::runtime_enabled::set(_settings.temp); - trait::runtime_enabled::set(_settings.power); - trait::runtime_enabled::set(_settings.mem_usage); - - using entry_t = critical_trace::entry; - auto _gpu_entries = critical_trace::get_entries( - [](const entry_t& _e) { return (_e.device == critical_trace::Device::GPU); }); - - for(auto& itr : _rocm_smi) - { - auto _ts = itr.m_ts; - if(!_thread_info->is_valid_time(_ts)) continue; - - auto _entries = std::vector>{}; - for(const auto& eitr : _gpu_entries) - { - if(_ts >= eitr.second.begin_ns && _ts <= eitr.second.end_ns) - _entries.emplace_back(std::string_view{ eitr.first }, &eitr.second); - } - - std::vector _tc{}; - _tc.reserve(_entries.size()); - for(auto& eitr : _entries) - { - auto& _v = _tc.emplace_back(eitr.first); - _v.push(); - _v.start(); - _v.stop(); - - GPU_METRIC(sampling_gpu_busy, m_busy_perc) - GPU_METRIC(sampling_gpu_temp, m_temp / 1.0e3) // provided in milli-degree C - GPU_METRIC(sampling_gpu_power, - m_power * units::microwatt / static_cast(units::watt)) - GPU_METRIC(sampling_gpu_memory, - m_mem_usage / static_cast(units::megabyte)) - - _v.pop(); - } - } -#endif } //--------------------------------------------------------------------------------------// diff --git a/source/lib/omnitrace/library/roctracer.cpp b/source/lib/omnitrace/library/roctracer.cpp index abb96eefb..224f59728 100644 --- a/source/lib/omnitrace/library/roctracer.cpp +++ b/source/lib/omnitrace/library/roctracer.cpp @@ -28,7 +28,6 @@ #include "core/debug.hpp" #include "core/locking.hpp" #include "library/components/category_region.hpp" -#include "library/critical_trace.hpp" #include "library/runtime.hpp" #include "library/sampling.hpp" #include "library/thread_data.hpp" @@ -129,32 +128,6 @@ get_roctracer_tid_data() return _v; } -using cid_tuple_t = std::tuple; -struct cid_data : cid_tuple_t -{ - using cid_tuple_t::cid_tuple_t; - - OMNITRACE_DEFAULT_OBJECT(cid_data) - - auto& cid() { return std::get<0>(*this); } - auto& pcid() { return std::get<1>(*this); } - auto& depth() { return std::get<2>(*this); } - auto& queue() { return std::get<3>(*this); } - - auto cid() const { return std::get<0>(*this); } - auto pcid() const { return std::get<1>(*this); } - auto depth() const { return std::get<2>(*this); } - auto queue() const { return std::get<3>(*this); } -}; - -auto& -get_roctracer_cid_data(int64_t _tid = threading::get_id()) -{ - using thread_data_t = - thread_data, category::roctracer>; - return thread_data_t::instance(construct_on_thread{ _tid }); -} - auto& get_hip_activity_callbacks(int64_t _tid = threading::get_id()) { @@ -562,9 +535,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - assert(domain == ACTIVITY_DOMAIN_HIP_API); const char* op_name = roctracer_op_string(domain, cid, 0); if(op_name == nullptr) op_name = hip_api_name(cid); @@ -591,88 +561,12 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* op_name, cid, data->correlation_id, (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); - int64_t _ts = comp::wall_clock::record(); - auto _tid = threading::get_id(); - uint64_t _crit_cid = 0; - uint64_t _parent_crit_cid = 0; - uint32_t _depth = 0; - uintptr_t _queue = 0; - auto _roct_cid = data->correlation_id; - -#define OMNITRACE_HIP_API_QUEUE_CASE(API_FUNC, VARIABLE) \ - case HIP_API_ID_##API_FUNC: \ - _queue = reinterpret_cast(data->args.API_FUNC.VARIABLE); \ - break; - -#define OMNITRACE_HIP_API_QUEUE_CASE_ALT(API_FUNC, UNION, VARIABLE) \ - case HIP_API_ID_##API_FUNC: \ - _queue = reinterpret_cast(data->args.UNION.VARIABLE); \ - break; - - switch(cid) - { - OMNITRACE_HIP_API_QUEUE_CASE(hipLaunchKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipModuleLaunchKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipHccModuleLaunchKernel, hStream) - OMNITRACE_HIP_API_QUEUE_CASE(hipLaunchCooperativeKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtLaunchKernel, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtModuleLaunchKernel, hStream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtStreamCreateWithCUMask, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipExtStreamGetCUMask, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamSynchronize, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipConfigureCall, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipDrvMemcpy3DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipEventRecord, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemPrefetchAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy2DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy2DFromArrayAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy3DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyDtoDAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyDtoHAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyFromSymbolAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyHtoDAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyParam2DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyPeerAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyToSymbolAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpyWithStream, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemset2DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemset3DAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetD16Async, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetD32Async, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipMemsetD8Async, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamAddCallback, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamAttachMemAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamDestroy, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetFlags, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetPriority, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamQuery, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWaitEvent, stream) -#if OMNITRACE_HIP_VERSION >= 40300 - OMNITRACE_HIP_API_QUEUE_CASE(hipMemcpy2DToArrayAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWaitValue32, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWaitValue64, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWriteValue32, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamWriteValue64, stream) -#endif -#if OMNITRACE_HIP_VERSION >= 40500 - OMNITRACE_HIP_API_QUEUE_CASE(hipGraphLaunch, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipGraphicsMapResources, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipGraphicsUnmapResources, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipSignalExternalSemaphoresAsync, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamBeginCapture, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamEndCapture, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipWaitExternalSemaphoresAsync, stream) -#endif -#if OMNITRACE_HIP_VERSION >= 50000 - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamIsCapturing, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetCaptureInfo, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamGetCaptureInfo_v2, stream) - OMNITRACE_HIP_API_QUEUE_CASE(hipStreamUpdateCaptureDependencies, stream) -#endif - default: break; - } + int64_t _ts = comp::wall_clock::record(); + auto _tid = threading::get_id(); + uint64_t _crit_cid = 0; + uint64_t _parent_crit_cid = 0; + uint32_t _depth = 0; + auto _roct_cid = data->correlation_id; auto& _device_id = get_current_device(); @@ -863,15 +757,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* get_roctracer_hip_data()->erase(itr.first); } } - if(get_use_critical_trace() || get_use_rocm_smi()) - { - add_critical_trace( - _tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, 0, _device_id, _queue, - critical_trace::add_hash_id(op_name), _depth); - } - - get_roctracer_cid_data(_tid)->emplace( - _roct_cid, cid_data{ _crit_cid, _parent_crit_cid, _depth, _queue }); hip_exec_activity_callbacks(_tid); } @@ -879,9 +764,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* { hip_exec_activity_callbacks(_tid); - std::tie(_crit_cid, _parent_crit_cid, _depth, std::ignore) = - get_roctracer_cid_data(_tid)->at(_roct_cid); - if(get_use_perfetto()) { tracing::pop_perfetto_ts( @@ -913,12 +795,6 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* } } } - if(get_use_critical_trace() || get_use_rocm_smi()) - { - add_critical_trace( - _tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, _ts, _device_id, - _queue, critical_trace::add_hash_id(op_name), _depth); - } } tim::consume_parameters(arg); } @@ -935,9 +811,6 @@ hip_activity_callback(const char* begin, const char* end, void* arg) auto&& _protect = comp::roctracer::protect_flush_activity(); (void) _protect; - using Device = critical_trace::Device; - using Phase = critical_trace::Phase; - if(!trait::runtime_enabled::get()) return; static auto _kernel_names = std::unordered_map{}; static auto _indexes = std::unordered_map{}; @@ -982,17 +855,12 @@ hip_activity_callback(const char* begin, const char* end, void* arg) auto& _keys = get_roctracer_key_data(); auto& _tids = get_roctracer_tid_data(); - int16_t _depth = 0; // depth of kernel launch - int64_t _tid = 0; // thread id - uint64_t _crit_cid = 0; // correlation id - uint64_t _pcid = 0; // parent corr_id - int32_t _devid = record->device_id; // device id - int64_t _queid = record->queue_id; // queue id - uintptr_t _queue = 0; // Host queue (stream) - auto _laps = _indexes[_roct_cid]++; // see note #1 - const char* _name = nullptr; - bool _found = false; - bool _critical_trace = get_use_critical_trace() || get_use_rocm_smi(); + int64_t _tid = 0; // thread id + int32_t _devid = record->device_id; // device id + int64_t _queid = record->queue_id; // queue id + uintptr_t _queue = 0; // Host queue (stream) + const char* _name = nullptr; + bool _found = false; { locking::atomic_lock _lk{ roctracer_type_mutex() }; @@ -1008,21 +876,6 @@ hip_activity_callback(const char* begin, const char* end, void* arg) if(_name == nullptr && op_name == nullptr) continue; if(_name == nullptr) _name = op_name; - if(_critical_trace) - { - auto& _crit_cids = get_roctracer_cid_data(_tid); - if(_crit_cids->find(_roct_cid) != _crit_cids->end()) - std::tie(_crit_cid, _pcid, _depth, _queue) = _crit_cids->at(_roct_cid); - else - { - OMNITRACE_VERBOSE_F(3, - "No critical trace entry generated for \"%s\" :: " - "unknown correlation id...\n", - _name); - _critical_trace = false; - } - } - static auto _op_id_names = std::array{ "DISPATCH", "COPY", "BARRIER" }; @@ -1094,15 +947,6 @@ hip_activity_callback(const char* begin, const char* end, void* arg) tracing::pop_perfetto_track(category::device_hip{}, "", _track, _end_ns); } - if(_critical_trace) - { - auto _hash = critical_trace::add_hash_id(_name); - uint16_t _prio = _laps + 1; // priority - add_critical_trace( - _tid, _crit_cid, _roct_cid, _crit_cid, _beg_ns, _end_ns, _devid, _queid, - _hash, _depth + 1, _prio); - } - if(_found && _name != nullptr && get_use_timemory()) { auto _func = [_beg_ns, _end_ns, _name]() { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 284103d54..cd695bca8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -17,7 +17,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-openmp-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-code-coverage-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-fork-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-time-window-tests.cmake) -include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-critical-trace-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-attach-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-rccl-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/omnitrace-overflow-tests.cmake) diff --git a/tests/omnitrace-critical-trace-tests.cmake b/tests/omnitrace-critical-trace-tests.cmake deleted file mode 100644 index 761ed83a8..000000000 --- a/tests/omnitrace-critical-trace-tests.cmake +++ /dev/null @@ -1,54 +0,0 @@ -# -------------------------------------------------------------------------------------- # -# -# critical-trace tests -# -# -------------------------------------------------------------------------------------- # - -omnitrace_add_test( - SKIP_BASELINE SKIP_RUNTIME SKIP_SAMPLING - NAME parallel-overhead-critical-trace - TARGET parallel-overhead - LABELS "critical-trace" - REWRITE_ARGS - -e - -i - 8 - -E - "^fib" - -v - 2 - --print-instrumented - functions - RUN_ARGS 10 4 100 - ENVIRONMENT "${_critical_trace_environment}") - -add_test( - NAME parallel-overhead-process-critical-trace - COMMAND - $ - ${PROJECT_BINARY_DIR}/omnitrace-tests-output/parallel-overhead-critical-trace-binary-rewrite/call-chain.json - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) - -set(_parallel_overhead_critical_trace_environ - "OMNITRACE_OUTPUT_PATH=omnitrace-tests-output" - "OMNITRACE_OUTPUT_PREFIX=parallel-overhead-critical-trace/" - "OMNITRACE_CRITICAL_TRACE_DEBUG=ON" - "OMNITRACE_VERBOSE=4" - "OMNITRACE_USE_PID=OFF" - "OMNITRACE_TIME_OUTPUT=OFF" - "OMNITRACE_CI=ON" - "OMNITRACE_CI_TIMEOUT=300") - -set_tests_properties( - parallel-overhead-process-critical-trace - PROPERTIES - ENVIRONMENT - "${_parallel_overhead_critical_trace_environ}" - TIMEOUT - 300 - LABELS - "parallel-overhead;critical-trace" - PASS_REGULAR_EXPRESSION - "Outputting.*(critical-trace-cpu.json).*Outputting.*(critical-trace-any.json)" - DEPENDS - parallel-overhead-critical-trace-binary-rewrite-run) diff --git a/tests/omnitrace-fork-tests.cmake b/tests/omnitrace-fork-tests.cmake index 17baec882..9fd9be134 100644 --- a/tests/omnitrace-fork-tests.cmake +++ b/tests/omnitrace-fork-tests.cmake @@ -10,7 +10,7 @@ omnitrace_add_test( REWRITE_ARGS -e -v 2 --print-instrumented modules -i 16 RUNTIME_ARGS -e -v 1 --label file -i 16 ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=ON;OMNITRACE_SAMPLING_FREQ=250;OMNITRACE_SAMPLING_REALTIME=ON" + "${_base_environment};OMNITRACE_SAMPLING_FREQ=250;OMNITRACE_SAMPLING_REALTIME=ON" SAMPLING_PASS_REGEX "fork.. called on PID" RUNTIME_PASS_REGEX "fork.. called on PID" REWRITE_RUN_PASS_REGEX "fork.. called on PID" diff --git a/tests/omnitrace-instrument-tests.cmake b/tests/omnitrace-instrument-tests.cmake index db5d01041..c935510df 100644 --- a/tests/omnitrace-instrument-tests.cmake +++ b/tests/omnitrace-instrument-tests.cmake @@ -40,7 +40,7 @@ omnitrace_add_test( return args RUN_ARGS 10 ${NUM_THREADS} 1000 - ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF") + ENVIRONMENT "${_base_environment}") omnitrace_add_test( SKIP_BASELINE SKIP_RUNTIME diff --git a/tests/omnitrace-kokkos-tests.cmake b/tests/omnitrace-kokkos-tests.cmake index f496b1c4b..156f369bb 100644 --- a/tests/omnitrace-kokkos-tests.cmake +++ b/tests/omnitrace-kokkos-tests.cmake @@ -26,7 +26,7 @@ omnitrace_add_test( LABELS "kokkos;kokkos-profile-library" RUN_ARGS -i 25 -s 20 -p ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;OMNITRACE_KOKKOSP_PREFIX=[kokkos];KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" + "${_base_environment};OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;OMNITRACE_KOKKOSP_PREFIX=[kokkos];KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" REWRITE_RUN_PASS_REGEX "\\|_\\[kokkos\\] [a-zA-Z]" RUNTIME_PASS_REGEX "\\|_\\[kokkos\\] [a-zA-Z]") @@ -40,7 +40,7 @@ omnitrace_add_test( LABELS "kokkos;kokkos-profile-library" RUN_ARGS -i 10 -s 20 -p ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;OMNITRACE_KOKKOSP_PREFIX=[kokkos];KOKKOS_PROFILE_LIBRARY=libomnitrace.so" + "${_base_environment};OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;OMNITRACE_KOKKOSP_PREFIX=[kokkos];KOKKOS_PROFILE_LIBRARY=libomnitrace.so" BASELINE_PASS_REGEX "\\|_\\[kokkos\\] [a-zA-Z]") omnitrace_add_test( @@ -53,7 +53,7 @@ omnitrace_add_test( LABELS "kokkos;kokkos-profile-library" RUN_ARGS -i 10 -s 20 -p ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;OMNITRACE_KOKKOSP_PREFIX=[kokkos];KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" + "${_base_environment};OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;OMNITRACE_KOKKOSP_PREFIX=[kokkos];KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" BASELINE_PASS_REGEX "\\|_\\[kokkos\\] [a-zA-Z]") omnitrace_add_test( @@ -77,8 +77,7 @@ omnitrace_add_test( -ME [==[lib(gomp|m-)]==] RUN_ARGS -i 10 -s 20 -p - ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON") + ENVIRONMENT "${_base_environment};OMNITRACE_USE_KOKKOSP=ON") omnitrace_add_test( SKIP_BASELINE @@ -100,8 +99,7 @@ omnitrace_add_test( -ME [==[libgomp]==] RUN_ARGS -i 10 -s 20 -p - ENVIRONMENT - "${_perfetto_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=OFF") + ENVIRONMENT "${_perfetto_environment};OMNITRACE_USE_KOKKOSP=OFF") omnitrace_add_test( NAME lulesh-timemory @@ -122,6 +120,5 @@ omnitrace_add_test( --env OMNITRACE_TIMEMORY_COMPONENTS="wall_clock peak_rss" RUN_ARGS -i 10 -s 20 -p - ENVIRONMENT - "${_timemory_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=OFF" + ENVIRONMENT "${_timemory_environment};OMNITRACE_USE_KOKKOSP=OFF" REWRITE_FAIL_REGEX "0 instrumented loops in procedure") diff --git a/tests/omnitrace-rocm-tests.cmake b/tests/omnitrace-rocm-tests.cmake index cf40180ad..8c576b763 100644 --- a/tests/omnitrace-rocm-tests.cmake +++ b/tests/omnitrace-rocm-tests.cmake @@ -26,7 +26,7 @@ omnitrace_add_test( args -E uniform_int_distribution - ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=ON") + ENVIRONMENT "${_base_environment}") omnitrace_add_test( SKIP_REWRITE SKIP_RUNTIME @@ -37,7 +37,7 @@ omnitrace_add_test( NUM_PROCS 1 RUN_ARGS 1 2 2 ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_ROCTRACER_HSA_ACTIVITY=OFF;OMNITRACE_ROCTRACER_HSA_API=OFF" + "${_base_environment};OMNITRACE_ROCTRACER_HSA_ACTIVITY=OFF;OMNITRACE_ROCTRACER_HSA_API=OFF" ) omnitrace_add_test( @@ -61,7 +61,7 @@ omnitrace_add_test( -E uniform_int_distribution RUN_ARGS 2 100 50 - ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF" + ENVIRONMENT "${_base_environment}" REWRITE_FAIL_REGEX "0 instrumented loops in procedure transpose") if(OMNITRACE_USE_ROCPROFILER) @@ -75,7 +75,7 @@ if(OMNITRACE_USE_ROCPROFILER) NUM_PROCS ${NUM_PROCS} REWRITE_ARGS -e -v 2 -E uniform_int_distribution ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" + "${_base_environment};OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" REWRITE_RUN_PASS_REGEX "rocprof-device-0-GRBM_COUNT.txt(.*)rocprof-device-0-GPUBusy.txt(.*)rocprof-device-0-SQ_WAVES.txt(.*)rocprof-device-0-SQ_INSTS_VALU.txt(.*)rocprof-device-0-VALUInsts.txt(.*)rocprof-device-0-TCC_HIT_sum.txt(.*)rocprof-device-0-TA_TA_BUSY_0.txt(.*)rocprof-device-0-TA_TA_BUSY_11.txt" ) @@ -90,7 +90,7 @@ if(OMNITRACE_USE_ROCPROFILER) NUM_PROCS ${NUM_PROCS} REWRITE_ARGS -e -v 2 -E uniform_int_distribution ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_ROCTRACER=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" + "${_base_environment};OMNITRACE_USE_ROCTRACER=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" REWRITE_RUN_PASS_REGEX "rocprof-device-0-GRBM_COUNT.txt(.*)rocprof-device-0-GPUBusy.txt(.*)rocprof-device-0-SQ_WAVES.txt(.*)rocprof-device-0-SQ_INSTS_VALU.txt(.*)rocprof-device-0-VALUInsts.txt(.*)rocprof-device-0-TCC_HIT_sum.txt(.*)rocprof-device-0-TA_TA_BUSY_0.txt(.*)rocprof-device-0-TA_TA_BUSY_11.txt" REWRITE_RUN_FAIL_REGEX "roctracer.txt|OMNITRACE_ABORT_FAIL_REGEX") diff --git a/tests/omnitrace-testing.cmake b/tests/omnitrace-testing.cmake index 743a32a6a..22763ae7d 100644 --- a/tests/omnitrace-testing.cmake +++ b/tests/omnitrace-testing.cmake @@ -80,7 +80,6 @@ set(_lock_environment "OMNITRACE_USE_SAMPLING=ON" "OMNITRACE_USE_PROCESS_SAMPLING=OFF" "OMNITRACE_SAMPLING_FREQ=750" - "OMNITRACE_CRITICAL_TRACE=ON" "OMNITRACE_COLLAPSE_THREADS=ON" "OMNITRACE_TRACE_THREAD_LOCKS=ON" "OMNITRACE_TRACE_THREAD_SPIN_LOCKS=ON" @@ -91,26 +90,11 @@ set(_lock_environment "OMNITRACE_VERBOSE=2" "${_test_library_path}") -set(_critical_trace_environment - "OMNITRACE_VERBOSE=2" - "OMNITRACE_USE_SAMPLING=OFF" - "OMNITRACE_USE_PROCESS_SAMPLING=OFF" - "OMNITRACE_CRITICAL_TRACE=ON" - "OMNITRACE_CRITICAL_TRACE_DEBUG=ON" - "OMNITRACE_TRACE_THREAD_LOCKS=ON" - "OMNITRACE_TRACE_THREAD_SPIN_LOCKS=ON" - "OMNITRACE_TRACE_THREAD_RW_LOCKS=ON" - "OMNITRACE_COUT_OUTPUT=ON" - "OMNITRACE_TIME_OUTPUT=OFF" - "OMNITRACE_TIMELINE_PROFILE=OFF" - "${_test_library_path}") - set(_ompt_environment "OMNITRACE_TRACE=ON" "OMNITRACE_PROFILE=ON" "OMNITRACE_TIME_OUTPUT=OFF" "OMNITRACE_USE_OMPT=ON" - "OMNITRACE_CRITICAL_TRACE=OFF" "OMNITRACE_TIMEMORY_COMPONENTS=wall_clock,trip_count,peak_rss" "${_test_openmp_env}" "${_test_library_path}") @@ -136,7 +120,7 @@ set(_timemory_environment "${_test_openmp_env}" "${_test_library_path}") -set(_test_environment ${_base_environment} "OMNITRACE_CRITICAL_TRACE=OFF") +set(_test_environment ${_base_environment}) set(_causal_environment "${_test_openmp_env}" "${_test_library_path}" "OMNITRACE_TIME_OUTPUT=OFF" @@ -159,7 +143,6 @@ set(_attach_environment "OMNITRACE_PROFILE=ON" "OMNITRACE_USE_SAMPLING=OFF" "OMNITRACE_USE_PROCESS_SAMPLING=ON" - "OMNITRACE_USE_CRITICAL_TRACE=OFF" "OMNITRACE_USE_OMPT=ON" "OMNITRACE_USE_KOKKOSP=ON" "OMNITRACE_TIME_OUTPUT=OFF" diff --git a/tests/omnitrace-user-api-tests.cmake b/tests/omnitrace-user-api-tests.cmake index d26625841..106b1675b 100644 --- a/tests/omnitrace-user-api-tests.cmake +++ b/tests/omnitrace-user-api-tests.cmake @@ -23,7 +23,7 @@ omnitrace_add_test( return args RUN_ARGS 10 ${NUM_THREADS} 1000 - ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF" + ENVIRONMENT "${_base_environment}" REWRITE_RUN_PASS_REGEX "Pushing custom region :: run.10. x 1000" RUNTIME_PASS_REGEX "Pushing custom region :: run.10. x 1000" SAMPLING_PASS_REGEX "Pushing custom region :: run.10. x 1000"