From 9a875757db5a959e2d6101e4b7a8fb7554a4aca0 Mon Sep 17 00:00:00 2001 From: rbramand Date: Thu, 9 Nov 2023 18:41:56 +0530 Subject: [PATCH] Refactor code Signed-off-by: rbramand --- src/runtime_src/core/common/api/xrt_bo.cpp | 4 +- .../core/common/api/xrt_device.cpp | 2 +- .../core/common/api/xrt_hw_context.cpp | 4 +- .../core/common/api/xrt_kernel.cpp | 9 +- src/runtime_src/core/common/device.h | 4 +- src/runtime_src/core/common/usage_metrics.cpp | 292 ++++++++++-------- src/runtime_src/core/common/usage_metrics.h | 21 +- 7 files changed, 190 insertions(+), 146 deletions(-) diff --git a/src/runtime_src/core/common/api/xrt_bo.cpp b/src/runtime_src/core/common/api/xrt_bo.cpp index 196eee18204..ce7957ac5b4 100755 --- a/src/runtime_src/core/common/api/xrt_bo.cpp +++ b/src/runtime_src/core/common/api/xrt_bo.cpp @@ -291,10 +291,10 @@ class bo_impl return handle.get(); } - std::shared_ptr + xrt_core::usage_metrics::base_logger* get_usage_logger() const { - return m_usage_logger; + return m_usage_logger.get(); } // BOs can be cloned internally by XRT to statisfy kernel diff --git a/src/runtime_src/core/common/api/xrt_device.cpp b/src/runtime_src/core/common/api/xrt_device.cpp index 1585d4e1729..74167ec63f8 100644 --- a/src/runtime_src/core/common/api/xrt_device.cpp +++ b/src/runtime_src/core/common/api/xrt_device.cpp @@ -58,7 +58,7 @@ alloc_device_index(unsigned int index) XRT_TRACE_POINT_SCOPE(xrt_device_alloc_index); auto dev = xrt_core::get_userpf_device(index); - dev->get_usage_logger()->log_device_info(dev); + dev->get_usage_logger()->log_device_info(dev.get()); return dev; } diff --git a/src/runtime_src/core/common/api/xrt_hw_context.cpp b/src/runtime_src/core/common/api/xrt_hw_context.cpp index bbb0ac3c4c5..88758a8a014 100644 --- a/src/runtime_src/core/common/api/xrt_hw_context.cpp +++ b/src/runtime_src/core/common/api/xrt_hw_context.cpp @@ -118,10 +118,10 @@ class hw_context_impl : public std::enable_shared_from_this return m_hdl.get(); } - std::shared_ptr + xrt_core::usage_metrics::base_logger* get_usage_logger() { - return m_usage_logger; + return m_usage_logger.get(); } }; diff --git a/src/runtime_src/core/common/api/xrt_kernel.cpp b/src/runtime_src/core/common/api/xrt_kernel.cpp index ea4cc262eec..2b79e46818c 100644 --- a/src/runtime_src/core/common/api/xrt_kernel.cpp +++ b/src/runtime_src/core/common/api/xrt_kernel.cpp @@ -1557,7 +1557,7 @@ class kernel_impl : public std::enable_shared_from_this // amend args with computed data based on kernel protocol amend_args(); - m_usage_logger->log_kernel_info(device->core_device, hwctx, name, args.size()); + m_usage_logger->log_kernel_info(device->core_device.get(), hwctx, name, args.size()); } // Delegating constructor with no module @@ -2347,7 +2347,8 @@ class run_impl // This is in critical path, we need to reduce log overhead // as much as possible, passing kernel impl pointer instead of // constructing args in place - m_usage_logger->log_kernel_run_info(kernel.get(), this, true, ERT_CMD_STATE_NEW); + // sending state as ERT_CMD_STATE_NEW for kernel start + m_usage_logger->log_kernel_run_info(kernel.get(), this, ERT_CMD_STATE_NEW); cmd->run(); } @@ -2440,7 +2441,7 @@ class run_impl state = cmd->wait(); } - m_usage_logger->log_kernel_run_info(kernel.get(), this, false, state); + m_usage_logger->log_kernel_run_info(kernel.get(), this, state); return state; } @@ -2466,7 +2467,7 @@ class run_impl } if (state == ERT_CMD_STATE_COMPLETED) { - m_usage_logger->log_kernel_run_info(kernel.get(), this, false, state); + m_usage_logger->log_kernel_run_info(kernel.get(), this, state); return std::cv_status::no_timeout; } diff --git a/src/runtime_src/core/common/device.h b/src/runtime_src/core/common/device.h index ae08fd5cb7d..ee197b04ad1 100644 --- a/src/runtime_src/core/common/device.h +++ b/src/runtime_src/core/common/device.h @@ -467,10 +467,10 @@ class device : public ishim /** * get_usage_logger() - get usage metrics logger */ - std::shared_ptr + usage_metrics::base_logger* get_usage_logger() { - return m_usage_logger; + return m_usage_logger.get(); } private: diff --git a/src/runtime_src/core/common/usage_metrics.cpp b/src/runtime_src/core/common/usage_metrics.cpp index e269a25e936..f1977d05250 100644 --- a/src/runtime_src/core/common/usage_metrics.cpp +++ b/src/runtime_src/core/common/usage_metrics.cpp @@ -13,6 +13,7 @@ #include "core/common/query_requests.h" #include "core/common/shim/buffer_handle.h" #include "core/common/shim/hwctx_handle.h" +#include "core/include/xrt/xrt_uuid.h" #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #ifdef _WIN32 # pragma warning ( disable : 4996 ) @@ -31,11 +33,26 @@ namespace bpt = boost::property_tree; namespace { // global variables std::mutex m; -static uint32_t thread_count; +uint32_t thread_count; } // namespace namespace xrt_core::usage_metrics { +template +static met_type* +get_metrics(std::vector& metrics_vec, const find_type& finder) +{ + auto it = std::find_if(metrics_vec.begin(), metrics_vec.end(), + [finder](met_type& met) + { + return met.handle == finder; + } + ); + return it == metrics_vec.end() + ? nullptr + : &(*it); +} + struct bo_metrics { uint32_t total_count = 0; @@ -54,12 +71,34 @@ struct kernel_metrics bool is_valid = false; }; - std::string name; + std::string handle; // kernel name is used as handle for identifying kernel std::vector cu_index_vec; uint32_t total_runs = 0; std::chrono::microseconds total_time = {}; std::unordered_map exec_times; // run handle ptr is used for indexing size_t num_args; + + void + log_kernel_exec_time(const xrt::run_impl* run_hdl, const tp& tp_now, ert_cmd_state state) + { + // state ERT_CMD_STATE_NEW indicates kernel start is called + if (state == ERT_CMD_STATE_NEW) { + // record start everytime because previous run may be finished, timeout, aborted or stopped + exec_times[run_hdl].start_time = tp_now; + exec_times[run_hdl].is_valid = true; + } + else { + // make start time invalid so we can record for next run, add duration to total time and increment total runs + if (exec_times[run_hdl].is_valid && state == ERT_CMD_STATE_COMPLETED) { + // valid run increament run + total_runs++; + total_time += std::chrono::duration_cast(tp_now - exec_times[run_hdl].start_time); + + // invalidate start time, run may be finished, aborted or timed out + exec_times[run_hdl].is_valid = false; + } + } + } }; struct hw_ctx_metrics @@ -68,6 +107,15 @@ struct hw_ctx_metrics xrt::uuid xclbin_uuid; struct bo_metrics bos_met; std::vector kernel_metrics_vec; + + void + log_kernel(const std::string& name, size_t args) + { + struct kernel_metrics k; + k.handle = name; + k.num_args = args; + kernel_metrics_vec.emplace_back(k); + } }; struct device_metrics @@ -77,9 +125,16 @@ struct device_metrics uint32_t bo_active_count = 0; uint32_t bo_peak_count = 0; std::vector hw_ctx_vec; + + void + log_hw_ctx(const xrt_core::hwctx_handle* handle, const xrt::uuid& uuid) + { + hw_ctx_vec.emplace_back(hw_ctx_metrics{handle, uuid, {}, {}}); + } }; -using metrics_map = std::map>; +using metrics_map = std::map>; +using device_metrics_map = std::map; // Global map to store usage metrics of all threads std::shared_ptr usage_metrics_map = std::make_shared(); @@ -97,28 +152,28 @@ class usage_metrics_logger : public base_logger ~usage_metrics_logger(); void - log_device_info(std::shared_ptr) override; + log_device_info(const xrt_core::device*) override; void log_hw_ctx_info(const xrt::hw_context_impl*) override; void - log_buffer_info_construct(unsigned int, size_t, const xrt_core::hwctx_handle*) override; + log_buffer_info_construct(device_id, size_t, const xrt_core::hwctx_handle*) override; void - log_buffer_info_destruct(unsigned int) override; + log_buffer_info_destruct(device_id) override; virtual void - log_buffer_sync(unsigned int, const xrt_core::hwctx_handle*, size_t, xclBOSyncDirection) override; + log_buffer_sync(device_id, const xrt_core::hwctx_handle*, size_t, xclBOSyncDirection) override; void - log_kernel_info(std::shared_ptr, const xrt::hw_context&, const std::string&, size_t) override; + log_kernel_info(const xrt_core::device*, const xrt::hw_context&, const std::string&, size_t) override; void - log_kernel_run_info(const xrt::kernel_impl*, const xrt::run_impl*, bool, ert_cmd_state) override; + log_kernel_run_info(const xrt::kernel_impl*, const xrt::run_impl*, ert_cmd_state) override; private: - std::map m_dev_map; + device_metrics_map m_dev_map; std::shared_ptr map_ptr; }; @@ -168,10 +223,10 @@ get_kernels_ptree(const std::vector& kernels_vec) { bpt::ptree kernel_array; - for (auto &kernel : kernels_vec) { + for (auto& kernel : kernels_vec) { bpt::ptree kernel_tree; - kernel_tree.put("name", kernel.name); + kernel_tree.put("name", kernel.handle); kernel_tree.put("num_of_args", kernel.num_args); kernel_tree.put("num_total_runs", std::to_string(kernel.total_runs)); @@ -191,7 +246,7 @@ get_hw_ctx_ptree(const std::vector& hw_ctx_vec) bpt::ptree hw_ctx_array; uint32_t ctx_count = 0; - for (const auto &ctx : hw_ctx_vec) { + for (const auto& ctx : hw_ctx_vec) { bpt::ptree hw_ctx; hw_ctx.put("id", std::to_string(ctx_count)); hw_ctx.put("xclbin_uuid", ctx.xclbin_uuid.to_string()); @@ -216,20 +271,20 @@ print_usage_metrics() uint32_t t_count = 0; // iterate over all threads - for (const auto& thread_it : *usage_metrics_map) { + for (const auto& [thread_id, dev_metrics_map] : *usage_metrics_map) { bpt::ptree dev_array; // iterate over all devices - for (auto dev_it : thread_it.second) { + for (const auto& [dev_id, dev_metrics] : dev_metrics_map) { bpt::ptree dev; - dev.put("device_index", std::to_string(dev_it.first)); - dev.put("bdf", dev_it.second.bdf); - dev.put("bos_peak_count", std::to_string(dev_it.second.bo_peak_count)); + dev.put("device_index", std::to_string(dev_id)); + dev.put("bdf", dev_metrics.bdf); + dev.put("bos_peak_count", std::to_string(dev_metrics.bo_peak_count)); // add global bos - dev.add_child("global_bos", get_bos_ptree(dev_it.second.global_bos_met)); + dev.add_child("global_bos", get_bos_ptree(dev_metrics.global_bos_met)); // add hw ctx info - dev.add_child("hw_context", get_hw_ctx_ptree(dev_it.second.hw_ctx_vec)); + dev.add_child("hw_context", get_hw_ctx_ptree(dev_metrics.hw_ctx_vec)); dev_array.push_back(std::make_pair("device", dev)); } @@ -283,12 +338,22 @@ usage_metrics_logger:: } } +static device_metrics* +get_device_metrics(device_metrics_map& dev_map, device_id dev_id) +{ + auto map_it = dev_map.find(dev_id); + return map_it == dev_map.end() + ? nullptr + : &(*map_it).second; +} + void usage_metrics_logger:: -log_device_info(std::shared_ptr dev) +log_device_info(const xrt_core::device* dev) { auto dev_id = dev->get_device_id(); - if (m_dev_map.find(dev_id) == m_dev_map.end()) { + if (!get_device_metrics(m_dev_map, dev_id)) { + // initialize map with this device index m_dev_map[dev_id] = {}; try { auto bdf = xrt_core::query::pcie_bdf::to_string(xrt_core::device_query(dev)); @@ -310,53 +375,60 @@ log_hw_ctx_info(const xrt::hw_context_impl* hwctx_impl) auto dev_id = xrt_core::hw_context_int::get_core_device(hw_ctx)->get_device_id(); auto uuid = hw_ctx.get_xclbin_uuid(); - // this condition is not required as device will be present - if (auto map_it = m_dev_map.find(dev_id); map_it != m_dev_map.end()) { - if (auto ctx_it = std::find_if( - map_it->second.hw_ctx_vec.begin(), map_it->second.hw_ctx_vec.end(), - [hwctx_handle](hw_ctx_metrics& ctx) {return ctx.handle == hwctx_handle;}); ctx_it == map_it->second.hw_ctx_vec.end()) { - map_it->second.hw_ctx_vec.emplace_back(hw_ctx_metrics{hwctx_handle, uuid, {}, {}}); - } - } + // dont log if device didn't match + auto dev_metrics = get_device_metrics(m_dev_map, dev_id); + if (!dev_metrics) + return; + + // log if this entry is not logged before + if (!get_metrics(dev_metrics->hw_ctx_vec, hwctx_handle)) + dev_metrics->log_hw_ctx(hwctx_handle, uuid); } catch(...) { // dont log anything } } +static bo_metrics* +get_buffer_metrics(device_metrics* dev_metrics, const xrt_core::hwctx_handle* handle) +{ + if (!handle) { + // global bo + return &dev_metrics->global_bos_met; + } + else { + auto hw_ctx_met = get_metrics(dev_metrics->hw_ctx_vec, handle); + if (hw_ctx_met != nullptr) + return &hw_ctx_met->bos_met; + } + return nullptr; +} + void usage_metrics_logger:: -log_buffer_info_construct(unsigned int dev_id, size_t sz, const xrt_core::hwctx_handle* handle) +log_buffer_info_construct(device_id dev_id, size_t sz, const xrt_core::hwctx_handle* handle) { - struct bo_metrics* bo_met = nullptr; - - if (auto map_it = m_dev_map.find(dev_id); map_it != m_dev_map.end()) { - if (!handle) { - // global bo - bo_met = &map_it->second.global_bos_met; - } - - if (auto ctx_it = std::find_if( - map_it->second.hw_ctx_vec.begin(), map_it->second.hw_ctx_vec.end(), - [handle](hw_ctx_metrics& ctx) {return ctx.handle == handle;}); ctx_it != map_it->second.hw_ctx_vec.end()) { - bo_met = &ctx_it->bos_met; - } - - if (bo_met) { - bo_met->total_count++; - bo_met->total_size_in_bytes += sz; - bo_met->peak_size_in_bytes = std::max(bo_met->peak_size_in_bytes, sz); - // increase active count in case of global or ctx bound bo - map_it->second.bo_active_count++; - map_it->second.bo_peak_count = - std::max(map_it->second.bo_peak_count, map_it->second.bo_active_count); - } - } + auto dev_metrics = get_device_metrics(m_dev_map, dev_id); + if (!dev_metrics) + return; + + struct bo_metrics* bo_met = get_buffer_metrics(dev_metrics, handle); + // don't log if bo not found + if (!bo_met) + return; + + bo_met->total_count++; + bo_met->total_size_in_bytes += sz; + bo_met->peak_size_in_bytes = std::max(bo_met->peak_size_in_bytes, sz); + // increase active count in case of global or ctx bound bo + dev_metrics->bo_active_count++; + dev_metrics->bo_peak_count = + std::max(dev_metrics->bo_peak_count, dev_metrics->bo_active_count); } void usage_metrics_logger:: -log_buffer_info_destruct(unsigned int) +log_buffer_info_destruct(device_id) { // TODO : // This call is needed to decrement bo active count @@ -365,58 +437,48 @@ log_buffer_info_destruct(unsigned int) void usage_metrics_logger:: -log_buffer_sync(unsigned int dev_id, const xrt_core::hwctx_handle* handle, size_t sz, xclBOSyncDirection dir) +log_buffer_sync(device_id dev_id, const xrt_core::hwctx_handle* handle, size_t sz, xclBOSyncDirection dir) { - struct bo_metrics* bo_met = nullptr; - - if (auto map_it = m_dev_map.find(dev_id); map_it != m_dev_map.end()) { - if (!handle) { - // global bo - bo_met = &map_it->second.global_bos_met; - } - - if (auto ctx_it = std::find_if( - map_it->second.hw_ctx_vec.begin(), map_it->second.hw_ctx_vec.end(), - [handle](hw_ctx_metrics& ctx) {return ctx.handle == handle;}); ctx_it != map_it->second.hw_ctx_vec.end()) { - bo_met = &ctx_it->bos_met; - } - - if (bo_met) { - if (dir == XCL_BO_SYNC_BO_TO_DEVICE) - bo_met->bytes_synced_to_device += sz; - else - bo_met->bytes_synced_from_device += sz; - } - } + auto dev_metrics = get_device_metrics(m_dev_map, dev_id); + if (!dev_metrics) + return; + + struct bo_metrics* bo_met = get_buffer_metrics(dev_metrics, handle); + // don't log if bo not found + if (!bo_met) + return; + + if (dir == XCL_BO_SYNC_BO_TO_DEVICE) + bo_met->bytes_synced_to_device += sz; + else + bo_met->bytes_synced_from_device += sz; } void usage_metrics_logger:: -log_kernel_info(std::shared_ptr dev, const xrt::hw_context& ctx, const std::string& name, size_t args) +log_kernel_info(const xrt_core::device* dev, const xrt::hw_context& ctx, const std::string& name, size_t args) { auto dev_id = dev->get_device_id(); auto hwctx_handle = static_cast(ctx); - // this condition is not required as device will be present - if (auto map_it = m_dev_map.find(dev_id); map_it != m_dev_map.end()) { - if (auto ctx_it = std::find_if( - map_it->second.hw_ctx_vec.begin(), map_it->second.hw_ctx_vec.end(), - [hwctx_handle](hw_ctx_metrics& ctx) {return ctx.handle == hwctx_handle;}); ctx_it != map_it->second.hw_ctx_vec.end()) { - if (auto kernel_it = std::find_if( - ctx_it->kernel_metrics_vec.begin(), ctx_it->kernel_metrics_vec.end(), - [name](kernel_metrics& kernel) {return kernel.name == name;}); kernel_it == ctx_it->kernel_metrics_vec.end()) { - struct kernel_metrics k; - k.name = name; - k.num_args = args; - ctx_it->kernel_metrics_vec.emplace_back(k); - } - } + auto dev_metrics = get_device_metrics(m_dev_map, dev_id); + if (!dev_metrics) + return; + + auto hw_ctx_met = get_metrics(dev_metrics->hw_ctx_vec, hwctx_handle); + // dont log if hw ctx didn't match existing ones + if (!hw_ctx_met) + return; + + // log if this entry is not logged before + if (!get_metrics(hw_ctx_met->kernel_metrics_vec, name)) { + hw_ctx_met->log_kernel(name, args); } } void usage_metrics_logger:: -log_kernel_run_info(const xrt::kernel_impl* krnl_impl, const xrt::run_impl* run_hdl, bool start, ert_cmd_state state) +log_kernel_run_info(const xrt::kernel_impl* krnl_impl, const xrt::run_impl* run_hdl, ert_cmd_state state) { // collecting time at start of call as next calls will be overhead auto ts_now = std::chrono::high_resolution_clock::now(); @@ -430,34 +492,20 @@ log_kernel_run_info(const xrt::kernel_impl* krnl_impl, const xrt::run_impl* run_ auto dev_id = xrt_core::hw_context_int::get_core_device(hw_ctx)->get_device_id(); auto name = kernel.get_name(); - // this condition is not required as device will be present - if (auto map_it = m_dev_map.find(dev_id); map_it != m_dev_map.end()) { - if (auto ctx_it = std::find_if( - map_it->second.hw_ctx_vec.begin(), map_it->second.hw_ctx_vec.end(), - [hwctx_handle](hw_ctx_metrics& ctx) {return ctx.handle == hwctx_handle;}); ctx_it != map_it->second.hw_ctx_vec.end()) { - if (auto kernel_it = std::find_if( - ctx_it->kernel_metrics_vec.begin(), ctx_it->kernel_metrics_vec.end(), - [name](kernel_metrics& kernel) {return kernel.name == name;}); kernel_it != ctx_it->kernel_metrics_vec.end()) { - if (start) { - // record start everytime because previous run may be finished, timeout, aborted or stopped - kernel_it->exec_times[run_hdl].start_time = ts_now; - kernel_it->exec_times[run_hdl].is_valid = true; - } - else { - // make start time invalid so we can record for next run, add duration to total time and increment total runs - if (kernel_it->exec_times[run_hdl].is_valid) { - if (state == ERT_CMD_STATE_COMPLETED) { - // valid run increament run - kernel_it->total_runs++; - kernel_it->total_time += std::chrono::duration_cast(ts_now - kernel_it->exec_times[run_hdl].start_time); - } - // invalidate start time, run may be finished, aborted or timed out - kernel_it->exec_times[run_hdl].is_valid = false; - } - } - } - } - } + auto dev_metrics = get_device_metrics(m_dev_map, dev_id); + if (!dev_metrics) + return; + + auto hw_ctx_met = get_metrics(dev_metrics->hw_ctx_vec, hwctx_handle); + // dont log if hw ctx didn't match existing ones + if (!hw_ctx_met) + return; + + auto kernel_met = get_metrics(hw_ctx_met->kernel_metrics_vec, name); + if (!kernel_met) + return; + + kernel_met->log_kernel_exec_time(run_hdl, ts_now, state); } catch(...) { // dont log anything diff --git a/src/runtime_src/core/common/usage_metrics.h b/src/runtime_src/core/common/usage_metrics.h index 2c91332879c..2684d059fe4 100644 --- a/src/runtime_src/core/common/usage_metrics.h +++ b/src/runtime_src/core/common/usage_metrics.h @@ -3,20 +3,13 @@ #ifndef XRT_CORE_USAGE_METRICS_H #define XRT_CORE_USAGE_METRICS_H -#include -#include #include #include #include -#include -#include -#include -#include "core/include/ert.h" #include "core/include/xrt.h" #include "core/include/xrt/xrt_hw_context.h" #include "core/include/xrt/xrt_kernel.h" -#include "core/include/xrt/xrt_uuid.h" // forward declaration of xrt_core::device class namespace xrt_core { @@ -38,6 +31,8 @@ class device; //////////////////////////////////////////////////////////////// namespace xrt_core::usage_metrics { +using device_id = unsigned int; + // class base_logger - class with no op calls // // when user doesn't set ini option logging should be no op @@ -45,25 +40,25 @@ class base_logger { public: virtual void - log_device_info(std::shared_ptr) {} + log_device_info(const xrt_core::device*) {} virtual void log_hw_ctx_info(const xrt::hw_context_impl*) {} virtual void - log_buffer_info_construct(unsigned int, size_t, const xrt_core::hwctx_handle*) {} + log_buffer_info_construct(device_id, size_t, const xrt_core::hwctx_handle*) {} virtual void - log_buffer_info_destruct(unsigned int) {} + log_buffer_info_destruct(device_id) {} virtual void - log_buffer_sync(unsigned int, const xrt_core::hwctx_handle*, size_t, xclBOSyncDirection) {} + log_buffer_sync(device_id, const xrt_core::hwctx_handle*, size_t, xclBOSyncDirection) {} virtual void - log_kernel_info(std::shared_ptr, const xrt::hw_context&, const std::string&, size_t) {} + log_kernel_info(const xrt_core::device*, const xrt::hw_context&, const std::string&, size_t) {} virtual void - log_kernel_run_info(const xrt::kernel_impl*, const xrt::run_impl*, bool, ert_cmd_state) {} + log_kernel_run_info(const xrt::kernel_impl*, const xrt::run_impl*, ert_cmd_state) {} }; // get_usage_metrics_logger() - Return logger object for current thread