Skip to content

Commit fc6773a

Browse files
authored
Add Tracelogging for profiling (microsoft#1639)
Enabled only if onnxruntime_ENABLE_INSTRUMENT is ON
1 parent 0c6e9f9 commit fc6773a

18 files changed

+1228
-72
lines changed

cmake/CMakeLists.txt

+10
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in l
8383
option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump node input shapes and output data to standard output when executing the model." OFF)
8484
option(onnxruntime_USE_DML "Build with DirectML support" OFF)
8585
option(onnxruntime_USE_ACL "Build with ACL support" OFF)
86+
option(onnxruntime_ENABLE_INSTRUMENT "Enable Instrument with Event Tracing for Windows (ETW)" OFF)
8687

8788
set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE)
8889
#nsync tests failed on Mac Build
@@ -91,6 +92,15 @@ set(ONNX_ML 1)
9192
if(NOT onnxruntime_ENABLE_PYTHON)
9293
set(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS OFF)
9394
endif()
95+
96+
# ETW-based instrumentation is only wired up for Windows builds. On any other
# platform, warn when the option was requested and force it OFF so the later
# if(onnxruntime_ENABLE_INSTRUMENT) checks are skipped.
if(NOT WIN32)
97+
#TODO: On Linux we may try https://github.com/microsoft/TraceLogging
98+
if(onnxruntime_ENABLE_INSTRUMENT)
99+
message(WARNING "Instrument is only supported on Windows now")
100+
set(onnxruntime_ENABLE_INSTRUMENT OFF)
101+
endif()
102+
endif()
103+
94104
if(onnxruntime_USE_OPENMP)
95105
find_package(OpenMP)
96106
if (OPENMP_FOUND)

cmake/onnxruntime_framework.cmake

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ file(GLOB_RECURSE onnxruntime_framework_srcs CONFIGURE_DEPENDS
1010
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_framework_srcs})
1111

1212
add_library(onnxruntime_framework ${onnxruntime_framework_srcs})
13-
13+
# Define ONNXRUNTIME_ENABLE_INSTRUMENT for onnxruntime_framework's own sources
# when instrumentation is enabled.
# NOTE(review): this is PRIVATE while onnxruntime_session uses PUBLIC.
# session_state.h guards data members with this macro, so confirm that every
# target including that header is compiled with the same definition.
if(onnxruntime_ENABLE_INSTRUMENT)
14+
target_compile_definitions(onnxruntime_framework PRIVATE ONNXRUNTIME_ENABLE_INSTRUMENT)
15+
endif()
1416
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
1517
onnxruntime_add_include_to_target(onnxruntime_framework onnxruntime_common onnx onnx_proto protobuf::libprotobuf)
1618
set_target_properties(onnxruntime_framework PROPERTIES FOLDER "ONNXRuntime")

cmake/onnxruntime_session.cmake

+3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs})
1212
add_library(onnxruntime_session ${onnxruntime_session_srcs})
1313
install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/session DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
1414
onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf)
15+
# Define ONNXRUNTIME_ENABLE_INSTRUMENT for onnxruntime_session when
# instrumentation is enabled. PUBLIC also propagates the definition to targets
# that link against onnxruntime_session.
if(onnxruntime_ENABLE_INSTRUMENT)
16+
target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT)
17+
endif()
1518
target_include_directories(onnxruntime_session PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS})
1619
add_dependencies(onnxruntime_session ${onnxruntime_EXTERNAL_DEPENDENCIES})
1720
set_target_properties(onnxruntime_session PROPERTIES FOLDER "ONNXRuntime")

cmake/onnxruntime_unittests.cmake

+11
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,17 @@ if (onnxruntime_BUILD_SERVER)
776776

777777
endif()
778778

779+
# ETW tools: two standalone executables built from the sources under
# ${ONNXRUNTIME_ROOT}/tool/etw. Only built on Windows with instrumentation enabled.
780+
if(WIN32 AND onnxruntime_ENABLE_INSTRUMENT)
781+
# Both tools share eparser.* and TraceSession.*; only the entry-point source differs.
add_executable(generate_perf_report_from_etl ${ONNXRUNTIME_ROOT}/tool/etw/main.cc ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
782+
target_compile_definitions(generate_perf_report_from_etl PRIVATE "_CONSOLE" "_UNICODE" "UNICODE")
783+
target_link_libraries(generate_perf_report_from_etl PRIVATE tdh Advapi32)
784+
785+
add_executable(compare_two_sessions ${ONNXRUNTIME_ROOT}/tool/etw/compare_two_sessions.cc ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
786+
target_compile_definitions(compare_two_sessions PRIVATE "_CONSOLE" "_UNICODE" "UNICODE")
787+
# Unlike the report generator, this tool additionally links ${GETOPT_LIB_WIDE}.
target_link_libraries(compare_two_sessions PRIVATE ${GETOPT_LIB_WIDE} tdh Advapi32)
788+
endif()
789+
779790
add_executable(onnxruntime_mlas_test ${TEST_SRC_DIR}/mlas/unittest.cpp)
780791
target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT})
781792
set(onnxruntime_mlas_test_libs onnxruntime_mlas onnxruntime_common)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
#pragma once
5+
6+
#include <windows.h>
7+
#include <TraceLoggingProvider.h>
8+
9+
// Forward-declares the TraceLogging provider handle that TraceLoggingWrite calls pass as
// their first argument. NOTE(review): the matching TRACELOGGING_DEFINE_PROVIDER is
// presumably in a single .cc file that is not visible here - confirm.
TRACELOGGING_DECLARE_PROVIDER(telemetry_provider_handle);

onnxruntime/core/framework/sequential_executor.cc

+33-3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,22 @@
2525
using namespace Concurrency;
2626
#endif
2727

28+
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
29+
#include <Windows.h>
30+
#include "core/platform/tracing.h"
31+
// File-local helpers for the ETW timing instrumentation.
namespace {
32+
// Returns the value written by QueryPerformanceFrequency (per the Win32 documentation,
// the performance-counter frequency in counts per second).
// The BOOL result is discarded on purpose; see the comment inside for why.
LARGE_INTEGER OrtGetPerformanceFrequency() {
33+
LARGE_INTEGER v;
34+
// On systems that run Windows XP or later, the QueryPerformanceFrequency function will always succeed
35+
// and will thus never return zero.
36+
(void)QueryPerformanceFrequency(&v);
37+
return v;
38+
}
39+
40+
// Queried once when this namespace-scope variable is initialized. SequentialExecutor::Execute
// divides QueryPerformanceCounter deltas (pre-multiplied by 1000000) by this value.
LARGE_INTEGER perf_freq = OrtGetPerformanceFrequency();
41+
} // namespace
42+
#endif
43+
2844
namespace onnxruntime {
2945

3046
static Status ReleaseNodeMLValues(ExecutionFrame& frame,
@@ -87,7 +103,10 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
87103
if (p_op_kernel == nullptr)
88104
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Got nullptr from GetKernel for node: ",
89105
node.Name());
90-
106+
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
107+
LARGE_INTEGER kernel_start;
108+
QueryPerformanceCounter(&kernel_start);
109+
#endif
91110
// construct OpKernelContext
92111
// TODO: log kernel inputs?
93112
OpKernelContextInternal op_kernel_context(session_state, frame, *p_op_kernel, logger, terminate_flag_);
@@ -128,7 +147,6 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
128147
}
129148
}
130149
}
131-
132150
#if defined DEBUG_NODE_INPUTS_OUTPUTS
133151
utils::DumpNodeInputs(op_kernel_context, p_op_kernel->Node());
134152
#endif
@@ -202,7 +220,19 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
202220
}
203221
}
204222
}
205-
223+
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
224+
LARGE_INTEGER kernel_stop;
225+
QueryPerformanceCounter(&kernel_stop);
226+
LARGE_INTEGER elapsed;
227+
elapsed.QuadPart = kernel_stop.QuadPart - kernel_start.QuadPart;
228+
elapsed.QuadPart *= 1000000;
229+
elapsed.QuadPart /= perf_freq.QuadPart;
230+
// Log an event
231+
TraceLoggingWrite(telemetry_provider_handle, // handle to my provider
232+
"OpEnd", // Event Name that should uniquely identify your event.
233+
TraceLoggingValue(p_op_kernel->KernelDef().OpName().c_str(), "op_name"),
234+
TraceLoggingValue(elapsed.QuadPart, "time"));
235+
#endif
206236
if (is_profiler_enabled) {
207237
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
208238
p_op_kernel->Node().Name() + "_fence_after",

onnxruntime/core/framework/session_state.cc

+4-1
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,10 @@ void SessionState::AddSubgraphSessionState(onnxruntime::NodeIndex index, const s
271271
ORT_ENFORCE(existing_entries.find(attribute_name) == existing_entries.cend(), "Entry exists in node ", index,
272272
" for attribute ", attribute_name);
273273
}
274-
274+
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
275+
session_state->parent_ = this;
276+
GenerateGraphId();
277+
#endif
275278
subgraph_session_states_[index].insert(std::make_pair(attribute_name, std::move(session_state)));
276279
}
277280

onnxruntime/core/framework/session_state.h

+13
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,19 @@ class SessionState {
268268

269269
std::unique_ptr<NodeIndexInfo> node_index_info_;
270270
std::multimap<int, std::unique_ptr<FeedsFetchesManager>> cached_feeds_fetches_managers_;
271+
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
272+
SessionState* parent_ = nullptr;
273+
// Assign each graph in each session a unique id.
274+
int graph_id_ = 0;
275+
int next_graph_id_ = 1;
276+
277+
// Sets graph_id_ on the SessionState this method is invoked on, using the counter kept on
// the root state: parent_ links are followed until a state with no parent is found, then
// that state's next_graph_id_ is read and post-incremented.
// NOTE(review): the visible call in SessionState::AddSubgraphSessionState is unqualified, so
// it runs on the parent state rather than on the subgraph state being added - confirm that
// this is the intended target.
void GenerateGraphId() {
278+
SessionState* p = this;
279+
// Walk up to the top-most SessionState (the one whose parent_ is nullptr).
while (p->parent_ != nullptr) p = p->parent_;
280+
graph_id_ = p->next_graph_id_ ++;
281+
}
282+
283+
#endif
271284
};
272285

273286
} // namespace onnxruntime

onnxruntime/core/session/environment.cc

+4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919

2020
#include "core/platform/env.h"
2121

22+
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
23+
#include "core/platform/tracing.h"
24+
#endif
25+
2226
namespace onnxruntime {
2327
using namespace ::onnxruntime::common;
2428
using namespace ONNX_NAMESPACE;

0 commit comments

Comments
 (0)