|
25 | 25 | using namespace Concurrency;
|
26 | 26 | #endif
|
27 | 27 |
|
| 28 | +#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT |
| 29 | +#include <Windows.h> |
| 30 | +#include "core/platform/tracing.h" |
| 31 | +namespace { |
| 32 | +LARGE_INTEGER OrtGetPerformanceFrequency() { |
| 33 | + LARGE_INTEGER v; |
| 34 | + // Returns the performance-counter frequency reported by QueryPerformanceFrequency. On Windows XP or |
| 35 | + // later that call always succeeds and never yields zero, so its result is deliberately discarded. |
| 36 | + (void)QueryPerformanceFrequency(&v); |
| 37 | + return v; |
| 38 | +} |
| 39 | + |
| 40 | +LARGE_INTEGER perf_freq = OrtGetPerformanceFrequency(); |
| 41 | +} // namespace |
| 42 | +#endif |
| 43 | + |
28 | 44 | namespace onnxruntime {
|
29 | 45 |
|
30 | 46 | static Status ReleaseNodeMLValues(ExecutionFrame& frame,
|
@@ -87,7 +103,10 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
|
87 | 103 | if (p_op_kernel == nullptr)
|
88 | 104 | return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Got nullptr from GetKernel for node: ",
|
89 | 105 | node.Name());
|
90 |
| - |
| 106 | +#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT |
| 107 | + LARGE_INTEGER kernel_start; |
| 108 | + QueryPerformanceCounter(&kernel_start); |
| 109 | +#endif |
91 | 110 | // construct OpKernelContext
|
92 | 111 | // TODO: log kernel inputs?
|
93 | 112 | OpKernelContextInternal op_kernel_context(session_state, frame, *p_op_kernel, logger, terminate_flag_);
|
@@ -128,7 +147,6 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
|
128 | 147 | }
|
129 | 148 | }
|
130 | 149 | }
|
131 |
| - |
132 | 150 | #if defined DEBUG_NODE_INPUTS_OUTPUTS
|
133 | 151 | utils::DumpNodeInputs(op_kernel_context, p_op_kernel->Node());
|
134 | 152 | #endif
|
@@ -202,7 +220,19 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
|
202 | 220 | }
|
203 | 221 | }
|
204 | 222 | }
|
205 |
| - |
| 223 | +#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT |
| 224 | + LARGE_INTEGER kernel_stop; |
| 225 | + QueryPerformanceCounter(&kernel_stop); |
| 226 | + LARGE_INTEGER elapsed; |
| 227 | + elapsed.QuadPart = kernel_stop.QuadPart - kernel_start.QuadPart; |
| 228 | + elapsed.QuadPart *= 1000000; |
| 229 | + elapsed.QuadPart /= perf_freq.QuadPart; |
| 230 | + // Emit an "OpEnd" trace event carrying the kernel's op name and its elapsed time in microseconds |
| 231 | + TraceLoggingWrite(telemetry_provider_handle, // handle to my provider |
| 232 | + "OpEnd", // Event Name that should uniquely identify your event. |
| 233 | + TraceLoggingValue(p_op_kernel->KernelDef().OpName().c_str(), "op_name"), |
| 234 | + TraceLoggingValue(elapsed.QuadPart, "time")); |
| 235 | +#endif |
206 | 236 | if (is_profiler_enabled) {
|
207 | 237 | session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
|
208 | 238 | p_op_kernel->Node().Name() + "_fence_after",
|
|
0 commit comments