@@ -53,32 +53,43 @@ class PapiProfiler : public tflite::Profiler {
53
53
inf_thread_ids_ =
54
54
std::vector<pid_t >(current_threads.begin () + 3 , current_threads.end ());
55
55
56
- // Handle core specific events per inference thread
56
+ papi_regions_.reserve (1000 );
57
+ timings_.reserve (1000 );
58
+
57
59
int retval;
58
- for (uint64_t i = 0 ; i < inf_thread_ids_.size (); ++i) {
59
- event_sets_.push_back (PAPI_NULL);
60
- retval = PAPI_create_eventset (&event_sets_.back ());
61
- if (retval != PAPI_OK) {
62
- handle_error (retval, __LINE__, __FILE__);
63
- }
64
- for (auto & event_name : papi_events_) {
65
- retval = PAPI_add_named_event (event_sets_.back (), event_name.c_str ());
60
+
61
+ // Handle core specific events per inference thread
62
+ if (!papi_events_.empty ()) {
63
+ for (uint64_t i = 0 ; i < inf_thread_ids_.size (); ++i) {
64
+ event_sets_.push_back (PAPI_NULL);
65
+ retval = PAPI_create_eventset (&event_sets_.back ());
66
+ if (retval != PAPI_OK) {
67
+ handle_error (retval, __LINE__, __FILE__);
68
+ }
69
+ for (auto & event_name : papi_events_) {
70
+ retval = PAPI_add_named_event (event_sets_.back (), event_name.c_str ());
71
+ if (retval != PAPI_OK)
72
+ handle_error (retval, __LINE__, __FILE__);
73
+ }
74
+
75
+ // Attach event to thread
76
+ LOG_MESSAGE (
77
+ TRITONSERVER_LOG_INFO,
78
+ (" Attaching to " + std::to_string (inf_thread_ids_[i])).c_str ());
79
+ retval = PAPI_attach (event_sets_.back (), inf_thread_ids_[i]);
80
+ if (retval != PAPI_OK)
81
+ handle_error (retval, __LINE__, __FILE__);
82
+
83
+ // Start eventset
84
+ retval = PAPI_start (event_sets_.back ());
66
85
if (retval != PAPI_OK)
67
86
handle_error (retval, __LINE__, __FILE__);
68
87
}
88
+ event_values_.resize (papi_events_.size ());
69
89
70
- // Attach event to thread
71
- LOG_MESSAGE (
72
- TRITONSERVER_LOG_INFO,
73
- (" Attaching to " + std::to_string (inf_thread_ids_[i])).c_str ());
74
- retval = PAPI_attach (event_sets_.back (), inf_thread_ids_[i]);
75
- if (retval != PAPI_OK)
76
- handle_error (retval, __LINE__, __FILE__);
90
+ // Separately we will also track operation timings in nanos
91
+ papi_events_.push_back (" TIME_NS" );
77
92
}
78
- event_values_.resize (papi_events_.size ());
79
-
80
- // Separately we will also track operation timings in nanos
81
- papi_events_.push_back (" TIME_NS" );
82
93
83
94
// Handle uncore events separately
84
95
if (!papi_uncore_events_.empty ()) {
@@ -92,6 +103,10 @@ class PapiProfiler : public tflite::Profiler {
92
103
handle_error (retval, __LINE__, __FILE__);
93
104
}
94
105
uncore_event_values_.resize (papi_uncore_events_.size ());
106
+ // Start uncore eventset
107
+ retval = PAPI_start (uncore_event_set_);
108
+ if (retval != PAPI_OK)
109
+ handle_error (retval, __LINE__, __FILE__);
95
110
}
96
111
}
97
112
@@ -119,6 +134,8 @@ class PapiProfiler : public tflite::Profiler {
119
134
<< papi_events_[i % papi_events_.size ()] << " ,"
120
135
<< event.second [i] << " \n " ;
121
136
}
137
+ }
138
+ for (auto & event : results_uncore_) {
122
139
// Now write the uncore events with a dummy thread id of -1
123
140
for (uint64_t i = 0 ; i < results_uncore_[event.first ].size (); ++i) {
124
141
myfile << event.first << " ," << -1 << " ,"
@@ -152,17 +169,9 @@ class PapiProfiler : public tflite::Profiler {
152
169
trace_event_tag += (" _" + std::to_string (event_metadata1));
153
170
154
171
int retval;
155
- // For the event set attached to each thread, start or restart the event set
156
- for (uint64_t i = 0 ; i < event_sets_.size (); ++i) {
157
- int state;
158
- PAPI_state (event_sets_[i], &state);
159
- if (!(state & PAPI_RUNNING)) {
160
- // Begin tracking counters
161
- retval = PAPI_start (event_sets_[i]);
162
- if (retval != PAPI_OK)
163
- handle_error (retval, __LINE__, __FILE__);
164
172
165
- } else {
173
+ if (!papi_events_.empty ()) { // Reset event set attached to each thread
174
+ for (uint64_t i = 0 ; i < event_sets_.size (); ++i) {
166
175
// Reset counters
167
176
retval = PAPI_reset (event_sets_[i]);
168
177
if (retval != PAPI_OK)
@@ -172,26 +181,16 @@ class PapiProfiler : public tflite::Profiler {
172
181
173
182
// Handle uncore events
174
183
if (!papi_uncore_events_.empty ()) {
175
- int state;
176
- PAPI_state (uncore_event_set_, &state);
177
- if (!(state & PAPI_RUNNING)) {
178
- // Begin tracking counters
179
- retval = PAPI_start (uncore_event_set_);
180
- if (retval != PAPI_OK)
181
- handle_error (retval, __LINE__, __FILE__);
182
-
183
- } else {
184
- // Reset counters
185
- retval = PAPI_reset (uncore_event_set_);
186
- if (retval != PAPI_OK)
187
- handle_error (retval, __LINE__, __FILE__);
188
- }
184
+ // Reset counters
185
+ retval = PAPI_reset (uncore_event_set_);
186
+ if (retval != PAPI_OK)
187
+ handle_error (retval, __LINE__, __FILE__);
189
188
}
190
189
191
- uint32_t event_handle = event_index_++;
192
- papi_regions_[event_handle ] = trace_event_tag;
193
- timings_[event_handle ] = PAPI_get_real_nsec ();
194
- return event_handle ;
190
+ event_index_++;
191
+ papi_regions_[event_index_ ] = std::move ( trace_event_tag) ;
192
+ timings_[event_index_ ] = PAPI_get_real_nsec ();
193
+ return event_index_ ;
195
194
}
196
195
197
196
void EndEvent (uint32_t event_handle) override
@@ -200,32 +199,44 @@ class PapiProfiler : public tflite::Profiler {
200
199
return ;
201
200
}
202
201
203
- timings_[event_handle] = PAPI_get_real_nsec () - timings_[event_handle];
202
+ long long op_latency = PAPI_get_real_nsec () - timings_[event_handle];
204
203
205
- int retval;
206
- // For each thread we are profiling
207
- for (uint64_t i = 0 ; i < event_sets_.size (); ++i) {
208
- retval = PAPI_read (event_sets_[i], event_values_.data ());
209
- if (retval != PAPI_OK)
210
- handle_error (retval, __LINE__, __FILE__);
211
- // For each of the events we collected a counter value for
212
- for (auto val : event_values_) {
213
- results_[papi_regions_[event_handle]].push_back (val);
214
- }
204
+ // For performance reserve space for 10000 elements for each perf event in
205
+ // results
206
+ if (results_[papi_regions_[event_handle]].empty ()) {
207
+ results_[papi_regions_[event_handle]].reserve (
208
+ papi_events_.size () * 10000 );
209
+ }
210
+ if (results_uncore_[papi_regions_[event_handle]].empty ()) {
211
+ results_uncore_[papi_regions_[event_handle]].reserve (
212
+ papi_uncore_events_.size () * 10000 );
215
213
}
216
214
217
- // Push back the op timing
218
- results_[papi_regions_[event_handle]].push_back (timings_[event_handle]);
215
+ int retval;
219
216
217
+ if (!papi_events_.empty ()) { // For each thread we are profiling
218
+ for (uint64_t i = 0 ; i < event_sets_.size (); ++i) {
219
+ retval = PAPI_read (event_sets_[i], event_values_.data ());
220
+ if (retval != PAPI_OK)
221
+ handle_error (retval, __LINE__, __FILE__);
222
+ // Write event counter values to end of results vector for current op
223
+ results_[papi_regions_[event_handle]].insert (
224
+ results_[papi_regions_[event_handle]].end (), event_values_.begin (),
225
+ event_values_.end ());
226
+ }
227
+
228
+ // Push back the op timing
229
+ results_[papi_regions_[event_handle]].push_back (op_latency);
230
+ }
220
231
// Handle uncore events
221
232
if (!papi_uncore_events_.empty ()) {
222
233
retval = PAPI_read (uncore_event_set_, uncore_event_values_.data ());
223
234
if (retval != PAPI_OK)
224
235
handle_error (retval, __LINE__, __FILE__);
225
236
// For each of the events we collected a counter value for
226
- for ( auto val : uncore_event_values_) {
227
- results_uncore_[papi_regions_[event_handle]].push_back (val);
228
- }
237
+ results_uncore_[papi_regions_[event_handle]]. insert (
238
+ results_uncore_[papi_regions_[event_handle]].end (),
239
+ uncore_event_values_. begin (), uncore_event_values_. end ());
229
240
}
230
241
}
231
242
@@ -278,12 +289,7 @@ MaybeCreatePapiProfiler()
278
289
// Per core events
279
290
char * papi_events = getenv (" PAPI_EVENTS" );
280
291
std::vector<std::string> papi_events_vec;
281
- if (papi_events == NULL ) {
282
- LOG_MESSAGE (
283
- TRITONSERVER_LOG_WARN,
284
- " PAPI_EVENTS not specified, op level profiling disabled!" );
285
- return nullptr ;
286
- } else {
292
+ if (papi_events != NULL ) {
287
293
// Parse out all papi events indivdually
288
294
std::stringstream ss (papi_events);
289
295
while (ss.good ()) {
@@ -303,12 +309,7 @@ MaybeCreatePapiProfiler()
303
309
// Uncore events
304
310
char * papi_uncore_events = getenv (" PAPI_UNCORE_EVENTS" );
305
311
std::vector<std::string> papi_uncore_events_vec;
306
- if (papi_uncore_events == NULL ) {
307
- LOG_MESSAGE (
308
- TRITONSERVER_LOG_WARN,
309
- " PAPI_UNCORE_EVENTS not specified, op level profiling disabled!" );
310
- return nullptr ;
311
- } else {
312
+ if (papi_uncore_events != NULL ) {
312
313
// Parse out all papi events indivdually
313
314
std::stringstream ss (papi_uncore_events);
314
315
while (ss.good ()) {
@@ -324,6 +325,15 @@ MaybeCreatePapiProfiler()
324
325
papi_uncore_events_vec.push_back (substr);
325
326
}
326
327
}
328
+
329
+ if ((papi_events == NULL ) && (papi_uncore_events == NULL )) {
330
+ LOG_MESSAGE (
331
+ TRITONSERVER_LOG_WARN,
332
+ " PAPI_EVENTS nor PAPI_UNCORE_EVENTS specified, op level profiling "
333
+ " disabled!" );
334
+ return nullptr ;
335
+ }
336
+
327
337
return std::unique_ptr<tflite::Profiler>(
328
338
new PapiProfiler (papi_events_vec, papi_uncore_events_vec));
329
339
}
0 commit comments