Skip to content

Commit 7bfade4

Browse files
committed
Simplify perf counter infra
Signed-off-by: Josh Minor <[email protected]>
1 parent c08cd3b commit 7bfade4

File tree

1 file changed

+86
-76
lines changed

1 file changed

+86
-76
lines changed

src/model_instance/papi_profiler.cc

+86-76
Original file line numberDiff line numberDiff line change
@@ -53,32 +53,43 @@ class PapiProfiler : public tflite::Profiler {
5353
inf_thread_ids_ =
5454
std::vector<pid_t>(current_threads.begin() + 3, current_threads.end());
5555

56-
// Handle core specific events per inference thread
56+
papi_regions_.reserve(1000);
57+
timings_.reserve(1000);
58+
5759
int retval;
58-
for (uint64_t i = 0; i < inf_thread_ids_.size(); ++i) {
59-
event_sets_.push_back(PAPI_NULL);
60-
retval = PAPI_create_eventset(&event_sets_.back());
61-
if (retval != PAPI_OK) {
62-
handle_error(retval, __LINE__, __FILE__);
63-
}
64-
for (auto& event_name : papi_events_) {
65-
retval = PAPI_add_named_event(event_sets_.back(), event_name.c_str());
60+
61+
// Handle core specific events per inference thread
62+
if (!papi_events_.empty()) {
63+
for (uint64_t i = 0; i < inf_thread_ids_.size(); ++i) {
64+
event_sets_.push_back(PAPI_NULL);
65+
retval = PAPI_create_eventset(&event_sets_.back());
66+
if (retval != PAPI_OK) {
67+
handle_error(retval, __LINE__, __FILE__);
68+
}
69+
for (auto& event_name : papi_events_) {
70+
retval = PAPI_add_named_event(event_sets_.back(), event_name.c_str());
71+
if (retval != PAPI_OK)
72+
handle_error(retval, __LINE__, __FILE__);
73+
}
74+
75+
// Attach event to thread
76+
LOG_MESSAGE(
77+
TRITONSERVER_LOG_INFO,
78+
("Attaching to " + std::to_string(inf_thread_ids_[i])).c_str());
79+
retval = PAPI_attach(event_sets_.back(), inf_thread_ids_[i]);
80+
if (retval != PAPI_OK)
81+
handle_error(retval, __LINE__, __FILE__);
82+
83+
// Start eventset
84+
retval = PAPI_start(event_sets_.back());
6685
if (retval != PAPI_OK)
6786
handle_error(retval, __LINE__, __FILE__);
6887
}
88+
event_values_.resize(papi_events_.size());
6989

70-
// Attach event to thread
71-
LOG_MESSAGE(
72-
TRITONSERVER_LOG_INFO,
73-
("Attaching to " + std::to_string(inf_thread_ids_[i])).c_str());
74-
retval = PAPI_attach(event_sets_.back(), inf_thread_ids_[i]);
75-
if (retval != PAPI_OK)
76-
handle_error(retval, __LINE__, __FILE__);
90+
// Separately we will also track operation timings in nanos
91+
papi_events_.push_back("TIME_NS");
7792
}
78-
event_values_.resize(papi_events_.size());
79-
80-
// Separately we will also track operation timings in nanos
81-
papi_events_.push_back("TIME_NS");
8293

8394
// Handle uncore events separately
8495
if (!papi_uncore_events_.empty()) {
@@ -92,6 +103,10 @@ class PapiProfiler : public tflite::Profiler {
92103
handle_error(retval, __LINE__, __FILE__);
93104
}
94105
uncore_event_values_.resize(papi_uncore_events_.size());
106+
// Start uncore eventset
107+
retval = PAPI_start(uncore_event_set_);
108+
if (retval != PAPI_OK)
109+
handle_error(retval, __LINE__, __FILE__);
95110
}
96111
}
97112

@@ -119,6 +134,8 @@ class PapiProfiler : public tflite::Profiler {
119134
<< papi_events_[i % papi_events_.size()] << ","
120135
<< event.second[i] << "\n";
121136
}
137+
}
138+
for (auto& event : results_uncore_) {
122139
// Now write the uncore events with a dummy thread id of -1
123140
for (uint64_t i = 0; i < results_uncore_[event.first].size(); ++i) {
124141
myfile << event.first << "," << -1 << ","
@@ -152,17 +169,9 @@ class PapiProfiler : public tflite::Profiler {
152169
trace_event_tag += ("_" + std::to_string(event_metadata1));
153170

154171
int retval;
155-
// For the event set attached to each thread, start or restart the event set
156-
for (uint64_t i = 0; i < event_sets_.size(); ++i) {
157-
int state;
158-
PAPI_state(event_sets_[i], &state);
159-
if (!(state & PAPI_RUNNING)) {
160-
// Begin tracking counters
161-
retval = PAPI_start(event_sets_[i]);
162-
if (retval != PAPI_OK)
163-
handle_error(retval, __LINE__, __FILE__);
164172

165-
} else {
173+
if (!papi_events_.empty()) { // Reset event set attached to each thread
174+
for (uint64_t i = 0; i < event_sets_.size(); ++i) {
166175
// Reset counters
167176
retval = PAPI_reset(event_sets_[i]);
168177
if (retval != PAPI_OK)
@@ -172,26 +181,16 @@ class PapiProfiler : public tflite::Profiler {
172181

173182
// Handle uncore events
174183
if (!papi_uncore_events_.empty()) {
175-
int state;
176-
PAPI_state(uncore_event_set_, &state);
177-
if (!(state & PAPI_RUNNING)) {
178-
// Begin tracking counters
179-
retval = PAPI_start(uncore_event_set_);
180-
if (retval != PAPI_OK)
181-
handle_error(retval, __LINE__, __FILE__);
182-
183-
} else {
184-
// Reset counters
185-
retval = PAPI_reset(uncore_event_set_);
186-
if (retval != PAPI_OK)
187-
handle_error(retval, __LINE__, __FILE__);
188-
}
184+
// Reset counters
185+
retval = PAPI_reset(uncore_event_set_);
186+
if (retval != PAPI_OK)
187+
handle_error(retval, __LINE__, __FILE__);
189188
}
190189

191-
uint32_t event_handle = event_index_++;
192-
papi_regions_[event_handle] = trace_event_tag;
193-
timings_[event_handle] = PAPI_get_real_nsec();
194-
return event_handle;
190+
event_index_++;
191+
papi_regions_[event_index_] = std::move(trace_event_tag);
192+
timings_[event_index_] = PAPI_get_real_nsec();
193+
return event_index_;
195194
}
196195

197196
void EndEvent(uint32_t event_handle) override
@@ -200,32 +199,44 @@ class PapiProfiler : public tflite::Profiler {
200199
return;
201200
}
202201

203-
timings_[event_handle] = PAPI_get_real_nsec() - timings_[event_handle];
202+
long long op_latency = PAPI_get_real_nsec() - timings_[event_handle];
204203

205-
int retval;
206-
// For each thread we are profiling
207-
for (uint64_t i = 0; i < event_sets_.size(); ++i) {
208-
retval = PAPI_read(event_sets_[i], event_values_.data());
209-
if (retval != PAPI_OK)
210-
handle_error(retval, __LINE__, __FILE__);
211-
// For each of the events we collected a counter value for
212-
for (auto val : event_values_) {
213-
results_[papi_regions_[event_handle]].push_back(val);
214-
}
204+
// For performance reserve space for 10000 elements for each perf event in
205+
// results
206+
if (results_[papi_regions_[event_handle]].empty()) {
207+
results_[papi_regions_[event_handle]].reserve(
208+
papi_events_.size() * 10000);
209+
}
210+
if (results_uncore_[papi_regions_[event_handle]].empty()) {
211+
results_uncore_[papi_regions_[event_handle]].reserve(
212+
papi_uncore_events_.size() * 10000);
215213
}
216214

217-
// Push back the op timing
218-
results_[papi_regions_[event_handle]].push_back(timings_[event_handle]);
215+
int retval;
219216

217+
if (!papi_events_.empty()) { // For each thread we are profiling
218+
for (uint64_t i = 0; i < event_sets_.size(); ++i) {
219+
retval = PAPI_read(event_sets_[i], event_values_.data());
220+
if (retval != PAPI_OK)
221+
handle_error(retval, __LINE__, __FILE__);
222+
// Write event counter values to end of results vector for current op
223+
results_[papi_regions_[event_handle]].insert(
224+
results_[papi_regions_[event_handle]].end(), event_values_.begin(),
225+
event_values_.end());
226+
}
227+
228+
// Push back the op timing
229+
results_[papi_regions_[event_handle]].push_back(op_latency);
230+
}
220231
// Handle uncore events
221232
if (!papi_uncore_events_.empty()) {
222233
retval = PAPI_read(uncore_event_set_, uncore_event_values_.data());
223234
if (retval != PAPI_OK)
224235
handle_error(retval, __LINE__, __FILE__);
225236
// For each of the events we collected a counter value for
226-
for (auto val : uncore_event_values_) {
227-
results_uncore_[papi_regions_[event_handle]].push_back(val);
228-
}
237+
results_uncore_[papi_regions_[event_handle]].insert(
238+
results_uncore_[papi_regions_[event_handle]].end(),
239+
uncore_event_values_.begin(), uncore_event_values_.end());
229240
}
230241
}
231242

@@ -278,12 +289,7 @@ MaybeCreatePapiProfiler()
278289
// Per core events
279290
char* papi_events = getenv("PAPI_EVENTS");
280291
std::vector<std::string> papi_events_vec;
281-
if (papi_events == NULL) {
282-
LOG_MESSAGE(
283-
TRITONSERVER_LOG_WARN,
284-
"PAPI_EVENTS not specified, op level profiling disabled!");
285-
return nullptr;
286-
} else {
292+
if (papi_events != NULL) {
287293
// Parse out all papi events individually
288294
std::stringstream ss(papi_events);
289295
while (ss.good()) {
@@ -303,12 +309,7 @@ MaybeCreatePapiProfiler()
303309
// Uncore events
304310
char* papi_uncore_events = getenv("PAPI_UNCORE_EVENTS");
305311
std::vector<std::string> papi_uncore_events_vec;
306-
if (papi_uncore_events == NULL) {
307-
LOG_MESSAGE(
308-
TRITONSERVER_LOG_WARN,
309-
"PAPI_UNCORE_EVENTS not specified, op level profiling disabled!");
310-
return nullptr;
311-
} else {
312+
if (papi_uncore_events != NULL) {
312313
// Parse out all papi events individually
313314
std::stringstream ss(papi_uncore_events);
314315
while (ss.good()) {
@@ -324,6 +325,15 @@ MaybeCreatePapiProfiler()
324325
papi_uncore_events_vec.push_back(substr);
325326
}
326327
}
328+
329+
if ((papi_events == NULL) && (papi_uncore_events == NULL)) {
330+
LOG_MESSAGE(
331+
TRITONSERVER_LOG_WARN,
332+
"PAPI_EVENTS nor PAPI_UNCORE_EVENTS specified, op level profiling "
333+
"disabled!");
334+
return nullptr;
335+
}
336+
327337
return std::unique_ptr<tflite::Profiler>(
328338
new PapiProfiler(papi_events_vec, papi_uncore_events_vec));
329339
}

0 commit comments

Comments
 (0)