Skip to content

Commit

Permalink
Merge pull request #2188 from DARMA-tasking/2187-nested-trace-user-ev…
Browse files Browse the repository at this point in the history
…ents-output-in-wrong-order

#2187: nested trace user events output in wrong order
  • Loading branch information
nlslatt authored Sep 5, 2024
2 parents 9bf7c94 + b7419f3 commit bffea2a
Show file tree
Hide file tree
Showing 11 changed files with 625 additions and 138 deletions.
11 changes: 10 additions & 1 deletion docs/md/trace.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,13 @@ In order to customize when tracing is enabled and disabled, a trace
specification file can be passed to \vt via a command-line flag:
`--vt_trace_spec --vt_trace_spec_file=filename.spec`.

For details about vt's Specification File see \ref spec-file
For details about vt's Specification File see \ref spec-file

\section incremental-tracing Incremental Trace Output

The trace events can be configured to be saved to the file in the incremental matter.
To configure the interval of the flushes use the `--vt_trace_flush_size=X` parameter.
The `X` stands for the number of trace events before the next flush.

\note The incremental flushing will be blocked in the case of an incomplete user note.
In that scenario there will be no output to the files. All trace events will be kept in memory and will be tried to be flushed on the next interval if the incomplete notes were closed.
9 changes: 4 additions & 5 deletions src/vt/event/event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,9 @@ AsyncEvent::EventStateType AsyncEvent::testEventComplete(EventType const& event)
void AsyncEvent::testEventsTrigger(int const& num_events) {
# if vt_check_enabled(trace_enabled)
int32_t num_completed = 0;
auto tr_begin = TimeType{0.0};

std::unique_ptr<trace::TraceScopedNote> trace_note;
if (theConfig()->vt_trace_event_polling) {
tr_begin = timing::getCurrentTime();
trace_note = std::make_unique<trace::TraceScopedNote>(trace_event_polling);
}
# endif

Expand Down Expand Up @@ -356,9 +355,9 @@ void AsyncEvent::testEventsTrigger(int const& num_events) {
# if vt_check_enabled(trace_enabled)
if (theConfig()->vt_trace_event_polling) {
if (num_completed > 0) {
TimeType tr_end = timing::getCurrentTime();
auto tr_note = fmt::format("completed {} of {}", num_completed, cur);
trace::addUserBracketedNote(tr_begin, tr_end, tr_note, trace_event_polling);
trace_note->setNote(tr_note);
trace_note->end();
}
} else {
(void)num_completed;
Expand Down
32 changes: 16 additions & 16 deletions src/vt/messaging/active.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,9 +349,9 @@ EventType ActiveMessenger::sendMsgMPI(
{
VT_ALLOW_MPI_CALLS;
#if vt_check_enabled(trace_enabled)
auto tr_begin = TimeType{0.};
std::unique_ptr<trace::TraceScopedNote> trace_note;
if (theConfig()->vt_trace_mpi) {
tr_begin = vt::timing::getCurrentTime();
trace_note = std::make_unique<trace::TraceScopedNote>(trace_isend);
}
#endif
int const ret = MPI_Isend(
Expand All @@ -362,9 +362,9 @@ EventType ActiveMessenger::sendMsgMPI(

#if vt_check_enabled(trace_enabled)
if (theConfig()->vt_trace_mpi) {
auto tr_end = vt::timing::getCurrentTime();
auto tr_note = fmt::format("Isend(AM): dest={}, bytes={}", dest, msg_size);
trace::addUserBracketedNote(tr_begin, tr_end, tr_note, trace_isend);
trace_note->setNote(tr_note);
trace_note->end();
}
#endif
}
Expand Down Expand Up @@ -581,9 +581,9 @@ std::tuple<EventType, int> ActiveMessenger::sendDataMPI(
);
{
#if vt_check_enabled(trace_enabled)
auto tr_begin = TimeType{0.};
std::unique_ptr<trace::TraceScopedNote> trace_note;
if (theConfig()->vt_trace_mpi) {
tr_begin = vt::timing::getCurrentTime();
trace_note = std::make_unique<trace::TraceScopedNote>(trace_isend);
}
#endif

Expand All @@ -603,9 +603,9 @@ std::tuple<EventType, int> ActiveMessenger::sendDataMPI(

#if vt_check_enabled(trace_enabled)
if (theConfig()->vt_trace_mpi) {
auto tr_end = vt::timing::getCurrentTime();
auto tr_note = fmt::format("Isend(Data): dest={}, bytes={}", dest, subsize);
trace::addUserBracketedNote(tr_begin, tr_end, tr_note, trace_isend);
trace_note->setNote(tr_note);
trace_note->end();
}
#endif
}
Expand Down Expand Up @@ -771,9 +771,9 @@ void ActiveMessenger::recvDataDirect(
);

#if vt_check_enabled(trace_enabled)
auto tr_begin = TimeType{0.};
std::unique_ptr<trace::TraceScopedNote> trace_note;
if (theConfig()->vt_trace_mpi) {
tr_begin = vt::timing::getCurrentTime();
trace_note = std::make_unique<trace::TraceScopedNote>(trace_irecv);
}
#endif

Expand All @@ -790,12 +790,12 @@ void ActiveMessenger::recvDataDirect(

#if vt_check_enabled(trace_enabled)
if (theConfig()->vt_trace_mpi) {
auto tr_end = vt::timing::getCurrentTime();
auto tr_note = fmt::format(
"Irecv(Data): from={}, bytes={}",
from, sublen
);
trace::addUserBracketedNote(tr_begin, tr_end, tr_note, trace_irecv);
trace_note->setNote(tr_note);
trace_note->end();
}
#endif

Expand Down Expand Up @@ -1008,9 +1008,9 @@ bool ActiveMessenger::tryProcessIncomingActiveMsg() {

{
#if vt_check_enabled(trace_enabled)
auto tr_begin = TimeType{0.};
std::unique_ptr<trace::TraceScopedNote> trace_note;
if (theConfig()->vt_trace_mpi) {
tr_begin = vt::timing::getCurrentTime();
trace_note = std::make_unique<trace::TraceScopedNote>(trace_irecv);
}
#endif

Expand All @@ -1024,12 +1024,12 @@ bool ActiveMessenger::tryProcessIncomingActiveMsg() {

#if vt_check_enabled(trace_enabled)
if (theConfig()->vt_trace_mpi) {
auto tr_end = vt::timing::getCurrentTime();
auto tr_note = fmt::format(
"Irecv(AM): from={}, bytes={}",
stat.MPI_SOURCE, num_probe_bytes
);
trace::addUserBracketedNote(tr_begin, tr_end, tr_note, trace_irecv);
trace_note->setNote(tr_note);
trace_note->end();
}
#endif
}
Expand Down
8 changes: 4 additions & 4 deletions src/vt/messaging/request_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ struct RequestHolder {
bool testAll(Callable c, int& num_mpi_tests) {
# if vt_check_enabled(trace_enabled)
std::size_t const holder_size_start = holder_.size();
auto tr_begin = TimeType{0.0};
std::unique_ptr<trace::TraceScopedNote> trace_note;
if (theConfig()->vt_trace_irecv_polling) {
tr_begin = vt::timing::getCurrentTime();
trace_note = std::make_unique<trace::TraceScopedNote>(trace_user_event_);
}
# endif

Expand Down Expand Up @@ -131,13 +131,13 @@ struct RequestHolder {
# if vt_check_enabled(trace_enabled)
if (theConfig()->vt_trace_irecv_polling) {
if (holder_size_start > 0) {
auto tr_end = vt::timing::getCurrentTime();
auto tr_note = fmt::format(
"completed {} of {}",
holder_size_start - holder_.size(),
holder_size_start
);
trace::addUserBracketedNote(tr_begin, tr_end, tr_note, trace_user_event_);
trace_note->setNote(tr_note);
trace_note->end();
}
}
# endif
Expand Down
3 changes: 2 additions & 1 deletion src/vt/trace/trace.cc
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,8 @@ void Trace::addUserEventBracketedManual(
);

auto id = user_event_.createEvent(true, false, 0, event);
addUserEventBracketed(id, begin, end);
addUserEventBracketedBeginTime(id, begin);
addUserEventBracketedEndTime(id, end);
}

void Trace::addMemoryEvent(std::size_t memory, TimeType time) {
Expand Down
102 changes: 92 additions & 10 deletions src/vt/trace/trace_lite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -248,43 +248,121 @@ bool TraceLite::checkDynamicRuntimeEnabled(bool is_end_event) {
(trace_enabled_cur_phase_ or is_end_event);
}

void TraceLite::addUserEventBracketed(
UserEventIDType event, TimeType begin, TimeType end) {
void TraceLite::addUserEventBracketedBeginTime(
UserEventIDType event, TimeType begin
) {
if (not checkDynamicRuntimeEnabled()) {
return;
}

vt_debug_print(
normal, trace,
"Trace::addUserEventBracketed: event={:x}, begin={}, end={}\n",
event, begin, end);
"Trace::addUserEventBracketedBegin: event={:x}, begin={}\n",
event, begin
);

auto const type = TraceConstantsType::UserEventPair;
NodeType const node = theContext()->getNode();

logEvent(LogType{begin, type, node, event, true});
}

void TraceLite::addUserEventBracketedEndTime(
UserEventIDType event, TimeType end
) {
if (not checkDynamicRuntimeEnabled()) {
return;
}

vt_debug_print(
normal, trace,
"Trace::addUserEventBracketedEnd: event={:x}, end={}\n",
event, end
);

auto const type = TraceConstantsType::UserEventPair;
NodeType const node = theContext()->getNode();

logEvent(LogType{end, type, node, event, false});
}

void TraceLite::addUserBracketedNote(
TimeType const begin, TimeType const end, std::string const& note,
TraceEventIDType const event
void TraceLite::addUserNoteBracketedBeginTime(
TraceEventIDType const event, std::string const& note
) {
if (not checkDynamicRuntimeEnabled()) {
return;
}
auto begin = getCurrentTime();

vt_debug_print(
normal, trace,
"Trace::addUserBracketedNote: begin={}, end={}, note={}, event={}\n",
begin, end, note, event
"Trace::addUserNoteBracketedBegin: begin={}, note={}, event={}\n",
begin, note, event
);

auto const type = TraceConstantsType::UserSuppliedBracketedNote;
logEvent(LogType{begin, begin, type, note, event});

logEvent(LogType{begin, end, type, note, event});
// Save event log for fixing up the end time later
if (event != no_trace_event) {
auto* last_trace = getLastTraceEvent();
incomplete_notes_[event].push(last_trace);
}
}

void TraceLite::updateNoteEndTime(
const TraceEventIDType& event, const TimeType& end, const std::string* new_note
) {
auto iter = incomplete_notes_.find(event);
vtAssertExpr(iter != incomplete_notes_.end());
// update data in the Log
auto& last_trace = iter->second.top();
last_trace->end_time = end;
if (new_note != nullptr) {
last_trace->setUserNote(*new_note);
}
// clean up pointers to Log
iter->second.pop();
if (iter->second.empty()) {
incomplete_notes_.erase(iter);
}
}

void TraceLite::addUserNoteBracketedEndTime(TraceEventIDType const event) {
if (not checkDynamicRuntimeEnabled()) {
return;
}
auto end = getCurrentTime();

vt_debug_print(
normal, trace,
"Trace::addUserNoteBracketedEnd: end={}, event={}\n",
end, event
);

if (event != no_trace_event) {
updateNoteEndTime(event, end, nullptr);
}
}

void TraceLite::addUserNoteBracketedEndTime(
TraceEventIDType const event, std::string const& new_note
) {
if (not checkDynamicRuntimeEnabled()) {
return;
}
auto end = getCurrentTime();

vt_debug_print(
normal, trace,
"Trace::addUserNoteBracketedEnd: end={}, new_note={}, event={}\n",
end, new_note, event
);

if (event != no_trace_event) {
updateNoteEndTime(event, end, &new_note);
}
}

TraceEventIDType TraceLite::logEvent(LogType&& log) {
if (not checkDynamicRuntimeEnabled()) {
Expand Down Expand Up @@ -448,6 +526,10 @@ void TraceLite::flushTracesFile(bool useGlobalSync) {
// (Consider pushing out: barrier usages are probably domain-specific.)
theCollective()->barrier();
}
if (incomplete_notes_.size() > 0) {
// wait until all incomplete events are patched up before flushing to disk
return;
}
if (
traces_.size() >=
static_cast<std::size_t>(theConfig()->vt_trace_flush_size)) {
Expand Down
Loading

0 comments on commit bffea2a

Please sign in to comment.