Skip to content

Commit

Permalink
powX: log multiPlanIdx with kernelio, to aid multi-gpu troubleshooting
Browse files: browse the repository at this point in the history
  • Loading branch information
evetsso authored Jan 19, 2024
1 parent 77bdad5 commit d6321a9
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 15 deletions.
3 changes: 2 additions & 1 deletion library/src/include/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ struct rocfft_execution_info_t
void TransformPowX(const ExecPlan& execPlan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info);
rocfft_execution_info info,
size_t multiPlanIdx);

#endif // TRANSFORM_H
12 changes: 8 additions & 4 deletions library/src/include/tree_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,8 @@ struct MultiPlanItem
virtual void ExecuteAsync(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info)
rocfft_execution_info info,
size_t multiPlanIdx)
= 0;

// wait for async operations to finish
Expand Down Expand Up @@ -895,7 +896,8 @@ struct CommScatter : public MultiPlanItem
void ExecuteAsync(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info) override;
rocfft_execution_info info,
size_t multiPlanIdx) override;
void Wait() override;

void Print(rocfft_ostream& os, const int indent) const override;
Expand Down Expand Up @@ -947,7 +949,8 @@ struct CommGather : public MultiPlanItem
void ExecuteAsync(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info) override;
rocfft_execution_info info,
size_t multiPlanIdx) override;
void Wait() override;

void Print(rocfft_ostream& os, const int indent) const override;
Expand Down Expand Up @@ -983,7 +986,8 @@ struct ExecPlan : public MultiPlanItem
void ExecuteAsync(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info) override;
rocfft_execution_info info,
size_t multiPlanIdx) override;

void Wait() override;

Expand Down
11 changes: 6 additions & 5 deletions library/src/powX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ void SetDefaultCallback(const TreeNode* node, const SetCallbackType& type, void*
void TransformPowX(const ExecPlan& execPlan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info)
rocfft_execution_info info,
size_t multiPlanIdx)
{
assert(execPlan.execSeq.size() == execPlan.devFnCall.size());
assert(execPlan.execSeq.size() == execPlan.gridParam.size());
Expand Down Expand Up @@ -706,8 +707,8 @@ void TransformPowX(const ExecPlan& execPlan,
if(emit_kernelio_log && data.node->scheme != CS_KERNEL_CHIRP)
{
kernelio_stream = LogSingleton::GetInstance().GetKernelIOOS();
*kernelio_stream << "--- --- kernel " << i << " (" << PrintScheme(data.node->scheme)
<< ") input:" << std::endl;
*kernelio_stream << "--- --- multiPlanIdx " << multiPlanIdx << " kernel " << i << " ("
<< PrintScheme(data.node->scheme) << ") input:" << std::endl;

if(hipDeviceSynchronize() != hipSuccess)
throw std::runtime_error("hipDeviceSynchronize failure");
Expand Down Expand Up @@ -866,12 +867,12 @@ void TransformPowX(const ExecPlan& execPlan,
execPlan.rootPlan->outArrayType);
}

*kernelio_stream << "final output:\n";
*kernelio_stream << "multiPlanIdx " << multiPlanIdx << " final output:\n";
DebugPrintBuffer(*kernelio_stream,
execPlan.rootPlan->outArrayType,
execPlan.rootPlan->precision,
out_buffer_offset,
execPlan.oLength,
execPlan.rootPlan->GetOutputLength(),
execPlan.rootPlan->outStride,
execPlan.rootPlan->oDist,
execPlan.rootPlan->batch);
Expand Down
25 changes: 22 additions & 3 deletions library/src/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,23 @@ void rocfft_plan_t::Execute(void* in_buffer[], void* out_buffer[], rocfft_execut
// vector of topologically sorted indexes to the items in multiPlan
auto sortedIdx = MultiPlanTopologicalSort();

// log input/output pointers
if(LOG_PLAN_ENABLED())
{
auto& os = *LogSingleton::GetInstance().GetPlanOS();
for(size_t i = 0; i < desc.count_pointers(desc.inFields, desc.inArrayType); ++i)
{
os << "user input " << i << ": " << in_buffer[i] << std::endl;
}
if(placement == rocfft_placement_notinplace)
{
for(size_t i = 0; i < desc.count_pointers(desc.outFields, desc.outArrayType); ++i)
{
os << "user output " << i << ": " << out_buffer[i] << std::endl;
}
}
}

LogFields("input", desc.inFields);
LogFields("output", desc.outFields);

Expand Down Expand Up @@ -246,7 +263,7 @@ void rocfft_plan_t::Execute(void* in_buffer[], void* out_buffer[], rocfft_execut
// done waiting for all our antecedents, so this item can now proceed

// launch this item async
item.ExecuteAsync(this, in_buffer, out_buffer, info);
item.ExecuteAsync(this, in_buffer, out_buffer, info, idx);
}

// finished executing all items, wait for outstanding work to complete
Expand Down Expand Up @@ -295,7 +312,8 @@ rocfft_status rocfft_execute(const rocfft_plan plan,
void ExecPlan::ExecuteAsync(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info)
rocfft_execution_info info,
size_t multiPlanIdx)
{
rocfft_scoped_device dev(deviceID);

Expand Down Expand Up @@ -383,7 +401,8 @@ void ExecPlan::ExecuteAsync(const rocfft_plan plan,
in_transform_ptrs,
(rootPlan->placement == rocfft_placement_inplace) ? in_transform_ptrs
: out_transform_ptrs,
&exec_info);
&exec_info,
multiPlanIdx);
// all work is enqueued to the stream, record the event on
// the stream. Not needed for single-device plans.
if(mgpuPlan)
Expand Down
6 changes: 4 additions & 2 deletions library/src/tree_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,8 @@ std::string MultiPlanItem::PrintBufferPtrOffset(const BufferPtr& ptr, size_t off
void CommScatter::ExecuteAsync(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info)
rocfft_execution_info info,
size_t multiPlanIdx)
{
rocfft_scoped_device dev(srcDeviceID);
stream.alloc();
Expand Down Expand Up @@ -494,7 +495,8 @@ void CommScatter::Print(rocfft_ostream& os, const int indent) const
void CommGather::ExecuteAsync(const rocfft_plan plan,
void* in_buffer[],
void* out_buffer[],
rocfft_execution_info info)
rocfft_execution_info info,
size_t multiPlanIdx)
{
streams.resize(ops.size());
events.resize(ops.size());
Expand Down

0 comments on commit d6321a9

Please sign in to comment.