Skip to content

Commit 9203121

Browse files
dhruvachak authored and ronlieb committed
[libomptarget] [OMPT] Fixed return address computation for OMPT events. (llvm#80498)
Currently, __builtin_return_address is used to generate the return address when the callback invoker is created. However, this may result in the return address pointing to an internal runtime function, which is not what a tool would typically want: a tool wants to know the user code from which the runtime entry point was invoked.

This change adds a thread-local variable that is assigned the return address at the OpenMP runtime entry points. An RAII object is used to manage the modifications to the thread-local variable. Whenever the return address is required for OMPT events, it is read from the thread-local variable.

Adapted to work on amd-staging.

Change-Id: I3c85bdf48f81edbc421b9030f26f344ed5b963b2
1 parent c19d3ff commit 9203121

File tree

11 files changed

+282
-43
lines changed

11 files changed

+282
-43
lines changed

openmp/libomptarget/include/OmptCommonDefs.h

-7
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,6 @@
7676
#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_framepointer
7777
#endif
7878

79-
// Allow 0 as the only argument to avoid unpredictable effects.
80-
// The setter stores the return address to a thread local variable.
81-
#define OMPT_SET_RETURN_ADDRESS \
82-
llvm::omp::target::ompt::ReturnAddress = __builtin_return_address(0)
83-
// The getter returns the address stored in the thread local variable.
84-
#define OMPT_GET_RETURN_ADDRESS llvm::omp::target::ompt::ReturnAddress
85-
8679
#define OMPT_PTR_UNKNOWN ((void *)0)
8780

8881
#define performIfOmptInitialized(stmt) \

openmp/libomptarget/include/OpenMP/OMPT/Interface.h

+33
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,8 @@ class Interface {
382382
extern thread_local Interface RegionInterface;
383383

384384
/// Thread local variable holding the return address.
385+
/// When using __builtin_return_address to set the return address,
386+
/// allow 0 as the only argument to avoid unpredictable effects.
385387
extern thread_local void *ReturnAddress;
386388

387389
template <typename FuncTy, typename ArgsTy, size_t... IndexSeq>
@@ -422,11 +424,42 @@ template <typename FunctionPairTy, typename... ArgsTy>
422424
InterfaceRAII(FunctionPairTy Callbacks, ArgsTy... Args)
423425
-> InterfaceRAII<FunctionPairTy, ArgsTy...>;
424426

427+
/// Used to set and reset the thread-local return address. The RAII is expected
428+
/// to be created at a runtime entry point when the return address should be
429+
/// null. If so, the return address is set and \p IsSetter is set in the ctor.
430+
/// The dtor resets the return address only if the corresponding object set it.
431+
/// So if the RAII is called from a nested runtime function, the ctor/dtor will
432+
/// do nothing since the thread local return address is already set.
433+
class ReturnAddressSetterRAII {
434+
public:
435+
ReturnAddressSetterRAII(void *RA) : IsSetter(false) {
436+
// Handle nested calls. If already set, do not set again since it
437+
// must be in a nested call.
438+
if (ReturnAddress == nullptr) {
439+
// Store the return address to a thread local variable.
440+
ReturnAddress = RA;
441+
IsSetter = true;
442+
}
443+
}
444+
~ReturnAddressSetterRAII() {
445+
// Reset the return address if this object set it.
446+
if (IsSetter)
447+
ReturnAddress = nullptr;
448+
}
449+
450+
private:
451+
// Did this object set the thread-local return address?
452+
bool IsSetter;
453+
};
454+
425455
} // namespace ompt
426456
} // namespace target
427457
} // namespace omp
428458
} // namespace llvm
429459

460+
// The getter returns the address stored in the thread local variable.
461+
#define OMPT_GET_RETURN_ADDRESS llvm::omp::target::ompt::ReturnAddress
462+
430463
#pragma pop_macro("DEBUG_PREFIX")
431464

432465
#endif // OMPT_SUPPORT

openmp/libomptarget/src/LegacyAPI.cpp

+20-8
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,21 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "OpenMP/OMPT/Interface.h"
1314
#include "omptarget.h"
1415
#include "private.h"
1516

1617
#include "Shared/Profile.h"
1718

19+
#ifdef OMPT_SUPPORT
20+
using namespace llvm::omp::target::ompt;
21+
#endif
22+
1823
// Legacy entry point: forwards its arguments unchanged to
// __tgt_target_data_begin_mapper, passing nullptr for the leading argument
// and for the two trailing arguments (presumably the source location, the
// argument names and the argument mappers -- confirm against the mapper's
// declaration). Returns nothing.
EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
1924
void **ArgsBase, void **Args,
2025
int64_t *ArgSizes, int64_t *ArgTypes) {
2126
TIMESCOPE();
27+
// Hand this function's own return address to ReturnAddressSetterRAII for
// the duration of the call. The object is only created when
// OMPT_IF_BUILT keeps its argument (presumably when OMPT support is
// compiled in -- verify against the macro definition).
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
2228
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
2329
ArgSizes, ArgTypes, nullptr, nullptr);
2430
}
@@ -30,7 +36,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
3036
int32_t NoAliasDepNum,
3137
void *NoAliasDepList) {
3238
TIMESCOPE();
33-
39+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
3440
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
3541
ArgSizes, ArgTypes, nullptr, nullptr);
3642
}
@@ -39,6 +45,7 @@ EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
3945
void **ArgsBase, void **Args,
4046
int64_t *ArgSizes, int64_t *ArgTypes) {
4147
TIMESCOPE();
48+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
4249
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
4350
ArgSizes, ArgTypes, nullptr, nullptr);
4451
}
@@ -47,6 +54,7 @@ EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
4754
void **ArgsBase, void **Args,
4855
int64_t *ArgSizes, int64_t *ArgTypes) {
4956
TIMESCOPE();
57+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
5058
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
5159
ArgSizes, ArgTypes, nullptr, nullptr);
5260
}
@@ -56,7 +64,7 @@ EXTERN void __tgt_target_data_update_nowait(
5664
int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
5765
int32_t NoAliasDepNum, void *NoAliasDepList) {
5866
TIMESCOPE();
59-
67+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
6068
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
6169
ArgSizes, ArgTypes, nullptr, nullptr);
6270
}
@@ -68,7 +76,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
6876
int32_t NoAliasDepNum,
6977
void *NoAliasDepList) {
7078
TIMESCOPE();
71-
79+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
7280
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
7381
ArgSizes, ArgTypes, nullptr, nullptr);
7482
}
@@ -78,6 +86,7 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
7886
int64_t *ArgSizes, int64_t *ArgTypes,
7987
map_var_info_t *ArgNames, void **ArgMappers) {
8088
TIMESCOPE_WITH_IDENT(Loc);
89+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
8190
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
8291
ArgTypes, ArgNames, ArgMappers, 0};
8392
return __tgt_target_kernel(Loc, DeviceId, -1, -1, HostPtr, &KernelArgs);
@@ -87,6 +96,7 @@ EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
8796
void **ArgsBase, void **Args, int64_t *ArgSizes,
8897
int64_t *ArgTypes) {
8998
TIMESCOPE();
99+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
90100
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
91101
ArgSizes, ArgTypes, nullptr, nullptr);
92102
}
@@ -96,7 +106,7 @@ EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
96106
int64_t *ArgTypes, int32_t DepNum, void *DepList,
97107
int32_t NoAliasDepNum, void *NoAliasDepList) {
98108
TIMESCOPE();
99-
109+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
100110
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
101111
ArgSizes, ArgTypes, nullptr, nullptr);
102112
}
@@ -107,7 +117,7 @@ EXTERN int __tgt_target_nowait_mapper(
107117
map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
108118
int32_t NoAliasDepNum, void *NoAliasDepList) {
109119
TIMESCOPE_WITH_IDENT(Loc);
110-
120+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
111121
return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
112122
ArgSizes, ArgTypes, ArgNames, ArgMappers);
113123
}
@@ -120,7 +130,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
120130
void **ArgMappers, int32_t NumTeams,
121131
int32_t ThreadLimit) {
122132
TIMESCOPE_WITH_IDENT(Loc);
123-
133+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
124134
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
125135
ArgTypes, ArgNames, ArgMappers, 0};
126136
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
@@ -132,6 +142,7 @@ EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
132142
int64_t *ArgTypes, int32_t NumTeams,
133143
int32_t ThreadLimit) {
134144
TIMESCOPE();
145+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
135146
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
136147
Args, ArgSizes, ArgTypes, nullptr, nullptr,
137148
NumTeams, ThreadLimit);
@@ -145,7 +156,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
145156
void *DepList, int32_t NoAliasDepNum,
146157
void *NoAliasDepList) {
147158
TIMESCOPE();
148-
159+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
149160
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
150161
Args, ArgSizes, ArgTypes, nullptr, nullptr,
151162
NumTeams, ThreadLimit);
@@ -158,7 +169,7 @@ EXTERN int __tgt_target_teams_nowait_mapper(
158169
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
159170
void *NoAliasDepList) {
160171
TIMESCOPE_WITH_IDENT(Loc);
161-
172+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
162173
return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
163174
Args, ArgSizes, ArgTypes, ArgNames,
164175
ArgMappers, NumTeams, ThreadLimit);
@@ -186,6 +197,7 @@ EXTERN int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId,
186197
int32_t NoAliasDepNum,
187198
void *NoAliasDepList) {
188199
TIMESCOPE_WITH_IDENT(Loc);
200+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
189201
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
190202
KernelArgs);
191203
}

0 commit comments

Comments
 (0)