-
Notifications
You must be signed in to change notification settings - Fork 56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Programmatically filter out tool-induced fences from the sampler tool utility #194
Changes from 14 commits
6a66fff
585b7bf
060c1aa
109843a
9e76661
2e97fa6
8c5e523
2c1efac
0f4a089
9c02dda
8264f55
85b4d1a
95c3af2
5ee1a1f
20e5a30
758f3f9
91a48f5
a6c7bc1
eda9d83
5873f0a
874ad93
a76e1ac
fd01d94
6e47ab9
57f9a1f
fa43e59
ffc4fd8
70cdf6a
cf343f5
98b4caf
43b983d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,15 +3,19 @@ | |
#include <cstdlib> | ||
#include <cstring> | ||
#include <dlfcn.h> | ||
// #include <impl/Kokkos_Profiling_Interface.hpp> | ||
#include "../../profiling/all/kp_core.hpp" | ||
#include "kp_config.hpp" | ||
|
||
// using Kokkos::Tools::Experimental; | ||
// using mytpi_type = Kokkos::Tools::Experimental::ToolProgrammingInterface; | ||
namespace KokkosTools { | ||
namespace Sampler { | ||
static uint64_t uniqID = 0; | ||
static uint64_t kernelSampleSkip = 101; | ||
static int tool_verbosity = 0; | ||
static int tool_globFence = 0; | ||
// mytpi_type mytpi; | ||
|
||
typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*); | ||
typedef void (*finalizeFunction)(); | ||
|
@@ -27,6 +31,45 @@ static endFunction endForCallee = NULL; | |
static endFunction endScanCallee = NULL; | ||
static endFunction endReduceCallee = NULL; | ||
|
||
void getGlobFenceChoice() { | ||
vlkale marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// re-read environment variable to get most accurate value | ||
const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); | ||
if (NULL != tool_globFence_str) { | ||
tool_globFence = atoi(tool_globFence_str); | ||
} // else | ||
// tool_globFence = 0; | ||
} | ||
|
||
// void kokkosp_tool_invoked_fence(const uint32_t, Kokkos_Tools_SpaceHandle* | ||
// myspchandle, Kokkos_Tools_toolInvokedFenceFunction tool_fence) | ||
//{ | ||
//(*tool_fence)(myspchandle, ); | ||
// } | ||
|
||
// set of functions from Kokkos ToolProgrammingInterface (includes fence) | ||
Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs; | ||
|
||
void invoke_ktools_fence(uint32_t devID) { | ||
// assert( tpi_funcs ! = NULL) | ||
if (tpi_funcs.fence != nullptr) { | ||
tpi_funcs.fence(devID); | ||
} else | ||
printf( | ||
"KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked " | ||
"Fence is NULL!\n"); | ||
} | ||
|
||
void kokkosp_provide_tool_programming_interface( | ||
uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) { | ||
if (!num_funcs) { | ||
if (tool_verbosity > 0) | ||
printf( | ||
"KokkosP: Note: Number of functions in Tools Programming Interface " | ||
"is 0!\n"); | ||
} | ||
tpi_funcs = *funcsFromTPI; | ||
} | ||
|
||
void kokkosp_request_tool_settings(const uint32_t, | ||
Kokkos_Tools_ToolSettings* settings) { | ||
if (0 == tool_globFence) { | ||
|
@@ -82,9 +125,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, | |
printf("KokkosP: Next library to call: %s\n", nextLibrary); | ||
printf("KokkosP: Loading child library ..\n"); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's up with all these whitespace changes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will fix, making sure the code file is indeed getting processed through clang-format. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @masterleinad Does the new file committed address the issues you have with whitespace changes? |
||
void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL); | ||
|
||
if (NULL == childLibrary) { | ||
fprintf(stderr, "KokkosP: Error: Unable to load: %s (Error=%s)\n", | ||
nextLibrary, dlerror()); | ||
|
@@ -103,17 +144,14 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, | |
(endFunction)dlsym(childLibrary, "kokkosp_end_parallel_for"); | ||
endReduceCallee = | ||
(endFunction)dlsym(childLibrary, "kokkosp_end_parallel_reduce"); | ||
|
||
initProfileLibrary = | ||
(initFunction)dlsym(childLibrary, "kokkosp_init_library"); | ||
finalizeProfileLibrary = | ||
(finalizeFunction)dlsym(childLibrary, "kokkosp_finalize_library"); | ||
|
||
if (NULL != initProfileLibrary) { | ||
(*initProfileLibrary)(loadSeq + 1, interfaceVer, devInfoCount, | ||
deviceInfo); | ||
} | ||
|
||
if (tool_verbosity > 0) { | ||
printf("KokkosP: Function Status:\n"); | ||
printf("KokkosP: begin-parallel-for: %s\n", | ||
|
@@ -133,9 +171,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, | |
} | ||
|
||
free(envBuffer); | ||
|
||
uniqID = 1; | ||
|
||
uniqID = 1; | ||
const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); | ||
if (NULL != tool_sample) { | ||
kernelSampleSkip = atoi(tool_sample) + 1; | ||
|
@@ -152,9 +188,18 @@ void kokkosp_finalize_library() { | |
|
||
void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, | ||
uint64_t* kID) { | ||
*kID = uniqID++; | ||
|
||
if (((*kID) % kernelSampleSkip) == 0) { | ||
*kID = 0; | ||
static uint64_t invocationNum; | ||
++invocationNum; | ||
if ((invocationNum % kernelSampleSkip) == 0) { | ||
getGlobFenceChoice(); // re-read environment variable to get most accurate | ||
// value | ||
if (0 < tool_globFence) { | ||
invoke_ktools_fence( | ||
0); // invoke tool-induced fence from device 0 for now | ||
} | ||
*kID = | ||
1; // set kernel ID to 1 so that it is matched with the end_parallel_* | ||
if (tool_verbosity > 0) { | ||
printf("KokkosP: sample %llu calling child-begin function...\n", | ||
(unsigned long long)(*kID)); | ||
|
@@ -167,12 +212,17 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, | |
} | ||
|
||
void kokkosp_end_parallel_for(const uint64_t kID) { | ||
if ((kID % kernelSampleSkip) == 0) { | ||
if (kID > 0) { | ||
getGlobFenceChoice(); // re-read environment variable to get most accurate | ||
// value | ||
if (0 < tool_globFence) { | ||
invoke_ktools_fence( | ||
0); // invoke tool-induced fence from device 0 for now | ||
} | ||
if (tool_verbosity > 0) { | ||
printf("KokkosP: sample %llu calling child-end function...\n", | ||
(unsigned long long)(kID)); | ||
} | ||
|
||
if (NULL != endForCallee) { | ||
(*endForCallee)(kID); | ||
} | ||
|
@@ -181,60 +231,89 @@ void kokkosp_end_parallel_for(const uint64_t kID) { | |
|
||
void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, | ||
uint64_t* kID) { | ||
*kID = uniqID++; | ||
|
||
if (((*kID) % kernelSampleSkip) == 0) { | ||
*kID = 0; // set memory location value of kID to 0. | ||
static uint64_t invocationNum; | ||
++invocationNum; | ||
if ((invocationNum % kernelSampleSkip) == 0) { | ||
getGlobFenceChoice(); // re-read environment variable to get most accurate | ||
// value | ||
if (0 < tool_globFence) { | ||
// using tool-induced fence from Kokkos_profiling rather than | ||
// Kokkos_C_Profiling_interface. Note that this function | ||
// only invokes a global (device 0 invoked) fence | ||
invoke_ktools_fence(0); | ||
} | ||
*kID = 1; // set kernel ID to 1 so that it is matched with the end. | ||
if (tool_verbosity > 0) { | ||
printf("KokkosP: sample %llu calling child-begin function...\n", | ||
(unsigned long long)(*kID)); | ||
} | ||
|
||
if (NULL != beginScanCallee) { | ||
(*beginScanCallee)(name, devID, kID); | ||
} | ||
} | ||
} | ||
|
||
void kokkosp_end_parallel_scan(const uint64_t kID) { | ||
if ((kID % kernelSampleSkip) == 0) { | ||
if (kID > 0) { | ||
getGlobFenceChoice(); // re-read environment variable to get most accurate | ||
// value | ||
if (0 < tool_globFence) { | ||
// using tool-induced fence from Kokkos_profiling rather than | ||
// Kokkos_C_Profiling_interface. Note that this function | ||
// only invokes a global (device 0 invoked) fence. | ||
invoke_ktools_fence(0); | ||
} | ||
if (tool_verbosity > 0) { | ||
printf("KokkosP: sample %llu calling child-end function...\n", | ||
(unsigned long long)(kID)); | ||
} | ||
|
||
if (NULL != endScanCallee) { | ||
(*endScanCallee)(kID); | ||
} | ||
} | ||
} // end kID sample | ||
} | ||
|
||
void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, | ||
uint64_t* kID) { | ||
*kID = uniqID++; | ||
|
||
if (((*kID) % kernelSampleSkip) == 0) { | ||
*kID = 0; | ||
static uint64_t invocationNum; | ||
++invocationNum; | ||
if ((invocationNum % kernelSampleSkip) == 0) { | ||
getGlobFenceChoice(); // re-read environment variable to get most accurate | ||
// value | ||
if (0 < tool_globFence) { | ||
// using tool-induced fence from Kokkos_profiling rather than | ||
// Kokkos_C_Profiling_interface. Note that this function | ||
// only invokes a global (device 0 invoked) fence. | ||
invoke_ktools_fence(0); | ||
} | ||
*kID = 1; // set kernel ID to 1 so that it is matched with the end. | ||
if (tool_verbosity > 0) { | ||
printf("KokkosP: sample %llu calling child-begin function...\n", | ||
(unsigned long long)(*kID)); | ||
} | ||
|
||
if (NULL != beginReduceCallee) { | ||
(*beginReduceCallee)(name, devID, kID); | ||
} | ||
} | ||
} | ||
|
||
void kokkosp_end_parallel_reduce(const uint64_t kID) { | ||
if ((kID % kernelSampleSkip) == 0) { | ||
if (kID > 0) { | ||
getGlobFenceChoice(); // re-read environment variable to get most accurate | ||
// value | ||
if (0 < tool_globFence) { // Todo: see if this is a performance bottleneck | ||
invoke_ktools_fence(0); | ||
} | ||
if (tool_verbosity > 0) { | ||
printf("KokkosP: sample %llu calling child-end function...\n", | ||
(unsigned long long)(kID)); | ||
} | ||
|
||
if (NULL != endReduceCallee) { | ||
(*endReduceCallee)(kID); | ||
} | ||
} | ||
} // end kID sample | ||
} | ||
|
||
} // namespace Sampler | ||
|
@@ -243,8 +322,10 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { | |
extern "C" { | ||
|
||
namespace impl = KokkosTools::Sampler; | ||
|
||
EXPOSE_TOOL_PROGRAMMING_INTERFACE( | ||
impl::kokkosp_provide_tool_programming_interface) | ||
EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) | ||
|
||
EXPOSE_INIT(impl::kokkosp_init_library) | ||
EXPOSE_FINALIZE(impl::kokkosp_finalize_library) | ||
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove all commented lines you are not using.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for that
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed