Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Programmatically filter out tool-induced fences from the sampler tool utility #194

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
6a66fff
Update kp_sampler_skip.cpp
vlkale May 5, 2023
585b7bf
formatted sampler
May 5, 2023
060c1aa
Update kp_sampler_skip.cpp
vlkale May 18, 2023
109843a
putting in sampler skip
May 18, 2023
9e76661
putting in new sampler with appropriate functions. Still need to get …
Jun 1, 2023
2e97fa6
putting in new sampler with appropriate functions. Still need to get …
Jun 1, 2023
8c5e523
putting in new sampler with appropriate functions, using profiling in…
Jun 1, 2023
2c1efac
putting in fix to sampler
vlkale Jun 1, 2023
0f4a089
merge to fix kp sampler
vlkale Jun 1, 2023
9c02dda
committing sampler formatted
vlkale Jun 1, 2023
8264f55
fixes to Kokkos_Tools_ToolProgrammingInterface invocation
vlkale Jun 8, 2023
85b4d1a
formatted fix to kp sampler
vlkale Jun 8, 2023
95c3af2
committed formatted kp_core
vlkale Jun 8, 2023
5ee1a1f
formatted kernels.hpp
vlkale Jun 8, 2023
20e5a30
Update common/kokkos-sampler/kp_sampler_skip.cpp
vlkale Jul 20, 2023
758f3f9
Removing all commented code
vlkale Jul 21, 2023
91a48f5
fix name of glob fence choice
Jul 21, 2023
a6c7bc1
applied clang format
Jul 21, 2023
eda9d83
fixing with correct device id
vlkale Jul 31, 2023
5873f0a
clang format for fix with device ID
vlkale Jul 31, 2023
874ad93
adding deep copy and fence callback in sampler
vlkale Jul 31, 2023
a76e1ac
Update kp_sampler_skip.cpp
vlkale Aug 3, 2023
fd01d94
add atomic for uniqID
vlkale Aug 3, 2023
6e47ab9
Merge branch 'kokkos:develop' into FenceOnlyOnSamplePick
vlkale Aug 5, 2023
57f9a1f
change atomic to std::atomic
vlkale Aug 7, 2023
fa43e59
committing fixed file for kp sampler, making thread safe and keeping …
vlkale Aug 7, 2023
ffc4fd8
taking out tuple initialization of pair to get rid of CI warning as e…
vlkale Aug 7, 2023
70cdf6a
making valid device number
vlkale Aug 7, 2023
cf343f5
removing valid nestedkID and valid devID for CI to pass
vlkale Aug 7, 2023
98b4caf
removing valid nestkID function to pass CI without the warnings as e…
vlkale Aug 7, 2023
43b983d
Removing unneeded checks of no kID found in kokkosp_end_*
vlkale Sep 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 109 additions & 28 deletions common/kokkos-sampler/kp_sampler_skip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
// #include <impl/Kokkos_Profiling_Interface.hpp>
#include "../../profiling/all/kp_core.hpp"
#include "kp_config.hpp"

// using Kokkos::Tools::Experimental;
// using mytpi_type = Kokkos::Tools::Experimental::ToolProgrammingInterface;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove all commented lines you are not using.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

namespace KokkosTools {
namespace Sampler {
// File-scope state of the sampler tool.

// Kernel-ID counter; assigned 1 during library initialisation
// (see `uniqID = 1`).
static uint64_t uniqID = 0;
// Sampling period: a begin_parallel_* callback is sampled when its invocation
// counter is a multiple of this value. When KOKKOS_TOOLS_SAMPLER_SKIP is set,
// this is overwritten with atoi(KOKKOS_TOOLS_SAMPLER_SKIP) + 1.
static uint64_t kernelSampleSkip = 101;
// Values greater than 0 enable the sampler's informational printf output.
static int tool_verbosity = 0;
// Global-fence choice. Refreshed from KOKKOS_TOOLS_GLOBALFENCES by
// getGlobFenceChoice(); values greater than 0 make sampled callbacks call
// invoke_ktools_fence().
static int tool_globFence = 0;
// mytpi_type mytpi;

typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*);
typedef void (*finalizeFunction)();
Expand All @@ -27,6 +31,45 @@ static endFunction endForCallee = NULL;
static endFunction endScanCallee = NULL;
static endFunction endReduceCallee = NULL;

// Refreshes tool_globFence from the KOKKOS_TOOLS_GLOBALFENCES environment
// variable. Takes no arguments and returns nothing; its only effect is the
// (possible) assignment to tool_globFence.
void getGlobFenceChoice() {
vlkale marked this conversation as resolved.
Show resolved Hide resolved
// re-read environment variable to get most accurate value
const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES");
if (NULL != tool_globFence_str) {
// atoi performs no error reporting: text that does not start with a number
// yields 0.
tool_globFence = atoi(tool_globFence_str);
} // variable not set: tool_globFence deliberately keeps its current value
// (it is NOT reset to 0 here).
}

// void kokkosp_tool_invoked_fence(const uint32_t, Kokkos_Tools_SpaceHandle*
// myspchandle, Kokkos_Tools_toolInvokedFenceFunction tool_fence)
//{
//(*tool_fence)(myspchandle, );
// }

// Set of functions from the Kokkos ToolProgrammingInterface (includes fence).
// Assigned in kokkosp_provide_tool_programming_interface(); its `fence` member
// is what invoke_ktools_fence() calls.
Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs;

// Invokes the tool-invoked fence callback stored in tpi_funcs.
//
// devID: device identifier, forwarded unchanged to tpi_funcs.fence.
//
// If no fence callback has been stored (tpi_funcs.fence is null) the fence is
// skipped and the problem is reported on stderr, matching how the rest of this
// file reports errors; the process is not aborted.
void invoke_ktools_fence(uint32_t devID) {
  if (tpi_funcs.fence == nullptr) {
    fprintf(stderr,
            "KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
            "Fence is NULL!\n");
    return;
  }

  tpi_funcs.fence(devID);
}

// Stores the Tools Programming Interface function table in tpi_funcs so that
// invoke_ktools_fence() can later call its fence callback. Exposed as the
// tool's entry point via EXPOSE_TOOL_PROGRAMMING_INTERFACE.
//
// num_funcs:    number of functions in the table; 0 only triggers a note
//               (when tool_verbosity > 0), the table is still copied.
// funcsFromTPI: table to copy from. A null pointer is reported on stderr and
//               leaves tpi_funcs untouched instead of being dereferenced.
void kokkosp_provide_tool_programming_interface(
    uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
  if (num_funcs == 0 && tool_verbosity > 0) {
    printf(
        "KokkosP: Note: Number of functions in Tools Programming Interface "
        "is 0!\n");
  }

  if (funcsFromTPI == nullptr) {
    // Nothing to copy from; dereferencing would be undefined behaviour.
    fprintf(stderr,
            "KokkosP: Error: Tools Programming Interface pointer is NULL!\n");
    return;
  }

  tpi_funcs = *funcsFromTPI;
}

void kokkosp_request_tool_settings(const uint32_t,
Kokkos_Tools_ToolSettings* settings) {
if (0 == tool_globFence) {
Expand Down Expand Up @@ -82,9 +125,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
printf("KokkosP: Next library to call: %s\n", nextLibrary);
printf("KokkosP: Loading child library ..\n");
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's up with all these whitespace changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will fix, making sure the code file is indeed getting processed through clang-format.

Copy link
Contributor Author

@vlkale vlkale Jul 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@masterleinad Does the new file committed address the issues you have with whitespace changes?

void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL);

if (NULL == childLibrary) {
fprintf(stderr, "KokkosP: Error: Unable to load: %s (Error=%s)\n",
nextLibrary, dlerror());
Expand All @@ -103,17 +144,14 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
(endFunction)dlsym(childLibrary, "kokkosp_end_parallel_for");
endReduceCallee =
(endFunction)dlsym(childLibrary, "kokkosp_end_parallel_reduce");

initProfileLibrary =
(initFunction)dlsym(childLibrary, "kokkosp_init_library");
finalizeProfileLibrary =
(finalizeFunction)dlsym(childLibrary, "kokkosp_finalize_library");

if (NULL != initProfileLibrary) {
(*initProfileLibrary)(loadSeq + 1, interfaceVer, devInfoCount,
deviceInfo);
}

if (tool_verbosity > 0) {
printf("KokkosP: Function Status:\n");
printf("KokkosP: begin-parallel-for: %s\n",
Expand All @@ -133,9 +171,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
}

free(envBuffer);

uniqID = 1;

uniqID = 1;
const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP");
if (NULL != tool_sample) {
kernelSampleSkip = atoi(tool_sample) + 1;
Expand All @@ -152,9 +188,18 @@ void kokkosp_finalize_library() {

void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
uint64_t* kID) {
*kID = uniqID++;

if (((*kID) % kernelSampleSkip) == 0) {
*kID = 0;
static uint64_t invocationNum;
++invocationNum;
if ((invocationNum % kernelSampleSkip) == 0) {
getGlobFenceChoice(); // re-read environment variable to get most accurate
// value
if (0 < tool_globFence) {
invoke_ktools_fence(
0); // invoke tool-induced fence from device 0 for now
}
*kID =
1; // set kernel ID to 1 so that it is matched with the end_parallel_*
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
Expand All @@ -167,12 +212,17 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
}

void kokkosp_end_parallel_for(const uint64_t kID) {
if ((kID % kernelSampleSkip) == 0) {
if (kID > 0) {
getGlobFenceChoice(); // re-read environment variable to get most accurate
// value
if (0 < tool_globFence) {
invoke_ktools_fence(
0); // invoke tool-induced fence from device 0 for now
}
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}

if (NULL != endForCallee) {
(*endForCallee)(kID);
}
Expand All @@ -181,60 +231,89 @@ void kokkosp_end_parallel_for(const uint64_t kID) {

void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
uint64_t* kID) {
*kID = uniqID++;

if (((*kID) % kernelSampleSkip) == 0) {
*kID = 0; // set memory location value of kID to 0.
static uint64_t invocationNum;
++invocationNum;
if ((invocationNum % kernelSampleSkip) == 0) {
getGlobFenceChoice(); // re-read environment variable to get most accurate
// value
if (0 < tool_globFence) {
// using tool-induced fence from Kokkos_profiling rather than
// Kokkos_C_Profiling_interface. Note that this function
// only invokes a global (device 0 invoked) fence
invoke_ktools_fence(0);
}
*kID = 1; // set kernel ID to 1 so that it is matched with the end.
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}

if (NULL != beginScanCallee) {
(*beginScanCallee)(name, devID, kID);
}
}
}

void kokkosp_end_parallel_scan(const uint64_t kID) {
if ((kID % kernelSampleSkip) == 0) {
if (kID > 0) {
getGlobFenceChoice(); // re-read environment variable to get most accurate
// value
if (0 < tool_globFence) {
// using tool-induced fence from Kokkos_profiling rather than
// Kokkos_C_Profiling_interface. Note that this function
// only invokes a global (device 0 invoked) fence.
invoke_ktools_fence(0);
}
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}

if (NULL != endScanCallee) {
(*endScanCallee)(kID);
}
}
} // end kID sample
}

void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
uint64_t* kID) {
*kID = uniqID++;

if (((*kID) % kernelSampleSkip) == 0) {
*kID = 0;
static uint64_t invocationNum;
++invocationNum;
if ((invocationNum % kernelSampleSkip) == 0) {
getGlobFenceChoice(); // re-read environment variable to get most accurate
// value
if (0 < tool_globFence) {
// using tool-induced fence from Kokkos_profiling rather than
// Kokkos_C_Profiling_interface. Note that this function
// only invokes a global (device 0 invoked) fence.
invoke_ktools_fence(0);
}
*kID = 1; // set kernel ID to 1 so that it is matched with the end.
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}

if (NULL != beginReduceCallee) {
(*beginReduceCallee)(name, devID, kID);
}
}
}

void kokkosp_end_parallel_reduce(const uint64_t kID) {
if ((kID % kernelSampleSkip) == 0) {
if (kID > 0) {
getGlobFenceChoice(); // re-read environment variable to get most accurate
// value
if (0 < tool_globFence) { // Todo: see if this is a performance bottleneck
invoke_ktools_fence(0);
}
if (tool_verbosity > 0) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}

if (NULL != endReduceCallee) {
(*endReduceCallee)(kID);
}
}
} // end kID sample
}

} // namespace Sampler
Expand All @@ -243,8 +322,10 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
extern "C" {

namespace impl = KokkosTools::Sampler;

EXPOSE_TOOL_PROGRAMMING_INTERFACE(
impl::kokkosp_provide_tool_programming_interface)
EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)

EXPOSE_INIT(impl::kokkosp_init_library)
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
Expand Down
39 changes: 21 additions & 18 deletions example/kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,30 @@ int run_calculation(const data_type SIZE) {

Kokkos::View<data_type*> data(Kokkos::ViewAllocateWithoutInitializing("data"),
SIZE);
Kokkos::parallel_for(
"initialize()", SIZE, KOKKOS_LAMBDA(data_type i) { data(i) = i; });
Kokkos::fence();
for (int tstep = 0; tstep < 1000; tstep++) {
Kokkos::parallel_for(
"initialize()", SIZE, KOKKOS_LAMBDA(data_type i) { data(i) = i; });
Kokkos::fence();

data_type sum = 0;
Kokkos::parallel_reduce(
"accumulate()", SIZE,
KOKKOS_LAMBDA(data_type i, data_type & lsum) { lsum += 1 + data(i); },
sum);
Kokkos::fence();
data_type sum = 0;
Kokkos::parallel_reduce(
"accumulate()", SIZE,
KOKKOS_LAMBDA(data_type i, data_type & lsum) { lsum += 1 + data(i); },
sum);
Kokkos::fence();

Kokkos::Profiling::popRegion();
Kokkos::Profiling::popRegion();

// check results
const data_type check = (SIZE + 1) * SIZE / 2;
if (sum != check) {
std::cout << "BAD result, got S(" << SIZE << ") = " << sum
<< " (expected " << check << ")" << std::endl;
return 1;
}
std::cout << "Result OK: S(" << SIZE << ") = " << sum << std::endl;
} // end timestep loop

// check results
const data_type check = (SIZE + 1) * SIZE / 2;
if (sum != check) {
std::cout << "BAD result, got S(" << SIZE << ") = " << sum << " (expected "
<< check << ")" << std::endl;
return 1;
}
std::cout << "Result OK: S(" << SIZE << ") = " << sum << std::endl;
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion profiling/all/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ endif()
file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS kp_all.hpp "${COMMON_HEADERS_PATH}/*.hpp")

install(FILES ${HEADER_FILES} DESTINATION ${EXPORT_INCLUDE_DIR})
install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME})
install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME})
7 changes: 7 additions & 0 deletions profiling/all/kp_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ using Kokkos::Tools::SpaceHandle;
FUNC_NAME(num_actions, settings); \
}

// Defines the weak symbol kokkosp_provide_tool_programming_interface, which
// forwards both of its arguments (the function count and the pointer to the
// Kokkos_Tools_ToolProgrammingInterface table) to FUNC_NAME.
// FUNC_NAME must be callable as FUNC_NAME(uint32_t,
// Kokkos_Tools_ToolProgrammingInterface*).
// NOTE(review): presumably expanded inside an extern "C" block like the other
// EXPOSE_* macros so the symbol name is unmangled -- confirm at the use site.
#define EXPOSE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \
__attribute__((weak)) void kokkosp_provide_tool_programming_interface( \
const uint32_t num_actions, \
Kokkos_Tools_ToolProgrammingInterface* tool_funcs) { \
FUNC_NAME(num_actions, tool_funcs); \
}

#define EXPOSE_INIT(FUNC_NAME) \
__attribute__((weak)) void kokkosp_init_library( \
const int loadSeq, const uint64_t interfaceVer, \
Expand Down