Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fence on sample only #209

Merged
merged 13 commits into from
Oct 12, 2023
2 changes: 1 addition & 1 deletion common/kokkos-sampler/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CXX = clang++
CXX = g++

CXXFLAGS = -O3 -std=c++17 -g

Expand Down
62 changes: 57 additions & 5 deletions common/kokkos-sampler/kp_sampler_skip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,47 @@ static endFunction endReduceCallee = NULL;

/**
 * Tool-settings callback: fills in the settings structure handed to the tool.
 *
 * Global fencing is always declined. When tool_globFence is set, the sampler
 * fences explicitly through invoke_ktools_fence() right before it forwards a
 * sampled event to the child tool, so a fence is only paid on sampled kernels.
 *
 * @param (unnamed)  Unused count argument.
 * @param settings   Settings structure to fill in; must not be null.
 */
void kokkosp_request_tool_settings(const uint32_t,
                                   Kokkos_Tools_ToolSettings* settings) {
  settings->requires_global_fencing = false;
}

// Set of functions from the Kokkos ToolProgrammingInterface (includes fence).
// Overwritten with the table passed to
// kokkosp_provide_tool_programming_interface(); invoke_ktools_fence() calls
// its `fence` member after checking it against nullptr.
Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs;

/**
 * Extracts the device-index field from a packed device identifier.
 *
 * The low 17 bits (the "instance" bits) are shifted out and the next 7 bits
 * (the "device" bits) are returned; any higher bits are masked away.
 *
 * @param devid_in  Packed device identifier.
 * @return Value of bits [17, 24) of devid_in, i.e. a number in [0, 127].
 */
uint32_t getDeviceID(uint32_t devid_in) {
  constexpr uint32_t kInstanceBits = 17;
  constexpr uint32_t kDeviceBits   = 7;
  constexpr uint32_t kDeviceMask   = (uint32_t{1} << kDeviceBits) - 1;

  const uint32_t withoutInstance = devid_in >> kInstanceBits;
  return withoutInstance & kDeviceMask;
}

/**
 * Issues a tool-induced fence through the Kokkos Tools programming interface.
 *
 * If the interface did not supply a fence function, a fatal message is
 * printed and the process exits with status -1: the caller asked for a fence
 * that cannot be provided.
 *
 * @param devID  Device identifier forwarded unchanged to tpi_funcs.fence.
 */
void invoke_ktools_fence(uint32_t devID) {
  if (tpi_funcs.fence == nullptr) {
    printf(
        "KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
        "Fence is NULL!\n");
    exit(-1);
  }

  tpi_funcs.fence(devID);

  if (tool_verbosity > 1) {
    // getDeviceID() returns uint32_t, so print it with an unsigned conversion.
    printf(
        "KokkosP: Sampler utility sucessfully invoked "
        " tool-induced fence on device %u\n",
        getDeviceID(devID));
  }
}

/**
 * Receives the Tools Programming Interface function table and stores a copy
 * in tpi_funcs.
 *
 * A function count of zero is only reported (when tool_verbosity > 0); the
 * table is copied regardless of the count.
 *
 * @param num_funcs     Number of functions reported for the interface.
 * @param funcsFromTPI  Function table to copy; dereferenced unconditionally.
 */
void kokkosp_provide_tool_programming_interface(
    uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
  const bool interfaceIsEmpty = (num_funcs == 0);
  if (interfaceIsEmpty && tool_verbosity > 0) {
    printf(
        "KokkosP: Note: Number of functions in Tools Programming Interface "
        "is 0!\n");
  }

  tpi_funcs = *funcsFromTPI;
}

void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t devInfoCount, void* deviceInfo) {
const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE");
Expand Down Expand Up @@ -164,6 +198,9 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}
if (tool_globFence) {
invoke_ktools_fence(0);
}
if (NULL != beginForCallee) {
uint64_t nestedkID = 0;
(*beginForCallee)(name, devID, &nestedkID);
Expand All @@ -180,6 +217,9 @@ void kokkosp_end_parallel_for(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*endForCallee)(retrievedNestedkID);
}
}
Expand All @@ -197,6 +237,9 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
}
if (NULL != beginScanCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*beginScanCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
Expand All @@ -211,6 +254,9 @@ void kokkosp_end_parallel_scan(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*endScanCallee)(retrievedNestedkID);
}
}
Expand All @@ -226,9 +272,11 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}

if (NULL != beginReduceCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*beginReduceCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
Expand All @@ -243,6 +291,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
if (tool_globFence) {
  invoke_ktools_fence(0);
}
// This is the parallel_reduce end path, so the event must go to the child
// tool's reduce handler (endReduceCallee), not its scan handler.
// NOTE(review): the enclosing guard is outside this hunk; the null check
// keeps the call safe whichever callee that guard tests.
if (NULL != endReduceCallee) {
  (*endReduceCallee)(retrievedNestedkID);
}
}
}
Expand All @@ -254,8 +305,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
extern "C" {

namespace impl = KokkosTools::Sampler;

EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(
impl::kokkosp_provide_tool_programming_interface)
EXPOSE_INIT(impl::kokkosp_init_library)
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
Expand Down
8 changes: 8 additions & 0 deletions profiling/all/kp_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,17 @@ using Kokkos::Tools::SpaceHandle;
#define EXPOSE_PROFILE_EVENT(FUNC_NAME)
#define EXPOSE_BEGIN_FENCE(FUNC_NAME)
#define EXPOSE_END_FENCE(FUNC_NAME)
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME)

#else

// Defines a weak-linkage function named
// kokkosp_provide_tool_programming_interface that forwards both of its
// arguments (the count and the interface pointer) to FUNC_NAME.
// NOTE(review): presumably intended to be expanded inside an extern "C"
// block so the symbol name is unmangled -- confirm at the use site.
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \
__attribute__((weak)) void kokkosp_provide_tool_programming_interface( \
const uint32_t num_actions, \
Kokkos_Tools_ToolProgrammingInterface* ptpi) { \
FUNC_NAME(num_actions, ptpi); \
}

#define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \
__attribute__((weak)) void kokkosp_request_tool_settings( \
const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \
Expand Down