kokkos-sampler: fence through the Kokkos Tools programming interface

kokkosp_request_tool_settings now always sets requires_global_fencing to
false. The sampler instead keeps a copy of the ToolProgrammingInterface it
receives in kokkosp_provide_tool_programming_interface and, when
tool_globFence is set, calls that interface's fence from the sampled
begin/end callbacks before forwarding to the child tool. A null fence pointer
is reported and terminates the process with exit(-1).

Review fixes folded into this patch:
  * kokkosp_end_parallel_reduce forwarded to endScanCallee; it now forwards
    to endReduceCallee, the pointer the enclosing hunk header's function is
    named for.
  * Verbose fence message: spelling ("successfully"), doubled space between
    the two string literals, and %u instead of %d for the uint32_t returned
    by getDeviceID.

NOTE(review): indentation and blank context lines were reconstructed from the
hunk line counts; confirm against the tree before applying.

diff --git a/common/kokkos-sampler/Makefile b/common/kokkos-sampler/Makefile
index 862cae8fa..ce5f56ca1 100644
--- a/common/kokkos-sampler/Makefile
+++ b/common/kokkos-sampler/Makefile
@@ -1,4 +1,4 @@
-CXX = clang++
+CXX = g++
 
 CXXFLAGS = -O3 -std=c++17 -g
 
diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp
index 9c53bcc5c..9131e420f 100644
--- a/common/kokkos-sampler/kp_sampler_skip.cpp
+++ b/common/kokkos-sampler/kp_sampler_skip.cpp
@@ -33,13 +33,47 @@ static endFunction endReduceCallee = NULL;
 
 void kokkosp_request_tool_settings(const uint32_t,
                                    Kokkos_Tools_ToolSettings* settings) {
-  if (0 == tool_globFence) {
-    settings->requires_global_fencing = false;
+  settings->requires_global_fencing = false;
+}
+
+// set of functions from Kokkos ToolProgrammingInterface (includes fence)
+Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs;
+
+uint32_t getDeviceID(uint32_t devid_in) {
+  int num_device_bits   = 7;
+  int num_instance_bits = 17;
+  return (~((uint32_t(-1)) << num_device_bits)) &
+         (devid_in >> num_instance_bits);
+}
+
+void invoke_ktools_fence(uint32_t devID) {
+  if (tpi_funcs.fence != nullptr) {
+    tpi_funcs.fence(devID);
+    if (tool_verbosity > 1) {
+      printf(
+          "KokkosP: Sampler utility successfully invoked "
+          "tool-induced fence on device %u\n",
+          getDeviceID(devID));
+    }
   } else {
-    settings->requires_global_fencing = true;
+    printf(
+        "KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
+        "Fence is NULL!\n");
+    exit(-1);
   }
 }
 
+void kokkosp_provide_tool_programming_interface(
+    uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
+  if (!num_funcs) {
+    if (tool_verbosity > 0)
+      printf(
+          "KokkosP: Note: Number of functions in Tools Programming Interface "
+          "is 0!\n");
+  }
+  tpi_funcs = *funcsFromTPI;
+}
+
 void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
                           const uint32_t devInfoCount, void* deviceInfo) {
   const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE");
@@ -164,6 +198,9 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
       printf("KokkosP: sample %llu calling child-begin function...\n",
              (unsigned long long)(*kID));
     }
+    if (tool_globFence) {
+      invoke_ktools_fence(0);
+    }
     if (NULL != beginForCallee) {
       uint64_t nestedkID = 0;
       (*beginForCallee)(name, devID, &nestedkID);
@@ -180,6 +217,9 @@ void kokkosp_end_parallel_for(const uint64_t kID) {
         printf("KokkosP: sample %llu calling child-end function...\n",
                (unsigned long long)(kID));
       }
+      if (tool_globFence) {
+        invoke_ktools_fence(0);
+      }
       (*endForCallee)(retrievedNestedkID);
     }
   }
@@ -197,6 +237,9 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
     }
     if (NULL != beginScanCallee) {
       uint64_t nestedkID = 0;
+      if (tool_globFence) {
+        invoke_ktools_fence(0);
+      }
       (*beginScanCallee)(name, devID, &nestedkID);
       infokIDSample.insert({*kID, nestedkID});
     }
@@ -211,6 +254,9 @@ void kokkosp_end_parallel_scan(const uint64_t kID) {
         printf("KokkosP: sample %llu calling child-end function...\n",
                (unsigned long long)(kID));
       }
+      if (tool_globFence) {
+        invoke_ktools_fence(0);
+      }
       (*endScanCallee)(retrievedNestedkID);
     }
   }
@@ -226,9 +272,11 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
       printf("KokkosP: sample %llu calling child-begin function...\n",
              (unsigned long long)(*kID));
     }
-
     if (NULL != beginReduceCallee) {
       uint64_t nestedkID = 0;
+      if (tool_globFence) {
+        invoke_ktools_fence(0);
+      }
       (*beginReduceCallee)(name, devID, &nestedkID);
       infokIDSample.insert({*kID, nestedkID});
     }
@@ -243,6 +291,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
         printf("KokkosP: sample %llu calling child-end function...\n",
                (unsigned long long)(kID));
       }
+      if (tool_globFence) {
+        invoke_ktools_fence(0);
+      }
-      (*endScanCallee)(retrievedNestedkID);
+      (*endReduceCallee)(retrievedNestedkID);
     }
   }
@@ -254,8 +305,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
 extern "C" {
 
 namespace impl = KokkosTools::Sampler;
-
 EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
+EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(
+    impl::kokkosp_provide_tool_programming_interface)
 EXPOSE_INIT(impl::kokkosp_init_library)
 EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
 EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp
index 5cb5ed391..c63db1863 100644
--- a/profiling/all/kp_core.hpp
+++ b/profiling/all/kp_core.hpp
@@ -48,9 +48,17 @@ using Kokkos::Tools::SpaceHandle;
 #define EXPOSE_PROFILE_EVENT(FUNC_NAME)
 #define EXPOSE_BEGIN_FENCE(FUNC_NAME)
 #define EXPOSE_END_FENCE(FUNC_NAME)
+#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME)
 
 #else
 
+#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME)                 \
+  __attribute__((weak)) void kokkosp_provide_tool_programming_interface(     \
+      const uint32_t num_actions,                                            \
+      Kokkos_Tools_ToolProgrammingInterface* ptpi) {                         \
+    FUNC_NAME(num_actions, ptpi);                                            \
+  }
+
 #define EXPOSE_TOOL_SETTINGS(FUNC_NAME)                                      \
   __attribute__((weak)) void kokkosp_request_tool_settings(                  \
       const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) {     \