From 44ada1f189c07abcecf7a7a1df32010922251828 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 23 May 2024 13:05:53 -0400 Subject: [PATCH] Sampler's begin for callee's null check before fence (less overhead in corner case) (#253) * kp_sampler_skip.cpp: put the begin-callee null check before the fence This improves performance when there is no callee for kokkosp_begin_parallel_for, because the fence is then skipped. This is already done correctly in kokkosp_begin_parallel_scan and kokkosp_begin_parallel_reduce. * kp_sampler_skip.cpp: apply clang-format --- common/kokkos-sampler/kp_sampler_skip.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 8df516e9e..4d8b578fb 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -200,10 +200,11 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, std::cout << "KokkosP: sample " << *kID << " calling child-begin function...\n"; } - if (tool_globFence) { - invoke_ktools_fence(0); - } + if (NULL != beginForCallee) { + if (tool_globFence) { + invoke_ktools_fence(0); + } uint64_t nestedkID = 0; (*beginForCallee)(name, devID, &nestedkID); if (tool_verbosity > 0) {