From e590f04a5e02311063e6c8f32e94d9c6b1f4a5e4 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sun, 30 Jul 2023 18:35:52 -0700 Subject: [PATCH 01/24] putting in all fixes --- .../nvprof-focused-connector/CMakeLists.txt | 4 ---- .../nvtx-focused-connector/CMakeLists.txt | 4 ++++ .../Makefile | 4 ++-- .../kp_nvtx_focused_connector.cpp} | 20 +++++++++---------- .../kp_nvtx_focused_connector_domain.h} | 14 ++++++------- 5 files changed, 23 insertions(+), 23 deletions(-) delete mode 100644 profiling/nvprof-focused-connector/CMakeLists.txt create mode 100644 profiling/nvtx-focused-connector/CMakeLists.txt rename profiling/{nvprof-focused-connector => nvtx-focused-connector}/Makefile (79%) rename profiling/{nvprof-focused-connector/kp_nvprof_focused_connector.cpp => nvtx-focused-connector/kp_nvtx_focused_connector.cpp} (87%) rename profiling/{nvprof-focused-connector/kp_nvprof_focused_connector_domain.h => nvtx-focused-connector/kp_nvtx_focused_connector_domain.h} (86%) diff --git a/profiling/nvprof-focused-connector/CMakeLists.txt b/profiling/nvprof-focused-connector/CMakeLists.txt deleted file mode 100644 index 072198bf5..000000000 --- a/profiling/nvprof-focused-connector/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -find_package(CUDAToolkit REQUIRED) -kp_add_library(kp_nvprof_focused_connector kp_nvprof_focused_connector.cpp) - -target_link_libraries(kp_nvprof_focused_connector CUDA::nvToolsExt) \ No newline at end of file diff --git a/profiling/nvtx-focused-connector/CMakeLists.txt b/profiling/nvtx-focused-connector/CMakeLists.txt new file mode 100644 index 000000000..e75e93469 --- /dev/null +++ b/profiling/nvtx-focused-connector/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(CUDAToolkit REQUIRED) +kp_add_library(kp_nvtx_focused_connector kp_nvtx_focused_connector.cpp) + +target_link_libraries(kp_nvtx_focused_connector CUDA::nvToolsExt) diff --git a/profiling/nvprof-focused-connector/Makefile b/profiling/nvtx-focused-connector/Makefile similarity index 79% rename from profiling/nvprof-focused-connector/Makefile rename to profiling/nvtx-focused-connector/Makefile index 06628279d..c3becc06d 100644 --- a/profiling/nvprof-focused-connector/Makefile +++ b/profiling/nvtx-focused-connector/Makefile @@ -4,13 +4,13 @@ LDFLAGS=-L$(CUDA_ROOT)/lib64 LIBS=-lnvToolsExt -lcudart SHARED_CXXFLAGS=-shared -fPIC -all: kp_nvprof_focused_connector.so +all: kp_nvtx_focused_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all -kp_nvprof_focused_connector.so: ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp +kp_nvprof_focused_connector.so: ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ -o $@ ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp $(LIBS) diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp similarity index 87% rename from profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp rename to profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index 9d971db76..ac43bec1d 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -23,15 +23,15 @@ #include #include -#include "kp_nvprof_focused_connector_domain.h" +#include "kp_nvtx_focused_connector_domain.h" #include "kp_core.hpp" namespace KokkosTools { -namespace NVProfFocusedConnector { +namespace NVTXFocusedConnector { -static KernelNVProfFocusedConnectorInfo* currentKernel; -static std::unordered_map +static KernelNVTXFocusedConnectorInfo* currentKernel; +static std::unordered_map domain_map; static uint64_t nextKernelID; @@ -41,7 +41,7 @@ void kokkosp_init_library( struct Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { printf("-----------------------------------------------------------\n"); printf( - "KokkosP: NVProf Analyzer Focused Connector (sequence is %d, version: " + "KokkosP: NVTX Analyzer Focused Connector (sequence is %d, version: " "%llu)\n", loadSeq, (unsigned long long)(interfaceVer)); printf("-----------------------------------------------------------\n"); @@ -49,15 +49,15 @@ void kokkosp_init_library( nextKernelID = 0; } -KernelNVProfFocusedConnectorInfo* getFocusedConnectorInfo( +KernelNVTXFocusedConnectorInfo* getFocusedConnectorInfo( const char* name, KernelExecutionType kType) { std::string nameStr(name); auto kDomain = domain_map.find(nameStr); currentKernel = NULL; if (kDomain == domain_map.end()) { - currentKernel = new KernelNVProfFocusedConnectorInfo(name, kType); - domain_map.insert(std::pair( + currentKernel = new KernelNVTXFocusedConnectorInfo(name, kType); + domain_map.insert(std::pair( nameStr, currentKernel)); } else { currentKernel = kDomain->second; @@ -134,12 +134,12 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { return my_event_set; } -} // namespace NVProfFocusedConnector +} // namespace NVTXFocusedConnector } // namespace KokkosTools extern "C" { -namespace impl = KokkosTools::NVProfFocusedConnector; +namespace impl = KokkosTools::NVTXFocusedConnector; EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h similarity index 86% rename from profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h rename to profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h index e668a9a3c..a50cfc71c 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h @@ -14,8 +14,8 @@ // //@HEADER -#ifndef _H_KOKKOSP_KERNEL_NVPROF_CONNECTOR_INFO -#define _H_KOKKOSP_KERNEL_NVPROF_CONNECTOR_INFO +#ifndef KOKKOSP_KERNEL_NVTX_CONNECTOR_H +#define KOKKOSP_KERNEL_NVTX_CONNECTOR_H #include #include @@ -24,7 +24,7 @@ #include "nvToolsExt.h" namespace KokkosTools { -namespace NVProfFocusedConnector { +namespace NVTXFocusedConnector { enum KernelExecutionType { PARALLEL_FOR = 0, @@ -32,9 +32,9 @@ enum KernelExecutionType { PARALLEL_SCAN = 2 }; -class KernelNVProfFocusedConnectorInfo { +class KernelNVTXFocusedConnectorInfo { public: - KernelNVProfFocusedConnectorInfo(std::string kName, + KernelNVTXFocusedConnectorInfo(std::string kName, KernelExecutionType kernelType) { domainNameHandle = kName; char* domainName = (char*)malloc(sizeof(char*) * (32 + kName.size())); @@ -71,7 +71,7 @@ class KernelNVProfFocusedConnectorInfo { std::string getDomainNameHandle() { return domainNameHandle; } - ~KernelNVProfFocusedConnectorInfo() { nvtxDomainDestroy(domain); } + ~KernelNVTXFocusedConnectorInfo() { nvtxDomainDestroy(domain); } private: std::string domainNameHandle; @@ -81,4 +81,4 @@ class KernelNVProfFocusedConnectorInfo { #endif } -} // KokkosTools::NVProfFocusedConnector +} // KokkosTools::NVTXFocusedConnector From 4c2e2007c946106a1969b75ede06baa1737937b2 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sun, 30 Jul 2023 18:41:56 -0700 Subject: [PATCH 02/24] putting changes for nvtx --- CMakeLists.txt | 2 +- build-all.sh | 2 +- profiling/all/CMakeLists.txt | 2 +- profiling/all/kp_all.cpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 69f2fb2f1..00ae9bea3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,7 +180,7 @@ endif() # GPU profilers if(Kokkos_ENABLE_CUDA) add_subdirectory(profiling/nvprof-connector) - add_subdirectory(profiling/nvprof-focused-connector) + add_subdirectory(profiling/nvtx-focused-connector) endif() if(Kokkos_ENABLE_HIP) add_subdirectory(profiling/roctx-connector) diff --git a/build-all.sh b/build-all.sh index 5cd77240a..6c58f3b64 100644 --- a/build-all.sh +++ b/build-all.sh @@ -10,7 +10,7 @@ make -f $ROOT_DIR/profiling/memory-hwm/Makefile make -f $ROOT_DIR/profiling/memory-hwm-mpi/Makefile make -f $ROOT_DIR/profiling/memory-usage/Makefile make -f $ROOT_DIR/profiling/nvprof-connector/Makefile -make -f $ROOT_DIR/profiling/nvprof-focused-connector/Makefile +make -f $ROOT_DIR/profiling/nvtx-focused-connector/Makefile make -f $ROOT_DIR/profiling/papi-connector/Makefile make -f $ROOT_DIR/profiling/simple-kernel-timer-json/Makefile make -f $ROOT_DIR/profiling/simple-kernel-timer/Makefile diff --git a/profiling/all/CMakeLists.txt b/profiling/all/CMakeLists.txt index ce8b13e27..786f2c2a5 100644 --- a/profiling/all/CMakeLists.txt +++ b/profiling/all/CMakeLists.txt @@ -19,4 +19,4 @@ endif() file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS kp_all.hpp "${COMMON_HEADERS_PATH}/*.hpp") install(FILES ${HEADER_FILES} DESTINATION ${EXPORT_INCLUDE_DIR}) -install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME}) \ No newline at end of file +install(TARGETS ${LIBNAME} EXPORT ${EXPORT_NAME}) diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index 6199cefcb..75ec3ee0b 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -92,8 +92,8 @@ EventSet get_event_set(const char* profiler, const char* config_str) { #endif #ifdef KOKKOSTOOLS_HAS_NVPROF handlers["nvprof-connector"] = NVProfConnector::get_event_set(); - handlers["nvprof-focused-connector"] = - NVProfFocusedConnector::get_event_set(); + handlers["nvtx-focused-connector"] = + NVTXFocusedConnector::get_event_set(); #endif auto e = handlers.find(profiler); if (e != handlers.end()) return e->second; From 909f481916db118e76254d11947f17e490d11ec6 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sun, 30 Jul 2023 20:05:57 -0700 Subject: [PATCH 03/24] pushing all nvtx focused connector --- .../nvtx-focused-connector/kp_nvtx_focused_connector_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h index a50cfc71c..d38f60538 100644 --- a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h @@ -35,7 +35,7 @@ enum KernelExecutionType { class KernelNVTXFocusedConnectorInfo { public: KernelNVTXFocusedConnectorInfo(std::string kName, - KernelExecutionType kernelType) { + KernelExecutionType kernelType) { domainNameHandle = kName; char* domainName = (char*)malloc(sizeof(char*) * (32 + kName.size())); From 603b6e491b3d02ea7feb5f86e2d9f7eb52c4ebbc Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sun, 30 Jul 2023 20:14:28 -0700 Subject: [PATCH 04/24] formatted kp_all --- profiling/all/kp_all.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index 75ec3ee0b..7f9a22d46 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -91,9 +91,8 @@ EventSet get_event_set(const char* profiler, const char* config_str) { handlers["caliper"] = cali::get_kokkos_event_set(config_str); #endif #ifdef KOKKOSTOOLS_HAS_NVPROF - handlers["nvprof-connector"] = NVProfConnector::get_event_set(); - handlers["nvtx-focused-connector"] = - NVTXFocusedConnector::get_event_set(); + handlers["nvprof-connector"] = NVProfConnector::get_event_set(); + handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set(); #endif auto e = handlers.find(profiler); if (e != handlers.end()) return e->second; From b203e59f9154423af3eec938ac4d9041837147bf Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Mon, 31 Jul 2023 10:10:46 -0700 Subject: [PATCH 05/24] fixing kp all --- profiling/all/kp_all.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index 7f9a22d46..f839245c9 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -50,7 +50,7 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector) #endif #ifdef KOKKOSTOOLS_HAS_NVPROF KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfConnector) -KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfFocusedConnector) +KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector) #endif #ifdef KOKKOSTOOLS_HAS_CALIPER namespace cali { From d2a63a144453cce3de04c377b9ce2ae1a9c1967a Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 3 Aug 2023 11:13:50 -0700 Subject: [PATCH 06/24] Using kval hashmap (draft) for nestedID --- common/kokkos-sampler/kp_sampler_skip.cpp | 24 +++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 773753f8b..87bfb4cf8 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -5,10 +5,11 @@ #include #include "../../profiling/all/kp_core.hpp" #include "kp_config.hpp" +#include namespace KokkosTools { namespace Sampler { -static uint64_t uniqID = 0; +static atomic uniqID = 0; static uint64_t kernelSampleSkip = 101; static int tool_verbosity = 0; static int tool_globFence = 0; @@ -160,8 +161,10 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, (unsigned long long)(*kID)); } - if (NULL != beginForCallee) { - (*beginForCallee)(name, devID, kID); + if (NULL != beginForCallee) { + uint64_t nestedID; + (*beginForCallee)(name, devID, nestedID); + // map.insert(kID, nestedID); } } } @@ -174,7 +177,10 @@ void kokkosp_end_parallel_for(const uint64_t kID) { } if (NULL != endForCallee) { - (*endForCallee)(kID); + (*endForCallee)(kID); + // (*endForCallee)(map.find(kID)); + + // map.clear(nestedID); } } } @@ -182,7 +188,6 @@ void kokkosp_end_parallel_for(const uint64_t kID) { void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; - if (((*kID) % kernelSampleSkip) == 0) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", @@ -190,7 +195,9 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, } if (NULL != beginScanCallee) { + uint64_t nestedID = 0; (*beginScanCallee)(name, devID, kID); + // map.insert(kID, nestedID); } } } @@ -204,6 +211,8 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { if (NULL != endScanCallee) { (*endScanCallee)(kID); + // (*endForCallee)(map.find(kID)); + // map.clear(nestedID); } } } @@ -219,7 +228,8 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, } if (NULL != beginReduceCallee) { - (*beginReduceCallee)(name, devID, kID); + (*beginReduceCallee)(name, devID, kID); + } } } @@ -233,6 +243,8 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { if (NULL != endReduceCallee) { (*endReduceCallee)(kID); + // (*endForCallee)(map.find(kID)); + // map.clear(nestedID); } } } From 3becbe2d34236444fa3e3b0e44a089380a99ab56 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sat, 5 Aug 2023 08:17:27 -0700 Subject: [PATCH 07/24] take out erroneous changes - uniqID --- common/kokkos-sampler/kp_sampler_skip.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 87bfb4cf8..c611db0c9 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -5,11 +5,10 @@ #include #include "../../profiling/all/kp_core.hpp" #include "kp_config.hpp" -#include namespace KokkosTools { namespace Sampler { -static atomic uniqID = 0; +static uint64_t uniqID = 0; static uint64_t kernelSampleSkip = 101; static int tool_verbosity = 0; static int tool_globFence = 0; @@ -153,7 +152,6 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = uniqID++; if (((*kID) % kernelSampleSkip) == 0) { if (tool_verbosity > 0) { @@ -178,16 +176,12 @@ void kokkosp_end_parallel_for(const uint64_t kID) { if (NULL != endForCallee) { (*endForCallee)(kID); - // (*endForCallee)(map.find(kID)); - - // map.clear(nestedID); } } } void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = uniqID++; if (((*kID) % kernelSampleSkip) == 0) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", @@ -197,7 +191,6 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, if (NULL != beginScanCallee) { uint64_t nestedID = 0; (*beginScanCallee)(name, devID, kID); - // map.insert(kID, nestedID); } } } @@ -211,8 +204,6 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { if (NULL != endScanCallee) { (*endScanCallee)(kID); - // (*endForCallee)(map.find(kID)); - // map.clear(nestedID); } } } @@ -243,8 +234,6 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { if (NULL != endReduceCallee) { (*endReduceCallee)(kID); - // (*endForCallee)(map.find(kID)); - // map.clear(nestedID); } } } From 5066cede88f5591c4e2a64ee046eefaf00c765ba Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sat, 5 Aug 2023 12:41:41 -0700 Subject: [PATCH 08/24] Rollback erroneous change to kp sampler mistakenly made this change in this PR but it is part of PR #194 --- common/kokkos-sampler/kp_sampler_skip.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index c611db0c9..d063c98c4 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -133,14 +133,11 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } free(envBuffer); - uniqID = 1; - const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); if (NULL != tool_sample) { kernelSampleSkip = atoi(tool_sample) + 1; } - if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); } @@ -152,17 +149,13 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { - if (((*kID) % kernelSampleSkip) == 0) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); } - - if (NULL != beginForCallee) { - uint64_t nestedID; - (*beginForCallee)(name, devID, nestedID); - // map.insert(kID, nestedID); + if (NULL != beginForCallee) { + (*beginForCallee)(name, devID, kID); } } } From c4f64b5a889ca9cdc471a0bbed6c188cf20c7d0a Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sat, 5 Aug 2023 14:15:08 -0700 Subject: [PATCH 09/24] formatting to nvtx focused connector --- profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index ac43bec1d..9b81c93c1 100644 --- a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -79,7 +79,8 @@ void focusedConnectorExecuteEnd() { void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); - printf("KokkosP: Finalization of NVProf Connector. Complete.\n"); + printf( + "KokkosP: Finalization of NVTX Analyzer Focused Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); } From 3c0a8a9107729372e8423a2c9a90cb417d041d05 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sat, 5 Aug 2023 14:26:40 -0700 Subject: [PATCH 10/24] formatting develop version of sampler that was pasted from develop --- common/kokkos-sampler/kp_sampler_skip.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index d063c98c4..ffcd65845 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -8,7 +8,7 @@ namespace KokkosTools { namespace Sampler { -static uint64_t uniqID = 0; +static uint64_t uniqID = 0; static uint64_t kernelSampleSkip = 101; static int tool_verbosity = 0; static int tool_globFence = 0; @@ -133,7 +133,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } free(envBuffer); - uniqID = 1; + uniqID = 1; const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); if (NULL != tool_sample) { kernelSampleSkip = atoi(tool_sample) + 1; @@ -168,7 +168,7 @@ void kokkosp_end_parallel_for(const uint64_t kID) { } if (NULL != endForCallee) { - (*endForCallee)(kID); + (*endForCallee)(kID); } } } @@ -212,8 +212,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, } if (NULL != beginReduceCallee) { - (*beginReduceCallee)(name, devID, kID); - + (*beginReduceCallee)(name, devID, kID); } } } From 50c1b1b0481c75b829f27631a21375c6bbf7eeac Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sat, 5 Aug 2023 14:29:25 -0700 Subject: [PATCH 11/24] removing nestedID added by mistake --- common/kokkos-sampler/kp_sampler_skip.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index ffcd65845..86b0f6a63 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -182,7 +182,6 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, } if (NULL != beginScanCallee) { - uint64_t nestedID = 0; (*beginScanCallee)(name, devID, kID); } } From a2258af850cf3508d3f3329b6a60bfbd53747ea2 Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Wed, 16 Aug 2023 17:40:43 -0700 Subject: [PATCH 12/24] fix Makefile to have nvtx --- profiling/nvtx-focused-connector/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/profiling/nvtx-focused-connector/Makefile b/profiling/nvtx-focused-connector/Makefile index c3becc06d..cd44ad472 100644 --- a/profiling/nvtx-focused-connector/Makefile +++ b/profiling/nvtx-focused-connector/Makefile @@ -1,7 +1,7 @@ CXX=g++ CXXFLAGS=-O3 -std=c++11 -g -I$(CUDA_ROOT)/include -I./ LDFLAGS=-L$(CUDA_ROOT)/lib64 -LIBS=-lnvToolsExt -lcudart +#LIBS=-lnvToolsExt -lcudart SHARED_CXXFLAGS=-shared -fPIC all: kp_nvtx_focused_connector.so @@ -10,9 +10,9 @@ MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all -kp_nvprof_focused_connector.so: ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp +kp_nvtx_focused_connector.so: ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ - -o $@ ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp $(LIBS) + -o $@ ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(#LIBS) clean: rm *.so From 8c72c1ac9e5fbed0dc58a5c5fd12ff2c7b69042a Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Wed, 16 Aug 2023 17:41:16 -0700 Subject: [PATCH 13/24] updates to kp nvtx focused connector --- .../kp_nvtx_focused_connector.cpp | 39 +++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index 9b81c93c1..eebb8a061 100644 --- a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -27,9 +27,23 @@ #include "kp_core.hpp" +static int tool_globfences; namespace KokkosTools { namespace NVTXFocusedConnector { +void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings* settings) { + settings->requires_global_fencing = true; + if (tool_globfences == 1) { + settings->requires_global_fencing = true; + } + else { + settings->requires_global_fencing = false; + } + // leave the door open for other non-zero values of tools +} // end request tool settings + + static KernelNVTXFocusedConnectorInfo* currentKernel; static std::unordered_map domain_map; @@ -45,9 +59,16 @@ void kokkosp_init_library( "%llu)\n", loadSeq, (unsigned long long)(interfaceVer)); printf("-----------------------------------------------------------\n"); - + const char* tool_global_fences = getenv("KOKKOS_TOOLS_GLOBALFENCES"); + if (NULL != tool_global_fences) { + tool_globfences = atoi(tool_global_fences); + } else { + tool_globfences = 1; // default to 1 to be conservative for capturing state by tool + } + nvtxNameOsThread(pthread_self(), "Application Main Thread"); + nvtxMarkA("Kokkos::Initialization Complete"); nextKernelID = 0; -} +} // end kokkosp_init_library KernelNVTXFocusedConnectorInfo* getFocusedConnectorInfo( const char* name, KernelExecutionType kType) { @@ -64,25 +85,25 @@ KernelNVTXFocusedConnectorInfo* getFocusedConnectorInfo( } return currentKernel; -} +} // end getFocusedConnectorInfo void focusedConnectorExecuteStart() { cudaProfilerStart(); currentKernel->startRange(); -} +} // end focusedConnectorExecuteStart void focusedConnectorExecuteEnd() { currentKernel->endRange(); cudaProfilerStop(); currentKernel = NULL; -} +} // end focusedConnectorExecuteEnd void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf( "KokkosP: Finalization of NVTX Analyzer Focused Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); -} +} // end kokkosp_finalize_library void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, uint64_t* kID) { @@ -90,7 +111,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); -} +} void kokkosp_end_parallel_for(const uint64_t /*kID*/) { focusedConnectorExecuteEnd(); @@ -124,6 +145,7 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { Kokkos::Tools::Experimental::EventSet my_event_set; memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.request_tool_settings = kokkosp_request_tool_settings; my_event_set.init = kokkosp_init_library; my_event_set.finalize = kokkosp_finalize_library; my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; @@ -133,7 +155,7 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; return my_event_set; -} +} // end get_event_set } // namespace NVTXFocusedConnector } // namespace KokkosTools @@ -142,6 +164,7 @@ extern "C" { namespace impl = KokkosTools::NVTXFocusedConnector; +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) From 9bb7747b916c8d1d02b0bb39d248f6599fea12c8 Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Wed, 16 Aug 2023 17:52:16 -0700 Subject: [PATCH 14/24] change NVPROF to NVTX --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 00ae9bea3..0178ed0b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ endif() include(cmake/configure_variorum.cmake) set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER}) -set(KOKKOSTOOLS_HAS_NVPROF ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvprof should be available +set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available if(DEFINED ENV{VTUNE_HOME}) set(VTune_ROOT $ENV{VTUNE_HOME}) From ff83acb240079cd2d8e24de6fc3eeea4e0c40e67 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Wed, 16 Aug 2023 18:05:18 -0700 Subject: [PATCH 15/24] applying clang format --- .../kp_nvtx_focused_connector.cpp | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index eebb8a061..415837158 100644 --- a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -27,7 +27,7 @@ #include "kp_core.hpp" -static int tool_globfences; +static int tool_globfences; namespace KokkosTools { namespace NVTXFocusedConnector { @@ -35,14 +35,12 @@ void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { settings->requires_global_fencing = true; if (tool_globfences == 1) { - settings->requires_global_fencing = true; + settings->requires_global_fencing = true; + } else { + settings->requires_global_fencing = false; } - else { - settings->requires_global_fencing = false; - } - // leave the door open for other non-zero values of tools -} // end request tool settings - + // leave the door open for other non-zero values of tools +} // end request tool settings static KernelNVTXFocusedConnectorInfo* currentKernel; static std::unordered_map @@ -61,14 +59,15 @@ void kokkosp_init_library( printf("-----------------------------------------------------------\n"); const char* tool_global_fences = getenv("KOKKOS_TOOLS_GLOBALFENCES"); if (NULL != tool_global_fences) { - tool_globfences = atoi(tool_global_fences); + tool_globfences = atoi(tool_global_fences); } else { - tool_globfences = 1; // default to 1 to be conservative for capturing state by tool + tool_globfences = + 1; // default to 1 to be conservative for capturing state by tool } - nvtxNameOsThread(pthread_self(), "Application Main Thread"); + nvtxNameOsThread(pthread_self(), "Application Main Thread"); nvtxMarkA("Kokkos::Initialization Complete"); nextKernelID = 0; -} // end kokkosp_init_library +} // end kokkosp_init_library KernelNVTXFocusedConnectorInfo* getFocusedConnectorInfo( const char* name, KernelExecutionType kType) { @@ -85,25 +84,25 @@ KernelNVTXFocusedConnectorInfo* getFocusedConnectorInfo( } return currentKernel; -} // end getFocusedConnectorInfo +} // end getFocusedConnectorInfo void focusedConnectorExecuteStart() { cudaProfilerStart(); currentKernel->startRange(); -} // end focusedConnectorExecuteStart +} // end focusedConnectorExecuteStart void focusedConnectorExecuteEnd() { currentKernel->endRange(); cudaProfilerStop(); currentKernel = NULL; -} // end focusedConnectorExecuteEnd +} // end focusedConnectorExecuteEnd void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); printf( "KokkosP: Finalization of NVTX Analyzer Focused Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); -} // end kokkosp_finalize_library +} // end kokkosp_finalize_library void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, uint64_t* kID) { @@ -111,7 +110,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); -} +} void kokkosp_end_parallel_for(const uint64_t /*kID*/) { focusedConnectorExecuteEnd(); @@ -155,7 +154,7 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; return my_event_set; -} // end get_event_set +} // end get_event_set } // namespace NVTXFocusedConnector } // namespace KokkosTools From 916b8bd0e7fcde71d39942746eac9e46cc1bfe44 Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Wed, 16 Aug 2023 19:07:38 -0700 Subject: [PATCH 16/24] adding Makefile --- profiling/nvtx-focused-connector/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/nvtx-focused-connector/Makefile b/profiling/nvtx-focused-connector/Makefile index cd44ad472..8ce843bb8 100644 --- a/profiling/nvtx-focused-connector/Makefile +++ b/profiling/nvtx-focused-connector/Makefile @@ -8,7 +8,7 @@ all: kp_nvtx_focused_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}../../common/makefile-only -I${MAKEFILE_PATH}../all kp_nvtx_focused_connector.so: ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ From bab216dce4637c6e837c7fdebfff3601d8e7900d Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 17 Aug 2023 15:58:08 -0700 Subject: [PATCH 17/24] Update Makefile Uncomment -lnvToolsExt --- profiling/nvtx-focused-connector/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiling/nvtx-focused-connector/Makefile b/profiling/nvtx-focused-connector/Makefile index 8ce843bb8..5c33818ae 100644 --- a/profiling/nvtx-focused-connector/Makefile +++ b/profiling/nvtx-focused-connector/Makefile @@ -1,7 +1,7 @@ CXX=g++ CXXFLAGS=-O3 -std=c++11 -g -I$(CUDA_ROOT)/include -I./ LDFLAGS=-L$(CUDA_ROOT)/lib64 -#LIBS=-lnvToolsExt -lcudart +LIBS=-lnvToolsExt -lcudart SHARED_CXXFLAGS=-shared -fPIC all: kp_nvtx_focused_connector.so @@ -12,7 +12,7 @@ CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}../../common/makefile-only -I${MA kp_nvtx_focused_connector.so: ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ - -o $@ ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(#LIBS) + -o $@ ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(LIBS) clean: rm *.so From 62f291836e1c7accbf1f801972f4fc35746e5f04 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 17 Aug 2023 16:01:28 -0700 Subject: [PATCH 18/24] add_library --> kp_add_library Changing add_library to kp_add_library for kernel_filter --- common/kernel-filter/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/kernel-filter/CMakeLists.txt b/common/kernel-filter/CMakeLists.txt index ae5cad488..efe6d7c6b 100644 --- a/common/kernel-filter/CMakeLists.txt +++ b/common/kernel-filter/CMakeLists.txt @@ -1 +1 @@ -add_library(kp_kernel_filter ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_filter.cpp) \ No newline at end of file +kp_add_library(kp_kernel_filter ${KOKKOSTOOLS_LIBRARY_MODE} kp_kernel_filter.cpp) From 14d391b30bfe5b30cc83a730c7624633e433aee5 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 17 Aug 2023 18:04:42 -0700 Subject: [PATCH 19/24] Allow deprecated kp_kernel_filter Fix kp_kernel_filter to allow KOKKOS_TOOLS_LIBS environment variable --- common/kernel-filter/kp_kernel_filter.cpp | 24 ++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/common/kernel-filter/kp_kernel_filter.cpp b/common/kernel-filter/kp_kernel_filter.cpp index 30efeb36a..7b39aec8a 100644 --- a/common/kernel-filter/kp_kernel_filter.cpp +++ b/common/kernel-filter/kp_kernel_filter.cpp @@ -116,7 +116,6 @@ extern "C" void kokkosp_init_library(const int loadSeq, std::regex nextRegEx(lineBuffer, std::regex::optimize); kernelNames.push_back(nextRegEx); } - free(lineBuffer); } @@ -124,9 +123,21 @@ extern "C" void kokkosp_init_library(const int loadSeq, printf("KokkosP: Kernel Filtering is %s\n", (filterKernels ? "enabled" : "disabled")); + + if (filterKernels) { + char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); + // check deprecated environment variable. + if (NULL == profileLibrary) { + printf( + "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a deprecated " + "variable. Please use KOKKOS_TOOLS_LIBS.\n"); + profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); + + if (NULL == profileLibrary) { + printf("KokkosP: No library to call in %s\n", profileLibrary); + exit(-1); + } - if (filterKernels) { - char* profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); char* envBuffer = (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); strcpy(envBuffer, profileLibrary); @@ -136,7 +147,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, for (int i = 0; i < loadSeq; i++) { nextLibrary = strtok(NULL, ";"); } - + nextLibrary = strtok(NULL, ";"); if (NULL == nextLibrary) { @@ -157,19 +168,18 @@ extern "C" void kokkosp_init_library(const int loadSeq, (beginFunction)dlsym(childLibrary, "kokkosp_begin_parallel_scan"); beginReduceCallee = (beginFunction)dlsym( childLibrary, "kokkosp_begin_parallel_reduce"); - + endScanCallee = (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_scan"); endForCallee = (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_for"); endReduceCallee = (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_reduce"); - initProfileLibrary = (initFunction)dlsym(childLibrary, "kokkosp_init_library"); finalizeProfileLibrary = (finalizeFunction)dlsym(childLibrary, "kokkosp_finalize_library"); - + if (NULL != initProfileLibrary) { (*initProfileLibrary)(loadSeq + 1, interfaceVer, devInfoCount, deviceInfo); From f63e8cc3df4d450e4b7d31a7f44ad2c2a99fafff Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Thu, 17 Aug 2023 17:50:45 -0700 Subject: [PATCH 20/24] gix kp kernel with Kokkos Tools libs --- common/kernel-filter/kp_kernel_filter.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/common/kernel-filter/kp_kernel_filter.cpp b/common/kernel-filter/kp_kernel_filter.cpp index 7b39aec8a..747a7f1b0 100644 --- a/common/kernel-filter/kp_kernel_filter.cpp +++ b/common/kernel-filter/kp_kernel_filter.cpp @@ -101,18 +101,14 @@ extern "C" void kokkosp_init_library(const int loadSeq, printf("============================================================\n"); printf("KokkosP: Filter File: %s\n", kernelFilterPath); printf("============================================================\n"); - FILE* kernelFilterFile = fopen(kernelFilterPath, "rt"); - if (NULL == kernelFilterFile) { fprintf(stderr, "Unable to open kernel filter: %s\n", kernelFilterPath); exit(-1); } else { char* lineBuffer = (char*)malloc(sizeof(char) * 65536); - while (kokkospReadLine(kernelFilterFile, lineBuffer)) { printf("KokkosP: Filter [%s]\n", lineBuffer); - std::regex nextRegEx(lineBuffer, std::regex::optimize); kernelNames.push_back(nextRegEx); } @@ -137,7 +133,7 @@ extern "C" void kokkosp_init_library(const int loadSeq, printf("KokkosP: No library to call in %s\n", profileLibrary); exit(-1); } - + char* envBuffer = (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); strcpy(envBuffer, profileLibrary); @@ -168,7 +164,6 @@ extern "C" void kokkosp_init_library(const int loadSeq, (beginFunction)dlsym(childLibrary, "kokkosp_begin_parallel_scan"); beginReduceCallee = (beginFunction)dlsym( childLibrary, "kokkosp_begin_parallel_reduce"); - endScanCallee = (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_scan"); endForCallee = @@ -185,14 +180,15 @@ extern "C" void kokkosp_init_library(const int loadSeq, deviceInfo); } } - } - free(envBuffer); } - + } + } printf("============================================================\n"); } -} + +} // end kokkosp_init_library + extern "C" void kokkosp_finalize_library() { if (NULL != finalizeProfileLibrary) { From c4f924edd5434b874df9dded78de91d808e0878e Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 17 Aug 2023 18:36:05 -0700 Subject: [PATCH 21/24] applying clang format --- common/kernel-filter/kp_kernel_filter.cpp | 113 +++++++++++----------- 1 file changed, 56 insertions(+), 57 deletions(-) diff --git a/common/kernel-filter/kp_kernel_filter.cpp b/common/kernel-filter/kp_kernel_filter.cpp index 747a7f1b0..341682348 100644 --- a/common/kernel-filter/kp_kernel_filter.cpp +++ b/common/kernel-filter/kp_kernel_filter.cpp @@ -119,76 +119,75 @@ extern "C" void kokkosp_init_library(const int loadSeq, printf("KokkosP: Kernel Filtering is %s\n", (filterKernels ? "enabled" : "disabled")); - - if (filterKernels) { + + if (filterKernels) { char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); // check deprecated environment variable. if (NULL == profileLibrary) { - printf( - "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a deprecated " - "variable. Please use KOKKOS_TOOLS_LIBS.\n"); - profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); - - if (NULL == profileLibrary) { - printf("KokkosP: No library to call in %s\n", profileLibrary); - exit(-1); - } - - char* envBuffer = - (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); - strcpy(envBuffer, profileLibrary); - - char* nextLibrary = strtok(envBuffer, ";"); - - for (int i = 0; i < loadSeq; i++) { - nextLibrary = strtok(NULL, ";"); - } - - nextLibrary = strtok(NULL, ";"); + printf( + "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a deprecated " + "variable. Please use KOKKOS_TOOLS_LIBS.\n"); + profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); + + if (NULL == profileLibrary) { + printf("KokkosP: No library to call in %s\n", profileLibrary); + exit(-1); + } - if (NULL == nextLibrary) { - printf("KokkosP: No child library to call in %s\n", profileLibrary); - } else { - printf("KokkosP: Next library to call: %s\n", nextLibrary); - printf("KokkosP: Loading child library ..\n"); + char* envBuffer = + (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); + strcpy(envBuffer, profileLibrary); - void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL); + char* nextLibrary = strtok(envBuffer, ";"); - if (NULL == childLibrary) { - fprintf(stderr, "KokkosP: Error: Unable to load: %s (Error=%s)\n", - nextLibrary, dlerror()); + for (int i = 0; i < loadSeq; i++) { + nextLibrary = strtok(NULL, ";"); + } + + nextLibrary = strtok(NULL, ";"); + + if (NULL == nextLibrary) { + printf("KokkosP: No child library to call in %s\n", profileLibrary); } else { - beginForCallee = - (beginFunction)dlsym(childLibrary, "kokkosp_begin_parallel_for"); - beginScanCallee = - (beginFunction)dlsym(childLibrary, "kokkosp_begin_parallel_scan"); - beginReduceCallee = (beginFunction)dlsym( - childLibrary, "kokkosp_begin_parallel_reduce"); - endScanCallee = - (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_scan"); - endForCallee = - (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_for"); - endReduceCallee = - (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_reduce"); - initProfileLibrary = - (initFunction)dlsym(childLibrary, "kokkosp_init_library"); - finalizeProfileLibrary = - (finalizeFunction)dlsym(childLibrary, "kokkosp_finalize_library"); - - if (NULL != initProfileLibrary) { - (*initProfileLibrary)(loadSeq + 1, interfaceVer, devInfoCount, - deviceInfo); + printf("KokkosP: Next library to call: %s\n", nextLibrary); + printf("KokkosP: Loading child library ..\n"); + + void* childLibrary = dlopen(nextLibrary, RTLD_NOW | RTLD_GLOBAL); + + if (NULL == childLibrary) { + fprintf(stderr, "KokkosP: Error: Unable to load: %s (Error=%s)\n", + nextLibrary, dlerror()); + } else { + beginForCallee = (beginFunction)dlsym(childLibrary, + "kokkosp_begin_parallel_for"); + beginScanCallee = (beginFunction)dlsym( + childLibrary, "kokkosp_begin_parallel_scan"); + beginReduceCallee = (beginFunction)dlsym( + childLibrary, "kokkosp_begin_parallel_reduce"); + endScanCallee = + (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_scan"); + endForCallee = + (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_for"); + endReduceCallee = + (endFunction)dlsym(childLibrary, "kokkosp_end_parallel_reduce"); + initProfileLibrary = + (initFunction)dlsym(childLibrary, "kokkosp_init_library"); + finalizeProfileLibrary = (finalizeFunction)dlsym( + childLibrary, "kokkosp_finalize_library"); + + if (NULL != initProfileLibrary) { + (*initProfileLibrary)(loadSeq + 1, interfaceVer, devInfoCount, + deviceInfo); + } } + free(envBuffer); } - free(envBuffer); + } } - } - } printf("============================================================\n"); } -} // end kokkosp_init_library - +} // end kokkosp_init_library extern "C" void kokkosp_finalize_library() { if (NULL != finalizeProfileLibrary) { From 086f2ebc85d6fc5233365ae613584c9757ff5ced Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Thu, 24 Aug 2023 09:04:32 -0700 Subject: [PATCH 22/24] don't include -lnvToolsExt and -lcudart --- profiling/nvtx-focused-connector/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/nvtx-focused-connector/Makefile b/profiling/nvtx-focused-connector/Makefile index 5c33818ae..8f2b36946 100644 --- a/profiling/nvtx-focused-connector/Makefile +++ b/profiling/nvtx-focused-connector/Makefile @@ -1,7 +1,7 @@ CXX=g++ CXXFLAGS=-O3 -std=c++11 -g -I$(CUDA_ROOT)/include -I./ LDFLAGS=-L$(CUDA_ROOT)/lib64 -LIBS=-lnvToolsExt -lcudart +LIBS= SHARED_CXXFLAGS=-shared -fPIC all: kp_nvtx_focused_connector.so From 544522f6c9eba1a3c548383abbe7bc1f278c5b2b Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Thu, 24 Aug 2023 09:05:57 -0700 Subject: [PATCH 23/24] change tools_globfence variable from int to bool --- .../kp_nvtx_focused_connector.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index 415837158..749040b38 100644 --- a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -27,19 +27,18 @@ #include "kp_core.hpp" -static int tool_globfences; +static bool tool_globfences; namespace KokkosTools { namespace NVTXFocusedConnector { void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { settings->requires_global_fencing = true; - if (tool_globfences == 1) { + if (tool_globfences) { settings->requires_global_fencing = true; } else { settings->requires_global_fencing = false; } - // leave the door open for other non-zero values of tools } // end request tool settings static KernelNVTXFocusedConnectorInfo* currentKernel; @@ -59,10 +58,10 @@ void kokkosp_init_library( printf("-----------------------------------------------------------\n"); const char* tool_global_fences = getenv("KOKKOS_TOOLS_GLOBALFENCES"); if (NULL != tool_global_fences) { - tool_globfences = atoi(tool_global_fences); + tool_globfences = (atoi(tool_global_fences) != 0); // if user sets to 0, no global fences } else { tool_globfences = - 1; // default to 1 to be conservative for capturing state by tool + true; // default to true to be conservative for capturing state by tool } nvtxNameOsThread(pthread_self(), "Application Main Thread"); nvtxMarkA("Kokkos::Initialization Complete"); @@ -107,7 +106,6 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, uint64_t* kID) { *kID = nextKernelID++; - currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } @@ -119,7 +117,6 @@ void kokkosp_end_parallel_for(const uint64_t /*kID*/) { void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, uint64_t* kID) { *kID = nextKernelID++; - currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } @@ -131,7 +128,6 @@ void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, uint64_t* kID) { *kID = nextKernelID++; - currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); } @@ -160,9 +156,7 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { } // namespace KokkosTools extern "C" { - namespace impl = KokkosTools::NVTXFocusedConnector; - EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) @@ -172,5 +166,4 @@ EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) - } // extern "C" From 29f2f2397f124b720413f6e1533a4d0e96f86482 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 24 Aug 2023 09:59:43 -0700 Subject: [PATCH 24/24] applying clang foormat --- .../nvtx-focused-connector/kp_nvtx_focused_connector.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index 749040b38..4771e45c8 100644 --- a/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -58,7 +58,8 @@ void kokkosp_init_library( printf("-----------------------------------------------------------\n"); const char* tool_global_fences = getenv("KOKKOS_TOOLS_GLOBALFENCES"); if (NULL != tool_global_fences) { - tool_globfences = (atoi(tool_global_fences) != 0); // if user sets to 0, no global fences + tool_globfences = + (atoi(tool_global_fences) != 0); // if user sets to 0, no global fences } else { tool_globfences = true; // default to true to be conservative for capturing state by tool @@ -105,7 +106,7 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } @@ -116,7 +117,7 @@ void kokkosp_end_parallel_for(const uint64_t /*kID*/) { void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } @@ -127,7 +128,7 @@ void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); }