Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use eventset in roctxconnector (as in nvtxconnector) #219

Merged
merged 2 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ endif()
include(cmake/configure_variorum.cmake)

set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER})
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available
set(KOKKOSTOOLS_HAS_ROCTX ${Kokkos_ENABLE_HIP}) # we assume that enabling HIP for Kokkos program means roctx should be available

if(DEFINED ENV{VTUNE_HOME})
set(VTune_ROOT $ENV{VTUNE_HOME})
Expand Down
1 change: 1 addition & 0 deletions common/kp_config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#define USE_MPI @KOKKOSTOOLS_HAS_MPI@

#cmakedefine KOKKOSTOOLS_HAS_NVTX
#cmakedefine KOKKOSTOOLS_HAS_ROCTX
#cmakedefine KOKKOSTOOLS_HAS_CALIPER
#cmakedefine KOKKOSTOOLS_HAS_SYSTEMTAP
#cmakedefine KOKKOSTOOLS_HAS_VARIORUM
Expand Down
3 changes: 3 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ if(KOKKOSTOOLS_HAS_NVTX)
add_kp_test(nvtx_connector "nvtx-connector")
add_kp_test(nvtx_focused_connector "nvtx-focused-connector")
endif()
if(KOKKOSTOOLS_HAS_ROCTX)
add_kp_test(roctx_connector "roctx-connector")
endif()
6 changes: 6 additions & 0 deletions profiling/all/kp_all.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector)
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXConnector)
KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector)
#endif
#ifdef KOKKOSTOOLS_HAS_ROCTX
KOKKOSTOOLS_EXTERN_EVENT_SET(ROCTXConnector)
#endif
#ifdef KOKKOSTOOLS_HAS_CALIPER
namespace cali {
extern Kokkos::Tools::Experimental::EventSet get_kokkos_event_set(
Expand Down Expand Up @@ -93,6 +96,9 @@ EventSet get_event_set(const char* profiler, const char* config_str) {
#ifdef KOKKOSTOOLS_HAS_NVTX
handlers["nvtx-connector"] = NVTXConnector::get_event_set();
handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set();
#endif
#ifdef KOKKOSTOOLS_HAS_ROCTX
handlers["roctx-connector"] = ROCTXConnector::get_event_set();
#endif
auto e = handlers.find(profiler);
if (e != handlers.end()) return e->second;
Expand Down
2 changes: 1 addition & 1 deletion profiling/nvtx-connector/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CXX=g++
CXXFLAGS=-O3 -std=c++11 -g -I$(CUDA_ROOT)/include
CXXFLAGS=-O3 -std=c++11 -g -I$(CUDA_ROOT)/include/
dalg24 marked this conversation as resolved.
Show resolved Hide resolved
LDFLAGS=-L$(CUDA_ROOT)/lib64
LIBS=-lnvToolsExt
SHARED_CXXFLAGS=-shared -fPIC
Expand Down
4 changes: 2 additions & 2 deletions profiling/roctx-connector/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ find_path(ROCM_ROCTX_INCLUDE roctx.h REQUIRED HINTS $ENV{ROCM_PATH}/include/roct

kp_add_library(kp_roctx_connector kp_roctx_connector.cpp)

target_include_directories(kp_roctx_connector PRIVATE ${ROCM_ROCTX_INCLUDE})
target_link_libraries(kp_roctx_connector PRIVATE ${ROCM_ROCTX_LIB})
target_include_directories(kp_roctx_connector PUBLIC ${ROCM_ROCTX_INCLUDE})
target_link_libraries(kp_roctx_connector PUBLIC ${ROCM_ROCTX_LIB})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you change?
I can see how the linking would need some symbols to be private but not why the include directories would be needed downstream.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are leftovers from when I was trying to make the roctx-connector work with rocprof. I've reverted the change.

In fact, in my first tries, I used rocprof as follows:

rocprof --hip-trace --hsa-trace --roctx-trace ./a.out --kokkos-tools-libs=${KOKKOS_TOOLS_DIR}/lib/libkp_roctx_connector.so

That didn't work, in the sense that even though this command asks rocprof to use --roctx-trace, rocprof didn't include the markers in the results.json trace. I suspect that the reason may be that rocprof wants to see the executable linked to roctx64.so. That may not be the case when the connector is linked at runtime via dlsym. And so these leftovers came from attempts to make roctx64.so appear among the linked libraries.

133 changes: 100 additions & 33 deletions profiling/roctx-connector/kp_roctx_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <string>
#include <vector>

#include "kp_core.hpp"

namespace {
struct Section {
std::string label;
Expand All @@ -29,29 +31,40 @@ struct Section {
std::vector<Section> kokkosp_sections;
} // namespace

struct Kokkos_Tools_ToolSettings {
bool requires_global_fencing;
bool padding[255];
};
namespace KokkosTools {
namespace ROCTXConnector {

extern "C" void kokkosp_request_tool_settings(
static bool tool_globfences;

void kokkosp_request_tool_settings(
const uint32_t, Kokkos_Tools_ToolSettings* settings) {
settings->requires_global_fencing = false;
if (tool_globfences) {
settings->requires_global_fencing = true;
} else {
settings->requires_global_fencing = false;
}
}

extern "C" void kokkosp_init_library(const int loadSeq,
const uint64_t interfaceVer,
const uint32_t /*devInfoCount*/,
void* /*deviceInfo*/) {
void kokkosp_init_library(const int loadSeq,
const uint64_t interfaceVer,
const uint32_t /*devInfoCount*/,
Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) {

const char* tool_global_fences = std::getenv("KOKKOS_TOOLS_GLOBALFENCES");
if (tool_global_fences) {
tool_globfences = (atoi(tool_global_fences) != 0);
}

std::cout << "-----------------------------------------------------------\n"
<< "KokkosP: ROC Tracer Connector (sequence is " << loadSeq
<< ", version: " << interfaceVer << ")\n"
<< "Global fences: " << (tool_globfences ? "ON" : "OFF") << "\n"
dalg24 marked this conversation as resolved.
Show resolved Hide resolved
<< "-----------------------------------------------------------\n";

roctxMark("Kokkos::Initialization Complete");
}

extern "C" void kokkosp_finalize_library() {
void kokkosp_finalize_library() {
std::cout << R"(
-----------------------------------------------------------
KokkosP: Finalization of ROC Tracer Connector. Complete.
Expand All @@ -61,66 +74,120 @@ KokkosP: Finalization of ROC Tracer Connector. Complete.
roctxMark("Kokkos::Finalization Complete");
}

extern "C" void kokkosp_begin_parallel_for(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
void kokkosp_begin_parallel_for(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_for(const uint64_t /*kID*/) {
void kokkosp_end_parallel_for(const uint64_t /*kID*/) {
roctxRangePop();
}

extern "C" void kokkosp_begin_parallel_scan(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
void kokkosp_begin_parallel_scan(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_scan(const uint64_t /*kID*/) {
void kokkosp_end_parallel_scan(const uint64_t /*kID*/) {
roctxRangePop();
}

extern "C" void kokkosp_begin_parallel_reduce(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
void kokkosp_begin_parallel_reduce(const char* name,
const uint32_t /*devID*/,
uint64_t* /*kID*/) {
roctxRangePush(name);
}

extern "C" void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) {
void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) {
roctxRangePop();
}

extern "C" void kokkosp_push_profile_region(char* name) {
void kokkosp_push_profile_region(const char* name) {
roctxRangePush(name);
}

extern "C" void kokkosp_pop_profile_region() { roctxRangePop(); }
void kokkosp_pop_profile_region() { roctxRangePop(); }

extern "C" void kokkosp_create_profile_section(const char* name,
uint32_t* sID) {
void kokkosp_create_profile_section(const char* name,
uint32_t* sID) {
*sID = kokkosp_sections.size();
kokkosp_sections.push_back(
{std::string(name), static_cast<roctx_range_id_t>(-1)});
}

extern "C" void kokkosp_start_profile_section(const uint32_t sID) {
void kokkosp_start_profile_section(const uint32_t sID) {
auto& section = kokkosp_sections[sID];
section.id = roctxRangeStart(section.label.c_str());
}

extern "C" void kokkosp_stop_profile_section(const uint32_t sID) {
void kokkosp_stop_profile_section(const uint32_t sID) {
auto const& section = kokkosp_sections[sID];
roctxRangeStop(section.id);
}

extern "C" void kokkosp_destroy_profile_section(const uint32_t sID) {
void kokkosp_destroy_profile_section(const uint32_t sID) {
// do nothing
}

extern "C" void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/,
uint64_t* fID) {
void kokkosp_profile_event(const char* name) { roctxMark(name); }

void kokkosp_begin_fence(const char* name, const uint32_t /*devID*/,
uint64_t* fID) {
*fID = roctxRangeStart(name);
}

extern "C" void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); }
void kokkosp_end_fence(const uint64_t fID) { roctxRangeStop(fID); }

Kokkos::Tools::Experimental::EventSet get_event_set() {
Kokkos::Tools::Experimental::EventSet my_event_set;
memset(&my_event_set, 0,
sizeof(my_event_set)); // zero any pointers not set here
Comment on lines +130 to +131
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uggg. That thing really something that should be handled at construction.
I suspect value-initialization

EventSet my_event_set{};

does what you want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed :) I used this piece of code because all connectors are currently doing it this way. It may be best to address the issue for all connectors in a separate PR? One way forward might be to provide default initialisers using = NULL for the function pointers in the declaration of the struct in Kokkos_Profiling_C_interface.h?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used this piece of code because all connectors are currently doing it this way.

You are right. This is fine then.

And yes definitely fix elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my_event_set.request_tool_settings = kokkosp_request_tool_settings;
my_event_set.init = kokkosp_init_library;
my_event_set.finalize = kokkosp_finalize_library;
my_event_set.push_region = kokkosp_push_profile_region;
my_event_set.pop_region = kokkosp_pop_profile_region;
my_event_set.begin_parallel_for = kokkosp_begin_parallel_for;
my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce;
my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan;
my_event_set.end_parallel_for = kokkosp_end_parallel_for;
my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce;
my_event_set.end_parallel_scan = kokkosp_end_parallel_scan;
my_event_set.create_profile_section = kokkosp_create_profile_section;
my_event_set.start_profile_section = kokkosp_start_profile_section;
my_event_set.stop_profile_section = kokkosp_stop_profile_section;
my_event_set.destroy_profile_section = kokkosp_destroy_profile_section;
my_event_set.profile_event = kokkosp_profile_event;
my_event_set.begin_fence = kokkosp_begin_fence;
my_event_set.end_fence = kokkosp_end_fence;
return my_event_set;
}

} // namespace ROCTXConnector
} // namespace KokkosTools

extern "C" {

namespace impl = KokkosTools::ROCTXConnector;

EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
EXPOSE_INIT(impl::kokkosp_init_library)
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region)
EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region)
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for)
EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan)
EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan)
EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce)
EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce)
EXPOSE_CREATE_PROFILE_SECTION(impl::kokkosp_create_profile_section)
EXPOSE_START_PROFILE_SECTION(impl::kokkosp_start_profile_section)
EXPOSE_STOP_PROFILE_SECTION(impl::kokkosp_stop_profile_section)
EXPOSE_DESTROY_PROFILE_SECTION(impl::kokkosp_destroy_profile_section)
EXPOSE_PROFILE_EVENT(impl::kokkosp_profile_event);
EXPOSE_BEGIN_FENCE(impl::kokkosp_begin_fence);
EXPOSE_END_FENCE(impl::kokkosp_end_fence);
} // extern "C"