Skip to content

Commit

Permalink
enable ACC timers for no ACC compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
alazzaro committed Mar 22, 2022
1 parent 38f892a commit a2bedce
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 39 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,6 @@ spack-*

.ccls-cache/

.DS_Store
.DS_Store

BUILD/
2 changes: 0 additions & 2 deletions src/acc/acc.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,8 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
int c_dbcsr_acc_memset_zero(void* dev_mem, size_t offset, size_t nbytes, void* stream);
int c_dbcsr_acc_dev_mem_info(size_t* mem_free, size_t* mem_total);

#if defined(__DBCSR_ACC)
void c_dbcsr_timeset(const char** routineN, const int* routineN_len, int* handle);
void c_dbcsr_timestop(const int* handle);
#endif

#if defined(__cplusplus)
}
Expand Down
19 changes: 4 additions & 15 deletions src/acc/dbcsr_acc_timings.F
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ MODULE dbcsr_acc_timings

CONTAINS

SUBROUTINE dbcsr_timeset_F(routineN, routineN_len, handle) BIND(C, name="c_dbcsr_timeset")
SUBROUTINE f_dbcsr_timeset(routineN, routineN_len, handle) BIND(C, name="c_dbcsr_timeset")

TYPE(C_PTR), INTENT(IN) :: routineN
INTEGER(KIND=C_INT), INTENT(IN) :: routineN_len
Expand All @@ -38,25 +38,14 @@ SUBROUTINE dbcsr_timeset_F(routineN, routineN_len, handle) BIND(C, name="c_dbcsr
routineName(i:i) = a(i)
END DO

#if defined (__DBCSR_ACC)
CALL timeset(routineName, handle)
#else
DBCSR_ABORT("__DBCSR_ACC not compiled in.")
handle = 0
#endif
END SUBROUTINE f_dbcsr_timeset

END SUBROUTINE dbcsr_timeset_F

SUBROUTINE dbcsr_timestop_F(handle) BIND(C, name="c_dbcsr_timestop")
SUBROUTINE f_dbcsr_timestop(handle) BIND(C, name="c_dbcsr_timestop")

INTEGER(KIND=C_INT), INTENT(IN) :: handle

#if ! defined (__DBCSR_ACC)
MARK_USED(handle)
DBCSR_ABORT("__DBCSR_ACC not compiled in.")
#else
CALL timestop(handle)
#endif
END SUBROUTINE dbcsr_timestop_F
END SUBROUTINE f_dbcsr_timestop

END MODULE dbcsr_acc_timings
14 changes: 6 additions & 8 deletions src/acc/libsmm_acc/libsmm_acc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ inline void validate_kernel(ACC_DRV(function) & kern_func, ACC_DRV(stream) strea
//===========================================================================
inline void jit_kernel(ACC_DRV(function) & kern_func, libsmm_acc_algo algo, int tile_m, int tile_n, int w, int v, int threads,
int grouping, int minblocks, int m, int n, int k) {
#if defined(__DBCSR_ACC)
std::string routineN = LIBSMM_ACC_PROCESS_ROUTINE_NAME_STR;
int handle;

timeset(routineN, handle);
#endif

// Get the code and the lowered name corresponding the kernel to launch
std::string kernel_code = smm_acc_common; // prepend include file content to code
std::string kernel_name;
Expand Down Expand Up @@ -188,9 +188,8 @@ inline void jit_kernel(ACC_DRV(function) & kern_func, libsmm_acc_algo algo, int

// Destroy program
ACC_RTC_CALL(DestroyProgram, (&kernel_program));
#if defined(__DBCSR_ACC)

timestop(handle);
#endif
}


Expand Down Expand Up @@ -338,11 +337,11 @@ inline void validate_transpose_kernel(ACC_DRV(function) & kern_func, int threads

//===========================================================================
void jit_transpose_handle(ACC_DRV(function) & kern_func, int m, int n) {
#if defined(__DBCSR_ACC)
std::string routineN = LIBSMM_ACC_TRANSPOSE_ROUTINE_NAME_STR;
int handle;

timeset(routineN, handle);
#endif

// Create nvrtcProgram
ACC_RTC(Program) kernel_program;
std::string transpose_code = smm_acc_common + smm_acc_transpose;
Expand Down Expand Up @@ -405,9 +404,8 @@ void jit_transpose_handle(ACC_DRV(function) & kern_func, int m, int n) {

// Destroy program
ACC_RTC_CALL(DestroyProgram, (&kernel_program));
#if defined(__DBCSR_ACC)

timestop(handle);
#endif
}

//===========================================================================
Expand Down
20 changes: 9 additions & 11 deletions src/acc/libsmm_acc/libsmm_acc_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

std::vector<ACC_BLAS(Handle_t)*> acc_blashandles;


#if defined(__DBCSR_ACC)
//===========================================================================
void timeset(const std::string& routine_name, int& handle) {
const char* routine_name_ = routine_name.c_str();
Expand All @@ -27,7 +25,6 @@ void timeset(const std::string& routine_name, int& handle) {
}

// Stop the timer identified by `handle` by forwarding to c_dbcsr_timestop,
// which takes the handle by pointer rather than by value.
void timestop(int handle) {
  const int* handle_ptr = &handle;
  c_dbcsr_timestop(handle_ptr);
}
#endif

//===========================================================================
int libsmm_acc_gpu_blas_init() {
Expand All @@ -53,37 +50,38 @@ int libsmm_acc_gpu_blas_init() {

//===========================================================================
// Initialization entry point with C linkage.
//
// Calls libsmm_acc_check_gpu_warp_size_consistency() followed by
// libsmm_acc_gpu_blas_init(). When __DBCSR_ACC is defined, the whole routine
// is bracketed by timeset()/timestop() under the name "libsmm_acc_init".
//
// Returns 0 unconditionally.
// NOTE(review): the int result of libsmm_acc_gpu_blas_init() is discarded here;
// confirm whether a failure there should be propagated to the caller.
extern "C" int libsmm_acc_init() {
#if defined(__DBCSR_ACC)
  std::string routineN = "libsmm_acc_init";
  int handle;

  timeset(routineN, handle);
#endif

  // check warp size consistency
  libsmm_acc_check_gpu_warp_size_consistency();
  libsmm_acc_gpu_blas_init();
#if defined(__DBCSR_ACC)

  timestop(handle);
#endif

  return 0;
}

//===========================================================================
// Finalization entry point with C linkage.
//
// Walks the global acc_blashandles vector, destroys every non-NULL entry via
// acc_blas_destroy() and resets the slot to NULL. When __DBCSR_ACC is defined,
// the routine is bracketed by timeset()/timestop() under the name
// "libsmm_acc_finalize".
//
// Returns 0 unconditionally.
extern "C" int libsmm_acc_finalize() {
#if defined(__DBCSR_ACC)
  std::string timer_label = "libsmm_acc_finalize";
  int timer_handle;

  timeset(timer_label, timer_handle);
#endif

  // free acc_blas handle resources; one handle per thread
  for (size_t slot = 0; slot < acc_blashandles.size(); slot++) {
    if (NULL == acc_blashandles[slot]) {
      continue; // empty slot, nothing to release
    }
    acc_blas_destroy(acc_blashandles[slot]);
    acc_blashandles[slot] = NULL;
  }
#if defined(__DBCSR_ACC)

  timestop(timer_handle);
#endif

  return 0;
}

Expand Down
2 changes: 0 additions & 2 deletions src/acc/libsmm_acc/libsmm_acc_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
#include <vector>
#include <string>

#if defined(__DBCSR_ACC)
void timeset(const std::string& routine_name, int& handle);
void timestop(int handle);
#endif

extern "C" int libsmm_acc_init(void);
extern "C" int libsmm_acc_finalize(void);
Expand Down

3 comments on commit a2bedce

@hfp
Copy link
Member

@hfp hfp commented on a2bedce Mar 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about my pending task to provide the timer stubs in a central place, and I now wonder why we would not want a guard such as defined(__DBCSR_ACC). Are we ever supposed to call into the backend, i.e., collect or contribute timer results, if __DBCSR_ACC is not defined?

@alazzaro
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is exactly what you are mentioning. The code containing the timers is only relevant when __DBCSR_ACC is used, so I don't like the idea of guarding it with defined(__DBCSR_ACC), which should be implicit by definition. Even more, I don't like code with tons of #if defined ... #endif blocks.
So, one possibility would be to add the stubs in src/acc/libsmm_acc/libsmm_acc_init.cpp, so that we have something like:

#if defined(__DBCSR_ACC)
void timeset(const std::string& routine_name, int& handle) {
  const char* routine_name_ = routine_name.c_str();
  int routine_name_length = routine_name.length();
  c_dbcsr_timeset(&routine_name_, &routine_name_length, &handle);
}
void timestop(int handle) { c_dbcsr_timestop(&handle); }
#else
// provide stubs
#endif

What do you think?

@hfp
Copy link
Member

@hfp hfp commented on a2bedce Mar 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Completed: #593

Please sign in to comment.