Skip to content

Commit f1e02ee

Browse files
bashimao authored and minseokl committed
Debug code cleanup and general cleanup of entire codebase
1 parent d5f5e31 commit f1e02ee

File tree

302 files changed

+4847
-4740
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

302 files changed

+4847
-4740
lines changed

HugeCTR/include/base/debug/cuda_debugging.cuh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ namespace HugeCTR {
8989

9090
// The maximum length of a printed message. Setting it to a larger value can affect SM resource
9191
// usage and performance.
92-
const int KERNEL_PRINTF_MAX_LENGTH = 256;
92+
const size_t KERNEL_PRINTF_MAX_LENGTH = 256;
9393

9494
/*
9595
* Directly using the device functions below can lead to undefined behavior.
@@ -102,36 +102,36 @@ template <typename Type1>
102102
__device__ void kernel_printf(const char* format, Type1 arg1) {
103103
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
104104
kernel_printf_extend_format(extended_format, format);
105-
printf((const char*)extended_format, arg1);
105+
printf(extended_format, arg1);
106106
}
107107

108108
template <typename Type1, typename Type2>
109109
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2) {
110110
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
111111
kernel_printf_extend_format(extended_format, format);
112-
printf((const char*)extended_format, arg1, arg2);
112+
printf(extended_format, arg1, arg2);
113113
}
114114

115115
template <typename Type1, typename Type2, typename Type3>
116116
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3 arg3) {
117117
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
118118
kernel_printf_extend_format(extended_format, format);
119-
printf((const char*)extended_format, arg1, arg2, arg3);
119+
printf(extended_format, arg1, arg2, arg3);
120120
}
121121

122122
template <typename Type1, typename Type2, typename Type3, typename Type4>
123123
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3 arg3, Type4 arg4) {
124124
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
125125
kernel_printf_extend_format(extended_format, format);
126-
printf((const char*)extended_format, arg1, arg2, arg3, arg4);
126+
printf(extended_format, arg1, arg2, arg3, arg4);
127127
}
128128

129129
template <typename Type1, typename Type2, typename Type3, typename Type4, typename Type5>
130130
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3 arg3, Type4 arg4,
131131
Type5 arg5) {
132132
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
133133
kernel_printf_extend_format(extended_format, format);
134-
printf((const char*)extended_format, arg1, arg2, arg3, arg4, arg5);
134+
printf(extended_format, arg1, arg2, arg3, arg4, arg5);
135135
}
136136

137137
template <typename Type1, typename Type2, typename Type3, typename Type4, typename Type5,
@@ -140,7 +140,7 @@ __device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3
140140
Type5 arg5, Type6 arg6) {
141141
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
142142
kernel_printf_extend_format(extended_format, format);
143-
printf((const char*)extended_format, arg1, arg2, arg3, arg4, arg5, arg6);
143+
printf(extended_format, arg1, arg2, arg3, arg4, arg5, arg6);
144144
}
145145

146146
template <typename Type1, typename Type2, typename Type3, typename Type4, typename Type5,
@@ -149,7 +149,7 @@ __device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3
149149
Type5 arg5, Type6 arg6, Type7 arg7) {
150150
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
151151
kernel_printf_extend_format(extended_format, format);
152-
printf((const char*)extended_format, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
152+
printf(extended_format, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
153153
}
154154

155155
/*

HugeCTR/include/base/debug/logger.hpp

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ enum class Error_t {
177177
#define HCTR_PRINT(NAME, ...) Logger::get().log(LOG_LEVEL(NAME), LOG_RANK_ROOT, false, __VA_ARGS__)
178178
#define HCTR_PRINT_AT(LEVEL, ...) Logger::get().log(LEVEL, LOG_RANK_ROOT, false, __VA_ARGS__)
179179

180+
// #define HCTR_PRINT_S(NAME) Logger::get().log(LOG_LEVEL(NAME), LOG_RANK_ROOT, false)
181+
180182
struct SrcLoc {
181183
const char* file;
182184
unsigned line;
@@ -187,6 +189,8 @@ struct SrcLoc {
187189
#define CUR_SRC_LOC(EXPR) \
188190
SrcLoc { __FILE__, __LINE__, __func__, #EXPR }
189191

192+
#define HCTR_LOCATION() '(' << __FILE__ << ':' << __LINE__ << ')'
193+
190194
template <typename SrcType>
191195
Error_t getErrorType(SrcType err);
192196
template <>
@@ -295,47 +299,39 @@ inline std::string getErrorString(curandStatus_t err) {
295299
}
296300

297301
// For HugeCTR's own error types, it is up to users to define the message.
298-
#define HCTR_OWN_THROW(EXPR, MSG) \
299-
do { \
300-
Error_t err_thr = (EXPR); \
301-
if (err_thr != Error_t::Success) { \
302-
Logger::get().do_throw(err_thr, CUR_SRC_LOC(EXPR), std::string(MSG)); \
303-
} \
304-
} while (0);
302+
#define HCTR_OWN_THROW(EXPR, MSG) \
303+
do { \
304+
const Error_t err = (EXPR); \
305+
if (err != Error_t::Success) { \
306+
Logger::get().do_throw(err, CUR_SRC_LOC(EXPR), (MSG)); \
307+
} \
308+
} while (0)
305309

306310
#ifdef ENABLE_MPI
307311
// Because the MPI error code is an int, it is safe to have a separate macro for MPI,
308312
// rather than reserving `int` as MPI error type.
309313
// We don't want this set of macros to become another source of errors.
310-
#define HCTR_MPI_THROW(EXPR) \
311-
do { \
312-
auto err_thr = (EXPR); \
313-
if (err_thr != MPI_SUCCESS) { \
314-
char err_str[MPI_MAX_ERROR_STRING]; \
315-
int err_len = MPI_MAX_ERROR_STRING; \
316-
MPI_Error_string(err_thr, err_str, &err_len); \
317-
Logger::get().do_throw(Error_t::MpiError, CUR_SRC_LOC(EXPR), std::string(err_str)); \
318-
} \
319-
} while (0);
314+
#define HCTR_MPI_THROW(EXPR) \
315+
do { \
316+
const int err_code = (EXPR); \
317+
if (err_code != MPI_SUCCESS) { \
318+
char err_str[MPI_MAX_ERROR_STRING]; \
319+
int err_len = MPI_MAX_ERROR_STRING; \
320+
MPI_Error_string(err_code, err_str, &err_len); \
321+
Logger::get().do_throw(Error_t::MpiError, CUR_SRC_LOC(EXPR), err_str); \
322+
} \
323+
} while (0)
320324
#endif
321325

322326
// For other library calls such as CUDA, cuBLAS and NCCL, use this macro
323-
#define HCTR_LIB_THROW(EXPR) \
324-
do { \
325-
auto ret_thr = (EXPR); \
326-
Error_t err_type = getErrorType(ret_thr); \
327-
if (err_type != Error_t::Success) { \
328-
std::string err_msg = getErrorString(ret_thr); \
329-
Logger::get().do_throw(err_type, CUR_SRC_LOC(EXPR), err_msg); \
330-
} \
331-
} while (0);
332-
333-
#define HCTR_THROW_IF(EXPR, ERROR, MSG) \
334-
do { \
335-
const auto& expr = (EXPR); \
336-
if (expr) { \
337-
Logger::get().do_throw((ERROR), CUR_SRC_LOC(EXPR), std::string(MSG)); \
338-
} \
327+
#define HCTR_LIB_THROW(EXPR) \
328+
do { \
329+
const auto lib_err = (EXPR); \
330+
const Error_t err = getErrorType(lib_err); \
331+
if (err != Error_t::Success) { \
332+
const std::string msg = getErrorString(lib_err); \
333+
Logger::get().do_throw(err, CUR_SRC_LOC(EXPR), msg); \
334+
} \
339335
} while (0)
340336

341337
#define CHECK_CALL(MODE) CHECK_##MODE##_CALL
@@ -380,7 +376,7 @@ class DeferredLogEntry {
380376
std::function<void(std::ostringstream&)> make_log_entry)
381377
: bypass_{bypass}, make_log_entry_{make_log_entry} {}
382378

383-
inline ~DeferredLogEntry() { make_log_entry_(ss_); }
379+
inline ~DeferredLogEntry() { make_log_entry_(os_); }
384380

385381
DeferredLogEntry(const DeferredLogEntry&) = delete;
386382
DeferredLogEntry(const DeferredLogEntry&&) = delete;
@@ -390,21 +386,21 @@ class DeferredLogEntry {
390386
template <typename T>
391387
inline DeferredLogEntry& operator<<(const T& value) {
392388
if (!bypass_) {
393-
ss_ << value;
389+
os_ << value;
394390
}
395391
return *this;
396392
}
397393

398394
inline DeferredLogEntry& operator<<(std::ostream& (*fn)(std::ostream&)) {
399395
if (!bypass_) {
400-
fn(ss_);
396+
fn(os_);
401397
}
402398
return *this;
403399
}
404400

405401
private:
406402
bool bypass_;
407-
std::ostringstream ss_;
403+
std::ostringstream os_;
408404
std::function<void(std::ostringstream&)> make_log_entry_;
409405
};
410406

HugeCTR/include/collectives/ib_proxy.hpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,15 @@
2626

2727
#include "gdrapi.h"
2828

29-
#define PROXY_ASSERT(expr) \
30-
if (!(expr)) { \
31-
ERROR_MESSAGE_(#expr); \
32-
exit(1); \
29+
#define PROXY_ASSERT(expr) \
30+
if (!(expr)) { \
31+
HCTR_LOG_S(ERROR, WORLD) << #expr << ' ' << HCTR_LOCATION() << std::endl; \
32+
exit(1); \
3333
}
34-
#define PROXY_ASSERT_MSG(expr, msg) \
35-
if (!(expr)) { \
36-
ERROR_MESSAGE_(msg); \
37-
exit(1); \
34+
#define PROXY_ASSERT_MSG(expr, msg) \
35+
if (!(expr)) { \
36+
HCTR_LOG_S(ERROR, WORLD) << msg << ' ' << HCTR_LOCATION() << std::endl; \
37+
exit(1); \
3838
}
3939

4040
#define MAX_IBV_DEST 1024
@@ -468,7 +468,7 @@ struct ProxyCommandVisitor : public boost::static_visitor<void> {
468468
proxy_->exec_proxy_cmd(std::get<0>(cmd), std::get<1>(cmd));
469469
}
470470
void operator()(boost::blank __unused) const {
471-
ERROR_MESSAGE_("Invalid proxy command");
471+
HCTR_LOG_S(ERROR, WORLD) << "Invalid proxy command " << HCTR_LOCATION() << std::endl;
472472
exit(1);
473473
}
474474
ProxyCommandVisitor(IbvProxy* proxy) { proxy_ = proxy; };

0 commit comments

Comments
 (0)