Skip to content

Commit

Permalink
Debug code cleanup and general cleanup of entire codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
bashimao authored and minseokl committed Feb 18, 2022
1 parent d5f5e31 commit f1e02ee
Show file tree
Hide file tree
Showing 302 changed files with 4,847 additions and 4,740 deletions.
16 changes: 8 additions & 8 deletions HugeCTR/include/base/debug/cuda_debugging.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ namespace HugeCTR {

// The maximum length of a printed message. Setting it to a larger value can affect SM resource
// usage and performance.
const int KERNEL_PRINTF_MAX_LENGTH = 256;
const size_t KERNEL_PRINTF_MAX_LENGTH = 256;

/*
* Directly using the device functions below can lead to undefined behavior.
Expand All @@ -102,36 +102,36 @@ template <typename Type1>
__device__ void kernel_printf(const char* format, Type1 arg1) {
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
kernel_printf_extend_format(extended_format, format);
printf((const char*)extended_format, arg1);
printf(extended_format, arg1);
}

template <typename Type1, typename Type2>
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2) {
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
kernel_printf_extend_format(extended_format, format);
printf((const char*)extended_format, arg1, arg2);
printf(extended_format, arg1, arg2);
}

template <typename Type1, typename Type2, typename Type3>
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3 arg3) {
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
kernel_printf_extend_format(extended_format, format);
printf((const char*)extended_format, arg1, arg2, arg3);
printf(extended_format, arg1, arg2, arg3);
}

template <typename Type1, typename Type2, typename Type3, typename Type4>
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3 arg3, Type4 arg4) {
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
kernel_printf_extend_format(extended_format, format);
printf((const char*)extended_format, arg1, arg2, arg3, arg4);
printf(extended_format, arg1, arg2, arg3, arg4);
}

template <typename Type1, typename Type2, typename Type3, typename Type4, typename Type5>
__device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3 arg3, Type4 arg4,
Type5 arg5) {
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
kernel_printf_extend_format(extended_format, format);
printf((const char*)extended_format, arg1, arg2, arg3, arg4, arg5);
printf(extended_format, arg1, arg2, arg3, arg4, arg5);
}

template <typename Type1, typename Type2, typename Type3, typename Type4, typename Type5,
Expand All @@ -140,7 +140,7 @@ __device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3
Type5 arg5, Type6 arg6) {
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
kernel_printf_extend_format(extended_format, format);
printf((const char*)extended_format, arg1, arg2, arg3, arg4, arg5, arg6);
printf(extended_format, arg1, arg2, arg3, arg4, arg5, arg6);
}

template <typename Type1, typename Type2, typename Type3, typename Type4, typename Type5,
Expand All @@ -149,7 +149,7 @@ __device__ void kernel_printf(const char* format, Type1 arg1, Type2 arg2, Type3
Type5 arg5, Type6 arg6, Type7 arg7) {
char extended_format[KERNEL_PRINTF_MAX_LENGTH] = {};
kernel_printf_extend_format(extended_format, format);
printf((const char*)extended_format, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
printf(extended_format, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
}

/*
Expand Down
70 changes: 33 additions & 37 deletions HugeCTR/include/base/debug/logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ enum class Error_t {
#define HCTR_PRINT(NAME, ...) Logger::get().log(LOG_LEVEL(NAME), LOG_RANK_ROOT, false, __VA_ARGS__)
#define HCTR_PRINT_AT(LEVEL, ...) Logger::get().log(LEVEL, LOG_RANK_ROOT, false, __VA_ARGS__)

// #define HCTR_PRINT_S(NAME) Logger::get().log(LOG_LEVEL(NAME), LOG_RANK_ROOT, false)

struct SrcLoc {
const char* file;
unsigned line;
Expand All @@ -187,6 +189,8 @@ struct SrcLoc {
#define CUR_SRC_LOC(EXPR) \
SrcLoc { __FILE__, __LINE__, __func__, #EXPR }

#define HCTR_LOCATION() '(' << __FILE__ << ':' << __LINE__ << ')'

template <typename SrcType>
Error_t getErrorType(SrcType err);
template <>
Expand Down Expand Up @@ -295,47 +299,39 @@ inline std::string getErrorString(curandStatus_t err) {
}

// For HugeCTR's own error types, it is up to users to define the message.
#define HCTR_OWN_THROW(EXPR, MSG) \
do { \
Error_t err_thr = (EXPR); \
if (err_thr != Error_t::Success) { \
Logger::get().do_throw(err_thr, CUR_SRC_LOC(EXPR), std::string(MSG)); \
} \
} while (0);
#define HCTR_OWN_THROW(EXPR, MSG) \
do { \
const Error_t err = (EXPR); \
if (err != Error_t::Success) { \
Logger::get().do_throw(err, CUR_SRC_LOC(EXPR), (MSG)); \
} \
} while (0)

#ifdef ENABLE_MPI
// Because MPI error codes are plain ints, it is safe to have a separate macro for MPI,
// rather than reserving `int` as MPI error type.
// We don't want this set of macros to become another source of errors.
#define HCTR_MPI_THROW(EXPR) \
do { \
auto err_thr = (EXPR); \
if (err_thr != MPI_SUCCESS) { \
char err_str[MPI_MAX_ERROR_STRING]; \
int err_len = MPI_MAX_ERROR_STRING; \
MPI_Error_string(err_thr, err_str, &err_len); \
Logger::get().do_throw(Error_t::MpiError, CUR_SRC_LOC(EXPR), std::string(err_str)); \
} \
} while (0);
#define HCTR_MPI_THROW(EXPR) \
do { \
const int err_code = (EXPR); \
if (err_code != MPI_SUCCESS) { \
char err_str[MPI_MAX_ERROR_STRING]; \
int err_len = MPI_MAX_ERROR_STRING; \
MPI_Error_string(err_code, err_str, &err_len); \
Logger::get().do_throw(Error_t::MpiError, CUR_SRC_LOC(EXPR), err_str); \
} \
} while (0)
#endif

// For other library calls such as CUDA, cuBLAS and NCCL, use this macro
#define HCTR_LIB_THROW(EXPR) \
do { \
auto ret_thr = (EXPR); \
Error_t err_type = getErrorType(ret_thr); \
if (err_type != Error_t::Success) { \
std::string err_msg = getErrorString(ret_thr); \
Logger::get().do_throw(err_type, CUR_SRC_LOC(EXPR), err_msg); \
} \
} while (0);

#define HCTR_THROW_IF(EXPR, ERROR, MSG) \
do { \
const auto& expr = (EXPR); \
if (expr) { \
Logger::get().do_throw((ERROR), CUR_SRC_LOC(EXPR), std::string(MSG)); \
} \
#define HCTR_LIB_THROW(EXPR) \
do { \
const auto lib_err = (EXPR); \
const Error_t err = getErrorType(lib_err); \
if (err != Error_t::Success) { \
const std::string msg = getErrorString(lib_err); \
Logger::get().do_throw(err, CUR_SRC_LOC(EXPR), msg); \
} \
} while (0)

#define CHECK_CALL(MODE) CHECK_##MODE##_CALL
Expand Down Expand Up @@ -380,7 +376,7 @@ class DeferredLogEntry {
std::function<void(std::ostringstream&)> make_log_entry)
: bypass_{bypass}, make_log_entry_{make_log_entry} {}

inline ~DeferredLogEntry() { make_log_entry_(ss_); }
inline ~DeferredLogEntry() { make_log_entry_(os_); }

DeferredLogEntry(const DeferredLogEntry&) = delete;
DeferredLogEntry(const DeferredLogEntry&&) = delete;
Expand All @@ -390,21 +386,21 @@ class DeferredLogEntry {
template <typename T>
inline DeferredLogEntry& operator<<(const T& value) {
if (!bypass_) {
ss_ << value;
os_ << value;
}
return *this;
}

inline DeferredLogEntry& operator<<(std::ostream& (*fn)(std::ostream&)) {
if (!bypass_) {
fn(ss_);
fn(os_);
}
return *this;
}

private:
bool bypass_;
std::ostringstream ss_;
std::ostringstream os_;
std::function<void(std::ostringstream&)> make_log_entry_;
};

Expand Down
18 changes: 9 additions & 9 deletions HugeCTR/include/collectives/ib_proxy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@

#include "gdrapi.h"

#define PROXY_ASSERT(expr) \
if (!(expr)) { \
ERROR_MESSAGE_(#expr); \
exit(1); \
#define PROXY_ASSERT(expr) \
if (!(expr)) { \
HCTR_LOG_S(ERROR, WORLD) << #expr << ' ' << HCTR_LOCATION() << std::endl; \
exit(1); \
}
#define PROXY_ASSERT_MSG(expr, msg) \
if (!(expr)) { \
ERROR_MESSAGE_(msg); \
exit(1); \
#define PROXY_ASSERT_MSG(expr, msg) \
if (!(expr)) { \
HCTR_LOG_S(ERROR, WORLD) << msg << ' ' << HCTR_LOCATION() << std::endl; \
exit(1); \
}

#define MAX_IBV_DEST 1024
Expand Down Expand Up @@ -468,7 +468,7 @@ struct ProxyCommandVisitor : public boost::static_visitor<void> {
proxy_->exec_proxy_cmd(std::get<0>(cmd), std::get<1>(cmd));
}
void operator()(boost::blank __unused) const {
ERROR_MESSAGE_("Invalid proxy command");
HCTR_LOG_S(ERROR, WORLD) << "Invalid proxy command " << HCTR_LOCATION() << std::endl;
exit(1);
}
ProxyCommandVisitor(IbvProxy* proxy) { proxy_ = proxy; };
Expand Down
Loading

0 comments on commit f1e02ee

Please sign in to comment.