From a97561a4ef2e3af758f621b1891d0b7f55eec583 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 31 Mar 2020 13:28:53 -0700 Subject: [PATCH 1/5] test remove asserts and branchy code on hotspot --- src/gpgpu-sim/gpu-cache.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/gpgpu-sim/gpu-cache.cc b/src/gpgpu-sim/gpu-cache.cc index af22c4c2c..e203afa2d 100644 --- a/src/gpgpu-sim/gpu-cache.cc +++ b/src/gpgpu-sim/gpu-cache.cc @@ -704,16 +704,9 @@ unsigned long long cache_stats::operator()(int access_type, int access_outcome, bool fail_outcome) const { /// /// Const accessor into m_stats. - /// if (fail_outcome) { - if (!check_fail_valid(access_type, access_outcome)) - assert(0 && "Unknown cache access type or fail outcome"); - return m_fail_stats[access_type][access_outcome]; } else { - if (!check_valid(access_type, access_outcome)) - assert(0 && "Unknown cache access type or access outcome"); - return m_stats[access_type][access_outcome]; } } From 6a4f5d613a99fd9ffbeccf1f5504f3d576a9ef43 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 7 Apr 2020 12:53:01 -0700 Subject: [PATCH 2/5] Add debug guards --- src/gpgpu-sim/gpu-cache.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gpgpu-sim/gpu-cache.cc b/src/gpgpu-sim/gpu-cache.cc index e203afa2d..8bf70c34a 100644 --- a/src/gpgpu-sim/gpu-cache.cc +++ b/src/gpgpu-sim/gpu-cache.cc @@ -705,8 +705,16 @@ unsigned long long cache_stats::operator()(int access_type, int access_outcome, /// /// Const accessor into m_stats. 
if (fail_outcome) { +#ifdef DEBUG + if (!check_fail_valid(access_type, access_outcome)) + assert(0 && "Unknown cache access type or fail outcome"); +#endif return m_fail_stats[access_type][access_outcome]; } else { +#ifdef DEBUG + if (!check_valid(access_type, access_outcome)) + assert(0 && "Unknown cache access type or access outcome"); +#endif return m_stats[access_type][access_outcome]; } } From ec681be8e4e59f880de1524d4d2e5a6cc3e6b9a7 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 7 Apr 2020 20:08:14 -0700 Subject: [PATCH 3/5] Guard off other asserts --- src/gpgpu-sim/gpu-cache.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/gpgpu-sim/gpu-cache.cc b/src/gpgpu-sim/gpu-cache.cc index 8bf70c34a..3e68a24bc 100644 --- a/src/gpgpu-sim/gpu-cache.cc +++ b/src/gpgpu-sim/gpu-cache.cc @@ -643,9 +643,10 @@ void cache_stats::inc_stats(int access_type, int access_outcome) { /// /// Increment the stat corresponding to (access_type, access_outcome) by 1. /// +#ifdef DEBUG if (!check_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or access outcome"); - +#endif m_stats[access_type][access_outcome]++; } @@ -653,15 +654,18 @@ void cache_stats::inc_stats_pw(int access_type, int access_outcome) { /// /// Increment the corresponding per-window cache stat /// +#ifdef DEBUG if (!check_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or access outcome"); +#endif m_stats_pw[access_type][access_outcome]++; } void cache_stats::inc_fail_stats(int access_type, int fail_outcome) { +#ifdef DEBUG if (!check_fail_valid(access_type, fail_outcome)) assert(0 && "Unknown cache access type or access fail"); - +#endif m_fail_stats[access_type][fail_outcome]++; } @@ -688,14 +692,16 @@ unsigned long long &cache_stats::operator()(int access_type, int access_outcome, /// read/write member functions /// if (fail_outcome) { +#ifdef DEBUG if (!check_fail_valid(access_type, access_outcome)) assert(0 && "Unknown 
cache access type or fail outcome"); - +#endif return m_fail_stats[access_type][access_outcome]; } else { +#ifdef DEBUG if (!check_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or access outcome"); - +#endif return m_stats[access_type][access_outcome]; } } @@ -836,8 +842,10 @@ unsigned long long cache_stats::get_stats( unsigned long long total = 0; for (unsigned type = 0; type < num_access_type; ++type) { for (unsigned status = 0; status < num_access_status; ++status) { +#ifdef DEBUG if (!check_valid((int)access_type[type], (int)access_status[status])) assert(0 && "Unknown cache access type or access outcome"); +#endif total += m_stats[access_type[type]][access_status[status]]; } } From 22559ed681335212e801f1d772d75d685d3962e0 Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 8 Apr 2020 00:57:32 -0700 Subject: [PATCH 4/5] Remove layer of indirection --- src/gpgpu-sim/gpu-cache.cc | 60 +++++++++++++++++++++----------------- src/gpgpu-sim/gpu-cache.h | 4 +-- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/src/gpgpu-sim/gpu-cache.cc b/src/gpgpu-sim/gpu-cache.cc index 3e68a24bc..e3ce807d3 100644 --- a/src/gpgpu-sim/gpu-cache.cc +++ b/src/gpgpu-sim/gpu-cache.cc @@ -604,13 +604,11 @@ void mshr_table::display(FILE *fp) const { /***************************************************************** Caches * *****************************************************************/ cache_stats::cache_stats() { - m_stats.resize(NUM_MEM_ACCESS_TYPE); + m_stats.resize(NUM_MEM_ACCESS_TYPE * NUM_CACHE_REQUEST_STATUS); m_stats_pw.resize(NUM_MEM_ACCESS_TYPE); - m_fail_stats.resize(NUM_MEM_ACCESS_TYPE); + m_fail_stats.resize(NUM_MEM_ACCESS_TYPE * NUM_CACHE_RESERVATION_FAIL_STATUS); for (unsigned i = 0; i < NUM_MEM_ACCESS_TYPE; ++i) { - m_stats[i].resize(NUM_CACHE_REQUEST_STATUS, 0); m_stats_pw[i].resize(NUM_CACHE_REQUEST_STATUS, 0); - m_fail_stats[i].resize(NUM_CACHE_RESERVATION_FAIL_STATUS, 0); } m_cache_port_available_cycles = 0; 
m_cache_data_port_busy_cycles = 0; @@ -621,10 +619,8 @@ void cache_stats::clear() { /// /// Zero out all current cache statistics /// - for (unsigned i = 0; i < NUM_MEM_ACCESS_TYPE; ++i) { - std::fill(m_stats[i].begin(), m_stats[i].end(), 0); - std::fill(m_fail_stats[i].begin(), m_fail_stats[i].end(), 0); - } + std::fill(m_stats.begin(), m_stats.end(), 0); + std::fill(m_fail_stats.begin(), m_fail_stats.end(), 0); m_cache_port_available_cycles = 0; m_cache_data_port_busy_cycles = 0; m_cache_fill_port_busy_cycles = 0; @@ -647,7 +643,7 @@ void cache_stats::inc_stats(int access_type, int access_outcome) { if (!check_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or access outcome"); #endif - m_stats[access_type][access_outcome]++; + m_stats[access_type * NUM_CACHE_REQUEST_STATUS + access_outcome]++; } void cache_stats::inc_stats_pw(int access_type, int access_outcome) { @@ -666,7 +662,8 @@ void cache_stats::inc_fail_stats(int access_type, int fail_outcome) { if (!check_fail_valid(access_type, fail_outcome)) assert(0 && "Unknown cache access type or access fail"); #endif - m_fail_stats[access_type][fail_outcome]++; + m_fail_stats[access_type * NUM_CACHE_RESERVATION_FAIL_STATUS + + fail_outcome]++; } enum cache_request_status cache_stats::select_stats_status( @@ -696,13 +693,14 @@ unsigned long long &cache_stats::operator()(int access_type, int access_outcome, if (!check_fail_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or fail outcome"); #endif - return m_fail_stats[access_type][access_outcome]; + return m_fail_stats[access_type * NUM_CACHE_RESERVATION_FAIL_STATUS + + access_outcome]; } else { #ifdef DEBUG if (!check_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or access outcome"); #endif - return m_stats[access_type][access_outcome]; + return m_stats[access_type * NUM_CACHE_REQUEST_STATUS + access_outcome]; } } @@ -715,13 +713,14 @@ unsigned long long cache_stats::operator()(int 
access_type, int access_outcome, if (!check_fail_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or fail outcome"); #endif - return m_fail_stats[access_type][access_outcome]; + return m_fail_stats[access_type * NUM_CACHE_RESERVATION_FAIL_STATUS + + access_outcome]; } else { #ifdef DEBUG if (!check_valid(access_type, access_outcome)) assert(0 && "Unknown cache access type or access outcome"); #endif - return m_stats[access_type][access_outcome]; + return m_stats[access_type * NUM_CACHE_REQUEST_STATUS + access_outcome]; } } @@ -733,12 +732,14 @@ cache_stats cache_stats::operator+(const cache_stats &cs) { for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) { for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { ret(type, status, false) = - m_stats[type][status] + cs(type, status, false); + m_stats[type * NUM_CACHE_REQUEST_STATUS + status] + + cs(type, status, false); } for (unsigned status = 0; status < NUM_CACHE_RESERVATION_FAIL_STATUS; ++status) { ret(type, status, true) = - m_fail_stats[type][status] + cs(type, status, true); + m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + status] + + cs(type, status, true); } } ret.m_cache_port_available_cycles = @@ -756,14 +757,16 @@ cache_stats &cache_stats::operator+=(const cache_stats &cs) { /// for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) { for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { - m_stats[type][status] += cs(type, status, false); + m_stats[type * NUM_CACHE_REQUEST_STATUS + status] += + cs(type, status, false); } for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { m_stats_pw[type][status] += cs(type, status, false); } for (unsigned status = 0; status < NUM_CACHE_RESERVATION_FAIL_STATUS; ++status) { - m_fail_stats[type][status] += cs(type, status, true); + m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + status] += + cs(type, status, true); } } m_cache_port_available_cycles += 
cs.m_cache_port_available_cycles; @@ -787,10 +790,10 @@ void cache_stats::print_stats(FILE *fout, const char *cache_name) const { fprintf(fout, "\t%s[%s][%s] = %llu\n", m_cache_name.c_str(), mem_access_type_str((enum mem_access_type)type), cache_request_status_str((enum cache_request_status)status), - m_stats[type][status]); + m_stats[type * NUM_CACHE_REQUEST_STATUS + status]); if (status != RESERVATION_FAIL) - total_access[type] += m_stats[type][status]; + total_access[type] += m_stats[type * NUM_CACHE_REQUEST_STATUS + status]; } } for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) { @@ -805,11 +808,11 @@ void cache_stats::print_fail_stats(FILE *fout, const char *cache_name) const { std::string m_cache_name = cache_name; for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) { for (unsigned fail = 0; fail < NUM_CACHE_RESERVATION_FAIL_STATUS; ++fail) { - if (m_fail_stats[type][fail] > 0) { + if (m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + fail] > 0) { fprintf(fout, "\t%s[%s][%s] = %llu\n", m_cache_name.c_str(), mem_access_type_str((enum mem_access_type)type), cache_fail_status_str((enum cache_reservation_fail_reason)fail), - m_fail_stats[type][fail]); + m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + fail]); } } } @@ -846,7 +849,8 @@ unsigned long long cache_stats::get_stats( if (!check_valid((int)access_type[type], (int)access_status[status])) assert(0 && "Unknown cache access type or access outcome"); #endif - total += m_stats[access_type[type]][access_status[status]]; + total += m_stats[access_type[type] * NUM_CACHE_REQUEST_STATUS + + access_status[status]]; } } return total; @@ -863,14 +867,16 @@ void cache_stats::get_sub_stats(struct cache_sub_stats &css) const { for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { if (status == HIT || status == MISS || status == SECTOR_MISS || status == HIT_RESERVED) - t_css.accesses += m_stats[type][status]; + t_css.accesses += m_stats[type * NUM_CACHE_REQUEST_STATUS + 
status]; if (status == MISS || status == SECTOR_MISS) - t_css.misses += m_stats[type][status]; + t_css.misses += m_stats[type * NUM_CACHE_REQUEST_STATUS + status]; - if (status == HIT_RESERVED) t_css.pending_hits += m_stats[type][status]; + if (status == HIT_RESERVED) + t_css.pending_hits += m_stats[type * NUM_CACHE_REQUEST_STATUS + status]; - if (status == RESERVATION_FAIL) t_css.res_fails += m_stats[type][status]; + if (status == RESERVATION_FAIL) + t_css.res_fails += m_stats[type * NUM_CACHE_REQUEST_STATUS + status]; } } diff --git a/src/gpgpu-sim/gpu-cache.h b/src/gpgpu-sim/gpu-cache.h index 2a37876c3..3dbfb47f4 100644 --- a/src/gpgpu-sim/gpu-cache.h +++ b/src/gpgpu-sim/gpu-cache.h @@ -1100,10 +1100,10 @@ class cache_stats { bool check_valid(int type, int status) const; bool check_fail_valid(int type, int fail) const; - std::vector<std::vector<unsigned long long> > m_stats; + std::vector<unsigned long long> m_stats; // AerialVision cache stats (per-window) std::vector<std::vector<unsigned long long> > m_stats_pw; - std::vector<std::vector<unsigned long long> > m_fail_stats; + std::vector<unsigned long long> m_fail_stats; unsigned long long m_cache_port_available_cycles; unsigned long long m_cache_data_port_busy_cycles; From 54033d6a5cd0e6c9c9a8853603d1b2965794c5ad Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 8 Apr 2020 19:08:48 -0700 Subject: [PATCH 5/5] Disabled asserts in release mode with -DNDEBUG --- libcuda/Makefile | 4 ++-- src/cuda-sim/Makefile | 4 ++-- src/gpgpu-sim/Makefile | 2 +- src/gpuwattch/mcpat.mk | 2 +- src/intersim2/Makefile | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libcuda/Makefile b/libcuda/Makefile index c8ff2e35d..5f887d076 100644 --- a/libcuda/Makefile +++ b/libcuda/Makefile @@ -79,8 +79,8 @@ ifeq ($(DEBUG),1) CXXFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -g -fPIC $(GL) CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -ggdb -fPIC else - CXXFLAGS += -O3 -g -Wall -Wno-unused-function -Wno-sign-compare -fPIC $(GL) - CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -fPIC + CXXFLAGS += -O3 -g -Wall -Wno-unused-function
-Wno-sign-compare -fPIC $(GL) -DNDEBUG + CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -fPIC -DNDEBUG endif PROG =cuda diff --git a/src/cuda-sim/Makefile b/src/cuda-sim/Makefile index 85d1c8c01..691d1155a 100644 --- a/src/cuda-sim/Makefile +++ b/src/cuda-sim/Makefile @@ -42,9 +42,9 @@ include ../../version_detection.mk OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/cuda-sim -OPT := -O3 -g3 -Wall -Wno-unused-function -Wno-sign-compare +OPT := -O3 -g3 -Wall -Wno-unused-function -Wno-sign-compare -DNDEBUG ifeq ($(DEBUG),1) - OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare + OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare -DDEBUG endif OPT += -I$(CUDA_INSTALL_PATH)/include -I$(OUTPUT_DIR)/ -I. -I$(SIM_OBJ_FILES_DIR) OPT += -fPIC diff --git a/src/gpgpu-sim/Makefile b/src/gpgpu-sim/Makefile index 4994577cd..e5dfb4f26 100644 --- a/src/gpgpu-sim/Makefile +++ b/src/gpgpu-sim/Makefile @@ -34,7 +34,7 @@ TRACE?=0 ifeq ($(DEBUG),1) CXXFLAGS = -Wall -DDEBUG else - CXXFLAGS = -Wall + CXXFLAGS = -Wall -DNDEBUG endif ifeq ($(TRACE),1) diff --git a/src/gpuwattch/mcpat.mk b/src/gpuwattch/mcpat.mk index a09c23b4c..bee096811 100644 --- a/src/gpuwattch/mcpat.mk +++ b/src/gpuwattch/mcpat.mk @@ -28,7 +28,7 @@ ifeq ($(TAG),dbg) DBG = -Wall OPT = -ggdb -fPIC -g -O0 -DNTHREADS=1 -Icacti -lz else - DBG = + DBG = -DNDEBUG OPT = -O3 -fPIC -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti -lz #OPT = -O0 -DNTHREADS=$(NTHREADS) endif diff --git a/src/intersim2/Makefile b/src/intersim2/Makefile index 3eeeb7041..2fe902c42 100644 --- a/src/intersim2/Makefile +++ b/src/intersim2/Makefile @@ -44,7 +44,7 @@ endif CPPFLAGS += -Wall $(INCPATH) $(DEFINE) ifneq ($(DEBUG),1) -CPPFLAGS += -O3 +CPPFLAGS += -O3 -DNDEBUG endif CPPFLAGS += -g CPPFLAGS += -fPIC