From 5b4afdae3a9023b5fec48b121dc32aa329e8a734 Mon Sep 17 00:00:00 2001 From: Nick Date: Sun, 5 Apr 2020 15:14:34 -0700 Subject: [PATCH 1/2] Remove runtime aerialvision overhead w/ preprocessor --- setup_environment | 15 ++++++++++++++- src/Makefile | 4 ++-- src/cuda-sim/Makefile | 7 +++---- src/gpgpu-sim/Makefile | 8 +++----- src/gpgpu-sim/gpu-cache.cc | 18 ++++++++++++++++++ src/gpgpu-sim/gpu-sim.cc | 2 ++ src/gpgpu-sim/l2cache.cc | 7 ++++++- 7 files changed, 48 insertions(+), 13 deletions(-) diff --git a/setup_environment b/setup_environment index ca60d6bd9..6b1d27009 100644 --- a/setup_environment +++ b/setup_environment @@ -62,7 +62,20 @@ fi if [ $# = '1' ] ; then - export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/$1 + if [ $1 = 'debug' ] ; + then + export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/$1 + elif [ $1 = 'av_enabled' ] ; + then + export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/release + export AV_ENABLED=1 + elif [ $1 = 'release' ] ; + then + export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/release + else + echo "ERROR - BAD SETUP VARIABLE" + return; + fi else export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/release fi diff --git a/src/Makefile b/src/Makefile index 3ad511e20..85a599421 100644 --- a/src/Makefile +++ b/src/Makefile @@ -34,7 +34,7 @@ TRACE?=1 include ../version_detection.mk -CXXFLAGS = -Wall -DDEBUG +CXXFLAGS = -Wall CXXFLAGS += -DCUDART_VERSION=$(CUDART_VERSION) ifeq ($(GNUC_CPP0X), 1) @@ -48,7 +48,7 @@ endif ifneq ($(DEBUG),1) OPTFLAGS += -O3 else - CXXFLAGS += + CXXFLAGS += -DDEBUG endif CXXFLAGS += -I$(CUDA_INSTALL_PATH)/include diff --git a/src/cuda-sim/Makefile b/src/cuda-sim/Makefile index 85d1c8c01..fb430ee1d 100644 --- a/src/cuda-sim/Makefile +++ b/src/cuda-sim/Makefile @@ -42,12 +42,11 @@ include ../../version_detection.mk OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/cuda-sim -OPT := -O3 -g3 -Wall -Wno-unused-function -Wno-sign-compare -ifeq ($(DEBUG),1) - OPT 
:= -g3 -Wall -Wno-unused-function -Wno-sign-compare +OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare -fPIC +ifneq ($(DEBUG),1) + OPT += -O3 endif OPT += -I$(CUDA_INSTALL_PATH)/include -I$(OUTPUT_DIR)/ -I. -I$(SIM_OBJ_FILES_DIR) -OPT += -fPIC ifeq ($(TRACE),1) OPT += -DTRACING_ON=1 diff --git a/src/gpgpu-sim/Makefile b/src/gpgpu-sim/Makefile index 4994577cd..cc6e371b2 100644 --- a/src/gpgpu-sim/Makefile +++ b/src/gpgpu-sim/Makefile @@ -34,7 +34,7 @@ TRACE?=0 ifeq ($(DEBUG),1) CXXFLAGS = -Wall -DDEBUG else - CXXFLAGS = -Wall + CXXFLAGS = -Wall -O3 endif ifeq ($(TRACE),1) @@ -47,10 +47,8 @@ ifeq ($(GNUC_CPP0X), 1) CXXFLAGS += -std=c++0x endif -ifneq ($(DEBUG),1) - OPTFLAGS += -O3 -else - CXXFLAGS += +ifneq ($(AV_ENABLED),1) + CXXFLAGS += -DAV_ENABLED endif CXXFLAGS += -I$(CUDA_INSTALL_PATH)/include diff --git a/src/gpgpu-sim/gpu-cache.cc b/src/gpgpu-sim/gpu-cache.cc index af22c4c2c..e8a346124 100644 --- a/src/gpgpu-sim/gpu-cache.cc +++ b/src/gpgpu-sim/gpu-cache.cc @@ -605,11 +605,15 @@ void mshr_table::display(FILE *fp) const { * *****************************************************************/ cache_stats::cache_stats() { m_stats.resize(NUM_MEM_ACCESS_TYPE); +#ifdef AV_ENABLED m_stats_pw.resize(NUM_MEM_ACCESS_TYPE); +#endif m_fail_stats.resize(NUM_MEM_ACCESS_TYPE); for (unsigned i = 0; i < NUM_MEM_ACCESS_TYPE; ++i) { m_stats[i].resize(NUM_CACHE_REQUEST_STATUS, 0); +#ifdef AV_ENABLED m_stats_pw[i].resize(NUM_CACHE_REQUEST_STATUS, 0); +#endif m_fail_stats[i].resize(NUM_CACHE_RESERVATION_FAIL_STATUS, 0); } m_cache_port_available_cycles = 0; @@ -630,6 +634,7 @@ void cache_stats::clear() { m_cache_fill_port_busy_cycles = 0; } +#ifdef AV_ENABLED void cache_stats::clear_pw() { /// /// Zero out per-window cache statistics @@ -638,6 +643,7 @@ void cache_stats::clear_pw() { std::fill(m_stats_pw[i].begin(), m_stats_pw[i].end(), 0); } } +#endif void cache_stats::inc_stats(int access_type, int access_outcome) { /// @@ -649,6 +655,7 @@ void cache_stats::inc_stats(int 
access_type, int access_outcome) { m_stats[access_type][access_outcome]++; } +#ifdef AV_ENABLED void cache_stats::inc_stats_pw(int access_type, int access_outcome) { /// /// Increment the corresponding per-window cache stat @@ -657,6 +664,7 @@ void cache_stats::inc_stats_pw(int access_type, int access_outcome) { assert(0 && "Unknown cache access type or access outcome"); m_stats_pw[access_type][access_outcome]++; } +#endif void cache_stats::inc_fail_stats(int access_type, int fail_outcome) { if (!check_fail_valid(access_type, fail_outcome)) @@ -751,9 +759,11 @@ cache_stats &cache_stats::operator+=(const cache_stats &cs) { for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { m_stats[type][status] += cs(type, status, false); } +#ifdef AV_ENABLED for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { m_stats_pw[type][status] += cs(type, status, false); } +#endif for (unsigned status = 0; status < NUM_CACHE_RESERVATION_FAIL_STATUS; ++status) { m_fail_stats[type][status] += cs(type, status, true); @@ -872,6 +882,7 @@ void cache_stats::get_sub_stats(struct cache_sub_stats &css) const { css = t_css; } +#ifdef AV_ENABLED void cache_stats::get_sub_stats_pw(struct cache_sub_stats_pw &css) const { /// /// Overwrites "css" with the appropriate statistics from this cache. 
@@ -921,6 +932,7 @@ void cache_stats::get_sub_stats_pw(struct cache_sub_stats_pw &css) const { css = t_css; } +#endif bool cache_stats::check_valid(int type, int status) const { /// @@ -1588,8 +1600,10 @@ enum cache_request_status read_only_cache::access( m_stats.inc_stats(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); +#ifdef AV_ENABLED m_stats.inc_stats_pw(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); +#endif return cache_status; } @@ -1655,8 +1669,10 @@ enum cache_request_status data_cache::access(new_addr_type addr, mem_fetch *mf, process_tag_probe(wr, probe_status, addr, cache_index, mf, time, events); m_stats.inc_stats(mf->get_access_type(), m_stats.select_stats_status(probe_status, access_status)); +#ifdef AV_ENABLED m_stats.inc_stats_pw(mf->get_access_type(), m_stats.select_stats_status( probe_status, access_status)); +#endif return access_status; } @@ -1719,8 +1735,10 @@ enum cache_request_status tex_cache::access(new_addr_type addr, mem_fetch *mf, } m_stats.inc_stats(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); +#ifdef AV_ENABLED m_stats.inc_stats_pw(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); +#endif return cache_status; } diff --git a/src/gpgpu-sim/gpu-sim.cc b/src/gpgpu-sim/gpu-sim.cc index a6a39ab0a..97fbbf527 100644 --- a/src/gpgpu-sim/gpu-sim.cc +++ b/src/gpgpu-sim/gpu-sim.cc @@ -960,11 +960,13 @@ void gpgpu_sim::init() { m_cluster[i]->reinit(); m_shader_stats->new_grid(); // initialize the control-flow, memory access, memory latency logger +#ifdef AV_ENABLED if (m_config.g_visualizer_enabled) { create_thread_CFlogger(gpgpu_ctx, m_config.num_shader(), m_shader_config->n_thread_per_shader, 0, m_config.gpgpu_cflog_interval); } +#endif shader_CTA_count_create(m_config.num_shader(), m_config.gpgpu_cflog_interval); if (m_config.gpgpu_cflog_interval != 0) { insn_warp_occ_create(m_config.num_shader(), m_shader_config->warp_size); diff --git 
a/src/gpgpu-sim/l2cache.cc b/src/gpgpu-sim/l2cache.cc index ab6e5c228..5bfe6b0e1 100644 --- a/src/gpgpu-sim/l2cache.cc +++ b/src/gpgpu-sim/l2cache.cc @@ -624,19 +624,22 @@ void memory_stats_t::visualizer_print(gzFile visualizer_file) { gzprintf(visualizer_file, "Ltwowritehit: %d\n", L2_write_hit); gzprintf(visualizer_file, "Ltworeadmiss: %d\n", L2_read_miss); gzprintf(visualizer_file, "Ltworeadhit: %d\n", L2_read_hit); +#ifdef AV_EANBLED clear_L2_stats_pw(); - +#endif if (num_mfs) gzprintf(visualizer_file, "averagemflatency: %lld\n", mf_total_lat / num_mfs); } +#ifdef AV_ENABLED void memory_stats_t::clear_L2_stats_pw() { L2_write_miss = 0; L2_write_hit = 0; L2_read_miss = 0; L2_read_hit = 0; } +#endif void gpgpu_sim::print_dram_stats(FILE *fout) const { unsigned cmd = 0; @@ -832,6 +835,7 @@ void memory_sub_partition::get_L2cache_sub_stats( } } +#ifdef AV_ENABLED void memory_sub_partition::get_L2cache_sub_stats_pw( struct cache_sub_stats_pw &css) const { if (!m_config->m_L2_config.disabled()) { @@ -858,3 +862,4 @@ void memory_sub_partition::visualizer_print(gzFile visualizer_file) { clear_L2cache_stats_pw(); } +#endif From 97550da53f6554230358375cc8045d11736725b4 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 7 Apr 2020 21:03:35 -0700 Subject: [PATCH 2/2] Fix preprocessor flags --- setup_environment | 1 + src/gpgpu-sim/Makefile | 6 ++++-- src/gpgpu-sim/gpu-cache.cc | 18 +++++++++--------- src/gpgpu-sim/gpu-sim.cc | 2 +- src/gpgpu-sim/l2cache.cc | 6 +++--- src/gpgpu-sim/shader.cc | 2 ++ 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/setup_environment b/setup_environment index 02ba50aa9..88b2f25ee 100644 --- a/setup_environment +++ b/setup_environment @@ -61,6 +61,7 @@ if [ $CUDA_VERSION_NUMBER -ge 6000 ]; then fi +export AV_ENABLED=0 if [ $# = '1' ] ; then if [ $1 = 'debug' ] ; diff --git a/src/gpgpu-sim/Makefile b/src/gpgpu-sim/Makefile index cc6e371b2..669e0aba0 100644 --- a/src/gpgpu-sim/Makefile +++ b/src/gpgpu-sim/Makefile @@ -47,8 +47,10 @@ 
ifeq ($(GNUC_CPP0X), 1) CXXFLAGS += -std=c++0x endif -ifneq ($(AV_ENABLED),1) - CXXFLAGS += -DAV_ENABLED +ifeq ($(AV_ENABLED),1) + CXXFLAGS += -DAV_ENABLED=1 +else + CXXFLAGS += -DAV_ENABLED=0 endif CXXFLAGS += -I$(CUDA_INSTALL_PATH)/include diff --git a/src/gpgpu-sim/gpu-cache.cc b/src/gpgpu-sim/gpu-cache.cc index e8a346124..0f46e8189 100644 --- a/src/gpgpu-sim/gpu-cache.cc +++ b/src/gpgpu-sim/gpu-cache.cc @@ -605,13 +605,13 @@ void mshr_table::display(FILE *fp) const { * *****************************************************************/ cache_stats::cache_stats() { m_stats.resize(NUM_MEM_ACCESS_TYPE); -#ifdef AV_ENABLED +#if AV_ENABLED m_stats_pw.resize(NUM_MEM_ACCESS_TYPE); #endif m_fail_stats.resize(NUM_MEM_ACCESS_TYPE); for (unsigned i = 0; i < NUM_MEM_ACCESS_TYPE; ++i) { m_stats[i].resize(NUM_CACHE_REQUEST_STATUS, 0); -#ifdef AV_ENABLED +#if AV_ENABLED m_stats_pw[i].resize(NUM_CACHE_REQUEST_STATUS, 0); #endif m_fail_stats[i].resize(NUM_CACHE_RESERVATION_FAIL_STATUS, 0); @@ -634,7 +634,7 @@ void cache_stats::clear() { m_cache_fill_port_busy_cycles = 0; } -#ifdef AV_ENABLED +#if AV_ENABLED void cache_stats::clear_pw() { /// /// Zero out per-window cache statistics @@ -655,7 +655,7 @@ void cache_stats::inc_stats(int access_type, int access_outcome) { m_stats[access_type][access_outcome]++; } -#ifdef AV_ENABLED +#if AV_ENABLED void cache_stats::inc_stats_pw(int access_type, int access_outcome) { /// /// Increment the corresponding per-window cache stat @@ -759,7 +759,7 @@ cache_stats &cache_stats::operator+=(const cache_stats &cs) { for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { m_stats[type][status] += cs(type, status, false); } -#ifdef AV_ENABLED +#if AV_ENABLED for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) { m_stats_pw[type][status] += cs(type, status, false); } @@ -882,7 +882,7 @@ void cache_stats::get_sub_stats(struct cache_sub_stats &css) const { css = t_css; } -#ifdef AV_ENABLED +#if AV_ENABLED void 
cache_stats::get_sub_stats_pw(struct cache_sub_stats_pw &css) const { /// /// Overwrites "css" with the appropriate statistics from this cache. @@ -1600,7 +1600,7 @@ enum cache_request_status read_only_cache::access( m_stats.inc_stats(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); -#ifdef AV_ENABLED +#if AV_ENABLED m_stats.inc_stats_pw(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); #endif @@ -1669,7 +1669,7 @@ enum cache_request_status data_cache::access(new_addr_type addr, mem_fetch *mf, process_tag_probe(wr, probe_status, addr, cache_index, mf, time, events); m_stats.inc_stats(mf->get_access_type(), m_stats.select_stats_status(probe_status, access_status)); -#ifdef AV_ENABLED +#if AV_ENABLED m_stats.inc_stats_pw(mf->get_access_type(), m_stats.select_stats_status( probe_status, access_status)); #endif @@ -1735,7 +1735,7 @@ enum cache_request_status tex_cache::access(new_addr_type addr, mem_fetch *mf, } m_stats.inc_stats(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); -#ifdef AV_ENABLED +#if AV_ENABLED m_stats.inc_stats_pw(mf->get_access_type(), m_stats.select_stats_status(status, cache_status)); #endif diff --git a/src/gpgpu-sim/gpu-sim.cc b/src/gpgpu-sim/gpu-sim.cc index 97fbbf527..9b207d844 100644 --- a/src/gpgpu-sim/gpu-sim.cc +++ b/src/gpgpu-sim/gpu-sim.cc @@ -960,7 +960,7 @@ void gpgpu_sim::init() { m_cluster[i]->reinit(); m_shader_stats->new_grid(); // initialize the control-flow, memory access, memory latency logger -#ifdef AV_ENABLED +#if AV_ENABLED if (m_config.g_visualizer_enabled) { create_thread_CFlogger(gpgpu_ctx, m_config.num_shader(), m_shader_config->n_thread_per_shader, 0, diff --git a/src/gpgpu-sim/l2cache.cc b/src/gpgpu-sim/l2cache.cc index 5bfe6b0e1..7cebb04e3 100644 --- a/src/gpgpu-sim/l2cache.cc +++ b/src/gpgpu-sim/l2cache.cc @@ -624,7 +624,7 @@ void memory_stats_t::visualizer_print(gzFile visualizer_file) { gzprintf(visualizer_file, "Ltwowritehit: %d\n", 
L2_write_hit); gzprintf(visualizer_file, "Ltworeadmiss: %d\n", L2_read_miss); gzprintf(visualizer_file, "Ltworeadhit: %d\n", L2_read_hit); -#ifdef AV_EANBLED +#if AV_ENABLED clear_L2_stats_pw(); #endif if (num_mfs) @@ -632,7 +632,7 @@ void memory_stats_t::visualizer_print(gzFile visualizer_file) { mf_total_lat / num_mfs); } -#ifdef AV_ENABLED +#if AV_ENABLED void memory_stats_t::clear_L2_stats_pw() { L2_write_miss = 0; L2_write_hit = 0; @@ -835,7 +835,7 @@ void memory_sub_partition::get_L2cache_sub_stats( } } -#ifdef AV_ENABLED +#if AV_ENABLED void memory_sub_partition::get_L2cache_sub_stats_pw( struct cache_sub_stats_pw &css) const { if (!m_config->m_L2_config.disabled()) { diff --git a/src/gpgpu-sim/shader.cc b/src/gpgpu-sim/shader.cc index b596c0daa..c18c4e3d1 100644 --- a/src/gpgpu-sim/shader.cc +++ b/src/gpgpu-sim/shader.cc @@ -4258,6 +4258,7 @@ void shader_core_ctx::checkExecutionStatusAndUpdate(warp_inst_t &inst, } // PC-Histogram Update +#if AV_ENABLED unsigned warp_id = inst.warp_id(); unsigned pc = inst.pc; for (unsigned t = 0; t < m_config->warp_size; t++) { @@ -4266,4 +4267,5 @@ void shader_core_ctx::checkExecutionStatusAndUpdate(warp_inst_t &inst, cflog_update_thread_pc(m_sid, tid, pc); } } +#endif }