Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove runtime aerialvision overhead w/ preprocessor #37

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion setup_environment
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,23 @@ if [ $CUDA_VERSION_NUMBER -ge 6000 ]; then
fi


export AV_ENABLED=0
if [ $# = '1' ] ;
then
export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/$1
if [ $1 = 'debug' ] ;
then
export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/$1
elif [ $1 = 'av_enabled' ] ;
then
export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/release
export AV_ENABLED=1
elif [ $1 = 'release' ] ;
then
export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/release
else
echo "ERROR - BAD SETUP VARIABLE"
return;
fi
else
export GPGPUSIM_CONFIG=gcc-$CC_VERSION/cuda-$CUDA_VERSION_NUMBER/release
fi
Expand Down
4 changes: 2 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ TRACE?=1

include ../version_detection.mk

CXXFLAGS = -Wall -DDEBUG
CXXFLAGS = -Wall
CXXFLAGS += -DCUDART_VERSION=$(CUDART_VERSION)

ifeq ($(GNUC_CPP0X), 1)
Expand All @@ -48,7 +48,7 @@ endif
ifneq ($(DEBUG),1)
OPTFLAGS += -O3
else
CXXFLAGS +=
CXXFLAGS += -DDEBUG
endif

CXXFLAGS += -I$(CUDA_INSTALL_PATH)/include
Expand Down
7 changes: 3 additions & 4 deletions src/cuda-sim/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,11 @@ include ../../version_detection.mk

OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/cuda-sim

OPT := -O3 -g3 -Wall -Wno-unused-function -Wno-sign-compare
ifeq ($(DEBUG),1)
OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare
OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare -fPIC
ifneq ($(DEBUG),1)
OPT += -O3
endif
OPT += -I$(CUDA_INSTALL_PATH)/include -I$(OUTPUT_DIR)/ -I. -I$(SIM_OBJ_FILES_DIR)
OPT += -fPIC

ifeq ($(TRACE),1)
OPT += -DTRACING_ON=1
Expand Down
8 changes: 4 additions & 4 deletions src/gpgpu-sim/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ TRACE?=0
ifeq ($(DEBUG),1)
CXXFLAGS = -Wall -DDEBUG
else
CXXFLAGS = -Wall
CXXFLAGS = -Wall -O3
endif

ifeq ($(TRACE),1)
Expand All @@ -47,10 +47,10 @@ ifeq ($(GNUC_CPP0X), 1)
CXXFLAGS += -std=c++0x
endif

ifneq ($(DEBUG),1)
OPTFLAGS += -O3
ifeq ($(AV_ENABLED),1)
CXXFLAGS += -DAV_ENABLED=1
else
CXXFLAGS +=
CXXFLAGS += -DAV_ENABLED=0
endif

CXXFLAGS += -I$(CUDA_INSTALL_PATH)/include
Expand Down
18 changes: 18 additions & 0 deletions src/gpgpu-sim/gpu-cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -605,11 +605,15 @@ void mshr_table::display(FILE *fp) const {
* *****************************************************************/
cache_stats::cache_stats() {
m_stats.resize(NUM_MEM_ACCESS_TYPE);
#if AV_ENABLED
m_stats_pw.resize(NUM_MEM_ACCESS_TYPE);
#endif
m_fail_stats.resize(NUM_MEM_ACCESS_TYPE);
for (unsigned i = 0; i < NUM_MEM_ACCESS_TYPE; ++i) {
m_stats[i].resize(NUM_CACHE_REQUEST_STATUS, 0);
#if AV_ENABLED
m_stats_pw[i].resize(NUM_CACHE_REQUEST_STATUS, 0);
#endif
m_fail_stats[i].resize(NUM_CACHE_RESERVATION_FAIL_STATUS, 0);
}
m_cache_port_available_cycles = 0;
Expand All @@ -630,6 +634,7 @@ void cache_stats::clear() {
m_cache_fill_port_busy_cycles = 0;
}

#if AV_ENABLED
void cache_stats::clear_pw() {
///
/// Zero out per-window cache statistics
Expand All @@ -638,6 +643,7 @@ void cache_stats::clear_pw() {
std::fill(m_stats_pw[i].begin(), m_stats_pw[i].end(), 0);
}
}
#endif

void cache_stats::inc_stats(int access_type, int access_outcome) {
///
Expand All @@ -649,6 +655,7 @@ void cache_stats::inc_stats(int access_type, int access_outcome) {
m_stats[access_type][access_outcome]++;
}

#if AV_ENABLED
void cache_stats::inc_stats_pw(int access_type, int access_outcome) {
///
/// Increment the corresponding per-window cache stat
Expand All @@ -657,6 +664,7 @@ void cache_stats::inc_stats_pw(int access_type, int access_outcome) {
assert(0 && "Unknown cache access type or access outcome");
m_stats_pw[access_type][access_outcome]++;
}
#endif

void cache_stats::inc_fail_stats(int access_type, int fail_outcome) {
if (!check_fail_valid(access_type, fail_outcome))
Expand Down Expand Up @@ -751,9 +759,11 @@ cache_stats &cache_stats::operator+=(const cache_stats &cs) {
for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) {
m_stats[type][status] += cs(type, status, false);
}
#if AV_ENABLED
for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) {
m_stats_pw[type][status] += cs(type, status, false);
}
#endif
for (unsigned status = 0; status < NUM_CACHE_RESERVATION_FAIL_STATUS;
++status) {
m_fail_stats[type][status] += cs(type, status, true);
Expand Down Expand Up @@ -872,6 +882,7 @@ void cache_stats::get_sub_stats(struct cache_sub_stats &css) const {
css = t_css;
}

#if AV_ENABLED
void cache_stats::get_sub_stats_pw(struct cache_sub_stats_pw &css) const {
///
/// Overwrites "css" with the appropriate statistics from this cache.
Expand Down Expand Up @@ -921,6 +932,7 @@ void cache_stats::get_sub_stats_pw(struct cache_sub_stats_pw &css) const {

css = t_css;
}
#endif

bool cache_stats::check_valid(int type, int status) const {
///
Expand Down Expand Up @@ -1588,8 +1600,10 @@ enum cache_request_status read_only_cache::access(

m_stats.inc_stats(mf->get_access_type(),
m_stats.select_stats_status(status, cache_status));
#if AV_ENABLED
m_stats.inc_stats_pw(mf->get_access_type(),
m_stats.select_stats_status(status, cache_status));
#endif
return cache_status;
}

Expand Down Expand Up @@ -1655,8 +1669,10 @@ enum cache_request_status data_cache::access(new_addr_type addr, mem_fetch *mf,
process_tag_probe(wr, probe_status, addr, cache_index, mf, time, events);
m_stats.inc_stats(mf->get_access_type(),
m_stats.select_stats_status(probe_status, access_status));
#if AV_ENABLED
m_stats.inc_stats_pw(mf->get_access_type(), m_stats.select_stats_status(
probe_status, access_status));
#endif
return access_status;
}

Expand Down Expand Up @@ -1719,8 +1735,10 @@ enum cache_request_status tex_cache::access(new_addr_type addr, mem_fetch *mf,
}
m_stats.inc_stats(mf->get_access_type(),
m_stats.select_stats_status(status, cache_status));
#if AV_ENABLED
m_stats.inc_stats_pw(mf->get_access_type(),
m_stats.select_stats_status(status, cache_status));
#endif
return cache_status;
}

Expand Down
2 changes: 2 additions & 0 deletions src/gpgpu-sim/gpu-sim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -960,11 +960,13 @@ void gpgpu_sim::init() {
m_cluster[i]->reinit();
m_shader_stats->new_grid();
// initialize the control-flow, memory access, memory latency logger
#if AV_ENABLED
if (m_config.g_visualizer_enabled) {
create_thread_CFlogger(gpgpu_ctx, m_config.num_shader(),
m_shader_config->n_thread_per_shader, 0,
m_config.gpgpu_cflog_interval);
}
#endif
shader_CTA_count_create(m_config.num_shader(), m_config.gpgpu_cflog_interval);
if (m_config.gpgpu_cflog_interval != 0) {
insn_warp_occ_create(m_config.num_shader(), m_shader_config->warp_size);
Expand Down
7 changes: 6 additions & 1 deletion src/gpgpu-sim/l2cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -624,19 +624,22 @@ void memory_stats_t::visualizer_print(gzFile visualizer_file) {
gzprintf(visualizer_file, "Ltwowritehit: %d\n", L2_write_hit);
gzprintf(visualizer_file, "Ltworeadmiss: %d\n", L2_read_miss);
gzprintf(visualizer_file, "Ltworeadhit: %d\n", L2_read_hit);
#if AV_ENABLED
clear_L2_stats_pw();

#endif
if (num_mfs)
gzprintf(visualizer_file, "averagemflatency: %lld\n",
mf_total_lat / num_mfs);
}

#if AV_ENABLED
/// Reset the four L2 hit/miss counters (read and write) back to zero.
///
/// Only compiled when AV_ENABLED is non-zero; builds without AerialVision
/// support do not contain this method at all.
void memory_stats_t::clear_L2_stats_pw() {
  // Read-side counters.
  L2_read_hit = 0;
  L2_read_miss = 0;

  // Write-side counters.
  L2_write_hit = 0;
  L2_write_miss = 0;
}
#endif

void gpgpu_sim::print_dram_stats(FILE *fout) const {
unsigned cmd = 0;
Expand Down Expand Up @@ -832,6 +835,7 @@ void memory_sub_partition::get_L2cache_sub_stats(
}
}

#if AV_ENABLED
void memory_sub_partition::get_L2cache_sub_stats_pw(
struct cache_sub_stats_pw &css) const {
if (!m_config->m_L2_config.disabled()) {
Expand All @@ -858,3 +862,4 @@ void memory_sub_partition::visualizer_print(gzFile visualizer_file) {

clear_L2cache_stats_pw();
}
#endif
2 changes: 2 additions & 0 deletions src/gpgpu-sim/shader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4258,6 +4258,7 @@ void shader_core_ctx::checkExecutionStatusAndUpdate(warp_inst_t &inst,
}

// PC-Histogram Update
#if AV_ENABLED
unsigned warp_id = inst.warp_id();
unsigned pc = inst.pc;
for (unsigned t = 0; t < m_config->warp_size; t++) {
Expand All @@ -4266,4 +4267,5 @@ void shader_core_ctx::checkExecutionStatusAndUpdate(warp_inst_t &inst,
cflog_update_thread_pc(m_sid, tid, pc);
}
}
#endif
}