Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Suggested perf improvements #36

Open
wants to merge 8 commits into
base: dev
Choose a base branch
from
4 changes: 2 additions & 2 deletions libcuda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ ifeq ($(DEBUG),1)
CXXFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -g -fPIC $(GL)
CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -ggdb -fPIC
else
CXXFLAGS += -O3 -g -Wall -Wno-unused-function -Wno-sign-compare -fPIC $(GL)
CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -fPIC
CXXFLAGS += -O3 -g -Wall -Wno-unused-function -Wno-sign-compare -fPIC $(GL) -DNDEBUG
CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -fPIC -DNDEBUG
endif

PROG =cuda
Expand Down
4 changes: 2 additions & 2 deletions src/cuda-sim/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ include ../../version_detection.mk

OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/cuda-sim

OPT := -O3 -g3 -Wall -Wno-unused-function -Wno-sign-compare
OPT := -O3 -g3 -Wall -Wno-unused-function -Wno-sign-compare -DNDEBUG
ifeq ($(DEBUG),1)
OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare
OPT := -g3 -Wall -Wno-unused-function -Wno-sign-compare -DDEBUG
endif
OPT += -I$(CUDA_INSTALL_PATH)/include -I$(OUTPUT_DIR)/ -I. -I$(SIM_OBJ_FILES_DIR)
OPT += -fPIC
Expand Down
2 changes: 1 addition & 1 deletion src/gpgpu-sim/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ TRACE?=0
ifeq ($(DEBUG),1)
CXXFLAGS = -Wall -DDEBUG
else
CXXFLAGS = -Wall
CXXFLAGS = -Wall -DNDEBUG
endif

ifeq ($(TRACE),1)
Expand Down
83 changes: 49 additions & 34 deletions src/gpgpu-sim/gpu-cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -604,13 +604,11 @@ void mshr_table::display(FILE *fp) const {
/***************************************************************** Caches
* *****************************************************************/
cache_stats::cache_stats() {
m_stats.resize(NUM_MEM_ACCESS_TYPE);
m_stats.resize(NUM_MEM_ACCESS_TYPE * NUM_CACHE_REQUEST_STATUS);
m_stats_pw.resize(NUM_MEM_ACCESS_TYPE);
m_fail_stats.resize(NUM_MEM_ACCESS_TYPE);
m_fail_stats.resize(NUM_MEM_ACCESS_TYPE * NUM_CACHE_RESERVATION_FAIL_STATUS);
for (unsigned i = 0; i < NUM_MEM_ACCESS_TYPE; ++i) {
m_stats[i].resize(NUM_CACHE_REQUEST_STATUS, 0);
m_stats_pw[i].resize(NUM_CACHE_REQUEST_STATUS, 0);
m_fail_stats[i].resize(NUM_CACHE_RESERVATION_FAIL_STATUS, 0);
}
m_cache_port_available_cycles = 0;
m_cache_data_port_busy_cycles = 0;
Expand All @@ -621,10 +619,8 @@ void cache_stats::clear() {
///
/// Zero out all current cache statistics
///
for (unsigned i = 0; i < NUM_MEM_ACCESS_TYPE; ++i) {
std::fill(m_stats[i].begin(), m_stats[i].end(), 0);
std::fill(m_fail_stats[i].begin(), m_fail_stats[i].end(), 0);
}
std::fill(m_stats.begin(), m_stats.end(), 0);
std::fill(m_fail_stats.begin(), m_fail_stats.end(), 0);
m_cache_port_available_cycles = 0;
m_cache_data_port_busy_cycles = 0;
m_cache_fill_port_busy_cycles = 0;
Expand All @@ -643,26 +639,31 @@ void cache_stats::inc_stats(int access_type, int access_outcome) {
///
/// Increment the stat corresponding to (access_type, access_outcome) by 1.
///
#ifdef DEBUG
if (!check_valid(access_type, access_outcome))
assert(0 && "Unknown cache access type or access outcome");

m_stats[access_type][access_outcome]++;
#endif
m_stats[access_type * NUM_CACHE_REQUEST_STATUS + access_outcome]++;
}

void cache_stats::inc_stats_pw(int access_type, int access_outcome) {
  ///
  /// Add one to the per-window counter (m_stats_pw) selected by the
  /// (access_type, access_outcome) pair. The validity check on the pair is
  /// compiled in only when DEBUG is defined.
  ///
#ifdef DEBUG
  const bool known_pair = check_valid(access_type, access_outcome);
  if (!known_pair) {
    assert(0 && "Unknown cache access type or access outcome");
  }
#endif
  m_stats_pw[access_type][access_outcome] += 1;
}

void cache_stats::inc_fail_stats(int access_type, int fail_outcome) {
#ifdef DEBUG
if (!check_fail_valid(access_type, fail_outcome))
assert(0 && "Unknown cache access type or access fail");

m_fail_stats[access_type][fail_outcome]++;
#endif
m_fail_stats[access_type * NUM_CACHE_RESERVATION_FAIL_STATUS +
fail_outcome]++;
}

enum cache_request_status cache_stats::select_stats_status(
Expand All @@ -688,33 +689,38 @@ unsigned long long &cache_stats::operator()(int access_type, int access_outcome,
/// read/write member functions
///
if (fail_outcome) {
#ifdef DEBUG
if (!check_fail_valid(access_type, access_outcome))
assert(0 && "Unknown cache access type or fail outcome");

return m_fail_stats[access_type][access_outcome];
#endif
return m_fail_stats[access_type * NUM_CACHE_RESERVATION_FAIL_STATUS +
access_outcome];
} else {
#ifdef DEBUG
if (!check_valid(access_type, access_outcome))
assert(0 && "Unknown cache access type or access outcome");

return m_stats[access_type][access_outcome];
#endif
return m_stats[access_type * NUM_CACHE_REQUEST_STATUS + access_outcome];
}
}

unsigned long long cache_stats::operator()(int access_type, int access_outcome,
bool fail_outcome) const {
///
/// Const accessor into m_stats, or into m_fail_stats when fail_outcome is
/// true.
///
if (fail_outcome) {
#ifdef DEBUG
if (!check_fail_valid(access_type, access_outcome))
rgreen marked this conversation as resolved.
Show resolved Hide resolved
assert(0 && "Unknown cache access type or fail outcome");

return m_fail_stats[access_type][access_outcome];
#endif
return m_fail_stats[access_type * NUM_CACHE_RESERVATION_FAIL_STATUS +
access_outcome];
} else {
#ifdef DEBUG
if (!check_valid(access_type, access_outcome))
assert(0 && "Unknown cache access type or access outcome");

return m_stats[access_type][access_outcome];
#endif
return m_stats[access_type * NUM_CACHE_REQUEST_STATUS + access_outcome];
}
}

Expand All @@ -726,12 +732,14 @@ cache_stats cache_stats::operator+(const cache_stats &cs) {
for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) {
for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) {
ret(type, status, false) =
m_stats[type][status] + cs(type, status, false);
m_stats[type * NUM_CACHE_REQUEST_STATUS + status] +
cs(type, status, false);
}
for (unsigned status = 0; status < NUM_CACHE_RESERVATION_FAIL_STATUS;
++status) {
ret(type, status, true) =
m_fail_stats[type][status] + cs(type, status, true);
m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + status] +
cs(type, status, true);
}
}
ret.m_cache_port_available_cycles =
Expand All @@ -749,14 +757,16 @@ cache_stats &cache_stats::operator+=(const cache_stats &cs) {
///
for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) {
for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) {
m_stats[type][status] += cs(type, status, false);
m_stats[type * NUM_CACHE_REQUEST_STATUS + status] +=
cs(type, status, false);
}
for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) {
m_stats_pw[type][status] += cs(type, status, false);
}
for (unsigned status = 0; status < NUM_CACHE_RESERVATION_FAIL_STATUS;
++status) {
m_fail_stats[type][status] += cs(type, status, true);
m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + status] +=
cs(type, status, true);
}
}
m_cache_port_available_cycles += cs.m_cache_port_available_cycles;
Expand All @@ -780,10 +790,10 @@ void cache_stats::print_stats(FILE *fout, const char *cache_name) const {
fprintf(fout, "\t%s[%s][%s] = %llu\n", m_cache_name.c_str(),
mem_access_type_str((enum mem_access_type)type),
cache_request_status_str((enum cache_request_status)status),
m_stats[type][status]);
m_stats[type * NUM_CACHE_REQUEST_STATUS + status]);

if (status != RESERVATION_FAIL)
total_access[type] += m_stats[type][status];
total_access[type] += m_stats[type * NUM_CACHE_REQUEST_STATUS + status];
}
}
for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) {
Expand All @@ -798,11 +808,11 @@ void cache_stats::print_fail_stats(FILE *fout, const char *cache_name) const {
std::string m_cache_name = cache_name;
for (unsigned type = 0; type < NUM_MEM_ACCESS_TYPE; ++type) {
for (unsigned fail = 0; fail < NUM_CACHE_RESERVATION_FAIL_STATUS; ++fail) {
if (m_fail_stats[type][fail] > 0) {
if (m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + fail] > 0) {
fprintf(fout, "\t%s[%s][%s] = %llu\n", m_cache_name.c_str(),
mem_access_type_str((enum mem_access_type)type),
cache_fail_status_str((enum cache_reservation_fail_reason)fail),
m_fail_stats[type][fail]);
m_fail_stats[type * NUM_CACHE_RESERVATION_FAIL_STATUS + fail]);
}
}
}
Expand Down Expand Up @@ -835,9 +845,12 @@ unsigned long long cache_stats::get_stats(
unsigned long long total = 0;
for (unsigned type = 0; type < num_access_type; ++type) {
for (unsigned status = 0; status < num_access_status; ++status) {
#ifdef DEBUG
if (!check_valid((int)access_type[type], (int)access_status[status]))
assert(0 && "Unknown cache access type or access outcome");
total += m_stats[access_type[type]][access_status[status]];
#endif
total += m_stats[access_type[type] * NUM_CACHE_REQUEST_STATUS +
access_status[status]];
}
}
return total;
Expand All @@ -854,14 +867,16 @@ void cache_stats::get_sub_stats(struct cache_sub_stats &css) const {
for (unsigned status = 0; status < NUM_CACHE_REQUEST_STATUS; ++status) {
if (status == HIT || status == MISS || status == SECTOR_MISS ||
status == HIT_RESERVED)
t_css.accesses += m_stats[type][status];
t_css.accesses += m_stats[type * NUM_CACHE_REQUEST_STATUS + status];

if (status == MISS || status == SECTOR_MISS)
t_css.misses += m_stats[type][status];
t_css.misses += m_stats[type * NUM_CACHE_REQUEST_STATUS + status];

if (status == HIT_RESERVED) t_css.pending_hits += m_stats[type][status];
if (status == HIT_RESERVED)
t_css.pending_hits += m_stats[type * NUM_CACHE_REQUEST_STATUS + status];

if (status == RESERVATION_FAIL) t_css.res_fails += m_stats[type][status];
if (status == RESERVATION_FAIL)
t_css.res_fails += m_stats[type * NUM_CACHE_REQUEST_STATUS + status];
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/gpgpu-sim/gpu-cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -1100,10 +1100,10 @@ class cache_stats {
bool check_valid(int type, int status) const;
bool check_fail_valid(int type, int fail) const;

std::vector<std::vector<unsigned long long> > m_stats;
std::vector<unsigned long long> m_stats;
// AerialVision cache stats (per-window)
std::vector<std::vector<unsigned long long> > m_stats_pw;
std::vector<std::vector<unsigned long long> > m_fail_stats;
std::vector<unsigned long long> m_fail_stats;

unsigned long long m_cache_port_available_cycles;
unsigned long long m_cache_data_port_busy_cycles;
Expand Down
2 changes: 1 addition & 1 deletion src/gpuwattch/mcpat.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ifeq ($(TAG),dbg)
DBG = -Wall
OPT = -ggdb -fPIC -g -O0 -DNTHREADS=1 -Icacti -lz
else
DBG =
DBG = -DNDEBUG
OPT = -O3 -fPIC -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti -lz
#OPT = -O0 -DNTHREADS=$(NTHREADS)
endif
Expand Down
2 changes: 1 addition & 1 deletion src/intersim2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ endif

CPPFLAGS += -Wall $(INCPATH) $(DEFINE)
ifneq ($(DEBUG),1)
CPPFLAGS += -O3
CPPFLAGS += -O3 -DNDEBUG
endif
CPPFLAGS += -g
CPPFLAGS += -fPIC
Expand Down