From 44f1fb5704b9b4f501ecfcbe2ffc9ed108c0adfc Mon Sep 17 00:00:00 2001 From: Seonjin Na Date: Tue, 26 Mar 2024 21:45:43 -0400 Subject: [PATCH] revive cache replacement policy --- def/memory.param.def | 28 +++ src/cache_replacement/rrip.cc | 220 +++++++++++++++++ src/cache_replacement/rrip.h | 80 +++++++ src/cache_replacement/tadip.cc | 161 +++++++++++++ src/cache_replacement/tadip.h | 66 +++++ src/cache_replacement/ucp.cc | 423 +++++++++++++++++++++++++++++++++ src/cache_replacement/ucp.h | 83 +++++++ src/memory.cc | 23 +- 8 files changed, 1081 insertions(+), 3 deletions(-) create mode 100644 src/cache_replacement/rrip.cc create mode 100644 src/cache_replacement/rrip.h create mode 100644 src/cache_replacement/tadip.cc create mode 100644 src/cache_replacement/tadip.h create mode 100644 src/cache_replacement/ucp.cc create mode 100644 src/cache_replacement/ucp.h diff --git a/def/memory.param.def b/def/memory.param.def index f72fcd8f..9a2266ee 100644 --- a/def/memory.param.def +++ b/def/memory.param.def @@ -191,6 +191,34 @@ param param +/* Cache Replacement Policy */ +param +param +param +param +param +param +param +param +param +param + +param +param + +param +param +param +param +param +param +param +param + + + + + /* Cache coherence */ param diff --git a/src/cache_replacement/rrip.cc b/src/cache_replacement/rrip.cc new file mode 100644 index 00000000..631972dd --- /dev/null +++ b/src/cache_replacement/rrip.cc @@ -0,0 +1,220 @@ +/********************************************************************************************** + * File : rrip.cc + * Author : Jaekyu Lee + * Date : 04/26/2011 + * SVN : $Id: cache.h, + * Description : RRIP (Jaleel et al. ISCA 2010) + *********************************************************************************************/ + + +#include + +#include "rrip.h" +#include "../cache.h" +#include "../utils.h" +#include "../debug_macros.h" +#include "../all_knobs.h" +#include "../statistics.h" + +#define DEBUG(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_CACHE_LIB, ## args) +#define DEBUG_MEM(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_MEM_TRACE, ## args) + + +// constructor + +// cache_rrip_c::cache_rrip_c(string name, int num_set, int assoc, int line_size, +// int data_size, int bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, +// bool enable_partition, macsim_c* simBase) : cache_c(name, num_set, assoc, line_size, +// data_size, bank_num, cache_by_pass, core_id, cache_type_info, 1, 0, enable_partition, simBase) +cache_rrip_c::cache_rrip_c(string name, int num_set, int assoc, int line_size, + int data_size, int bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, + bool enable_partition, int num_tiles, int interleave_factor, macsim_c* simBase) : cache_c(name, num_set, assoc, line_size, + data_size, bank_num, cache_by_pass, core_id, cache_type_info, enable_partition, num_tiles,interleave_factor,simBase) +{ + int max_bit = *KNOB(KNOB_RRIP_CACHE_NUM_BIT); + m_max_lru_value = static_cast(pow(2, max_bit) - 1); + m_insertion_value = *m_simBase->m_knobs->KNOB_RRIP_CACHE_INSERT_AT; + + // assume 32 sets for 1 SDM + m_modulo = num_set / 32; + + m_sdm_counter = new int[m_max_application]; + for (int ii = 0; ii < m_max_application; ++ii) { + m_sdm_counter[ii] = 0; +// m_total_miss[ii] = 0; + } + + m_total_miss = new Counter[m_max_application]; + + m_sdm_max_counter_value = + static_cast(pow(2, static_cast(*KNOB(KNOB_RRIP_CACHE_NUM_COUNTER_BIT)))); + + m_bip_epsilon = *m_simBase->m_knobs->KNOB_RRIP_CACHE_BIP_EPSILON; + m_access_count_by_type[2] = {0}; + m_total_access_count = 0; + m_total_insert_count = 0; + m_access_ratio = 0.0; +} + + +// destructor +cache_rrip_c::~cache_rrip_c() +{ +} + + +// find an entry to be replaced based on the policy +cache_entry_c* cache_rrip_c::find_replacement_line(int set, int appl_id) +{ +// bool gpuline = m_simBase->m_PCL->get_appl_type(appl_id); + bool gpuline = true; + int index = -1; + while (index == -1) { + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + // find invalid or 2^n-1 entry + if (line->m_valid != true || line->m_last_access_time == m_max_lru_value) { + index = ii; + break; + } + } + + + if (index == -1) { + int count = 0; + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + ++line->m_last_access_time; + } + + if (count == 0) { + gpuline = !gpuline; + } + } + } + + ++m_total_insert_count; + + return &(m_set[set]->m_entry[index]); +} + + +// initialize a cache line +void cache_rrip_c::initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, + int appl_id, bool gpuline, int set_id, bool skip) +{ + ins_line->m_valid = true; + ins_line->m_tag = tag; + ins_line->m_base = (addr & ~m_offset_mask); + ins_line->m_access_counter = 0; + ins_line->m_pref = false; + ins_line->m_appl_id = appl_id; + ins_line->m_gpuline = gpuline; + ins_line->m_skip = skip; + + + // SRRIP + if (!*KNOB(KNOB_RRIP_CACHE_DYNAMIC_ON)) { + if (*KNOB(KNOB_RRIP_BIP_ALWAYS && gpuline)) { + // SRRIP + if (rand() % 100 < m_bip_epsilon) { + ins_line->m_last_access_time = m_insertion_value; + } + // LIP + else { + ins_line->m_last_access_time = m_max_lru_value; + } + } + else { + ins_line->m_last_access_time = m_insertion_value; + } + } + // DRRIP + else { + // BIMODAL + if (set_id % m_modulo == (appl_id * 2 + 1)) { + // SRRIP + if (rand() % 100 < m_bip_epsilon) { + ins_line->m_last_access_time = m_insertion_value; + } + // LIP + else { + ins_line->m_last_access_time = m_max_lru_value; + } + } + // SRRIP + else if (set_id % m_modulo == (appl_id * 2)) { + ins_line->m_last_access_time = m_insertion_value; + } + // Followers + else { + // SRRIP favor + if (m_sdm_counter[appl_id] <= 0) { + ins_line->m_last_access_time = m_insertion_value; + } + // BIMODAL + else { + // with small probability, insert it to original position (2n-2) + if (rand() % 100 < m_bip_epsilon) { + ins_line->m_last_access_time = m_insertion_value; + } + // LIP insertion (2n-1) + else { + ins_line->m_last_access_time = m_max_lru_value; + } + } + } + } + + + if (ins_line->m_gpuline) { + ++m_num_gpu_line; + ++m_set[set_id]->m_num_gpu_line; + } + else { + ++m_num_cpu_line; + ++m_set[set_id]->m_num_cpu_line; + } +} + + +// update a line upon cache hits +// use FP (Frequency Priority) +void cache_rrip_c::update_line_on_hit(cache_entry_c* line, int set, int appl_id) +{ + if (line->m_last_access_time > 0) { +// --line->m_last_access_time; + line->m_last_access_time = 0; + } +} + + + + +void cache_rrip_c::update_cache_on_miss(int set_id, int appl_id) +{ + // DRRIP set dueling implementation + if (*KNOB(KNOB_RRIP_CACHE_DYNAMIC_ON)) { + m_total_miss[appl_id]++; + // SRRIP + if (set_id % m_modulo == (appl_id * 2)) { + ++m_sdm_counter[appl_id]; + if (m_sdm_counter[appl_id] > m_sdm_max_counter_value) { + m_sdm_counter[appl_id] = m_sdm_max_counter_value; + } + } + // BIMODAL + else if (set_id % m_modulo == (appl_id * 2 + 1)) { + --m_sdm_counter[appl_id]; + if (m_sdm_counter[appl_id] < -1 * m_sdm_max_counter_value) { + m_sdm_counter[appl_id] = m_sdm_max_counter_value * -1; + } + } + } +} + + +void cache_rrip_c::update_cache_on_access(Addr line_addr, int set, int appl_id) +{ + ++m_total_access_count; +} diff --git a/src/cache_replacement/rrip.h b/src/cache_replacement/rrip.h new file mode 100644 index 00000000..8e530852 --- /dev/null +++ b/src/cache_replacement/rrip.h @@ -0,0 +1,80 @@ +/********************************************************************************************** + * File : rrip.h + * Author : Jaekyu Lee + * Date : 04/26/2011 + * SVN : $Id: cache.h, + * Description : RRIP (Jaleel et al. ISCA 2010) + *********************************************************************************************/ + +#ifndef RRIP_H +#define RRIP_H + +#include "../cache.h" + + +class cache_c; + +class cache_rrip_c : public cache_c +{ + public: + /** + * Constructor + */ + //added num_tiles, interleave_factor 2024-03-26 + cache_rrip_c(string name, int num_set, int assoc, int line_size, + int data_size, int bank_num, bool cache_by_pass, int core_id, + Cache_Type cache_type_info, bool enable_partition, int num_tiles, int interleave_factor, macsim_c* simBase); + + /** + * Destructor + */ + virtual ~cache_rrip_c(); + + /** + * fine a cache line to replace + * \param set set id + */ + cache_entry_c * find_replacement_line(int set, int appl_id); + + /** + * Initialize a new cache line + */ + void initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, int appl_id, + bool gpuline, int set_id, bool skip); + + /** + * Update LRU value on cache hit + */ + void update_line_on_hit(cache_entry_c* line, int set, int appl_id); + + /** + * Update cache on cache misses - for set dueling + */ + void update_cache_on_miss(int set_id, int appl_id); + + void update_cache_on_access(Addr tag, int set, int appl_id); + + private: + /** + * Default constructor - do not implement + */ + cache_rrip_c(); // do not implement + + static const int m_max_application = 20; + + int m_max_lru_value; /**< maximum lru value in RRIP */ + int m_insertion_value; /**< lru value upon insertion */ + int m_modulo; /**< modulo value for set monitorning */ + int *m_sdm_counter; + Counter *m_total_miss; + //int *m_sdm_counter[m_max_application]; + int m_sdm_max_counter_value; + int m_bip_epsilon; + + int m_access_count_by_type[2]; + Counter m_total_access_count; + Counter m_total_insert_count; + float m_access_ratio; +}; + +#endif diff --git a/src/cache_replacement/tadip.cc b/src/cache_replacement/tadip.cc new file mode 100644 index 00000000..dea39950 --- /dev/null +++ b/src/cache_replacement/tadip.cc @@ -0,0 +1,161 @@ +/********************************************************************************************** + * File : rrip.cc + * Author : Jaekyu Lee + * Date : 04/26/2011 + * SVN : $Id: cache.h, + * Description : TADIP (Jaleel et al. PACT 2008) + *********************************************************************************************/ + + +#include +#include + +#include "tadip.h" +#include "../cache.h" +#include "../debug_macros.h" + +#include "../all_knobs.h" + + +#define DEBUG(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_CACHE_LIB, ## args) +#define DEBUG_MEM(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_MEM_TRACE, ## args) + + +// constructor +cache_tadip_c::cache_tadip_c(string name, uns num_set, uns assoc, uns line_size, + uns data_size, uns bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, + bool enable_partition, macsim_c* simBase) : cache_c(name, num_set, assoc, line_size, + data_size, bank_num, cache_by_pass, core_id, cache_type_info, enable_partition, 1, 0, simBase) +{ + // assume 32 sets for 1 SDM + m_modulo = num_set / 32; + + m_sdm_counter = new int[m_max_application]; + for (int ii = 0; ii < m_max_application; ++ii) { + m_sdm_counter[ii] = 0; + } + + m_total_miss = new Counter[m_max_application]; + + m_sdm_max_counter_value = + static_cast(pow(2, static_cast(*m_simBase->m_knobs->KNOB_TADIP_CACHE_NUM_COUNTER_BIT))); + m_bip_epsilon = *m_simBase->m_knobs->KNOB_TADIP_CACHE_BIP_EPSILON; +} + + +// destructor +cache_tadip_c::~cache_tadip_c() +{ +} + + +// find an entry to be replaced based on the policy +cache_entry_c* cache_tadip_c::find_replacement_line(int set, int appl_id) +{ + int index = -1; + Counter min_lru = m_simBase->m_simulation_cycle + 1; + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + // find invalid or LRU entry + if (line->m_valid != true) { + index = ii; + break; + } + + if (line->m_last_access_time < min_lru) { + index = ii; + min_lru = line->m_last_access_time; + } + } + + assert(index != -1); + + return &(m_set[set]->m_entry[index]); +} + + +// initialize a cache line +void cache_tadip_c::initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, + int appl_id, bool gpuline, int set_id, bool skip) +{ + ins_line->m_valid = true; + ins_line->m_tag = tag; + ins_line->m_base = (addr & ~m_offset_mask); + ins_line->m_access_counter = 0; + ins_line->m_pref = false; + ins_line->m_appl_id = appl_id; + ins_line->m_gpuline = gpuline; + ins_line->m_skip = skip; + + // BIMODAL + if (*m_simBase->m_knobs->KNOB_RRIP_BIP_ALWAYS || set_id % m_modulo == (appl_id * 2 + 1)) { + // LRU + if (rand() % 100 < m_bip_epsilon) { + ins_line->m_last_access_time = m_simBase->m_simulation_cycle; + } + // LIP + else { + ins_line->m_last_access_time = 0; + } + } + // LRU + else if (set_id % m_modulo == (appl_id * 2)) { + ins_line->m_last_access_time = m_simBase->m_simulation_cycle; + } + // Followers + else { + // BIMODAL favor + if (m_sdm_counter[appl_id] > 0) { + // LRU + if (rand() % 100 < m_bip_epsilon) { + ins_line->m_last_access_time = m_simBase->m_simulation_cycle; + } + // LIP + else { + ins_line->m_last_access_time = 0; + } + } + // LRU + else { + ins_line->m_last_access_time = m_simBase->m_simulation_cycle; + } + } + + + if (ins_line->m_gpuline) { + ++m_num_gpu_line; + ++m_set[set_id]->m_num_gpu_line; + } + else { + ++m_num_cpu_line; + ++m_set[set_id]->m_num_cpu_line; + } +} + + +// update miss counter if a set is in SDM +void cache_tadip_c::update_cache_on_miss(int set_id, int appl_id) +{ + ++m_total_miss[appl_id]; + // LRU - miss (+ is non-LRU) + if (set_id % m_modulo == (appl_id * 2)) { + ++m_sdm_counter[appl_id]; + if (m_sdm_counter[appl_id] > m_sdm_max_counter_value) { + m_sdm_counter[appl_id] = m_sdm_max_counter_value; + } +// cout << "TADIP(L3-" << m_core_id << ")" << appl_id << " " << m_sdm_counter[appl_id] << "\n"; + } + // BIMODAL - miss (- is non-BIP) + else if (set_id % m_modulo == (appl_id * 2 + 1)) { + --m_sdm_counter[appl_id]; + if (m_sdm_counter[appl_id] < -1 * m_sdm_max_counter_value) { + m_sdm_counter[appl_id] = m_sdm_max_counter_value * -1; + } +// cout << "TADIP(L3-" << m_core_id << ")" << appl_id << " " << m_sdm_counter[appl_id] << "\n"; + } + + if (m_total_miss[appl_id] % 100 == 0) { + cout << "TADIP(L3-" << m_core_id << ")" << appl_id << " " << m_sdm_counter[appl_id] << "\n"; + } +} + diff --git a/src/cache_replacement/tadip.h b/src/cache_replacement/tadip.h new file mode 100644 index 00000000..5d10189f --- /dev/null +++ b/src/cache_replacement/tadip.h @@ -0,0 +1,66 @@ +/********************************************************************************************** + * File : rrip.h + * Author : Jaekyu Lee + * Date : 04/26/2011 + * SVN : $Id: cache.h, + * Description : TADIP (Jaleel et al. PACT 2008) + *********************************************************************************************/ + +#ifndef TADIP_H +#define TADIP_H + +#include "cache.h" + + +class cache_c; + +class cache_tadip_c : public cache_c +{ + public: + /** + * Constructor + */ + cache_tadip_c(string name, uns num_set, uns assoc, uns line_size, uns data_size, + uns bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, + bool enable_partition, macsim_c* simBase); + + /** + * Destructor + */ + virtual ~cache_tadip_c(); + + /** + * fine a cache line to replace + * \param set set id + */ + cache_entry_c * find_replacement_line(int set, int appl_id); + + /** + * Initialize a new cache line + */ + void initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, int appl_id, + bool gpuline, int set_id, bool skip); + + /** + * Update cache on cache misses - for set dueling + */ + void update_cache_on_miss(int set_id, int appl_id); + + private: + /** + * Default constructor - do not implement + */ + cache_tadip_c(); // do not implement + + static const int m_max_application = 20; + + int m_max_lru_value; /**< maximum lru value in RRIP */ + int m_insertion_value; /**< lru value upon insertion */ + int m_modulo; /**< modulo value for set monitorning */ + int *m_sdm_counter; + Counter *m_total_miss; + int m_sdm_max_counter_value; + int m_bip_epsilon; +}; + +#endif diff --git a/src/cache_replacement/ucp.cc b/src/cache_replacement/ucp.cc new file mode 100644 index 00000000..cd3b08b4 --- /dev/null +++ b/src/cache_replacement/ucp.cc @@ -0,0 +1,423 @@ +/********************************************************************************************** + * File : ucp.cc + * Author : Jaekyu Lee + * Date : 04/26/2011 + * SVN : $Id: cache.h, + * Description : UCP (Qureshi and Patt MICRO06) + *********************************************************************************************/ + + +#include +#include + +#include "ucp.h" +#include "../cache.h" +#include "../debug_macros.h" +#include "../assert_macros.h" +#include "../all_knobs.h" + + +#define DEBUG(args...) _DEBUG(*KNOB(KNOB_DEBUG_CACHE_LIB, ## args) +#define DEBUG_MEM(args...) _DEBUG(*KNOB(KNOB_DEBUG_MEM_TRACE, ## args) + + +// constructor +cache_ucp_c::cache_ucp_c(string name, uns num_set, uns assoc, uns line_size, + uns data_size, uns bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, + bool enable_partition, macsim_c* simBase) : cache_c(name, num_set, assoc, line_size, data_size, bank_num, + cache_by_pass, core_id, cache_type_info, enable_partition, 1, 0, simBase) +{ + // assume 32 sets for 1 SDM + m_modulo = num_set / 32; + m_num_way = assoc; + m_num_application = *KNOB(KNOB_UCP_CACHE_NUM_APPLICATION); + m_partition_period = *KNOB(KNOB_UCP_CACHE_PARTITION_PERIOD); + + for (int ii = 0; ii < m_max_application; ++ii) { + m_way_counter[ii] = new int[assoc]; + fill_n(m_way_counter[ii], assoc, 0); + } + + for (int ii = 0; ii < m_max_application; ++ii) { + m_atd[ii] = new list[32]; + for (int jj = 0; jj < 32; ++jj) { + for (int kk = 0; kk < assoc; ++kk) { + m_atd[ii][jj].push_back(0); + } + } + } + + for (int ii = 0; ii < m_max_application; ++ii) { + m_num_entry[ii] = new int[num_set]; + fill_n(m_num_entry[ii], num_set, 0); + } + + // initially assign equal amount + for (int ii = 0; ii < m_max_application; ++ii) { + m_quota[ii] = assoc / m_num_application; + } + + // tocheck + //m_last_partition_cycle = 0; + m_last_partition_cycle = 10000 - m_partition_period; + + m_access_count_by_type[2] = {0}; + m_total_access_count = 0; + m_access_ratio = 0.0; +} + + +// destructor +cache_ucp_c::~cache_ucp_c() +{ +} + + +// find an entry to be replaced based on the policy +cache_entry_c* cache_ucp_c::find_replacement_line(int set, int appl_id) +{ + int index = -1; + Counter min_lru = m_simBase->m_simulation_cycle + 1; + if (appl_id == -1) { + if (*KNOB(KNOB_CACHE_USE_PSEUDO_LRU)) { + while (1) { + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + if (!line->m_valid || line->m_last_access_time == 0) { + return &(m_set[set]->m_entry[ii]); + } + } + + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + line->m_last_access_time = 0; + } + } + } + else { + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + // find invalid or LRU entry + if (line->m_valid != true) { + index = ii; + break; + } + + if (line->m_last_access_time < min_lru) { + index = ii; + min_lru = line->m_last_access_time; + } + } + } + } + else { + // evict a block from other applications + if (m_num_entry[appl_id][set] < m_quota[appl_id]) { + if (*KNOB(KNOB_CACHE_USE_PSEUDO_LRU)) { + while (1) { + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + if (!line->m_valid || (line->m_appl_id != appl_id && line->m_last_access_time == 0)) { + return &(m_set[set]->m_entry[ii]); + } + } + + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + if (line->m_appl_id != appl_id) { + line->m_last_access_time = 0; + } + } + } + } + else { + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + if (line->m_valid != true) { + index = ii; + break; + } + + if (line->m_appl_id != appl_id && line->m_last_access_time < min_lru) { + index = ii; + min_lru = line->m_last_access_time; + } + } + } + } + // evict a block from own + else { + if (*KNOB(KNOB_CACHE_USE_PSEUDO_LRU)) { + while (1) { + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + if (!line->m_valid || (line->m_appl_id == appl_id && line->m_last_access_time == 0)) { + return &(m_set[set]->m_entry[ii]); + } + } + + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + if (line->m_appl_id == appl_id) { + line->m_last_access_time = 0; + } + } + } + } + else { + for (int ii = 0; ii < m_assoc; ++ii) { + cache_entry_c* line = &(m_set[set]->m_entry[ii]); + if (line->m_appl_id == appl_id && line->m_last_access_time < min_lru) { + index = ii; + min_lru = line->m_last_access_time; + } + } + } + } + } + + ASSERT(index != -1); + + return &(m_set[set]->m_entry[index]); +} + + +// initialize a cache line +void cache_ucp_c::initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, + int appl_id, bool gpuline, int set, bool skip) +{ + ins_line->m_valid = true; + ins_line->m_tag = tag; + ins_line->m_base = (addr & ~m_offset_mask); + ins_line->m_access_counter = 0; + ins_line->m_pref = false; + ins_line->m_appl_id = appl_id; + ins_line->m_gpuline = gpuline; + ins_line->m_last_access_time = m_simBase->m_simulation_cycle; + ins_line->m_skip = skip; + + ++m_num_entry[appl_id][set]; +} + + +void cache_ucp_c::update_cache_on_access(Addr line_addr, int set, int appl_id) +{ + if (m_total_access_count == 1000) { + if (m_access_count_by_type[0] == 0) { + m_access_ratio = 1000.0; + } + else { + m_access_ratio = static_cast(1.0 * m_access_count_by_type[1] / m_access_count_by_type[0]); + } + m_total_access_count = 0; + } + + if (set % m_modulo == 0) { + int set_index = set / m_modulo; + bool hit = false; + int count = 0; + for (auto I = m_atd[appl_id][set_index].begin(), E = m_atd[appl_id][set_index].end(); I != E; ++I) { + if ((*I) == line_addr) { + ++m_way_counter[appl_id][count]; + hit = true; + break; + } + ++count; + } + + + if (!hit) { + m_atd[appl_id][set_index].pop_back(); + } + else { + m_atd[appl_id][set_index].remove(line_addr); + } + m_atd[appl_id][set_index].push_front(line_addr); + } + + if (m_last_partition_cycle + m_partition_period <= m_simBase->m_simulation_cycle) { + update_partition(); + } + + ++m_total_access_count; +} + + +// update application partition every *KNOB(KNOB_UCP_CACHE_PARTITION_PERIOD cycle +// use lookahead algorithm +void cache_ucp_c::update_partition(void) +{ + // assign at least 1 way + fill_n(m_quota, m_max_application, 1); + int count = m_num_way - m_num_application; + + + if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) == true && m_access_ratio >= 2.0) { + for (int ii = 0; ii < m_num_application; ++ii) { + if (m_simBase->m_PCL->get_appl_type(ii) != true) + continue; + + for (int jj = 0; jj < m_num_way; ++jj) { + m_way_counter[ii][jj] /= *KNOB(KNOB_UCP_CACHE_ACCESS_CONTROL); + //m_way_counter[ii][jj] /= m_access_ratio; + } + } + } + + + int skip_cache[m_max_application] = {false}; + int skip_count = 0; + for (int ii = 0; ii < m_num_application; ++ii) { + // GPU && PSEL = 0 + if (*KNOB(KNOB_UCP_CACHE_FOR_GPU)) { + if (m_simBase->m_PCL->get_appl_type(ii) == true && m_simBase->m_PCL->get_psel_mask() == false) { + skip_cache[ii] = true; + ++skip_count; + } + } + + if (*KNOB(KNOB_CACHE_FOR_STREAM_CPU)) { + if (m_simBase->m_PCL->get_appl_type(ii) == false) { + skip_cache[ii] = true; + ++skip_count; + } + } + } + + + if (skip_count == m_num_application) { + for (int ii = 0; ii < m_num_application; ++ii) { + skip_cache[ii] = false; + } + } + + int total_hit[m_max_application] = {0}; + for (int ii = 0; ii < m_num_application; ++ii) { + total_hit[ii] = m_way_counter[ii][0]; + } + + + int random_index = m_simBase->m_simulation_cycle % m_num_application; + bool skip = true; + while (count > 0) { + float max = -1.0; + int max_appl_id = -1; + int max_way = -1; + int max_total_hit = 0; + + bool cpu_skip = false; + // emulate GPU interference + if (!*KNOB(KNOB_UCP_CACHE_FOR_GPU) && + rand() % 100 >= *KNOB(KNOB_UCP_CACHE_CPU_INTERFERENCE)) + cpu_skip = true; + + for (int ii = random_index; ii < random_index + m_num_application; ++ii) { + int appl_id = ii % m_num_application; + int sum = 0; + + + // skip gpu application partitioning + // skip : when there is not hit anymore for cpus, include gpu as well + if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) == true && skip && skip_cache[appl_id] == true) + continue; + + + // running streaming CPU application, so skip it + if (*KNOB(KNOB_CACHE_FOR_STREAM_CPU) && m_simBase->m_PCL->get_appl_type(appl_id) == false) + continue; + + + // emulate GPU interference + if (m_simBase->m_PCL->get_appl_type(appl_id) == false && cpu_skip == true) + continue; + + + // try to give less space to gpu + if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) && + m_simBase->m_PCL->get_appl_type(appl_id) == true && + rand() % 100 >= *KNOB(KNOB_UCP_CACHE_GPU_DROP_PROBABILITY)) + continue; + + + // try to give less space if access rate is much higher + if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) && + m_simBase->m_PCL->get_appl_type(appl_id) == true && + m_access_ratio >= 3.0 && + rand() % 100 >= 50) + continue; + + + // set limitation for gpu in lookahead partitioning algorithm + int max_lookup = count; + if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) == true && + m_simBase->m_PCL->get_appl_type(appl_id) == true && + count >= *KNOB(KNOB_UCP_CACHE_GPU_MAX_PARTITION_LOOKUP)) { + max_lookup = *KNOB(KNOB_UCP_CACHE_GPU_MAX_PARTITION_LOOKUP); + } + + for (int jj = m_quota[appl_id]; jj < m_quota[appl_id] + max_lookup; ++jj) { + sum += m_way_counter[appl_id][jj]; + float marginal_utility = static_cast(1.0* sum / (jj - m_quota[appl_id] + 1)); + if (marginal_utility > max) { + max = marginal_utility; + max_appl_id = appl_id; + max_way = jj - m_quota[appl_id] + 1; + max_total_hit = total_hit[appl_id]; + assert(max_way > 0); + } + // usual : give it to high total_hit (GPU) + // ucp-g : give it evenly + else if (marginal_utility == max && skip && total_hit[appl_id] > max_total_hit) { + assert(skip == true); + max = marginal_utility; + max_appl_id = appl_id; + max_way = jj - m_quota[appl_id] + 1; + max_total_hit = total_hit[appl_id]; + assert(max_way > 0); + } + } + } + + // If CPU does not have more hits, stop allocating to CPU, give it to GPUs. + if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) && max == 0.0 && skip == true) { + skip = false; + continue; + } + + if (max_appl_id == -1) + continue; + + int current_quota = m_quota[max_appl_id]; + for (int ii = current_quota; ii < current_quota + max_way; ++ii) { + total_hit[max_appl_id] += m_way_counter[max_appl_id][ii]; + } + //cout << max << " " << max_appl_id << " " << max_way << "\n"; + m_quota[max_appl_id] += max_way; + count -= max_way; + random_index++; + } + + // halve the counter values + for (int ii = 0; ii < m_num_application; ++ii) { +// fprintf(g_mystderr, "%s appl%d %d at %lld\n", m_name.c_str(), ii, m_quota[ii], m_simBase->m_simulation_cycle); + for (int jj = 0; jj < m_num_way; ++jj) { + m_way_counter[ii][jj] /= 2; + } + } + + m_last_partition_cycle = m_simBase->m_simulation_cycle; +} + +void cache_ucp_c::update_set_on_replacement(Addr tag, int appl_id, int set, bool gpuline) +{ + if (gpuline) { + --m_num_gpu_line; + --m_set[set]->m_num_gpu_line; + } + else { + --m_num_cpu_line; + --m_set[set]->m_num_cpu_line; + } + --m_num_entry[appl_id][set]; +} + diff --git a/src/cache_replacement/ucp.h b/src/cache_replacement/ucp.h new file mode 100644 index 00000000..f2edf26e --- /dev/null +++ b/src/cache_replacement/ucp.h @@ -0,0 +1,83 @@ +/********************************************************************************************** + * File : ucp.h + * Author : Jaekyu Lee + * Date : 04/26/2011 + * SVN : $Id: cache.h, + * Description : UCP (Qureshi and Patt MICRO06) + *********************************************************************************************/ + + +#ifndef UCP_H +#define UCP_H + + +#include + +#include "cache.h" + + +class cache_c; + +class cache_ucp_c : public cache_c +{ + public: + /** + * Constructor + */ + cache_ucp_c(string name, uns num_set, uns assoc, uns line_size, + uns data_size, uns bank_num, bool cache_by_pass, int core_id, + Cache_Type cache_type_info, bool enable_partition, macsim_c* simBase); + + /** + * Destructor + */ + virtual ~cache_ucp_c(); + + /** + * fine a cache line to replace + * \param set set id + */ + cache_entry_c * find_replacement_line(int set, int appl_id); + + /** + * Initialize a new cache line + */ + void initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, int appl_id, + bool gpuline, int set_id, bool skip); + + /** + * Update set on replacement + */ + void update_set_on_replacement(Addr tag, int appl_id, int set_id, bool gpuline); + + void update_cache_on_access(Addr tag, int set, int appl_id); + void update_atd(Addr tag, int appl_id, int set_id); + + + private: + /** + * Default constructor - do not implement + */ + cache_ucp_c(); // do not implement + + void update_partition(void); + + static const int m_max_application = 20; + + int* m_way_counter[m_max_application]; /**< counter for each ways */ + list* m_atd[m_max_application]; /**< auxiliary tag directory */ + int m_quota[m_max_application]; + int* m_num_entry[m_max_application]; + + int m_modulo; /**< modulo value for set monitorning */ + int m_last_partition_cycle; + int m_num_way; + int m_num_application; + int m_partition_period; + + int m_access_count_by_type[2]; + int m_total_access_count; + float m_access_ratio; +}; + +#endif diff --git a/src/memory.cc b/src/memory.cc index 63272ddf..1c679795 100644 --- a/src/memory.cc +++ b/src/memory.cc @@ -51,6 +51,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "bug_detector.h" #include "mmu.h" +#include "./cache_replacement/rrip.h" #include "config.h" #include "all_knobs.h" @@ -174,7 +175,7 @@ memory_c* default_mem(macsim_c* m_simBase) { // Default LLC constructor function cache_c* default_llc(macsim_c* m_simBase) { string llc_type = KNOB(KNOB_LLC_TYPE)->getValue(); - assert(llc_type == "default"); + // assert(llc_type == "default"); int num_tiles; int interleaving = -1; @@ -190,12 +191,28 @@ cache_c* default_llc(macsim_c* m_simBase) { interleaving = *m_simBase->m_knobs->KNOB_DRAM_ROWBUFFER_SIZE; } } + + // cache_c* llc = new cache_c( + // "llc_default", *KNOB(KNOB_LLC_NUM_SET), *KNOB(KNOB_LLC_ASSOC), + // *KNOB(KNOB_LLC_LINE_SIZE), sizeof(dcache_data_s), *KNOB(KNOB_LLC_NUM_BANK), + // false, 0, CACHE_DL1, false, num_tiles, interleaving, m_simBase); - cache_c* llc = new cache_c( + // added to extend existing simulator to add cache replacement policy + if(llc_type == "rrip"){ + + cache_c* llc = new cache_rrip_c("llc_rrip", *KNOB(KNOB_LLC_NUM_SET), *KNOB(KNOB_LLC_ASSOC), + *KNOB(KNOB_LLC_LINE_SIZE), sizeof(dcache_data_s), *KNOB(KNOB_LLC_NUM_BANK), + false, 0, CACHE_DL1, false, num_tiles, interleaving, m_simBase); + + return llc; + + }else{ + cache_c* llc = new cache_c( "llc_default", *KNOB(KNOB_LLC_NUM_SET), *KNOB(KNOB_LLC_ASSOC), *KNOB(KNOB_LLC_LINE_SIZE), sizeof(dcache_data_s), *KNOB(KNOB_LLC_NUM_BANK), false, 0, CACHE_DL1, false, num_tiles, interleaving, m_simBase); - return llc; + return llc; + } } ///////////////////////////////////////////////////////////////////////////////////////////////