From 44f1fb5704b9b4f501ecfcbe2ffc9ed108c0adfc Mon Sep 17 00:00:00 2001
From: Seonjin Na <monokai@kaist.ac.kr>
Date: Tue, 26 Mar 2024 21:45:43 -0400
Subject: [PATCH] revive cache replacement policy

---
 def/memory.param.def           |  28 +++
 src/cache_replacement/rrip.cc  | 220 +++++++++++++++++
 src/cache_replacement/rrip.h   |  80 +++++++
 src/cache_replacement/tadip.cc | 161 +++++++++++++
 src/cache_replacement/tadip.h  |  66 +++++
 src/cache_replacement/ucp.cc   | 423 +++++++++++++++++++++++++++++++++
 src/cache_replacement/ucp.h    |  83 +++++++
 src/memory.cc                  |  23 +-
 8 files changed, 1081 insertions(+), 3 deletions(-)
 create mode 100644 src/cache_replacement/rrip.cc
 create mode 100644 src/cache_replacement/rrip.h
 create mode 100644 src/cache_replacement/tadip.cc
 create mode 100644 src/cache_replacement/tadip.h
 create mode 100644 src/cache_replacement/ucp.cc
 create mode 100644 src/cache_replacement/ucp.h
diff --git a/def/memory.param.def b/def/memory.param.def
index f72fcd8f..9a2266ee 100644
--- a/def/memory.param.def
+++ b/def/memory.param.def
@@ -191,6 +191,34 @@ param<SHARED_MEM_CYCLES, shared_mem_cycles, uns8, 4>
 param<SHARED_MEM_PORTS, shared_mem_ports, uns, 2>
 
 
+/* Cache Replacement Policy */ 
+/* RRIP: rrip.cc uses 2^NUM_BIT - 1 as the largest re-reference value and INSERT_AT as the value given to a new line */
+/* RRIP: 2^NUM_COUNTER_BIT bounds the set-dueling counter (DYNAMIC_ON); BIP_EPSILON is a percentage (compared with rand() % 100) */
+param<RRIP_CACHE_NUM_BIT, rrip_cache_num_bit, int, 3>
+param<RRIP_CACHE_INSERT_AT, rrip_cache_insert_at, int, 6>
+param<RRIP_CACHE_NUM_COUNTER_BIT, rrip_cache_num_counter_bit, int, 10>
+param<RRIP_CACHE_DYNAMIC_ON, rrip_cache_dynamic_on, bool, false>
+param<RRIP_CACHE_BIP_EPSILON, rrip_cache_bip_epsilon, int, 5>
+param<RRIP_CACHE_FOR_GPU, rrip_cache_for_gpu, bool, false> 
+param<RRIP_CACHE_FOR_MULTI_GPU, rrip_cache_for_multi_gpu, bool, false> 
+param<RRIP_CACHE_PROBABILITY, rrip_cache_probability, int, 20>
+param<RRIP_BIP_ALWAYS, rrip_bip_always, bool, false>
+param<CACHE_FOR_STREAM_CPU, cache_for_stream_cpu, bool, false>
+
+/* TADIP: counter width and bimodal percentage, read by tadip.cc in the same way as the RRIP knobs of the same name */
+param<TADIP_CACHE_NUM_COUNTER_BIT, tadip_cache_num_counter_bit, int, 10>
+param<TADIP_CACHE_BIP_EPSILON, tadip_cache_bip_epsilon, int, 5>
+
+/* UCP: knobs read by ucp.cc (NUM_APPLICATION = applications sharing the ways, PARTITION_PERIOD = cycles between repartitioning) */
+param<UCP_CACHE_NUM_APPLICATION, ucp_cache_num_application, int, 2>
+param<UCP_CACHE_PARTITION_PERIOD, ucp_cache_partition_period, int, 5000000>
+param<UCP_CACHE_FOR_GPU, ucp_cache_for_gpu, bool, false>
+param<UCP_CACHE_FOR_MULTI_GPU, ucp_cache_for_multi_gpu, bool, false>
+param<UCP_CACHE_CPU_INTERFERENCE, ucp_cache_cpu_interference, int, 50>
+param<UCP_CACHE_GPU_MAX_PARTITION_LOOKUP, ucp_cache_gpu_max_partition_lookup, int, 2>
+param<UCP_CACHE_GPU_DROP_PROBABILITY, ucp_cache_gpu_drop_probability, int, 50>
+param<UCP_CACHE_ACCESS_CONTROL, ucp_cache_access_control, int, 1>
+
 /* Cache coherence */
 param<ENABLE_CACHE_COHERENCE, enable_cache_coherence, bool, false>
 
diff --git a/src/cache_replacement/rrip.cc b/src/cache_replacement/rrip.cc
new file mode 100644
index 00000000..631972dd
--- /dev/null
+++ b/src/cache_replacement/rrip.cc
@@ -0,0 +1,220 @@
+/**********************************************************************************************
+ * File         : rrip.cc
+ * Author       : Jaekyu Lee
+ * Date         : 04/26/2011 
+ * SVN          : $Id: cache.h,
+ * Description  : RRIP (Jaleel et al. ISCA 2010) 
+ *********************************************************************************************/
+
+
+#include <cmath>
+
+#include "rrip.h"
+#include "../cache.h"
+#include "../utils.h"
+#include "../debug_macros.h"
+#include "../all_knobs.h"
+#include "../statistics.h"
+
+#define DEBUG(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_CACHE_LIB, ## args)
+#define DEBUG_MEM(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_MEM_TRACE, ## args)
+
+
+/**
+ * Constructor.
+ * Forwards the cache geometry (including num_tiles / interleave_factor) to
+ * cache_c and derives the RRIP parameters from the RRIP_CACHE_* knobs.
+ */
+cache_rrip_c::cache_rrip_c(string name, int num_set, int assoc, int line_size, 
+    int data_size, int bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, 
+    bool enable_partition, int num_tiles, int interleave_factor, macsim_c* simBase) : cache_c(name, num_set, assoc, line_size, 
+      data_size, bank_num, cache_by_pass, core_id, cache_type_info, enable_partition, num_tiles,interleave_factor,simBase)
+{
+  // largest re-reference value representable with max_bit bits (2^n - 1)
+  int max_bit = *KNOB(KNOB_RRIP_CACHE_NUM_BIT);
+  m_max_lru_value   = static_cast<int>(pow(2, max_bit) - 1);
+  m_insertion_value = *m_simBase->m_knobs->KNOB_RRIP_CACHE_INSERT_AT;
+
+  // assume 32 sets for 1 SDM; keep the modulus non-zero so that
+  // "set_id % m_modulo" stays defined for caches with fewer than 32 sets
+  m_modulo = (num_set >= 32) ? num_set / 32 : 1;
+
+  // value-initialize so that every per-application counter starts at zero
+  m_sdm_counter = new int[m_max_application]();
+  m_total_miss  = new Counter[m_max_application]();
+
+  // saturation bound of the set-dueling counter
+  m_sdm_max_counter_value = 
+    static_cast<int>(pow(2, static_cast<int>(*KNOB(KNOB_RRIP_CACHE_NUM_COUNTER_BIT))));
+
+  m_bip_epsilon = *m_simBase->m_knobs->KNOB_RRIP_CACHE_BIP_EPSILON;
+  // the array has two elements; clear both (index 2 would be out of bounds)
+  m_access_count_by_type[0] = 0;
+  m_access_count_by_type[1] = 0;
+  m_total_access_count = 0;
+  m_total_insert_count = 0;
+  m_access_ratio = 0.0;
+}
+
+
+// destructor
+cache_rrip_c::~cache_rrip_c()
+{
+  delete[] m_sdm_counter;  // both arrays come from new[] in the constructor
+  delete[] m_total_miss;
+}
+/**
+ * Find the entry to be replaced (RRIP victim selection).
+ * Returns the first invalid way, or the first way whose re-reference value
+ * (kept in m_last_access_time) has reached m_max_lru_value; while no such
+ * way exists, every entry of the set is aged by one and the scan repeats.
+ * \param set set id
+ * \param appl_id application id (not used by this policy)
+ */
+cache_entry_c* cache_rrip_c::find_replacement_line(int set, int appl_id) 
+{
+  const Counter max_value = static_cast<Counter>(m_max_lru_value);
+  int index = -1;
+
+  while (index == -1) {
+    for (int ii = 0; ii < m_assoc; ++ii) {
+      cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+      // ">=" (not "==") so that a value above the maximum, e.g. from an
+      // insertion knob larger than 2^n-1, still ends the search
+      if (line->m_valid != true || line->m_last_access_time >= max_value) {
+        index = ii;
+        break;
+      }
+    }
+
+    // no candidate: age the whole set and scan again
+    if (index == -1) {
+      for (int ii = 0; ii < m_assoc; ++ii) {
+        ++m_set[set]->m_entry[ii].m_last_access_time;
+      }
+    }
+  }
+  ++m_total_insert_count;
+  return &(m_set[set]->m_entry[index]);
+}
+
+
+/**
+ * Initialize a newly inserted cache line and choose its RRIP insertion value.
+ *
+ * Static mode (RRIP_CACHE_DYNAMIC_ON off): lines are inserted at
+ * m_insertion_value, except that GPU lines use bimodal insertion when
+ * RRIP_BIP_ALWAYS is set.
+ *
+ * Dynamic mode (DRRIP set dueling), with slot = set_id % m_modulo:
+ *   slot == appl_id * 2 + 1 : sample set, always bimodal insertion
+ *   slot == appl_id * 2     : sample set, always SRRIP insertion
+ *   otherwise               : follower set, bimodal insertion only while
+ *                             m_sdm_counter[appl_id] is positive
+ *
+ * \param ins_line entry to initialize
+ * \param tag tag of the new line
+ * \param addr address of the new line (its line base is stored in m_base)
+ * \param appl_id application id of the requester
+ * \param gpuline true if the line is accounted as a GPU line
+ * \param set_id index of the set that holds ins_line
+ * \param skip copied into the entry's m_skip field
+ */
+void cache_rrip_c::initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, 
+    int appl_id, bool gpuline, int set_id, bool skip) 
+{
+  ins_line->m_valid            = true;
+  ins_line->m_tag              = tag;
+  ins_line->m_base             = (addr & ~m_offset_mask);
+  ins_line->m_access_counter   = 0;
+  ins_line->m_pref             = false;
+  ins_line->m_appl_id          = appl_id;
+  ins_line->m_gpuline          = gpuline;
+  ins_line->m_skip             = skip;
+
+  // Bimodal insertion: with m_bip_epsilon percent probability insert at the
+  // SRRIP position, otherwise at the distant position (2^n - 1).
+  auto bimodal_value = [this]() {
+    return (rand() % 100 < m_bip_epsilon) ? m_insertion_value : m_max_lru_value;
+  };
+
+  bool use_bimodal;
+  if (!*KNOB(KNOB_RRIP_CACHE_DYNAMIC_ON)) {
+    // SRRIP; the knob is dereferenced on its own before being combined with
+    // gpuline (the flag must not be part of the KNOB() macro argument)
+    use_bimodal = *KNOB(KNOB_RRIP_BIP_ALWAYS) && gpuline;
+  }
+  else {
+    // DRRIP
+    const int slot = set_id % m_modulo;
+    if (slot == appl_id * 2 + 1) {
+      use_bimodal = true;
+    }
+    else if (slot == appl_id * 2) {
+      use_bimodal = false;
+    }
+    else {
+      use_bimodal = (m_sdm_counter[appl_id] > 0);
+    }
+  }
+
+  if (use_bimodal) {
+    ins_line->m_last_access_time = bimodal_value();
+  }
+  else {
+    ins_line->m_last_access_time = m_insertion_value;
+  }
+
+
+  // per-cache and per-set occupancy statistics
+  if (ins_line->m_gpuline) { 
+    ++m_num_gpu_line;
+    ++m_set[set_id]->m_num_gpu_line;
+  }
+  else {
+    ++m_num_cpu_line;
+    ++m_set[set_id]->m_num_cpu_line;
+  }
+}
+
+
+// update a line upon cache hits: the re-reference value is reset to 0
+// (the decrement-by-one variant is kept below, commented out)
+void cache_rrip_c::update_line_on_hit(cache_entry_c* line, int set, int appl_id)
+{
+  if (line->m_last_access_time > 0) {
+//    --line->m_last_access_time;
+    line->m_last_access_time = 0;
+  }
+}
+
+
+
+
+void cache_rrip_c::update_cache_on_miss(int set_id, int appl_id)
+{
+  // set dueling bookkeeping is only done for DRRIP
+  if (!*KNOB(KNOB_RRIP_CACHE_DYNAMIC_ON))
+    return;
+
+  m_total_miss[appl_id]++;
+  int& psel = m_sdm_counter[appl_id];
+  const int slot = set_id % m_modulo;
+
+  if (slot == appl_id * 2) {
+    // miss in an SRRIP sample set: count up, saturating
+    if (++psel > m_sdm_max_counter_value)
+      psel = m_sdm_max_counter_value;
+  }
+  else if (slot == appl_id * 2 + 1) {
+    // miss in a BIMODAL sample set: count down, saturating
+    if (--psel < -m_sdm_max_counter_value)
+      psel = -m_sdm_max_counter_value;
+  }
+}
+
+
+void cache_rrip_c::update_cache_on_access(Addr line_addr, int set, int appl_id)
+{
+  ++m_total_access_count;  // only counts the access; line_addr, set and appl_id are not used
+}
diff --git a/src/cache_replacement/rrip.h b/src/cache_replacement/rrip.h
new file mode 100644
index 00000000..8e530852
--- /dev/null
+++ b/src/cache_replacement/rrip.h
@@ -0,0 +1,80 @@
+/**********************************************************************************************
+ * File         : rrip.h
+ * Author       : Jaekyu Lee
+ * Date         : 04/26/2011 
+ * SVN          : $Id: cache.h,
+ * Description  : RRIP (Jaleel et al. ISCA 2010) 
+ *********************************************************************************************/
+
+#ifndef RRIP_H
+#define RRIP_H
+
+#include "../cache.h"
+
+
+class cache_c;
+
+class cache_rrip_c : public cache_c
+{
+  public:
+    /**
+     * Constructor; the arguments are forwarded to the cache_c constructor
+     */
+    //added num_tiles, interleave_factor  2024-03-26
+    cache_rrip_c(string name, int num_set, int assoc, int line_size, 
+        int data_size, int bank_num, bool cache_by_pass, int core_id, 
+        Cache_Type cache_type_info,  bool enable_partition, int num_tiles, int interleave_factor, macsim_c* simBase); 
+
+    /**
+     * Destructor
+     */
+    virtual ~cache_rrip_c();
+    
+    /**
+     * find a cache line to replace
+     * \param set set id
+     */
+    cache_entry_c * find_replacement_line(int set, int appl_id);
+    
+    /**
+     * Initialize a new cache line and choose its insertion value
+     */
+    void initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, int appl_id, 
+        bool gpuline, int set_id, bool skip);
+    
+    /**
+     * Reset the line's re-reference value (m_last_access_time) on cache hit
+     */
+    void update_line_on_hit(cache_entry_c* line, int set, int appl_id);
+
+    /**
+     * Update cache on cache misses - for set dueling
+     */
+    void update_cache_on_miss(int set_id, int appl_id);
+    /** Count one cache access (increments m_total_access_count) */
+    void update_cache_on_access(Addr tag, int set, int appl_id);
+
+  private:
+    /**
+     * Default constructor - do not implement
+     */
+    cache_rrip_c(); // do not implement
+
+    static const int m_max_application = 20;
+
+    int m_max_lru_value; /**< maximum lru value in RRIP */
+    int m_insertion_value; /**< lru value upon insertion */
+    int m_modulo; /**< modulo value for set monitoring */
+    int *m_sdm_counter; /**< per-application set-dueling counter */
+    Counter *m_total_miss; /**< per-application miss count (updated in DRRIP mode) */
+    //int *m_sdm_counter[m_max_application];
+    int m_sdm_max_counter_value; /**< saturation bound of m_sdm_counter */
+    int m_bip_epsilon; /**< percentage used by bimodal insertion */
+
+    int m_access_count_by_type[2];
+    Counter m_total_access_count; /**< incremented by update_cache_on_access */
+    Counter m_total_insert_count; /**< incremented by find_replacement_line */
+    float m_access_ratio;
+};
+
+#endif
diff --git a/src/cache_replacement/tadip.cc b/src/cache_replacement/tadip.cc
new file mode 100644
index 00000000..dea39950
--- /dev/null
+++ b/src/cache_replacement/tadip.cc
@@ -0,0 +1,161 @@
+/**********************************************************************************************
+ * File         : tadip.cc
+ * Author       : Jaekyu Lee
+ * Date         : 04/26/2011 
+ * SVN          : $Id: cache.h,
+ * Description  : TADIP (Jaleel et al. PACT 2008) 
+ *********************************************************************************************/
+
+
+#include <cmath>
+#include <cassert>
+
+#include "tadip.h"
+#include "../cache.h"
+#include "../debug_macros.h"
+
+#include "../all_knobs.h"
+
+
+#define DEBUG(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_CACHE_LIB, ## args)
+#define DEBUG_MEM(args...) _DEBUG(*m_simBase->m_knobs->KNOB_DEBUG_MEM_TRACE, ## args)
+
+
+// constructor: forwards the geometry to cache_c (1 tile, interleave factor 0)
+// and sets up the per-application set-dueling state
+cache_tadip_c::cache_tadip_c(string name, uns num_set, uns assoc, uns line_size, 
+    uns data_size, uns bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, 
+    bool enable_partition, macsim_c* simBase) : cache_c(name, num_set, assoc, line_size, 
+      data_size, bank_num, cache_by_pass, core_id, cache_type_info, enable_partition, 1, 0, simBase)
+{
+  // assume 32 sets for 1 SDM; never let the modulus become 0 (num_set < 32),
+  // it is the divisor of "set_id % m_modulo"
+  m_modulo = (num_set >= 32) ? num_set / 32 : 1;
+
+  // value-initialized: m_total_miss is incremented and tested without any
+  // other initialization, so it must start at zero
+  m_sdm_counter = new int[m_max_application]();
+  m_total_miss  = new Counter[m_max_application]();
+  m_max_lru_value = 0; m_insertion_value = 0;  // not read by tadip.cc; never leave them indeterminate
+  m_sdm_max_counter_value = 
+    static_cast<int>(pow(2, static_cast<int>(*m_simBase->m_knobs->KNOB_TADIP_CACHE_NUM_COUNTER_BIT)));
+  m_bip_epsilon = *m_simBase->m_knobs->KNOB_TADIP_CACHE_BIP_EPSILON;
+}
+
+
+// destructor
+cache_tadip_c::~cache_tadip_c()
+{
+  delete[] m_sdm_counter;  // both arrays come from new[] in the constructor
+  delete[] m_total_miss;
+}
+// Victim selection: the first invalid way wins immediately; otherwise the
+// way with the smallest m_last_access_time below (current cycle + 1) is
+// taken, ties keeping the lowest way index.
+cache_entry_c* cache_tadip_c::find_replacement_line(int set, int appl_id) 
+{
+  int index = -1;
+  Counter oldest = m_simBase->m_simulation_cycle + 1;
+
+  for (int way = 0; way < m_assoc; ++way) {
+    cache_entry_c& entry = m_set[set]->m_entry[way];
+    if (entry.m_valid != true) {
+      index = way;
+      break;
+    }
+    if (entry.m_last_access_time < oldest) {
+      oldest = entry.m_last_access_time;
+      index = way;
+    }
+  }
+
+  assert(index != -1);
+  return &(m_set[set]->m_entry[index]);
+}
+
+
+// initialize a cache line
+// The new line is either placed at the MRU position (m_last_access_time =
+// current cycle) or inserted bimodally: MRU with m_bip_epsilon percent
+// probability, otherwise m_last_access_time = 0.
+//   - RRIP_BIP_ALWAYS set, or set_id % m_modulo == appl_id * 2 + 1 : bimodal
+//   - set_id % m_modulo == appl_id * 2                             : MRU
+//   - any other set : bimodal only while m_sdm_counter[appl_id] is positive
+// ins_line: entry to fill; tag/addr: identity of the new line;
+// appl_id: requesting application; gpuline: stored in m_gpuline and used for
+// the line statistics; set_id: set holding ins_line; skip: stored in m_skip
+void cache_tadip_c::initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, 
+    int appl_id, bool gpuline, int set_id, bool skip) 
+{
+  // identity and bookkeeping fields of the entry
+  ins_line->m_valid            = true;
+  ins_line->m_tag              = tag;
+  ins_line->m_base             = (addr & ~m_offset_mask);
+  ins_line->m_access_counter   = 0;
+  ins_line->m_pref             = false;
+  ins_line->m_appl_id          = appl_id;
+  ins_line->m_gpuline          = gpuline;
+  ins_line->m_skip             = skip;
+
+  // pick the insertion flavour; the knob is tested first so that the modulo
+  // is skipped when bimodal insertion is forced
+  bool bimodal;
+  if (*m_simBase->m_knobs->KNOB_RRIP_BIP_ALWAYS || set_id % m_modulo == (appl_id * 2 + 1)) {
+    bimodal = true;
+  }
+  else if (set_id % m_modulo == (appl_id * 2)) {
+    bimodal = false;
+  }
+  else {
+    bimodal = (m_sdm_counter[appl_id] > 0);
+  }
+
+
+  // rand() is only drawn for bimodal insertions
+  if (bimodal && rand() % 100 >= m_bip_epsilon) {
+    ins_line->m_last_access_time = 0;
+  }
+  else {
+    ins_line->m_last_access_time = m_simBase->m_simulation_cycle;
+  }
+
+
+  // occupancy statistics, per cache and per set
+  if (!ins_line->m_gpuline) { 
+    ++m_num_cpu_line;
+    ++m_set[set_id]->m_num_cpu_line;
+  }
+  else {
+    ++m_num_gpu_line;
+    ++m_set[set_id]->m_num_gpu_line;
+  }
+
+}
+
+
+// update miss counter if a set is in SDM
+// A miss in the application's LRU sample set (set_id % m_modulo == appl_id * 2)
+// increments its counter, a miss in its BIP sample set (appl_id * 2 + 1)
+// decrements it; the counter saturates at +/- m_sdm_max_counter_value.
+void cache_tadip_c::update_cache_on_miss(int set_id, int appl_id)
+{
+  ++m_total_miss[appl_id];
+
+  int& psel = m_sdm_counter[appl_id];
+  const int slot = set_id % m_modulo;
+  if (slot == appl_id * 2) {
+    if (++psel > m_sdm_max_counter_value)
+      psel = m_sdm_max_counter_value;
+  }
+  else if (slot == appl_id * 2 + 1) {
+    if (--psel < -m_sdm_max_counter_value)
+      psel = -m_sdm_max_counter_value;
+  }
+
+  // print the counter every 100 misses of this application
+  const bool report = (m_total_miss[appl_id] % 100 == 0);
+  if (report) {
+    cout << "TADIP(L3-" << m_core_id << ")" << appl_id << " " << m_sdm_counter[appl_id] << "\n";
+  }
+}
+
diff --git a/src/cache_replacement/tadip.h b/src/cache_replacement/tadip.h
new file mode 100644
index 00000000..5d10189f
--- /dev/null
+++ b/src/cache_replacement/tadip.h
@@ -0,0 +1,66 @@
+/**********************************************************************************************
+ * File         : tadip.h
+ * Author       : Jaekyu Lee
+ * Date         : 04/26/2011 
+ * SVN          : $Id: cache.h,
+ * Description  : TADIP (Jaleel et al. PACT 2008) 
+ *********************************************************************************************/
+
+#ifndef TADIP_H
+#define TADIP_H
+
+#include "../cache.h"
+
+
+class cache_c;
+
+class cache_tadip_c : public cache_c
+{
+  public:
+    /**
+     * Constructor; passes 1 tile and interleave factor 0 to cache_c
+     */
+    cache_tadip_c(string name, uns num_set, uns assoc, uns line_size, uns data_size, 
+        uns bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, 
+        bool enable_partition, macsim_c* simBase); 
+
+    /**
+     * Destructor
+     */
+    virtual ~cache_tadip_c();
+    
+    /**
+     * find a cache line to replace (invalid way first, else smallest m_last_access_time)
+     * \param set set id
+     */
+    cache_entry_c * find_replacement_line(int set, int appl_id);
+    
+    /**
+     * Initialize a new cache line and choose its insertion position
+     */
+    void initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, int appl_id, 
+        bool gpuline, int set_id, bool skip);
+
+    /**
+     * Update cache on cache misses - for set dueling
+     */
+    void update_cache_on_miss(int set_id, int appl_id);
+
+  private:
+    /**
+     * Default constructor - do not implement
+     */
+    cache_tadip_c(); // do not implement
+
+    static const int m_max_application = 20;
+
+    int m_max_lru_value; /**< not read by tadip.cc */
+    int m_insertion_value; /**< not read by tadip.cc */
+    int m_modulo; /**< modulo value for set monitoring */
+    int *m_sdm_counter; /**< per-application set-dueling counter */
+    Counter *m_total_miss; /**< per-application miss count */
+    int m_sdm_max_counter_value; /**< saturation bound of m_sdm_counter */
+    int m_bip_epsilon; /**< percentage used by bimodal insertion */
+};
+
+#endif
diff --git a/src/cache_replacement/ucp.cc b/src/cache_replacement/ucp.cc
new file mode 100644
index 00000000..cd3b08b4
--- /dev/null
+++ b/src/cache_replacement/ucp.cc
@@ -0,0 +1,423 @@
+/**********************************************************************************************
+ * File         : ucp.cc
+ * Author       : Jaekyu Lee
+ * Date         : 04/26/2011 
+ * SVN          : $Id: cache.h,
+ * Description  : UCP (Qureshi and Patt MICRO06)
+ *********************************************************************************************/
+
+
+#include <cmath>
+#include <cassert>
+
+#include "ucp.h"
+#include "../cache.h"
+#include "../debug_macros.h"
+#include "../assert_macros.h"
+#include "../all_knobs.h"
+
+
+#define DEBUG(args...) _DEBUG(*KNOB(KNOB_DEBUG_CACHE_LIB), ## args)
+#define DEBUG_MEM(args...) _DEBUG(*KNOB(KNOB_DEBUG_MEM_TRACE), ## args)
+
+
+// constructor: forwards the geometry to cache_c (1 tile, interleave factor 0)
+// and allocates the per-application UCP monitoring state
+cache_ucp_c::cache_ucp_c(string name, uns num_set, uns assoc, uns line_size, 
+    uns data_size, uns bank_num, bool cache_by_pass, int core_id, Cache_Type cache_type_info, 
+    bool enable_partition, macsim_c* simBase) : cache_c(name, num_set, assoc, line_size, data_size, bank_num,
+      cache_by_pass, core_id, cache_type_info, enable_partition, 1, 0, simBase)
+{
+  // assume 32 sets for 1 SDM; the value is used as a divisor, so it must not
+  // become 0 when the cache has fewer than 32 sets
+  m_modulo = (num_set >= 32) ? num_set / 32 : 1;
+  m_num_way = assoc;
+  m_num_application = *KNOB(KNOB_UCP_CACHE_NUM_APPLICATION);
+  m_partition_period = *KNOB(KNOB_UCP_CACHE_PARTITION_PERIOD);
+
+  // m_num_application divides assoc below and bounds loops over arrays of
+  // m_max_application elements
+  assert(m_num_application > 0 && m_num_application <= m_max_application);
+
+  for (int ii = 0; ii < m_max_application; ++ii) {
+    // hit counter per recency position, zero-initialized
+    m_way_counter[ii] = new int[assoc]();
+    // auxiliary tag directory: 32 sampled sets, each a list of assoc
+    // placeholder tags (0) with the most recently used tag at the front
+    m_atd[ii] = new list<Addr>[32];
+    for (int jj = 0; jj < 32; ++jj) {
+      m_atd[ii][jj].assign(assoc, Addr(0));
+    }
+    // number of lines the application currently owns in every set
+    m_num_entry[ii] = new int[num_set]();
+
+    // initially assign equal amount
+    m_quota[ii] = assoc / m_num_application;
+  }
+
+  // tocheck
+  //m_last_partition_cycle = 0;
+  m_last_partition_cycle = 10000 - m_partition_period;
+  
+  // two-element array: clear both slots (index 2 would be out of bounds)
+  m_access_count_by_type[0] = 0;
+  m_access_count_by_type[1] = 0;
+  m_total_access_count = 0;
+  m_access_ratio = 0.0;
+}
+
+
+// destructor: releases the per-application arrays allocated by the constructor
+cache_ucp_c::~cache_ucp_c()
+{
+  for (int ii = 0; ii < m_max_application; ++ii) {
+    delete[] m_way_counter[ii]; delete[] m_atd[ii]; delete[] m_num_entry[ii];
+  } }
+// find an entry to be replaced: appl_id == -1 ignores partitions; otherwise evict from other applications while the requester is below its quota, else from its own lines
+cache_entry_c* cache_ucp_c::find_replacement_line(int set, int appl_id) 
+{
+  int index = -1;
+  Counter min_lru = m_simBase->m_simulation_cycle + 1;
+  if (appl_id == -1) {
+    if (*KNOB(KNOB_CACHE_USE_PSEUDO_LRU)) {
+      while (1) {
+        for (int ii = 0; ii < m_assoc; ++ii) {
+          cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+          if (!line->m_valid || line->m_last_access_time == 0) {
+            return &(m_set[set]->m_entry[ii]);
+          }
+        }
+        // no invalid line and no line with value 0: clear every line and rescan
+        for (int ii = 0; ii < m_assoc; ++ii) {
+          cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+          line->m_last_access_time = 0;
+        }
+      }
+    }
+    else {
+      for (int ii = 0; ii < m_assoc; ++ii) {
+        cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+        // find invalid or LRU entry
+        if (line->m_valid != true) {
+          index = ii;
+          break;
+        }
+
+        if (line->m_last_access_time < min_lru) {
+          index = ii;
+          min_lru = line->m_last_access_time;
+        }
+      }
+    }
+  }
+  else {
+    // evict a block from other applications (the requester owns fewer lines in this set than its quota)
+    if (m_num_entry[appl_id][set] < m_quota[appl_id]) {
+      if (*KNOB(KNOB_CACHE_USE_PSEUDO_LRU)) {
+        while (1) {
+          for (int ii = 0; ii < m_assoc; ++ii) {
+            cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+            if (!line->m_valid || (line->m_appl_id != appl_id && line->m_last_access_time == 0)) {
+              return &(m_set[set]->m_entry[ii]);
+            }
+          }
+          // nothing evictable yet: clear the value of every line of other applications and rescan
+          for (int ii = 0; ii < m_assoc; ++ii) {
+            cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+            if (line->m_appl_id != appl_id) {
+              line->m_last_access_time = 0;
+            }
+          }
+        }
+      }
+      else {
+        for (int ii = 0; ii < m_assoc; ++ii) {
+          cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+          if (line->m_valid != true) {
+            index = ii;
+            break;
+          }
+
+          if (line->m_appl_id != appl_id && line->m_last_access_time < min_lru) {
+            index = ii;
+            min_lru = line->m_last_access_time;
+          }
+        }
+      }
+    }
+    // evict a block from own; NOTE(review): the non-pseudo-LRU scan below does not take invalid ways, so index stays -1 (ASSERT) if no own line is older than min_lru
+    else {
+      if (*KNOB(KNOB_CACHE_USE_PSEUDO_LRU)) {
+        while (1) {
+          for (int ii = 0; ii < m_assoc; ++ii) {
+            cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+            if (!line->m_valid || (line->m_appl_id == appl_id && line->m_last_access_time == 0)) {
+              return &(m_set[set]->m_entry[ii]);
+            }
+          }
+          // nothing evictable yet: clear the value of every own line and rescan
+          for (int ii = 0; ii < m_assoc; ++ii) {
+            cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+            if (line->m_appl_id == appl_id) {
+              line->m_last_access_time = 0;
+            }
+          }
+        }
+      }
+      else {
+        for (int ii = 0; ii < m_assoc; ++ii) {
+          cache_entry_c* line = &(m_set[set]->m_entry[ii]);
+          if (line->m_appl_id == appl_id && line->m_last_access_time < min_lru) {
+            index = ii;
+            min_lru = line->m_last_access_time;
+          }
+        }
+      }
+    }
+  }
+
+  ASSERT(index != -1);
+
+  return &(m_set[set]->m_entry[index]);
+}
+
+
+// initialize a cache line and account for it: per-application occupancy plus
+// the GPU/CPU line counters that update_set_on_replacement() decrements
+void cache_ucp_c::initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, 
+    int appl_id, bool gpuline, int set, bool skip) 
+{
+  ins_line->m_valid            = true;
+  ins_line->m_tag              = tag;
+  ins_line->m_base             = (addr & ~m_offset_mask);
+  ins_line->m_access_counter   = 0;
+  ins_line->m_pref             = false;
+  ins_line->m_appl_id          = appl_id;
+  ins_line->m_gpuline          = gpuline;
+  ins_line->m_last_access_time = m_simBase->m_simulation_cycle;
+  ins_line->m_skip             = skip;
+  ++m_num_entry[appl_id][set];
+  if (gpuline) { ++m_num_gpu_line; ++m_set[set]->m_num_gpu_line; }
+  else         { ++m_num_cpu_line; ++m_set[set]->m_num_cpu_line; }
+}
+// Per-access hook: refreshes m_access_ratio every 1000 accesses, updates the
+// ATD / way-hit counters for sampled sets and repartitions when the period elapsed.
+void cache_ucp_c::update_cache_on_access(Addr line_addr, int set, int appl_id)
+{
+  if (m_total_access_count == 1000) {
+    if (m_access_count_by_type[0] == 0) {
+      m_access_ratio = 1000.0;
+    }
+    else {
+      m_access_ratio = static_cast<float>(1.0 * m_access_count_by_type[1] / m_access_count_by_type[0]);
+    }
+    m_total_access_count = 0;
+  }
+
+  // every m_modulo-th set is sampled; the ATD only has 32 sets per
+  // application, so sets that would map past it are not monitored
+  if (set % m_modulo == 0 && set / m_modulo < 32) {
+    list<Addr>& atd = m_atd[appl_id][set / m_modulo];
+    // position of the tag in the recency stack == way counter to credit
+    int depth = 0;
+    auto hit = atd.begin();
+    while (hit != atd.end() && *hit != line_addr) {
+      ++hit;
+      ++depth;
+    }
+    if (hit != atd.end()) {
+      // erase only the entry that hit; list::remove() would also drop every
+      // other equal element (e.g. the 0 placeholders) and shrink the stack
+      ++m_way_counter[appl_id][depth];
+      atd.erase(hit);
+    }
+    else {
+      atd.pop_back();
+    }
+    atd.push_front(line_addr);
+  } 
+
+  if (m_last_partition_cycle + m_partition_period <= m_simBase->m_simulation_cycle) {
+    update_partition();
+  }
+
+  ++m_total_access_count;
+}
+// update application partition every KNOB_UCP_CACHE_PARTITION_PERIOD cycles
+// use lookahead algorithm
+void cache_ucp_c::update_partition(void)
+{
+  // assign at least 1 way
+  fill_n(m_quota, m_max_application, 1);
+  int count = m_num_way - m_num_application;
+
+  // count = ways still to hand out; for GPU applications the way counters are scaled down when the access ratio is high
+  if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) == true && m_access_ratio >= 2.0) {
+    for (int ii = 0; ii < m_num_application; ++ii) {
+      if (m_simBase->m_PCL->get_appl_type(ii) != true)
+        continue;
+
+      for (int jj = 0; jj < m_num_way; ++jj) {
+        m_way_counter[ii][jj] /= *KNOB(KNOB_UCP_CACHE_ACCESS_CONTROL);
+        //m_way_counter[ii][jj] /= m_access_ratio;
+      }
+    }
+  }
+
+  // mark the applications that are left out of the first allocation rounds
+  int skip_cache[m_max_application] = {false};
+  int skip_count = 0;
+  for (int ii = 0; ii < m_num_application; ++ii) {
+    // GPU && PSEL = 0
+    if (*KNOB(KNOB_UCP_CACHE_FOR_GPU)) {
+      if (m_simBase->m_PCL->get_appl_type(ii) == true && m_simBase->m_PCL->get_psel_mask() == false) {
+        skip_cache[ii] = true;
+        ++skip_count;
+      }
+    }
+
+    if (*KNOB(KNOB_CACHE_FOR_STREAM_CPU)) {
+      if (m_simBase->m_PCL->get_appl_type(ii) == false) {
+        skip_cache[ii] = true;
+        ++skip_count;
+      }
+    }
+  }
+
+  // if every application would be skipped, skip none
+  if (skip_count == m_num_application) {
+    for (int ii = 0; ii < m_num_application; ++ii) {
+      skip_cache[ii] = false;
+    }
+  }
+  
+  int total_hit[m_max_application] = {0};
+  for (int ii = 0; ii < m_num_application; ++ii) {
+    total_hit[ii] = m_way_counter[ii][0];
+  }
+
+  // each round gives max_way ways to the application with the best marginal utility; the scan start rotates
+  int random_index = m_simBase->m_simulation_cycle % m_num_application;
+  bool skip = true;
+  while (count > 0) {
+    float max = -1.0;
+    int max_appl_id = -1;
+    int max_way = -1;
+    int max_total_hit = 0;
+
+    bool cpu_skip = false;
+    // emulate GPU interference
+    if (!*KNOB(KNOB_UCP_CACHE_FOR_GPU) && 
+        rand() % 100 >= *KNOB(KNOB_UCP_CACHE_CPU_INTERFERENCE))
+      cpu_skip = true;
+
+    for (int ii = random_index; ii < random_index + m_num_application; ++ii) {
+      int appl_id = ii % m_num_application;
+      int sum = 0;
+
+
+      // skip gpu application partitioning
+      // skip : when there is not hit anymore for cpus, include gpu as well
+      if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) == true && skip && skip_cache[appl_id] == true)
+        continue;
+
+
+      // running streaming CPU application, so skip it
+      if (*KNOB(KNOB_CACHE_FOR_STREAM_CPU) && m_simBase->m_PCL->get_appl_type(appl_id) == false)
+        continue;
+
+
+      // emulate GPU interference
+      if (m_simBase->m_PCL->get_appl_type(appl_id) == false && cpu_skip == true)
+        continue;
+
+
+      // try to give less space to gpu
+      if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) && 
+          m_simBase->m_PCL->get_appl_type(appl_id) == true && 
+          rand() % 100 >= *KNOB(KNOB_UCP_CACHE_GPU_DROP_PROBABILITY))
+        continue;
+
+
+      // try to give less space if access rate is much higher
+      if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) &&
+          m_simBase->m_PCL->get_appl_type(appl_id) == true &&
+          m_access_ratio >= 3.0 &&
+          rand() % 100 >= 50)
+        continue;
+
+
+      // set limitation for gpu in lookahead partitioning algorithm
+      int max_lookup = count;
+      if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) == true && 
+          m_simBase->m_PCL->get_appl_type(appl_id) == true && 
+          count >= *KNOB(KNOB_UCP_CACHE_GPU_MAX_PARTITION_LOOKUP)) {
+        max_lookup = *KNOB(KNOB_UCP_CACHE_GPU_MAX_PARTITION_LOOKUP);
+      }
+
+      for (int jj = m_quota[appl_id]; jj < m_quota[appl_id] + max_lookup; ++jj) {
+        sum += m_way_counter[appl_id][jj];
+        float marginal_utility = static_cast<float>(1.0* sum / (jj - m_quota[appl_id] + 1));
+        if (marginal_utility > max) {
+          max = marginal_utility;
+          max_appl_id = appl_id;
+          max_way = jj - m_quota[appl_id] + 1;
+          max_total_hit = total_hit[appl_id];
+          assert(max_way > 0);
+        }
+        // usual : give it to high total_hit (GPU)
+        // ucp-g : give it evenly
+        else if (marginal_utility == max && skip && total_hit[appl_id] > max_total_hit) {
+          assert(skip == true);
+          max = marginal_utility;
+          max_appl_id = appl_id;
+          max_way = jj - m_quota[appl_id] + 1;
+          max_total_hit = total_hit[appl_id];
+          assert(max_way > 0);
+        }
+      }
+    }
+    
+    // If CPU does not have more hits, stop allocating to CPU, give it to GPUs.
+    if (*KNOB(KNOB_UCP_CACHE_FOR_GPU) && max == 0.0 && skip == true) {
+      skip = false;
+      continue;
+    }
+    // NOTE(review): when every application was skipped this round, the loop retries without changing count
+    if (max_appl_id == -1)
+      continue;
+
+    int current_quota = m_quota[max_appl_id];
+    for (int ii = current_quota; ii < current_quota + max_way; ++ii) {
+      total_hit[max_appl_id] += m_way_counter[max_appl_id][ii];
+    }
+    //cout << max << " " << max_appl_id << " " << max_way << "\n";
+    m_quota[max_appl_id] += max_way;
+    count -= max_way;
+    random_index++;
+  }
+
+  // halve the counter values
+  for (int ii = 0; ii < m_num_application; ++ii) {
+//    fprintf(g_mystderr, "%s appl%d %d at %lld\n", m_name.c_str(), ii, m_quota[ii], m_simBase->m_simulation_cycle);
+    for (int jj = 0; jj < m_num_way; ++jj) {
+      m_way_counter[ii][jj] /= 2;
+    }
+  }
+
+  m_last_partition_cycle = m_simBase->m_simulation_cycle;
+}
+
+void cache_ucp_c::update_set_on_replacement(Addr tag, int appl_id, int set, bool gpuline)
+{
+  // the evicted line no longer belongs to appl_id in this set
+  --m_num_entry[appl_id][set];
+  if (!gpuline) {
+    --m_num_cpu_line;
+    --m_set[set]->m_num_cpu_line;
+    return;
+  }
+  --m_num_gpu_line;
+  --m_set[set]->m_num_gpu_line;
+}
+
diff --git a/src/cache_replacement/ucp.h b/src/cache_replacement/ucp.h
new file mode 100644
index 00000000..f2edf26e
--- /dev/null
+++ b/src/cache_replacement/ucp.h
@@ -0,0 +1,83 @@
+/**********************************************************************************************
+ * File         : ucp.h
+ * Author       : Jaekyu Lee
+ * Date         : 04/26/2011 
+ * SVN          : $Id: cache.h,
+ * Description  : UCP (Qureshi and Patt MICRO06)
+ *********************************************************************************************/
+
+
+#ifndef UCP_H
+#define UCP_H
+
+
+#include <list>
+
+#include "../cache.h"
+
+
+class cache_c;
+
+class cache_ucp_c : public cache_c
+{
+  public:
+    /**
+     * Constructor; passes 1 tile and interleave factor 0 to cache_c
+     */
+    cache_ucp_c(string name, uns num_set, uns assoc, uns line_size, 
+        uns data_size, uns bank_num, bool cache_by_pass, int core_id, 
+        Cache_Type cache_type_info, bool enable_partition, macsim_c* simBase); 
+
+    /**
+     * Destructor
+     */
+    virtual ~cache_ucp_c();
+    
+    /**
+     * find a cache line to replace, honoring the per-application quota
+     * \param set set id
+     */
+    cache_entry_c * find_replacement_line(int set, int appl_id);
+    
+    /**
+     * Initialize a new cache line
+     */
+    void initialize_cache_line(cache_entry_c *ins_line, Addr tag, Addr addr, int appl_id, 
+        bool gpuline, int set_id, bool skip);
+    
+    /**
+     * Update set on replacement
+     */
+    void update_set_on_replacement(Addr tag, int appl_id, int set_id, bool gpuline);
+    /** Per-access hook: updates the ATD / way counters and triggers repartitioning */
+    void update_cache_on_access(Addr tag, int set, int appl_id);
+    void update_atd(Addr tag, int appl_id, int set_id);
+    /* NOTE: update_atd is declared only; ucp.cc provides no definition */
+
+  private:
+    /**
+     * Default constructor - do not implement
+     */
+    cache_ucp_c(); // do not implement
+    
+    void update_partition(void);
+
+    static const int m_max_application = 20;
+
+    int* m_way_counter[m_max_application]; /**< counter for each ways */
+    list<Addr>* m_atd[m_max_application]; /**< auxiliary tag directory */
+    int m_quota[m_max_application]; /**< number of ways assigned to each application */
+    int* m_num_entry[m_max_application]; /**< lines owned per application and set */
+
+    int m_modulo; /**< modulo value for set monitoring */
+    Counter m_last_partition_cycle; /**< cycle of the last repartition; Counter so the simulation cycle is not truncated */
+    int m_num_way;
+    int m_num_application;
+    int m_partition_period;
+    
+    int m_access_count_by_type[2];
+    int m_total_access_count;
+    float m_access_ratio;
+};
+
+#endif
diff --git a/src/memory.cc b/src/memory.cc
index 63272ddf..1c679795 100644
--- a/src/memory.cc
+++ b/src/memory.cc
@@ -51,6 +51,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "bug_detector.h"
 #include "mmu.h"
 
+#include "./cache_replacement/rrip.h"
 #include "config.h"
 
 #include "all_knobs.h"
@@ -174,7 +175,7 @@ memory_c* default_mem(macsim_c* m_simBase) {
 // Default LLC constructor function
 cache_c* default_llc(macsim_c* m_simBase) {
   string llc_type = KNOB(KNOB_LLC_TYPE)->getValue();
-  assert(llc_type == "default");
+  // assert(llc_type == "default");
 
   int num_tiles;
   int interleaving = -1;
@@ -190,12 +191,28 @@ cache_c* default_llc(macsim_c* m_simBase) {
       interleaving = *m_simBase->m_knobs->KNOB_DRAM_ROWBUFFER_SIZE;
     }
   }
+  
+  // cache_c* llc = new cache_c(
+  //   "llc_default", *KNOB(KNOB_LLC_NUM_SET), *KNOB(KNOB_LLC_ASSOC),
+  //   *KNOB(KNOB_LLC_LINE_SIZE), sizeof(dcache_data_s), *KNOB(KNOB_LLC_NUM_BANK),
+  //   false, 0, CACHE_DL1, false, num_tiles, interleaving, m_simBase);
 
-  cache_c* llc = new cache_c(
+  // added to extend existing simulator to add cache replacement policy 
+  if(llc_type == "rrip"){
+
+    cache_c* llc = new cache_rrip_c("llc_rrip", *KNOB(KNOB_LLC_NUM_SET), *KNOB(KNOB_LLC_ASSOC),
+    *KNOB(KNOB_LLC_LINE_SIZE), sizeof(dcache_data_s), *KNOB(KNOB_LLC_NUM_BANK),
+    false, 0, CACHE_DL1, false, num_tiles, interleaving, m_simBase);
+
+    return llc;
+
+  }else{
+   cache_c* llc = new cache_c(
     "llc_default", *KNOB(KNOB_LLC_NUM_SET), *KNOB(KNOB_LLC_ASSOC),
     *KNOB(KNOB_LLC_LINE_SIZE), sizeof(dcache_data_s), *KNOB(KNOB_LLC_NUM_BANK),
     false, 0, CACHE_DL1, false, num_tiles, interleaving, m_simBase);
-  return llc;
+    return llc;
+  }
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////////////