Merge pull request #13 from chenhao-ye:templated_hash

Templated hash for GhostCache and SampledGhostCache
chenhao-ye · Sep 7, 2024 · 7f66dda · 7f66dda
2 parents 3497925 + 4cf8b6b
commit 7f66dda
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -96,7 +96,7 @@ Ghost cache is a type of cache maintained to answer the question "what the cache
 
 // ctor needs the spectrum of cache sizes: the example below will maintain hit
 // rates for size=[4, 6, 8]
-gcache::GhostCache ghost_cache(/*tick*/ 2, /*min_size*/ 4, /*max_size*/ 8);
+gcache::GhostCache<> ghost_cache(/*tick*/ 2, /*min_size*/ 4, /*max_size*/ 8);
 
 // preheat: fill the cache
 for (auto blk_id : {1, 2, 3, 4, 5, 6, 7, 8}) ghost_cache.access(blk_id);
@@ -123,7 +123,7 @@ Using `SampledGhostCache` is straightforward. It is similar to `GhostCache` with
 ```C++
 // to make sure sampling is reasonable, `tick` must be no smaller than the
 // sample rate, and `min_size` and `max_size` must be multiples of the sample rate.
-gcache::SampledGhostCache</*SampleShift*/5> ghost_cache(
+gcache::SampledGhostCache</*SampleShift*/5, /*Hash*/gcache::ghash> ghost_cache(
   /*tick*/ 64, /*min_size*/ 128, /*max_size*/ 640);
 // or use `gcache::SampledGhostCache<>` to use the defaults (SampleShift=5, Hash=gcache::ghash)
 ```

diff --git a/include/gcache/ghost_cache.h b/include/gcache/ghost_cache.h
@@ -22,6 +22,7 @@ enum AccessMode : uint8_t {
 /**
  * Simulate a set of page caches.
  */
+template <typename Hash = ghash>
 class GhostCache {
  protected:
   uint32_t tick;
@@ -33,10 +34,10 @@ class GhostCache {
   // Key is block_id/block number
   // Value is "size_idx", which means the handle will be in cache if the cache size
   // is ((size_idx + 1) * tick) but not if the cache size is (size_idx * tick).
-  LRUCache<uint32_t, uint32_t, ghash> cache;
+  LRUCache<uint32_t, uint32_t, Hash> cache;
 
-  using Handle_t = LRUCache<uint32_t, uint32_t, ghash>::Handle_t;
-  using Node_t = LRUCache<uint32_t, uint32_t, ghash>::Node_t;
+  using Handle_t = LRUCache<uint32_t, uint32_t, Hash>::Handle_t;
+  using Node_t = LRUCache<uint32_t, uint32_t, Hash>::Node_t;
   // these must be placed after num_ticks to ensure a correct ctor order
   std::vector<Node_t*> size_boundaries;
   std::vector<CacheStat> caches_stat;
@@ -59,7 +60,7 @@ class GhostCache {
     cache.init(max_size);
   }
   void access(uint32_t block_id, AccessMode mode = AccessMode::DEFAULT) {
-    access_impl(block_id, ghash{}(block_id), mode);
+    access_impl(block_id, Hash{}(block_id), mode);
   }
 
   [[nodiscard]] uint32_t get_tick() const { return tick; }
@@ -93,12 +94,12 @@ class GhostCache {
 };
 
 // only sample 1/32 (~3.125%)
-template <uint32_t SampleShift = 5>
-class SampledGhostCache : public GhostCache {
+template <uint32_t SampleShift = 5, typename Hash = ghash>
+class SampledGhostCache : public GhostCache<Hash> {
  public:
   SampledGhostCache(uint32_t tick, uint32_t min_size, uint32_t max_size)
-      : GhostCache(tick >> SampleShift, min_size >> SampleShift,
-                   max_size >> SampleShift) {
+      : GhostCache<Hash>(tick >> SampleShift, min_size >> SampleShift,
+                         max_size >> SampleShift) {
     assert(tick % (1 << SampleShift) == 0);
     assert(min_size % (1 << SampleShift) == 0);
     assert(max_size % (1 << SampleShift) == 0);
@@ -111,13 +112,14 @@ class SampledGhostCache : public GhostCache {
 
   // Only update ghost cache if the first few bits of hash are all zero
   void access(uint32_t block_id, AccessMode mode = AccessMode::DEFAULT) {
-    uint32_t hash = ghash{}(block_id);
-    if ((hash >> (32 - SampleShift)) == 0) access_impl(block_id, hash, mode);
+    uint32_t hash = Hash{}(block_id);
+    if ((hash >> (32 - SampleShift)) == 0)
+      this->access_impl(block_id, hash, mode);
   }
 
-  uint32_t get_tick() const { return tick << SampleShift; }
-  uint32_t get_min_size() const { return min_size << SampleShift; }
-  uint32_t get_max_size() const { return max_size << SampleShift; }
+  uint32_t get_tick() const { return this->tick << SampleShift; }
+  uint32_t get_min_size() const { return this->min_size << SampleShift; }
+  uint32_t get_max_size() const { return this->max_size << SampleShift; }
 
   double get_hit_rate(uint32_t cache_size) const {
     return get_stat(cache_size).get_hit_rate();
@@ -128,20 +130,21 @@ class SampledGhostCache : public GhostCache {
 
   const CacheStat& get_stat(uint32_t cache_size) const {
     cache_size >>= SampleShift;
-    assert(cache_size >= min_size);
-    assert(cache_size <= max_size);
-    assert((cache_size - min_size) % tick == 0);
-    uint32_t size_idx = (cache_size - min_size) / tick;
-    assert(size_idx < num_ticks);
-    return caches_stat[size_idx];
+    assert(cache_size >= this->min_size);
+    assert(cache_size <= this->max_size);
+    assert((cache_size - this->min_size) % this->tick == 0);
+    uint32_t size_idx = (cache_size - this->min_size) / this->tick;
+    assert(size_idx < this->num_ticks);
+    return this->caches_stat[size_idx];
   }
 };
 
 /**
  * When using ghost cache, we assume in_use list is always empty.
  */
-inline void GhostCache::access_impl(uint32_t block_id, uint32_t hash,
-                                    AccessMode mode) {
+template <typename Hash>
+inline void GhostCache<Hash>::access_impl(uint32_t block_id, uint32_t hash,
+                                          AccessMode mode) {
   uint32_t size_idx;
   Handle_t s;  // successor
   Handle_t h = cache.refresh(block_id, hash, s);
@@ -206,7 +209,9 @@ inline void GhostCache::access_impl(uint32_t block_id, uint32_t hash,
   *h = 0;
 }
 
-inline std::ostream& GhostCache::print(std::ostream& os, int indent) const {
+template <typename Hash>
+inline std::ostream& GhostCache<Hash>::print(std::ostream& os,
+                                             int indent) const {
   os << "GhostCache (tick=" << tick << ", min=" << min_size
      << ", max=" << max_size << ", num_ticks=" << num_ticks
      << ", lru_size=" << lru_size << ") {\n";

diff --git a/include/gcache/hash.h b/include/gcache/hash.h
@@ -44,4 +44,16 @@ struct ghash {
   uint32_t operator()(uint32_t x) const noexcept { return crc_u32(x); }
 };
 
+struct idhash {  // identity mapping
+  uint32_t operator()(uint32_t x) const noexcept { return x; }
+};
+
+struct xxhash {
+  uint32_t operator()(uint32_t x) const noexcept { return xxhash_u32(x); }
+};
+
+struct murmurhash {
+  uint32_t operator()(uint32_t x) const noexcept { return murmurhash_u32(x); }
+};
+
 }  // namespace gcache
diff --git a/include/gcache/lru_cache.h b/include/gcache/lru_cache.h
@@ -15,7 +15,9 @@
 
 namespace gcache {
 
+template <typename Hash>
 class GhostCache;
+
 template <typename Tag_t, typename Key_t, typename Value_t, typename Hash>
 class SharedCache;
 
@@ -179,7 +181,9 @@ class LRUCache {
   // Pool for additionally allocated handles.
   std::vector<Node_t*> extra_pool_;
 
+  template <typename H>
   friend class GhostCache;
+
   template <typename T, typename K, typename V, typename H>
   friend class SharedCache;
 

diff --git a/include/gcache/node.h b/include/gcache/node.h
@@ -29,8 +29,11 @@ namespace gcache {
 
 template <typename Key_t, typename Value_t>
 class NodeTable;
+
 template <typename Key_t, typename Value_t, typename Hash>
 class LRUCache;
+
+template <typename Hash>
 class GhostCache;
 
 // LRUNodes forms a circular doubly linked list ordered by access time.
@@ -43,8 +46,11 @@ class LRUNode {
 
  protected:
   friend class NodeTable<Key_t, Value_t>;
+
   template <typename K, typename V, typename H>
   friend class LRUCache;
+
+  template <typename H>
   friend class GhostCache;
 
  public:
@@ -135,8 +141,11 @@ class LRUHandle : public BaseHandle<LRUNode<Key_t, Value_t>> {
 
  protected:
   friend class NodeTable<Key_t, Value_t>;
+
   template <typename K, typename V, typename H>
   friend class LRUCache;
+
+  template <typename H>
   friend class GhostCache;
 
  public: