Skip to content

Commit

Permalink
Merge pull request #14 from chenhao-ye/fix-ghost-cache
Browse files Browse the repository at this point in the history
Fix-ghost-cache
  • Loading branch information
chenhao-ye authored Oct 6, 2024
2 parents b032947 + d4b049e commit 022e590
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 92 deletions.
102 changes: 63 additions & 39 deletions include/gcache/ghost_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,16 @@ class GhostCache {
uint32_t min_size;
uint32_t max_size;
uint32_t num_ticks;
uint32_t lru_size; // Number of handles in cache LRU right now

// Key is block_id/block number
// Value is "size_idx", which means the handle will in cache if the cache size
// is ((size_idx + 1) * tick) but not if the cache size is (size_idx * tick).
// Value is "size_idx", which is the least non-negative number such that the
// key will in cache if the cache size is (size_idx * tick) + min_size
LRUCache<uint32_t, uint32_t, Hash> cache;

using Handle_t = typename LRUCache<uint32_t, uint32_t, Hash>::Handle_t;
using Node_t = typename LRUCache<uint32_t, uint32_t, Hash>::Node_t;
// these must be placed after num_ticks to ensure a correct ctor order
std::vector<Node_t*> size_boundaries;
std::vector<Node_t*> boundaries;
std::vector<CacheStat> caches_stat;

void access_impl(uint32_t block_id, uint32_t hash, AccessMode mode);
Expand All @@ -50,15 +49,16 @@ class GhostCache {
min_size(min_size),
max_size(max_size),
num_ticks((max_size - min_size) / tick + 1),
lru_size(0),
cache(),
size_boundaries(num_ticks, nullptr),
boundaries(num_ticks - 1, nullptr),
caches_stat(num_ticks) {
assert(tick > 0);
assert(min_size > 1); // otherwise the first boundary will be LRU evicted
assert(min_size + (num_ticks - 1) * tick == max_size);
assert(num_ticks > 2);
cache.init(max_size);
}

void access(uint32_t block_id, AccessMode mode = AccessMode::DEFAULT) {
access_impl(block_id, Hash{}(block_id), mode);
}
Expand Down Expand Up @@ -87,6 +87,18 @@ class GhostCache {
for (auto& s : caches_stat) s.reset();
}

// For each item in the LRU list, call fn(key) in LRU order
template <typename Fn>
void for_each_lru(Fn&& fn) {
cache.for_each_lru([&fn](Handle_t h) { fn(h.get_key()); });
}

// For each item in the LRU list, call fn(key) in LRU order
template <typename Fn>
void for_each_mru(Fn&& fn) {
cache.for_each_mru([&fn](Handle_t h) { fn(h.get_key()); });
}

std::ostream& print(std::ostream& os, int indent = 0) const;
friend std::ostream& operator<<(std::ostream& os, const GhostCache& c) {
return c.print(os);
Expand Down Expand Up @@ -145,47 +157,61 @@ class SampledGhostCache : public GhostCache<Hash> {
template <typename Hash>
inline void GhostCache<Hash>::access_impl(uint32_t block_id, uint32_t hash,
AccessMode mode) {
uint32_t size_idx;
Handle_t s; // successor
Handle_t h = cache.refresh(block_id, hash, s);
assert(h); // Since there is no handle in use, allocation must never fail.

/**
* To reason through the code below, consider an example where min_size=1,
* max_size=5, tick=2.
* DummyHead <=> A <=> B <=> C <=> D <=> E, where E is MRU and A is LRU.
* size_idx: 2, 1, 1, 0, 0.
* size_boundaries: [1] [0]
* To reason through the code below, consider an example where min_size=3,
* max_size=7, tick=2, num_ticks=3.
* (LRU) (MRU)
* DummyHead <=> A <=> B <=> C <=> D <=> E <=> F <=> G.
* size_idx: 2, 2, 1, 1, 0, 0, 0.
* boundaries: [1] [0]
*
* If B is accessed, the list becomes:
* DummyHead <=> A <=> C <=> D <=> E <=> F <=> G <=> B.
* size_idx: 2, 2, 1, 1, 0, 0, 0.
* idx_changed: ^ ^ ^
* boundaries: [1] [0]
*
* Now B is accessed, so the list becomes:
* DummyHead <=> A <=> C <=> D <=> E <=> B.
* size_idx: 2, 1, 1, 0, 0.
* size_boundaries: [1] [0]
* To get to this state,
* 1) boundaries with size_idx < B should move to its next and increase
* its size_idx
* 2) if B itself is a boundary, set that boundary to B's sucessor.
* If instead C is accessed, so the list becomes:
* DummyHead <=> A <=> B <=> D <=> E <=> F <=> G <=> C.
* size_idx: 2, 2, 1, 1, 0, 0, 0.
* idx_changed: ^ ^
* boundaries: [1] [0]
*
* This means when X is accessed:
* 1) every node at boundary with size_idx < X should increase its size_idx
* and the boundary pointer shall move to its next.
* 2) X's size_idx should be set to 0.
* 3) if X itself is a boundary, set that boundary to X's next (sucessor).
*/
uint32_t size_idx;
if (s) { // No new insertion
size_idx = *h;
if (size_idx < num_ticks && size_boundaries[size_idx] == h.node)
size_boundaries[size_idx] = s.node;
if (size_idx < num_ticks - 1 && boundaries[size_idx] == h.node)
boundaries[size_idx] = s.node;
} else {
assert(lru_size <= max_size);
if (lru_size < max_size) ++lru_size;
if (lru_size <= min_size)
size_idx = 0;
else
size_idx = (lru_size - min_size + tick - 1) / tick;
if (size_idx < num_ticks && lru_size - min_size == size_idx * tick)
size_boundaries[size_idx] = cache.lru_.next;
// New insertion happened may because
// 1) even max_size cannot cache the block
// 2) this block has never been accessed before
// For simplicity, both cases are handled uniformly by treating it as a miss
assert(cache.size() <= max_size);
size_idx = cache.size() > min_size
? (cache.size() - min_size + tick - 1) / tick
: 0;
if (size_idx < num_ticks - 1 && cache.size() == size_idx * tick + min_size)
boundaries[size_idx] = cache.lru_.next;
}
for (uint32_t i = 0; i < size_idx; ++i) {
auto& b = size_boundaries[i];
auto& b = boundaries[i];
if (!b) continue;
b->value++;
b = b->next;
}
*h = 0;

switch (mode) {
case AccessMode::DEFAULT:
if (s) {
Expand All @@ -205,27 +231,25 @@ inline void GhostCache<Hash>::access_impl(uint32_t block_id, uint32_t hash,
case AccessMode::NOOP:
break;
}

*h = 0;
}

template <typename Hash>
inline std::ostream& GhostCache<Hash>::print(std::ostream& os,
int indent) const {
os << "GhostCache (tick=" << tick << ", min=" << min_size
<< ", max=" << max_size << ", num_ticks=" << num_ticks
<< ", lru_size=" << lru_size << ") {\n";
<< ", size=" << cache.size() << ") {\n";

for (int i = 0; i < indent + 1; ++i) os << '\t';
os << "Boundaries: [";
if (size_boundaries[0])
os << size_boundaries[0]->key;
if (boundaries[0])
os << boundaries[0]->key;
else
os << "(null)";
for (uint32_t i = 1; i < num_ticks; ++i) {
for (uint32_t i = 1; i < boundaries.size(); ++i) {
os << ", ";
if (size_boundaries[i])
os << size_boundaries[i]->key;
if (boundaries[i])
os << boundaries[i]->key;
else
os << "(null)";
}
Expand Down
13 changes: 8 additions & 5 deletions include/gcache/stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,15 @@ struct CacheStat {
std::ostream& print(std::ostream& os, int width = 0) const {
uint64_t acc_cnt = hit_cnt + miss_cnt;
if (acc_cnt == 0)
return os << " NAN (" << std::setw(width) << std::fixed << hit_cnt
return os << " NAN (" << std::setw(width) << std::fixed << hit_cnt
<< '/' << std::setw(width) << std::fixed << acc_cnt << ')';
return os << std::setw(6) << std::fixed << std::setprecision(2)
<< get_hit_rate() * 100 << "% (" << std::setw(width) << std::fixed
<< hit_cnt << '/' << std::setw(width) << std::fixed << acc_cnt
<< ')';
if (miss_cnt == 0)
os << " 100%";
else
os << std::setw(4) << std::fixed << std::setprecision(1)
<< get_hit_rate() * 100 << '%';
return os << " (" << std::setw(width) << std::fixed << hit_cnt << '/'
<< std::setw(width) << std::fixed << acc_cnt << ')';
}

// print for debugging
Expand Down
Loading

0 comments on commit 022e590

Please sign in to comment.