Skip to content

Commit add2e5f

Browse files
committed
Per tier pool stats (#70)
1 parent 6abb498 commit add2e5f

12 files changed

+419
-125
lines changed

cachelib/allocator/Cache.cpp

+6-4
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,7 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
245245
statPrefix + "cache.size.configured",
246246
memStats.configuredRamCacheSize + memStats.nvmCacheSize);
247247

248+
//TODO: add specific per-tier counters
248249
const auto stats = getGlobalCacheStats();
249250

250251
// Eviction Stats
@@ -254,7 +255,8 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
254255
// from both ram and nvm, this is counted as a single eviction from cache.
255256
// Ram Evictions: item evicted from ram but it can be inserted into nvm
256257
const std::string ramEvictionKey = statPrefix + "ram.evictions";
257-
counters_.updateDelta(ramEvictionKey, stats.numEvictions);
258+
counters_.updateDelta(ramEvictionKey,
259+
std::accumulate(stats.numEvictions.begin(), stats.numEvictions.end(), 0));
258260
// Nvm Evictions: item evicted from nvm but it can be still in ram
259261
const std::string nvmEvictionKey = statPrefix + "nvm.evictions";
260262
counters_.updateDelta(nvmEvictionKey, stats.numNvmEvictions);
@@ -296,11 +298,11 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
296298
}
297299

298300
counters_.updateDelta(statPrefix + "cache.alloc_attempts",
299-
stats.allocAttempts);
301+
std::accumulate(stats.allocAttempts.begin(), stats.allocAttempts.end(),0));
300302
counters_.updateDelta(statPrefix + "cache.eviction_attempts",
301-
stats.evictionAttempts);
303+
std::accumulate(stats.evictionAttempts.begin(),stats.evictionAttempts.end(),0));
302304
counters_.updateDelta(statPrefix + "cache.alloc_failures",
303-
stats.allocFailures);
305+
std::accumulate(stats.allocFailures.begin(),stats.allocFailures.end(),0));
304306
counters_.updateDelta(statPrefix + "cache.invalid_allocs",
305307
stats.invalidAllocs);
306308

cachelib/allocator/CacheAllocator-inl.h

+122-34
Original file line number | Diff line number | Diff line change
@@ -417,8 +417,7 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
417417
util::RollingLatencyTracker rollTracker{
418418
(*stats_.classAllocLatency)[tid][pid][cid]};
419419

420-
// TODO: per-tier
421-
(*stats_.allocAttempts)[pid][cid].inc();
420+
(*stats_.allocAttempts)[tid][pid][cid].inc();
422421

423422
void* memory = allocator_[tid]->allocate(pid, requiredSize);
424423

@@ -444,12 +443,12 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
444443
handle = acquire(new (memory) Item(key, size, creationTime, expiryTime));
445444
if (handle) {
446445
handle.markNascent();
447-
(*stats_.fragmentationSize)[pid][cid].add(
446+
(*stats_.fragmentationSize)[tid][pid][cid].add(
448447
util::getFragmentation(*this, *handle));
449448
}
450449

451450
} else { // failed to allocate memory.
452-
(*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier
451+
(*stats_.allocFailures)[tid][pid][cid].inc();
453452
// wake up rebalancer
454453
if (!config_.poolRebalancerDisableForcedWakeUp && poolRebalancer_) {
455454
poolRebalancer_->wakeUp();
@@ -521,16 +520,14 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
521520
util::RollingLatencyTracker rollTracker{
522521
(*stats_.classAllocLatency)[tid][pid][cid]};
523522

524-
// TODO: per-tier? Right now stats_ are not used in any public periodic
525-
// worker
526-
(*stats_.allocAttempts)[pid][cid].inc();
523+
(*stats_.allocAttempts)[tid][pid][cid].inc();
527524

528525
void* memory = allocator_[tid]->allocate(pid, requiredSize);
529526
if (memory == nullptr) {
530527
memory = findEviction(tid, pid, cid);
531528
}
532529
if (memory == nullptr) {
533-
(*stats_.allocFailures)[pid][cid].inc();
530+
(*stats_.allocFailures)[tid][pid][cid].inc();
534531
return WriteHandle{};
535532
}
536533

@@ -542,7 +539,7 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
542539

543540
if (child) {
544541
child.markNascent();
545-
(*stats_.fragmentationSize)[pid][cid].add(
542+
(*stats_.fragmentationSize)[tid][pid][cid].add(
546543
util::getFragmentation(*this, *child));
547544
}
548545

@@ -857,7 +854,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
857854
stats_.perPoolEvictionAgeSecs_[allocInfo.poolId].trackValue(refreshTime);
858855
}
859856

860-
(*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
857+
(*stats_.fragmentationSize)[tid][allocInfo.poolId][allocInfo.classId].sub(
861858
util::getFragmentation(*this, it));
862859

863860
// Chained items can only end up in this place if the user has allocated
@@ -940,7 +937,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
940937

941938
const auto childInfo =
942939
allocator_[tid]->getAllocInfo(static_cast<const void*>(head));
943-
(*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub(
940+
(*stats_.fragmentationSize)[tid][childInfo.poolId][childInfo.classId].sub(
944941
util::getFragmentation(*this, *head));
945942

946943
removeFromMMContainer(*head);
@@ -1582,20 +1579,20 @@ CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
15821579
auto& mmContainer = getMMContainer(tid, pid, cid);
15831580
bool lastTier = tid+1 >= getNumTiers();
15841581

1585-
mmContainer.withEvictionIterator([this, pid, cid, &candidate, &toRecycle,
1582+
mmContainer.withEvictionIterator([this, tid, pid, cid, &candidate, &toRecycle,
15861583
&searchTries, &mmContainer, &lastTier,
15871584
&token](auto&& itr) {
15881585
if (!itr) {
15891586
++searchTries;
1590-
(*stats_.evictionAttempts)[pid][cid].inc();
1587+
(*stats_.evictionAttempts)[tid][pid][cid].inc();
15911588
return;
15921589
}
15931590

15941591
while ((config_.evictionSearchTries == 0 ||
15951592
config_.evictionSearchTries > searchTries) &&
15961593
itr) {
15971594
++searchTries;
1598-
(*stats_.evictionAttempts)[pid][cid].inc();
1595+
(*stats_.evictionAttempts)[tid][pid][cid].inc();
15991596

16001597
auto* toRecycle_ = itr.get();
16011598
auto* candidate_ =
@@ -1701,6 +1698,7 @@ CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
17011698
XDCHECK(!candidate->isAccessible());
17021699
XDCHECK(candidate->getKey() == evictedToNext->getKey());
17031700

1701+
(*stats_.numWritebacks)[tid][pid][cid].inc();
17041702
wakeUpWaiters(*candidate, std::move(evictedToNext));
17051703
}
17061704

@@ -1728,9 +1726,9 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
17281726
// NULL. If `ref` == 0 then it means that we are the last holder of
17291727
// that item.
17301728
if (candidate->hasChainedItem()) {
1731-
(*stats_.chainedItemEvictions)[pid][cid].inc();
1729+
(*stats_.chainedItemEvictions)[tid][pid][cid].inc();
17321730
} else {
1733-
(*stats_.regularItemEvictions)[pid][cid].inc();
1731+
(*stats_.regularItemEvictions)[tid][pid][cid].inc();
17341732
}
17351733

17361734
if (auto eventTracker = getEventTracker()) {
@@ -2333,7 +2331,7 @@ bool CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
23332331
const auto tid = getTierId(item);
23342332
const auto allocInfo =
23352333
allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
2336-
(*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc();
2334+
(*stats_.cacheHits)[tid][allocInfo.poolId][allocInfo.classId].inc();
23372335

23382336
// track recently accessed items if needed
23392337
if (UNLIKELY(config_.trackRecentItemsForDump)) {
@@ -2802,6 +2800,8 @@ size_t CacheAllocator<CacheTrait>::getPoolSize(PoolId poolId) const {
28022800

28032801
template <typename CacheTrait>
28042802
PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
2803+
//this pool ref is just used to get class ids, which will be the
2804+
//same across tiers
28052805
const auto& pool = allocator_[currentTier()]->getPool(poolId);
28062806
const auto& allocSizes = pool.getAllocSizes();
28072807
auto mpStats = pool.getStats();
@@ -2820,24 +2820,42 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
28202820
// TODO export evictions, numItems etc from compact cache directly.
28212821
if (!isCompactCache) {
28222822
for (const ClassId cid : classIds) {
2823-
uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get();
2824-
XDCHECK(mmContainers_[currentTier()][poolId][cid],
2825-
folly::sformat("Pid {}, Cid {} not initialized.", poolId, cid));
2823+
uint64_t allocAttempts = 0, evictionAttempts = 0, allocFailures = 0,
2824+
fragmentationSize = 0, classHits = 0, chainedItemEvictions = 0,
2825+
regularItemEvictions = 0, numWritebacks = 0;
2826+
MMContainerStat mmContainerStats;
2827+
for (TierId tid = 0; tid < getNumTiers(); tid++) {
2828+
allocAttempts += (*stats_.allocAttempts)[tid][poolId][cid].get();
2829+
evictionAttempts += (*stats_.evictionAttempts)[tid][poolId][cid].get();
2830+
allocFailures += (*stats_.allocFailures)[tid][poolId][cid].get();
2831+
fragmentationSize += (*stats_.fragmentationSize)[tid][poolId][cid].get();
2832+
classHits += (*stats_.cacheHits)[tid][poolId][cid].get();
2833+
chainedItemEvictions += (*stats_.chainedItemEvictions)[tid][poolId][cid].get();
2834+
regularItemEvictions += (*stats_.regularItemEvictions)[tid][poolId][cid].get();
2835+
numWritebacks += (*stats_.numWritebacks)[tid][poolId][cid].get();
2836+
mmContainerStats += getMMContainerStat(tid, poolId, cid);
2837+
XDCHECK(mmContainers_[tid][poolId][cid],
2838+
folly::sformat("Tid {}, Pid {}, Cid {} not initialized.", tid, poolId, cid));
2839+
}
28262840
cacheStats.insert(
28272841
{cid,
2828-
{allocSizes[cid], (*stats_.allocAttempts)[poolId][cid].get(),
2829-
(*stats_.evictionAttempts)[poolId][cid].get(),
2830-
(*stats_.allocFailures)[poolId][cid].get(),
2831-
(*stats_.fragmentationSize)[poolId][cid].get(), classHits,
2832-
(*stats_.chainedItemEvictions)[poolId][cid].get(),
2833-
(*stats_.regularItemEvictions)[poolId][cid].get(),
2834-
getMMContainerStat(currentTier(), poolId, cid)}});
2842+
{allocSizes[cid],
2843+
allocAttempts,
2844+
evictionAttempts,
2845+
allocFailures,
2846+
fragmentationSize,
2847+
classHits,
2848+
chainedItemEvictions,
2849+
regularItemEvictions,
2850+
numWritebacks,
2851+
mmContainerStats}});
28352852
totalHits += classHits;
28362853
}
28372854
}
28382855

28392856
PoolStats ret;
28402857
ret.isCompactCache = isCompactCache;
2858+
//pool name is also shared among tiers
28412859
ret.poolName = allocator_[currentTier()]->getPoolName(poolId);
28422860
ret.poolSize = pool.getPoolSize();
28432861
ret.poolUsableSize = pool.getPoolUsableSize();
@@ -2850,6 +2868,59 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
28502868
return ret;
28512869
}
28522870

2871+
// Returns the stats of pool `poolId` restricted to tier `tid`: the pool
// name, sizes, per-class counters and MM container stats are all read from
// that tier's allocator/counters only.
// @param tid     tier whose allocator and counters are read
// @param poolId  pool to report on
// @return PoolStats; its per-class cacheStats map is left empty (and
//         numPoolGetHits is 0) when the pool is a compact cache
template <typename CacheTrait>
2872+
PoolStats CacheAllocator<CacheTrait>::getPoolStats(TierId tid, PoolId poolId) const {
2873+
const auto& pool = allocator_[tid]->getPool(poolId);
2874+
const auto& allocSizes = pool.getAllocSizes();
2875+
auto mpStats = pool.getStats();
2876+
const auto& classIds = mpStats.classIds;
2877+
2878+
// check if this is a compact cache.
2879+
bool isCompactCache = false;
2880+
{
2881+
folly::SharedMutex::ReadHolder lock(compactCachePoolsLock_); // held only for this scope, while the flag is read
2882+
isCompactCache = isCompactCachePool_[poolId];
2883+
}
2884+
2885+
std::unordered_map<ClassId, CacheStat> cacheStats;
2886+
uint64_t totalHits = 0;
2887+
// cacheStats is only meaningful for pools that are not compact caches.
2888+
// TODO export evictions, numItems etc from compact cache directly.
2889+
if (!isCompactCache) {
2890+
for (const ClassId cid : classIds) {
2891+
uint64_t classHits = (*stats_.cacheHits)[tid][poolId][cid].get();
2892+
XDCHECK(mmContainers_[tid][poolId][cid],
2893+
folly::sformat("Tid {}, Pid {}, Cid {} not initialized.", tid, poolId, cid));
2894+
cacheStats.insert(
2895+
{cid,
2896+
{allocSizes[cid],
2897+
(*stats_.allocAttempts)[tid][poolId][cid].get(),
2898+
(*stats_.evictionAttempts)[tid][poolId][cid].get(),
2899+
(*stats_.allocFailures)[tid][poolId][cid].get(),
2900+
(*stats_.fragmentationSize)[tid][poolId][cid].get(),
2901+
classHits,
2902+
(*stats_.chainedItemEvictions)[tid][poolId][cid].get(),
2903+
(*stats_.regularItemEvictions)[tid][poolId][cid].get(),
2904+
(*stats_.numWritebacks)[tid][poolId][cid].get(),
2905+
getMMContainerStat(tid, poolId, cid)}});
2906+
totalHits += classHits;
2907+
}
2908+
}
2909+
2910+
PoolStats ret;
2911+
ret.isCompactCache = isCompactCache;
2912+
ret.poolName = allocator_[tid]->getPoolName(poolId);
2913+
ret.poolSize = pool.getPoolSize();
2914+
ret.poolUsableSize = pool.getPoolUsableSize();
2915+
ret.poolAdvisedSize = pool.getPoolAdvisedSize();
2916+
ret.cacheStats = std::move(cacheStats);
2917+
ret.mpStats = std::move(mpStats);
2918+
ret.numPoolGetHits = totalHits;
2919+
ret.evictionAgeSecs = stats_.perPoolEvictionAgeSecs_[poolId].estimate(); // NOTE(review): indexed by pool only, so this value is not tier-specific
2920+
2921+
return ret;
2922+
}
2923+
28532924
template <typename CacheTrait>
28542925
ACStats CacheAllocator<CacheTrait>::getACStats(TierId tid,
28552926
PoolId poolId,
@@ -3100,7 +3171,7 @@ bool CacheAllocator<CacheTrait>::moveForSlabRelease(
31003171
const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory());
31013172
allocator_[tid]->free(&oldItem);
31023173

3103-
(*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
3174+
(*stats_.fragmentationSize)[tid][allocInfo.poolId][allocInfo.classId].sub(
31043175
util::getFragmentation(*this, oldItem));
31053176
stats_.numMoveSuccesses.inc();
31063177
return true;
@@ -3379,12 +3450,13 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
33793450
nvmCache_->put(*evicted, std::move(token));
33803451
}
33813452

3453+
const auto tid = getTierId(*evicted);
33823454
const auto allocInfo =
3383-
allocator_[getTierId(*evicted)]->getAllocInfo(static_cast<const void*>(evicted));
3455+
allocator_[tid]->getAllocInfo(static_cast<const void*>(evicted));
33843456
if (evicted->hasChainedItem()) {
3385-
(*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId].inc();
3457+
(*stats_.chainedItemEvictions)[tid][allocInfo.poolId][allocInfo.classId].inc();
33863458
} else {
3387-
(*stats_.regularItemEvictions)[allocInfo.poolId][allocInfo.classId].inc();
3459+
(*stats_.regularItemEvictions)[tid][allocInfo.poolId][allocInfo.classId].inc();
33883460
}
33893461

33903462
stats_.numEvictionSuccesses.inc();
@@ -3607,8 +3679,13 @@ folly::IOBufQueue CacheAllocator<CacheTrait>::saveStateToIOBuf() {
36073679
for (PoolId pid : pools) {
36083680
for (unsigned int cid = 0; cid < (*stats_.fragmentationSize)[pid].size();
36093681
++cid) {
3682+
uint64_t fragmentationSize = 0;
3683+
for (TierId tid = 0; tid < getNumTiers(); tid++) {
3684+
fragmentationSize += (*stats_.fragmentationSize)[tid][pid][cid].get();
3685+
}
36103686
metadata_.fragmentationSize()[pid][static_cast<ClassId>(cid)] =
3611-
(*stats_.fragmentationSize)[pid][cid].get();
3687+
fragmentationSize;
3688+
36123689
}
36133690
if (isCompactCachePool_[pid]) {
36143691
metadata_.compactCachePools()->push_back(pid);
@@ -3854,8 +3931,19 @@ void CacheAllocator<CacheTrait>::initStats() {
38543931
// deserialize the fragmentation size of each thread.
38553932
for (const auto& pid : *metadata_.fragmentationSize()) {
38563933
for (const auto& cid : pid.second) {
3857-
(*stats_.fragmentationSize)[pid.first][cid.first].set(
3858-
static_cast<uint64_t>(cid.second));
3934+
//in multi-tier we serialized as the sum - no way
3935+
//to get the per-tier values back, so just split it evenly across tiers for now
3936+
//TODO: proper multi-tier serialization
3937+
uint64_t total = static_cast<uint64_t>(cid.second);
3938+
uint64_t part = total / getNumTiers();
3939+
uint64_t sum = 0;
3940+
for (TierId tid = 1; tid < getNumTiers(); tid++) {
3941+
(*stats_.fragmentationSize)[tid][pid.first][cid.first].set(part);
3942+
sum += part;
3943+
}
3944+
uint64_t leftover = total - sum;
3945+
(*stats_.fragmentationSize)[0][pid.first][cid.first].set(leftover);
3946+
38593947
}
38603948
}
38613949

cachelib/allocator/CacheAllocator.h

+4-2
Original file line number | Diff line number | Diff line change
@@ -1240,6 +1240,8 @@ class CacheAllocator : public CacheBase {
12401240

12411241
// pool stats by pool id
12421242
PoolStats getPoolStats(PoolId pid) const override final;
1243+
// pool stats by tier id and pool id
1244+
PoolStats getPoolStats(TierId tid, PoolId pid) const;
12431245

12441246
// This can be expensive so it is not part of PoolStats
12451247
PoolEvictionAgeStats getPoolEvictionAgeStats(
@@ -2045,9 +2047,9 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
20452047
XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
20462048

20472049
if (candidate->hasChainedItem()) {
2048-
(*stats_.chainedItemEvictions)[pid][cid].inc();
2050+
(*stats_.chainedItemEvictions)[tid][pid][cid].inc();
20492051
} else {
2050-
(*stats_.regularItemEvictions)[pid][cid].inc();
2052+
(*stats_.regularItemEvictions)[tid][pid][cid].inc();
20512053
}
20522054

20532055
// it's safe to recycle the item here as there are no more

0 commit comments

Comments
 (0)