diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h index aee86a4e3..2b921ff56 100644 --- a/cachelib/allocator/BackgroundMover.h +++ b/cachelib/allocator/BackgroundMover.h @@ -16,9 +16,8 @@ #pragma once -#include "cachelib/allocator/BackgroundMoverStrategy.h" +#include "cachelib/allocator/Cache.h" #include "cachelib/allocator/CacheStats.h" -#include "cachelib/common/AtomicCounter.h" #include "cachelib/common/PeriodicWorker.h" namespace facebook::cachelib { @@ -26,83 +25,139 @@ namespace facebook::cachelib { // needed for the cache api template struct BackgroundMoverAPIWrapper { - static size_t traverseAndEvictItems(C& cache, - unsigned int pid, - unsigned int cid, - size_t batch) { - return cache.traverseAndEvictItems(pid, cid, batch); + // traverse the cache and move items from one tier to another + // @param cache the cache interface + // @param tid the tier to traverse + // @param pid the pool id to traverse + // @param cid the class id to traverse + // @param evictionBatch number of items to evict in one go + // @param promotionBatch number of items to promote in one go + // @return pair of number of items evicted and promoted + static std::pair traverseAndMoveItems(C& cache, + TierId tid, + PoolId pid, + ClassId cid, + size_t evictionBatch, + size_t promotionBatch) { + return cache.traverseAndMoveItems(tid, pid, cid, evictionBatch, promotionBatch); } - - static size_t traverseAndPromoteItems(C& cache, - unsigned int pid, - unsigned int cid, - size_t batch) { - return cache.traverseAndPromoteItems(pid, cid, batch); + static std::pair getApproxUsage(C& cache, + TierId tid, + PoolId pid, + ClassId cid) { + const auto& pool = cache.getPoolByTid(pid, tid); + // we wait until all slabs are allocated before we start evicting + if (!pool.allSlabsAllocated()) { + return {0, 0.0}; + } + return pool.getApproxUsage(cid); + } + static unsigned int getNumTiers(C& cache) { + return cache.getNumTiers(); } }; -enum class MoverDir { Evict = 0, Promote }; - // Periodic worker that evicts items from tiers in batches // The primary aim is to reduce insertion times for new items in the // cache template class BackgroundMover : public PeriodicWorker { public: + using ClassBgStatsType = + std::map>; using Cache = CacheT; // @param cache the cache interface - // @param strategy the stragey class that defines how objects are - // moved (promoted vs. evicted and how much) + // @param evictionBatch number of items to evict in one go + // @param promotionBatch number of items to promote in one go + // @param targetFree target free percentage in the class BackgroundMover(Cache& cache, - std::shared_ptr strategy, - MoverDir direction_); + size_t evictionBatch, + size_t promotionBatch, + double targetFree); ~BackgroundMover() override; BackgroundMoverStats getStats() const noexcept; - std::map> getClassStats() const noexcept; + ClassBgStatsType getPerClassStats() const noexcept { return movesPerClass_; } void setAssignedMemory(std::vector&& assignedMemory); // return id of the worker responsible for promoting/evicting from particlar // pool and allocation calss (id is in range [0, numWorkers)) - static size_t workerId(PoolId pid, ClassId cid, size_t numWorkers); + static size_t workerId(TierId tid, PoolId pid, ClassId cid, size_t numWorkers); private: - std::map> movesPerClass_; + struct TraversalStats { + // record a traversal over all assigned classes + // and its time taken + void recordTraversalTime(uint64_t nsTaken); + + uint64_t getAvgTraversalTimeNs(uint64_t numTraversals) const; + uint64_t getMinTraversalTimeNs() const { return minTraversalTimeNs_; } + uint64_t getMaxTraversalTimeNs() const { return maxTraversalTimeNs_; } + uint64_t getLastTraversalTimeNs() const { return lastTraversalTimeNs_; } + + private: + // time it took us the last time to traverse the cache. + uint64_t lastTraversalTimeNs_{0}; + uint64_t minTraversalTimeNs_{std::numeric_limits::max()}; + uint64_t maxTraversalTimeNs_{0}; + uint64_t totalTraversalTimeNs_{0}; + }; + + TraversalStats traversalStats_; // cache allocator's interface for evicting using Item = typename Cache::Item; Cache& cache_; - std::shared_ptr strategy_; - MoverDir direction_; - - std::function moverFunc; + uint8_t numTiers_{1}; // until we have multi-tier support + size_t evictionBatch_{0}; + size_t promotionBatch_{0}; + double targetFree_{0.03}; // implements the actual logic of running the background evictor void work() override final; void checkAndRun(); - AtomicCounter numMovedItems_{0}; - AtomicCounter numTraversals_{0}; - AtomicCounter totalBytesMoved_{0}; + // populates the toFree map for each class with the number of items to free + std::map getNumItemsToFree( + const std::vector& assignedMemory); + + uint64_t numEvictedItems_{0}; + uint64_t numPromotedItems_{0}; + uint64_t numTraversals_{0}; + + ClassBgStatsType movesPerClass_; std::vector assignedMemory_; folly::DistributedMutex mutex_; }; template -BackgroundMover::BackgroundMover( - Cache& cache, - std::shared_ptr strategy, - MoverDir direction) - : cache_(cache), strategy_(strategy), direction_(direction) { - if (direction_ == MoverDir::Evict) { - moverFunc = BackgroundMoverAPIWrapper::traverseAndEvictItems; - - } else if (direction_ == MoverDir::Promote) { - moverFunc = BackgroundMoverAPIWrapper::traverseAndPromoteItems; - } +BackgroundMover::BackgroundMover(Cache& cache, + size_t evictionBatch, + size_t promotionBatch, + double targetFree) + : cache_(cache), + evictionBatch_(evictionBatch), + promotionBatch_(promotionBatch), + targetFree_(targetFree) { + numTiers_ = BackgroundMoverAPIWrapper::getNumTiers(cache_); + } + +template +void BackgroundMover::TraversalStats::recordTraversalTime( + uint64_t nsTaken) { + lastTraversalTimeNs_ = nsTaken; + minTraversalTimeNs_ = std::min(minTraversalTimeNs_, nsTaken); + maxTraversalTimeNs_ = std::max(maxTraversalTimeNs_, nsTaken); + totalTraversalTimeNs_ += nsTaken; +} + +template +uint64_t BackgroundMover::TraversalStats::getAvgTraversalTimeNs( + uint64_t numTraversals) const { + return numTraversals ? totalTraversalTimeNs_ / numTraversals : 0; } template @@ -123,8 +178,8 @@ template void BackgroundMover::setAssignedMemory( std::vector&& assignedMemory) { XLOG(INFO, "Class assigned to background worker:"); - for (auto [pid, cid] : assignedMemory) { - XLOGF(INFO, "Pid: {}, Cid: {}", pid, cid); + for (auto [tid, pid, cid] : assignedMemory) { + XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid); } mutex_.lock_combine([this, &assignedMemory] { @@ -132,57 +187,97 @@ void BackgroundMover::setAssignedMemory( }); } -// Look for classes that exceed the target memory capacity -// and return those for eviction +template +std::map +BackgroundMover::getNumItemsToFree( + const std::vector& assignedMemory) { + std::map toFree; + for (const auto& md : assignedMemory) { + const auto [tid, pid, cid] = md; + const auto& pool = cache_.getPool(pid); + const auto [activeItems, usage] = + BackgroundMoverAPIWrapper::getApproxUsage(cache_, tid, pid, cid); + if (usage < 1 - targetFree_) { + toFree[md] = 0; + } else { + size_t maxItems = activeItems / usage; + size_t targetItems = maxItems * (1 - targetFree_); + size_t toFreeItems = + activeItems > targetItems ? activeItems - targetItems : 0; + toFree[md] = toFreeItems; + } + } + return toFree; +} + template void BackgroundMover::checkAndRun() { auto assignedMemory = mutex_.lock_combine([this] { return assignedMemory_; }); - - unsigned int moves = 0; - auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory); - - for (size_t i = 0; i < batches.size(); i++) { - const auto [pid, cid] = assignedMemory[i]; - const auto batch = batches[i]; - - if (batch == 0) { - continue; + auto toFree = getNumItemsToFree(assignedMemory); // calculate the number of + // items to free + while (true) { + bool allDone = true; + for (auto md : assignedMemory) { + const auto [tid, pid, cid] = md; + size_t evictionBatch = evictionBatch_; + size_t promotionBatch = 0; // will enable with multi-tier support + if (toFree[md] == 0) { + // no eviction work to be done since there is already at least + // targetFree remaining in the class + evictionBatch = 0; + } else { + allDone = false; // we still have some items to free + } + if (promotionBatch + evictionBatch > 0) { + const auto begin = util::getCurrentTimeNs(); + // try moving BATCH items from the class in order to reach free target + auto moved = BackgroundMoverAPIWrapper::traverseAndMoveItems( + cache_, tid, pid, cid, evictionBatch, promotionBatch); + numEvictedItems_ += moved.first; + toFree[md] > moved.first ? toFree[md] -= moved.first : toFree[md] = 0; + numPromotedItems_ += moved.second; + auto curr = movesPerClass_[md]; + curr.first += moved.first; + curr.second += moved.second; + movesPerClass_[md] = curr; + numTraversals_++; + auto end = util::getCurrentTimeNs(); + traversalStats_.recordTraversalTime(end > begin ? end - begin : 0); + } + } + if (shouldStopWork() || allDone) { + break; } - - // try moving BATCH items from the class in order to reach free target - auto moved = moverFunc(cache_, pid, cid, batch); - moves += moved; - movesPerClass_[pid][cid] += moved; - totalBytesMoved_.add(moved * cache_.getPool(pid).getAllocSizes()[cid]); } - - numTraversals_.inc(); - numMovedItems_.add(moves); } template BackgroundMoverStats BackgroundMover::getStats() const noexcept { BackgroundMoverStats stats; - stats.numMovedItems = numMovedItems_.get(); - stats.runCount = numTraversals_.get(); - stats.totalBytesMoved = totalBytesMoved_.get(); + stats.numEvictedItems = numEvictedItems_; + stats.numPromotedItems = numPromotedItems_; + stats.numTraversals = numTraversals_; + stats.runCount = getRunCount(); + stats.avgItemsMoved = + (double)(stats.numEvictedItems + stats.numPromotedItems) / + (double)numTraversals_; + stats.lastTraversalTimeNs = traversalStats_.getLastTraversalTimeNs(); + stats.avgTraversalTimeNs = + traversalStats_.getAvgTraversalTimeNs(numTraversals_); + stats.minTraversalTimeNs = traversalStats_.getMinTraversalTimeNs(); + stats.maxTraversalTimeNs = traversalStats_.getMaxTraversalTimeNs(); return stats; } template -std::map> -BackgroundMover::getClassStats() const noexcept { - return movesPerClass_; -} - -template -size_t BackgroundMover::workerId(PoolId pid, +size_t BackgroundMover::workerId(TierId tid, + PoolId pid, ClassId cid, size_t numWorkers) { XDCHECK(numWorkers); // TODO: came up with some better sharding (use hashing?) - return (pid + cid) % numWorkers; + return (tid + pid + cid) % numWorkers; } -} // namespace facebook::cachelib +}; // namespace facebook::cachelib diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h deleted file mode 100644 index abf37edd1..000000000 --- a/cachelib/allocator/BackgroundMoverStrategy.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "cachelib/allocator/Cache.h" - -namespace facebook { -namespace cachelib { - -struct MemoryDescriptorType { - MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {} - PoolId pid_; - ClassId cid_; -}; - -// Base class for background eviction strategy. -class BackgroundMoverStrategy { - public: - // Calculate how many items should be moved by the background mover - // - // @param cache Cache allocator that implements CacheBase - // @param acVec vector of memory descriptors for which batch sizes should - // be calculated - // - // @return vector of batch sizes, where each element in the vector specifies - // batch size for the memory descriptor in acVec - virtual std::vector calculateBatchSizes( - const CacheBase& cache, std::vector acVec) = 0; - - virtual ~BackgroundMoverStrategy() = default; -}; - -} // namespace cachelib -} // namespace facebook diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt index 6103cdc82..f94c8c90c 100644 --- a/cachelib/allocator/CMakeLists.txt +++ b/cachelib/allocator/CMakeLists.txt @@ -35,7 +35,6 @@ add_library (cachelib_allocator CCacheManager.cpp ContainerTypes.cpp FreeMemStrategy.cpp - FreeThresholdStrategy.cpp HitsPerSlabStrategy.cpp LruTailAgeStrategy.cpp MarginalHitsOptimizeStrategy.cpp diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h index e225ba8a0..d5d99800b 100644 --- a/cachelib/allocator/Cache.h +++ b/cachelib/allocator/Cache.h @@ -73,6 +73,25 @@ enum class DestructorContext { kRemovedFromNVM }; +// a tuple that describes the memory pool and allocation class +struct MemoryDescriptorType { + MemoryDescriptorType(TierId tid, PoolId pid, ClassId cid) : + tid_(tid), pid_(pid), cid_(cid) {} + TierId tid_; + PoolId pid_; + ClassId cid_; + + bool operator<(const MemoryDescriptorType& rhs) const { + return std::make_tuple(tid_, pid_, cid_) < + std::make_tuple(rhs.tid_, rhs.pid_, rhs.cid_); + } + + bool operator==(const MemoryDescriptorType& rhs) const { + return std::make_tuple(tid_, pid_, cid_) == + std::make_tuple(rhs.tid_, rhs.pid_, rhs.cid_); + } +}; + // A base class of cache exposing members and status agnostic of template type. class CacheBase { public: @@ -96,12 +115,24 @@ class CacheBase { // @param poolId The pool id to query virtual const MemoryPool& getPool(PoolId poolId) const = 0; + // Get the reference to a memory pool using a tier id, for stats purposes + // + // @param poolId The pool id to query + // @param tierId The tier of the pool id + virtual const MemoryPool& getPoolByTid(PoolId poolId, TierId tid) const = 0; + // Get Pool specific stats (regular pools). This includes stats from the // Memory Pool and also the cache. // // @param poolId the pool id virtual PoolStats getPoolStats(PoolId poolId) const = 0; + // Get Allocation Class specific stats. + // + // @param poolId the pool id + // @param classId the class id + virtual ACStats getACStats(TierId tid,PoolId poolId, ClassId classId) const = 0; + // @param poolId the pool id virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0; diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 8238ae2fe..1854bad52 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -352,6 +352,43 @@ class CacheAllocator : public CacheBase { // if user-supplied SyncObj can fail. e.g. if a lock can timeout. virtual bool isValid() const { return true; } }; + + // For background worker stats + using ClassBgStatsType = + std::map>; + + // Movement (eviction/promotion) related data returned from + // function executed under mmContainer lock + struct MoveData { + MoveData() = delete; + MoveData(Item* candidate_, + Item* toRecycle_, + Item* toRecycleParent_, + bool chainedItem_, + bool expired_, + typename NvmCacheT::PutToken token_, + WriteHandle candidateHandle_) + : candidate(candidate_), + toRecycle(toRecycle_), + toRecycleParent(toRecycleParent_), + expired(expired_), + chainedItem(chainedItem_), + token(std::move(token_)), + candidateHandle(std::move(candidateHandle_)) {} + + // item that is candidate for eviction + Item* candidate; + // acutal alloc that will be recycled + // back up to allocator + Item* toRecycle; + // possible parent ref + Item* toRecycleParent; + bool expired; // is item expired + bool chainedItem; // is it a chained item + typename NvmCacheT::PutToken token; // put token for NVM cache + WriteHandle candidateHandle; // hande in case we don't use moving bit + }; + using ChainedItemMovingSync = std::function(Key)>; using AccessContainer = typename Item::AccessContainer; @@ -363,6 +400,7 @@ class CacheAllocator : public CacheBase { using MMSerializationTypeContainer = typename MMType::SerializationTypeContainer; using AccessSerializationType = typename AccessType::SerializationType; + using AllocatorsSerializationType = serialization::MemoryAllocatorCollection; using ShmManager = facebook::cachelib::ShmManager; @@ -712,10 +750,7 @@ class CacheAllocator : public CacheBase { uint32_t getUsableSize(const Item& item) const; // create memory assignment to bg workers - auto createBgWorkerMemoryAssignments(size_t numWorkers); - - // whether bg worker should be woken - bool shouldWakeupBgEvictor(PoolId pid, ClassId cid); + auto createBgWorkerMemoryAssignments(size_t numWorkers, TierId tid); // Get a random item from memory // This is useful for profiling and sampling cachelib managed memory @@ -835,7 +870,7 @@ class CacheAllocator : public CacheBase { // @param config new config for the pool // // @throw std::invalid_argument if the poolId is invalid - void overridePoolConfig(PoolId pid, const MMConfig& config); + void overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config); // update an existing pool's rebalance strategy // @@ -876,8 +911,9 @@ class CacheAllocator : public CacheBase { // @return true if the operation succeeded. false if the size of the pool is // smaller than _bytes_ // @throw std::invalid_argument if the poolId is invalid. + // TODO: should call shrinkPool for specific tier? bool shrinkPool(PoolId pid, size_t bytes) { - return allocator_->shrinkPool(pid, bytes); + return allocator_[currentTier()]->shrinkPool(pid, bytes); } // grow an existing pool by _bytes_. This will fail if there is no @@ -886,8 +922,9 @@ class CacheAllocator : public CacheBase { // @return true if the pool was grown. false if the necessary number of // bytes were not available. // @throw std::invalid_argument if the poolId is invalid. + // TODO: should call growPool for specific tier? bool growPool(PoolId pid, size_t bytes) { - return allocator_->growPool(pid, bytes); + return allocator_[currentTier()]->growPool(pid, bytes); } // move bytes from one pool to another. The source pool should be at least @@ -900,7 +937,7 @@ class CacheAllocator : public CacheBase { // correct size to do the transfer. // @throw std::invalid_argument if src or dest is invalid pool bool resizePools(PoolId src, PoolId dest, size_t bytes) override { - return allocator_->resizePools(src, dest, bytes); + return allocator_[currentTier()]->resizePools(src, dest, bytes); } // Add a new compact cache with given name and size @@ -1083,25 +1120,12 @@ class CacheAllocator : public CacheBase { bool startNewReaper(std::chrono::milliseconds interval, util::Throttler::Config reaperThrottleConfig); - // start background promoter, starting/stopping of this worker - // should not be done concurrently with addPool - // @param interval the period this worker fires - // @param strategy strategy to promote items - // @param threads number of threads used by the worker - bool startNewBackgroundPromoter( - std::chrono::milliseconds interval, - std::shared_ptr strategy, - size_t threads); - - // start background evictor, starting/stopping of this worker - // should not be done concurrently with addPool - // @param interval the period this worker fires - // @param strategy strategy to evict items - // @param threads number of threads used by the worker - bool startNewBackgroundEvictor( - std::chrono::milliseconds interval, - std::shared_ptr strategy, - size_t threads); + // start background mover + bool startNewBackgroundMover(std::chrono::milliseconds interval, + size_t evictionBatch, + size_t promotionBatch, + double targetFree, + size_t threads); // Stop existing workers with a timeout bool stopPoolRebalancer(std::chrono::seconds timeout = std::chrono::seconds{ @@ -1111,10 +1135,8 @@ class CacheAllocator : public CacheBase { 0}); bool stopMemMonitor(std::chrono::seconds timeout = std::chrono::seconds{0}); bool stopReaper(std::chrono::seconds timeout = std::chrono::seconds{0}); - bool stopBackgroundEvictor( - std::chrono::seconds timeout = std::chrono::seconds{0}); - bool stopBackgroundPromoter( - std::chrono::seconds timeout = std::chrono::seconds{0}); + bool stopBackgroundMover(std::chrono::seconds timeout = std::chrono::seconds{ + 0}); // Set pool optimization to either true or false // @@ -1129,12 +1151,13 @@ class CacheAllocator : public CacheBase { // @throw std::invalid_argument if the memory does not belong to this // cache allocator AllocInfo getAllocInfo(const void* memory) const { - return allocator_->getAllocInfo(memory); + return allocator_[getTierId(memory)]->getAllocInfo(memory); } // return the ids for the set of existing pools in this cache. std::set getPoolIds() const override final { - return allocator_->getPoolIds(); + // all tiers have the same pool ids. TODO: deduplicate + return allocator_[0]->getPoolIds(); } // return a list of pool ids that are backing compact caches. This includes @@ -1146,18 +1169,22 @@ class CacheAllocator : public CacheBase { // return the pool with speicified id. const MemoryPool& getPool(PoolId pid) const override final { - return allocator_->getPool(pid); + return allocator_[currentTier()]->getPool(pid); + } + + const MemoryPool& getPoolByTid(PoolId pid, TierId tid) const override final { + return allocator_[tid]->getPool(pid); } // calculate the number of slabs to be advised/reclaimed in each pool PoolAdviseReclaimData calcNumSlabsToAdviseReclaim() override final { auto regularPoolIds = getRegularPoolIds(); - return allocator_->calcNumSlabsToAdviseReclaim(regularPoolIds); + return allocator_[currentTier()]->calcNumSlabsToAdviseReclaim(regularPoolIds); } // update number of slabs to advise in the cache void updateNumSlabsToAdvise(int32_t numSlabsToAdvise) override final { - allocator_->updateNumSlabsToAdvise(numSlabsToAdvise); + allocator_[currentTier()]->updateNumSlabsToAdvise(numSlabsToAdvise); } // returns a valid PoolId corresponding to the name or kInvalidPoolId if the @@ -1165,8 +1192,9 @@ class CacheAllocator : public CacheBase { PoolId getPoolId(folly::StringPiece name) const noexcept; // returns the pool's name by its poolId. - std::string getPoolName(PoolId poolId) const override { - return allocator_->getPoolName(poolId); + std::string getPoolName(PoolId poolId) const { + // all tiers have the same pool names. + return allocator_[0]->getPoolName(poolId); } // get stats related to all kinds of slab release events. @@ -1199,6 +1227,30 @@ class CacheAllocator : public CacheBase { return stats; } + // returns the background mover stats per thread + std::vector getBackgroundMoverStats() const { + auto stats = std::vector(); + for (auto& bg : backgroundMover_) { + stats.push_back(bg->getStats()); + } + return stats; + } + + ClassBgStatsType getBackgroundMoverClassStats() const { + ClassBgStatsType stats; + auto record = [&](auto& bg) { + // gives a unique descriptor + auto classStats = bg->getPerClassStats(); + for (const auto& [key, value] : classStats) { + stats[key] = value; + } + }; + for (auto& bg : backgroundMover_) { + record(bg); + } + return stats; + } + // returns the pool rebalancer stats RebalancerStats getRebalancerStats() const { auto stats = @@ -1238,6 +1290,9 @@ class CacheAllocator : public CacheBase { // return cache's memory usage stats CacheMemoryStats getCacheMemoryStats() const override final; + // return stats for Allocation Class + ACStats getACStats(TierId tid, PoolId pid, ClassId cid) const override final; + // return the nvm cache stats map util::StatsMap getNvmCacheStatsMap() const override final; @@ -1443,11 +1498,14 @@ class CacheAllocator : public CacheBase { using MMContainerPtr = std::unique_ptr; using MMContainers = - std::array, - MemoryPoolManager::kMaxPools>; + std::vector, + MemoryPoolManager::kMaxPools>>; void createMMContainers(const PoolId pid, MMConfig config); + TierId getTierId(const Item& item) const; + TierId getTierId(const void* ptr) const; + // acquire the MMContainer corresponding to the the Item's class and pool. // // @return pointer to the MMContainer. @@ -1455,7 +1513,12 @@ class CacheAllocator : public CacheBase { // allocation from the memory allocator. MMContainer& getMMContainer(const Item& item) const noexcept; - MMContainer& getMMContainer(PoolId pid, ClassId cid) const noexcept; + MMContainer& getMMContainer(TierId tid, PoolId pid, ClassId cid) const noexcept; + + // Get stats of the specified pid and cid. + // If such mmcontainer is not valid (pool id or cid out of bound) + // or the mmcontainer is not initialized, return an empty stat. + MMContainerStat getMMContainerStat(TierId tid, PoolId pid, ClassId cid) const noexcept; // create a new cache allocation. The allocation can be initialized // appropriately and made accessible through insert or insertOrReplace. @@ -1487,7 +1550,25 @@ class CacheAllocator : public CacheBase { uint32_t size, uint32_t creationTime, uint32_t expiryTime, - bool fromBgThread = false); + bool fromBgThread); + + // create a new cache allocation on specific memory tier. + // For description see allocateInternal. + // + // @param tid id a memory tier + // @param fromBgThread whether this function was called from a bg + // thread - this is used to decide whether bg thread should + // be waken in case there is no free memory + // @param evict whether to evict an item from tier tid in case there + // is not enough memory + WriteHandle allocateInternalTier(TierId tid, + PoolId id, + Key key, + uint32_t size, + uint32_t creationTime, + uint32_t expiryTime, + bool fromBgThread, + bool evict); // Allocate a chained item // @@ -1566,6 +1647,15 @@ class CacheAllocator : public CacheBase { // not exist. FOLLY_ALWAYS_INLINE WriteHandle findFastImpl(Key key, AccessMode mode); + // Moves a regular item to a different memory tier. + // + // @param oldItem Reference to the item being moved + // @param newItemHdl Reference to the handle of the new item being moved into + // + // @return true If the move was completed, and the containers were updated + // successfully. + bool moveRegularItemOnEviction(Item& oldItem, WriteHandle& newItemHdl); + // Moves a regular item to a different slab. This should only be used during // slab release after the item's exclusive bit has been set. The user supplied // callback is responsible for copying the contents and fixing the semantics @@ -1573,10 +1663,15 @@ class CacheAllocator : public CacheBase { // // @param oldItem Reference to the item being moved // @param newItemHdl Reference to the handle of the new item being moved into + // @param skipAddInMMContainer so we can tell if we should add in mmContainer + // or wait + // to do in batch // // @return true If the move was completed, and the containers were updated // successfully. - bool moveRegularItem(Item& oldItem, WriteHandle& newItemHdl); + bool moveRegularItem(Item& oldItem, + WriteHandle& newItemHdl, + bool skipAddInMMContainer = false); // template class for viewAsChainedAllocs that takes either ReadHandle or // WriteHandle @@ -1743,15 +1838,17 @@ class CacheAllocator : public CacheBase { // Implementation to find a suitable eviction from the container. The // two parameters together identify a single container. // + // @param tid the id of the tier to look for evictions inside // @param pid the id of the pool to look for evictions inside // @param cid the id of the class to look for evictions inside // @return An evicted item or nullptr if there is no suitable candidate found // within the configured number of attempts. - Item* findEviction(PoolId pid, ClassId cid); + Item* findEviction(TierId tid, PoolId pid, ClassId cid); // Get next eviction candidate from MMContainer, remove from AccessContainer, // MMContainer and insert into NVMCache if enabled. // + // @param tid the id of the tier to look for evictions inside // @param pid the id of the pool to look for evictions inside // @param cid the id of the class to look for evictions inside // @param searchTries number of search attempts so far. @@ -1759,9 +1856,16 @@ class CacheAllocator : public CacheBase { // @return pair of [candidate, toRecycle]. Pair of null if reached the end of // the eviction queue or no suitable candidate found // within the configured number of attempts - std::pair getNextCandidate(PoolId pid, + std::pair getNextCandidate(TierId tid, + PoolId pid, ClassId cid, unsigned int& searchTries); + // similiar to the above method but returns a batch of evicted items + // as a pair of vectors + std::vector getNextCandidates(TierId tid, + PoolId pid, + ClassId cid, + uint32_t batch); using EvictionIterator = typename MMContainer::LockedIterator; @@ -1790,7 +1894,7 @@ class CacheAllocator : public CacheBase { const typename Item::PtrCompressor& compressor); unsigned int reclaimSlabs(PoolId id, size_t numSlabs) final { - return allocator_->reclaimSlabsAndGrow(id, numSlabs); + return allocator_[currentTier()]->reclaimSlabsAndGrow(id, numSlabs); } FOLLY_ALWAYS_INLINE EventTracker* getEventTracker() const { @@ -1849,7 +1953,7 @@ class CacheAllocator : public CacheBase { const void* hint = nullptr) final; // @param releaseContext slab release context - void releaseSlabImpl(const SlabReleaseContext& releaseContext); + void releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext); // @return true when successfully marked as moving, // fasle when this item has already been freed @@ -1892,24 +1996,53 @@ class CacheAllocator : public CacheBase { // primitives. So we consciously exempt ourselves here from TSAN data race // detection. folly::annotate_ignore_thread_sanitizer_guard g(__FILE__, __LINE__); - auto slabsSkipped = allocator_->forEachAllocation(std::forward(f)); + auto slabsSkipped = allocator_[currentTier()]->forEachAllocation(std::forward(f)); stats().numReaperSkippedSlabs.add(slabsSkipped); } // exposed for the background evictor to iterate through the memory and evict // in batch. This should improve insertion path for tiered memory config - size_t traverseAndEvictItems(unsigned int /* pid */, - unsigned int /* cid */, - size_t /* batch */) { - throw std::runtime_error("Not supported yet!"); - } - - // exposed for the background promoter to iterate through the memory and - // promote in batch. This should improve find latency - size_t traverseAndPromoteItems(unsigned int /* pid */, - unsigned int /* cid */, - size_t /* batch */) { - throw std::runtime_error("Not supported yet!"); + // promotion batch only applies to tiered memory config + std::pair traverseAndMoveItems(TierId tid, + PoolId pid, + ClassId cid, + size_t evictionBatch, + size_t promotionBatch) { + auto& mmContainer = getMMContainer(tid, pid, cid); + uint32_t currItems = mmContainer.size(); + if (currItems < evictionBatch) { + evictionBatch = currItems; + } + if (evictionBatch == 0) { + return {0, 0}; + } + auto evictionData = getNextCandidates(tid, pid, cid, evictionBatch); + // we now have a list of candidates and toRecycles, they should go back + // to the allocator and we will do this in batch to avoid AC lock contention + // note - for chained items - we can't do this in bulk + std::vector chainedIdx; + std::vector toRecycles; + size_t idx = 0; + for (auto& data : evictionData) { + if (data.chainedItem) { + XDCHECK(data.candidate->hasChainedItem()); + chainedIdx.push_back(idx); + } else { + XDCHECK_EQ(data.candidate, data.toRecycle); + toRecycles.push_back(data.candidate); + } + idx++; + } + for (int i = 0; i < chainedIdx.size(); i++) { + auto& data = evictionData[chainedIdx[i]]; + releaseBackToAllocator(*data.candidate, RemoveContext::kNormal, false, + data.toRecycle); + (*stats_.chainedItemEvictions)[pid][cid].inc(); + } + allocator_[tid]->freeBatch(toRecycles.begin(), toRecycles.end(), pid, cid); + size_t evictions = toRecycles.size(); + (*stats_.regularItemEvictions)[pid][cid].add(evictions); + return {evictions, 0}; } // returns true if nvmcache is enabled and we should write this item to @@ -1952,10 +2085,10 @@ class CacheAllocator : public CacheBase { std::unique_ptr& worker, std::chrono::seconds timeout = std::chrono::seconds{0}); - ShmSegmentOpts createShmCacheOpts(); - std::unique_ptr createNewMemoryAllocator(); - std::unique_ptr restoreMemoryAllocator(); - std::unique_ptr restoreCCacheManager(); + ShmSegmentOpts createShmCacheOpts(TierId tid); + std::unique_ptr createNewMemoryAllocator(TierId tid); + std::unique_ptr restoreMemoryAllocator(TierId tid, const serialization::MemoryAllocatorObject& sAllocator); + std::unique_ptr restoreCCacheManager(TierId tid); PoolIds filterCompactCachePools(const PoolIds& poolIds) const; @@ -1996,8 +2129,7 @@ class CacheAllocator : public CacheBase { } typename Item::PtrCompressor createPtrCompressor() const { - return allocator_ - ->createPtrCompressor(); + return typename Item::PtrCompressor{allocator_}; } // helper utility to throttle and optionally log. @@ -2020,9 +2152,14 @@ class CacheAllocator : public CacheBase { // @param type the type of initialization // @return nullptr if the type is invalid - // @return pointer to memory allocator + // @return vector of pointers to memory allocator // @throw std::runtime_error if type is invalid - std::unique_ptr initAllocator(InitMemType type); + std::vector> initAllocator(InitMemType type); + + std::vector> createPrivateAllocator(); + std::vector> createAllocators(); + std::vector> restoreAllocators(); + // @param type the type of initialization // @return nullptr if the type is invalid // @return pointer to access container @@ -2087,44 +2224,6 @@ class CacheAllocator : public CacheBase { : false; } - // returns the background mover stats - BackgroundMoverStats getBackgroundMoverStats(MoverDir direction) const { - auto stats = BackgroundMoverStats{}; - if (direction == MoverDir::Evict) { - for (auto& bg : backgroundEvictor_) - stats += bg->getStats(); - } else if (direction == MoverDir::Promote) { - for (auto& bg : backgroundPromoter_) - stats += bg->getStats(); - } - return stats; - } - - std::map> getBackgroundMoverClassStats( - MoverDir direction) const { - std::map> stats; - - if (direction == MoverDir::Evict) { - for (auto& bg : backgroundEvictor_) { - for (auto& pid : bg->getClassStats()) { - for (auto& cid : pid.second) { - stats[pid.first][cid.first] += cid.second; - } - } - } - } else if (direction == MoverDir::Promote) { - for (auto& bg : backgroundPromoter_) { - for (auto& pid : bg->getClassStats()) { - for (auto& cid : pid.second) { - stats[pid.first][cid.first] += cid.second; - } - } - } - } - - return stats; - } - bool tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle); @@ -2207,6 +2306,19 @@ class CacheAllocator : public CacheBase { // BEGIN private members + TierId currentTier() const { + // TODO: every function which calls this method should be refactored. + // We should go case by case and either make such function work on + // all tiers or expose separate parameter to describe the tier ID. + return 0; + } + + unsigned getNumTiers() const { + return config_.memoryTierConfigs.size(); + } + + size_t memoryTierSize(TierId tid) const; + // Whether the memory allocator for this cache allocator was created on shared // memory. The hash table, chained item hash table etc is also created on // shared memory except for temporary shared memory mode when they're created @@ -2232,9 +2344,10 @@ class CacheAllocator : public CacheBase { const MMConfig mmConfig_{}; // the memory allocator for allocating out of the available memory. - std::unique_ptr allocator_; + std::vector> allocator_; // compact cache allocator manager + // TODO: per tier? std::unique_ptr compactCacheManager_; // compact cache instances reside here when user "add" or "attach" compact @@ -2285,9 +2398,8 @@ class CacheAllocator : public CacheBase { // free memory monitor std::unique_ptr memMonitor_; - // background evictor - std::vector>> backgroundEvictor_; - std::vector>> backgroundPromoter_; + // background data movement, for single tier, this just evicts + std::vector>> backgroundMover_; // check whether a pool is a slabs pool std::array isCompactCachePool_{}; @@ -2441,12 +2553,12 @@ CacheAllocator::CacheAllocator( : serialization::CacheAllocatorMetadata{}}, allocator_(initAllocator(type)), compactCacheManager_(type != InitMemType::kMemAttach - ? std::make_unique(*allocator_) - : restoreCCacheManager()), + ? std::make_unique(*allocator_[0] /* TODO: per tier */) + : restoreCCacheManager(0/* TODO: per tier */)), compressor_(createPtrCompressor()), mmContainers_(type == InitMemType::kMemAttach ? deserializeMMContainers(*deserializer_, compressor_) - : MMContainers{}), + : MMContainers{getNumTiers()}), accessContainer_(initAccessContainer( type, detail::kShmHashTableName, config.accessConfig)), chainedItemAccessContainer_( @@ -2481,48 +2593,102 @@ CacheAllocator::~CacheAllocator() { } template -ShmSegmentOpts CacheAllocator::createShmCacheOpts() { +ShmSegmentOpts CacheAllocator::createShmCacheOpts(TierId tid) { ShmSegmentOpts opts; opts.alignment = sizeof(Slab); // TODO: we support single tier so far - if (config_.memoryTierConfigs.size() > 1) { - throw std::invalid_argument("CacheLib only supports a single memory tier"); + if (config_.memoryTierConfigs.size() > 2) { + throw std::invalid_argument("CacheLib only supports two memory tiers"); } - opts.memBindNumaNodes = config_.memoryTierConfigs[0].getMemBind(); + opts.memBindNumaNodes = config_.memoryTierConfigs[tid].getMemBind(); return opts; } +template +size_t CacheAllocator::memoryTierSize(TierId tid) const { + auto partitions = std::accumulate(config_.memoryTierConfigs.begin(), config_.memoryTierConfigs.end(), 0UL, + [](const size_t i, const MemoryTierCacheConfig& config){ + return i + config.getRatio(); + }); + + return config_.memoryTierConfigs[tid].calculateTierSize(config_.getCacheSize(), partitions); +} + +template +std::vector> +CacheAllocator::createPrivateAllocator() { + std::vector> allocators; + + if (isOnShm_) { + allocators.emplace_back(std::make_unique( + getAllocatorConfig(config_), + tempShm_->getAddr(), + config_.getCacheSize())); + } else { + allocators.emplace_back(std::make_unique( + getAllocatorConfig(config_), + config_.getCacheSize())); + } + + return allocators; +} + template std::unique_ptr -CacheAllocator::createNewMemoryAllocator() { +CacheAllocator::createNewMemoryAllocator(TierId tid) { + size_t tierSize = memoryTierSize(tid); return std::make_unique( getAllocatorConfig(config_), shmManager_ - ->createShm(detail::kShmCacheName, config_.getCacheSize(), - config_.slabMemoryBaseAddr, createShmCacheOpts()) + ->createShm(detail::kShmCacheName + std::to_string(tid), + tierSize, config_.slabMemoryBaseAddr, + createShmCacheOpts(tid)) .addr, - config_.getCacheSize()); + tierSize); } template std::unique_ptr -CacheAllocator::restoreMemoryAllocator() { +CacheAllocator::restoreMemoryAllocator(TierId tid, + const serialization::MemoryAllocatorObject& sAllocator) { return std::make_unique( - deserializer_->deserialize(), + sAllocator, shmManager_ - ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr, - createShmCacheOpts()) - .addr, - config_.getCacheSize(), + ->attachShm(detail::kShmCacheName + std::to_string(tid), + config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr, + memoryTierSize(tid), config_.disableFullCoredump); } +template +std::vector> +CacheAllocator::createAllocators() { + std::vector> allocators; + for (int tid = 0; tid < getNumTiers(); tid++) { + allocators.emplace_back(createNewMemoryAllocator(tid)); + } + return allocators; +} + +template +std::vector> +CacheAllocator::restoreAllocators() { + std::vector> allocators; + const auto allocatorCollection = + deserializer_->deserialize(); + auto allocMap = *allocatorCollection.allocators(); + for (int tid = 0; tid < getNumTiers(); tid++) { + allocators.emplace_back(restoreMemoryAllocator(tid,allocMap[tid])); + } + return allocators; +} + template std::unique_ptr -CacheAllocator::restoreCCacheManager() { +CacheAllocator::restoreCCacheManager(TierId tid) { return std::make_unique( deserializer_->deserialize(), - *allocator_); + *allocator_[tid]); } template @@ -2612,35 +2778,25 @@ void CacheAllocator::initWorkers() { config_.ccacheOptimizeStepSizePercent); } - if (config_.backgroundEvictorEnabled()) { - startNewBackgroundEvictor(config_.backgroundEvictorInterval, - config_.backgroundEvictorStrategy, - config_.backgroundEvictorThreads); - } - - if (config_.backgroundPromoterEnabled()) { - startNewBackgroundPromoter(config_.backgroundPromoterInterval, - config_.backgroundPromoterStrategy, - config_.backgroundPromoterThreads); + if (config_.backgroundMoverEnabled()) { + startNewBackgroundMover(config_.backgroundMoverInterval, + config_.backgroundEvictionBatch, + config_.backgroundPromotionBatch, + config_.backgroundTargetFree, + config_.backgroundMoverThreads); } } template -std::unique_ptr CacheAllocator::initAllocator( +std::vector> +CacheAllocator::initAllocator( InitMemType type) { if (type == InitMemType::kNone) { - if (isOnShm_ == true) { - return std::make_unique(getAllocatorConfig(config_), - tempShm_->getAddr(), - config_.getCacheSize()); - } else { - return std::make_unique(getAllocatorConfig(config_), - config_.getCacheSize()); - } + return createPrivateAllocator(); } else if (type == InitMemType::kMemNew) { - return createNewMemoryAllocator(); + return createAllocators(); } else if (type == InitMemType::kMemAttach) { - return restoreMemoryAllocator(); + return restoreAllocators(); } // Invalid type @@ -2704,23 +2860,19 @@ CacheAllocator::allocate(PoolId poolId, creationTime = util::getCurrentTimeSec(); } return allocateInternal(poolId, key, size, creationTime, - ttlSecs == 0 ? 0 : creationTime + ttlSecs); -} - -template -bool CacheAllocator::shouldWakeupBgEvictor(PoolId /* pid */, - ClassId /* cid */) { - return false; + ttlSecs == 0 ? 0 : creationTime + ttlSecs, false); } template typename CacheAllocator::WriteHandle -CacheAllocator::allocateInternal(PoolId pid, - typename Item::Key key, - uint32_t size, - uint32_t creationTime, - uint32_t expiryTime, - bool fromBgThread) { +CacheAllocator::allocateInternalTier(TierId tid, + PoolId pid, + typename Item::Key key, + uint32_t size, + uint32_t creationTime, + uint32_t expiryTime, + bool fromBgThread, + bool evict) { util::LatencyTracker tracker{stats().allocateLatency_}; SCOPE_FAIL { stats_.invalidAllocs.inc(); }; @@ -2729,21 +2881,18 @@ CacheAllocator::allocateInternal(PoolId pid, const auto requiredSize = Item::getRequiredSize(key, size); // the allocation class in our memory allocator. - const auto cid = allocator_->getAllocationClassId(pid, requiredSize); + const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + // TODO: per-tier (*stats_.allocAttempts)[pid][cid].inc(); - void* memory = allocator_->allocate(pid, requiredSize); - - if (backgroundEvictor_.size() && !fromBgThread && - (memory == nullptr || shouldWakeupBgEvictor(pid, cid))) { - backgroundEvictor_[BackgroundMover::workerId( - pid, cid, backgroundEvictor_.size())] - ->wakeUp(); - } + void* memory = allocator_[tid]->allocate(pid, requiredSize); if (memory == nullptr) { - memory = findEviction(pid, cid); + if (!evict) { + return {}; + } + memory = findEviction(tid, pid, cid); } WriteHandle handle; @@ -2754,7 +2903,7 @@ CacheAllocator::allocateInternal(PoolId pid, // for example. SCOPE_FAIL { // free back the memory to the allocator since we failed. - allocator_->free(memory); + allocator_[tid]->free(memory); }; handle = acquire(new (memory) Item(key, size, creationTime, expiryTime)); @@ -2765,7 +2914,7 @@ CacheAllocator::allocateInternal(PoolId pid, } } else { // failed to allocate memory. - (*stats_.allocFailures)[pid][cid].inc(); + (*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier // wake up rebalancer if (!config_.poolRebalancerDisableForcedWakeUp && poolRebalancer_) { poolRebalancer_->wakeUp(); @@ -2782,6 +2931,24 @@ CacheAllocator::allocateInternal(PoolId pid, return handle; } +template +typename CacheAllocator::WriteHandle +CacheAllocator::allocateInternal(PoolId pid, + typename Item::Key key, + uint32_t size, + uint32_t creationTime, + uint32_t expiryTime, + bool fromBgThread) { + auto tid = 0; /* TODO: consult admission policy */ + for(TierId tid = 0; tid < getNumTiers(); ++tid) { + bool evict = !config_.insertToFirstFreeTier || tid == getNumTiers() - 1; + auto handle = allocateInternalTier(tid, pid, key, size, creationTime, + expiryTime, fromBgThread, evict); + if (handle) return handle; + } + return {}; +} + template typename CacheAllocator::WriteHandle CacheAllocator::allocateChainedItem(const ReadHandle& parent, @@ -2811,22 +2978,30 @@ CacheAllocator::allocateChainedItemInternal(const Item& parent, // number of bytes required for this item const auto requiredSize = ChainedItem::getRequiredSize(size); - - const auto pid = allocator_->getAllocInfo(parent.getMemory()).poolId; - const auto cid = allocator_->getAllocationClassId(pid, requiredSize); - + + // this is correct for now as we can + // assume the parent and chained item + // will reside in the same tier until + // they are moved + auto tid = getTierId(parent); + + const auto pid = allocator_[tid]->getAllocInfo(parent.getMemory()).poolId; + const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + + // TODO: per-tier? Right now stats_ are not used in any public periodic + // worker (*stats_.allocAttempts)[pid][cid].inc(); - void* memory = allocator_->allocate(pid, requiredSize); + void* memory = allocator_[tid]->allocate(pid, requiredSize); if (memory == nullptr) { - memory = findEviction(pid, cid); + memory = findEviction(tid, pid, cid); } if (memory == nullptr) { (*stats_.allocFailures)[pid][cid].inc(); return WriteHandle{}; } - SCOPE_FAIL { allocator_->free(memory); }; + SCOPE_FAIL { allocator_[tid]->free(memory); }; auto child = acquire(new (memory) ChainedItem( compressor_.compress(&parent), size, util::getCurrentTimeSec())); @@ -3160,8 +3335,8 @@ CacheAllocator::releaseBackToAllocator(Item& it, throw std::runtime_error( folly::sformat("cannot release this item: {}", it.toString())); } - - const auto allocInfo = allocator_->getAllocInfo(it.getMemory()); + const auto tid = getTierId(it); + const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory()); if (ctx == RemoveContext::kEviction) { const auto timeNow = util::getCurrentTimeSec(); @@ -3185,8 +3360,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, folly::sformat("Can not recycle a chained item {}, toRecyle", it.toString(), toRecycle->toString())); } - - allocator_->free(&it); + allocator_[tid]->free(&it); return ReleaseRes::kReleased; } @@ -3255,7 +3429,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, auto next = head->getNext(compressor_); const auto childInfo = - allocator_->getAllocInfo(static_cast(head)); + allocator_[tid]->getAllocInfo(static_cast(head)); (*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub( util::getFragmentation(*this, *head)); @@ -3271,7 +3445,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, XDCHECK(ReleaseRes::kReleased != res); res = ReleaseRes::kRecycled; } else { - allocator_->free(head); + allocator_[tid]->free(head); } stats_.numChainedChildItems.dec(); @@ -3285,7 +3459,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, res = ReleaseRes::kRecycled; } else { XDCHECK(it.isDrained()); - allocator_->free(&it); + allocator_[tid]->free(&it); } return res; @@ -3575,7 +3749,8 @@ void CacheAllocator::wakeUpWaiters(folly::StringPiece key, template bool CacheAllocator::moveRegularItem(Item& oldItem, - WriteHandle& newItemHdl) { + WriteHandle& newItemHdl, + bool skipAddInMMContainer) { XDCHECK(oldItem.isMoving()); // If an item is expired, proceed to eviction. if (oldItem.isExpired()) { @@ -3605,8 +3780,12 @@ bool CacheAllocator::moveRegularItem(Item& oldItem, // Adding the item to mmContainer has to succeed since no one can remove the // item auto& newContainer = getMMContainer(*newItemHdl); - auto mmContainerAdded = newContainer.add(*newItemHdl); - XDCHECK(mmContainerAdded); + if (!skipAddInMMContainer) { + // Adding the item to mmContainer has to succeed since no one can remove the + // item + auto mmContainerAdded = newContainer.add(*newItemHdl); + XDCHECK(mmContainerAdded); + } if (oldItem.hasChainedItem()) { XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString(); @@ -3700,13 +3879,14 @@ void CacheAllocator::unlinkItemForEviction(Item& it) { template std::pair::Item*, typename CacheAllocator::Item*> -CacheAllocator::getNextCandidate(PoolId pid, +CacheAllocator::getNextCandidate(TierId tid, + PoolId pid, ClassId cid, unsigned int& searchTries) { typename NvmCacheT::PutToken token; Item* toRecycle = nullptr; Item* candidate = nullptr; - auto& mmContainer = getMMContainer(pid, cid); + auto& mmContainer = getMMContainer(tid, pid, cid); mmContainer.withEvictionIterator([this, pid, cid, &candidate, &toRecycle, &searchTries, &mmContainer, @@ -3808,15 +3988,127 @@ CacheAllocator::getNextCandidate(PoolId pid, return {candidate, toRecycle}; } +// Used for the background movers to get a batch of items +// to move/evict +template +std::vector::MoveData> +CacheAllocator::getNextCandidates(TierId tid, + PoolId pid, + ClassId cid, + uint32_t batch) { + std::vector evictionData; + evictionData.reserve(batch); + + auto& mmContainer = getMMContainer(tid, pid, cid); + unsigned int maxSearchTries = + std::max(config_.evictionSearchTries, batch * 2); + + mmContainer.withEvictionIterator([this, tid, pid, cid, batch, maxSearchTries, + &evictionData, &mmContainer](auto&& itr) { + unsigned int searchTries = 0; + if (!itr) { + ++searchTries; + (*stats_.evictionAttempts)[pid][cid].inc(); + return; + } + + while ((config_.evictionSearchTries == 0 || maxSearchTries > searchTries) && + itr && evictionData.size() < batch) { + ++searchTries; + (*stats_.evictionAttempts)[pid][cid].inc(); + + auto* toRecycle_ = itr.get(); + bool isChained_ = toRecycle_->isChainedItem(); + auto* candidate_ = + isChained_ ? &toRecycle_->asChainedItem().getParentItem(compressor_) + : toRecycle_; + + typename NvmCacheT::PutToken putToken{}; + const bool evictToNvmCache = shouldWriteToNvmCache(*candidate_); + + auto markForEviction = [&candidate_, this]() { + auto markedForEviction = candidate_->markForEviction(); + if (!markedForEviction) { + if (candidate_->hasChainedItem()) { + stats_.evictFailParentAC.inc(); + } else { + stats_.evictFailAC.inc(); + } + return false; + } + return true; + }; + + if (evictToNvmCache) { + auto putTokenRv = nvmCache_->createPutToken( + candidate_->getKey(), + [&markForEviction]() { return markForEviction(); }); + + if (!putTokenRv) { + switch (putTokenRv.error()) { + case InFlightPuts::PutTokenError::TRY_LOCK_FAIL: + stats_.evictFailPutTokenLock.inc(); + break; + case InFlightPuts::PutTokenError::TOKEN_EXISTS: + stats_.evictFailConcurrentFill.inc(); + break; + case InFlightPuts::PutTokenError::CALLBACK_FAILED: + stats_.evictFailConcurrentAccess.inc(); + break; + } + ++itr; + continue; + } + putToken = std::move(*putTokenRv); + XDCHECK(putToken.isValid()); + } else { + if (!markForEviction()) { + ++itr; + continue; + } + } + + // markForEviction to make sure no other thead is evicting the item + // nor holding a handle to that item + + // Check if parent changed for chained items - if yes, we cannot + // remove the child from the mmContainer as we will not be evicting + // it. We could abort right here, but we need to cleanup in case + // unmarkForEviction() returns 0 - so just go through normal path. + if (!toRecycle_->isChainedItem() || + &toRecycle_->asChainedItem().getParentItem(compressor_) == + candidate_) { + mmContainer.remove(itr); + MoveData moveData(candidate_, toRecycle_, nullptr, + isChained_ || candidate_->hasChainedItem(), + candidate_->isExpired(), + std::move(putToken), nullptr); + evictionData.push_back(std::move(moveData)); + } + } + }); + + for (auto& moveData : evictionData) { + Item* candidate = moveData.candidate; + unlinkItemForEviction(*moveData.candidate); + if (moveData.token.isValid() && + shouldWriteToNvmCacheExclusive(*moveData.candidate)) { + nvmCache_->put(*moveData.candidate, std::move(moveData.token)); + } + } + + return evictionData; +} + template typename CacheAllocator::Item* -CacheAllocator::findEviction(PoolId pid, ClassId cid) { +CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { // Keep searching for a candidate until we were able to evict it // or until the search limit has been exhausted unsigned int searchTries = 0; while (config_.evictionSearchTries == 0 || config_.evictionSearchTries > searchTries) { - auto [candidate, toRecycle] = getNextCandidate(pid, cid, searchTries); + auto [candidate, toRecycle] = getNextCandidate(tid, pid, cid, searchTries); // Reached the end of the eviction queue but doulen't find a candidate, // start again. @@ -4103,21 +4395,57 @@ void CacheAllocator::invalidateNvm(Item& item) { } } +template +TierId +CacheAllocator::getTierId(const Item& item) const { + return getTierId(item.getMemory()); +} + +template +TierId +CacheAllocator::getTierId(const void* ptr) const { + for (TierId tid = 0; tid < getNumTiers(); tid++) { + if (allocator_[tid]->isMemoryInAllocator(ptr)) + return tid; + } + + throw std::invalid_argument("Item does not belong to any tier!"); +} + template typename CacheAllocator::MMContainer& CacheAllocator::getMMContainer(const Item& item) const noexcept { + const auto tid = getTierId(item); const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); - return getMMContainer(allocInfo.poolId, allocInfo.classId); + allocator_[tid]->getAllocInfo(static_cast(&item)); + return getMMContainer(tid, allocInfo.poolId, allocInfo.classId); } template typename CacheAllocator::MMContainer& -CacheAllocator::getMMContainer(PoolId pid, +CacheAllocator::getMMContainer(TierId tid, + PoolId pid, ClassId cid) const noexcept { - XDCHECK_LT(static_cast(pid), mmContainers_.size()); - XDCHECK_LT(static_cast(cid), mmContainers_[pid].size()); - return *mmContainers_[pid][cid]; + XDCHECK_LT(static_cast(tid), mmContainers_.size()); + XDCHECK_LT(static_cast(pid), mmContainers_[tid].size()); + XDCHECK_LT(static_cast(cid), mmContainers_[tid][pid].size()); + return *mmContainers_[tid][pid][cid]; +} + +template +MMContainerStat CacheAllocator::getMMContainerStat( + TierId tid, PoolId pid, ClassId cid) const noexcept { + if(static_cast(tid) >= mmContainers_.size()) { + return MMContainerStat{}; + } + if (static_cast(pid) >= mmContainers_[tid].size()) { + return MMContainerStat{}; + } + if (static_cast(cid) >= mmContainers_[tid][pid].size()) { + return MMContainerStat{}; + } + return mmContainers_[tid][pid][cid] ? mmContainers_[tid][pid][cid]->getStats() + : MMContainerStat{}; } template @@ -4306,8 +4634,9 @@ void CacheAllocator::markUseful(const ReadHandle& handle, template bool CacheAllocator::recordAccessInMMContainer(Item& item, AccessMode mode) { + const auto tid = getTierId(item); const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); + allocator_[tid]->getAllocInfo(static_cast(&item)); (*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc(); // track recently accessed items if needed @@ -4315,14 +4644,15 @@ bool CacheAllocator::recordAccessInMMContainer(Item& item, ring_->trackItem(reinterpret_cast(&item), item.getSize()); } - auto& mmContainer = getMMContainer(allocInfo.poolId, allocInfo.classId); + auto& mmContainer = getMMContainer(tid, allocInfo.poolId, allocInfo.classId); return mmContainer.recordAccess(item, mode); } template uint32_t CacheAllocator::getUsableSize(const Item& item) const { + const auto tid = getTierId(item); const auto allocSize = - allocator_->getAllocInfo(static_cast(&item)).allocSize; + allocator_[tid]->getAllocInfo(static_cast(&item)).allocSize; return item.isChainedItem() ? allocSize - ChainedItem::getRequiredSize(0) : allocSize - Item::getRequiredSize(item.getKey(), 0); @@ -4331,8 +4661,9 @@ uint32_t CacheAllocator::getUsableSize(const Item& item) const { template typename CacheAllocator::SampleItem CacheAllocator::getSampleItem() { + auto tid = folly::Random::rand32() % getNumTiers(); size_t nvmCacheSize = nvmCache_ ? nvmCache_->getUsableSize() : 0; - size_t ramCacheSize = allocator_->getMemorySizeInclAdvised(); + size_t ramCacheSize = allocator_[tid]->getMemorySizeInclAdvised(); bool fromNvm = folly::Random::rand64(0, nvmCacheSize + ramCacheSize) >= ramCacheSize; @@ -4341,19 +4672,18 @@ CacheAllocator::getSampleItem() { } // Sampling from DRAM cache - auto item = reinterpret_cast(allocator_->getRandomAlloc()); + auto item = reinterpret_cast(allocator_[tid]->getRandomAlloc()); if (!item || UNLIKELY(item->isExpired())) { return SampleItem{false /* fromNvm */}; } // Check that item returned is the same that was sampled - auto sharedHdl = std::make_shared(findInternal(item->getKey())); if (sharedHdl->get() != item) { return SampleItem{false /* fromNvm */}; } - const auto allocInfo = allocator_->getAllocInfo(item->getMemory()); + const auto allocInfo = allocator_[tid]->getAllocInfo(item->getMemory()); // Convert the Item to IOBuf to make SampleItem auto iobuf = folly::IOBuf{ @@ -4377,21 +4707,27 @@ std::vector CacheAllocator::dumpEvictionIterator( return {}; } - if (static_cast(pid) >= mmContainers_.size() || - static_cast(cid) >= mmContainers_[pid].size()) { + // Always evict from the lowest layer. + int tid = getNumTiers() - 1; + if (static_cast(tid) >= mmContainers_.size() || + static_cast(pid) >= mmContainers_[tid].size() || + static_cast(cid) >= mmContainers_[tid][pid].size()) { throw std::invalid_argument( - folly::sformat("Invalid PoolId: {} and ClassId: {}.", pid, cid)); + folly::sformat("Invalid TierId: {} and PoolId: {} and ClassId: {}.", tid, pid, cid)); } std::vector content; - auto& mm = *mmContainers_[pid][cid]; - auto evictItr = mm.getEvictionIterator(); size_t i = 0; - while (evictItr && i < numItems) { - content.push_back(evictItr->toString()); - ++evictItr; - ++i; + while (i < numItems && tid >= 0) { + auto& mm = *mmContainers_[tid][pid][cid]; + mm.withEvictionIterator([&content, numItems](auto&& itr) { + while (itr && content.size() < numItems) { + content.push_back(itr->toString()); + ++itr; + } + }); + --tid; } return content; @@ -4569,25 +4905,43 @@ PoolId CacheAllocator::addPool( std::shared_ptr resizeStrategy, bool ensureProvisionable) { std::unique_lock w(poolsResizeAndRebalanceLock_); - auto pid = allocator_->addPool(name, size, allocSizes, ensureProvisionable); + + PoolId pid = 0; + size_t totalCacheSize = 0; + + for (TierId tid = 0; tid < getNumTiers(); tid++) { + totalCacheSize += allocator_[tid]->getMemorySize(); + } + + for (TierId tid = 0; tid < getNumTiers(); tid++) { + auto tierSizeRatio = + static_cast(allocator_[tid]->getMemorySize()) / totalCacheSize; + size_t tierPoolSize = static_cast(tierSizeRatio * size); + + // TODO: what if we manage to add pool only in one tier? + // we should probably remove that on failure + auto res = allocator_[tid]->addPool( + name, tierPoolSize, allocSizes, ensureProvisionable); + XDCHECK(tid == 0 || res == pid); + pid = res; + } + createMMContainers(pid, std::move(config)); setRebalanceStrategy(pid, std::move(rebalanceStrategy)); setResizeStrategy(pid, std::move(resizeStrategy)); - if (backgroundEvictor_.size()) { - auto memoryAssignments = - createBgWorkerMemoryAssignments(backgroundEvictor_.size()); - for (size_t id = 0; id < backgroundEvictor_.size(); id++) - backgroundEvictor_[id]->setAssignedMemory( - std::move(memoryAssignments[id])); - } - - if (backgroundPromoter_.size()) { - auto memoryAssignments = - createBgWorkerMemoryAssignments(backgroundPromoter_.size()); - for (size_t id = 0; id < backgroundPromoter_.size(); id++) - backgroundPromoter_[id]->setAssignedMemory( - std::move(memoryAssignments[id])); + if (backgroundMover_.size()) { + auto nTiers = getNumTiers(); + unsigned int bgId = 0; + for (TierId tid = 0; tid < nTiers; tid++) { + auto memoryAssignments = + createBgWorkerMemoryAssignments(backgroundMover_.size()/nTiers, tid); + for (size_t i = 0; i < backgroundMover_.size()/nTiers; i++) { + backgroundMover_[bgId]->setAssignedMemory( + std::move(memoryAssignments[i])); + bgId++; + } + } } return pid; @@ -4597,15 +4951,16 @@ template bool CacheAllocator::provisionPool( PoolId poolId, const std::vector& slabsDistribution) { std::unique_lock w(poolsResizeAndRebalanceLock_); - return allocator_->provisionPool(poolId, slabsDistribution); + //TODO: enable for multi-tier + return allocator_[currentTier()]->provisionPool(poolId, slabsDistribution); } template bool CacheAllocator::provisionPoolWithPowerLaw( PoolId poolId, double power, uint32_t minSlabsPerAC) { - const auto& poolSize = allocator_->getPool(poolId).getPoolSize(); + const auto& poolSize = allocator_[currentTier()]->getPool(poolId).getPoolSize(); const uint32_t numACs = - allocator_->getPool(poolId).getStats().classIds.size(); + allocator_[currentTier()]->getPool(poolId).getStats().classIds.size(); const uint32_t numSlabs = poolSize / Slab::kSize; const uint32_t minSlabsRequired = numACs * minSlabsPerAC; if (numSlabs < minSlabsRequired) { @@ -4637,16 +4992,16 @@ bool CacheAllocator::provisionPoolWithPowerLaw( slabsDistribution[i] += slabsToAllocate; allocatedSlabs += slabsToAllocate; } - + //TODO: enable for multi-tier return provisionPool(poolId, slabsDistribution); } template void CacheAllocator::overridePoolRebalanceStrategy( PoolId pid, std::shared_ptr rebalanceStrategy) { - if (static_cast(pid) >= mmContainers_.size()) { + if (static_cast(pid) >= mmContainers_[0].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size())); } setRebalanceStrategy(pid, std::move(rebalanceStrategy)); } @@ -4654,9 +5009,9 @@ void CacheAllocator::overridePoolRebalanceStrategy( template void CacheAllocator::overridePoolResizeStrategy( PoolId pid, std::shared_ptr resizeStrategy) { - if (static_cast(pid) >= mmContainers_.size()) { + if (static_cast(pid) >= mmContainers_[0].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size())); } setResizeStrategy(pid, std::move(resizeStrategy)); } @@ -4668,14 +5023,14 @@ void CacheAllocator::overridePoolOptimizeStrategy( } template -void CacheAllocator::overridePoolConfig(PoolId pid, +void CacheAllocator::overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config) { - if (static_cast(pid) >= mmContainers_.size()) { + if (static_cast(pid) >= mmContainers_[tid].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[tid].size())); } - auto& pool = allocator_->getPool(pid); + auto& pool = allocator_[tid]->getPool(pid); for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) { MMConfig mmConfig = config; mmConfig.addExtraConfig( @@ -4683,29 +5038,33 @@ void CacheAllocator::overridePoolConfig(PoolId pid, ? pool.getAllocationClass(static_cast(cid)) .getAllocsPerSlab() : 0); - DCHECK_NOTNULL(mmContainers_[pid][cid].get()); - mmContainers_[pid][cid]->setConfig(mmConfig); + DCHECK_NOTNULL(mmContainers_[tid][pid][cid].get()); + mmContainers_[tid][pid][cid]->setConfig(mmConfig); } } template void CacheAllocator::createMMContainers(const PoolId pid, MMConfig config) { - auto& pool = allocator_->getPool(pid); + // pools on each layer should have the same number of class id, etc. + auto& pool = allocator_[0]->getPool(pid); for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) { config.addExtraConfig( config_.trackTailHits ? pool.getAllocationClass(static_cast(cid)) .getAllocsPerSlab() : 0); - mmContainers_[pid][cid].reset(new MMContainer(config, compressor_)); + for (TierId tid = 0; tid < getNumTiers(); tid++) { + mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_)); + } } } template PoolId CacheAllocator::getPoolId( folly::StringPiece name) const noexcept { - return allocator_->getPoolId(name.str()); + // each tier has the same pools + return allocator_[0]->getPoolId(name.str()); } // The Function returns a consolidated vector of Release Slab @@ -4748,7 +5107,9 @@ std::set CacheAllocator::filterCompactCachePools( template std::set CacheAllocator::getRegularPoolIds() const { std::shared_lock r(poolsResizeAndRebalanceLock_); - return filterCompactCachePools(allocator_->getPoolIds()); + // TODO - get rid of the duplication - right now, each tier + // holds pool objects with mostly the same info + return filterCompactCachePools(allocator_[0]->getPoolIds()); } template @@ -4773,10 +5134,9 @@ std::set CacheAllocator::getRegularPoolIdsForResize() // getAdvisedMemorySize - then pools may be overLimit even when // all slabs are not allocated. Otherwise, pools may be overLimit // only after all slabs are allocated. - // - return (allocator_->allSlabsAllocated()) || - (allocator_->getAdvisedMemorySize() != 0) - ? filterCompactCachePools(allocator_->getPoolsOverLimit()) + return (allocator_[0]->allSlabsAllocated()) || + (allocator_[0]->getAdvisedMemorySize() != 0) + ? filterCompactCachePools(allocator_[0]->getPoolsOverLimit()) : std::set{}; } @@ -4785,11 +5145,20 @@ const std::string CacheAllocator::getCacheName() const { return config_.cacheName; } +template +size_t CacheAllocator::getPoolSize(PoolId poolId) const { + size_t poolSize = 0; + for (auto& allocator: allocator_) { + const auto& pool = allocator->getPool(poolId); + poolSize += pool.getPoolSize(); + } + return poolSize; +} + template PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { stats().numExpensiveStatsPolled.inc(); - - const auto& pool = allocator_->getPool(poolId); + const auto& pool = allocator_[0]->getPool(poolId); const auto& allocSizes = pool.getAllocSizes(); auto mpStats = pool.getStats(); const auto& classIds = mpStats.classIds; @@ -4808,7 +5177,7 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { if (!isCompactCache) { for (const ClassId cid : classIds) { uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get(); - XDCHECK(mmContainers_[poolId][cid], + XDCHECK(mmContainers_[0][poolId][cid], folly::sformat("Pid {}, Cid {} not initialized.", poolId, cid)); cacheStats.insert( {cid, @@ -4818,7 +5187,7 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { (*stats_.fragmentationSize)[poolId][cid].get(), classHits, (*stats_.chainedItemEvictions)[poolId][cid].get(), (*stats_.regularItemEvictions)[poolId][cid].get(), - mmContainers_[poolId][cid]->getStats()} + mmContainers_[0][poolId][cid]->getStats()} }); totalHits += classHits; @@ -4827,7 +5196,7 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { PoolStats ret; ret.isCompactCache = isCompactCache; - ret.poolName = allocator_->getPoolName(poolId); + ret.poolName = allocator_[0]->getPoolName(poolId); ret.poolSize = pool.getPoolSize(); ret.poolUsableSize = pool.getPoolUsableSize(); ret.poolAdvisedSize = pool.getPoolAdvisedSize(); @@ -4839,6 +5208,15 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { return ret; } +template +ACStats CacheAllocator::getACStats(TierId tid, + PoolId poolId, + ClassId classId) const { + const auto& pool = allocator_[tid]->getPool(poolId); + const auto& ac = pool.getAllocationClass(classId); + return ac.getStats(); +} + template PoolEvictionAgeStats CacheAllocator::getPoolEvictionAgeStats( PoolId pid, unsigned int slabProjectionLength) const { @@ -4846,12 +5224,12 @@ PoolEvictionAgeStats CacheAllocator::getPoolEvictionAgeStats( PoolEvictionAgeStats stats; - const auto& pool = allocator_->getPool(pid); + const auto& pool = allocator_[0]->getPool(pid); const auto& allocSizes = pool.getAllocSizes(); for (ClassId cid = 0; cid < static_cast(allocSizes.size()); ++cid) { - auto& mmContainer = getMMContainer(pid, cid); + auto& mmContainer = getMMContainer(0, pid, cid); const auto numItemsPerSlab = - allocator_->getPool(pid).getAllocationClass(cid).getAllocsPerSlab(); + allocator_[0]->getPool(pid).getAllocationClass(cid).getAllocsPerSlab(); const auto projectionLength = numItemsPerSlab * slabProjectionLength; stats.classEvictionAgeStats[cid] = mmContainer.getEvictionAgeStat(projectionLength); @@ -4895,7 +5273,7 @@ void CacheAllocator::releaseSlab(PoolId pid, } try { - auto releaseContext = allocator_->startSlabRelease( + auto releaseContext = allocator_[0]->startSlabRelease( pid, victim, receiver, mode, hint, [this]() -> bool { return shutDownInProgress_; }); @@ -4904,15 +5282,15 @@ void CacheAllocator::releaseSlab(PoolId pid, return; } - releaseSlabImpl(releaseContext); - if (!allocator_->allAllocsFreed(releaseContext)) { + releaseSlabImpl(0, releaseContext); + if (!allocator_[0]->allAllocsFreed(releaseContext)) { throw std::runtime_error( folly::sformat("Was not able to free all allocs. PoolId: {}, AC: {}", releaseContext.getPoolId(), releaseContext.getClassId())); } - allocator_->completeSlabRelease(releaseContext); + allocator_[0]->completeSlabRelease(releaseContext); } catch (const exception::SlabReleaseAborted& e) { stats_.numAbortedSlabReleases.inc(); throw exception::SlabReleaseAborted(folly::sformat( @@ -4942,7 +5320,7 @@ SlabReleaseStats CacheAllocator::getSlabReleaseStats() } template -void CacheAllocator::releaseSlabImpl( +void CacheAllocator::releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext) { auto startTime = std::chrono::milliseconds(util::getCurrentTimeMs()); bool releaseStuck = false; @@ -4985,7 +5363,7 @@ void CacheAllocator::releaseSlabImpl( // If moving fails, evict it evictForSlabRelease(item); } - XDCHECK(allocator_->isAllocFreed(releaseContext, alloc)); + XDCHECK(allocator_[tid]->isAllocFreed(releaseContext, alloc)); } } @@ -5046,7 +5424,8 @@ bool CacheAllocator::moveForSlabRelease(Item& oldItem) { return false; } - const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory()); + auto tid = getTierId(oldItem); + const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory()); if (chainedItem) { newItemHdl.reset(); auto parentKey = parentItem->getKey(); @@ -5074,7 +5453,7 @@ bool CacheAllocator::moveForSlabRelease(Item& oldItem) { auto ref = unmarkMovingAndWakeUpWaiters(oldItem, std::move(newItemHdl)); XDCHECK_EQ(0u, ref); } - allocator_->free(&oldItem); + allocator_[tid]->free(&oldItem); (*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub( util::getFragmentation(*this, oldItem)); @@ -5103,17 +5482,21 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { return newItemHdl; } + const auto tid = getTierId(oldItem); const auto allocInfo = - allocator_->getAllocInfo(static_cast(&oldItem)); + allocator_[tid]->getAllocInfo(static_cast(&oldItem)); + bool evict = !config_.insertToFirstFreeTier || tid == getNumTiers() - 1; // Set up the destination for the move. Since oldItem would have the moving // bit set, it won't be picked for eviction. - auto newItemHdl = allocateInternal(allocInfo.poolId, - oldItem.getKey(), - oldItem.getSize(), - oldItem.getCreationTime(), - oldItem.getExpiryTime(), - false); + auto newItemHdl = allocateInternalTier(tid, + allocInfo.poolId, + oldItem.getKey(), + oldItem.getSize(), + oldItem.getCreationTime(), + oldItem.getExpiryTime(), + false, + evict); if (!newItemHdl) { return {}; } @@ -5150,7 +5533,7 @@ void CacheAllocator::evictForSlabRelease(Item& item) { } const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); + allocator_[getTierId(item)]->getAllocInfo(static_cast(&item)); if (evicted->hasChainedItem()) { (*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId].inc(); } else { @@ -5199,11 +5582,15 @@ bool CacheAllocator::markMovingForSlabRelease( // At first, we assume this item was already freed bool itemFreed = true; bool markedMoving = false; - const auto fn = [this, &markedMoving, &itemFreed](void* memory) { + TierId tid = getTierId(alloc); + const auto fn = [this, tid, &markedMoving, &itemFreed](void* memory) { // Since this callback is executed, the item is not yet freed itemFreed = false; Item* item = static_cast(memory); - auto& mmContainer = getMMContainer(*item); + auto allocInfo = allocator_[tid]->getAllocInfo(memory); + auto pid = allocInfo.poolId; + auto cid = allocInfo.classId; + auto& mmContainer = getMMContainer(tid, pid, cid); mmContainer.withContainerLock([this, &mmContainer, &item, &markedMoving]() { // we rely on the mmContainer lock to safely check that the item is // currently in the mmContainer (no other threads are currently @@ -5241,7 +5628,7 @@ bool CacheAllocator::markMovingForSlabRelease( auto startTime = util::getCurrentTimeSec(); while (true) { - allocator_->processAllocForRelease(ctx, alloc, fn); + allocator_[tid]->processAllocForRelease(ctx, alloc, fn); // If item is already freed we give up trying to mark the item moving // and return false, otherwise if marked as moving, we return true. @@ -5256,7 +5643,7 @@ bool CacheAllocator::markMovingForSlabRelease( itemFreed = true; if (shutDownInProgress_) { - allocator_->abortSlabRelease(ctx); + allocator_[tid]->abortSlabRelease(ctx); throw exception::SlabReleaseAborted( folly::sformat("Slab Release aborted while still trying to mark" " as moving for Item: {}. Pool: {}, Class: {}.", @@ -5280,12 +5667,15 @@ template CCacheT* CacheAllocator::addCompactCache(folly::StringPiece name, size_t size, Args&&... args) { + if (getNumTiers() != 1) + throw std::runtime_error("TODO: compact cache for multi-tier Cache not supported."); + if (!config_.isCompactCacheEnabled()) { throw std::logic_error("Compact cache is not enabled"); } std::unique_lock lock(compactCachePoolsLock_); - auto poolId = allocator_->addPool(name, size, {Slab::kSize}); + auto poolId = allocator_[0]->addPool(name, size, {Slab::kSize}); isCompactCachePool_[poolId] = true; auto ptr = std::make_unique( @@ -5394,22 +5784,43 @@ folly::IOBufQueue CacheAllocator::saveStateToIOBuf() { *metadata_.numChainedChildItems() = stats_.numChainedChildItems.get(); *metadata_.numAbortedSlabReleases() = stats_.numAbortedSlabReleases.get(); - auto serializeMMContainers = [](MMContainers& mmContainers) { - MMSerializationTypeContainer state; - for (unsigned int i = 0; i < mmContainers.size(); ++i) { + const auto numTiers = getNumTiers(); + // TODO: implement serialization for multiple tiers + auto serializeMMContainers = [numTiers](MMContainers& mmContainers) { + std::map containers; + for (unsigned int i = 0; i < numTiers; ++i) { for (unsigned int j = 0; j < mmContainers[i].size(); ++j) { - if (mmContainers[i][j]) { - state.pools_ref()[i][j] = mmContainers[i][j]->saveState(); + for (unsigned int k = 0; k < mmContainers[i][j].size(); ++k) { + if (mmContainers[i][j][k]) { + serialization::MemoryDescriptorObject md; + md.tid_ref() = i; + md.pid_ref() = j; + md.cid_ref() = k; + containers[md] = mmContainers[i][j][k]->saveState(); + } } } } + MMSerializationTypeContainer state; + state.containers_ref() = containers; return state; }; MMSerializationTypeContainer mmContainersState = serializeMMContainers(mmContainers_); AccessSerializationType accessContainerState = accessContainer_->saveState(); - MemoryAllocator::SerializationType allocatorState = allocator_->saveState(); + + auto serializeAllocators = [numTiers,this]() { + AllocatorsSerializationType state; + std::map allocators; + for (int i = 0; i < numTiers; ++i) { + allocators[i] = allocator_[i]->saveState(); + } + state.allocators_ref() = allocators; + return state; + }; + AllocatorsSerializationType allocatorsState = serializeAllocators(); + CCacheManager::SerializationType ccState = compactCacheManager_->saveState(); AccessSerializationType chainedItemAccessContainerState = @@ -5419,7 +5830,7 @@ folly::IOBufQueue CacheAllocator::saveStateToIOBuf() { // results into a single buffer. folly::IOBufQueue queue; Serializer::serializeToIOBufQueue(queue, metadata_); - Serializer::serializeToIOBufQueue(queue, allocatorState); + Serializer::serializeToIOBufQueue(queue, allocatorsState); Serializer::serializeToIOBufQueue(queue, ccState); Serializer::serializeToIOBufQueue(queue, mmContainersState); Serializer::serializeToIOBufQueue(queue, accessContainerState); @@ -5434,8 +5845,7 @@ bool CacheAllocator::stopWorkers(std::chrono::seconds timeout) { success &= stopPoolResizer(timeout); success &= stopMemMonitor(timeout); success &= stopReaper(timeout); - success &= stopBackgroundEvictor(timeout); - success &= stopBackgroundPromoter(timeout); + success &= stopBackgroundMover(timeout); return success; } @@ -5473,6 +5883,8 @@ CacheAllocator::shutDown() { (shmShutDownStatus == ShmShutDownRes::kSuccess); shmManager_.reset(); + // TODO: save per-tier state + if (shmShutDownSucceeded) { if (!nvmShutDownStatusOpt || *nvmShutDownStatusOpt) return ShutDownStatus::kSuccess; @@ -5536,23 +5948,26 @@ CacheAllocator::deserializeMMContainers( const auto container = deserializer.deserialize(); - MMContainers mmContainers; - - for (auto& kvPool : *container.pools_ref()) { - auto i = static_cast(kvPool.first); - auto& pool = getPool(i); - for (auto& kv : kvPool.second) { - auto j = static_cast(kv.first); - MMContainerPtr ptr = - std::make_unique(kv.second, - compressor); - auto config = ptr->getConfig(); - config.addExtraConfig(config_.trackTailHits - ? pool.getAllocationClass(j).getAllocsPerSlab() - : 0); - ptr->setConfig(config); - mmContainers[i][j] = std::move(ptr); - } + /* TODO: right now, we create empty containers because deserialization + * only works for a single (topmost) tier. */ + MMContainers mmContainers{getNumTiers()}; + + std::map containerMap = + *container.containers(); + for (auto md : containerMap) { + uint32_t tid = *md.first.tid(); + uint32_t pid = *md.first.pid(); + uint32_t cid = *md.first.cid(); + auto& pool = getPoolByTid(pid,tid); + MMContainerPtr ptr = + std::make_unique(md.second, + compressor); + auto config = ptr->getConfig(); + config.addExtraConfig(config_.trackTailHits + ? pool.getAllocationClass(cid).getAllocsPerSlab() + : 0); + ptr->setConfig(config); + mmContainers[tid][pid][cid] = std::move(ptr); } // We need to drop the unevictableMMContainer in the desierializer. // TODO: remove this at version 17. @@ -5694,8 +6109,7 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { ret.nvmCacheEnabled = nvmCache_ ? nvmCache_->isEnabled() : false; ret.reaperStats = getReaperStats(); ret.rebalancerStats = getRebalancerStats(); - ret.evictionStats = getBackgroundMoverStats(MoverDir::Evict); - ret.promotionStats = getBackgroundMoverStats(MoverDir::Promote); + ret.moverStats = getBackgroundMoverStats(); ret.numActiveHandles = getNumActiveHandles(); ret.isNewRamCache = cacheCreationTime_ == cacheInstanceCreationTime_; @@ -5709,11 +6123,14 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { template CacheMemoryStats CacheAllocator::getCacheMemoryStats() const { - const auto totalCacheSize = allocator_->getMemorySize(); - const auto configuredTotalCacheSize = allocator_->getMemorySizeInclAdvised(); - + size_t totalCacheSize = 0; + size_t configuredTotalCacheSize = 0; + for(auto& allocator: allocator_) { + totalCacheSize += allocator->getMemorySize(); + configuredTotalCacheSize += allocator->getMemorySizeInclAdvised(); + } auto addSize = [this](size_t a, PoolId pid) { - return a + allocator_->getPool(pid).getPoolSize(); + return a + allocator_[0]->getPool(pid).getPoolSize(); }; const auto regularPoolIds = getRegularPoolIds(); const auto ccCachePoolIds = getCCachePoolIds(); @@ -5726,9 +6143,9 @@ CacheMemoryStats CacheAllocator::getCacheMemoryStats() const { configuredTotalCacheSize, configuredRegularCacheSize, configuredCompactCacheSize, - allocator_->getAdvisedMemorySize(), + allocator_[0]->getAdvisedMemorySize(), memMonitor_ ? memMonitor_->getMaxAdvisePct() : 0, - allocator_->getUnreservedMemorySize(), + allocator_[0]->getUnreservedMemorySize(), nvmCache_ ? nvmCache_->getSize() : 0, util::getMemAvailable(), util::getRSSBytes()}; @@ -5867,61 +6284,37 @@ bool CacheAllocator::startNewReaper( template auto CacheAllocator::createBgWorkerMemoryAssignments( - size_t numWorkers) { + size_t numWorkers, TierId tid) { std::vector> asssignedMemory(numWorkers); - auto pools = filterCompactCachePools(allocator_->getPoolIds()); + auto pools = filterCompactCachePools(allocator_[tid]->getPoolIds()); for (const auto pid : pools) { - const auto& mpStats = getPool(pid).getStats(); + const auto& mpStats = getPoolByTid(pid, tid).getStats(); for (const auto cid : mpStats.classIds) { - asssignedMemory[BackgroundMover::workerId(pid, cid, numWorkers)] - .emplace_back(pid, cid); + asssignedMemory[BackgroundMover::workerId(tid, pid, cid, numWorkers)] + .emplace_back(tid, pid, cid); } } return asssignedMemory; } template -bool CacheAllocator::startNewBackgroundEvictor( - std::chrono::milliseconds interval, - std::shared_ptr strategy, - size_t threads) { - XDCHECK(threads > 0); - backgroundEvictor_.resize(threads); - bool result = true; - - auto memoryAssignments = createBgWorkerMemoryAssignments(threads); - for (size_t i = 0; i < threads; i++) { - auto ret = startNewWorker("BackgroundEvictor" + std::to_string(i), - backgroundEvictor_[i], interval, *this, strategy, - MoverDir::Evict); - result = result && ret; - - if (result) { - backgroundEvictor_[i]->setAssignedMemory(std::move(memoryAssignments[i])); - } - } - return result; -} - -template -bool CacheAllocator::startNewBackgroundPromoter( +bool CacheAllocator::startNewBackgroundMover( std::chrono::milliseconds interval, - std::shared_ptr strategy, + size_t evictionBatch, + size_t promotionBatch, + double targetFree, size_t threads) { XDCHECK(threads > 0); - backgroundPromoter_.resize(threads); + backgroundMover_.resize(threads); bool result = true; - - auto memoryAssignments = createBgWorkerMemoryAssignments(threads); + auto memoryAssignments = createBgWorkerMemoryAssignments(threads, 0); for (size_t i = 0; i < threads; i++) { - auto ret = startNewWorker("BackgroundPromoter" + std::to_string(i), - backgroundPromoter_[i], interval, *this, strategy, - MoverDir::Promote); + auto ret = startNewWorker("BackgroundMover" + std::to_string(i), + backgroundMover_[i], interval, *this, + evictionBatch, promotionBatch, targetFree); result = result && ret; - if (result) { - backgroundPromoter_[i]->setAssignedMemory( - std::move(memoryAssignments[i])); + backgroundMover_[i]->setAssignedMemory(std::move(memoryAssignments[i])); } } return result; @@ -5976,23 +6369,11 @@ bool CacheAllocator::stopReaper(std::chrono::seconds timeout) { } template -bool CacheAllocator::stopBackgroundEvictor( - std::chrono::seconds timeout) { - bool result = true; - for (size_t i = 0; i < backgroundEvictor_.size(); i++) { - auto ret = stopWorker("BackgroundEvictor", backgroundEvictor_[i], timeout); - result = result && ret; - } - return result; -} - -template -bool CacheAllocator::stopBackgroundPromoter( +bool CacheAllocator::stopBackgroundMover( std::chrono::seconds timeout) { bool result = true; - for (size_t i = 0; i < backgroundPromoter_.size(); i++) { - auto ret = - stopWorker("BackgroundPromoter", backgroundPromoter_[i], timeout); + for (size_t i = 0; i < backgroundMover_.size(); i++) { + auto ret = stopWorker("BackgroundMover", backgroundMover_[i], timeout); result = result && ret; } return result; @@ -6015,7 +6396,8 @@ bool CacheAllocator::cleanupStrayShmSegments( // Any other concurrent process can not be attached to the segments or // even if it does, we want to mark it for destruction. ShmManager::removeByName(cacheDir, detail::kShmInfoName, posix); - ShmManager::removeByName(cacheDir, detail::kShmCacheName, posix); + ShmManager::removeByName(cacheDir, detail::kShmCacheName + + std::to_string(0 /* TODO: per tier */), posix); ShmManager::removeByName(cacheDir, detail::kShmHashTableName, posix); ShmManager::removeByName(cacheDir, detail::kShmChainedItemHashTableName, posix); @@ -6030,13 +6412,14 @@ uint64_t CacheAllocator::getItemPtrAsOffset(const void* ptr) { // errors downstream. // if this succeeeds, the address is valid within the cache. - allocator_->getAllocInfo(ptr); + auto tid = getTierId(ptr); + allocator_[tid]->getAllocInfo(ptr); if (!isOnShm_ || !shmManager_) { throw std::invalid_argument("Shared memory not used"); } - const auto& shm = shmManager_->getShmByName(detail::kShmCacheName); + const auto& shm = shmManager_->getShmByName(detail::kShmCacheName + std::to_string(tid)); return reinterpret_cast(ptr) - reinterpret_cast(shm.getCurrentMapping().addr); diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index 59d659f6f..078b51b74 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -27,7 +27,6 @@ #include #include -#include "cachelib/allocator/BackgroundMoverStrategy.h" #include "cachelib/allocator/Cache.h" #include "cachelib/allocator/MM2Q.h" #include "cachelib/allocator/MemoryMonitor.h" @@ -285,16 +284,13 @@ class CacheAllocatorConfig { std::chrono::seconds ccacheInterval, uint32_t ccacheStepSizePercent); - // Enable the background evictor - scans a tier to look for objects - // to evict to the next tier - CacheAllocatorConfig& enableBackgroundEvictor( - std::shared_ptr backgroundMoverStrategy, - std::chrono::milliseconds regularInterval, - size_t threads); - - CacheAllocatorConfig& enableBackgroundPromoter( - std::shared_ptr backgroundMoverStrategy, + // Enable the background moveor - scans a tier to look for objects + // to move to the next tier or just evict if single tier. + CacheAllocatorConfig& enableBackgroundMover( std::chrono::milliseconds regularInterval, + size_t evictionBatch, + size_t promotionBatch, + double targetFree, size_t threads); // This enables an optimization for Pool rebalancing and resizing. @@ -329,6 +325,9 @@ class CacheAllocatorConfig { // Library team if you find yourself customizing this. CacheAllocatorConfig& setThrottlerConfig(util::Throttler::Config config); + // Insert items to first free memory tier + CacheAllocatorConfig& enableInsertToFirstFreeTier(); + // Passes in a callback to initialize an event tracker when the allocator // starts CacheAllocatorConfig& setEventTracker(EventTrackerSharedPtr&&); @@ -371,15 +370,9 @@ class CacheAllocatorConfig { poolOptimizeStrategy != nullptr; } - // @return whether background evictor thread is enabled - bool backgroundEvictorEnabled() const noexcept { - return backgroundEvictorInterval.count() > 0 && - backgroundEvictorStrategy != nullptr; - } - - bool backgroundPromoterEnabled() const noexcept { - return backgroundPromoterInterval.count() > 0 && - backgroundPromoterStrategy != nullptr; + // @return whether background mover thread is enabled + bool backgroundMoverEnabled() const noexcept { + return backgroundMoverInterval.count() > 0 && backgroundMoverThreads > 0; } // @return whether memory monitor is enabled @@ -496,25 +489,21 @@ class CacheAllocatorConfig { // make any progress for the below threshold std::chrono::milliseconds slabReleaseStuckThreshold{std::chrono::seconds(60)}; - // the background eviction strategy to be used - std::shared_ptr backgroundEvictorStrategy{nullptr}; - - // the background promotion strategy to be used - std::shared_ptr backgroundPromoterStrategy{nullptr}; - - // time interval to sleep between runs of the background evictor - std::chrono::milliseconds backgroundEvictorInterval{ - std::chrono::milliseconds{1000}}; - - // time interval to sleep between runs of the background promoter - std::chrono::milliseconds backgroundPromoterInterval{ + // time interval to sleep between runs of the background mover + std::chrono::milliseconds backgroundMoverInterval{ std::chrono::milliseconds{1000}}; - // number of thread used by background evictor - size_t backgroundEvictorThreads{1}; + // number of thread used by background mover + size_t backgroundMoverThreads{0}; - // number of thread used by background promoter - size_t backgroundPromoterThreads{1}; + // How much to keep the cache memory free. This is used by the background + // mover to decide when to evict items. + double backgroundTargetFree{0.02}; + // The number of items to evict in each batch in the background mover + size_t backgroundEvictionBatch{10}; + // The number of items to promote in each batch in the background mover + // only available when there are multiple memory tiers + size_t backgroundPromotionBatch{0}; // time interval to sleep between iterations of pool size optimization, // for regular pools and compact caches @@ -555,6 +544,11 @@ class CacheAllocatorConfig { // ABOVE are the config for various cache workers // + // if turned off, always insert new elements to topmost memory tier. + // if turned on, insert new element to first free memory tier or evict memory + // from the bottom one if memory cache is full + bool insertToFirstFreeTier = false; + // the number of tries to search for an item to evict // 0 means it's infinite unsigned int evictionSearchTries{50}; @@ -671,6 +665,12 @@ class CacheAllocatorConfig { {MemoryTierCacheConfig::fromShm().setRatio(1)}}; }; +template +CacheAllocatorConfig& CacheAllocatorConfig::enableInsertToFirstFreeTier() { + insertToFirstFreeTier = true; + return *this; +} + template CacheAllocatorConfig& CacheAllocatorConfig::setCacheName( const std::string& _cacheName) { @@ -1016,24 +1016,17 @@ CacheAllocatorConfig& CacheAllocatorConfig::enablePoolRebalancing( } template -CacheAllocatorConfig& CacheAllocatorConfig::enableBackgroundEvictor( - std::shared_ptr strategy, - std::chrono::milliseconds interval, - size_t evictorThreads) { - backgroundEvictorStrategy = strategy; - backgroundEvictorInterval = interval; - backgroundEvictorThreads = evictorThreads; - return *this; -} - -template -CacheAllocatorConfig& CacheAllocatorConfig::enableBackgroundPromoter( - std::shared_ptr strategy, +CacheAllocatorConfig& CacheAllocatorConfig::enableBackgroundMover( std::chrono::milliseconds interval, - size_t promoterThreads) { - backgroundPromoterStrategy = strategy; - backgroundPromoterInterval = interval; - backgroundPromoterThreads = promoterThreads; + size_t evictionBatch, + size_t promotionBatch, + double targetFree, + size_t moverThreads) { + backgroundMoverInterval = interval; + backgroundEvictionBatch = evictionBatch; + backgroundPromotionBatch = promotionBatch; + backgroundTargetFree = targetFree; + backgroundMoverThreads = moverThreads; return *this; } @@ -1274,6 +1267,7 @@ std::map CacheAllocatorConfig::serialize() const { configMap["nvmAdmissionMinTTL"] = std::to_string(nvmAdmissionMinTTL); configMap["delayCacheWorkersStart"] = delayCacheWorkersStart ? "true" : "false"; + configMap["insertToFirstFreeTier"] = std::to_string(insertToFirstFreeTier); mergeWithPrefix(configMap, throttleConfig.serialize(), "throttleConfig"); mergeWithPrefix(configMap, chainedItemAccessConfig.serialize(), diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h index 3692e55b4..d21400770 100644 --- a/cachelib/allocator/CacheStats.h +++ b/cachelib/allocator/CacheStats.h @@ -308,21 +308,29 @@ struct RebalancerStats { // Mover Stats struct BackgroundMoverStats { // the number of items this worker moved by looking at pools/classes stats - uint64_t numMovedItems{0}; - // number of times we went executed the thread //TODO: is this def correct? + uint64_t numEvictedItems{0}; + uint64_t numPromotedItems{0}; + + // number of times we went executed the thread (by periodic worker) uint64_t runCount{0}; - // total number of classes - uint64_t totalClasses{0}; - // eviction size - uint64_t totalBytesMoved{0}; - - BackgroundMoverStats& operator+=(const BackgroundMoverStats& rhs) { - numMovedItems += rhs.numMovedItems; - runCount += rhs.runCount; - totalClasses += rhs.totalClasses; - totalBytesMoved += rhs.totalBytesMoved; - return *this; - } + + // average number of items moved per run + double avgItemsMoved{0.0}; + + // number of times we actually traversed the mmContainer + uint64_t numTraversals{0}; + + // indicates the time in ns for the last iteration + uint64_t lastTraversalTimeNs{0}; + + // indicates the maximum of all traversals + uint64_t minTraversalTimeNs{0}; + + // indicates the minimum of all traversals + uint64_t maxTraversalTimeNs{0}; + + // indicates the average of all traversals + uint64_t avgTraversalTimeNs{0}; }; // CacheMetadata type to export @@ -345,10 +353,8 @@ struct Stats; // Stats that apply globally in cache and // the ones that are aggregated over all pools struct GlobalCacheStats { - // background eviction stats - BackgroundMoverStats evictionStats; - - BackgroundMoverStats promotionStats; + // background mover stats per each mover thread + std::vector moverStats; // number of calls to CacheAllocator::find uint64_t numCacheGets{0}; diff --git a/cachelib/allocator/FreeThresholdStrategy.cpp b/cachelib/allocator/FreeThresholdStrategy.cpp deleted file mode 100644 index f4afbd78f..000000000 --- a/cachelib/allocator/FreeThresholdStrategy.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "cachelib/allocator/FreeThresholdStrategy.h" - -namespace facebook::cachelib { - -FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark, - double highEvictionAcWatermark, - uint64_t maxEvictionBatch, - uint64_t minEvictionBatch) - : lowEvictionAcWatermark(lowEvictionAcWatermark), - highEvictionAcWatermark(highEvictionAcWatermark), - maxEvictionBatch(maxEvictionBatch), - minEvictionBatch(minEvictionBatch) {} - -std::vector FreeThresholdStrategy::calculateBatchSizes( - const CacheBase& /* cache */, - std::vector /* acVec */) { - throw std::runtime_error("Not supported yet!"); -} - -} // namespace facebook::cachelib diff --git a/cachelib/allocator/FreeThresholdStrategy.h b/cachelib/allocator/FreeThresholdStrategy.h deleted file mode 100644 index 13a2ac40d..000000000 --- a/cachelib/allocator/FreeThresholdStrategy.h +++ /dev/null @@ -1,56 +0,0 @@ -// @lint-ignore-every CLANGTIDY clang-diagnostic-unused-private-field - -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "cachelib/allocator/BackgroundMoverStrategy.h" -#include "cachelib/allocator/Cache.h" - -namespace facebook { -namespace cachelib { - -// Free threshold strategy for background promotion worker. -// This strategy tries to keep certain percent of memory free -// at all times. -class FreeThresholdStrategy : public BackgroundMoverStrategy { - public: - FreeThresholdStrategy(double lowEvictionAcWatermark, - double highEvictionAcWatermark, - uint64_t maxEvictionBatch, - uint64_t minEvictionBatch); - ~FreeThresholdStrategy() {} - - std::vector calculateBatchSizes( - const CacheBase& cache, std::vector acVecs); - - private: -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-private-field" -#endif - double lowEvictionAcWatermark{2.0}; - double highEvictionAcWatermark{5.0}; - uint64_t maxEvictionBatch{40}; - uint64_t minEvictionBatch{5}; -#if defined(__clang__) -#pragma clang diagnostic pop -#endif -}; - -} // namespace cachelib -} // namespace facebook diff --git a/cachelib/allocator/MM2Q.h b/cachelib/allocator/MM2Q.h index cece17e0e..3bf76b018 100644 --- a/cachelib/allocator/MM2Q.h +++ b/cachelib/allocator/MM2Q.h @@ -66,6 +66,7 @@ class MM2Q { enum LruType { Warm, WarmTail, Hot, Cold, ColdTail, NumTypes }; // Config class for MM2Q + // TODO: implement support for useCombinedLockForIterators struct Config { // Create from serialized config explicit Config(SerializationConfigType configState) @@ -460,6 +461,18 @@ class MM2Q { // is unchanged. bool add(T& node) noexcept; + // helper function to add the node under the container lock + void addNodeLocked(T& node, const Time& currTime); + + // adds the given nodes into the container and marks each as being present + // in the container. The nodes are added to the head of the lru. + // + // @param vector of nodes The nodes to be added to the container. + // @return number of nodes added - it is up to user to verify all + // expected nodes have been added. + template + uint32_t addBatch(It begin, It end) noexcept; + // removes the node from the lru and sets it previous and next to nullptr. // // @param node The node to be removed from the container. @@ -500,6 +513,11 @@ class MM2Q { template void withEvictionIterator(F&& f); + // Execute provided function under container lock. Function gets + // iterator passed as parameter. + template + void withPromotionIterator(F&& f); + // Execute provided function under container lock. template void withContainerLock(F&& f); @@ -889,16 +907,41 @@ bool MM2Q::Container::add(T& node) noexcept { if (node.isInMMContainer()) { return false; } + addNodeLocked(node, currTime); + return true; + }); +} - markHot(node); - unmarkCold(node); - unmarkTail(node); - lru_.getList(LruType::Hot).linkAtHead(node); - rebalance(); +// adds the node to the list assuming not in +// container and holding container lock +template T::*HookPtr> +void MM2Q::Container::addNodeLocked(T& node, const Time& currTime) { + XDCHECK(!node.isInMMContainer()); + markHot(node); + unmarkCold(node); + unmarkTail(node); + lru_.getList(LruType::Hot).linkAtHead(node); + rebalance(); + + node.markInMMContainer(); + setUpdateTime(node, currTime); +} - node.markInMMContainer(); - setUpdateTime(node, currTime); - return true; +template T::*HookPtr> +template +uint32_t MM2Q::Container::addBatch(It begin, It end) noexcept { + const auto currTime = static_cast