diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h
index aee86a4e3..2b921ff56 100644
--- a/cachelib/allocator/BackgroundMover.h
+++ b/cachelib/allocator/BackgroundMover.h
@@ -16,9 +16,8 @@
 
 #pragma once
 
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
+#include "cachelib/allocator/Cache.h"
 #include "cachelib/allocator/CacheStats.h"
-#include "cachelib/common/AtomicCounter.h"
 #include "cachelib/common/PeriodicWorker.h"
 
 namespace facebook::cachelib {
@@ -26,83 +25,139 @@ namespace facebook::cachelib {
 // needed for the cache api
 template <typename C>
 struct BackgroundMoverAPIWrapper {
-  static size_t traverseAndEvictItems(C& cache,
-                                      unsigned int pid,
-                                      unsigned int cid,
-                                      size_t batch) {
-    return cache.traverseAndEvictItems(pid, cid, batch);
+  // traverse the cache and move items from one tier to another; this
+  // forwards directly to C::traverseAndMoveItems
+  //
+  // @param cache             the cache interface
+  // @param tid               the tier to traverse
+  // @param pid               the pool id to traverse
+  // @param cid               the class id to traverse
+  // @param evictionBatch     number of items to evict in one go
+  // @param promotionBatch    number of items to promote in one go
+  // @return pair of number of items evicted and promoted
+  static std::pair<size_t, size_t> traverseAndMoveItems(
+      C& cache, TierId tid, PoolId pid, ClassId cid,
+      size_t evictionBatch, size_t promotionBatch) {
+    return cache.traverseAndMoveItems(
+        tid, pid, cid, evictionBatch, promotionBatch);
   }
-
-  static size_t traverseAndPromoteItems(C& cache,
-                                        unsigned int pid,
-                                        unsigned int cid,
-                                        size_t batch) {
-    return cache.traverseAndPromoteItems(pid, cid, batch);
+  // approximate usage of a class; {0, 0.0} until all slabs are allocated
+  static std::pair<size_t, double> getApproxUsage(
+      C& cache, TierId tid, PoolId pid, ClassId cid) {
+    const auto& pool = cache.getPoolByTid(pid, tid);
+    // we wait until all slabs are allocated before we start evicting
+    if (!pool.allSlabsAllocated()) {
+      return {0, 0.0};
+    }
+    return pool.getApproxUsage(cid);
+  }
+  // number of memory tiers reported by the cache
+  static unsigned int getNumTiers(C& cache) {
+    return cache.getNumTiers();
   }
 };
 
-enum class MoverDir { Evict = 0, Promote };
-
 // Periodic worker that evicts items from tiers in batches
 // The primary aim is to reduce insertion times for new items in the
 // cache
 template <typename CacheT>
 class BackgroundMover : public PeriodicWorker {
  public:
+  using ClassBgStatsType =
+      std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
   using Cache = CacheT;
   // @param cache               the cache interface
-  // @param strategy            the stragey class that defines how objects are
-  // moved (promoted vs. evicted and how much)
+  // @param evictionBatch       number of items to evict in one go
+  // @param promotionBatch      number of items to promote in one go
+  // @param targetFree          target free percentage in the class
   BackgroundMover(Cache& cache,
-                  std::shared_ptr<BackgroundMoverStrategy> strategy,
-                  MoverDir direction_);
+                  size_t evictionBatch,
+                  size_t promotionBatch,
+                  double targetFree);
 
   ~BackgroundMover() override;
 
   BackgroundMoverStats getStats() const noexcept;
-  std::map<PoolId, std::map<ClassId, uint64_t>> getClassStats() const noexcept;
+  ClassBgStatsType getPerClassStats() const noexcept { return movesPerClass_; }
 
   void setAssignedMemory(std::vector<MemoryDescriptorType>&& assignedMemory);
 
   // return id of the worker responsible for promoting/evicting from particlar
   // pool and allocation calss (id is in range [0, numWorkers))
-  static size_t workerId(PoolId pid, ClassId cid, size_t numWorkers);
+  static size_t workerId(TierId tid, PoolId pid, ClassId cid, size_t numWorkers);
 
  private:
-  std::map<PoolId, std::map<ClassId, uint64_t>> movesPerClass_;
+  struct TraversalStats {
+    // record the time taken by one traversal and fold it into the
+    // last/min/max/total values below
+    void recordTraversalTime(uint64_t nsTaken);
+
+    uint64_t getAvgTraversalTimeNs(uint64_t numTraversals) const;
+    uint64_t getMinTraversalTimeNs() const { return minTraversalTimeNs_; }
+    uint64_t getMaxTraversalTimeNs() const { return maxTraversalTimeNs_; }
+    uint64_t getLastTraversalTimeNs() const { return lastTraversalTimeNs_; }
+
+   private:
+    // time taken by the most recently recorded traversal
+    uint64_t lastTraversalTimeNs_{0};
+    uint64_t minTraversalTimeNs_{std::numeric_limits<uint64_t>::max()};
+    uint64_t maxTraversalTimeNs_{0};
+    uint64_t totalTraversalTimeNs_{0};
+  };
+
+  TraversalStats traversalStats_;
   // cache allocator's interface for evicting
   using Item = typename Cache::Item;
 
   Cache& cache_;
-  std::shared_ptr<BackgroundMoverStrategy> strategy_;
-  MoverDir direction_;
-
-  std::function<size_t(Cache&, unsigned int, unsigned int, size_t)> moverFunc;
+  uint8_t numTiers_{1}; // until we have multi-tier support
+  size_t evictionBatch_{0};
+  size_t promotionBatch_{0};
+  double targetFree_{0.03};
 
   // implements the actual logic of running the background evictor
   void work() override final;
   void checkAndRun();
 
-  AtomicCounter numMovedItems_{0};
-  AtomicCounter numTraversals_{0};
-  AtomicCounter totalBytesMoved_{0};
+  // populates the toFree map for each class with the number of items to free
+  std::map<MemoryDescriptorType, size_t> getNumItemsToFree(
+      const std::vector<MemoryDescriptorType>& assignedMemory);
+
+  uint64_t numEvictedItems_{0};
+  uint64_t numPromotedItems_{0};
+  uint64_t numTraversals_{0};
+
+  ClassBgStatsType movesPerClass_;
 
   std::vector<MemoryDescriptorType> assignedMemory_;
   folly::DistributedMutex mutex_;
 };
 
 template <typename CacheT>
-BackgroundMover<CacheT>::BackgroundMover(
-    Cache& cache,
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
-    MoverDir direction)
-    : cache_(cache), strategy_(strategy), direction_(direction) {
-  if (direction_ == MoverDir::Evict) {
-    moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndEvictItems;
-
-  } else if (direction_ == MoverDir::Promote) {
-    moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndPromoteItems;
-  }
+BackgroundMover<CacheT>::BackgroundMover(Cache& cache,
+                                         size_t evictionBatch,
+                                         size_t promotionBatch,
+                                         double targetFree)
+    : cache_(cache),
+      // tier count is queried once; members are listed in declaration order
+      numTiers_(BackgroundMoverAPIWrapper<CacheT>::getNumTiers(cache_)),
+      evictionBatch_(evictionBatch),
+      promotionBatch_(promotionBatch),
+      targetFree_(targetFree) {}
+
+template <typename CacheT>
+void BackgroundMover<CacheT>::TraversalStats::recordTraversalTime(
+    uint64_t nsTaken) {
+  lastTraversalTimeNs_ = nsTaken;
+  minTraversalTimeNs_ = std::min(minTraversalTimeNs_, nsTaken);
+  maxTraversalTimeNs_ = std::max(maxTraversalTimeNs_, nsTaken);
+  totalTraversalTimeNs_ += nsTaken;
+}
+
+template <typename CacheT>
+uint64_t BackgroundMover<CacheT>::TraversalStats::getAvgTraversalTimeNs(
+    uint64_t numTraversals) const {
+  return numTraversals ? totalTraversalTimeNs_ / numTraversals : 0;
 }
 
 template <typename CacheT>
@@ -123,8 +178,8 @@ template <typename CacheT>
 void BackgroundMover<CacheT>::setAssignedMemory(
     std::vector<MemoryDescriptorType>&& assignedMemory) {
   XLOG(INFO, "Class assigned to background worker:");
-  for (auto [pid, cid] : assignedMemory) {
-    XLOGF(INFO, "Pid: {}, Cid: {}", pid, cid);
+  for (auto [tid, pid, cid] : assignedMemory) {
+    XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid);
   }
 
   mutex_.lock_combine([this, &assignedMemory] {
@@ -132,57 +187,97 @@ void BackgroundMover<CacheT>::setAssignedMemory(
   });
 }
 
-// Look for classes that exceed the target memory capacity
-// and return those for eviction
+// per assigned class: number of items to free to get back to targetFree_
+template <typename CacheT>
+std::map<MemoryDescriptorType, size_t>
+BackgroundMover<CacheT>::getNumItemsToFree(
+    const std::vector<MemoryDescriptorType>& assignedMemory) {
+  std::map<MemoryDescriptorType, size_t> toFree;
+  for (const auto& md : assignedMemory) {
+    const auto [tid, pid, cid] = md;
+    const auto [activeItems, usage] =
+        BackgroundMoverAPIWrapper<CacheT>::getApproxUsage(cache_, tid, pid, cid);
+    // usage <= 0 must never reach the division below (it could when
+    // targetFree_ >= 1); such a class has nothing to free
+    if (usage <= 0 || usage < 1 - targetFree_) {
+      toFree[md] = 0;
+    } else {
+      size_t maxItems = activeItems / usage;
+      size_t targetItems = maxItems * (1 - targetFree_);
+      toFree[md] = activeItems > targetItems ? activeItems - targetItems : 0;
+    }
+  }
+  return toFree;
+}
+
 template <typename CacheT>
 void BackgroundMover<CacheT>::checkAndRun() {
   auto assignedMemory = mutex_.lock_combine([this] { return assignedMemory_; });
-
-  unsigned int moves = 0;
-  auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory);
-
-  for (size_t i = 0; i < batches.size(); i++) {
-    const auto [pid, cid] = assignedMemory[i];
-    const auto batch = batches[i];
-
-    if (batch == 0) {
-      continue;
+  // remaining number of items to evict for every assigned class
+  auto toFree = getNumItemsToFree(assignedMemory);
+  while (true) {
+    bool allDone = true;
+    bool madeProgress = false; // did this pass move at least one item?
+    for (const auto& md : assignedMemory) {
+      const auto [tid, pid, cid] = md;
+      auto& remaining = toFree[md];
+      // no eviction work to be done when there is already at least
+      // targetFree remaining in the class
+      const size_t evictionBatch = remaining == 0 ? 0 : evictionBatch_;
+      const size_t promotionBatch = 0; // will enable with multi-tier support
+      allDone = allDone && remaining == 0; // any items left to free?
+      if (promotionBatch + evictionBatch > 0) {
+        const auto begin = util::getCurrentTimeNs();
+        // try moving BATCH items from the class in order to reach free target
+        const auto moved =
+            BackgroundMoverAPIWrapper<CacheT>::traverseAndMoveItems(
+                cache_, tid, pid, cid, evictionBatch, promotionBatch);
+        madeProgress = madeProgress || moved.first + moved.second > 0;
+        numEvictedItems_ += moved.first;
+        numPromotedItems_ += moved.second;
+        remaining = remaining > moved.first ? remaining - moved.first : 0;
+        auto& perClass = movesPerClass_[md];
+        perClass.first += moved.first;
+        perClass.second += moved.second;
+        numTraversals_++;
+        const auto end = util::getCurrentTimeNs();
+        traversalStats_.recordTraversalTime(end > begin ? end - begin : 0);
+      }
+    }
+    // a pass that moved nothing would only spin; retry on the next wakeup
+    if (shouldStopWork() || allDone || !madeProgress) {
+      break;
     }
-
-    // try moving BATCH items from the class in order to reach free target
-    auto moved = moverFunc(cache_, pid, cid, batch);
-    moves += moved;
-    movesPerClass_[pid][cid] += moved;
-    totalBytesMoved_.add(moved * cache_.getPool(pid).getAllocSizes()[cid]);
   }
-
-  numTraversals_.inc();
-  numMovedItems_.add(moves);
 }
 
 template <typename CacheT>
 BackgroundMoverStats BackgroundMover<CacheT>::getStats() const noexcept {
   BackgroundMoverStats stats;
-  stats.numMovedItems = numMovedItems_.get();
-  stats.runCount = numTraversals_.get();
-  stats.totalBytesMoved = totalBytesMoved_.get();
+  stats.numEvictedItems = numEvictedItems_;
+  stats.numPromotedItems = numPromotedItems_;
+  stats.numTraversals = numTraversals_;
+  stats.runCount = getRunCount();
+  const auto moved = stats.numEvictedItems + stats.numPromotedItems;
+  stats.avgItemsMoved = // 0/0 would be NaN before the first traversal
+      numTraversals_ ? static_cast<double>(moved) / numTraversals_ : 0.0;
+  stats.lastTraversalTimeNs = traversalStats_.getLastTraversalTimeNs();
+  stats.avgTraversalTimeNs =
+      traversalStats_.getAvgTraversalTimeNs(numTraversals_);
+  stats.minTraversalTimeNs = traversalStats_.getMinTraversalTimeNs();
+  stats.maxTraversalTimeNs = traversalStats_.getMaxTraversalTimeNs();
 
   return stats;
 }
 
 template <typename CacheT>
-std::map<PoolId, std::map<ClassId, uint64_t>>
-BackgroundMover<CacheT>::getClassStats() const noexcept {
-  return movesPerClass_;
-}
-
-template <typename CacheT>
-size_t BackgroundMover<CacheT>::workerId(PoolId pid,
+size_t BackgroundMover<CacheT>::workerId(TierId tid,
+                                         PoolId pid,
                                          ClassId cid,
                                          size_t numWorkers) {
   XDCHECK(numWorkers);
 
   // TODO: came up with some better sharding (use hashing?)
-  return (pid + cid) % numWorkers;
+  return (tid + pid + cid) % numWorkers;
 }
 } // namespace facebook::cachelib
diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h
deleted file mode 100644
index abf37edd1..000000000
--- a/cachelib/allocator/BackgroundMoverStrategy.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "cachelib/allocator/Cache.h"
-
-namespace facebook {
-namespace cachelib {
-
-struct MemoryDescriptorType {
-  MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {}
-  PoolId pid_;
-  ClassId cid_;
-};
-
-// Base class for background eviction strategy.
-class BackgroundMoverStrategy {
- public:
-  // Calculate how many items should be moved by the background mover
-  //
-  // @param cache   Cache allocator that implements CacheBase
-  // @param acVec   vector of memory descriptors for which batch sizes should
-  //                be calculated
-  //
-  // @return vector of batch sizes, where each element in the vector specifies
-  //         batch size for the memory descriptor in acVec
-  virtual std::vector<size_t> calculateBatchSizes(
-      const CacheBase& cache, std::vector<MemoryDescriptorType> acVec) = 0;
-
-  virtual ~BackgroundMoverStrategy() = default;
-};
-
-} // namespace cachelib
-} // namespace facebook
diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt
index 6103cdc82..f94c8c90c 100644
--- a/cachelib/allocator/CMakeLists.txt
+++ b/cachelib/allocator/CMakeLists.txt
@@ -35,7 +35,6 @@ add_library (cachelib_allocator
     CCacheManager.cpp
     ContainerTypes.cpp
     FreeMemStrategy.cpp
-    FreeThresholdStrategy.cpp
     HitsPerSlabStrategy.cpp
     LruTailAgeStrategy.cpp
     MarginalHitsOptimizeStrategy.cpp
diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index e225ba8a0..d5d99800b 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -73,6 +73,25 @@ enum class DestructorContext {
   kRemovedFromNVM
 };
 
+// identifies one allocation class: (memory tier, memory pool, class)
+struct MemoryDescriptorType {
+  MemoryDescriptorType(TierId tid, PoolId pid, ClassId cid)
+      : tid_(tid), pid_(pid), cid_(cid) {}
+  TierId tid_;
+  PoolId pid_;
+  ClassId cid_;
+
+  // lexicographic order on (tid_, pid_, cid_)
+  bool operator<(const MemoryDescriptorType& rhs) const {
+    return std::tie(tid_, pid_, cid_) < std::tie(rhs.tid_, rhs.pid_, rhs.cid_);
+  }
+
+  // equal when tier, pool and class all match
+  bool operator==(const MemoryDescriptorType& rhs) const {
+    return std::tie(tid_, pid_, cid_) == std::tie(rhs.tid_, rhs.pid_, rhs.cid_);
+  }
+};
+
 // A base class of cache exposing members and status agnostic of template type.
 class CacheBase {
  public:
@@ -96,12 +115,24 @@ class CacheBase {
   // @param poolId    The pool id to query
   virtual const MemoryPool& getPool(PoolId poolId) const = 0;
 
+  // Get the reference to a memory pool using a tier id, for stats purposes
+  //
+  // @param poolId    The pool id to query
+  // @param tierId    The tier of the pool id
+  virtual const MemoryPool& getPoolByTid(PoolId poolId, TierId tid) const = 0;
+
   // Get Pool specific stats (regular pools). This includes stats from the
   // Memory Pool and also the cache.
   //
   // @param poolId   the pool id
   virtual PoolStats getPoolStats(PoolId poolId) const = 0;
 
+  // Get Allocation Class specific stats.
+  // @param tid      the tier id
+  // @param poolId   the pool id
+  // @param classId  the class id
+  virtual ACStats getACStats(TierId tid,PoolId poolId, ClassId classId) const = 0;
+
   // @param poolId   the pool id
   virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0;
 
diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h
index 8238ae2fe..1854bad52 100644
--- a/cachelib/allocator/CacheAllocator.h
+++ b/cachelib/allocator/CacheAllocator.h
@@ -352,6 +352,43 @@ class CacheAllocator : public CacheBase {
     // if user-supplied SyncObj can fail. e.g. if a lock can timeout.
     virtual bool isValid() const { return true; }
   };
+
+  // For background worker stats
+  using ClassBgStatsType =
+      std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
+
+  // Movement (eviction/promotion) related data returned from
+  // function executed under mmContainer lock
+  struct MoveData {
+    MoveData() = delete;
+    MoveData(Item* candidate_,
+             Item* toRecycle_,
+             Item* toRecycleParent_,
+             bool chainedItem_,
+             bool expired_,
+             typename NvmCacheT::PutToken token_,
+             WriteHandle candidateHandle_)
+        : candidate(candidate_),
+          toRecycle(toRecycle_),
+          toRecycleParent(toRecycleParent_),
+          expired(expired_),
+          chainedItem(chainedItem_),
+          token(std::move(token_)),
+          candidateHandle(std::move(candidateHandle_)) {}
+
+    // item that is candidate for eviction
+    Item* candidate;
+    // actual alloc that will be recycled
+    // back up to allocator
+    Item* toRecycle;
+    // possible parent ref
+    Item* toRecycleParent;
+    bool expired;                       // is item expired
+    bool chainedItem;                   // is it a chained item
+    typename NvmCacheT::PutToken token; // put token for NVM cache
+    WriteHandle candidateHandle;        // handle in case we don't use moving bit
+  };
+
   using ChainedItemMovingSync = std::function<std::unique_ptr<SyncObj>(Key)>;
 
   using AccessContainer = typename Item::AccessContainer;
@@ -363,6 +400,7 @@ class CacheAllocator : public CacheBase {
   using MMSerializationTypeContainer =
       typename MMType::SerializationTypeContainer;
   using AccessSerializationType = typename AccessType::SerializationType;
+  using AllocatorsSerializationType = serialization::MemoryAllocatorCollection;
 
   using ShmManager = facebook::cachelib::ShmManager;
 
@@ -712,10 +750,7 @@ class CacheAllocator : public CacheBase {
   uint32_t getUsableSize(const Item& item) const;
 
   // create memory assignment to bg workers
-  auto createBgWorkerMemoryAssignments(size_t numWorkers);
-
-  // whether bg worker should be woken
-  bool shouldWakeupBgEvictor(PoolId pid, ClassId cid);
+  auto createBgWorkerMemoryAssignments(size_t numWorkers, TierId tid);
 
   // Get a random item from memory
   // This is useful for profiling and sampling cachelib managed memory
@@ -835,7 +870,7 @@ class CacheAllocator : public CacheBase {
   // @param config    new config for the pool
   //
   // @throw std::invalid_argument if the poolId is invalid
-  void overridePoolConfig(PoolId pid, const MMConfig& config);
+  void overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config);
 
   // update an existing pool's rebalance strategy
   //
@@ -876,8 +911,9 @@ class CacheAllocator : public CacheBase {
   // @return  true if the operation succeeded. false if the size of the pool is
   //          smaller than _bytes_
   // @throw   std::invalid_argument if the poolId is invalid.
+  // TODO: should call shrinkPool for specific tier?
   bool shrinkPool(PoolId pid, size_t bytes) {
-    return allocator_->shrinkPool(pid, bytes);
+    return allocator_[currentTier()]->shrinkPool(pid, bytes);
   }
 
   // grow an existing pool by _bytes_. This will fail if there is no
@@ -886,8 +922,9 @@ class CacheAllocator : public CacheBase {
   // @return    true if the pool was grown. false if the necessary number of
   //            bytes were not available.
   // @throw     std::invalid_argument if the poolId is invalid.
+  // TODO: should call growPool for specific tier?
   bool growPool(PoolId pid, size_t bytes) {
-    return allocator_->growPool(pid, bytes);
+    return allocator_[currentTier()]->growPool(pid, bytes);
   }
 
   // move bytes from one pool to another. The source pool should be at least
@@ -900,7 +937,7 @@ class CacheAllocator : public CacheBase {
   //          correct size to do the transfer.
   // @throw   std::invalid_argument if src or dest is invalid pool
   bool resizePools(PoolId src, PoolId dest, size_t bytes) override {
-    return allocator_->resizePools(src, dest, bytes);
+    return allocator_[currentTier()]->resizePools(src, dest, bytes);
   }
 
   // Add a new compact cache with given name and size
@@ -1083,25 +1120,12 @@ class CacheAllocator : public CacheBase {
   bool startNewReaper(std::chrono::milliseconds interval,
                       util::Throttler::Config reaperThrottleConfig);
 
-  // start background promoter, starting/stopping of this worker
-  // should not be done concurrently with addPool
-  // @param interval                the period this worker fires
-  // @param strategy                strategy to promote items
-  // @param threads                 number of threads used by the worker
-  bool startNewBackgroundPromoter(
-      std::chrono::milliseconds interval,
-      std::shared_ptr<BackgroundMoverStrategy> strategy,
-      size_t threads);
-
-  // start background evictor, starting/stopping of this worker
-  // should not be done concurrently with addPool
-  // @param interval                the period this worker fires
-  // @param strategy                strategy to evict items
-  // @param threads                 number of threads used by the worker
-  bool startNewBackgroundEvictor(
-      std::chrono::milliseconds interval,
-      std::shared_ptr<BackgroundMoverStrategy> strategy,
-      size_t threads);
+  // start background mover
+  bool startNewBackgroundMover(std::chrono::milliseconds interval,
+                               size_t evictionBatch,
+                               size_t promotionBatch,
+                               double targetFree,
+                               size_t threads);
 
   // Stop existing workers with a timeout
   bool stopPoolRebalancer(std::chrono::seconds timeout = std::chrono::seconds{
@@ -1111,10 +1135,8 @@ class CacheAllocator : public CacheBase {
                              0});
   bool stopMemMonitor(std::chrono::seconds timeout = std::chrono::seconds{0});
   bool stopReaper(std::chrono::seconds timeout = std::chrono::seconds{0});
-  bool stopBackgroundEvictor(
-      std::chrono::seconds timeout = std::chrono::seconds{0});
-  bool stopBackgroundPromoter(
-      std::chrono::seconds timeout = std::chrono::seconds{0});
+  bool stopBackgroundMover(std::chrono::seconds timeout = std::chrono::seconds{
+                               0});
 
   // Set pool optimization to either true or false
   //
@@ -1129,12 +1151,13 @@ class CacheAllocator : public CacheBase {
   // @throw std::invalid_argument if the memory does not belong to this
   //        cache allocator
   AllocInfo getAllocInfo(const void* memory) const {
-    return allocator_->getAllocInfo(memory);
+    return allocator_[getTierId(memory)]->getAllocInfo(memory);
   }
 
   // return the ids for the set of existing pools in this cache.
   std::set<PoolId> getPoolIds() const override final {
-    return allocator_->getPoolIds();
+    // all tiers have the same pool ids. TODO: deduplicate
+    return allocator_[0]->getPoolIds();
   }
 
   // return a list of pool ids that are backing compact caches. This includes
@@ -1146,18 +1169,22 @@ class CacheAllocator : public CacheBase {
 
   // return the pool with speicified id.
   const MemoryPool& getPool(PoolId pid) const override final {
-    return allocator_->getPool(pid);
+    return allocator_[currentTier()]->getPool(pid);
+  }
+
+  const MemoryPool& getPoolByTid(PoolId pid, TierId tid) const override final {
+    return allocator_[tid]->getPool(pid);
   }
 
   // calculate the number of slabs to be advised/reclaimed in each pool
   PoolAdviseReclaimData calcNumSlabsToAdviseReclaim() override final {
     auto regularPoolIds = getRegularPoolIds();
-    return allocator_->calcNumSlabsToAdviseReclaim(regularPoolIds);
+    return allocator_[currentTier()]->calcNumSlabsToAdviseReclaim(regularPoolIds);
   }
 
   // update number of slabs to advise in the cache
   void updateNumSlabsToAdvise(int32_t numSlabsToAdvise) override final {
-    allocator_->updateNumSlabsToAdvise(numSlabsToAdvise);
+    allocator_[currentTier()]->updateNumSlabsToAdvise(numSlabsToAdvise);
   }
 
   // returns a valid PoolId corresponding to the name or kInvalidPoolId if the
@@ -1165,8 +1192,9 @@ class CacheAllocator : public CacheBase {
   PoolId getPoolId(folly::StringPiece name) const noexcept;
 
   // returns the pool's name by its poolId.
-  std::string getPoolName(PoolId poolId) const override {
-    return allocator_->getPoolName(poolId);
+  std::string getPoolName(PoolId poolId) const {
+    // all tiers have the same pool names.
+    return allocator_[0]->getPoolName(poolId);
   }
 
   // get stats related to all kinds of slab release events.
@@ -1199,6 +1227,30 @@ class CacheAllocator : public CacheBase {
     return stats;
   }
 
+  // returns the background mover stats per thread
+  std::vector<BackgroundMoverStats> getBackgroundMoverStats() const {
+    std::vector<BackgroundMoverStats> perWorker;
+    for (const auto& mover : backgroundMover_) {
+      perWorker.push_back(mover->getStats());
+    }
+    return perWorker;
+  }
+
+  // returns the per-class (evicted, promoted) item counts of all background
+  // movers merged into a single map
+  ClassBgStatsType getBackgroundMoverClassStats() const {
+    ClassBgStatsType merged;
+    for (const auto& mover : backgroundMover_) {
+      // each mover reports the descriptors it worked on; when the same
+      // descriptor shows up again the later entry replaces the earlier one
+      const auto perClass = mover->getPerClassStats();
+      for (const auto& [md, counts] : perClass) {
+        merged[md] = counts;
+      }
+    }
+    return merged;
+  }
+
   // returns the pool rebalancer stats
   RebalancerStats getRebalancerStats() const {
     auto stats =
@@ -1238,6 +1290,9 @@ class CacheAllocator : public CacheBase {
   // return cache's memory usage stats
   CacheMemoryStats getCacheMemoryStats() const override final;
 
+  // return stats for Allocation Class
+  ACStats getACStats(TierId tid, PoolId pid, ClassId cid) const override final;
+
   // return the nvm cache stats map
   util::StatsMap getNvmCacheStatsMap() const override final;
 
@@ -1443,11 +1498,14 @@ class CacheAllocator : public CacheBase {
 
   using MMContainerPtr = std::unique_ptr<MMContainer>;
   using MMContainers =
-      std::array<std::array<MMContainerPtr, MemoryAllocator::kMaxClasses>,
-                 MemoryPoolManager::kMaxPools>;
+      std::vector<std::array<std::array<MMContainerPtr, MemoryAllocator::kMaxClasses>,
+                 MemoryPoolManager::kMaxPools>>;
 
   void createMMContainers(const PoolId pid, MMConfig config);
 
+  TierId getTierId(const Item& item) const;
+  TierId getTierId(const void* ptr) const;
+
   // acquire the MMContainer corresponding to the the Item's class and pool.
   //
   // @return pointer to the MMContainer.
@@ -1455,7 +1513,12 @@ class CacheAllocator : public CacheBase {
   // allocation from the memory allocator.
   MMContainer& getMMContainer(const Item& item) const noexcept;
 
-  MMContainer& getMMContainer(PoolId pid, ClassId cid) const noexcept;
+  MMContainer& getMMContainer(TierId tid, PoolId pid, ClassId cid) const noexcept;
+
+  // Get stats of the specified tid, pid and cid.
+  // If such mmcontainer is not valid (pool id or cid out of bound)
+  // or the mmcontainer is not initialized, return an empty stat.
+  MMContainerStat getMMContainerStat(TierId tid, PoolId pid, ClassId cid) const noexcept;
 
   // create a new cache allocation. The allocation can be initialized
   // appropriately and made accessible through insert or insertOrReplace.
@@ -1487,7 +1550,25 @@ class CacheAllocator : public CacheBase {
                                uint32_t size,
                                uint32_t creationTime,
                                uint32_t expiryTime,
-                               bool fromBgThread = false);
+                               bool fromBgThread);
+
+  // create a new cache allocation on specific memory tier.
+  // For description see allocateInternal.
+  //
+  // @param tid id of a memory tier
+  // @param fromBgThread whether this function was called from a bg
+  //        thread - this is used to decide whether bg thread should
+  //        be waken in case there is no free memory
+  // @param evict whether to evict an item from tier tid in case there
+  //        is not enough memory
+  WriteHandle allocateInternalTier(TierId tid,
+                                   PoolId id,
+                                   Key key,
+                                   uint32_t size,
+                                   uint32_t creationTime,
+                                   uint32_t expiryTime,
+                                   bool fromBgThread,
+                                   bool evict);
 
   // Allocate a chained item
   //
@@ -1566,6 +1647,15 @@ class CacheAllocator : public CacheBase {
   //              not exist.
   FOLLY_ALWAYS_INLINE WriteHandle findFastImpl(Key key, AccessMode mode);
 
+  // Moves a regular item to a different memory tier.
+  //
+  // @param oldItem     Reference to the item being moved
+  // @param newItemHdl  Reference to the handle of the new item being moved into
+  //
+  // @return true  If the move was completed, and the containers were updated
+  //               successfully.
+  bool moveRegularItemOnEviction(Item& oldItem, WriteHandle& newItemHdl);
+
   // Moves a regular item to a different slab. This should only be used during
   // slab release after the item's exclusive bit has been set. The user supplied
   // callback is responsible for copying the contents and fixing the semantics
@@ -1573,10 +1663,15 @@ class CacheAllocator : public CacheBase {
   //
   // @param oldItem     Reference to the item being moved
   // @param newItemHdl  Reference to the handle of the new item being moved into
+  // @param skipAddInMMContainer tells whether the new item should be added
+  //                     to the mmContainer right away or whether that is
+  //                     left to be done later in a batch
   //
   // @return true  If the move was completed, and the containers were updated
   //               successfully.
-  bool moveRegularItem(Item& oldItem, WriteHandle& newItemHdl);
+  bool moveRegularItem(Item& oldItem,
+                       WriteHandle& newItemHdl,
+                       bool skipAddInMMContainer = false);
 
   // template class for viewAsChainedAllocs that takes either ReadHandle or
   // WriteHandle
@@ -1743,15 +1838,17 @@ class CacheAllocator : public CacheBase {
   // Implementation to find a suitable eviction from the container. The
   // two parameters together identify a single container.
   //
+  // @param  tid  the id of the tier to look for evictions inside
   // @param  pid  the id of the pool to look for evictions inside
   // @param  cid  the id of the class to look for evictions inside
   // @return An evicted item or nullptr  if there is no suitable candidate found
   // within the configured number of attempts.
-  Item* findEviction(PoolId pid, ClassId cid);
+  Item* findEviction(TierId tid, PoolId pid, ClassId cid);
 
   // Get next eviction candidate from MMContainer, remove from AccessContainer,
   // MMContainer and insert into NVMCache if enabled.
   //
+  // @param tid  the id of the tier to look for evictions inside
   // @param pid  the id of the pool to look for evictions inside
   // @param cid  the id of the class to look for evictions inside
   // @param searchTries number of search attempts so far.
@@ -1759,9 +1856,16 @@ class CacheAllocator : public CacheBase {
   // @return pair of [candidate, toRecycle]. Pair of null if reached the end of
   // the eviction queue or no suitable candidate found
   // within the configured number of attempts
-  std::pair<Item*, Item*> getNextCandidate(PoolId pid,
+  std::pair<Item*, Item*> getNextCandidate(TierId tid,
+                                           PoolId pid,
                                            ClassId cid,
                                            unsigned int& searchTries);
+  // similar to the above method but returns a batch of eviction candidates
+  // as a vector of MoveData
+  std::vector<MoveData> getNextCandidates(TierId tid,
+                                          PoolId pid,
+                                          ClassId cid,
+                                          uint32_t batch);
 
   using EvictionIterator = typename MMContainer::LockedIterator;
 
@@ -1790,7 +1894,7 @@ class CacheAllocator : public CacheBase {
       const typename Item::PtrCompressor& compressor);
 
   unsigned int reclaimSlabs(PoolId id, size_t numSlabs) final {
-    return allocator_->reclaimSlabsAndGrow(id, numSlabs);
+    return allocator_[currentTier()]->reclaimSlabsAndGrow(id, numSlabs);
   }
 
   FOLLY_ALWAYS_INLINE EventTracker* getEventTracker() const {
@@ -1849,7 +1953,7 @@ class CacheAllocator : public CacheBase {
                    const void* hint = nullptr) final;
 
   // @param releaseContext  slab release context
-  void releaseSlabImpl(const SlabReleaseContext& releaseContext);
+  void releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext);
 
   // @return  true when successfully marked as moving,
   //          fasle when this item has already been freed
@@ -1892,24 +1996,53 @@ class CacheAllocator : public CacheBase {
     // primitives. So we consciously exempt ourselves here from TSAN data race
     // detection.
     folly::annotate_ignore_thread_sanitizer_guard g(__FILE__, __LINE__);
-    auto slabsSkipped = allocator_->forEachAllocation(std::forward<Fn>(f));
+    auto slabsSkipped = allocator_[currentTier()]->forEachAllocation(std::forward<Fn>(f));
     stats().numReaperSkippedSlabs.add(slabsSkipped);
   }
 
   // exposed for the background evictor to iterate through the memory and evict
   // in batch. This should improve insertion path for tiered memory config
-  size_t traverseAndEvictItems(unsigned int /* pid */,
-                               unsigned int /* cid */,
-                               size_t /* batch */) {
-    throw std::runtime_error("Not supported yet!");
-  }
-
-  // exposed for the background promoter to iterate through the memory and
-  // promote in batch. This should improve find latency
-  size_t traverseAndPromoteItems(unsigned int /* pid */,
-                                 unsigned int /* cid */,
-                                 size_t /* batch */) {
-    throw std::runtime_error("Not supported yet!");
+  // promotion batch only applies to tiered memory config
+  std::pair<size_t, size_t> traverseAndMoveItems(TierId tid,
+                                                 PoolId pid,
+                                                 ClassId cid,
+                                                 size_t evictionBatch,
+                                                 size_t promotionBatch) {
+    auto& mmContainer = getMMContainer(tid, pid, cid);
+    // never ask for more candidates than the container currently holds
+    const uint32_t currItems = mmContainer.size();
+    evictionBatch = std::min<size_t>(evictionBatch, currItems);
+    if (evictionBatch == 0) {
+      return {0, 0};
+    }
+    auto evictionData = getNextCandidates(tid, pid, cid, evictionBatch);
+    // we now have a list of candidates and toRecycles, they should go back
+    // to the allocator and we will do this in batch to avoid AC lock contention
+    // note - for chained items - we can't do this in bulk
+    std::vector<size_t> chainedIdx;
+    std::vector<Item*> toRecycles;
+    toRecycles.reserve(evictionData.size());
+    for (size_t idx = 0; idx < evictionData.size(); ++idx) {
+      auto& data = evictionData[idx];
+      if (data.chainedItem) {
+        XDCHECK(data.candidate->hasChainedItem());
+        chainedIdx.push_back(idx);
+      } else {
+        XDCHECK_EQ(data.candidate, data.toRecycle);
+        toRecycles.push_back(data.candidate);
+      }
+    }
+    for (const size_t chainedPos : chainedIdx) {
+      auto& data = evictionData[chainedPos];
+      releaseBackToAllocator(*data.candidate, RemoveContext::kNormal, false,
+                             data.toRecycle);
+      (*stats_.chainedItemEvictions)[pid][cid].inc();
+    }
+    allocator_[tid]->freeBatch(toRecycles.begin(), toRecycles.end(), pid, cid);
+    size_t evictions = toRecycles.size();
+    (*stats_.regularItemEvictions)[pid][cid].add(evictions);
+    // promotionBatch is not used here, so zero promotions are reported
+    return {evictions, 0};
   }
 
   // returns true if nvmcache is enabled and we should write this item to
@@ -1952,10 +2085,10 @@ class CacheAllocator : public CacheBase {
                   std::unique_ptr<T>& worker,
                   std::chrono::seconds timeout = std::chrono::seconds{0});
 
-  ShmSegmentOpts createShmCacheOpts();
-  std::unique_ptr<MemoryAllocator> createNewMemoryAllocator();
-  std::unique_ptr<MemoryAllocator> restoreMemoryAllocator();
-  std::unique_ptr<CCacheManager> restoreCCacheManager();
+  ShmSegmentOpts createShmCacheOpts(TierId tid);
+  std::unique_ptr<MemoryAllocator> createNewMemoryAllocator(TierId tid);
+  std::unique_ptr<MemoryAllocator> restoreMemoryAllocator(TierId tid, const serialization::MemoryAllocatorObject& sAllocator);
+  std::unique_ptr<CCacheManager> restoreCCacheManager(TierId tid);
 
   PoolIds filterCompactCachePools(const PoolIds& poolIds) const;
 
@@ -1996,8 +2129,7 @@ class CacheAllocator : public CacheBase {
   }
 
   typename Item::PtrCompressor createPtrCompressor() const {
-    return allocator_
-        ->createPtrCompressor<Item, typename Item::CompressedPtrType>();
+    return typename Item::PtrCompressor{allocator_};
   }
 
   // helper utility to throttle and optionally log.
@@ -2020,9 +2152,14 @@ class CacheAllocator : public CacheBase {
 
   // @param type        the type of initialization
   // @return nullptr if the type is invalid
-  // @return pointer to memory allocator
+  // @return vector of pointers to memory allocator
   // @throw std::runtime_error if type is invalid
-  std::unique_ptr<MemoryAllocator> initAllocator(InitMemType type);
+  std::vector<std::unique_ptr<MemoryAllocator>> initAllocator(InitMemType type);
+
+  std::vector<std::unique_ptr<MemoryAllocator>> createPrivateAllocator();
+  std::vector<std::unique_ptr<MemoryAllocator>> createAllocators();
+  std::vector<std::unique_ptr<MemoryAllocator>> restoreAllocators();
+
   // @param type        the type of initialization
   // @return nullptr if the type is invalid
   // @return pointer to access container
@@ -2087,44 +2224,6 @@ class CacheAllocator : public CacheBase {
                      : false;
   }
 
-  // returns the background mover stats
-  BackgroundMoverStats getBackgroundMoverStats(MoverDir direction) const {
-    auto stats = BackgroundMoverStats{};
-    if (direction == MoverDir::Evict) {
-      for (auto& bg : backgroundEvictor_)
-        stats += bg->getStats();
-    } else if (direction == MoverDir::Promote) {
-      for (auto& bg : backgroundPromoter_)
-        stats += bg->getStats();
-    }
-    return stats;
-  }
-
-  std::map<PoolId, std::map<ClassId, uint64_t>> getBackgroundMoverClassStats(
-      MoverDir direction) const {
-    std::map<PoolId, std::map<ClassId, uint64_t>> stats;
-
-    if (direction == MoverDir::Evict) {
-      for (auto& bg : backgroundEvictor_) {
-        for (auto& pid : bg->getClassStats()) {
-          for (auto& cid : pid.second) {
-            stats[pid.first][cid.first] += cid.second;
-          }
-        }
-      }
-    } else if (direction == MoverDir::Promote) {
-      for (auto& bg : backgroundPromoter_) {
-        for (auto& pid : bg->getClassStats()) {
-          for (auto& cid : pid.second) {
-            stats[pid.first][cid.first] += cid.second;
-          }
-        }
-      }
-    }
-
-    return stats;
-  }
-
   bool tryGetHandleWithWaitContextForMovingItem(Item& item,
                                                 WriteHandle& handle);
 
@@ -2207,6 +2306,19 @@ class CacheAllocator : public CacheBase {
 
   // BEGIN private members
 
+  TierId currentTier() const {
+    // TODO: every function which calls this method should be refactored.
+    // We should go case by case and either make such function work on
+    // all tiers or expose separate parameter to describe the tier ID.
+    return 0;
+  }
+
+  unsigned getNumTiers() const {
+    return config_.memoryTierConfigs.size();
+  }
+
+  size_t memoryTierSize(TierId tid) const;
+
   // Whether the memory allocator for this cache allocator was created on shared
   // memory. The hash table, chained item hash table etc is also created on
   // shared memory except for temporary shared memory mode when they're created
@@ -2232,9 +2344,10 @@ class CacheAllocator : public CacheBase {
   const MMConfig mmConfig_{};
 
   // the memory allocator for allocating out of the available memory.
-  std::unique_ptr<MemoryAllocator> allocator_;
+  std::vector<std::unique_ptr<MemoryAllocator>> allocator_;
 
   // compact cache allocator manager
+  // TODO: per tier?
   std::unique_ptr<CCacheManager> compactCacheManager_;
 
   // compact cache instances reside here when user "add" or "attach" compact
@@ -2285,9 +2398,8 @@ class CacheAllocator : public CacheBase {
   // free memory monitor
   std::unique_ptr<MemoryMonitor> memMonitor_;
 
-  // background evictor
-  std::vector<std::unique_ptr<BackgroundMover<CacheT>>> backgroundEvictor_;
-  std::vector<std::unique_ptr<BackgroundMover<CacheT>>> backgroundPromoter_;
+  // background data movement, for single tier, this just evicts
+  std::vector<std::unique_ptr<BackgroundMover<CacheT>>> backgroundMover_;
 
   // check whether a pool is a slabs pool
   std::array<bool, MemoryPoolManager::kMaxPools> isCompactCachePool_{};
@@ -2441,12 +2553,12 @@ CacheAllocator<CacheTrait>::CacheAllocator(
                     : serialization::CacheAllocatorMetadata{}},
       allocator_(initAllocator(type)),
       compactCacheManager_(type != InitMemType::kMemAttach
-                               ? std::make_unique<CCacheManager>(*allocator_)
-                               : restoreCCacheManager()),
+                               ? std::make_unique<CCacheManager>(*allocator_[0] /* TODO: per tier */)
+                               : restoreCCacheManager(0/* TODO: per tier */)),
       compressor_(createPtrCompressor()),
       mmContainers_(type == InitMemType::kMemAttach
                         ? deserializeMMContainers(*deserializer_, compressor_)
-                        : MMContainers{}),
+                        : MMContainers{getNumTiers()}),
       accessContainer_(initAccessContainer(
           type, detail::kShmHashTableName, config.accessConfig)),
       chainedItemAccessContainer_(
@@ -2481,48 +2593,102 @@ CacheAllocator<CacheTrait>::~CacheAllocator() {
 }
 
 template <typename CacheTrait>
-ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts() {
+ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts(TierId tid) {
   ShmSegmentOpts opts;
   opts.alignment = sizeof(Slab);
   // TODO: we support single tier so far
-  if (config_.memoryTierConfigs.size() > 1) {
-    throw std::invalid_argument("CacheLib only supports a single memory tier");
+  if (config_.memoryTierConfigs.size() > 2) {
+    throw std::invalid_argument("CacheLib only supports two memory tiers");
   }
-  opts.memBindNumaNodes = config_.memoryTierConfigs[0].getMemBind();
+  opts.memBindNumaNodes = config_.memoryTierConfigs[tid].getMemBind();
   return opts;
 }
 
+template <typename CacheTrait>
+size_t CacheAllocator<CacheTrait>::memoryTierSize(TierId tid) const {
+  // sum of getRatio() over every configured memory tier
+  const auto partitions = std::accumulate(
+      config_.memoryTierConfigs.begin(), config_.memoryTierConfigs.end(), 0UL,
+      [](const size_t sum, const MemoryTierCacheConfig& c) { return sum + c.getRatio(); });
+  return config_.memoryTierConfigs[tid].calculateTierSize(
+      config_.getCacheSize(), partitions);
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createPrivateAllocator() {
+  // Builds exactly one allocator sized to the full cache size; it is placed
+  // at tempShm_'s address when isOnShm_ is set.
+  std::unique_ptr<MemoryAllocator> allocator;
+  if (isOnShm_) {
+    allocator = std::make_unique<MemoryAllocator>(
+        getAllocatorConfig(config_), tempShm_->getAddr(),
+        config_.getCacheSize());
+  } else {
+    allocator = std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
+                                                  config_.getCacheSize());
+  }
+  std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+  allocators.push_back(std::move(allocator));
+  return allocators;
+}
+
 template <typename CacheTrait>
 std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::createNewMemoryAllocator() {
+CacheAllocator<CacheTrait>::createNewMemoryAllocator(TierId tid) {
+  size_t tierSize = memoryTierSize(tid);
   return std::make_unique<MemoryAllocator>(
       getAllocatorConfig(config_),
       shmManager_
-          ->createShm(detail::kShmCacheName, config_.getCacheSize(),
-                      config_.slabMemoryBaseAddr, createShmCacheOpts())
+          ->createShm(detail::kShmCacheName + std::to_string(tid),
+                      tierSize, config_.slabMemoryBaseAddr,
+                      createShmCacheOpts(tid))
           .addr,
-      config_.getCacheSize());
+      tierSize);
 }
 
 template <typename CacheTrait>
 std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::restoreMemoryAllocator() {
+CacheAllocator<CacheTrait>::restoreMemoryAllocator(TierId tid,
+        const serialization::MemoryAllocatorObject& sAllocator) {
   return std::make_unique<MemoryAllocator>(
-      deserializer_->deserialize<MemoryAllocator::SerializationType>(),
+      sAllocator,
       shmManager_
-          ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr,
-                      createShmCacheOpts())
-          .addr,
-      config_.getCacheSize(),
+          ->attachShm(detail::kShmCacheName + std::to_string(tid),
+            config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr,
+      memoryTierSize(tid),
       config_.disableFullCoredump);
 }
 
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createAllocators() {
+  std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+  for (int tid = 0; tid < getNumTiers(); tid++) {
+    allocators.emplace_back(createNewMemoryAllocator(tid));
+  }
+  return allocators;
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::restoreAllocators() {
+  std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+  const auto allocatorCollection =
+      deserializer_->deserialize<AllocatorsSerializationType>();
+  // bind by reference (no copy of the serialized state); at() rejects a gap
+  const auto& allocMap = *allocatorCollection.allocators();
+  for (int tid = 0; tid < getNumTiers(); tid++)
+    allocators.emplace_back(restoreMemoryAllocator(tid, allocMap.at(tid)));
+  return allocators;
+}
+
 template <typename CacheTrait>
 std::unique_ptr<CCacheManager>
-CacheAllocator<CacheTrait>::restoreCCacheManager() {
+CacheAllocator<CacheTrait>::restoreCCacheManager(TierId tid) {
   return std::make_unique<CCacheManager>(
       deserializer_->deserialize<CCacheManager::SerializationType>(),
-      *allocator_);
+      *allocator_[tid]);
 }
 
 template <typename CacheTrait>
@@ -2612,35 +2778,25 @@ void CacheAllocator<CacheTrait>::initWorkers() {
                           config_.ccacheOptimizeStepSizePercent);
   }
 
-  if (config_.backgroundEvictorEnabled()) {
-    startNewBackgroundEvictor(config_.backgroundEvictorInterval,
-                              config_.backgroundEvictorStrategy,
-                              config_.backgroundEvictorThreads);
-  }
-
-  if (config_.backgroundPromoterEnabled()) {
-    startNewBackgroundPromoter(config_.backgroundPromoterInterval,
-                               config_.backgroundPromoterStrategy,
-                               config_.backgroundPromoterThreads);
+  if (config_.backgroundMoverEnabled()) {
+    startNewBackgroundMover(config_.backgroundMoverInterval,
+                            config_.backgroundEvictionBatch,
+                            config_.backgroundPromotionBatch,
+                            config_.backgroundTargetFree,
+                            config_.backgroundMoverThreads);
   }
 }
 
 template <typename CacheTrait>
-std::unique_ptr<MemoryAllocator> CacheAllocator<CacheTrait>::initAllocator(
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::initAllocator(
     InitMemType type) {
   if (type == InitMemType::kNone) {
-    if (isOnShm_ == true) {
-      return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
-                                               tempShm_->getAddr(),
-                                               config_.getCacheSize());
-    } else {
-      return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
-                                               config_.getCacheSize());
-    }
+    return createPrivateAllocator();
   } else if (type == InitMemType::kMemNew) {
-    return createNewMemoryAllocator();
+    return createAllocators();
   } else if (type == InitMemType::kMemAttach) {
-    return restoreMemoryAllocator();
+    return restoreAllocators();
   }
 
   // Invalid type
@@ -2704,23 +2860,19 @@ CacheAllocator<CacheTrait>::allocate(PoolId poolId,
     creationTime = util::getCurrentTimeSec();
   }
   return allocateInternal(poolId, key, size, creationTime,
-                          ttlSecs == 0 ? 0 : creationTime + ttlSecs);
-}
-
-template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::shouldWakeupBgEvictor(PoolId /* pid */,
-                                                       ClassId /* cid */) {
-  return false;
+                          ttlSecs == 0 ? 0 : creationTime + ttlSecs, false);
 }
 
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
-CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
-                                             typename Item::Key key,
-                                             uint32_t size,
-                                             uint32_t creationTime,
-                                             uint32_t expiryTime,
-                                             bool fromBgThread) {
+CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
+                                                 PoolId pid,
+                                                 typename Item::Key key,
+                                                 uint32_t size,
+                                                 uint32_t creationTime,
+                                                 uint32_t expiryTime,
+                                                 bool fromBgThread,
+                                                 bool evict) {
   util::LatencyTracker tracker{stats().allocateLatency_};
 
   SCOPE_FAIL { stats_.invalidAllocs.inc(); };
@@ -2729,21 +2881,18 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
   const auto requiredSize = Item::getRequiredSize(key, size);
 
   // the allocation class in our memory allocator.
-  const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+  const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
 
+  // TODO: per-tier
   (*stats_.allocAttempts)[pid][cid].inc();
 
-  void* memory = allocator_->allocate(pid, requiredSize);
-
-  if (backgroundEvictor_.size() && !fromBgThread &&
-      (memory == nullptr || shouldWakeupBgEvictor(pid, cid))) {
-    backgroundEvictor_[BackgroundMover<CacheT>::workerId(
-                           pid, cid, backgroundEvictor_.size())]
-        ->wakeUp();
-  }
+  void* memory = allocator_[tid]->allocate(pid, requiredSize);
 
   if (memory == nullptr) {
-    memory = findEviction(pid, cid);
+    if (!evict) {
+      return {};
+    }
+    memory = findEviction(tid, pid, cid);
   }
 
   WriteHandle handle;
@@ -2754,7 +2903,7 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
     // for example.
     SCOPE_FAIL {
       // free back the memory to the allocator since we failed.
-      allocator_->free(memory);
+      allocator_[tid]->free(memory);
     };
 
     handle = acquire(new (memory) Item(key, size, creationTime, expiryTime));
@@ -2765,7 +2914,7 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
     }
 
   } else { // failed to allocate memory.
-    (*stats_.allocFailures)[pid][cid].inc();
+    (*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier
     // wake up rebalancer
     if (!config_.poolRebalancerDisableForcedWakeUp && poolRebalancer_) {
       poolRebalancer_->wakeUp();
@@ -2782,6 +2931,24 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
   return handle;
 }
 
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
+                                             typename Item::Key key,
+                                             uint32_t size,
+                                             uint32_t creationTime,
+                                             uint32_t expiryTime,
+                                             bool fromBgThread) {
+  // Try each tier in order from tier 0. TODO: consult admission policy
+  for (TierId tid = 0; tid < getNumTiers(); ++tid) {
+    bool evict = !config_.insertToFirstFreeTier || tid == getNumTiers() - 1;
+    auto handle = allocateInternalTier(tid, pid, key, size, creationTime,
+                                       expiryTime, fromBgThread, evict);
+    if (handle) return handle;
+  }
+  return {};
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
 CacheAllocator<CacheTrait>::allocateChainedItem(const ReadHandle& parent,
@@ -2811,22 +2978,30 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(const Item& parent,
 
   // number of bytes required for this item
   const auto requiredSize = ChainedItem::getRequiredSize(size);
-
-  const auto pid = allocator_->getAllocInfo(parent.getMemory()).poolId;
-  const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
-
+  
+  // this is correct for now as we can
+  // assume the parent and chained item
+  // will reside in the same tier until 
+  // they are moved
+  auto tid = getTierId(parent);
+
+  const auto pid = allocator_[tid]->getAllocInfo(parent.getMemory()).poolId;
+  const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
+
+  // TODO: per-tier? Right now stats_ are not used in any public periodic
+  // worker
   (*stats_.allocAttempts)[pid][cid].inc();
 
-  void* memory = allocator_->allocate(pid, requiredSize);
+  void* memory = allocator_[tid]->allocate(pid, requiredSize);
   if (memory == nullptr) {
-    memory = findEviction(pid, cid);
+    memory = findEviction(tid, pid, cid);
   }
   if (memory == nullptr) {
     (*stats_.allocFailures)[pid][cid].inc();
     return WriteHandle{};
   }
 
-  SCOPE_FAIL { allocator_->free(memory); };
+  SCOPE_FAIL { allocator_[tid]->free(memory); };
 
   auto child = acquire(new (memory) ChainedItem(
       compressor_.compress(&parent), size, util::getCurrentTimeSec()));
@@ -3160,8 +3335,8 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
     throw std::runtime_error(
         folly::sformat("cannot release this item: {}", it.toString()));
   }
-
-  const auto allocInfo = allocator_->getAllocInfo(it.getMemory());
+  const auto tid = getTierId(it);
+  const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory());
 
   if (ctx == RemoveContext::kEviction) {
     const auto timeNow = util::getCurrentTimeSec();
@@ -3185,8 +3360,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
           folly::sformat("Can not recycle a chained item {}, toRecyle",
                          it.toString(), toRecycle->toString()));
     }
-
-    allocator_->free(&it);
+    allocator_[tid]->free(&it);
     return ReleaseRes::kReleased;
   }
 
@@ -3255,7 +3429,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
       auto next = head->getNext(compressor_);
 
       const auto childInfo =
-          allocator_->getAllocInfo(static_cast<const void*>(head));
+          allocator_[tid]->getAllocInfo(static_cast<const void*>(head));
       (*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub(
           util::getFragmentation(*this, *head));
 
@@ -3271,7 +3445,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
         XDCHECK(ReleaseRes::kReleased != res);
         res = ReleaseRes::kRecycled;
       } else {
-        allocator_->free(head);
+        allocator_[tid]->free(head);
       }
 
       stats_.numChainedChildItems.dec();
@@ -3285,7 +3459,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
     res = ReleaseRes::kRecycled;
   } else {
     XDCHECK(it.isDrained());
-    allocator_->free(&it);
+    allocator_[tid]->free(&it);
   }
 
   return res;
@@ -3575,7 +3749,8 @@ void CacheAllocator<CacheTrait>::wakeUpWaiters(folly::StringPiece key,
 
 template <typename CacheTrait>
 bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
-                                                 WriteHandle& newItemHdl) {
+                                                 WriteHandle& newItemHdl,
+                                                 bool skipAddInMMContainer) {
   XDCHECK(oldItem.isMoving());
   // If an item is expired, proceed to eviction.
   if (oldItem.isExpired()) {
@@ -3605,8 +3780,12 @@ bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
   // Adding the item to mmContainer has to succeed since no one can remove the
   // item
   auto& newContainer = getMMContainer(*newItemHdl);
-  auto mmContainerAdded = newContainer.add(*newItemHdl);
-  XDCHECK(mmContainerAdded);
+  if (!skipAddInMMContainer) {
+    // Not deferred to a batch: add to the mmContainer right away. This add
+    // has to succeed for the reason given above.
+    auto mmContainerAdded = newContainer.add(*newItemHdl);
+    XDCHECK(mmContainerAdded);
+  }
 
   if (oldItem.hasChainedItem()) {
     XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString();
@@ -3700,13 +3879,14 @@ void CacheAllocator<CacheTrait>::unlinkItemForEviction(Item& it) {
 template <typename CacheTrait>
 std::pair<typename CacheAllocator<CacheTrait>::Item*,
           typename CacheAllocator<CacheTrait>::Item*>
-CacheAllocator<CacheTrait>::getNextCandidate(PoolId pid,
+CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
+                                             PoolId pid,
                                              ClassId cid,
                                              unsigned int& searchTries) {
   typename NvmCacheT::PutToken token;
   Item* toRecycle = nullptr;
   Item* candidate = nullptr;
-  auto& mmContainer = getMMContainer(pid, cid);
+  auto& mmContainer = getMMContainer(tid, pid, cid);
 
   mmContainer.withEvictionIterator([this, pid, cid, &candidate, &toRecycle,
                                     &searchTries, &mmContainer,
@@ -3808,15 +3988,127 @@ CacheAllocator<CacheTrait>::getNextCandidate(PoolId pid,
   return {candidate, toRecycle};
 }
 
+// Used by the background movers: marks up to `batch` items for eviction,
+// removes them from the MMContainer, unlinks them and returns their MoveData
+template <typename CacheTrait>
+std::vector<typename CacheAllocator<CacheTrait>::MoveData>
+CacheAllocator<CacheTrait>::getNextCandidates(TierId tid,
+                                              PoolId pid,
+                                              ClassId cid,
+                                              uint32_t batch) {
+  std::vector<MoveData> evictionData;
+  evictionData.reserve(batch);
+
+  auto& mmContainer = getMMContainer(tid, pid, cid);
+  unsigned int maxSearchTries =
+      std::max(config_.evictionSearchTries, batch * 2);
+
+  mmContainer.withEvictionIterator([this, pid, cid, batch, maxSearchTries,
+                                    &evictionData, &mmContainer](auto&& itr) {
+    unsigned int searchTries = 0;
+    if (!itr) {
+      // nothing to iterate over; still recorded as one eviction attempt
+      (*stats_.evictionAttempts)[pid][cid].inc();
+      return;
+    }
+
+    while ((config_.evictionSearchTries == 0 || maxSearchTries > searchTries) &&
+           itr && evictionData.size() < batch) {
+      ++searchTries;
+      (*stats_.evictionAttempts)[pid][cid].inc();
+
+      auto* toRecycle_ = itr.get();
+      bool isChained_ = toRecycle_->isChainedItem();
+      auto* candidate_ =
+          isChained_ ? &toRecycle_->asChainedItem().getParentItem(compressor_)
+                     : toRecycle_;
+
+      typename NvmCacheT::PutToken putToken{};
+      const bool evictToNvmCache = shouldWriteToNvmCache(*candidate_);
+
+      auto markForEviction = [&candidate_, this]() {
+        auto markedForEviction = candidate_->markForEviction();
+        if (!markedForEviction) {
+          if (candidate_->hasChainedItem()) {
+            stats_.evictFailParentAC.inc();
+          } else {
+            stats_.evictFailAC.inc();
+          }
+          return false;
+        }
+        return true;
+      };
+
+      if (evictToNvmCache) {
+        auto putTokenRv = nvmCache_->createPutToken(
+            candidate_->getKey(),
+            [&markForEviction]() { return markForEviction(); });
+
+        if (!putTokenRv) {
+          switch (putTokenRv.error()) {
+          case InFlightPuts::PutTokenError::TRY_LOCK_FAIL:
+            stats_.evictFailPutTokenLock.inc();
+            break;
+          case InFlightPuts::PutTokenError::TOKEN_EXISTS:
+            stats_.evictFailConcurrentFill.inc();
+            break;
+          case InFlightPuts::PutTokenError::CALLBACK_FAILED:
+            stats_.evictFailConcurrentAccess.inc();
+            break;
+          }
+          ++itr;
+          continue;
+        }
+        putToken = std::move(*putTokenRv);
+        XDCHECK(putToken.isValid());
+      } else {
+        if (!markForEviction()) {
+          ++itr;
+          continue;
+        }
+      }
+
+      // markForEviction to make sure no other thread is evicting the item
+      // nor holding a handle to that item
+
+      // Only take the item if it is a regular item, or a chained item whose
+      // parent is still candidate_. NOTE(review): when the parent changed,
+      // candidate_ stays marked for eviction, is not recorded, and itr is not
+      // advanced before the next loop iteration - confirm this is intended.
+      if (!toRecycle_->isChainedItem() ||
+          &toRecycle_->asChainedItem().getParentItem(compressor_) ==
+              candidate_) {
+        mmContainer.remove(itr);
+        MoveData moveData(candidate_, toRecycle_, nullptr,
+                          isChained_ || candidate_->hasChainedItem(),
+                          candidate_->isExpired(),
+                          std::move(putToken), nullptr);
+        evictionData.push_back(std::move(moveData));
+      }
+    }
+  });
+
+  for (auto& moveData : evictionData) {
+    Item* candidate = moveData.candidate;
+    unlinkItemForEviction(*candidate);
+    if (moveData.token.isValid() &&
+        shouldWriteToNvmCacheExclusive(*candidate)) {
+      nvmCache_->put(*candidate, std::move(moveData.token));
+    }
+  }
+
+  return evictionData;
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::Item*
-CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
+CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
   // Keep searching for a candidate until we were able to evict it
   // or until the search limit has been exhausted
   unsigned int searchTries = 0;
   while (config_.evictionSearchTries == 0 ||
          config_.evictionSearchTries > searchTries) {
-    auto [candidate, toRecycle] = getNextCandidate(pid, cid, searchTries);
+    auto [candidate, toRecycle] = getNextCandidate(tid, pid, cid, searchTries);
 
     // Reached the end of the eviction queue but doulen't find a candidate,
     // start again.
@@ -4103,21 +4395,57 @@ void CacheAllocator<CacheTrait>::invalidateNvm(Item& item) {
   }
 }
 
+template <typename CacheTrait>
+TierId CacheAllocator<CacheTrait>::getTierId(const Item& item) const {
+  // Delegate to the pointer overload using the item's memory address.
+  return getTierId(item.getMemory());
+}
+
+// Returns the first tier whose allocator owns ptr; throws if none does.
+template <typename CacheTrait>
+TierId CacheAllocator<CacheTrait>::getTierId(const void* ptr) const {
+  for (TierId tid = 0; tid < getNumTiers(); ++tid) {
+    if (allocator_[tid]->isMemoryInAllocator(ptr)) {
+      return tid;
+    }
+  }
+  throw std::invalid_argument("Item does not belong to any tier!");
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::MMContainer&
 CacheAllocator<CacheTrait>::getMMContainer(const Item& item) const noexcept {
+  const auto tid = getTierId(item); // NOTE(review): may throw under noexcept
   const auto allocInfo =
-      allocator_->getAllocInfo(static_cast<const void*>(&item));
-  return getMMContainer(allocInfo.poolId, allocInfo.classId);
+      allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
+  return getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
 }
 
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::MMContainer&
-CacheAllocator<CacheTrait>::getMMContainer(PoolId pid,
+CacheAllocator<CacheTrait>::getMMContainer(TierId tid,
+                                           PoolId pid,
                                            ClassId cid) const noexcept {
-  XDCHECK_LT(static_cast<size_t>(pid), mmContainers_.size());
-  XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[pid].size());
-  return *mmContainers_[pid][cid];
+  XDCHECK_LT(static_cast<size_t>(tid), mmContainers_.size());
+  XDCHECK_LT(static_cast<size_t>(pid), mmContainers_[tid].size());
+  XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[tid][pid].size());
+  return *mmContainers_[tid][pid][cid];
+}
+
+// Returns the stats of the MMContainer at (tid, pid, cid). An out-of-range
+// id or an unset container yields a default-constructed MMContainerStat.
+template <typename CacheTrait>
+MMContainerStat CacheAllocator<CacheTrait>::getMMContainerStat(
+    TierId tid, PoolId pid, ClassId cid) const noexcept {
+  // Short-circuiting keeps each level from being indexed before its check.
+  if (static_cast<size_t>(tid) >= mmContainers_.size() ||
+      static_cast<size_t>(pid) >= mmContainers_[tid].size() ||
+      static_cast<size_t>(cid) >= mmContainers_[tid][pid].size()) {
+    return MMContainerStat{};
+  }
+
+  const auto& container = mmContainers_[tid][pid][cid];
+  return container ? container->getStats() : MMContainerStat{};
 }
 
 template <typename CacheTrait>
@@ -4306,8 +4634,9 @@ void CacheAllocator<CacheTrait>::markUseful(const ReadHandle& handle,
 template <typename CacheTrait>
 bool CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
                                                            AccessMode mode) {
+  const auto tid = getTierId(item);
   const auto allocInfo =
-      allocator_->getAllocInfo(static_cast<const void*>(&item));
+      allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
   (*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc();
 
   // track recently accessed items if needed
@@ -4315,14 +4644,15 @@ bool CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
     ring_->trackItem(reinterpret_cast<uintptr_t>(&item), item.getSize());
   }
 
-  auto& mmContainer = getMMContainer(allocInfo.poolId, allocInfo.classId);
+  auto& mmContainer = getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
   return mmContainer.recordAccess(item, mode);
 }
 
 template <typename CacheTrait>
 uint32_t CacheAllocator<CacheTrait>::getUsableSize(const Item& item) const {
+  const auto tid = getTierId(item);
   const auto allocSize =
-      allocator_->getAllocInfo(static_cast<const void*>(&item)).allocSize;
+      allocator_[tid]->getAllocInfo(static_cast<const void*>(&item)).allocSize;
   return item.isChainedItem()
              ? allocSize - ChainedItem::getRequiredSize(0)
              : allocSize - Item::getRequiredSize(item.getKey(), 0);
@@ -4331,8 +4661,9 @@ uint32_t CacheAllocator<CacheTrait>::getUsableSize(const Item& item) const {
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::SampleItem
 CacheAllocator<CacheTrait>::getSampleItem() {
+  auto tid = folly::Random::rand32() % getNumTiers();
   size_t nvmCacheSize = nvmCache_ ? nvmCache_->getUsableSize() : 0;
-  size_t ramCacheSize = allocator_->getMemorySizeInclAdvised();
+  size_t ramCacheSize = allocator_[tid]->getMemorySizeInclAdvised();
 
   bool fromNvm =
       folly::Random::rand64(0, nvmCacheSize + ramCacheSize) >= ramCacheSize;
@@ -4341,19 +4672,18 @@ CacheAllocator<CacheTrait>::getSampleItem() {
   }
 
   // Sampling from DRAM cache
-  auto item = reinterpret_cast<const Item*>(allocator_->getRandomAlloc());
+  auto item = reinterpret_cast<const Item*>(allocator_[tid]->getRandomAlloc());
   if (!item || UNLIKELY(item->isExpired())) {
     return SampleItem{false /* fromNvm */};
   }
 
   // Check that item returned is the same that was sampled
-
   auto sharedHdl = std::make_shared<ReadHandle>(findInternal(item->getKey()));
   if (sharedHdl->get() != item) {
     return SampleItem{false /* fromNvm */};
   }
 
-  const auto allocInfo = allocator_->getAllocInfo(item->getMemory());
+  const auto allocInfo = allocator_[tid]->getAllocInfo(item->getMemory());
 
   // Convert the Item to IOBuf to make SampleItem
   auto iobuf = folly::IOBuf{
@@ -4377,21 +4707,27 @@ std::vector<std::string> CacheAllocator<CacheTrait>::dumpEvictionIterator(
     return {};
   }
 
-  if (static_cast<size_t>(pid) >= mmContainers_.size() ||
-      static_cast<size_t>(cid) >= mmContainers_[pid].size()) {
+  // Start dumping from the lowest (last) tier and walk up towards tier 0.
+  int tid = getNumTiers() - 1;
+  if (static_cast<size_t>(tid) >= mmContainers_.size() ||
+      static_cast<size_t>(pid) >= mmContainers_[tid].size() ||
+      static_cast<size_t>(cid) >= mmContainers_[tid][pid].size()) {
     throw std::invalid_argument(
-        folly::sformat("Invalid PoolId: {} and ClassId: {}.", pid, cid));
+        folly::sformat("Invalid TierId: {} and PoolId: {} and ClassId: {}.", tid, pid, cid));
   }
 
   std::vector<std::string> content;
 
-  auto& mm = *mmContainers_[pid][cid];
-  auto evictItr = mm.getEvictionIterator();
   size_t i = 0;
-  while (evictItr && i < numItems) {
-    content.push_back(evictItr->toString());
-    ++evictItr;
-    ++i;
+  while (i < numItems && tid >= 0) {
+    auto& mm = *mmContainers_[tid][pid][cid];
+    mm.withEvictionIterator([&content, numItems](auto&& itr) {
+      while (itr && content.size() < numItems) {
+        content.push_back(itr->toString());
+        ++itr;
+      }
+    });
+    --tid;
   }
 
   return content;
@@ -4569,25 +4905,43 @@ PoolId CacheAllocator<CacheTrait>::addPool(
     std::shared_ptr<RebalanceStrategy> resizeStrategy,
     bool ensureProvisionable) {
   std::unique_lock w(poolsResizeAndRebalanceLock_);
-  auto pid = allocator_->addPool(name, size, allocSizes, ensureProvisionable);
+
+  PoolId pid = 0;
+  size_t totalCacheSize = 0;
+
+  for (TierId tid = 0; tid < getNumTiers(); tid++) {
+    totalCacheSize += allocator_[tid]->getMemorySize();
+  }
+
+  for (TierId tid = 0; tid < getNumTiers(); tid++) {
+    auto tierSizeRatio =
+        static_cast<double>(allocator_[tid]->getMemorySize()) / totalCacheSize;
+    size_t tierPoolSize = static_cast<size_t>(tierSizeRatio * size);
+    
+    // TODO: what if we manage to add pool only in one tier?
+    // we should probably remove that on failure
+    auto res = allocator_[tid]->addPool(
+        name, tierPoolSize, allocSizes, ensureProvisionable);
+    XDCHECK(tid == 0 || res == pid);
+    pid = res;
+  }
+
   createMMContainers(pid, std::move(config));
   setRebalanceStrategy(pid, std::move(rebalanceStrategy));
   setResizeStrategy(pid, std::move(resizeStrategy));
 
-  if (backgroundEvictor_.size()) {
-    auto memoryAssignments =
-        createBgWorkerMemoryAssignments(backgroundEvictor_.size());
-    for (size_t id = 0; id < backgroundEvictor_.size(); id++)
-      backgroundEvictor_[id]->setAssignedMemory(
-          std::move(memoryAssignments[id]));
-  }
-
-  if (backgroundPromoter_.size()) {
-    auto memoryAssignments =
-        createBgWorkerMemoryAssignments(backgroundPromoter_.size());
-    for (size_t id = 0; id < backgroundPromoter_.size(); id++)
-      backgroundPromoter_[id]->setAssignedMemory(
-          std::move(memoryAssignments[id]));
+  if (backgroundMover_.size()) {
+    auto nTiers = getNumTiers();
+    unsigned int bgId = 0;
+    for (TierId tid = 0; tid < nTiers; tid++) {
+      auto memoryAssignments =
+          createBgWorkerMemoryAssignments(backgroundMover_.size()/nTiers, tid);
+      for (size_t i = 0; i < backgroundMover_.size()/nTiers; i++) {
+        backgroundMover_[bgId]->setAssignedMemory(
+            std::move(memoryAssignments[i]));
+        bgId++;
+      }
+    }
   }
 
   return pid;
@@ -4597,15 +4951,16 @@ template <typename CacheTrait>
 bool CacheAllocator<CacheTrait>::provisionPool(
     PoolId poolId, const std::vector<uint32_t>& slabsDistribution) {
   std::unique_lock w(poolsResizeAndRebalanceLock_);
-  return allocator_->provisionPool(poolId, slabsDistribution);
+  //TODO: enable for multi-tier
+  return allocator_[currentTier()]->provisionPool(poolId, slabsDistribution);
 }
 
 template <typename CacheTrait>
 bool CacheAllocator<CacheTrait>::provisionPoolWithPowerLaw(
     PoolId poolId, double power, uint32_t minSlabsPerAC) {
-  const auto& poolSize = allocator_->getPool(poolId).getPoolSize();
+  const auto& poolSize = allocator_[currentTier()]->getPool(poolId).getPoolSize();
   const uint32_t numACs =
-      allocator_->getPool(poolId).getStats().classIds.size();
+      allocator_[currentTier()]->getPool(poolId).getStats().classIds.size();
   const uint32_t numSlabs = poolSize / Slab::kSize;
   const uint32_t minSlabsRequired = numACs * minSlabsPerAC;
   if (numSlabs < minSlabsRequired) {
@@ -4637,16 +4992,16 @@ bool CacheAllocator<CacheTrait>::provisionPoolWithPowerLaw(
     slabsDistribution[i] += slabsToAllocate;
     allocatedSlabs += slabsToAllocate;
   }
-
+  //TODO: enable for multi-tier
   return provisionPool(poolId, slabsDistribution);
 }
 
 template <typename CacheTrait>
 void CacheAllocator<CacheTrait>::overridePoolRebalanceStrategy(
     PoolId pid, std::shared_ptr<RebalanceStrategy> rebalanceStrategy) {
-  if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+  if (static_cast<size_t>(pid) >= mmContainers_[0].size()) {
     throw std::invalid_argument(folly::sformat(
-        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size()));
   }
   setRebalanceStrategy(pid, std::move(rebalanceStrategy));
 }
@@ -4654,9 +5009,9 @@ void CacheAllocator<CacheTrait>::overridePoolRebalanceStrategy(
 template <typename CacheTrait>
 void CacheAllocator<CacheTrait>::overridePoolResizeStrategy(
     PoolId pid, std::shared_ptr<RebalanceStrategy> resizeStrategy) {
-  if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+  if (static_cast<size_t>(pid) >= mmContainers_[0].size()) {
     throw std::invalid_argument(folly::sformat(
-        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size()));
   }
   setResizeStrategy(pid, std::move(resizeStrategy));
 }
@@ -4668,14 +5023,14 @@ void CacheAllocator<CacheTrait>::overridePoolOptimizeStrategy(
 }
 
 template <typename CacheTrait>
-void CacheAllocator<CacheTrait>::overridePoolConfig(PoolId pid,
+void CacheAllocator<CacheTrait>::overridePoolConfig(TierId tid, PoolId pid,
                                                     const MMConfig& config) {
-  if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+  if (static_cast<size_t>(pid) >= mmContainers_[tid].size()) {
     throw std::invalid_argument(folly::sformat(
-        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[tid].size()));
   }
 
-  auto& pool = allocator_->getPool(pid);
+  auto& pool = allocator_[tid]->getPool(pid);
   for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) {
     MMConfig mmConfig = config;
     mmConfig.addExtraConfig(
@@ -4683,29 +5038,33 @@ void CacheAllocator<CacheTrait>::overridePoolConfig(PoolId pid,
             ? pool.getAllocationClass(static_cast<ClassId>(cid))
                   .getAllocsPerSlab()
             : 0);
-    DCHECK_NOTNULL(mmContainers_[pid][cid].get());
-    mmContainers_[pid][cid]->setConfig(mmConfig);
+    DCHECK_NOTNULL(mmContainers_[tid][pid][cid].get());
+    mmContainers_[tid][pid][cid]->setConfig(mmConfig);
   }
 }
 
 template <typename CacheTrait>
 void CacheAllocator<CacheTrait>::createMMContainers(const PoolId pid,
                                                     MMConfig config) {
-  auto& pool = allocator_->getPool(pid);
+  // every tier is assumed to share tier 0's class ids for this pool
+  auto& pool = allocator_[0]->getPool(pid);
   for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) {
     config.addExtraConfig(
         config_.trackTailHits
             ? pool.getAllocationClass(static_cast<ClassId>(cid))
                   .getAllocsPerSlab()
             : 0);
-    mmContainers_[pid][cid].reset(new MMContainer(config, compressor_));
+    for (TierId tid = 0; tid < getNumTiers(); tid++) {
+      mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_));
+    }
   }
 }
 
 template <typename CacheTrait>
 PoolId CacheAllocator<CacheTrait>::getPoolId(
     folly::StringPiece name) const noexcept {
-  return allocator_->getPoolId(name.str());
+  // each tier has the same pools
+  return allocator_[0]->getPoolId(name.str());
 }
 
 // The Function returns a consolidated vector of Release Slab
@@ -4748,7 +5107,9 @@ std::set<PoolId> CacheAllocator<CacheTrait>::filterCompactCachePools(
 template <typename CacheTrait>
 std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIds() const {
   std::shared_lock r(poolsResizeAndRebalanceLock_);
-  return filterCompactCachePools(allocator_->getPoolIds());
+  // TODO - get rid of the duplication - right now, each tier
+  // holds pool objects with mostly the same info
+  return filterCompactCachePools(allocator_[0]->getPoolIds());
 }
 
 template <typename CacheTrait>
@@ -4773,10 +5134,9 @@ std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIdsForResize()
   // getAdvisedMemorySize - then pools may be overLimit even when
   // all slabs are not allocated. Otherwise, pools may be overLimit
   // only after all slabs are allocated.
-  //
-  return (allocator_->allSlabsAllocated()) ||
-                 (allocator_->getAdvisedMemorySize() != 0)
-             ? filterCompactCachePools(allocator_->getPoolsOverLimit())
+  return (allocator_[0]->allSlabsAllocated()) ||
+                 (allocator_[0]->getAdvisedMemorySize() != 0)
+             ? filterCompactCachePools(allocator_[0]->getPoolsOverLimit())
              : std::set<PoolId>{};
 }
 
@@ -4785,11 +5145,20 @@ const std::string CacheAllocator<CacheTrait>::getCacheName() const {
   return config_.cacheName;
 }
 
+// Total size of pool `poolId`, summed over every tier's allocator.
+template <typename CacheTrait>
+size_t CacheAllocator<CacheTrait>::getPoolSize(PoolId poolId) const {
+  size_t total = 0;
+  for (const auto& tierAllocator : allocator_) {
+    total += tierAllocator->getPool(poolId).getPoolSize();
+  }
+  return total;
+}
+
 template <typename CacheTrait>
 PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
   stats().numExpensiveStatsPolled.inc();
-
-  const auto& pool = allocator_->getPool(poolId);
+  const auto& pool = allocator_[0]->getPool(poolId);
   const auto& allocSizes = pool.getAllocSizes();
   auto mpStats = pool.getStats();
   const auto& classIds = mpStats.classIds;
@@ -4808,7 +5177,7 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
   if (!isCompactCache) {
     for (const ClassId cid : classIds) {
       uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get();
-      XDCHECK(mmContainers_[poolId][cid],
+      XDCHECK(mmContainers_[0][poolId][cid],
               folly::sformat("Pid {}, Cid {} not initialized.", poolId, cid));
       cacheStats.insert(
           {cid,
@@ -4818,7 +5187,7 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
             (*stats_.fragmentationSize)[poolId][cid].get(), classHits,
             (*stats_.chainedItemEvictions)[poolId][cid].get(),
             (*stats_.regularItemEvictions)[poolId][cid].get(),
-            mmContainers_[poolId][cid]->getStats()}
+            mmContainers_[0][poolId][cid]->getStats()}
 
           });
       totalHits += classHits;
@@ -4827,7 +5196,7 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
 
   PoolStats ret;
   ret.isCompactCache = isCompactCache;
-  ret.poolName = allocator_->getPoolName(poolId);
+  ret.poolName = allocator_[0]->getPoolName(poolId);
   ret.poolSize = pool.getPoolSize();
   ret.poolUsableSize = pool.getPoolUsableSize();
   ret.poolAdvisedSize = pool.getPoolAdvisedSize();
@@ -4839,6 +5208,15 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
   return ret;
 }
 
+// Stats of allocation class `classId` in pool `poolId` of tier `tid`.
+template <typename CacheTrait>
+ACStats CacheAllocator<CacheTrait>::getACStats(TierId tid,
+                                               PoolId poolId,
+                                               ClassId classId) const {
+  return allocator_[tid]->getPool(poolId).getAllocationClass(classId)
+      .getStats();
+}
+
 template <typename CacheTrait>
 PoolEvictionAgeStats CacheAllocator<CacheTrait>::getPoolEvictionAgeStats(
     PoolId pid, unsigned int slabProjectionLength) const {
@@ -4846,12 +5224,12 @@ PoolEvictionAgeStats CacheAllocator<CacheTrait>::getPoolEvictionAgeStats(
 
   PoolEvictionAgeStats stats;
 
-  const auto& pool = allocator_->getPool(pid);
+  const auto& pool = allocator_[0]->getPool(pid);
   const auto& allocSizes = pool.getAllocSizes();
   for (ClassId cid = 0; cid < static_cast<ClassId>(allocSizes.size()); ++cid) {
-    auto& mmContainer = getMMContainer(pid, cid);
+    auto& mmContainer = getMMContainer(0, pid, cid);
     const auto numItemsPerSlab =
-        allocator_->getPool(pid).getAllocationClass(cid).getAllocsPerSlab();
+        allocator_[0]->getPool(pid).getAllocationClass(cid).getAllocsPerSlab();
     const auto projectionLength = numItemsPerSlab * slabProjectionLength;
     stats.classEvictionAgeStats[cid] =
         mmContainer.getEvictionAgeStat(projectionLength);
@@ -4895,7 +5273,7 @@ void CacheAllocator<CacheTrait>::releaseSlab(PoolId pid,
   }
 
   try {
-    auto releaseContext = allocator_->startSlabRelease(
+    auto releaseContext = allocator_[0]->startSlabRelease(
         pid, victim, receiver, mode, hint,
         [this]() -> bool { return shutDownInProgress_; });
 
@@ -4904,15 +5282,15 @@ void CacheAllocator<CacheTrait>::releaseSlab(PoolId pid,
       return;
     }
 
-    releaseSlabImpl(releaseContext);
-    if (!allocator_->allAllocsFreed(releaseContext)) {
+    releaseSlabImpl(0, releaseContext);
+    if (!allocator_[0]->allAllocsFreed(releaseContext)) {
       throw std::runtime_error(
           folly::sformat("Was not able to free all allocs. PoolId: {}, AC: {}",
                          releaseContext.getPoolId(),
                          releaseContext.getClassId()));
     }
 
-    allocator_->completeSlabRelease(releaseContext);
+    allocator_[0]->completeSlabRelease(releaseContext);
   } catch (const exception::SlabReleaseAborted& e) {
     stats_.numAbortedSlabReleases.inc();
     throw exception::SlabReleaseAborted(folly::sformat(
@@ -4942,7 +5320,7 @@ SlabReleaseStats CacheAllocator<CacheTrait>::getSlabReleaseStats()
 }
 
 template <typename CacheTrait>
-void CacheAllocator<CacheTrait>::releaseSlabImpl(
+void CacheAllocator<CacheTrait>::releaseSlabImpl(TierId tid,
     const SlabReleaseContext& releaseContext) {
   auto startTime = std::chrono::milliseconds(util::getCurrentTimeMs());
   bool releaseStuck = false;
@@ -4985,7 +5363,7 @@ void CacheAllocator<CacheTrait>::releaseSlabImpl(
       // If moving fails, evict it
       evictForSlabRelease(item);
     }
-    XDCHECK(allocator_->isAllocFreed(releaseContext, alloc));
+    XDCHECK(allocator_[tid]->isAllocFreed(releaseContext, alloc));
   }
 }
 
@@ -5046,7 +5424,8 @@ bool CacheAllocator<CacheTrait>::moveForSlabRelease(Item& oldItem) {
     return false;
   }
 
-  const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory());
+  auto tid = getTierId(oldItem);
+  const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory());
   if (chainedItem) {
     newItemHdl.reset();
     auto parentKey = parentItem->getKey();
@@ -5074,7 +5453,7 @@ bool CacheAllocator<CacheTrait>::moveForSlabRelease(Item& oldItem) {
     auto ref = unmarkMovingAndWakeUpWaiters(oldItem, std::move(newItemHdl));
     XDCHECK_EQ(0u, ref);
   }
-  allocator_->free(&oldItem);
+  allocator_[tid]->free(&oldItem);
 
   (*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
       util::getFragmentation(*this, oldItem));
@@ -5103,17 +5482,21 @@ CacheAllocator<CacheTrait>::allocateNewItemForOldItem(const Item& oldItem) {
     return newItemHdl;
   }
 
+  const auto tid = getTierId(oldItem);
   const auto allocInfo =
-      allocator_->getAllocInfo(static_cast<const void*>(&oldItem));
+      allocator_[tid]->getAllocInfo(static_cast<const void*>(&oldItem));
+  bool evict = !config_.insertToFirstFreeTier || tid == getNumTiers() - 1;
 
   // Set up the destination for the move. Since oldItem would have the moving
   // bit set, it won't be picked for eviction.
-  auto newItemHdl = allocateInternal(allocInfo.poolId,
-                                     oldItem.getKey(),
-                                     oldItem.getSize(),
-                                     oldItem.getCreationTime(),
-                                     oldItem.getExpiryTime(),
-                                     false);
+  auto newItemHdl = allocateInternalTier(tid,
+                                         allocInfo.poolId,
+                                         oldItem.getKey(),
+                                         oldItem.getSize(),
+                                         oldItem.getCreationTime(),
+                                         oldItem.getExpiryTime(),
+                                         false,
+                                         evict);
   if (!newItemHdl) {
     return {};
   }
@@ -5150,7 +5533,7 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(Item& item) {
   }
 
   const auto allocInfo =
-      allocator_->getAllocInfo(static_cast<const void*>(&item));
+      allocator_[getTierId(item)]->getAllocInfo(static_cast<const void*>(&item));
   if (evicted->hasChainedItem()) {
     (*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId].inc();
   } else {
@@ -5199,11 +5582,15 @@ bool CacheAllocator<CacheTrait>::markMovingForSlabRelease(
   // At first, we assume this item was already freed
   bool itemFreed = true;
   bool markedMoving = false;
-  const auto fn = [this, &markedMoving, &itemFreed](void* memory) {
+  TierId tid = getTierId(alloc);
+  const auto fn = [this, tid, &markedMoving, &itemFreed](void* memory) {
     // Since this callback is executed, the item is not yet freed
     itemFreed = false;
     Item* item = static_cast<Item*>(memory);
-    auto& mmContainer = getMMContainer(*item);
+    auto allocInfo = allocator_[tid]->getAllocInfo(memory);
+    auto pid = allocInfo.poolId;
+    auto cid = allocInfo.classId;
+    auto& mmContainer = getMMContainer(tid, pid, cid);
     mmContainer.withContainerLock([this, &mmContainer, &item, &markedMoving]() {
       // we rely on the mmContainer lock to safely check that the item is
       // currently in the mmContainer (no other threads are currently
@@ -5241,7 +5628,7 @@ bool CacheAllocator<CacheTrait>::markMovingForSlabRelease(
 
   auto startTime = util::getCurrentTimeSec();
   while (true) {
-    allocator_->processAllocForRelease(ctx, alloc, fn);
+    allocator_[tid]->processAllocForRelease(ctx, alloc, fn);
 
     // If item is already freed we give up trying to mark the item moving
     // and return false, otherwise if marked as moving, we return true.
@@ -5256,7 +5643,7 @@ bool CacheAllocator<CacheTrait>::markMovingForSlabRelease(
     itemFreed = true;
 
     if (shutDownInProgress_) {
-      allocator_->abortSlabRelease(ctx);
+      allocator_[tid]->abortSlabRelease(ctx);
       throw exception::SlabReleaseAborted(
           folly::sformat("Slab Release aborted while still trying to mark"
                          " as moving for Item: {}. Pool: {}, Class: {}.",
@@ -5280,12 +5667,15 @@ template <typename CCacheT, typename... Args>
 CCacheT* CacheAllocator<CacheTrait>::addCompactCache(folly::StringPiece name,
                                                      size_t size,
                                                      Args&&... args) {
+  if (getNumTiers() != 1)
+    throw std::runtime_error("TODO: compact cache for multi-tier Cache not supported.");
+
   if (!config_.isCompactCacheEnabled()) {
     throw std::logic_error("Compact cache is not enabled");
   }
 
   std::unique_lock lock(compactCachePoolsLock_);
-  auto poolId = allocator_->addPool(name, size, {Slab::kSize});
+  auto poolId = allocator_[0]->addPool(name, size, {Slab::kSize});
   isCompactCachePool_[poolId] = true;
 
   auto ptr = std::make_unique<CCacheT>(
@@ -5394,22 +5784,43 @@ folly::IOBufQueue CacheAllocator<CacheTrait>::saveStateToIOBuf() {
   *metadata_.numChainedChildItems() = stats_.numChainedChildItems.get();
   *metadata_.numAbortedSlabReleases() = stats_.numAbortedSlabReleases.get();
 
-  auto serializeMMContainers = [](MMContainers& mmContainers) {
-    MMSerializationTypeContainer state;
-    for (unsigned int i = 0; i < mmContainers.size(); ++i) {
+  const auto numTiers = getNumTiers();
+  // TODO: implement serialization for multiple tiers
+  auto serializeMMContainers = [numTiers](MMContainers& mmContainers) {
+    std::map<serialization::MemoryDescriptorObject,MMSerializationType> containers;
+    for (unsigned int i = 0; i < numTiers; ++i) {
       for (unsigned int j = 0; j < mmContainers[i].size(); ++j) {
-        if (mmContainers[i][j]) {
-          state.pools_ref()[i][j] = mmContainers[i][j]->saveState();
+        for (unsigned int k = 0; k < mmContainers[i][j].size(); ++k) {
+          if (mmContainers[i][j][k]) {
+            serialization::MemoryDescriptorObject md;
+            md.tid_ref() = i;
+            md.pid_ref() = j;
+            md.cid_ref() = k;
+            containers[md] = mmContainers[i][j][k]->saveState();
+          }
         }
       }
     }
+    MMSerializationTypeContainer state;
+    state.containers_ref() = containers;
     return state;
   };
   MMSerializationTypeContainer mmContainersState =
       serializeMMContainers(mmContainers_);
 
   AccessSerializationType accessContainerState = accessContainer_->saveState();
-  MemoryAllocator::SerializationType allocatorState = allocator_->saveState();
+
+  auto serializeAllocators = [numTiers,this]() {
+    AllocatorsSerializationType state;
+    std::map<int,MemoryAllocator::SerializationType> allocators;
+    for (int i = 0; i < numTiers; ++i) {
+      allocators[i] = allocator_[i]->saveState();
+    }
+    state.allocators_ref() = allocators;
+    return state;
+  };
+  AllocatorsSerializationType allocatorsState = serializeAllocators();
+
   CCacheManager::SerializationType ccState = compactCacheManager_->saveState();
 
   AccessSerializationType chainedItemAccessContainerState =
@@ -5419,7 +5830,7 @@ folly::IOBufQueue CacheAllocator<CacheTrait>::saveStateToIOBuf() {
   // results into a single buffer.
   folly::IOBufQueue queue;
   Serializer::serializeToIOBufQueue(queue, metadata_);
-  Serializer::serializeToIOBufQueue(queue, allocatorState);
+  Serializer::serializeToIOBufQueue(queue, allocatorsState);
   Serializer::serializeToIOBufQueue(queue, ccState);
   Serializer::serializeToIOBufQueue(queue, mmContainersState);
   Serializer::serializeToIOBufQueue(queue, accessContainerState);
@@ -5434,8 +5845,7 @@ bool CacheAllocator<CacheTrait>::stopWorkers(std::chrono::seconds timeout) {
   success &= stopPoolResizer(timeout);
   success &= stopMemMonitor(timeout);
   success &= stopReaper(timeout);
-  success &= stopBackgroundEvictor(timeout);
-  success &= stopBackgroundPromoter(timeout);
+  success &= stopBackgroundMover(timeout);
   return success;
 }
 
@@ -5473,6 +5883,8 @@ CacheAllocator<CacheTrait>::shutDown() {
       (shmShutDownStatus == ShmShutDownRes::kSuccess);
   shmManager_.reset();
 
+  // TODO: save per-tier state
+
   if (shmShutDownSucceeded) {
     if (!nvmShutDownStatusOpt || *nvmShutDownStatusOpt)
       return ShutDownStatus::kSuccess;
@@ -5536,23 +5948,26 @@ CacheAllocator<CacheTrait>::deserializeMMContainers(
   const auto container =
       deserializer.deserialize<MMSerializationTypeContainer>();
 
-  MMContainers mmContainers;
-
-  for (auto& kvPool : *container.pools_ref()) {
-    auto i = static_cast<PoolId>(kvPool.first);
-    auto& pool = getPool(i);
-    for (auto& kv : kvPool.second) {
-      auto j = static_cast<ClassId>(kv.first);
-      MMContainerPtr ptr =
-          std::make_unique<typename MMContainerPtr::element_type>(kv.second,
-                                                                  compressor);
-      auto config = ptr->getConfig();
-      config.addExtraConfig(config_.trackTailHits
-                                ? pool.getAllocationClass(j).getAllocsPerSlab()
-                                : 0);
-      ptr->setConfig(config);
-      mmContainers[i][j] = std::move(ptr);
-    }
+  /* TODO: right now, we create empty containers because deserialization
+   * only works for a single (topmost) tier. */
+  MMContainers mmContainers{getNumTiers()};
+
+  std::map<serialization::MemoryDescriptorObject,MMSerializationType> containerMap = 
+      *container.containers();
+  for (auto md : containerMap) {
+     uint32_t tid = *md.first.tid();
+     uint32_t pid = *md.first.pid();
+     uint32_t cid = *md.first.cid();
+     auto& pool = getPoolByTid(pid,tid);
+     MMContainerPtr ptr =
+         std::make_unique<typename MMContainerPtr::element_type>(md.second,
+                                                                 compressor);
+     auto config = ptr->getConfig();
+     config.addExtraConfig(config_.trackTailHits
+                               ? pool.getAllocationClass(cid).getAllocsPerSlab()
+                               : 0);
+     ptr->setConfig(config);
+     mmContainers[tid][pid][cid] = std::move(ptr);
   }
   // We need to drop the unevictableMMContainer in the desierializer.
   // TODO: remove this at version 17.
@@ -5694,8 +6109,7 @@ GlobalCacheStats CacheAllocator<CacheTrait>::getGlobalCacheStats() const {
   ret.nvmCacheEnabled = nvmCache_ ? nvmCache_->isEnabled() : false;
   ret.reaperStats = getReaperStats();
   ret.rebalancerStats = getRebalancerStats();
-  ret.evictionStats = getBackgroundMoverStats(MoverDir::Evict);
-  ret.promotionStats = getBackgroundMoverStats(MoverDir::Promote);
+  ret.moverStats = getBackgroundMoverStats();
   ret.numActiveHandles = getNumActiveHandles();
 
   ret.isNewRamCache = cacheCreationTime_ == cacheInstanceCreationTime_;
@@ -5709,11 +6123,14 @@ GlobalCacheStats CacheAllocator<CacheTrait>::getGlobalCacheStats() const {
 
 template <typename CacheTrait>
 CacheMemoryStats CacheAllocator<CacheTrait>::getCacheMemoryStats() const {
-  const auto totalCacheSize = allocator_->getMemorySize();
-  const auto configuredTotalCacheSize = allocator_->getMemorySizeInclAdvised();
-
+  size_t totalCacheSize = 0;
+  size_t configuredTotalCacheSize = 0;
+  for(auto& allocator: allocator_) {
+    totalCacheSize += allocator->getMemorySize();
+    configuredTotalCacheSize += allocator->getMemorySizeInclAdvised();
+  }
   auto addSize = [this](size_t a, PoolId pid) {
-    return a + allocator_->getPool(pid).getPoolSize();
+    return a + allocator_[0]->getPool(pid).getPoolSize();
   };
   const auto regularPoolIds = getRegularPoolIds();
   const auto ccCachePoolIds = getCCachePoolIds();
@@ -5726,9 +6143,9 @@ CacheMemoryStats CacheAllocator<CacheTrait>::getCacheMemoryStats() const {
                           configuredTotalCacheSize,
                           configuredRegularCacheSize,
                           configuredCompactCacheSize,
-                          allocator_->getAdvisedMemorySize(),
+                          allocator_[0]->getAdvisedMemorySize(),
                           memMonitor_ ? memMonitor_->getMaxAdvisePct() : 0,
-                          allocator_->getUnreservedMemorySize(),
+                          allocator_[0]->getUnreservedMemorySize(),
                           nvmCache_ ? nvmCache_->getSize() : 0,
                           util::getMemAvailable(),
                           util::getRSSBytes()};
@@ -5867,61 +6284,37 @@ bool CacheAllocator<CacheTrait>::startNewReaper(
 
 template <typename CacheTrait>
 auto CacheAllocator<CacheTrait>::createBgWorkerMemoryAssignments(
-    size_t numWorkers) {
+    size_t numWorkers, TierId tid) {
   std::vector<std::vector<MemoryDescriptorType>> asssignedMemory(numWorkers);
-  auto pools = filterCompactCachePools(allocator_->getPoolIds());
+  auto pools = filterCompactCachePools(allocator_[tid]->getPoolIds());
   for (const auto pid : pools) {
-    const auto& mpStats = getPool(pid).getStats();
+    const auto& mpStats = getPoolByTid(pid, tid).getStats();
     for (const auto cid : mpStats.classIds) {
-      asssignedMemory[BackgroundMover<CacheT>::workerId(pid, cid, numWorkers)]
-          .emplace_back(pid, cid);
+      asssignedMemory[BackgroundMover<CacheT>::workerId(tid, pid, cid, numWorkers)]
+          .emplace_back(tid, pid, cid);
     }
   }
   return asssignedMemory;
 }
 
 template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::startNewBackgroundEvictor(
-    std::chrono::milliseconds interval,
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
-    size_t threads) {
-  XDCHECK(threads > 0);
-  backgroundEvictor_.resize(threads);
-  bool result = true;
-
-  auto memoryAssignments = createBgWorkerMemoryAssignments(threads);
-  for (size_t i = 0; i < threads; i++) {
-    auto ret = startNewWorker("BackgroundEvictor" + std::to_string(i),
-                              backgroundEvictor_[i], interval, *this, strategy,
-                              MoverDir::Evict);
-    result = result && ret;
-
-    if (result) {
-      backgroundEvictor_[i]->setAssignedMemory(std::move(memoryAssignments[i]));
-    }
-  }
-  return result;
-}
-
-template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::startNewBackgroundPromoter(
+bool CacheAllocator<CacheTrait>::startNewBackgroundMover(
     std::chrono::milliseconds interval,
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
+    size_t evictionBatch,
+    size_t promotionBatch,
+    double targetFree,
     size_t threads) {
   XDCHECK(threads > 0);
-  backgroundPromoter_.resize(threads);
+  backgroundMover_.resize(threads);
   bool result = true;
-
-  auto memoryAssignments = createBgWorkerMemoryAssignments(threads);
+  auto memoryAssignments = createBgWorkerMemoryAssignments(threads, 0);
   for (size_t i = 0; i < threads; i++) {
-    auto ret = startNewWorker("BackgroundPromoter" + std::to_string(i),
-                              backgroundPromoter_[i], interval, *this, strategy,
-                              MoverDir::Promote);
+    auto ret = startNewWorker("BackgroundMover" + std::to_string(i),
+                              backgroundMover_[i], interval, *this,
+                              evictionBatch, promotionBatch, targetFree);
     result = result && ret;
-
     if (result) {
-      backgroundPromoter_[i]->setAssignedMemory(
-          std::move(memoryAssignments[i]));
+      backgroundMover_[i]->setAssignedMemory(std::move(memoryAssignments[i]));
     }
   }
   return result;
@@ -5976,23 +6369,11 @@ bool CacheAllocator<CacheTrait>::stopReaper(std::chrono::seconds timeout) {
 }
 
 template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::stopBackgroundEvictor(
-    std::chrono::seconds timeout) {
-  bool result = true;
-  for (size_t i = 0; i < backgroundEvictor_.size(); i++) {
-    auto ret = stopWorker("BackgroundEvictor", backgroundEvictor_[i], timeout);
-    result = result && ret;
-  }
-  return result;
-}
-
-template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::stopBackgroundPromoter(
+bool CacheAllocator<CacheTrait>::stopBackgroundMover(
     std::chrono::seconds timeout) {
   bool result = true;
-  for (size_t i = 0; i < backgroundPromoter_.size(); i++) {
-    auto ret =
-        stopWorker("BackgroundPromoter", backgroundPromoter_[i], timeout);
+  for (size_t i = 0; i < backgroundMover_.size(); i++) {
+    auto ret = stopWorker("BackgroundMover", backgroundMover_[i], timeout);
     result = result && ret;
   }
   return result;
@@ -6015,7 +6396,8 @@ bool CacheAllocator<CacheTrait>::cleanupStrayShmSegments(
     // Any other concurrent process can not be attached to the segments or
     // even if it does, we want to mark it for destruction.
     ShmManager::removeByName(cacheDir, detail::kShmInfoName, posix);
-    ShmManager::removeByName(cacheDir, detail::kShmCacheName, posix);
+    ShmManager::removeByName(cacheDir, detail::kShmCacheName
+                             + std::to_string(0 /* TODO: per tier */), posix);
     ShmManager::removeByName(cacheDir, detail::kShmHashTableName, posix);
     ShmManager::removeByName(cacheDir, detail::kShmChainedItemHashTableName,
                              posix);
@@ -6030,13 +6412,14 @@ uint64_t CacheAllocator<CacheTrait>::getItemPtrAsOffset(const void* ptr) {
   // errors downstream.
 
   // if this succeeeds, the address is valid within the cache.
-  allocator_->getAllocInfo(ptr);
+  auto tid = getTierId(ptr);
+  allocator_[tid]->getAllocInfo(ptr);
 
   if (!isOnShm_ || !shmManager_) {
     throw std::invalid_argument("Shared memory not used");
   }
 
-  const auto& shm = shmManager_->getShmByName(detail::kShmCacheName);
+  const auto& shm = shmManager_->getShmByName(detail::kShmCacheName + std::to_string(tid));
 
   return reinterpret_cast<uint64_t>(ptr) -
          reinterpret_cast<uint64_t>(shm.getCurrentMapping().addr);
diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h
index 59d659f6f..078b51b74 100644
--- a/cachelib/allocator/CacheAllocatorConfig.h
+++ b/cachelib/allocator/CacheAllocatorConfig.h
@@ -27,7 +27,6 @@
 #include <stdexcept>
 #include <string>
 
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
 #include "cachelib/allocator/Cache.h"
 #include "cachelib/allocator/MM2Q.h"
 #include "cachelib/allocator/MemoryMonitor.h"
@@ -285,16 +284,13 @@ class CacheAllocatorConfig {
       std::chrono::seconds ccacheInterval,
       uint32_t ccacheStepSizePercent);
 
-  // Enable the background evictor - scans a tier to look for objects
-  // to evict to the next tier
-  CacheAllocatorConfig& enableBackgroundEvictor(
-      std::shared_ptr<BackgroundMoverStrategy> backgroundMoverStrategy,
-      std::chrono::milliseconds regularInterval,
-      size_t threads);
-
-  CacheAllocatorConfig& enableBackgroundPromoter(
-      std::shared_ptr<BackgroundMoverStrategy> backgroundMoverStrategy,
+  // Enable the background mover - scans a tier to look for objects
+  // to move to the next tier or just evict if single tier.
+  CacheAllocatorConfig& enableBackgroundMover(
       std::chrono::milliseconds regularInterval,
+      size_t evictionBatch,
+      size_t promotionBatch,
+      double targetFree,
       size_t threads);
 
   // This enables an optimization for Pool rebalancing and resizing.
@@ -329,6 +325,9 @@ class CacheAllocatorConfig {
   // Library team if you find yourself customizing this.
   CacheAllocatorConfig& setThrottlerConfig(util::Throttler::Config config);
 
+  // Insert items to first free memory tier
+  CacheAllocatorConfig& enableInsertToFirstFreeTier();
+
   // Passes in a callback to initialize an event tracker when the allocator
   // starts
   CacheAllocatorConfig& setEventTracker(EventTrackerSharedPtr&&);
@@ -371,15 +370,9 @@ class CacheAllocatorConfig {
            poolOptimizeStrategy != nullptr;
   }
 
-  // @return whether background evictor thread is enabled
-  bool backgroundEvictorEnabled() const noexcept {
-    return backgroundEvictorInterval.count() > 0 &&
-           backgroundEvictorStrategy != nullptr;
-  }
-
-  bool backgroundPromoterEnabled() const noexcept {
-    return backgroundPromoterInterval.count() > 0 &&
-           backgroundPromoterStrategy != nullptr;
+  // @return whether background mover thread is enabled
+  bool backgroundMoverEnabled() const noexcept {
+    return backgroundMoverInterval.count() > 0 && backgroundMoverThreads > 0;
   }
 
   // @return whether memory monitor is enabled
@@ -496,25 +489,21 @@ class CacheAllocatorConfig {
   // make any progress for the below threshold
   std::chrono::milliseconds slabReleaseStuckThreshold{std::chrono::seconds(60)};
 
-  // the background eviction strategy to be used
-  std::shared_ptr<BackgroundMoverStrategy> backgroundEvictorStrategy{nullptr};
-
-  // the background promotion strategy to be used
-  std::shared_ptr<BackgroundMoverStrategy> backgroundPromoterStrategy{nullptr};
-
-  // time interval to sleep between runs of the background evictor
-  std::chrono::milliseconds backgroundEvictorInterval{
-      std::chrono::milliseconds{1000}};
-
-  // time interval to sleep between runs of the background promoter
-  std::chrono::milliseconds backgroundPromoterInterval{
+  // time interval to sleep between runs of the background mover
+  std::chrono::milliseconds backgroundMoverInterval{
       std::chrono::milliseconds{1000}};
 
-  // number of thread used by background evictor
-  size_t backgroundEvictorThreads{1};
+  // number of threads used by background mover
+  size_t backgroundMoverThreads{0};
 
-  // number of thread used by background promoter
-  size_t backgroundPromoterThreads{1};
+  // How much to keep the cache memory free. This is used by the background
+  // mover to decide when to evict items.
+  double backgroundTargetFree{0.02};
+  // The number of items to evict in each batch in the background mover
+  size_t backgroundEvictionBatch{10};
+  // The number of items to promote in each batch in the background mover
+  // only available when there are multiple memory tiers
+  size_t backgroundPromotionBatch{0};
 
   // time interval to sleep between iterations of pool size optimization,
   // for regular pools and compact caches
@@ -555,6 +544,11 @@ class CacheAllocatorConfig {
   // ABOVE are the config for various cache workers
   //
 
+  // if turned off, always insert new elements to topmost memory tier.
+  // if turned on, insert new element to first free memory tier or evict memory
+  // from the bottom one if memory cache is full
+  bool insertToFirstFreeTier = false;
+
   // the number of tries to search for an item to evict
   // 0 means it's infinite
   unsigned int evictionSearchTries{50};
@@ -671,6 +665,12 @@ class CacheAllocatorConfig {
       {MemoryTierCacheConfig::fromShm().setRatio(1)}};
 };
 
+template <typename T>
+CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enableInsertToFirstFreeTier() {
+  insertToFirstFreeTier = true;
+  return *this;
+}
+
 template <typename T>
 CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::setCacheName(
     const std::string& _cacheName) {
@@ -1016,24 +1016,17 @@ CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enablePoolRebalancing(
 }
 
 template <typename T>
-CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enableBackgroundEvictor(
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
-    std::chrono::milliseconds interval,
-    size_t evictorThreads) {
-  backgroundEvictorStrategy = strategy;
-  backgroundEvictorInterval = interval;
-  backgroundEvictorThreads = evictorThreads;
-  return *this;
-}
-
-template <typename T>
-CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enableBackgroundPromoter(
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
+CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enableBackgroundMover(
     std::chrono::milliseconds interval,
-    size_t promoterThreads) {
-  backgroundPromoterStrategy = strategy;
-  backgroundPromoterInterval = interval;
-  backgroundPromoterThreads = promoterThreads;
+    size_t evictionBatch,
+    size_t promotionBatch,
+    double targetFree,
+    size_t moverThreads) {
+  backgroundMoverInterval = interval;
+  backgroundEvictionBatch = evictionBatch;
+  backgroundPromotionBatch = promotionBatch;
+  backgroundTargetFree = targetFree;
+  backgroundMoverThreads = moverThreads;
   return *this;
 }
 
@@ -1274,6 +1267,7 @@ std::map<std::string, std::string> CacheAllocatorConfig<T>::serialize() const {
   configMap["nvmAdmissionMinTTL"] = std::to_string(nvmAdmissionMinTTL);
   configMap["delayCacheWorkersStart"] =
       delayCacheWorkersStart ? "true" : "false";
+  configMap["insertToFirstFreeTier"] = std::to_string(insertToFirstFreeTier);
   mergeWithPrefix(configMap, throttleConfig.serialize(), "throttleConfig");
   mergeWithPrefix(configMap,
                   chainedItemAccessConfig.serialize(),
diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h
index 3692e55b4..d21400770 100644
--- a/cachelib/allocator/CacheStats.h
+++ b/cachelib/allocator/CacheStats.h
@@ -308,21 +308,29 @@ struct RebalancerStats {
 // Mover Stats
 struct BackgroundMoverStats {
   // the number of items this worker moved by looking at pools/classes stats
-  uint64_t numMovedItems{0};
-  // number of times we went executed the thread //TODO: is this def correct?
+  uint64_t numEvictedItems{0};
+  uint64_t numPromotedItems{0};
+
+  // number of times the thread was executed (by the periodic worker)
   uint64_t runCount{0};
-  // total number of classes
-  uint64_t totalClasses{0};
-  // eviction size
-  uint64_t totalBytesMoved{0};
-
-  BackgroundMoverStats& operator+=(const BackgroundMoverStats& rhs) {
-    numMovedItems += rhs.numMovedItems;
-    runCount += rhs.runCount;
-    totalClasses += rhs.totalClasses;
-    totalBytesMoved += rhs.totalBytesMoved;
-    return *this;
-  }
+
+  // average number of items moved per run
+  double avgItemsMoved{0.0};
+
+  // number of times we actually traversed the mmContainer
+  uint64_t numTraversals{0};
+
+  // indicates the time in ns for the last iteration
+  uint64_t lastTraversalTimeNs{0};
+
+  // indicates the minimum time in ns of all traversals
+  uint64_t minTraversalTimeNs{0};
+
+  // indicates the maximum time in ns of all traversals
+  uint64_t maxTraversalTimeNs{0};
+
+  // indicates the average time in ns of all traversals
+  uint64_t avgTraversalTimeNs{0};
 };
 
 // CacheMetadata type to export
@@ -345,10 +353,8 @@ struct Stats;
 // Stats that apply globally in cache and
 // the ones that are aggregated over all pools
 struct GlobalCacheStats {
-  // background eviction stats
-  BackgroundMoverStats evictionStats;
-
-  BackgroundMoverStats promotionStats;
+  // background mover stats per each mover thread
+  std::vector<BackgroundMoverStats> moverStats;
 
   // number of calls to CacheAllocator::find
   uint64_t numCacheGets{0};
diff --git a/cachelib/allocator/FreeThresholdStrategy.cpp b/cachelib/allocator/FreeThresholdStrategy.cpp
deleted file mode 100644
index f4afbd78f..000000000
--- a/cachelib/allocator/FreeThresholdStrategy.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "cachelib/allocator/FreeThresholdStrategy.h"
-
-namespace facebook::cachelib {
-
-FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark,
-                                             double highEvictionAcWatermark,
-                                             uint64_t maxEvictionBatch,
-                                             uint64_t minEvictionBatch)
-    : lowEvictionAcWatermark(lowEvictionAcWatermark),
-      highEvictionAcWatermark(highEvictionAcWatermark),
-      maxEvictionBatch(maxEvictionBatch),
-      minEvictionBatch(minEvictionBatch) {}
-
-std::vector<size_t> FreeThresholdStrategy::calculateBatchSizes(
-    const CacheBase& /* cache */,
-    std::vector<MemoryDescriptorType> /* acVec */) {
-  throw std::runtime_error("Not supported yet!");
-}
-
-} // namespace facebook::cachelib
diff --git a/cachelib/allocator/FreeThresholdStrategy.h b/cachelib/allocator/FreeThresholdStrategy.h
deleted file mode 100644
index 13a2ac40d..000000000
--- a/cachelib/allocator/FreeThresholdStrategy.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// @lint-ignore-every CLANGTIDY clang-diagnostic-unused-private-field
-
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
-#include "cachelib/allocator/Cache.h"
-
-namespace facebook {
-namespace cachelib {
-
-// Free threshold strategy for background promotion worker.
-// This strategy tries to keep certain percent of memory free
-// at all times.
-class FreeThresholdStrategy : public BackgroundMoverStrategy {
- public:
-  FreeThresholdStrategy(double lowEvictionAcWatermark,
-                        double highEvictionAcWatermark,
-                        uint64_t maxEvictionBatch,
-                        uint64_t minEvictionBatch);
-  ~FreeThresholdStrategy() {}
-
-  std::vector<size_t> calculateBatchSizes(
-      const CacheBase& cache, std::vector<MemoryDescriptorType> acVecs);
-
- private:
-#if defined(__clang__)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-private-field"
-#endif
-  double lowEvictionAcWatermark{2.0};
-  double highEvictionAcWatermark{5.0};
-  uint64_t maxEvictionBatch{40};
-  uint64_t minEvictionBatch{5};
-#if defined(__clang__)
-#pragma clang diagnostic pop
-#endif
-};
-
-} // namespace cachelib
-} // namespace facebook
diff --git a/cachelib/allocator/MM2Q.h b/cachelib/allocator/MM2Q.h
index cece17e0e..3bf76b018 100644
--- a/cachelib/allocator/MM2Q.h
+++ b/cachelib/allocator/MM2Q.h
@@ -66,6 +66,7 @@ class MM2Q {
   enum LruType { Warm, WarmTail, Hot, Cold, ColdTail, NumTypes };
 
   // Config class for MM2Q
+  // TODO: implement support for useCombinedLockForIterators
   struct Config {
     // Create from serialized config
     explicit Config(SerializationConfigType configState)
@@ -460,6 +461,18 @@ class MM2Q {
     //          is unchanged.
     bool add(T& node) noexcept;
 
+    // helper function to add the node under the container lock
+    void addNodeLocked(T& node, const Time& currTime);
+
+    // adds the given nodes into the container and marks each as being present
+    // in the container. The nodes are added to the head of the lru.
+    //
+    // @param begin, end  Iterator range over the node pointers (T*) to add.
+    // @return  number of nodes added - it is up to user to verify all
+    //          expected nodes have been added.
+    template <typename It>
+    uint32_t addBatch(It begin, It end) noexcept;
+
     // removes the node from the lru and sets it previous and next to nullptr.
     //
     // @param node  The node to be removed from the container.
@@ -500,6 +513,11 @@ class MM2Q {
     template <typename F>
     void withEvictionIterator(F&& f);
 
+    // Execute provided function under container lock. Function gets
+    // iterator passed as parameter.
+    template <typename F>
+    void withPromotionIterator(F&& f);
+
     // Execute provided function under container lock.
     template <typename F>
     void withContainerLock(F&& f);
@@ -889,16 +907,41 @@ bool MM2Q::Container<T, HookPtr>::add(T& node) noexcept {
     if (node.isInMMContainer()) {
       return false;
     }
+    addNodeLocked(node, currTime);
+    return true;
+  });
+}
 
-    markHot(node);
-    unmarkCold(node);
-    unmarkTail(node);
-    lru_.getList(LruType::Hot).linkAtHead(node);
-    rebalance();
+// adds the node to the list assuming not in
+// container and holding container lock
+template <typename T, MM2Q::Hook<T> T::*HookPtr>
+void MM2Q::Container<T, HookPtr>::addNodeLocked(T& node, const Time& currTime) {
+  XDCHECK(!node.isInMMContainer());
+  markHot(node);
+  unmarkCold(node);
+  unmarkTail(node);
+  lru_.getList(LruType::Hot).linkAtHead(node);
+  rebalance();
+
+  node.markInMMContainer();
+  setUpdateTime(node, currTime);
+}
 
-    node.markInMMContainer();
-    setUpdateTime(node, currTime);
-    return true;
+template <typename T, MM2Q::Hook<T> T::*HookPtr>
+template <typename It>
+uint32_t MM2Q::Container<T, HookPtr>::addBatch(It begin, It end) noexcept {
+  const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
+  return lruMutex_->lock_combine([this, begin, end, currTime]() {
+    uint32_t i = 0;
+    for (auto itr = begin; itr != end; itr++) {
+      T* node = *itr;
+      if (node->isInMMContainer()) {
+        return i;
+      }
+      addNodeLocked(*node, currTime);
+      i++;
+    }
+    return i;
   });
 }
 
@@ -920,6 +963,15 @@ void MM2Q::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   }
 }
 
+// invokes fun under the lru lock with an iterator starting at the hot queue
+template <typename T, MM2Q::Hook<T> T::*HookPtr>
+template <typename F>
+void MM2Q::Container<T, HookPtr>::withPromotionIterator(F&& fun) {
+  lruMutex_->lock_combine([this, &fun]() {
+    fun(LockedIterator{LockHolder{}, lru_.begin(LruType::Hot)});
+  });
+}
+
 template <typename T, MM2Q::Hook<T> T::*HookPtr>
 template <typename F>
 void MM2Q::Container<T, HookPtr>::withContainerLock(F&& fun) {
diff --git a/cachelib/allocator/MMLru.h b/cachelib/allocator/MMLru.h
index 747fd6276..d12f60ada 100644
--- a/cachelib/allocator/MMLru.h
+++ b/cachelib/allocator/MMLru.h
@@ -337,6 +337,18 @@ class MMLru {
     //          is unchanged.
     bool add(T& node) noexcept;
 
+    // helper function to add the node under the container lock
+    void addNodeLocked(T& node, const Time& currTime);
+
+    // adds the given nodes into the container and marks each as being present
+    // in the container. The nodes are added to the head of the lru.
+    //
+    // @param begin, end  Iterator range over the node pointers (T*) to add.
+    // @return  number of nodes added - it is up to user to verify all
+    //          expected nodes have been added.
+    template <typename It>
+    uint32_t addBatch(It begin, It end) noexcept;
+
     // removes the node from the lru and sets it previous and next to nullptr.
     //
     // @param node  The node to be removed from the container.
@@ -378,6 +390,11 @@ class MMLru {
     template <typename F>
     void withContainerLock(F&& f);
 
+    // Execute provided function under container lock. Function gets
+    // iterator passed as parameter.
+    template <typename F>
+    void withPromotionIterator(F&& f);
+
     // get copy of current config
     Config getConfig() const;
 
@@ -685,19 +702,47 @@ bool MMLru::Container<T, HookPtr>::add(T& node) noexcept {
     if (node.isInMMContainer()) {
       return false;
     }
-    if (config_.lruInsertionPointSpec == 0 || insertionPoint_ == nullptr) {
-      lru_.linkAtHead(node);
-    } else {
-      lru_.insertBefore(*insertionPoint_, node);
-    }
-    node.markInMMContainer();
-    setUpdateTime(node, currTime);
-    unmarkAccessed(node);
-    updateLruInsertionPoint();
+    addNodeLocked(node, currTime);
     return true;
   });
 }
 
+template <typename T, MMLru::Hook<T> T::*HookPtr>
+void MMLru::Container<T, HookPtr>::addNodeLocked(T& node,
+                                                 const Time& currTime) {
+  XDCHECK(!node.isInMMContainer());
+  if (config_.lruInsertionPointSpec == 0 || insertionPoint_ == nullptr) {
+    lru_.linkAtHead(node);
+  } else {
+    lru_.insertBefore(*insertionPoint_, node);
+  }
+  node.markInMMContainer();
+  setUpdateTime(node, currTime);
+  unmarkAccessed(node);
+  updateLruInsertionPoint();
+}
+
+template <typename T, MMLru::Hook<T> T::*HookPtr>
+template <typename It>
+uint32_t MMLru::Container<T, HookPtr>::addBatch(It begin, It end) noexcept {
+  const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
+  return lruMutex_->lock_combine([this, begin, end, currTime]() {
+    uint32_t i = 0;
+    for (auto itr = begin; itr != end; ++itr) {
+      T* node = *itr;
+      XDCHECK(!node->isInMMContainer());
+      if (node->isInMMContainer()) {
+        // noexcept: a throw here would call std::terminate. Stop instead and
+        // return the count added so far, matching MM2Q/MMTinyLFU addBatch.
+        return i;
+      }
+      addNodeLocked(*node, currTime);
+      i++;
+    }
+    return i;
+  });
+}
+
 template <typename T, MMLru::Hook<T> T::*HookPtr>
 typename MMLru::Container<T, HookPtr>::LockedIterator
 MMLru::Container<T, HookPtr>::getEvictionIterator() const noexcept {
@@ -716,6 +761,17 @@ void MMLru::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   }
 }
 
+template <typename T, MMLru::Hook<T> T::*HookPtr>
+template <typename F>
+void MMLru::Container<T, HookPtr>::withPromotionIterator(F&& fun) {
+  if (config_.useCombinedLockForIterators) {
+    lruMutex_->lock_combine([this, &fun]() { fun(Iterator{lru_.begin()}); });
+  } else {
+    LockHolder lck{*lruMutex_};
+    fun(Iterator{lru_.begin()});
+  }
+}
+
 template <typename T, MMLru::Hook<T> T::*HookPtr>
 template <typename F>
 void MMLru::Container<T, HookPtr>::withContainerLock(F&& fun) {
diff --git a/cachelib/allocator/MMTinyLFU.h b/cachelib/allocator/MMTinyLFU.h
index 5082b8f69..0994679c5 100644
--- a/cachelib/allocator/MMTinyLFU.h
+++ b/cachelib/allocator/MMTinyLFU.h
@@ -377,6 +377,18 @@ class MMTinyLFU {
     //          is unchanged.
     bool add(T& node) noexcept;
 
+    // helper function to add the node under the container lock
+    void addNodeLocked(T& node, const Time& currTime);
+
+    // adds the given nodes into the container and marks each as being present
+    // in the container. The nodes are added to the head of the lru.
+    //
+    // @param begin, end  Iterator range over the node pointers (T*) to add.
+    // @return  number of nodes added - it is up to user to verify all
+    //          expected nodes have been added.
+    template <typename It>
+    uint32_t addBatch(It begin, It end) noexcept;
+
     // removes the node from the lru and sets it previous and next to nullptr.
     //
     // @param node  The node to be removed from the container.
@@ -546,6 +558,11 @@ class MMTinyLFU {
     template <typename F>
     void withEvictionIterator(F&& f);
 
+    // Promotion iteration is not supported by MMTinyLFU: this always throws
+    // std::runtime_error and never invokes the provided function.
+    template <typename F>
+    void withPromotionIterator(F&& f);
+
     // Execute provided function under container lock.
     template <typename F>
     void withContainerLock(F&& f);
@@ -856,7 +873,16 @@ bool MMTinyLFU::Container<T, HookPtr>::add(T& node) noexcept {
   if (node.isInMMContainer()) {
     return false;
   }
+  addNodeLocked(node, currTime);
+  return true;
+}
 
+// adds the node to the list assuming not in
+// container and holding container lock
+template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
+void MMTinyLFU::Container<T, HookPtr>::addNodeLocked(T& node,
+                                                     const Time& currTime) {
+  XDCHECK(!node.isInMMContainer());
   auto& tinyLru = lru_.getList(LruType::Tiny);
   tinyLru.linkAtHead(node);
   markTiny(node);
@@ -884,7 +910,23 @@ bool MMTinyLFU::Container<T, HookPtr>::add(T& node) noexcept {
   node.markInMMContainer();
   setUpdateTime(node, currTime);
   unmarkAccessed(node);
-  return true;
+}
+
+template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
+template <typename It>
+uint32_t MMTinyLFU::Container<T, HookPtr>::addBatch(It begin, It end) noexcept {
+  const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
+  LockHolder l(lruMutex_);
+  uint32_t i = 0;
+  for (auto itr = begin; itr != end; itr++) {
+    T* node = *itr;
+    if (node->isInMMContainer()) {
+      return i;
+    }
+    addNodeLocked(*node, currTime);
+    i++;
+  }
+  return i;
 }
 
 template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
@@ -901,6 +943,12 @@ void MMTinyLFU::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   fun(getEvictionIterator());
 }
 
+template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
+template <typename F>
+void MMTinyLFU::Container<T, HookPtr>::withPromotionIterator(F&& fun) {
+  throw std::runtime_error("Not supported");
+}
+
 template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
 template <typename F>
 void MMTinyLFU::Container<T, HookPtr>::withContainerLock(F&& fun) {
diff --git a/cachelib/allocator/PoolOptimizer.cpp b/cachelib/allocator/PoolOptimizer.cpp
index 1902bfebf..d23bb77b5 100644
--- a/cachelib/allocator/PoolOptimizer.cpp
+++ b/cachelib/allocator/PoolOptimizer.cpp
@@ -50,6 +50,8 @@ void PoolOptimizer::optimizeRegularPoolSizes() {
 
 void PoolOptimizer::optimizeCompactCacheSizes() {
   try {
+    // TODO: should optimizer look at each tier individually?
+    // If yes, then resizePools should be per-tier
     auto strategy = cache_.getPoolOptimizeStrategy();
     if (!strategy) {
       strategy = strategy_;
diff --git a/cachelib/allocator/PromotionStrategy.h b/cachelib/allocator/PromotionStrategy.h
deleted file mode 100644
index d3eb8686c..000000000
--- a/cachelib/allocator/PromotionStrategy.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
-#include "cachelib/allocator/Cache.h"
-
-namespace facebook {
-namespace cachelib {
-
-// Strategy for background promotion worker.
-class PromotionStrategy : public BackgroundMoverStrategy {
- public:
-  PromotionStrategy(uint64_t promotionAcWatermark,
-                    uint64_t maxPromotionBatch,
-                    uint64_t minPromotionBatch)
-      : promotionAcWatermark(promotionAcWatermark),
-        maxPromotionBatch(maxPromotionBatch),
-        minPromotionBatch(minPromotionBatch) {}
-  ~PromotionStrategy() {}
-
-  std::vector<size_t> calculateBatchSizes(
-      const CacheBase& cache, std::vector<MemoryDescriptorType> acVec) {
-    return {};
-  }
-
- private:
-  double promotionAcWatermark{4.0};
-  uint64_t maxPromotionBatch{40};
-  uint64_t minPromotionBatch{5};
-};
-
-} // namespace cachelib
-} // namespace facebook
diff --git a/cachelib/allocator/memory/AllocationClass.cpp b/cachelib/allocator/memory/AllocationClass.cpp
index ab99e42d6..a8cd43781 100644
--- a/cachelib/allocator/memory/AllocationClass.cpp
+++ b/cachelib/allocator/memory/AllocationClass.cpp
@@ -51,7 +51,7 @@ AllocationClass::AllocationClass(ClassId classId,
       allocationSize_(allocSize),
       slabAlloc_(s),
       freedAllocations_{
-          slabAlloc_.createPtrCompressor<FreeAlloc, CompressedPtr4B>()} {
+          slabAlloc_.createSingleTierPtrCompressor<FreeAlloc, CompressedPtr4B>()} {
   checkState();
 }
 
@@ -104,7 +104,7 @@ AllocationClass::AllocationClass(
       slabAlloc_(s),
       freedAllocations_(
           *object.freedAllocationsObject(),
-          slabAlloc_.createPtrCompressor<FreeAlloc, CompressedPtr4B>()),
+          slabAlloc_.createSingleTierPtrCompressor<FreeAlloc, CompressedPtr4B>()),
       canAllocate_(*object.canAllocate()) {
   if (!slabAlloc_.isRestorable()) {
     throw std::logic_error("The allocation class cannot be restored.");
@@ -142,6 +142,27 @@ void* AllocationClass::addSlabAndAllocate(Slab* slab) {
   });
 }
 
+// Adds `slab` to this allocation class and, under the same lock acquisition,
+// hands out up to `batch` allocations. Stops early once allocateLocked()
+// returns nullptr, so the result may hold fewer than `batch` pointers.
+std::vector<void*> AllocationClass::addSlabAndAllocateBatch(Slab* slab,
+                                                            size_t batch) {
+  XDCHECK_NE(nullptr, slab);
+  std::vector<void*> result;
+  result.reserve(batch);
+  lock_->lock_combine([this, slab, batch, &result]() {
+    addSlabLocked(slab);
+    for (size_t handedOut = 0; handedOut < batch; ++handedOut) {
+      void* const mem = allocateLocked();
+      if (mem == nullptr) {
+        return;
+      }
+      result.push_back(mem);
+    }
+  });
+  return result;
+}
+
 void* AllocationClass::allocateFromCurrentSlabLocked() noexcept {
   XDCHECK(canAllocateFromCurrentSlabLocked());
   void* ret = currSlab_->memoryAtOffset(currOffset_);
@@ -161,6 +182,26 @@ void* AllocationClass::allocate() {
   return lock_->lock_combine([this]() -> void* { return allocateLocked(); });
 }
 
+// Hands out up to `batch` allocations under a single lock acquisition.
+// Returns an empty vector without locking when canAllocate_ is false; may
+// return fewer than `batch` entries if allocateLocked() yields nullptr.
+std::vector<void*> AllocationClass::allocateBatch(size_t batch) {
+  std::vector<void*> result;
+  if (!canAllocate_) {
+    return result;
+  }
+  lock_->lock_combine([this, &result, batch]() {
+    while (result.size() < batch) {
+      void* const mem = allocateLocked();
+      if (mem == nullptr) {
+        return;
+      }
+      result.push_back(mem);
+    }
+  });
+  return result;
+}
+
 void* AllocationClass::allocateLocked() {
   // fast path for case when the cache is mostly full.
   if (freedAllocations_.empty() && freeSlabs_.empty() &&
@@ -359,10 +400,10 @@ std::pair<bool, std::vector<void*>> AllocationClass::pruneFreeAllocs(
   // Set the bit to true if the corresponding allocation is freed, false
   // otherwise.
   FreeList freeAllocs{
-      slabAlloc_.createPtrCompressor<FreeAlloc, CompressedPtr4B>()};
+      slabAlloc_.createSingleTierPtrCompressor<FreeAlloc, CompressedPtr4B>()};
   FreeList notInSlab{
-      slabAlloc_.createPtrCompressor<FreeAlloc, CompressedPtr4B>()};
-  FreeList inSlab{slabAlloc_.createPtrCompressor<FreeAlloc, CompressedPtr4B>()};
+      slabAlloc_.createSingleTierPtrCompressor<FreeAlloc, CompressedPtr4B>()};
+  FreeList inSlab{slabAlloc_.createSingleTierPtrCompressor<FreeAlloc, CompressedPtr4B>()};
 
   lock_->lock_combine([&]() {
     // Take the allocation class free list offline
@@ -626,6 +667,37 @@ void AllocationClass::processAllocForRelease(
   });
 }
 
+// Returns `memory` to this class; the caller must already hold the AC lock.
+// Throws if the slab-release bookkeeping is missing or the alloc is re-freed.
+void AllocationClass::freeLocked(const SlabHeader* header,
+                                 const Slab* slab,
+                                 void* memory,
+                                 uintptr_t slabPtrVal) {
+  if (!header->isMarkedForRelease()) {
+    // TODO add checks here to ensure that we dont double free in debug mode.
+    freedAllocations_.insert(*reinterpret_cast<FreeAlloc*>(memory));
+    canAllocate_ = true;
+    return;
+  }
+
+  // The slab is marked for release: record the free in its per-slab state
+  // instead of putting the allocation back on the free list.
+  auto entry = slabReleaseAllocMap_.find(slabPtrVal);
+  if (entry == slabReleaseAllocMap_.end()) {
+    // this should not happen.
+    throw std::runtime_error(folly::sformat(
+        "Invalid slabReleaseAllocMap "
+        "state when attempting to free an allocation. Memory: {}",
+        memory));
+  }
+  const auto allocIdx = getAllocIdx(slab, memory);
+  if (entry->second[allocIdx]) {
+    throw std::invalid_argument(
+        folly::sformat("Allocation {} is already marked as free", memory));
+  }
+  entry->second[allocIdx] = true;
+}
+
 void AllocationClass::free(void* memory) {
   const auto* header = slabAlloc_.getSlabHeader(memory);
   auto* slab = slabAlloc_.getSlabForMemory(memory);
@@ -638,31 +710,7 @@ void AllocationClass::free(void* memory) {
 
   const auto slabPtrVal = getSlabPtrValue(slab);
   lock_->lock_combine([this, header, slab, memory, slabPtrVal]() {
-    // check under the lock we actually add the allocation back to the free list
-    if (header->isMarkedForRelease()) {
-      auto it = slabReleaseAllocMap_.find(slabPtrVal);
-
-      // this should not happen.
-      if (it == slabReleaseAllocMap_.end()) {
-        throw std::runtime_error(folly::sformat(
-            "Invalid slabReleaseAllocMap "
-            "state when attempting to free an allocation. Memory: {}",
-            memory));
-      }
-
-      auto& allocState = it->second;
-      const auto idx = getAllocIdx(slab, memory);
-      if (allocState[idx]) {
-        throw std::invalid_argument(
-            folly::sformat("Allocation {} is already marked as free", memory));
-      }
-      allocState[idx] = true;
-      return;
-    }
-
-    // TODO add checks here to ensure that we dont double free in debug mode.
-    freedAllocations_.insert(*reinterpret_cast<FreeAlloc*>(memory));
-    canAllocate_ = true;
+    freeLocked(header, slab, memory, slabPtrVal);
   });
 }
 
@@ -707,6 +755,23 @@ ACStats AllocationClass::getStats() const {
   });
 }
 
+// Approximates usage of this class from its slab and free-list counters:
+// returns {active allocs, active allocs / total alloc slots}, or {0, 0.0}
+// when the class owns no slabs.
+std::pair<size_t, double> AllocationClass::getApproxUsage() const {
+  const unsigned long long slabCount = allocatedSlabs_.size();
+  if (slabCount == 0) {
+    return {0, 0.0};
+  }
+  const unsigned long long capacity = slabCount * getAllocsPerSlab();
+  const auto unusedInCurrSlab = canAllocateFromCurrentSlabLocked()
+      ? (Slab::kSize - currOffset_) / allocationSize_ : 0;
+  const unsigned long long freed = freedAllocations_.size();
+  const unsigned long long active = capacity - freed - unusedInCurrSlab;
+  return {active,
+          static_cast<double>(active) / static_cast<double>(capacity)};
+}
+
 void AllocationClass::createSlabReleaseAllocMapLocked(const Slab* slab) {
   // Initialize slab free state
   // Each bit represents whether or not an alloc has already been freed
diff --git a/cachelib/allocator/memory/AllocationClass.h b/cachelib/allocator/memory/AllocationClass.h
index 5f5ebe1b9..fe9dd60c9 100644
--- a/cachelib/allocator/memory/AllocationClass.h
+++ b/cachelib/allocator/memory/AllocationClass.h
@@ -97,6 +97,10 @@ class AllocationClass {
   // fetch stats about this allocation class.
   ACStats getStats() const;
 
+  // Returns a pair of (1) the number of active allocs in this class and
+  // (2) the approx usage, i.e. used allocs / total allocs in this class.
+  std::pair<size_t, double> getApproxUsage() const;
+
   // Whether the pool is full or free to allocate more in the current state.
   // This is only a hint and not a gurantee that subsequent allocate will
   // fail/succeed.
@@ -110,6 +114,13 @@ class AllocationClass {
   //          to this slab class to make further allocations out of it.
   void* allocate();
 
+  // allocates a batch of memory corresponding to the allocation size of this
+  // AllocationClass.
+  //
+  // @return  vector of pointers to the memory of allocationSize_ chunk or
+  //          empty vector if we don't have any free memory.
+  std::vector<void*> allocateBatch(size_t batch);
+
   // @param ctx     release context for the slab owning this alloc
   // @param memory  memory to check
   //
@@ -212,6 +223,39 @@ class AllocationClass {
   // this slab class.
   void free(void* memory);
 
+  // releases the memory under the AC lock
+  void freeLocked(const SlabHeader* header,
+                  const Slab* slab,
+                  void* memory,
+                  uintptr_t slabPtrVal);
+
+  // Frees every allocation in [begin, end) back to this class, taking the AC
+  // lock once for the whole range instead of once per allocation.
+  // @return number of allocations freed
+  // @throw std::invalid_argument if an element does not belong to this class;
+  //        elements preceding it in the range have already been freed.
+  template <typename It>
+  uint32_t freeBatch(It begin, It end) {
+    return lock_->lock_combine([this, begin, end]() -> uint32_t {
+      uint32_t numFreed = 0;
+      for (auto cur = begin; cur != end; ++cur, ++numFreed) {
+        void* memory = *cur;
+        const auto* header = slabAlloc_.getSlabHeader(memory);
+        auto* slab = slabAlloc_.getSlabForMemory(memory);
+        if (header == nullptr || header->classId != classId_) {
+          throw std::invalid_argument(folly::sformat(
+              "trying to free memory {} (with ClassId {}), not belonging to "
+              "this "
+              "AllocationClass (ClassId {})",
+              memory, header ? header->classId : Slab::kInvalidClassId,
+              classId_));
+        }
+        freeLocked(header, slab, memory, getSlabPtrValue(slab));
+      }
+      return numFreed;
+    });
+  }
+
   // acquires a new slab for this allocation class.
   // @param slab    a new slab to be added. This can NOT be nullptr.
   void addSlab(Slab* slab);
@@ -221,6 +265,12 @@ class AllocationClass {
   // @return  new allocation. This cannot fail.
   void* addSlabAndAllocate(Slab* slab);
 
+  // acquires a new slab and allocates a batch right away
+  // @param slab a new slab to be added.
+  // @param batch number of allocations to be made.
+  // @return  vector of pointers to the memory of new allocations
+  std::vector<void*> addSlabAndAllocateBatch(Slab* slab, size_t batch);
+
   // Releasing a slab is a two step process.
   // 1. Mark a slab for release, by calling `startSlabRelease`.
   // 2. Free all the activeAllocations
@@ -445,7 +495,7 @@ class AllocationClass {
   struct CACHELIB_PACKED_ATTR FreeAlloc {
     using CompressedPtrType = facebook::cachelib::CompressedPtr4B;
     using PtrCompressor = facebook::cachelib::
-        PtrCompressor<FreeAlloc, SlabAllocator, CompressedPtrType>;
+        SingleTierPtrCompressor<FreeAlloc, SlabAllocator, CompressedPtrType>;
     SListHook<FreeAlloc> hook_{};
   };
 
diff --git a/cachelib/allocator/memory/CompressedPtr.h b/cachelib/allocator/memory/CompressedPtr.h
index b56cbf2ee..1300a72ad 100644
--- a/cachelib/allocator/memory/CompressedPtr.h
+++ b/cachelib/allocator/memory/CompressedPtr.h
@@ -176,6 +176,8 @@ class CACHELIB_PACKED_ATTR CompressedPtr4B {
   }
 
   friend SlabAllocator;
+  template <typename PtrType, typename AllocatorContainer, typename CPtrType>
+  friend class PtrCompressor;
   // Allow access to private members by unit tests
   friend class tests::AllocTestBase;
 };
@@ -350,20 +352,23 @@ class CACHELIB_PACKED_ATTR CompressedPtr5B {
     regionIdx_ += static_cast<uint32_t>(tid) << kNumTierIdxOffset;
   }
 
+  template <typename PtrType, typename AllocatorContainer, typename CPtrType>
+  friend class PtrCompressor;
+
   friend SlabAllocator;
   friend class facebook::cachelib::tests::AllocTestBase;
 };
 
 template <typename PtrType, typename AllocatorT, typename CompressedPtrType>
-class PtrCompressor {
+class SingleTierPtrCompressor {
  public:
-  explicit PtrCompressor(const AllocatorT& allocator) noexcept
+  explicit SingleTierPtrCompressor(const AllocatorT& allocator) noexcept
       : allocator_(allocator) {}
 
   const CompressedPtrType compress(const PtrType* uncompressed) const {
     return allocator_.template compress<CompressedPtrType>(
         uncompressed, false /* isMultiTiered */);
   }
 
   PtrType* unCompress(const CompressedPtrType& compressed) const {
     return static_cast<PtrType*>(
@@ -371,11 +376,11 @@ class PtrCompressor {
             compressed, false /* isMultiTiered */));
   }
 
-  bool operator==(const PtrCompressor& rhs) const noexcept {
+  bool operator==(const SingleTierPtrCompressor& rhs) const noexcept {
     return &allocator_ == &rhs.allocator_;
   }
 
-  bool operator!=(const PtrCompressor& rhs) const noexcept {
+  bool operator!=(const SingleTierPtrCompressor& rhs) const noexcept {
     return !(*this == rhs);
   }
 
@@ -383,5 +388,54 @@ class PtrCompressor {
   // memory allocator that does the pointer compression.
   const AllocatorT& allocator_;
 };
+
+template <typename PtrType, typename AllocatorContainer, typename CompressedPtrType>
+class PtrCompressor {
+ public:
+  explicit PtrCompressor(const AllocatorContainer& allocators) noexcept
+      : allocators_(allocators) {}
+
+  // Compresses via the first allocator that reports owning `uncompressed`,
+  // stamping its index as tier id when there is more than one allocator.
+  // NOTE(review): if none owns it, `tier` indexes past the end of allocators_.
+  const CompressedPtrType compress(const PtrType* uncompressed) const {
+    if (uncompressed == nullptr) {
+      return CompressedPtrType();
+    }
+    TierId tier = 0;
+    while (tier < allocators_.size() &&
+           !allocators_[tier]->isMemoryInAllocator(uncompressed)) {
+      ++tier;
+    }
+    const bool multiTier = allocators_.size() > 1;
+    auto result = allocators_[tier]->template compress<CompressedPtrType>(
+        uncompressed, multiTier);
+    if (multiTier) {
+      result.setTierId(tier);
+    }
+    return result;
+  }
+
+  // Null maps to nullptr; else the stored tier id selects the allocator.
+  PtrType* unCompress(const CompressedPtrType& compressed) const {
+    if (compressed.isNull()) {
+      return nullptr;
+    }
+    const bool multiTier = allocators_.size() > 1;
+    return static_cast<PtrType*>(allocators_[compressed.getTierId(multiTier)]
+        ->template unCompress<CompressedPtrType>(compressed, multiTier));
+  }
+
+  bool operator==(const PtrCompressor& rhs) const noexcept {
+    return &allocators_ == &rhs.allocators_;
+  }
+  bool operator!=(const PtrCompressor& rhs) const noexcept {
+    return !(*this == rhs);
+  }
+
+ private:
+  // container of allocators (indexed by tier id) that compress the pointers.
+  const AllocatorContainer& allocators_;
+};
 } // namespace cachelib
 } // namespace facebook
diff --git a/cachelib/allocator/memory/MemoryAllocator.cpp b/cachelib/allocator/memory/MemoryAllocator.cpp
index 5de65e4e1..0a81cc987 100644
--- a/cachelib/allocator/memory/MemoryAllocator.cpp
+++ b/cachelib/allocator/memory/MemoryAllocator.cpp
@@ -71,6 +71,13 @@ void* MemoryAllocator::allocate(PoolId id, uint32_t size) {
   return mp.allocate(size);
 }
 
+// Forwards a batch allocation for class `cid` to the pool identified by `id`.
+std::vector<void*> MemoryAllocator::allocateByCidBatch(PoolId id,
+                                                       ClassId cid,
+                                                       size_t batch) {
+  return memoryPoolManager_.getPoolById(id).allocateByCidBatch(cid, batch);
+}
+
 void* MemoryAllocator::allocateZeroedSlab(PoolId id) {
   if (!config_.enableZeroedSlabAllocs) {
     throw std::logic_error("Zeroed Slab allcoation is not enabled");
diff --git a/cachelib/allocator/memory/MemoryAllocator.h b/cachelib/allocator/memory/MemoryAllocator.h
index 105873a39..64e238806 100644
--- a/cachelib/allocator/memory/MemoryAllocator.h
+++ b/cachelib/allocator/memory/MemoryAllocator.h
@@ -168,6 +168,15 @@ class MemoryAllocator {
   //        invalid.
   void* allocate(PoolId id, uint32_t size);
 
+  // allocates a batch of memory for the given class id
+  // @param id     the pool id to be used for this allocation.
+  // @param cid    the class id for the allocation.
+  // @param batch  the number of allocations to be made.
+  // @return a vector of pointers to the memory corresponding to the allocation.
+  // @throw std::invalid_argument if the poolId is invalid or the class id is
+  //       invalid.
+  std::vector<void*> allocateByCidBatch(PoolId id, ClassId cid, size_t batch);
+
   // Allocate a zeroed Slab
   //
   // This guarantees the content of the allocated slab is zero because when
@@ -186,6 +195,14 @@ class MemoryAllocator {
   //        allocation handed out by this allocator.
   void free(void* memory);
 
+  // Frees the allocations in [begin, end) back to class `cid` of pool `pid`.
+  // Delegates to the pool's freeBatch, which avoids locking the AC for each
+  // free.
+  template <typename It>
+  void freeBatch(It begin, It end, PoolId pid, ClassId cid) {
+    memoryPoolManager_.getPoolById(pid).freeBatch(begin, end, cid);
+  }
+
   // Memory pool interface. The memory pools must be established before the
   // first allocation happens. Currently we dont support adding / removing
   // pools dynamically.
@@ -528,13 +545,12 @@ class MemoryAllocator {
   serialization::MemoryAllocatorObject saveState();
 
   template <typename PtrType, typename CompressedPtrType>
-  using PtrCompressorType = facebook::cachelib::
-      PtrCompressor<PtrType, SlabAllocator, CompressedPtrType>;
+  using SingleTierPtrCompressorType = facebook::cachelib::
+      SingleTierPtrCompressor<PtrType, SlabAllocator, CompressedPtrType>;
 
   template <typename PtrType, typename CompressedPtrType>
-  PtrCompressorType<PtrType, CompressedPtrType> createPtrCompressor() {
-    return slabAllocator_.createPtrCompressor<PtrType, CompressedPtrType>();
-  }
+  using PtrCompressorType = facebook::cachelib::
+      PtrCompressor<PtrType, std::vector<std::unique_ptr<MemoryAllocator>>, CompressedPtrType>;
 
   // compress a given pointer to a valid allocation made out of this allocator
   // through an allocate() or nullptr. Calling this otherwise with invalid
@@ -665,6 +681,12 @@ class MemoryAllocator {
   uint32_t getMinAllocSize() const noexcept {
     return slabAllocator_.getMinAllocSize();
   }
+  // Returns true if `ptr` is non-null and lies within the slab memory range
+  // [getSlabMemoryBegin(), getSlabMemoryEnd()) managed by this allocator.
+  bool isMemoryInAllocator(const void* ptr) const noexcept {
+    return ptr != nullptr && ptr >= slabAllocator_.getSlabMemoryBegin() &&
+           ptr < slabAllocator_.getSlabMemoryEnd();
+  }
 
  private:
   // @param memory    pointer to the memory.
diff --git a/cachelib/allocator/memory/MemoryAllocatorStats.h b/cachelib/allocator/memory/MemoryAllocatorStats.h
index b019b254c..7ee4ca991 100644
--- a/cachelib/allocator/memory/MemoryAllocatorStats.h
+++ b/cachelib/allocator/memory/MemoryAllocatorStats.h
@@ -56,6 +56,17 @@ struct ACStats {
   constexpr size_t getTotalFreeMemory() const noexcept {
     return Slab::kSize * freeSlabs + freeAllocs * allocSize;
   }
+
+  // Active allocs as a fraction of alloc slots in used slabs (0.0 if none).
+  constexpr double usageFraction() const noexcept {
+    const auto slots = usedSlabs * allocsPerSlab;
+    return slots == 0 ? 0.0 : static_cast<double>(activeAllocs) / slots;
+  }
+
+  // Total size of the active allocations (activeAllocs * allocSize).
+  constexpr size_t totalAllocatedSize() const noexcept {
+    return activeAllocs * allocSize;
+  }
 };
 
 // structure to query stats corresponding to a MemoryPool
diff --git a/cachelib/allocator/memory/MemoryPool.cpp b/cachelib/allocator/memory/MemoryPool.cpp
index 9614e79af..329469001 100644
--- a/cachelib/allocator/memory/MemoryPool.cpp
+++ b/cachelib/allocator/memory/MemoryPool.cpp
@@ -301,6 +301,68 @@ bool MemoryPool::provision(const std::vector<uint32_t>& slabsDistribution) {
   return true;
 }
 
+// Allocates up to `batch` chunks from allocation class `cid` of this pool.
+//
+// Memory is taken in three steps, stopping as soon as the batch is full:
+//   1. from memory the allocation class already owns (no pool lock);
+//   2. from the class again, this time while holding the pool lock;
+//   3. from slabs newly added to the class, one slab at a time.
+// currAllocSize_ grows by the class's alloc size for every chunk handed out.
+//
+// @param cid    class id to allocate from
+// @param batch  number of allocations requested
+// @return pointers to the allocated chunks; holds fewer than `batch` entries
+//         (possibly none) when the pool cannot provide more memory
+// @throw std::invalid_argument if the class id is invalid.
+std::vector<void*> MemoryPool::allocateByCidBatch(ClassId cid, size_t batch) {
+  auto& ac = getAllocationClassFor(cid);
+  const auto allocSize = ac.getAllocSize();
+
+  // Step 1: serve from what the allocation class already has.
+  auto allocs = ac.allocateBatch(batch);
+  currAllocSize_ += allocSize * allocs.size();
+  if (allocs.size() == batch) {
+    return allocs;
+  }
+
+  // atomically see if we can acquire a slab by checking if we have
+  // reached the limit by size. If not, then they can be acquired from
+  // either the slab allocator or our free list. It is important to check
+  // this before we grab it from the slab allocator or free list. Things
+  // that release slab, bump down the currSlabAllocSize_ after actually
+  // releasing and adding it to free list or slab allocator.
+  if (allSlabsAllocated()) {
+    return allocs;
+  }
+
+  // TODO: introduce a new sharded lock by allocation class id for this slow
+  // path. Currently this would also serialize the slow paths of two different
+  // allocation class ids that need slab to initiate an allocation.
+  LockHolder l(lock_);
+
+  // Step 2: retry the class now that the pool lock is held. The remaining
+  // count stays a size_t so that large batches are not truncated.
+  auto retried = ac.allocateBatch(batch - allocs.size());
+  currAllocSize_ += allocSize * retried.size();
+  allocs.insert(allocs.end(), retried.begin(), retried.end());
+
+  // Step 3: hand new slabs to the class until the batch is full or
+  // getSlabLocked() has no slab left to give (out of memory).
+  while (allocs.size() < batch) {
+    auto slab = getSlabLocked();
+    if (slab == nullptr) {
+      break;
+    }
+
+    // add it to the allocation class and try to allocate.
+    auto fromSlab = ac.addSlabAndAllocateBatch(slab, batch - allocs.size());
+    currAllocSize_ += allocSize * fromSlab.size();
+    allocs.insert(allocs.end(), fromSlab.begin(), fromSlab.end());
+  }
+
+  return allocs;
+}
+
 void* MemoryPool::allocate(uint32_t size) {
   auto& ac = getAllocationClassFor(size);
 
@@ -562,3 +624,8 @@ MPStats MemoryPool::getStats() const {
                  slabsUnAllocated,    nSlabResize_,       nSlabRebalance_,
                  curSlabsAdvised_};
 }
+
+// Forwards to the usage estimate of the allocation class with id `cid`.
+std::pair<size_t, double> MemoryPool::getApproxUsage(ClassId cid) const {
+  return getAllocationClassFor(cid).getApproxUsage();
+}
diff --git a/cachelib/allocator/memory/MemoryPool.h b/cachelib/allocator/memory/MemoryPool.h
index 6a11c6665..d7c84fd0d 100644
--- a/cachelib/allocator/memory/MemoryPool.h
+++ b/cachelib/allocator/memory/MemoryPool.h
@@ -133,6 +133,12 @@ class MemoryPool {
 
   MPStats getStats() const;
 
+  // gets the approximate class usage for the given class id.
+  //
+  // @param cid  the class id for which we want to get the usage.
+  // @return a pair of number of active allocations and the usage in the slab.
+  std::pair<size_t, double> getApproxUsage(ClassId cid) const;
+
   // Provision each allocation class with prescribed number of slabs.
   //
   // @param slabsDistribution   number of slabs in each AC
@@ -147,6 +153,14 @@ class MemoryPool {
   // @throw  std::invalid_argument if size is invalid.
   void* allocate(uint32_t size);
 
+  // allocates a batch of memory from the given allocation class.
+  //
+  // @param cid    the class id for the allocation.
+  // @param batch  the number of allocations to be made.
+  // @return a vector of pointers to the memory of the class
+  // @throw  std::invalid_argument if the class id is invalid.
+  std::vector<void*> allocateByCidBatch(ClassId cid, size_t batch);
+
   // Allocate a slab with zeroed memory
   //
   // @return pointer to allocation or nullptr on failure to allocate.
@@ -165,6 +179,19 @@ class MemoryPool {
   // @throw std::run_time_error if the slab class information is corrupted.
   void free(void* memory);
 
+  // Frees a batch of memory back to allocation class `cid` of this pool and
+  // shrinks currAllocSize_ accordingly. Throws if a chunk is not from `cid`.
+  //
+  // @param  begin  iterator to the start of the batch
+  // @param  end    iterator to the end of the batch
+  // @param  cid    the allocation class id of the batch
+  template <typename It>
+  void freeBatch(It begin, It end, ClassId cid) {
+    auto& cls = getAllocationClassFor(cid);
+    const auto numFreed = cls.freeBatch(begin, end);
+    currAllocSize_ -= cls.getAllocSize() * numFreed;
+  }
+
   // resize the memory pool. This only adjusts the Pool size. It does not
   // release the slabs back to the SlabAllocator if the new size is less than
   // the current size. The caller is responsible for doing that through
diff --git a/cachelib/allocator/memory/SlabAllocator.h b/cachelib/allocator/memory/SlabAllocator.h
index 7ee470722..504180ef5 100644
--- a/cachelib/allocator/memory/SlabAllocator.h
+++ b/cachelib/allocator/memory/SlabAllocator.h
@@ -320,15 +320,26 @@ class SlabAllocator {
   }
 
   template <typename PtrType, typename CompressedPtrType>
-  PtrCompressor<PtrType, SlabAllocator, CompressedPtrType> createPtrCompressor()
+  SingleTierPtrCompressor<PtrType, SlabAllocator, CompressedPtrType> createSingleTierPtrCompressor()
       const {
-    return PtrCompressor<PtrType, SlabAllocator, CompressedPtrType>(*this);
+    return SingleTierPtrCompressor<PtrType, SlabAllocator, CompressedPtrType>(*this);
   }
 
   static constexpr uint32_t getMinAllocSize() noexcept {
     return static_cast<uint32_t>(1) << (Slab::kMinAllocPower);
   }
 
+  // returns the starting address (memoryStart_) of the memory region we own.
+  const Slab* getSlabMemoryBegin() const noexcept {
+    return reinterpret_cast<Slab*>(memoryStart_);
+  }
+
+  // returns the address memorySize_ bytes past memoryStart_ (one past the end).
+  const Slab* getSlabMemoryEnd() const noexcept {
+    return reinterpret_cast<Slab*>(reinterpret_cast<uint8_t*>(memoryStart_) +
+                                   memorySize_);
+  }
+
  private:
   // null Slab* presenttation. With 4M Slab size, a valid slab index would never
   // reach 2^16 - 1;
@@ -346,12 +357,6 @@ class SlabAllocator {
   // @throw std::invalid_argument if the state is invalid.
   void checkState() const;
 
-  // returns first byte after the end of memory region we own.
-  const Slab* getSlabMemoryEnd() const noexcept {
-    return reinterpret_cast<Slab*>(reinterpret_cast<uint8_t*>(memoryStart_) +
-                                   memorySize_);
-  }
-
   // returns true if we have slabbed all the memory that is available to us.
   // false otherwise.
   bool allMemorySlabbed() const noexcept {
diff --git a/cachelib/allocator/nvmcache/CacheApiWrapper.h b/cachelib/allocator/nvmcache/CacheApiWrapper.h
index 5abc517a6..63e9ebd60 100644
--- a/cachelib/allocator/nvmcache/CacheApiWrapper.h
+++ b/cachelib/allocator/nvmcache/CacheApiWrapper.h
@@ -94,7 +94,7 @@ class CacheAPIWrapperForNvm {
                                       uint32_t size,
                                       uint32_t creationTime,
                                       uint32_t expiryTime) {
-    return cache.allocateInternal(id, key, size, creationTime, expiryTime);
+    return cache.allocateInternal(id, key, size, creationTime, expiryTime, false);
   }
 
   // Insert the allocated handle into the AccessContainer from nvmcache, making
diff --git a/cachelib/allocator/serialize/objects.thrift b/cachelib/allocator/serialize/objects.thrift
index 15d47bd2f..2504abc63 100644
--- a/cachelib/allocator/serialize/objects.thrift
+++ b/cachelib/allocator/serialize/objects.thrift
@@ -17,11 +17,22 @@
 namespace cpp2 facebook.cachelib.serialization
 
 include "cachelib/allocator/datastruct/serialize/objects.thrift"
+include "cachelib/allocator/memory/serialize/objects.thrift"
 
 // Adding a new "required" field will cause the cache to be dropped
 // in the next release for our users. If the field needs to be required,
 // make sure to communicate that with our users.
 
+struct MemoryAllocatorCollection {
+  1: required map<i32, MemoryAllocatorObject> allocators; // presumably keyed by tier id -- TODO confirm
+}
+
+struct MemoryDescriptorObject {
+  1: required i32 tid; // tier id
+  2: required i32 pid; // pool id
+  3: required i32 cid; // class id
+}
+
 struct CacheAllocatorMetadata {
   1: required i64 allocatorVersion; // version of cache alloctor
   2: i64 cacheCreationTime = 0; // time when the cache was created.
@@ -80,7 +91,7 @@ struct MMLruObject {
 }
 
 struct MMLruCollection {
-  1: required map<i32, map<i32, MMLruObject>> pools;
+  1: required map<MemoryDescriptorObject, MMLruObject> containers;
 }
 
 struct MM2QConfig {
@@ -106,7 +117,7 @@ struct MM2QObject {
 }
 
 struct MM2QCollection {
-  1: required map<i32, map<i32, MM2QObject>> pools;
+  1: required map<MemoryDescriptorObject, MM2QObject> containers;
 }
 
 struct MMTinyLFUConfig {
@@ -134,7 +145,7 @@ struct MMTinyLFUObject {
 }
 
 struct MMTinyLFUCollection {
-  1: required map<i32, map<i32, MMTinyLFUObject>> pools;
+  1: required map<MemoryDescriptorObject, MMTinyLFUObject> containers;
 }
 
 struct ChainedHashTableObject {
diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp b/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp
index 3e4847251..c56f64084 100644
--- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp
+++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp
@@ -23,9 +23,9 @@ namespace tests {
 using LruAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruAllocator>;
 
 // TODO(MEMORY_TIER): add more tests with different eviction policies
-TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValid1) {
-  this->testMultiTiersValid1();
-}
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersInvalid) { this->testMultiTiersInvalid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValid) { this->testMultiTiersValid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); }
 
 } // end of namespace tests
 } // end of namespace cachelib
diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h
index a0d151399..f579bc4b3 100644
--- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h
+++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h
@@ -27,7 +27,7 @@ namespace tests {
 template <typename AllocatorT>
 class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
  public:
-  void testMultiTiersValid1() {
+  void testMultiTiersInvalid() {
     typename AllocatorT::Config config;
     config.setCacheSize(100 * Slab::kSize);
     ASSERT_NO_THROW(config.configureMemoryTiers(
@@ -36,6 +36,44 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
          MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind(
              std::string("0"))}));
   }
+
+  // Builds a persistent cache from two ratio-1 shm tiers (both mem-bound to
+  // "0") and checks that an item can be allocated and inserted.
+  void testMultiTiersValid() {
+    typename AllocatorT::Config cfg;
+    cfg.setCacheSize(100 * Slab::kSize);
+    cfg.enableCachePersistence("/tmp");
+    const auto shmTier = [] {
+      return MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind(
+          std::string("0"));
+    };
+    ASSERT_NO_THROW(cfg.configureMemoryTiers({shmTier(), shmTier()}));
+    auto cache = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, cfg);
+    ASSERT_NE(cache, nullptr);
+    auto poolId = cache->addPool("default", cache->getCacheMemoryStats().ramCacheSize);
+    auto item = cache->allocate(poolId, "key", std::string("value").size());
+    ASSERT_NE(item, nullptr);
+    ASSERT_NO_THROW(cache->insertOrReplace(item));
+  }
+
+  // NOTE(review): currently identical to testMultiTiersValid (two shm tiers);
+  // nothing here is "mixed" yet -- confirm which tier kinds were intended.
+  void testMultiTiersValidMixed() {
+    typename AllocatorT::Config cfg;
+    cfg.setCacheSize(100 * Slab::kSize);
+    cfg.enableCachePersistence("/tmp");
+    const auto shmTier = [] {
+      return MemoryTierCacheConfig::fromShm().setRatio(1).setMemBind(
+          std::string("0"));
+    };
+    ASSERT_NO_THROW(cfg.configureMemoryTiers({shmTier(), shmTier()}));
+    auto cache = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, cfg);
+    ASSERT_NE(cache, nullptr);
+    auto poolId = cache->addPool("default", cache->getCacheMemoryStats().ramCacheSize);
+    auto item = cache->allocate(poolId, "key", std::string("value").size());
+    ASSERT_NE(item, nullptr);
+    ASSERT_NO_THROW(cache->insertOrReplace(item));
+  }
 };
 } // namespace tests
 } // namespace cachelib
diff --git a/cachelib/allocator/tests/AllocatorResizeTest.h b/cachelib/allocator/tests/AllocatorResizeTest.h
index d65205ac7..883dd9c05 100644
--- a/cachelib/allocator/tests/AllocatorResizeTest.h
+++ b/cachelib/allocator/tests/AllocatorResizeTest.h
@@ -966,23 +966,23 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
       for (i = 1; i <= numItersToMaxAdviseAway + 1; i++) {
         alloc.memMonitor_->adviseAwaySlabs();
         std::this_thread::sleep_for(std::chrono::seconds{2});
-        ASSERT_EQ(alloc.allocator_->getAdvisedMemorySize(), i * perIterAdvSize);
+        ASSERT_EQ(alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize(), i * perIterAdvSize);
       }
       i--;
       // This should fail
       alloc.memMonitor_->adviseAwaySlabs();
       std::this_thread::sleep_for(std::chrono::seconds{2});
-      auto totalAdvisedAwayMemory = alloc.allocator_->getAdvisedMemorySize();
+      auto totalAdvisedAwayMemory = alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize();
       ASSERT_EQ(totalAdvisedAwayMemory, i * perIterAdvSize);
 
       // Try to reclaim back
       for (i = 1; i <= numItersToMaxAdviseAway + 1; i++) {
         alloc.memMonitor_->reclaimSlabs();
         std::this_thread::sleep_for(std::chrono::seconds{2});
-        ASSERT_EQ(alloc.allocator_->getAdvisedMemorySize(),
+        ASSERT_EQ(alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize(),
                   totalAdvisedAwayMemory - i * perIterAdvSize);
       }
-      totalAdvisedAwayMemory = alloc.allocator_->getAdvisedMemorySize();
+      totalAdvisedAwayMemory = alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize();
       ASSERT_EQ(totalAdvisedAwayMemory, 0);
     }
   }
diff --git a/cachelib/allocator/tests/AllocatorTypeTest.cpp b/cachelib/allocator/tests/AllocatorTypeTest.cpp
index 05f53eb4b..4bd64aee4 100644
--- a/cachelib/allocator/tests/AllocatorTypeTest.cpp
+++ b/cachelib/allocator/tests/AllocatorTypeTest.cpp
@@ -117,10 +117,14 @@ TYPED_TEST(BaseAllocatorTest, DropFile) { this->testDropFile(); }
 TYPED_TEST(BaseAllocatorTest, ShmTemporary) { this->testShmTemporary(); }
 
 TYPED_TEST(BaseAllocatorTest, Serialization) { this->testSerialization(); }
+TYPED_TEST(BaseAllocatorTest, MultiTierSerialization) { this->testMultiTierSerialization(); }
 
 TYPED_TEST(BaseAllocatorTest, SerializationMMConfig) {
   this->testSerializationMMConfig();
 }
+TYPED_TEST(BaseAllocatorTest, MultiTierSerializationMMConfig) {
+  this->testMultiTierSerializationMMConfig();
+}
 
 TYPED_TEST(BaseAllocatorTest, testSerializationWithFragmentation) {
   this->testSerializationWithFragmentation();
@@ -405,6 +409,10 @@ TYPED_TEST(BaseAllocatorTest, SlabReleaseStuck) {
   this->testSlabReleaseStuck();
 }
 
+TYPED_TEST(BaseAllocatorTest, BackgroundEviction) {
+  this->testBackgroundEviction();
+}
+
 TYPED_TEST(BaseAllocatorTest, RateMap) { this->testRateMap(); }
 
 TYPED_TEST(BaseAllocatorTest, StatSnapshotTest) {
diff --git a/cachelib/allocator/tests/BaseAllocatorTest.h b/cachelib/allocator/tests/BaseAllocatorTest.h
index 105a7f9bd..15d185353 100644
--- a/cachelib/allocator/tests/BaseAllocatorTest.h
+++ b/cachelib/allocator/tests/BaseAllocatorTest.h
@@ -1711,6 +1711,141 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
     testShmIsRemoved(config);
   }
 
+  void testMultiTierSerialization() {
+    std::set<std::string> evictedKeys;
+    auto removeCb =
+        [&evictedKeys](const typename AllocatorT::RemoveCbData& data) {
+          if (data.context == RemoveContext::kEviction) {
+            const auto key = data.item.getKey();
+            evictedKeys.insert({key.data(), key.size()});
+          }
+        };
+
+    const size_t nSlabs = 40;
+    const size_t size = nSlabs * Slab::kSize;
+    const unsigned int nSizes = 1;
+    const unsigned int keyLen = 100;
+
+    std::vector<uint32_t> sizes;
+    uint8_t poolId;
+
+    // Test allocations. These allocations should remain after save/restore.
+    // Original lru allocator - with two tiers
+    typename AllocatorT::Config config;
+    config.setCacheSize(size);
+    config.enableCachePersistence(this->cacheDir_);
+    config.enablePoolRebalancing(nullptr, std::chrono::seconds{0});
+    config.configureMemoryTiers({
+        MemoryTierCacheConfig::fromShm()
+            .setRatio(1).setMemBind(std::string("0")),
+        MemoryTierCacheConfig::fromShm()
+            .setRatio(1).setMemBind(std::string("0"))});
+    std::vector<std::string> keys;
+    {
+      AllocatorT alloc(AllocatorT::SharedMemNew, config);
+      const size_t numBytes = alloc.getCacheMemoryStats().ramCacheSize;
+      poolId = alloc.addPool("foobar", numBytes);
+      sizes = this->getValidAllocSizes(alloc, poolId, nSlabs, keyLen);
+      this->fillUpPoolUntilEvictions(alloc, 0,  poolId, sizes, keyLen);
+      this->fillUpPoolUntilEvictions(alloc, 1,  poolId, sizes, keyLen);
+      for (const auto& item : alloc) {
+        auto key = item.getKey();
+        keys.push_back(key.str());
+      }
+
+      // save
+      alloc.shutDown();
+    }
+
+    testShmIsNotRemoved(config);
+    // Restored lru allocator
+    {
+      AllocatorT alloc(AllocatorT::SharedMemAttach, config);
+      for (auto& key : keys) {
+        auto handle = alloc.find(typename AllocatorT::Key{key});
+        ASSERT_NE(nullptr, handle.get());
+      }
+    }
+
+    testShmIsRemoved(config);
+    // Test LRU eviction and length before and after save/restore
+    // Original lru allocator
+    typename AllocatorT::Config config2;
+    config2.setCacheSize(size);
+    config2.setRemoveCallback(removeCb);
+    config2.enableCachePersistence(this->cacheDir_);
+    config2.configureMemoryTiers({
+        MemoryTierCacheConfig::fromShm()
+            .setRatio(1).setMemBind(std::string("0")),
+        MemoryTierCacheConfig::fromShm()
+            .setRatio(1).setMemBind(std::string("0"))});
+    {
+      AllocatorT alloc(AllocatorT::SharedMemNew, config2);
+      const size_t numBytes = alloc.getCacheMemoryStats().ramCacheSize;
+      poolId = alloc.addPool("foobar", numBytes);
+
+      sizes = this->getValidAllocSizes(alloc, poolId, nSizes, keyLen);
+
+      this->testLruLength(alloc, poolId, sizes, keyLen, evictedKeys);
+
+      // save
+      alloc.shutDown();
+    }
+    evictedKeys.clear();
+
+    testShmIsNotRemoved(config2);
+    // Restored lru allocator
+    {
+      AllocatorT alloc(AllocatorT::SharedMemAttach, config2);
+      this->testLruLength(alloc, poolId, sizes, keyLen, evictedKeys);
+    }
+
+    testShmIsRemoved(config2);
+  }
+
+  void testMultiTierSerializationMMConfig() {
+    typename AllocatorT::Config config;
+    config.setCacheSize(20 * Slab::kSize);
+    config.enableCachePersistence(this->cacheDir_);
+    config.enablePoolRebalancing(nullptr, std::chrono::seconds{0});
+    config.configureMemoryTiers({
+        MemoryTierCacheConfig::fromShm()
+            .setRatio(1).setMemBind(std::string("0")),
+        MemoryTierCacheConfig::fromShm()
+            .setRatio(1).setMemBind(std::string("0"))});
+    double ratio = 0.2;
+
+    // start allocator
+    {
+      AllocatorT alloc(AllocatorT::SharedMemNew, config);
+      const size_t numBytes = alloc.getCacheMemoryStats().ramCacheSize;
+      {
+        typename AllocatorT::MMConfig mmConfig;
+        mmConfig.lruRefreshRatio = ratio;
+        auto pid =
+            alloc.addPool("foobar", numBytes, /* allocSizes = */ {}, mmConfig);
+        auto handle = util::allocateAccessible(alloc, pid, "key", 10);
+        ASSERT_NE(nullptr, handle);
+        auto& container = alloc.getMMContainer(*handle);
+        EXPECT_DOUBLE_EQ(ratio, container.getConfig().lruRefreshRatio);
+      }
+
+      // save
+      alloc.shutDown();
+    }
+    testShmIsNotRemoved(config);
+
+    // restore allocator and check lruRefreshRatio
+    {
+      AllocatorT alloc(AllocatorT::SharedMemAttach, config);
+      auto handle = alloc.find("key");
+      ASSERT_NE(nullptr, handle);
+      auto& container = alloc.getMMContainer(*handle);
+      EXPECT_DOUBLE_EQ(ratio, container.getConfig().lruRefreshRatio);
+    }
+    testShmIsRemoved(config);
+  }
+
   // Test temporary shared memory mode which is enabled when memory
   // monitoring is enabled.
   void testShmTemporary() {
@@ -4183,15 +4318,16 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
   // Check that item is in the expected container.
   bool findItem(AllocatorT& allocator, typename AllocatorT::Item* item) {
     auto& container = allocator.getMMContainer(*item);
-    auto itr = container.getEvictionIterator();
     bool found = false;
-    while (itr) {
-      if (itr.get() == item) {
-        found = true;
-        break;
+    container.withEvictionIterator([&found, &item](auto&& itr) {
+      while (itr) {
+        if (itr.get() == item) {
+          found = true;
+          break;
+        }
+        ++itr;
       }
-      ++itr;
-    }
+    });
     return found;
   }
 
@@ -4341,13 +4477,13 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
     // Had a bug: D4799860 where we allocated the wrong size for chained item
     {
       const auto parentAllocInfo =
-          alloc.allocator_->getAllocInfo(itemHandle->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(itemHandle->getMemory());
       const auto child1AllocInfo =
-          alloc.allocator_->getAllocInfo(chainedItemHandle->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(chainedItemHandle->getMemory());
       const auto child2AllocInfo =
-          alloc.allocator_->getAllocInfo(chainedItemHandle2->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(chainedItemHandle2->getMemory());
       const auto child3AllocInfo =
-          alloc.allocator_->getAllocInfo(chainedItemHandle3->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(chainedItemHandle3->getMemory());
 
       const auto parentCid = parentAllocInfo.classId;
       const auto child1Cid = child1AllocInfo.classId;
@@ -5483,8 +5619,12 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
       ASSERT_TRUE(big->isInMMContainer());
 
       auto& mmContainer = alloc.getMMContainer(*big);
-      auto itr = mmContainer.getEvictionIterator();
-      ASSERT_EQ(big.get(), &(*itr));
+
+      typename AllocatorT::Item* evictionCandidate = nullptr;
+      mmContainer.withEvictionIterator(
+          [&evictionCandidate](auto&& itr) { evictionCandidate = itr.get(); });
+
+      ASSERT_EQ(big.get(), evictionCandidate);
 
       alloc.remove("hello");
     }
@@ -5498,8 +5638,11 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
       ASSERT_TRUE(small2->isInMMContainer());
 
       auto& mmContainer = alloc.getMMContainer(*small2);
-      auto itr = mmContainer.getEvictionIterator();
-      ASSERT_EQ(small2.get(), &(*itr));
+
+      typename AllocatorT::Item* evictionCandidate = nullptr;
+      mmContainer.withEvictionIterator(
+          [&evictionCandidate](auto&& itr) { evictionCandidate = itr.get(); });
+      ASSERT_EQ(small2.get(), evictionCandidate);
 
       alloc.remove("hello");
     }
@@ -6189,6 +6332,63 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
     ASSERT_EQ(0, alloc.getSlabReleaseStats().numSlabReleaseStuck);
   }
 
+  // Checks that the background mover evicts items once a pool has been
+  // filled up. After filling the pool until allocations cause evictions, the
+  // test estimates how many evictions are needed to reach `targetFree` and
+  // polls the mover stats until that many items were evicted, the class
+  // usage dropped to the target, or the deadline expired.
+  void testBackgroundEviction() {
+    typename AllocatorT::Config config{};
+    size_t cacheSize = 5 * Slab::kSize; // 20 MB
+    double targetFree = 0.03;           // 3% of the cache kept free
+    config.setCacheSize(cacheSize);
+    config.enableBackgroundMover(std::chrono::milliseconds{10000},
+                                 20, // just test eviction for single tier
+                                 0,
+                                 targetFree, // try and keep 0.03 of the cache
+                                             // free
+                                 1);
+    AllocatorT alloc(config);
+    const size_t numBytes = alloc.getCacheMemoryStats().ramCacheSize;
+    auto poolId = alloc.addPool("foobar", numBytes);
+    const unsigned int keyLen = 20;
+    const std::vector<unsigned int> size{500};
+    auto& pool = alloc.getPool(poolId);
+
+    this->fillUpPoolUntilEvictions(alloc, poolId, size, keyLen);
+    int classId = pool.getAllocationClassId(size[0]);
+    auto stats = alloc.getGlobalCacheStats();
+    // moverStats is indexed below; fail cleanly if no mover stats exist.
+    ASSERT_FALSE(stats.moverStats.empty());
+    auto [currItems, currUsage] = pool.getApproxUsage(classId);
+    size_t maxItems = (currItems / currUsage);
+    size_t targetItems = maxItems * (1 - targetFree);
+    size_t approxEvictionsNeeded =
+        currItems > targetItems ? currItems - targetItems : 0;
+    XLOGF(INFO, "Current usage: {:.2f}, Current items: {}", currUsage,
+          currItems);
+    XLOGF(INFO, "Target items: {}, Approx evictions needed: {}", targetItems,
+          approxEvictionsNeeded);
+
+    // Bound the wait so that a stalled background mover fails the assertion
+    // below instead of hanging the test run forever.
+    const auto deadline =
+        std::chrono::steady_clock::now() + std::chrono::minutes{10};
+    while (stats.moverStats[0].numEvictedItems < approxEvictionsNeeded &&
+           currUsage > (1 - targetFree) &&
+           std::chrono::steady_clock::now() < deadline) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+      stats = alloc.getGlobalCacheStats();
+      currUsage = pool.getApproxUsage(classId).second;
+    }
+    XLOGF(INFO, "Evictions needed: {}, Evictions performed: {}",
+          approxEvictionsNeeded, stats.moverStats[0].numEvictedItems);
+    ASSERT_GE(stats.moverStats[0].numEvictedItems,
+              approxEvictionsNeeded * 0.90); // at least 90% of the evictions
+                                             // should be done by the background
+                                             // mover
+  }
+
   void testRateMap() {
     RateMap counters;
     counters.updateCount("stat1", 11);
diff --git a/cachelib/allocator/tests/CacheBaseTest.cpp b/cachelib/allocator/tests/CacheBaseTest.cpp
index 928fcc0c6..dae14c533 100644
--- a/cachelib/allocator/tests/CacheBaseTest.cpp
+++ b/cachelib/allocator/tests/CacheBaseTest.cpp
@@ -33,7 +33,12 @@ class CacheBaseTest : public CacheBase, public SlabAllocatorTestBase {
   const std::string getCacheName() const override { return cacheName; }
   bool isObjectCache() const override { return false; }
   const MemoryPool& getPool(PoolId) const override { return memoryPool_; }
+  // TODO: support tiers; for now every tier maps to the single test pool.
+  const MemoryPool& getPoolByTid(PoolId, TierId) const override { return memoryPool_; }
   PoolStats getPoolStats(PoolId) const override { return PoolStats(); }
+  // Stub returning default-constructed stats. NOTE(review): presumably this
+  // implements a CacheBase virtual; add `override` once confirmed.
+  ACStats getACStats(TierId, PoolId, ClassId) const { return ACStats(); }
   AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId) const override {
     return AllSlabReleaseEvents{};
   }
diff --git a/cachelib/allocator/tests/MemoryTiersTest.cpp b/cachelib/allocator/tests/MemoryTiersTest.cpp
index ed35115c0..535cb14bb 100644
--- a/cachelib/allocator/tests/MemoryTiersTest.cpp
+++ b/cachelib/allocator/tests/MemoryTiersTest.cpp
@@ -34,7 +34,7 @@ constexpr size_t MB = 1024ULL * 1024ULL;
 constexpr size_t GB = MB * 1024ULL;
 
 const size_t defaultTotalCacheSize{1 * GB};
-const std::string defaultCacheDir{"/var/metadataDir"};
+const std::string defaultCacheDir{"/tmp/metadataDir"};
 
 template <typename Allocator>
 class MemoryTiersTest : public AllocatorTest<Allocator> {
@@ -109,7 +109,7 @@ class MemoryTiersTest : public AllocatorTest<Allocator> {
   void validatePoolSize(PoolId poolId,
                         std::unique_ptr<LruAllocator>& allocator,
                         size_t expectedSize) {
-    size_t actualSize = allocator->getPool(poolId).getPoolSize();
+    size_t actualSize = allocator->getPoolSize(poolId);
     EXPECT_EQ(actualSize, expectedSize);
   }
 
@@ -119,9 +119,9 @@ class MemoryTiersTest : public AllocatorTest<Allocator> {
                    size_t numTiers = 2) {
     if (isSizeValid) {
       auto pool = alloc->addPool("validPoolSize", poolSize);
-      EXPECT_LE(alloc->getPool(pool).getPoolSize(), poolSize);
+      EXPECT_LE(alloc->getPoolSize(pool), poolSize);
       if (poolSize >= numTiers * Slab::kSize)
-        EXPECT_GE(alloc->getPool(pool).getPoolSize(),
+        EXPECT_GE(alloc->getPoolSize(pool),
                   poolSize - numTiers * Slab::kSize);
     } else {
       EXPECT_THROW(alloc->addPool("invalidPoolSize", poolSize),
@@ -172,6 +172,84 @@ TEST_F(LruMemoryTiersTest, TestInvalid2TierConfigRatioNotSet) {
 TEST_F(LruMemoryTiersTest, TestInvalid2TierConfigSizesNeCacheSize) {
   EXPECT_THROW(createTestCacheConfig({0, 0}), std::invalid_argument);
 }
+
+TEST_F(LruMemoryTiersTest, TestPoolAllocations) {
+  std::vector<size_t> totalCacheSizes = {8 * GB, 2 * GB};
+
+  static const size_t numExtraSizes = 4;
+  static const size_t numExtraSlabs = 20;
+
+  for (size_t i = 0; i < numExtraSizes; i++) {
+    totalCacheSizes.push_back(totalCacheSizes.back() +
+                              (folly::Random::rand64() % numExtraSlabs) *
+                                  Slab::kSize);
+  }
+
+  size_t min_ratio = 1;
+  size_t max_ratio = 111;
+
+  static const size_t numCombinations = 10;
+
+  for (auto totalCacheSize : totalCacheSizes) {
+    for (size_t k = 0; k < numCombinations; k++) {
+      const size_t i = folly::Random::rand32() % max_ratio + min_ratio;
+      const size_t j = folly::Random::rand32() % max_ratio + min_ratio;
+      LruAllocatorConfig cfg =
+          createTestCacheConfig({i, j},
+                                /* usePosix */ true, totalCacheSize);
+      basicCheck(cfg, totalCacheSize);
+
+      std::unique_ptr<LruAllocator> alloc = std::unique_ptr<LruAllocator>(
+          new LruAllocator(LruAllocator::SharedMemNew, cfg));
+
+      size_t size = (folly::Random::rand64() %
+                      (alloc->getCacheMemoryStats().ramCacheSize - Slab::kSize)) +
+                    Slab::kSize;
+      testAddPool(alloc, size, true);
+    }
+  }
+}
+
+TEST_F(LruMemoryTiersTest, TestPoolInvalidAllocations) {
+  std::vector<size_t> totalCacheSizes = {48 * MB, 51 * MB, 256 * MB,
+                                         1 * GB,  5 * GB,  8 * GB};
+  size_t min_ratio = 1;
+  size_t max_ratio = 111;
+
+  static const size_t numCombinations = 10;
+
+  for (auto totalCacheSize : totalCacheSizes) {
+    for (size_t k = 0; k < numCombinations; k++) {
+      const size_t i = folly::Random::rand32() % max_ratio + min_ratio;
+      const size_t j = folly::Random::rand32() % max_ratio + min_ratio;
+      LruAllocatorConfig cfg =
+          createTestCacheConfig({i, j},
+                                /* usePosix */ true, totalCacheSize);
+
+      std::unique_ptr<LruAllocator> alloc = nullptr;
+      try {
+         alloc = std::unique_ptr<LruAllocator>(
+            new LruAllocator(LruAllocator::SharedMemNew, cfg));
+      } catch(...) {
+        // an exception is expected only if the cache is too small
+        size_t sum_ratios = std::accumulate(
+          cfg.getMemoryTierConfigs().begin(), cfg.getMemoryTierConfigs().end(), 0UL,
+          [](const size_t i, const MemoryTierCacheConfig& config) {
+            return i + config.getRatio();
+        });
+        auto tier1slabs = cfg.getMemoryTierConfigs()[0].calculateTierSize(cfg.getCacheSize(), sum_ratios) / Slab::kSize;
+        auto tier2slabs = cfg.getMemoryTierConfigs()[1].calculateTierSize(cfg.getCacheSize(), sum_ratios) / Slab::kSize;
+        EXPECT_TRUE(tier1slabs <= 2 || tier2slabs <= 2);
+
+        continue;
+      }
+
+      size_t size = (folly::Random::rand64() % (100 * GB)) +
+                    alloc->getCacheMemoryStats().ramCacheSize;
+      testAddPool(alloc, size, false);
+    }
+  }
+}
 } // namespace tests
 } // namespace cachelib
 } // namespace facebook
diff --git a/cachelib/allocator/tests/TestBase.h b/cachelib/allocator/tests/TestBase.h
index 086fa65d3..3e687a320 100644
--- a/cachelib/allocator/tests/TestBase.h
+++ b/cachelib/allocator/tests/TestBase.h
@@ -69,6 +69,12 @@ class AllocatorTest : public SlabAllocatorTestBase {
                                 PoolId pid,
                                 const std::vector<uint32_t>& sizes,
                                 unsigned int keyLen);
+  // same as above but uses tiered allocator
+  void fillUpPoolUntilEvictions(AllocatorT& alloc,
+                                TierId tid,
+                                PoolId pid,
+                                const std::vector<uint32_t>& sizes,
+                                unsigned int keyLen);
   void fillUpOneSlab(AllocatorT& alloc,
                      PoolId poolId,
                      const uint32_t size,
@@ -187,15 +193,25 @@ void AllocatorTest<AllocatorT>::fillUpPoolUntilEvictions(
     PoolId poolId,
     const std::vector<uint32_t>& sizes,
     unsigned int keyLen) {
+  fillUpPoolUntilEvictions(alloc, 0, poolId, sizes, keyLen);
+}
+
+template <typename AllocatorT>
+void AllocatorTest<AllocatorT>::fillUpPoolUntilEvictions(
+    AllocatorT& alloc,
+    TierId tid,
+    PoolId poolId,
+    const std::vector<uint32_t>& sizes,
+    unsigned int keyLen) {
   unsigned int allocs = 0;
   do {
     allocs = 0;
     for (const auto size : sizes) {
       const auto key = getRandomNewKey(alloc, keyLen);
       ASSERT_EQ(alloc.find(key), nullptr);
-      const size_t prev = alloc.getPool(poolId).getCurrentAllocSize();
+      const size_t prev = alloc.getPoolByTid(poolId, tid).getCurrentAllocSize();
       auto handle = util::allocateAccessible(alloc, poolId, key, size);
-      if (handle && prev != alloc.getPool(poolId).getCurrentAllocSize()) {
+      if (handle && prev != alloc.getPoolByTid(poolId, tid).getCurrentAllocSize()) {
         // this means we did not cause an eviction.
         ASSERT_GE(handle->getSize(), size);
         allocs++;
@@ -418,7 +434,7 @@ void AllocatorTest<AllocatorT>::testShmIsRemoved(
   ASSERT_FALSE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmHashTableName, config.usePosixShm));
   ASSERT_FALSE(AllocatorT::ShmManager::segmentExists(
-      config.getCacheDir(), detail::kShmCacheName, config.usePosixShm));
+      config.getCacheDir(), detail::kShmCacheName + std::to_string(0), config.usePosixShm));
   ASSERT_FALSE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmChainedItemHashTableName,
       config.usePosixShm));
@@ -432,7 +448,7 @@ void AllocatorTest<AllocatorT>::testShmIsNotRemoved(
   ASSERT_TRUE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmHashTableName, config.usePosixShm));
   ASSERT_TRUE(AllocatorT::ShmManager::segmentExists(
-      config.getCacheDir(), detail::kShmCacheName, config.usePosixShm));
+      config.getCacheDir(), detail::kShmCacheName + std::to_string(0), config.usePosixShm));
   ASSERT_TRUE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmChainedItemHashTableName,
       config.usePosixShm));
diff --git a/cachelib/cachebench/cache/Cache.h b/cachelib/cachebench/cache/Cache.h
index 17a4dc155..4bb5c43f8 100644
--- a/cachelib/cachebench/cache/Cache.h
+++ b/cachelib/cachebench/cache/Cache.h
@@ -326,6 +326,10 @@ class Cache {
   // return the stats for the pool.
   PoolStats getPoolStats(PoolId pid) const { return cache_->getPoolStats(pid); }
 
+  ACStats getACStats(TierId tid, PoolId pid, ClassId cid) const {
+    return cache_->getACStats(tid, pid, cid);
+  }
+
   // return the total number of inconsistent operations detected since start.
   unsigned int getInconsistencyCount() const {
     return inconsistencyCount_.load(std::memory_order_relaxed);
@@ -518,6 +522,13 @@ Cache<Allocator>::Cache(const CacheConfig& config,
       config_.getRebalanceStrategy(),
       std::chrono::seconds(config_.poolRebalanceIntervalSec));
 
+  allocatorConfig_.enableBackgroundMover(
+      std::chrono::milliseconds(config_.backgroundMoverIntervalMilSec),
+      config_.backgroundEvictionBatch,
+      config_.backgroundPromotionBatch,
+      config_.backgroundTargetFree,
+      config_.backgroundMoverThreads);
+
   if (config_.moveOnSlabRelease && movingSync != nullptr) {
     allocatorConfig_.enableMovingOnSlabRelease(
         [](Item& oldItem, Item& newItem, Item* parentPtr) {
@@ -566,6 +577,8 @@ Cache<Allocator>::Cache(const CacheConfig& config,
     allocatorConfig_.configureMemoryTiers(config_.memoryTierConfigs);
   }
 
+  allocatorConfig_.insertToFirstFreeTier = config_.insertToFirstFreeTier;
+
   auto cleanupGuard = folly::makeGuard([&] {
     if (!nvmCacheFilePath_.empty()) {
       util::removePath(nvmCacheFilePath_);
@@ -1125,14 +1138,17 @@ Stats Cache<Allocator>::getStats() const {
     aggregate += poolStats;
   }
 
-  std::map<PoolId, std::map<ClassId, ACStats>> allocationClassStats{};
+  std::map<MemoryDescriptorType, ACStats> allocationClassStats{};
 
   for (size_t pid = 0; pid < pools_.size(); pid++) {
     PoolId poolId = static_cast<PoolId>(pid);
     auto poolStats = cache_->getPoolStats(poolId);
     auto cids = poolStats.getClassIds();
-    for (auto [cid, stats] : poolStats.mpStats.acStats) {
-      allocationClassStats[poolId][cid] = stats;
+    for (TierId tid = 0; tid < cache_->getNumTiers(); tid++) {
+      for (auto cid : cids) {
+        MemoryDescriptorType md(tid, poolId, cid);
+        allocationClassStats[md] = cache_->getACStats(tid, poolId, cid);
+      }
     }
   }
 
@@ -1141,21 +1157,14 @@ Stats Cache<Allocator>::getStats() const {
   const auto navyStats = cache_->getNvmCacheStatsMap().toMap();
 
   ret.allocationClassStats = allocationClassStats;
+  ret.backgroundMoverStats = cacheStats.moverStats;
   ret.numEvictions = aggregate.numEvictions();
   ret.numItems = aggregate.numItems();
   ret.evictAttempts = cacheStats.evictionAttempts;
   ret.allocAttempts = cacheStats.allocAttempts;
   ret.allocFailures = cacheStats.allocFailures;
 
-  ret.backgndEvicStats.nEvictedItems = cacheStats.evictionStats.numMovedItems;
-  ret.backgndEvicStats.nTraversals = cacheStats.evictionStats.runCount;
-  ret.backgndEvicStats.nClasses = cacheStats.evictionStats.totalClasses;
-  ret.backgndEvicStats.evictionSize = cacheStats.evictionStats.totalBytesMoved;
-
-  ret.backgndPromoStats.nPromotedItems =
-      cacheStats.promotionStats.numMovedItems;
-  ret.backgndPromoStats.nTraversals = cacheStats.promotionStats.runCount;
-
+  ret.backgroundMoverClasses = cache_->getBackgroundMoverClassStats();
   ret.numCacheGets = cacheStats.numCacheGets;
   ret.numCacheGetMiss = cacheStats.numCacheGetMiss;
   ret.numCacheEvictions = cacheStats.numCacheEvictions;
@@ -1203,11 +1212,6 @@ Stats Cache<Allocator>::getStats() const {
     ret.nvmCounters = cache_->getNvmCacheStatsMap().toMap();
   }
 
-  ret.backgroundEvictionClasses =
-      cache_->getBackgroundMoverClassStats(MoverDir::Evict);
-  ret.backgroundPromotionClasses =
-      cache_->getBackgroundMoverClassStats(MoverDir::Promote);
-
   // nvm stats from navy
   if (!isRamOnly() && !navyStats.empty()) {
     auto lookup = [&navyStats](const std::string& key) {
diff --git a/cachelib/cachebench/cache/CacheStats.h b/cachelib/cachebench/cache/CacheStats.h
index a0bb1e4dd..e487027eb 100644
--- a/cachelib/cachebench/cache/CacheStats.h
+++ b/cachelib/cachebench/cache/CacheStats.h
@@ -27,31 +27,8 @@ namespace facebook {
 namespace cachelib {
 namespace cachebench {
 
-struct BackgroundEvictionStats {
-  // the number of items this worker evicted by looking at pools/classes stats
-  uint64_t nEvictedItems{0};
-
-  // number of times we went executed the thread //TODO: is this def correct?
-  uint64_t nTraversals{0};
-
-  // number of classes
-  uint64_t nClasses{0};
-
-  // size of evicted items
-  uint64_t evictionSize{0};
-};
-
-struct BackgroundPromotionStats {
-  // the number of items this worker evicted by looking at pools/classes stats
-  uint64_t nPromotedItems{0};
-
-  // number of times we went executed the thread //TODO: is this def correct?
-  uint64_t nTraversals{0};
-};
-
 struct Stats {
-  BackgroundEvictionStats backgndEvicStats;
-  BackgroundPromotionStats backgndPromoStats;
+  std::vector<BackgroundMoverStats> backgroundMoverStats;
 
   uint64_t numEvictions{0};
   uint64_t numItems{0};
@@ -127,15 +104,17 @@ struct Stats {
   uint64_t invalidDestructorCount{0};
   int64_t unDestructedItemCount{0};
 
-  std::map<PoolId, std::map<ClassId, ACStats>> allocationClassStats;
+  std::map<MemoryDescriptorType, ACStats> allocationClassStats;
 
   // populate the counters related to nvm usage. Cache implementation can decide
   // what to populate since not all of those are interesting when running
   // cachebench.
   std::unordered_map<std::string, double> nvmCounters;
 
-  std::map<PoolId, std::map<ClassId, uint64_t>> backgroundEvictionClasses;
-  std::map<PoolId, std::map<ClassId, uint64_t>> backgroundPromotionClasses;
+  using ClassBgStatsType =
+      std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
+
+  ClassBgStatsType backgroundMoverClasses;
 
   // errors from the nvm engine.
   std::unordered_map<std::string, double> nvmErrors;
@@ -157,10 +136,9 @@ struct Stats {
     out << folly::sformat("RAM Evictions : {:,}", numEvictions) << std::endl;
 
     auto foreachAC = [](const auto& map, auto cb) {
-      for (auto& pidStat : map) {
-        for (auto& cidStat : pidStat.second) {
-          cb(pidStat.first, cidStat.first, cidStat.second);
-        }
+      for (const auto& [key, value] : map) {
+        auto [tid, pid, cid] = key;
+        cb(tid, pid, cid, value);
       }
     };
 
@@ -191,34 +169,28 @@ struct Stats {
         }
       };
 
-      foreachAC(allocationClassStats, [&](auto pid, auto cid, auto stats) {
+      foreachAC(allocationClassStats, [&](auto tid, auto pid, auto cid, auto stats) {
         auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize);
         auto [memorySizeSuffix, memorySize] =
-            formatMemory(stats.activeAllocs * stats.allocSize);
-        out << folly::sformat("pid{:2} cid{:4} {:8.2f}{} memorySize: {:8.2f}{}",
-                              pid, cid, allocSize, allocSizeSuffix, memorySize,
+            formatMemory(stats.totalAllocatedSize());
+        out << folly::sformat("tid{:2} pid{:2} cid{:4} {:8.2f}{} memorySize: {:8.2f}{}",
+                              tid, pid, cid, allocSize, allocSizeSuffix, memorySize,
                               memorySizeSuffix)
             << std::endl;
       });
 
-      foreachAC(allocationClassStats, [&](auto pid, auto cid, auto stats) {
+      foreachAC(allocationClassStats, [&](auto tid, auto pid, auto cid, auto stats) {
         auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize);
 
         // If the pool is not full, extrapolate usageFraction for AC assuming it
         // will grow at the same rate. This value will be the same for all ACs.
-        double acUsageFraction;
-        if (poolUsageFraction[pid] < 1.0) {
-          acUsageFraction = poolUsageFraction[pid];
-        } else if (stats.usedSlabs == 0) {
-          acUsageFraction = 0.0;
-        } else {
-          acUsageFraction =
-              stats.activeAllocs / (stats.usedSlabs * stats.allocsPerSlab);
-        }
+        auto acUsageFraction = (poolUsageFraction[pid] < 1.0)
+                                   ? poolUsageFraction[pid]
+                                   : stats.usageFraction();
 
         out << folly::sformat(
-                   "pid{:2} cid{:4} {:8.2f}{} usageFraction: {:4.2f}", pid, cid,
-                   allocSize, allocSizeSuffix, acUsageFraction)
+                   "tid{:2} pid{:2} cid{:4} {:8.2f}{} usageFraction: {:4.2f}",
+                   tid, pid, cid, allocSize, allocSizeSuffix, acUsageFraction)
             << std::endl;
       });
     }
@@ -253,40 +225,50 @@ struct Stats {
       }
     }
 
-    if (!backgroundEvictionClasses.empty() &&
-        backgndEvicStats.nEvictedItems > 0) {
-      out << "== Class Background Eviction Counters Map ==" << std::endl;
-      foreachAC(backgroundEvictionClasses,
-                [&](auto pid, auto cid, auto evicted) {
-                  out << folly::sformat("pid{:2} cid{:4} evicted: {:4}", pid,
-                                        cid, evicted)
-                      << std::endl;
-                });
-
-      out << folly::sformat("Background Evicted Items : {:,}",
-                            backgndEvicStats.nEvictedItems)
-          << std::endl;
-      out << folly::sformat("Background Evictor Traversals : {:,}",
-                            backgndEvicStats.nTraversals)
-          << std::endl;
+    size_t bgId = 1;
+    size_t totalBgEvicted = 0;
+    size_t totalBgPromoted = 0;
+    for (auto& bgWorkerStats : backgroundMoverStats) {
+      if (bgWorkerStats.numEvictedItems > 0 ||
+          bgWorkerStats.numPromotedItems > 0) {
+        out << folly::sformat(" == Background Mover {} Threads ==", bgId)
+            << std::endl;
+        if (bgWorkerStats.numEvictedItems > 0) {
+          out << folly::sformat("Evicted Items: {:,}",
+                                bgWorkerStats.numEvictedItems)
+              << std::endl;
+        }
+        if (bgWorkerStats.numPromotedItems > 0) {
+          out << folly::sformat("Promoted Items: {:,}",
+                                bgWorkerStats.numPromotedItems)
+              << std::endl;
+        }
+        out << folly::sformat(
+                   "Traversals: {:,}\n"
+                   "Run Count: {:,}\n"
+                   "Avg Time Per Traversal in ns: {:,}\n"
+                   "Avg Items Evicted: {:.2f}",
+                   bgWorkerStats.numTraversals, bgWorkerStats.runCount,
+                   bgWorkerStats.avgTraversalTimeNs,
+                   (double)bgWorkerStats.numEvictedItems /
+                       (double)bgWorkerStats.numTraversals)
+            << std::endl;
+        totalBgEvicted += bgWorkerStats.numEvictedItems;
+        totalBgPromoted += bgWorkerStats.numPromotedItems;
+        bgId++;
+      }
     }
 
-    if (!backgroundPromotionClasses.empty() &&
-        backgndPromoStats.nPromotedItems > 0) {
-      out << "== Class Background Promotion Counters Map ==" << std::endl;
-      foreachAC(backgroundPromotionClasses,
-                [&](auto pid, auto cid, auto promoted) {
-                  out << folly::sformat("pid{:2} cid{:4} promoted: {:4}", pid,
-                                        cid, promoted)
-                      << std::endl;
-                });
-
-      out << folly::sformat("Background Promoted Items : {:,}",
-                            backgndPromoStats.nPromotedItems)
-          << std::endl;
-      out << folly::sformat("Background Promoter Traversals : {:,}",
-                            backgndPromoStats.nTraversals)
-          << std::endl;
+    if (!backgroundMoverClasses.empty() &&
+        (totalBgEvicted || totalBgPromoted)) {
+      out << "== Per Class Background Movers Counters ==" << std::endl;
+      foreachAC(backgroundMoverClasses, [&](auto tid, auto pid, auto cid, auto pair) {
+        if (pair.first > 0 || pair.second > 0) {
+          out << folly::sformat("tid{:2} pid{:2} cid{:4} evicted: {:4} promoted: {:4}",
+                                tid, pid, cid, pair.first, pair.second)
+              << std::endl;
+        }
+      });
     }
 
     if (numNvmGets > 0 || numNvmDeletes > 0 || numNvmPuts > 0) {
@@ -426,6 +408,11 @@ struct Stats {
     if (numCacheEvictions > 0) {
       out << folly::sformat("Total eviction executed {}", numCacheEvictions)
           << std::endl;
+      if (totalBgEvicted) {
+        out << folly::sformat("Total background eviction executed {}",
+                              totalBgEvicted)
+            << std::endl;
+      }
     }
   }
 
diff --git a/cachelib/cachebench/util/CacheConfig.cpp b/cachelib/cachebench/util/CacheConfig.cpp
index 6d8f40874..ad11941d0 100644
--- a/cachelib/cachebench/util/CacheConfig.cpp
+++ b/cachelib/cachebench/util/CacheConfig.cpp
@@ -44,6 +44,8 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
   JSONSetVal(configJson, tryLockUpdate);
   JSONSetVal(configJson, lruIpSpec);
   JSONSetVal(configJson, useCombinedLockForIterators);
+  
+  JSONSetVal(configJson, insertToFirstFreeTier);
 
   JSONSetVal(configJson, lru2qHotPct);
   JSONSetVal(configJson, lru2qColdPct);
@@ -90,6 +92,13 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
   JSONSetVal(configJson, deviceMaxWriteSize);
   JSONSetVal(configJson, deviceEnableFDP);
 
+  // Background mover related configs
+  JSONSetVal(configJson, backgroundMoverIntervalMilSec);
+  JSONSetVal(configJson, backgroundMoverThreads);
+  JSONSetVal(configJson, backgroundTargetFree);
+  JSONSetVal(configJson, backgroundEvictionBatch);
+  JSONSetVal(configJson, backgroundPromotionBatch);
+
   JSONSetVal(configJson, memoryOnlyTTL);
 
   JSONSetVal(configJson, usePosixShm);
@@ -112,7 +121,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
   // if you added new fields to the configuration, update the JSONSetVal
   // to make them available for the json configs and increment the size
   // below
-  checkCorrectSize<CacheConfig, 760>();
+  checkCorrectSize<CacheConfig, 800>();
 
   if (numPools != poolSizes.size()) {
     throw std::invalid_argument(folly::sformat(
diff --git a/cachelib/cachebench/util/CacheConfig.h b/cachelib/cachebench/util/CacheConfig.h
index 0a1569615..fa17ed88c 100644
--- a/cachelib/cachebench/util/CacheConfig.h
+++ b/cachelib/cachebench/util/CacheConfig.h
@@ -93,6 +93,7 @@ struct CacheConfig : public JSONConfig {
   bool lruUpdateOnRead{true};
   bool tryLockUpdate{false};
   bool useCombinedLockForIterators{false};
+  bool insertToFirstFreeTier{false};
 
   // LRU param
   uint64_t lruIpSpec{0};
@@ -243,6 +244,23 @@ struct CacheConfig : public JSONConfig {
   // Memory tiers configs
   std::vector<MemoryTierCacheConfig> memoryTierConfigs{};
 
+  // time interval to sleep in ms between runs of the background mover
+  size_t backgroundMoverIntervalMilSec{0};
+
+  // number of threads used by the background mover
+  size_t backgroundMoverThreads{0};
+
+  // Target amount of cache memory to keep free. This is used by the background
+  // mover to decide when to evict items.
+  double backgroundTargetFree{0.02};
+
+  // The number of items to evict in each batch in the background mover
+  size_t backgroundEvictionBatch{10};
+
+  // The number of items to promote in each batch in the background mover.
+  // Only available when there are multiple memory tiers.
+  size_t backgroundPromotionBatch{10};
+
   // If enabled, we will use the timestamps from the trace file in the ticker
   // so that the cachebench will observe time based on timestamps from the trace
   // instead of the system time.