diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h
index aee86a4e3..e951f1074 100644
--- a/cachelib/allocator/BackgroundMover.h
+++ b/cachelib/allocator/BackgroundMover.h
@@ -16,9 +16,8 @@
 
 #pragma once
 
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
+#include "cachelib/allocator/Cache.h"
 #include "cachelib/allocator/CacheStats.h"
-#include "cachelib/common/AtomicCounter.h"
 #include "cachelib/common/PeriodicWorker.h"
 
 namespace facebook::cachelib {
@@ -26,41 +25,54 @@ namespace facebook::cachelib {
 // needed for the cache api
 template <typename C>
 struct BackgroundMoverAPIWrapper {
-  static size_t traverseAndEvictItems(C& cache,
-                                      unsigned int pid,
-                                      unsigned int cid,
-                                      size_t batch) {
-    return cache.traverseAndEvictItems(pid, cid, batch);
+  // traverse the cache and move items from one tier to another
+  // @param cache             the cache interface
+  // @param pid, cid          the pool id and class id to traverse
+  // @param evictionBatch     number of items to evict in one go
+  // @param promotionBatch    number of items to promote in one go
+  // @return pair of number of items evicted and promoted
+  static std::pair<size_t, size_t> traverseAndMoveItems(C& cache,
+                                                        PoolId pid,
+                                                        ClassId cid,
+                                                        size_t evictionBatch,
+                                                        size_t promotionBatch) {
+    return cache.traverseAndMoveItems(pid, cid, evictionBatch, promotionBatch);
   }
-
-  static size_t traverseAndPromoteItems(C& cache,
-                                        unsigned int pid,
-                                        unsigned int cid,
-                                        size_t batch) {
-    return cache.traverseAndPromoteItems(pid, cid, batch);
+  // @return pool.getApproxUsage(cid), or {0, 0.0} until all slabs are allocated
+  static std::pair<size_t, double> getApproxUsage(C& cache,
+                                                  PoolId pid,
+                                                  ClassId cid) {
+    const auto& pool = cache.getPool(pid);
+    // we wait until all slabs are allocated before we start evicting
+    if (!pool.allSlabsAllocated()) {
+      return {0, 0.0};
+    }
+    return pool.getApproxUsage(cid);
   }
 };
 
-enum class MoverDir { Evict = 0, Promote };
-
 // Periodic worker that evicts items from tiers in batches
 // The primary aim is to reduce insertion times for new items in the
 // cache
 template <typename CacheT>
 class BackgroundMover : public PeriodicWorker {
  public:
+  using ClassBgStatsType =
+      std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
   using Cache = CacheT;
   // @param cache               the cache interface
-  // @param strategy            the stragey class that defines how objects are
-  // moved (promoted vs. evicted and how much)
+  // @param evictionBatch       number of items to evict in one go
+  // @param promotionBatch      number of items to promote in one go
+  // @param targetFree          target free percentage in the class
   BackgroundMover(Cache& cache,
-                  std::shared_ptr<BackgroundMoverStrategy> strategy,
-                  MoverDir direction_);
+                  size_t evictionBatch,
+                  size_t promotionBatch,
+                  double targetFree);
 
   ~BackgroundMover() override;
 
   BackgroundMoverStats getStats() const noexcept;
-  std::map<PoolId, std::map<ClassId, uint64_t>> getClassStats() const noexcept;
+  ClassBgStatsType getPerClassStats() const noexcept { return movesPerClass_; }
 
   void setAssignedMemory(std::vector<MemoryDescriptorType>&& assignedMemory);
 
@@ -69,40 +81,75 @@ class BackgroundMover : public PeriodicWorker {
   static size_t workerId(PoolId pid, ClassId cid, size_t numWorkers);
 
  private:
-  std::map<PoolId, std::map<ClassId, uint64_t>> movesPerClass_;
+  struct TraversalStats {
+    // record one traversal (a single traverseAndMoveItems call on one class)
+    // and the time it took
+    void recordTraversalTime(uint64_t nsTaken);
+
+    uint64_t getAvgTraversalTimeNs(uint64_t numTraversals) const;
+    uint64_t getMinTraversalTimeNs() const { return minTraversalTimeNs_; }
+    uint64_t getMaxTraversalTimeNs() const { return maxTraversalTimeNs_; }
+    uint64_t getLastTraversalTimeNs() const { return lastTraversalTimeNs_; }
+
+   private:
+    // last/min/max/total recorded time; min stays at max() until first record
+    uint64_t lastTraversalTimeNs_{0};
+    uint64_t minTraversalTimeNs_{std::numeric_limits<uint64_t>::max()};
+    uint64_t maxTraversalTimeNs_{0};
+    uint64_t totalTraversalTimeNs_{0};
+  };
+
+  TraversalStats traversalStats_;
   // cache allocator's interface for evicting
   using Item = typename Cache::Item;
 
   Cache& cache_;
-  std::shared_ptr<BackgroundMoverStrategy> strategy_;
-  MoverDir direction_;
-
-  std::function<size_t(Cache&, unsigned int, unsigned int, size_t)> moverFunc;
+  uint8_t numTiers_{1}; // until we have multi-tier support
+  size_t evictionBatch_{0};
+  size_t promotionBatch_{0};
+  double targetFree_{0.03};
 
   // implements the actual logic of running the background evictor
   void work() override final;
   void checkAndRun();
 
-  AtomicCounter numMovedItems_{0};
-  AtomicCounter numTraversals_{0};
-  AtomicCounter totalBytesMoved_{0};
+  // populates the toFree map for each class with the number of items to free
+  std::map<MemoryDescriptorType, size_t> getNumItemsToFree(
+      const std::vector<MemoryDescriptorType>& assignedMemory);
+
+  uint64_t numEvictedItems_{0};
+  uint64_t numPromotedItems_{0};
+  uint64_t numTraversals_{0};
+
+  ClassBgStatsType movesPerClass_;
 
   std::vector<MemoryDescriptorType> assignedMemory_;
   folly::DistributedMutex mutex_;
 };
 
 template <typename CacheT>
-BackgroundMover<CacheT>::BackgroundMover(
-    Cache& cache,
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
-    MoverDir direction)
-    : cache_(cache), strategy_(strategy), direction_(direction) {
-  if (direction_ == MoverDir::Evict) {
-    moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndEvictItems;
-
-  } else if (direction_ == MoverDir::Promote) {
-    moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndPromoteItems;
-  }
+BackgroundMover<CacheT>::BackgroundMover(Cache& cache,
+                                         size_t evictionBatch,
+                                         size_t promotionBatch,
+                                         double targetFree)
+    : cache_(cache),
+      evictionBatch_(evictionBatch),
+      promotionBatch_(promotionBatch),
+      targetFree_(targetFree) {}
+
+template <typename CacheT>
+void BackgroundMover<CacheT>::TraversalStats::recordTraversalTime(
+    uint64_t nsTaken) {
+  lastTraversalTimeNs_ = nsTaken;
+  minTraversalTimeNs_ = std::min(minTraversalTimeNs_, nsTaken);
+  maxTraversalTimeNs_ = std::max(maxTraversalTimeNs_, nsTaken);
+  totalTraversalTimeNs_ += nsTaken;
+}
+
+template <typename CacheT>
+uint64_t BackgroundMover<CacheT>::TraversalStats::getAvgTraversalTimeNs(
+    uint64_t numTraversals) const {
+  return numTraversals ? totalTraversalTimeNs_ / numTraversals : 0;
 }
 
 template <typename CacheT>
@@ -132,50 +179,89 @@ void BackgroundMover<CacheT>::setAssignedMemory(
   });
 }
 
-// Look for classes that exceed the target memory capacity
-// and return those for eviction
+// Returns, per assigned class, the number of items to evict so that usage
+// falls back to (1 - targetFree_); 0 when the class is already below it.
+template <typename CacheT>
+std::map<MemoryDescriptorType, size_t>
+BackgroundMover<CacheT>::getNumItemsToFree(
+    const std::vector<MemoryDescriptorType>& assignedMemory) {
+  std::map<MemoryDescriptorType, size_t> toFree;
+  for (const auto& md : assignedMemory) {
+    const auto [pid, cid] = md;
+    const auto [activeItems, usage] =
+        BackgroundMoverAPIWrapper<CacheT>::getApproxUsage(cache_, pid, cid);
+    if (usage <= 0 || usage < 1 - targetFree_) {
+      toFree[md] = 0; // no usage reported, or already enough free space
+      continue;
+    }
+    // estimate the class capacity in items from the current count and usage
+    const size_t maxItems = activeItems / usage;
+    const size_t targetItems = maxItems * (1 - targetFree_);
+    toFree[md] = activeItems > targetItems ? activeItems - targetItems : 0;
+  }
+  return toFree;
+}
+
 template <typename CacheT>
 void BackgroundMover<CacheT>::checkAndRun() {
   auto assignedMemory = mutex_.lock_combine([this] { return assignedMemory_; });
-
-  unsigned int moves = 0;
-  auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory);
-
-  for (size_t i = 0; i < batches.size(); i++) {
-    const auto [pid, cid] = assignedMemory[i];
-    const auto batch = batches[i];
-
-    if (batch == 0) {
-      continue;
+  auto toFree = getNumItemsToFree(assignedMemory); // calculate the number of
+                                                   // items to free
+  while (true) {
+    bool allDone = true;
+    for (auto md : assignedMemory) {
+      const auto [pid, cid] = md;
+      size_t evictionBatch = evictionBatch_;
+      size_t promotionBatch = 0; // will enable with multi-tier support
+      if (toFree[md] == 0) {
+        // no eviction work to be done since there is already at least
+        // targetFree remaining in the class
+        evictionBatch = 0;
+      } else {
+        allDone = false; // we still have some items to free
+      }
+      if (promotionBatch + evictionBatch > 0) {
+        const auto begin = util::getCurrentTimeNs();
+        // try moving BATCH items from the class in order to reach free target
+        auto moved = BackgroundMoverAPIWrapper<CacheT>::traverseAndMoveItems(
+            cache_, pid, cid, evictionBatch, promotionBatch);
+        numEvictedItems_ += moved.first;
+        toFree[md] > moved.first ? toFree[md] -= moved.first : toFree[md] = 0;
+        numPromotedItems_ += moved.second;
+        auto curr = movesPerClass_[md];
+        curr.first += moved.first;
+        curr.second += moved.second;
+        movesPerClass_[md] = curr;
+        numTraversals_++;
+        auto end = util::getCurrentTimeNs();
+        traversalStats_.recordTraversalTime(end > begin ? end - begin : 0);
+      }
+    }
+    if (shouldStopWork() || allDone) {
+      break;
     }
-
-    // try moving BATCH items from the class in order to reach free target
-    auto moved = moverFunc(cache_, pid, cid, batch);
-    moves += moved;
-    movesPerClass_[pid][cid] += moved;
-    totalBytesMoved_.add(moved * cache_.getPool(pid).getAllocSizes()[cid]);
   }
-
-  numTraversals_.inc();
-  numMovedItems_.add(moves);
 }
 
 template <typename CacheT>
 BackgroundMoverStats BackgroundMover<CacheT>::getStats() const noexcept {
   BackgroundMoverStats stats;
-  stats.numMovedItems = numMovedItems_.get();
-  stats.runCount = numTraversals_.get();
-  stats.totalBytesMoved = totalBytesMoved_.get();
+  stats.numEvictedItems = numEvictedItems_;
+  stats.numPromotedItems = numPromotedItems_;
+  stats.numTraversals = numTraversals_;
+  stats.runCount = getRunCount();
+  // report 0 rather than NaN (0/0) before the first traversal is recorded
+  const auto moved = static_cast<double>(numEvictedItems_ + numPromotedItems_);
+  stats.avgItemsMoved = numTraversals_ ? moved / numTraversals_ : 0.0;
+  stats.lastTraversalTimeNs = traversalStats_.getLastTraversalTimeNs();
+  stats.avgTraversalTimeNs =
+      traversalStats_.getAvgTraversalTimeNs(numTraversals_);
+  stats.minTraversalTimeNs = traversalStats_.getMinTraversalTimeNs();
+  stats.maxTraversalTimeNs = traversalStats_.getMaxTraversalTimeNs();
 
   return stats;
 }
 
-template <typename CacheT>
-std::map<PoolId, std::map<ClassId, uint64_t>>
-BackgroundMover<CacheT>::getClassStats() const noexcept {
-  return movesPerClass_;
-}
-
 template <typename CacheT>
 size_t BackgroundMover<CacheT>::workerId(PoolId pid,
                                          ClassId cid,
@@ -185,4 +271,4 @@ size_t BackgroundMover<CacheT>::workerId(PoolId pid,
   // TODO: came up with some better sharding (use hashing?)
   return (pid + cid) % numWorkers;
 }
-} // namespace facebook::cachelib
+}; // namespace facebook::cachelib
diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h
deleted file mode 100644
index abf37edd1..000000000
--- a/cachelib/allocator/BackgroundMoverStrategy.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "cachelib/allocator/Cache.h"
-
-namespace facebook {
-namespace cachelib {
-
-struct MemoryDescriptorType {
-  MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {}
-  PoolId pid_;
-  ClassId cid_;
-};
-
-// Base class for background eviction strategy.
-class BackgroundMoverStrategy {
- public:
-  // Calculate how many items should be moved by the background mover
-  //
-  // @param cache   Cache allocator that implements CacheBase
-  // @param acVec   vector of memory descriptors for which batch sizes should
-  //                be calculated
-  //
-  // @return vector of batch sizes, where each element in the vector specifies
-  //         batch size for the memory descriptor in acVec
-  virtual std::vector<size_t> calculateBatchSizes(
-      const CacheBase& cache, std::vector<MemoryDescriptorType> acVec) = 0;
-
-  virtual ~BackgroundMoverStrategy() = default;
-};
-
-} // namespace cachelib
-} // namespace facebook
diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt
index 6103cdc82..f94c8c90c 100644
--- a/cachelib/allocator/CMakeLists.txt
+++ b/cachelib/allocator/CMakeLists.txt
@@ -35,7 +35,6 @@ add_library (cachelib_allocator
     CCacheManager.cpp
     ContainerTypes.cpp
     FreeMemStrategy.cpp
-    FreeThresholdStrategy.cpp
     HitsPerSlabStrategy.cpp
     LruTailAgeStrategy.cpp
     MarginalHitsOptimizeStrategy.cpp
diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index e225ba8a0..5144f4f20 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -73,6 +73,21 @@ enum class DestructorContext {
   kRemovedFromNVM
 };
 
+// identifies an allocation class by its memory pool id and class id
+struct MemoryDescriptorType {
+  MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {}
+  PoolId pid_;
+  ClassId cid_;
+  // lexicographic order: pool id first, class id breaks ties
+  bool operator<(const MemoryDescriptorType& rhs) const {
+    return pid_ != rhs.pid_ ? pid_ < rhs.pid_ : cid_ < rhs.cid_;
+  }
+
+  bool operator==(const MemoryDescriptorType& rhs) const {
+    return pid_ == rhs.pid_ && cid_ == rhs.cid_;
+  }
+};
+
 // A base class of cache exposing members and status agnostic of template type.
 class CacheBase {
  public:
diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h
index 8238ae2fe..dd074d87f 100644
--- a/cachelib/allocator/CacheAllocator.h
+++ b/cachelib/allocator/CacheAllocator.h
@@ -352,6 +352,43 @@ class CacheAllocator : public CacheBase {
     // if user-supplied SyncObj can fail. e.g. if a lock can timeout.
     virtual bool isValid() const { return true; }
   };
+
+  // For background worker stats
+  using ClassBgStatsType =
+      std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
+
+  // Movement (eviction/promotion) related data returned from
+  // function executed under mmContainer lock
+  struct MoveData {
+    MoveData() = delete;
+    MoveData(Item* candidate_,
+             Item* toRecycle_,
+             Item* toRecycleParent_,
+             bool chainedItem_, // NB: precedes expired_, unlike the fields
+             bool expired_,
+             typename NvmCacheT::PutToken token_,
+             WriteHandle candidateHandle_)
+        : candidate(candidate_),
+          toRecycle(toRecycle_),
+          toRecycleParent(toRecycleParent_),
+          expired(expired_),
+          chainedItem(chainedItem_),
+          token(std::move(token_)),
+          candidateHandle(std::move(candidateHandle_)) {}
+
+    // item that is candidate for eviction
+    Item* candidate;
+    // actual alloc that will be recycled
+    // back to the allocator
+    Item* toRecycle;
+    // possible parent ref
+    Item* toRecycleParent;
+    bool expired;                       // is item expired
+    bool chainedItem;                   // is it a chained item
+    typename NvmCacheT::PutToken token; // put token for NVM cache
+    WriteHandle candidateHandle;        // handle if we don't use moving bit
+  };
+
   using ChainedItemMovingSync = std::function<std::unique_ptr<SyncObj>(Key)>;
 
   using AccessContainer = typename Item::AccessContainer;
@@ -1083,25 +1120,12 @@ class CacheAllocator : public CacheBase {
   bool startNewReaper(std::chrono::milliseconds interval,
                       util::Throttler::Config reaperThrottleConfig);
 
-  // start background promoter, starting/stopping of this worker
-  // should not be done concurrently with addPool
-  // @param interval                the period this worker fires
-  // @param strategy                strategy to promote items
-  // @param threads                 number of threads used by the worker
-  bool startNewBackgroundPromoter(
-      std::chrono::milliseconds interval,
-      std::shared_ptr<BackgroundMoverStrategy> strategy,
-      size_t threads);
-
-  // start background evictor, starting/stopping of this worker
-  // should not be done concurrently with addPool
-  // @param interval                the period this worker fires
-  // @param strategy                strategy to evict items
-  // @param threads                 number of threads used by the worker
-  bool startNewBackgroundEvictor(
-      std::chrono::milliseconds interval,
-      std::shared_ptr<BackgroundMoverStrategy> strategy,
-      size_t threads);
+  // start background mover
+  bool startNewBackgroundMover(std::chrono::milliseconds interval,
+                               size_t evictionBatch,
+                               size_t promotionBatch,
+                               double targetFree,
+                               size_t threads);
 
   // Stop existing workers with a timeout
   bool stopPoolRebalancer(std::chrono::seconds timeout = std::chrono::seconds{
@@ -1111,10 +1135,8 @@ class CacheAllocator : public CacheBase {
                              0});
   bool stopMemMonitor(std::chrono::seconds timeout = std::chrono::seconds{0});
   bool stopReaper(std::chrono::seconds timeout = std::chrono::seconds{0});
-  bool stopBackgroundEvictor(
-      std::chrono::seconds timeout = std::chrono::seconds{0});
-  bool stopBackgroundPromoter(
-      std::chrono::seconds timeout = std::chrono::seconds{0});
+  bool stopBackgroundMover(std::chrono::seconds timeout = std::chrono::seconds{
+                               0});
 
   // Set pool optimization to either true or false
   //
@@ -1199,6 +1221,30 @@ class CacheAllocator : public CacheBase {
     return stats;
   }
 
+  // collects one BackgroundMoverStats entry per background mover thread
+  std::vector<BackgroundMoverStats> getBackgroundMoverStats() const {
+    std::vector<BackgroundMoverStats> perWorker;
+    for (const auto& mover : backgroundMover_) {
+      perWorker.emplace_back(mover->getStats());
+    }
+    return perWorker;
+  }
+
+  // Aggregates the per-class {evicted, promoted} counters of every background
+  // mover worker, summing entries that share a descriptor.
+  ClassBgStatsType getBackgroundMoverClassStats() const {
+    ClassBgStatsType stats;
+    for (const auto& bg : backgroundMover_) {
+      const auto classStats = bg->getPerClassStats();
+      for (const auto& [key, value] : classStats) {
+        auto& total = stats[key]; // value-initialized to {0, 0} when new
+        total.first += value.first;
+        total.second += value.second;
+      }
+    }
+    return stats;
+  }
+
   // returns the pool rebalancer stats
   RebalancerStats getRebalancerStats() const {
     auto stats =
@@ -1486,8 +1532,7 @@ class CacheAllocator : public CacheBase {
                                Key key,
                                uint32_t size,
                                uint32_t creationTime,
-                               uint32_t expiryTime,
-                               bool fromBgThread = false);
+                               uint32_t expiryTime);
 
   // Allocate a chained item
   //
@@ -1573,10 +1618,15 @@ class CacheAllocator : public CacheBase {
   //
   // @param oldItem     Reference to the item being moved
   // @param newItemHdl  Reference to the handle of the new item being moved into
+  // @param skipAddInMMContainer  when true, the new item is not added to its
+  //                     mmContainer here, so that the caller can add it
+  //                     later (e.g. in batch)
   //
   // @return true  If the move was completed, and the containers were updated
   //               successfully.
-  bool moveRegularItem(Item& oldItem, WriteHandle& newItemHdl);
+  bool moveRegularItem(Item& oldItem,
+                       WriteHandle& newItemHdl,
+                       bool skipAddInMMContainer = false);
 
   // template class for viewAsChainedAllocs that takes either ReadHandle or
   // WriteHandle
@@ -1762,6 +1812,11 @@ class CacheAllocator : public CacheBase {
   std::pair<Item*, Item*> getNextCandidate(PoolId pid,
                                            ClassId cid,
                                            unsigned int& searchTries);
+  // similar to the above method but returns a batch of eviction candidates
+  // as a vector of MoveData
+  std::vector<MoveData> getNextCandidates(PoolId pid,
+                                          ClassId cid,
+                                          uint32_t batch);
 
   using EvictionIterator = typename MMContainer::LockedIterator;
 
@@ -1898,18 +1953,45 @@ class CacheAllocator : public CacheBase {
 
   // exposed for the background evictor to iterate through the memory and evict
   // in batch. This should improve insertion path for tiered memory config
-  size_t traverseAndEvictItems(unsigned int /* pid */,
-                               unsigned int /* cid */,
-                               size_t /* batch */) {
-    throw std::runtime_error("Not supported yet!");
-  }
-
-  // exposed for the background promoter to iterate through the memory and
-  // promote in batch. This should improve find latency
-  size_t traverseAndPromoteItems(unsigned int /* pid */,
-                                 unsigned int /* cid */,
-                                 size_t /* batch */) {
-    throw std::runtime_error("Not supported yet!");
+  // promotion batch only applies to tiered memory config
+  // @return {regular items evicted, items promoted}; chained-item evictions
+  //         are only reflected in stats_.chainedItemEvictions
+  std::pair<size_t, size_t> traverseAndMoveItems(PoolId pid,
+                                                 ClassId cid,
+                                                 size_t evictionBatch,
+                                                 size_t promotionBatch) {
+    auto& mmContainer = getMMContainer(pid, cid);
+    // never ask for more items than the class currently holds
+    evictionBatch = std::min<size_t>(evictionBatch, mmContainer.size());
+    if (evictionBatch == 0) {
+      return {0, 0};
+    }
+    auto evictionData = getNextCandidates(pid, cid, evictionBatch);
+    // we now have a list of candidates and toRecycles, they should go back
+    // to the allocator and we will do this in batch to avoid AC lock contention
+    // note - for chained items - we can't do this in bulk
+    std::vector<size_t> chainedIdx;
+    std::vector<Item*> toRecycles;
+    for (size_t i = 0; i < evictionData.size(); ++i) {
+      if (evictionData[i].chainedItem) {
+        chainedIdx.push_back(i);
+      } else {
+        toRecycles.push_back(evictionData[i].candidate);
+      }
+    }
+    // walk the chained indices from highest to lowest: erase() shifts every
+    // later element down, so ascending order would hit the wrong entries
+    for (auto it = chainedIdx.rbegin(); it != chainedIdx.rend(); ++it) {
+      auto& data = evictionData[*it];
+      releaseBackToAllocator(*data.candidate, RemoveContext::kNormal, false,
+                             data.toRecycle);
+      evictionData.erase(evictionData.begin() + *it);
+      (*stats_.chainedItemEvictions)[pid][cid].inc();
+    }
+    allocator_->freeBatch(toRecycles.begin(), toRecycles.end(), pid, cid);
+    size_t evictions = toRecycles.size();
+    (*stats_.regularItemEvictions)[pid][cid].add(evictions);
+    return {evictions, 0};
   }
 
   // returns true if nvmcache is enabled and we should write this item to
@@ -2087,44 +2169,6 @@ class CacheAllocator : public CacheBase {
                      : false;
   }
 
-  // returns the background mover stats
-  BackgroundMoverStats getBackgroundMoverStats(MoverDir direction) const {
-    auto stats = BackgroundMoverStats{};
-    if (direction == MoverDir::Evict) {
-      for (auto& bg : backgroundEvictor_)
-        stats += bg->getStats();
-    } else if (direction == MoverDir::Promote) {
-      for (auto& bg : backgroundPromoter_)
-        stats += bg->getStats();
-    }
-    return stats;
-  }
-
-  std::map<PoolId, std::map<ClassId, uint64_t>> getBackgroundMoverClassStats(
-      MoverDir direction) const {
-    std::map<PoolId, std::map<ClassId, uint64_t>> stats;
-
-    if (direction == MoverDir::Evict) {
-      for (auto& bg : backgroundEvictor_) {
-        for (auto& pid : bg->getClassStats()) {
-          for (auto& cid : pid.second) {
-            stats[pid.first][cid.first] += cid.second;
-          }
-        }
-      }
-    } else if (direction == MoverDir::Promote) {
-      for (auto& bg : backgroundPromoter_) {
-        for (auto& pid : bg->getClassStats()) {
-          for (auto& cid : pid.second) {
-            stats[pid.first][cid.first] += cid.second;
-          }
-        }
-      }
-    }
-
-    return stats;
-  }
-
   bool tryGetHandleWithWaitContextForMovingItem(Item& item,
                                                 WriteHandle& handle);
 
@@ -2285,9 +2329,8 @@ class CacheAllocator : public CacheBase {
   // free memory monitor
   std::unique_ptr<MemoryMonitor> memMonitor_;
 
-  // background evictor
-  std::vector<std::unique_ptr<BackgroundMover<CacheT>>> backgroundEvictor_;
-  std::vector<std::unique_ptr<BackgroundMover<CacheT>>> backgroundPromoter_;
+  // background data movement, for single tier, this just evicts
+  std::vector<std::unique_ptr<BackgroundMover<CacheT>>> backgroundMover_;
 
   // check whether a pool is a slabs pool
   std::array<bool, MemoryPoolManager::kMaxPools> isCompactCachePool_{};
@@ -2612,16 +2655,12 @@ void CacheAllocator<CacheTrait>::initWorkers() {
                           config_.ccacheOptimizeStepSizePercent);
   }
 
-  if (config_.backgroundEvictorEnabled()) {
-    startNewBackgroundEvictor(config_.backgroundEvictorInterval,
-                              config_.backgroundEvictorStrategy,
-                              config_.backgroundEvictorThreads);
-  }
-
-  if (config_.backgroundPromoterEnabled()) {
-    startNewBackgroundPromoter(config_.backgroundPromoterInterval,
-                               config_.backgroundPromoterStrategy,
-                               config_.backgroundPromoterThreads);
+  if (config_.backgroundMoverEnabled()) {
+    startNewBackgroundMover(config_.backgroundMoverInterval,
+                            config_.backgroundEvictionBatch,
+                            config_.backgroundPromotionBatch,
+                            config_.backgroundTargetFree,
+                            config_.backgroundMoverThreads);
   }
 }
 
@@ -2719,8 +2758,7 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
                                              typename Item::Key key,
                                              uint32_t size,
                                              uint32_t creationTime,
-                                             uint32_t expiryTime,
-                                             bool fromBgThread) {
+                                             uint32_t expiryTime) {
   util::LatencyTracker tracker{stats().allocateLatency_};
 
   SCOPE_FAIL { stats_.invalidAllocs.inc(); };
@@ -2735,13 +2773,6 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
 
   void* memory = allocator_->allocate(pid, requiredSize);
 
-  if (backgroundEvictor_.size() && !fromBgThread &&
-      (memory == nullptr || shouldWakeupBgEvictor(pid, cid))) {
-    backgroundEvictor_[BackgroundMover<CacheT>::workerId(
-                           pid, cid, backgroundEvictor_.size())]
-        ->wakeUp();
-  }
-
   if (memory == nullptr) {
     memory = findEviction(pid, cid);
   }
@@ -3575,7 +3606,8 @@ void CacheAllocator<CacheTrait>::wakeUpWaiters(folly::StringPiece key,
 
 template <typename CacheTrait>
 bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
-                                                 WriteHandle& newItemHdl) {
+                                                 WriteHandle& newItemHdl,
+                                                 bool skipAddInMMContainer) {
   XDCHECK(oldItem.isMoving());
   // If an item is expired, proceed to eviction.
   if (oldItem.isExpired()) {
@@ -3605,8 +3637,12 @@ bool CacheAllocator<CacheTrait>::moveRegularItem(Item& oldItem,
   // Adding the item to mmContainer has to succeed since no one can remove the
   // item
   auto& newContainer = getMMContainer(*newItemHdl);
-  auto mmContainerAdded = newContainer.add(*newItemHdl);
-  XDCHECK(mmContainerAdded);
+  if (!skipAddInMMContainer) {
+    // Adding the item to mmContainer has to succeed since no one can remove the
+    // item
+    auto mmContainerAdded = newContainer.add(*newItemHdl);
+    XDCHECK(mmContainerAdded);
+  }
 
   if (oldItem.hasChainedItem()) {
     XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString();
@@ -3808,6 +3844,116 @@ CacheAllocator<CacheTrait>::getNextCandidate(PoolId pid,
   return {candidate, toRecycle};
 }
 
+// Collects up to `batch` eviction candidates from the (pid, cid) mmContainer,
+// marks each for eviction and unlinks it; used by the background movers.
+template <typename CacheTrait>
+std::vector<typename CacheAllocator<CacheTrait>::MoveData>
+CacheAllocator<CacheTrait>::getNextCandidates(PoolId pid,
+                                              ClassId cid,
+                                              uint32_t batch) {
+  std::vector<MoveData> evictionData;
+  evictionData.reserve(batch);
+
+  auto& mmContainer = getMMContainer(pid, cid);
+  unsigned int maxSearchTries =
+      std::max(config_.evictionSearchTries, batch * 2);
+
+  mmContainer.withEvictionIterator([this, pid, cid, batch, maxSearchTries,
+                                    &evictionData, &mmContainer](auto&& itr) {
+    unsigned int searchTries = 0;
+    if (!itr) {
+      // nothing to traverse: still count this as one eviction attempt
+      (*stats_.evictionAttempts)[pid][cid].inc();
+      return;
+    }
+
+    while ((config_.evictionSearchTries == 0 || maxSearchTries > searchTries) &&
+           itr && evictionData.size() < batch) {
+      ++searchTries;
+      (*stats_.evictionAttempts)[pid][cid].inc();
+
+      auto* toRecycle_ = itr.get();
+      bool isChained_ = toRecycle_->isChainedItem();
+      auto* candidate_ =
+          isChained_ ? &toRecycle_->asChainedItem().getParentItem(compressor_)
+                     : toRecycle_;
+
+      typename NvmCacheT::PutToken putToken{};
+      const bool evictToNvmCache = shouldWriteToNvmCache(*candidate_);
+
+      auto markForEviction = [&candidate_, this]() {
+        auto markedForEviction = candidate_->markForEviction();
+        if (!markedForEviction) {
+          if (candidate_->hasChainedItem()) {
+            stats_.evictFailParentAC.inc();
+          } else {
+            stats_.evictFailAC.inc();
+          }
+          return false;
+        }
+        return true;
+      };
+
+      if (evictToNvmCache) {
+        auto putTokenRv = nvmCache_->createPutToken(
+            candidate_->getKey(),
+            [&markForEviction]() { return markForEviction(); });
+
+        if (!putTokenRv) {
+          switch (putTokenRv.error()) {
+          case InFlightPuts::PutTokenError::TRY_LOCK_FAIL:
+            stats_.evictFailPutTokenLock.inc();
+            break;
+          case InFlightPuts::PutTokenError::TOKEN_EXISTS:
+            stats_.evictFailConcurrentFill.inc();
+            break;
+          case InFlightPuts::PutTokenError::CALLBACK_FAILED:
+            stats_.evictFailConcurrentAccess.inc();
+            break;
+          }
+          ++itr;
+          continue;
+        }
+        putToken = std::move(*putTokenRv);
+        XDCHECK(putToken.isValid());
+      } else {
+        if (!markForEviction()) {
+          ++itr;
+          continue;
+        }
+      }
+
+      // The candidate is now marked for eviction, so no other thread is
+      // evicting it or holding a handle to it.
+
+      // If the parent of a chained item changed we cannot remove the child
+      // from the mmContainer since we will not be evicting it.
+      // NOTE(review): in that case the candidate marked above is neither
+      // recorded nor unmarked and itr is not advanced - confirm intended.
+      if (!toRecycle_->isChainedItem() ||
+          &toRecycle_->asChainedItem().getParentItem(compressor_) ==
+              candidate_) {
+        mmContainer.remove(itr);
+        MoveData moveData(candidate_, toRecycle_, nullptr, isChained_,
+                          candidate_->isExpired(), std::move(putToken),
+                          nullptr);
+        evictionData.push_back(std::move(moveData));
+      }
+    }
+  });
+
+  for (auto& moveData : evictionData) {
+    Item* candidate = moveData.candidate;
+    unlinkItemForEviction(*candidate);
+    if (moveData.token.isValid() &&
+        shouldWriteToNvmCacheExclusive(*candidate)) {
+      nvmCache_->put(*candidate, std::move(moveData.token));
+    }
+  }
+
+  return evictionData;
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::Item*
 CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
@@ -4574,20 +4720,12 @@ PoolId CacheAllocator<CacheTrait>::addPool(
   setRebalanceStrategy(pid, std::move(rebalanceStrategy));
   setResizeStrategy(pid, std::move(resizeStrategy));
 
-  if (backgroundEvictor_.size()) {
-    auto memoryAssignments =
-        createBgWorkerMemoryAssignments(backgroundEvictor_.size());
-    for (size_t id = 0; id < backgroundEvictor_.size(); id++)
-      backgroundEvictor_[id]->setAssignedMemory(
-          std::move(memoryAssignments[id]));
-  }
-
-  if (backgroundPromoter_.size()) {
+  if (backgroundMover_.size()) {
     auto memoryAssignments =
-        createBgWorkerMemoryAssignments(backgroundPromoter_.size());
-    for (size_t id = 0; id < backgroundPromoter_.size(); id++)
-      backgroundPromoter_[id]->setAssignedMemory(
-          std::move(memoryAssignments[id]));
+        createBgWorkerMemoryAssignments(backgroundMover_.size());
+    for (size_t id = 0; id < backgroundMover_.size(); id++) {
+      backgroundMover_[id]->setAssignedMemory(std::move(memoryAssignments[id]));
+    }
   }
 
   return pid;
@@ -5112,8 +5250,7 @@ CacheAllocator<CacheTrait>::allocateNewItemForOldItem(const Item& oldItem) {
                                      oldItem.getKey(),
                                      oldItem.getSize(),
                                      oldItem.getCreationTime(),
-                                     oldItem.getExpiryTime(),
-                                     false);
+                                     oldItem.getExpiryTime());
   if (!newItemHdl) {
     return {};
   }
@@ -5434,8 +5571,7 @@ bool CacheAllocator<CacheTrait>::stopWorkers(std::chrono::seconds timeout) {
   success &= stopPoolResizer(timeout);
   success &= stopMemMonitor(timeout);
   success &= stopReaper(timeout);
-  success &= stopBackgroundEvictor(timeout);
-  success &= stopBackgroundPromoter(timeout);
+  success &= stopBackgroundMover(timeout);
   return success;
 }
 
@@ -5694,8 +5830,7 @@ GlobalCacheStats CacheAllocator<CacheTrait>::getGlobalCacheStats() const {
   ret.nvmCacheEnabled = nvmCache_ ? nvmCache_->isEnabled() : false;
   ret.reaperStats = getReaperStats();
   ret.rebalancerStats = getRebalancerStats();
-  ret.evictionStats = getBackgroundMoverStats(MoverDir::Evict);
-  ret.promotionStats = getBackgroundMoverStats(MoverDir::Promote);
+  ret.moverStats = getBackgroundMoverStats();
   ret.numActiveHandles = getNumActiveHandles();
 
   ret.isNewRamCache = cacheCreationTime_ == cacheInstanceCreationTime_;
@@ -5881,47 +6016,23 @@ auto CacheAllocator<CacheTrait>::createBgWorkerMemoryAssignments(
 }
 
 template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::startNewBackgroundEvictor(
+bool CacheAllocator<CacheTrait>::startNewBackgroundMover(
     std::chrono::milliseconds interval,
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
+    size_t evictionBatch,
+    size_t promotionBatch,
+    double targetFree,
     size_t threads) {
   XDCHECK(threads > 0);
-  backgroundEvictor_.resize(threads);
+  backgroundMover_.resize(threads);
   bool result = true;
-
   auto memoryAssignments = createBgWorkerMemoryAssignments(threads);
   for (size_t i = 0; i < threads; i++) {
-    auto ret = startNewWorker("BackgroundEvictor" + std::to_string(i),
-                              backgroundEvictor_[i], interval, *this, strategy,
-                              MoverDir::Evict);
+    auto ret = startNewWorker("BackgroundMover" + std::to_string(i),
+                              backgroundMover_[i], interval, *this,
+                              evictionBatch, promotionBatch, targetFree);
     result = result && ret;
-
     if (result) {
-      backgroundEvictor_[i]->setAssignedMemory(std::move(memoryAssignments[i]));
-    }
-  }
-  return result;
-}
-
-template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::startNewBackgroundPromoter(
-    std::chrono::milliseconds interval,
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
-    size_t threads) {
-  XDCHECK(threads > 0);
-  backgroundPromoter_.resize(threads);
-  bool result = true;
-
-  auto memoryAssignments = createBgWorkerMemoryAssignments(threads);
-  for (size_t i = 0; i < threads; i++) {
-    auto ret = startNewWorker("BackgroundPromoter" + std::to_string(i),
-                              backgroundPromoter_[i], interval, *this, strategy,
-                              MoverDir::Promote);
-    result = result && ret;
-
-    if (result) {
-      backgroundPromoter_[i]->setAssignedMemory(
-          std::move(memoryAssignments[i]));
+      backgroundMover_[i]->setAssignedMemory(std::move(memoryAssignments[i]));
     }
   }
   return result;
@@ -5976,23 +6087,11 @@ bool CacheAllocator<CacheTrait>::stopReaper(std::chrono::seconds timeout) {
 }
 
 template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::stopBackgroundEvictor(
-    std::chrono::seconds timeout) {
-  bool result = true;
-  for (size_t i = 0; i < backgroundEvictor_.size(); i++) {
-    auto ret = stopWorker("BackgroundEvictor", backgroundEvictor_[i], timeout);
-    result = result && ret;
-  }
-  return result;
-}
-
-template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::stopBackgroundPromoter(
+bool CacheAllocator<CacheTrait>::stopBackgroundMover(
     std::chrono::seconds timeout) {
   bool result = true;
-  for (size_t i = 0; i < backgroundPromoter_.size(); i++) {
-    auto ret =
-        stopWorker("BackgroundPromoter", backgroundPromoter_[i], timeout);
+  for (size_t i = 0; i < backgroundMover_.size(); i++) {
+    auto ret = stopWorker("BackgroundMover", backgroundMover_[i], timeout);
     result = result && ret;
   }
   return result;
diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h
index 59d659f6f..34f90f9a6 100644
--- a/cachelib/allocator/CacheAllocatorConfig.h
+++ b/cachelib/allocator/CacheAllocatorConfig.h
@@ -27,7 +27,6 @@
 #include <stdexcept>
 #include <string>
 
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
 #include "cachelib/allocator/Cache.h"
 #include "cachelib/allocator/MM2Q.h"
 #include "cachelib/allocator/MemoryMonitor.h"
@@ -285,16 +284,13 @@ class CacheAllocatorConfig {
       std::chrono::seconds ccacheInterval,
       uint32_t ccacheStepSizePercent);
 
-  // Enable the background evictor - scans a tier to look for objects
-  // to evict to the next tier
-  CacheAllocatorConfig& enableBackgroundEvictor(
-      std::shared_ptr<BackgroundMoverStrategy> backgroundMoverStrategy,
-      std::chrono::milliseconds regularInterval,
-      size_t threads);
-
-  CacheAllocatorConfig& enableBackgroundPromoter(
-      std::shared_ptr<BackgroundMoverStrategy> backgroundMoverStrategy,
+  // Enable the background mover - scans a tier to look for objects
+  // to move to the next tier or just evict if single tier.
+  CacheAllocatorConfig& enableBackgroundMover(
       std::chrono::milliseconds regularInterval,
+      size_t evictionBatch,
+      size_t promotionBatch,
+      double targetFree,
       size_t threads);
 
   // This enables an optimization for Pool rebalancing and resizing.
@@ -371,15 +367,9 @@ class CacheAllocatorConfig {
            poolOptimizeStrategy != nullptr;
   }
 
-  // @return whether background evictor thread is enabled
-  bool backgroundEvictorEnabled() const noexcept {
-    return backgroundEvictorInterval.count() > 0 &&
-           backgroundEvictorStrategy != nullptr;
-  }
-
-  bool backgroundPromoterEnabled() const noexcept {
-    return backgroundPromoterInterval.count() > 0 &&
-           backgroundPromoterStrategy != nullptr;
+  // @return whether background mover thread is enabled
+  bool backgroundMoverEnabled() const noexcept {
+    return backgroundMoverInterval.count() > 0 && backgroundMoverThreads > 0;
   }
 
   // @return whether memory monitor is enabled
@@ -496,25 +486,21 @@ class CacheAllocatorConfig {
   // make any progress for the below threshold
   std::chrono::milliseconds slabReleaseStuckThreshold{std::chrono::seconds(60)};
 
-  // the background eviction strategy to be used
-  std::shared_ptr<BackgroundMoverStrategy> backgroundEvictorStrategy{nullptr};
-
-  // the background promotion strategy to be used
-  std::shared_ptr<BackgroundMoverStrategy> backgroundPromoterStrategy{nullptr};
-
-  // time interval to sleep between runs of the background evictor
-  std::chrono::milliseconds backgroundEvictorInterval{
+  // time interval to sleep between runs of the background mover
+  std::chrono::milliseconds backgroundMoverInterval{
       std::chrono::milliseconds{1000}};
 
-  // time interval to sleep between runs of the background promoter
-  std::chrono::milliseconds backgroundPromoterInterval{
-      std::chrono::milliseconds{1000}};
-
-  // number of thread used by background evictor
-  size_t backgroundEvictorThreads{1};
+  // number of threads used by background mover
+  size_t backgroundMoverThreads{1};
 
-  // number of thread used by background promoter
-  size_t backgroundPromoterThreads{1};
+  // How much to keep the cache memory free. This is used by the background
+  // mover to decide when to evict items.
+  double backgroundTargetFree{0.02};
+  // The number of items to evict in each batch in the background mover
+  size_t backgroundEvictionBatch{10};
+  // The number of items to promote in each batch in the background mover
+  // only available when there are multiple memory tiers
+  size_t backgroundPromotionBatch{0};
 
   // time interval to sleep between iterations of pool size optimization,
   // for regular pools and compact caches
@@ -1016,24 +1002,17 @@ CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enablePoolRebalancing(
 }
 
 template <typename T>
-CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enableBackgroundEvictor(
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
-    std::chrono::milliseconds interval,
-    size_t evictorThreads) {
-  backgroundEvictorStrategy = strategy;
-  backgroundEvictorInterval = interval;
-  backgroundEvictorThreads = evictorThreads;
-  return *this;
-}
-
-template <typename T>
-CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enableBackgroundPromoter(
-    std::shared_ptr<BackgroundMoverStrategy> strategy,
+CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::enableBackgroundMover(
     std::chrono::milliseconds interval,
-    size_t promoterThreads) {
-  backgroundPromoterStrategy = strategy;
-  backgroundPromoterInterval = interval;
-  backgroundPromoterThreads = promoterThreads;
+    size_t evictionBatch,
+    size_t promotionBatch,
+    double targetFree,
+    size_t moverThreads) {
+  backgroundMoverInterval = interval;
+  backgroundEvictionBatch = evictionBatch;
+  backgroundPromotionBatch = promotionBatch;
+  backgroundTargetFree = targetFree;
+  backgroundMoverThreads = moverThreads;
   return *this;
 }
 
diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h
index 3692e55b4..d21400770 100644
--- a/cachelib/allocator/CacheStats.h
+++ b/cachelib/allocator/CacheStats.h
@@ -308,21 +308,29 @@ struct RebalancerStats {
 // Mover Stats
 struct BackgroundMoverStats {
   // the number of items this worker moved by looking at pools/classes stats
-  uint64_t numMovedItems{0};
-  // number of times we went executed the thread //TODO: is this def correct?
+  uint64_t numEvictedItems{0};
+  uint64_t numPromotedItems{0};
+
+  // number of times the thread was executed (by periodic worker)
   uint64_t runCount{0};
-  // total number of classes
-  uint64_t totalClasses{0};
-  // eviction size
-  uint64_t totalBytesMoved{0};
-
-  BackgroundMoverStats& operator+=(const BackgroundMoverStats& rhs) {
-    numMovedItems += rhs.numMovedItems;
-    runCount += rhs.runCount;
-    totalClasses += rhs.totalClasses;
-    totalBytesMoved += rhs.totalBytesMoved;
-    return *this;
-  }
+
+  // average number of items moved per run
+  double avgItemsMoved{0.0};
+
+  // number of times we actually traversed the mmContainer
+  uint64_t numTraversals{0};
+
+  // indicates the time in ns for the last iteration
+  uint64_t lastTraversalTimeNs{0};
+
+  // indicates the minimum of all traversals
+  uint64_t minTraversalTimeNs{0};
+
+  // indicates the maximum of all traversals
+  uint64_t maxTraversalTimeNs{0};
+
+  // indicates the average of all traversals
+  uint64_t avgTraversalTimeNs{0};
 };
 
 // CacheMetadata type to export
@@ -345,10 +353,8 @@ struct Stats;
 // Stats that apply globally in cache and
 // the ones that are aggregated over all pools
 struct GlobalCacheStats {
-  // background eviction stats
-  BackgroundMoverStats evictionStats;
-
-  BackgroundMoverStats promotionStats;
+  // background mover stats per each mover thread
+  std::vector<BackgroundMoverStats> moverStats;
 
   // number of calls to CacheAllocator::find
   uint64_t numCacheGets{0};
diff --git a/cachelib/allocator/FreeThresholdStrategy.cpp b/cachelib/allocator/FreeThresholdStrategy.cpp
deleted file mode 100644
index f4afbd78f..000000000
--- a/cachelib/allocator/FreeThresholdStrategy.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "cachelib/allocator/FreeThresholdStrategy.h"
-
-namespace facebook::cachelib {
-
-FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark,
-                                             double highEvictionAcWatermark,
-                                             uint64_t maxEvictionBatch,
-                                             uint64_t minEvictionBatch)
-    : lowEvictionAcWatermark(lowEvictionAcWatermark),
-      highEvictionAcWatermark(highEvictionAcWatermark),
-      maxEvictionBatch(maxEvictionBatch),
-      minEvictionBatch(minEvictionBatch) {}
-
-std::vector<size_t> FreeThresholdStrategy::calculateBatchSizes(
-    const CacheBase& /* cache */,
-    std::vector<MemoryDescriptorType> /* acVec */) {
-  throw std::runtime_error("Not supported yet!");
-}
-
-} // namespace facebook::cachelib
diff --git a/cachelib/allocator/FreeThresholdStrategy.h b/cachelib/allocator/FreeThresholdStrategy.h
deleted file mode 100644
index 13a2ac40d..000000000
--- a/cachelib/allocator/FreeThresholdStrategy.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// @lint-ignore-every CLANGTIDY clang-diagnostic-unused-private-field
-
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
-#include "cachelib/allocator/Cache.h"
-
-namespace facebook {
-namespace cachelib {
-
-// Free threshold strategy for background promotion worker.
-// This strategy tries to keep certain percent of memory free
-// at all times.
-class FreeThresholdStrategy : public BackgroundMoverStrategy {
- public:
-  FreeThresholdStrategy(double lowEvictionAcWatermark,
-                        double highEvictionAcWatermark,
-                        uint64_t maxEvictionBatch,
-                        uint64_t minEvictionBatch);
-  ~FreeThresholdStrategy() {}
-
-  std::vector<size_t> calculateBatchSizes(
-      const CacheBase& cache, std::vector<MemoryDescriptorType> acVecs);
-
- private:
-#if defined(__clang__)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-private-field"
-#endif
-  double lowEvictionAcWatermark{2.0};
-  double highEvictionAcWatermark{5.0};
-  uint64_t maxEvictionBatch{40};
-  uint64_t minEvictionBatch{5};
-#if defined(__clang__)
-#pragma clang diagnostic pop
-#endif
-};
-
-} // namespace cachelib
-} // namespace facebook
diff --git a/cachelib/allocator/MM2Q.h b/cachelib/allocator/MM2Q.h
index cece17e0e..3bf76b018 100644
--- a/cachelib/allocator/MM2Q.h
+++ b/cachelib/allocator/MM2Q.h
@@ -66,6 +66,7 @@ class MM2Q {
   enum LruType { Warm, WarmTail, Hot, Cold, ColdTail, NumTypes };
 
   // Config class for MM2Q
+  // TODO: implement support for useCombinedLockForIterators
   struct Config {
     // Create from serialized config
     explicit Config(SerializationConfigType configState)
@@ -460,6 +461,18 @@ class MM2Q {
     //          is unchanged.
     bool add(T& node) noexcept;
 
+    // helper function to add the node under the container lock
+    void addNodeLocked(T& node, const Time& currTime);
+
+    // adds the given nodes into the container and marks each as being present
+    // in the container. The nodes are added to the head of the lru.
+    //
+    // @param begin, end  Iterator range of node pointers to be added.
+    // @return  number of nodes added - it is up to user to verify all
+    //          expected nodes have been added.
+    template <typename It>
+    uint32_t addBatch(It begin, It end) noexcept;
+
     // removes the node from the lru and sets it previous and next to nullptr.
     //
     // @param node  The node to be removed from the container.
@@ -500,6 +513,11 @@ class MM2Q {
     template <typename F>
     void withEvictionIterator(F&& f);
 
+    // Execute provided function under container lock. Function gets
+    // iterator passed as parameter.
+    template <typename F>
+    void withPromotionIterator(F&& f);
+
     // Execute provided function under container lock.
     template <typename F>
     void withContainerLock(F&& f);
@@ -889,16 +907,41 @@ bool MM2Q::Container<T, HookPtr>::add(T& node) noexcept {
     if (node.isInMMContainer()) {
       return false;
     }
+    addNodeLocked(node, currTime);
+    return true;
+  });
+}
 
-    markHot(node);
-    unmarkCold(node);
-    unmarkTail(node);
-    lru_.getList(LruType::Hot).linkAtHead(node);
-    rebalance();
+// Links `node` at the head of the Hot list, rebalances, marks it as in the
+// container and sets its update time. Expects the caller to hold the lock.
+template <typename T, MM2Q::Hook<T> T::*HookPtr>
+void MM2Q::Container<T, HookPtr>::addNodeLocked(T& node, const Time& currTime) {
+  XDCHECK(!node.isInMMContainer());
+  markHot(node);
+  unmarkCold(node);
+  unmarkTail(node);
+  lru_.getList(LruType::Hot).linkAtHead(node);
+  rebalance();
+
+  node.markInMMContainer();
+  setUpdateTime(node, currTime);
+}
 
-    node.markInMMContainer();
-    setUpdateTime(node, currTime);
-    return true;
+template <typename T, MM2Q::Hook<T> T::*HookPtr>
+template <typename It>
+uint32_t MM2Q::Container<T, HookPtr>::addBatch(It begin, It end) noexcept {
+  const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
+  return lruMutex_->lock_combine([this, begin, end, currTime]() {
+    uint32_t i = 0;  // nodes added so far
+    for (auto itr = begin; itr != end; itr++) {
+      T* node = *itr;  // the range yields node pointers
+      if (node->isInMMContainer()) {
+        return i;  // stop at the first node already in the container
+      }
+      addNodeLocked(*node, currTime);
+      i++;
+    }
+    return i;
   });
 }
 
@@ -920,6 +963,15 @@ void MM2Q::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   }
 }
 
+// Invokes fun via lock_combine with an iterator at the begin of the Hot list.
+template <typename T, MM2Q::Hook<T> T::*HookPtr>
+template <typename F>
+void MM2Q::Container<T, HookPtr>::withPromotionIterator(F&& fun) {
+  lruMutex_->lock_combine([this, &fun]() {
+    fun(LockedIterator{LockHolder{}, lru_.begin(LruType::Hot)});
+  });
+}
+
 template <typename T, MM2Q::Hook<T> T::*HookPtr>
 template <typename F>
 void MM2Q::Container<T, HookPtr>::withContainerLock(F&& fun) {
diff --git a/cachelib/allocator/MMLru.h b/cachelib/allocator/MMLru.h
index 747fd6276..d12f60ada 100644
--- a/cachelib/allocator/MMLru.h
+++ b/cachelib/allocator/MMLru.h
@@ -337,6 +337,18 @@ class MMLru {
     //          is unchanged.
     bool add(T& node) noexcept;
 
+    // helper function to add the node under the container lock
+    void addNodeLocked(T& node, const Time& currTime);
+
+    // adds the given nodes into the container and marks each as being present
+    // in the container. The nodes are added to the head of the lru.
+    //
+    // @param begin, end  Iterator range of node pointers to be added.
+    // @return  number of nodes added - it is up to user to verify all
+    //          expected nodes have been added.
+    template <typename It>
+    uint32_t addBatch(It begin, It end) noexcept;
+
     // removes the node from the lru and sets it previous and next to nullptr.
     //
     // @param node  The node to be removed from the container.
@@ -378,6 +390,11 @@ class MMLru {
     template <typename F>
     void withContainerLock(F&& f);
 
+    // Execute provided function under container lock. Function gets
+    // iterator passed as parameter.
+    template <typename F>
+    void withPromotionIterator(F&& f);
+
     // get copy of current config
     Config getConfig() const;
 
@@ -685,19 +702,47 @@ bool MMLru::Container<T, HookPtr>::add(T& node) noexcept {
     if (node.isInMMContainer()) {
       return false;
     }
-    if (config_.lruInsertionPointSpec == 0 || insertionPoint_ == nullptr) {
-      lru_.linkAtHead(node);
-    } else {
-      lru_.insertBefore(*insertionPoint_, node);
-    }
-    node.markInMMContainer();
-    setUpdateTime(node, currTime);
-    unmarkAccessed(node);
-    updateLruInsertionPoint();
+    addNodeLocked(node, currTime);
     return true;
   });
 }
 
+template <typename T, MMLru::Hook<T> T::*HookPtr>
+void MMLru::Container<T, HookPtr>::addNodeLocked(T& node,
+                                                 const Time& currTime) {
+  XDCHECK(!node.isInMMContainer());  // add()/addBatch() check this first
+  if (config_.lruInsertionPointSpec == 0 || insertionPoint_ == nullptr) {
+    lru_.linkAtHead(node);  // no insertion point in use: link at the head
+  } else {
+    lru_.insertBefore(*insertionPoint_, node);
+  }
+  node.markInMMContainer();
+  setUpdateTime(node, currTime);
+  unmarkAccessed(node);
+  updateLruInsertionPoint();
+}
+
+template <typename T, MMLru::Hook<T> T::*HookPtr>
+template <typename It>
+uint32_t MMLru::Container<T, HookPtr>::addBatch(It begin, It end) noexcept {
+  const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
+  return lruMutex_->lock_combine([this, begin, end, currTime]() {
+    uint32_t i = 0;
+    for (auto itr = begin; itr != end; ++itr) {
+      T* node = *itr;
+      // Stop at the first node already in the container and return the count
+      // added so far; a throw would std::terminate since we are noexcept.
+      // It is up to the caller to verify that all nodes were added.
+      if (node->isInMMContainer()) {
+        return i;
+      }
+      addNodeLocked(*node, currTime);
+      i++;
+    }
+    return i;
+  });
+}
+
 template <typename T, MMLru::Hook<T> T::*HookPtr>
 typename MMLru::Container<T, HookPtr>::LockedIterator
 MMLru::Container<T, HookPtr>::getEvictionIterator() const noexcept {
@@ -716,6 +761,17 @@ void MMLru::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   }
 }
 
+template <typename T, MMLru::Hook<T> T::*HookPtr>
+template <typename F>
+void MMLru::Container<T, HookPtr>::withPromotionIterator(F&& fun) {
+  if (config_.useCombinedLockForIterators) {  // run fun via lock_combine
+    lruMutex_->lock_combine([this, &fun]() { fun(Iterator{lru_.begin()}); });
+  } else {  // otherwise hold lruMutex_ for the duration of fun
+    LockHolder lck{*lruMutex_};
+    fun(Iterator{lru_.begin()});
+  }
+}
+
 template <typename T, MMLru::Hook<T> T::*HookPtr>
 template <typename F>
 void MMLru::Container<T, HookPtr>::withContainerLock(F&& fun) {
diff --git a/cachelib/allocator/MMTinyLFU.h b/cachelib/allocator/MMTinyLFU.h
index 5082b8f69..0994679c5 100644
--- a/cachelib/allocator/MMTinyLFU.h
+++ b/cachelib/allocator/MMTinyLFU.h
@@ -377,6 +377,18 @@ class MMTinyLFU {
     //          is unchanged.
     bool add(T& node) noexcept;
 
+    // helper function to add the node under the container lock
+    void addNodeLocked(T& node, const Time& currTime);
+
+    // adds the given nodes into the container and marks each as being present
+    // in the container. The nodes are added to the head of the lru.
+    //
+    // @param begin, end  Iterator range of node pointers to be added.
+    // @return  number of nodes added - it is up to user to verify all
+    //          expected nodes have been added.
+    template <typename It>
+    uint32_t addBatch(It begin, It end) noexcept;
+
     // removes the node from the lru and sets it previous and next to nullptr.
     //
     // @param node  The node to be removed from the container.
@@ -546,6 +558,11 @@ class MMTinyLFU {
     template <typename F>
     void withEvictionIterator(F&& f);
 
+    // Execute provided function under container lock. Function gets
+    // iterator passed as parameter.
+    template <typename F>
+    void withPromotionIterator(F&& f);
+
     // Execute provided function under container lock.
     template <typename F>
     void withContainerLock(F&& f);
@@ -856,7 +873,16 @@ bool MMTinyLFU::Container<T, HookPtr>::add(T& node) noexcept {
   if (node.isInMMContainer()) {
     return false;
   }
+  addNodeLocked(node, currTime);
+  return true;
+}
 
+// adds the node to the list assuming not in
+// container and holding container lock
+template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
+void MMTinyLFU::Container<T, HookPtr>::addNodeLocked(T& node,
+                                                     const Time& currTime) {
+  XDCHECK(!node.isInMMContainer());
   auto& tinyLru = lru_.getList(LruType::Tiny);
   tinyLru.linkAtHead(node);
   markTiny(node);
@@ -884,7 +910,23 @@ bool MMTinyLFU::Container<T, HookPtr>::add(T& node) noexcept {
   node.markInMMContainer();
   setUpdateTime(node, currTime);
   unmarkAccessed(node);
-  return true;
+}
+
+template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
+template <typename It>
+uint32_t MMTinyLFU::Container<T, HookPtr>::addBatch(It begin, It end) noexcept {
+  const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
+  LockHolder l(lruMutex_);  // held until return, covering the whole batch
+  uint32_t i = 0;  // nodes added so far
+  for (auto itr = begin; itr != end; itr++) {
+    T* node = *itr;  // the range yields node pointers
+    if (node->isInMMContainer()) {
+      return i;  // stop at the first node already in the container
+    }
+    addNodeLocked(*node, currTime);
+    i++;
+  }
+  return i;
 }
 
 template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
@@ -901,6 +943,12 @@ void MMTinyLFU::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   fun(getEvictionIterator());
 }
 
+template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
+template <typename F>
+void MMTinyLFU::Container<T, HookPtr>::withPromotionIterator(F&& /* fun */) {
+  throw std::runtime_error("Not supported");  // fun is never invoked
+}
+
 template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
 template <typename F>
 void MMTinyLFU::Container<T, HookPtr>::withContainerLock(F&& fun) {
diff --git a/cachelib/allocator/PromotionStrategy.h b/cachelib/allocator/PromotionStrategy.h
deleted file mode 100644
index d3eb8686c..000000000
--- a/cachelib/allocator/PromotionStrategy.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "cachelib/allocator/BackgroundMoverStrategy.h"
-#include "cachelib/allocator/Cache.h"
-
-namespace facebook {
-namespace cachelib {
-
-// Strategy for background promotion worker.
-class PromotionStrategy : public BackgroundMoverStrategy {
- public:
-  PromotionStrategy(uint64_t promotionAcWatermark,
-                    uint64_t maxPromotionBatch,
-                    uint64_t minPromotionBatch)
-      : promotionAcWatermark(promotionAcWatermark),
-        maxPromotionBatch(maxPromotionBatch),
-        minPromotionBatch(minPromotionBatch) {}
-  ~PromotionStrategy() {}
-
-  std::vector<size_t> calculateBatchSizes(
-      const CacheBase& cache, std::vector<MemoryDescriptorType> acVec) {
-    return {};
-  }
-
- private:
-  double promotionAcWatermark{4.0};
-  uint64_t maxPromotionBatch{40};
-  uint64_t minPromotionBatch{5};
-};
-
-} // namespace cachelib
-} // namespace facebook
diff --git a/cachelib/allocator/memory/AllocationClass.cpp b/cachelib/allocator/memory/AllocationClass.cpp
index ab99e42d6..d3a649dd9 100644
--- a/cachelib/allocator/memory/AllocationClass.cpp
+++ b/cachelib/allocator/memory/AllocationClass.cpp
@@ -142,6 +142,27 @@ void* AllocationClass::addSlabAndAllocate(Slab* slab) {
   });
 }
 
+// Adds `slab` to this class and, within the same lock acquisition, hands out
+// up to `batch` allocations; fewer are returned if allocateLocked() runs dry.
+std::vector<void*> AllocationClass::addSlabAndAllocateBatch(Slab* slab,
+                                                            size_t batch) {
+  XDCHECK_NE(nullptr, slab);
+  std::vector<void*> result;
+  result.reserve(batch);
+  lock_->lock_combine([this, slab, batch, &result]() {
+    addSlabLocked(slab);
+    while (result.size() < batch) {
+      void* mem = allocateLocked();
+      if (mem == nullptr) {
+        // nothing left to hand out
+        break;
+      }
+      result.push_back(mem);
+    }
+  });
+  return result;
+}
+
 void* AllocationClass::allocateFromCurrentSlabLocked() noexcept {
   XDCHECK(canAllocateFromCurrentSlabLocked());
   void* ret = currSlab_->memoryAtOffset(currOffset_);
@@ -161,6 +182,26 @@ void* AllocationClass::allocate() {
   return lock_->lock_combine([this]() -> void* { return allocateLocked(); });
 }
 
+// Hands out up to `batch` allocations under a single lock acquisition. The
+// result is shorter than `batch` (possibly empty) when the class runs dry.
+std::vector<void*> AllocationClass::allocateBatch(size_t batch) {
+  std::vector<void*> result;
+  if (!canAllocate_) {
+    // checked before taking the lock
+    return result;
+  }
+  lock_->lock_combine([this, &result, batch]() {
+    while (result.size() < batch) {
+      void* mem = allocateLocked();
+      if (mem == nullptr) {
+        break;
+      }
+      result.push_back(mem);
+    }
+  });
+  return result;
+}
+
 void* AllocationClass::allocateLocked() {
   // fast path for case when the cache is mostly full.
   if (freedAllocations_.empty() && freeSlabs_.empty() &&
@@ -626,6 +667,37 @@ void AllocationClass::processAllocForRelease(
   });
 }
 
+// Returns `memory` to this class. The caller must hold the AC lock.
+// If the owning slab is marked for release, the allocation is only recorded
+// as freed in slabReleaseAllocMap_; otherwise it goes onto the free list.
+void AllocationClass::freeLocked(const SlabHeader* header,
+                                 const Slab* slab,
+                                 void* memory,
+                                 uintptr_t slabPtrVal) {
+  if (!header->isMarkedForRelease()) {
+    // TODO add checks here to ensure that we dont double free in debug mode.
+    freedAllocations_.insert(*reinterpret_cast<FreeAlloc*>(memory));
+    canAllocate_ = true;
+    return;
+  }
+
+  // the slab is being released, so it must be tracked in the release map.
+  auto entry = slabReleaseAllocMap_.find(slabPtrVal);
+  if (entry == slabReleaseAllocMap_.end()) {
+    throw std::runtime_error(folly::sformat(
+        "Invalid slabReleaseAllocMap "
+        "state when attempting to free an allocation. Memory: {}",
+        memory));
+  }
+  auto& freedBits = entry->second;
+  const auto allocIdx = getAllocIdx(slab, memory);
+  if (freedBits[allocIdx]) {
+    throw std::invalid_argument(
+        folly::sformat("Allocation {} is already marked as free", memory));
+  }
+  freedBits[allocIdx] = true;
+}
+
 void AllocationClass::free(void* memory) {
   const auto* header = slabAlloc_.getSlabHeader(memory);
   auto* slab = slabAlloc_.getSlabForMemory(memory);
@@ -637,33 +709,7 @@ void AllocationClass::free(void* memory) {
   }
 
   const auto slabPtrVal = getSlabPtrValue(slab);
-  lock_->lock_combine([this, header, slab, memory, slabPtrVal]() {
-    // check under the lock we actually add the allocation back to the free list
-    if (header->isMarkedForRelease()) {
-      auto it = slabReleaseAllocMap_.find(slabPtrVal);
-
-      // this should not happen.
-      if (it == slabReleaseAllocMap_.end()) {
-        throw std::runtime_error(folly::sformat(
-            "Invalid slabReleaseAllocMap "
-            "state when attempting to free an allocation. Memory: {}",
-            memory));
-      }
-
-      auto& allocState = it->second;
-      const auto idx = getAllocIdx(slab, memory);
-      if (allocState[idx]) {
-        throw std::invalid_argument(
-            folly::sformat("Allocation {} is already marked as free", memory));
-      }
-      allocState[idx] = true;
-      return;
-    }
-
-    // TODO add checks here to ensure that we dont double free in debug mode.
-    freedAllocations_.insert(*reinterpret_cast<FreeAlloc*>(memory));
-    canAllocate_ = true;
-  });
+  lock_->lock_combine([&]() { freeLocked(header, slab, memory, slabPtrVal); });
 }
 
 serialization::AllocationClassObject AllocationClass::saveState() const {
@@ -707,6 +753,23 @@ ACStats AllocationClass::getStats() const {
   });
 }
 
+// Reads the class counters without taking the lock, so the result is only
+// approximate. Returns (active allocs, active allocs / total alloc slots).
+std::pair<size_t, double> AllocationClass::getApproxUsage() const {
+  const unsigned long long numSlabs = allocatedSlabs_.size();
+  if (numSlabs == 0) {
+    return {0, 0.0};
+  }
+  const unsigned long long capacity = numSlabs * getAllocsPerSlab();
+  const auto unusedInCurrSlab =
+      canAllocateFromCurrentSlabLocked()
+          ? (Slab::kSize - currOffset_) / allocationSize_
+          : 0;
+  const unsigned long long freed = freedAllocations_.size();
+  const unsigned long long active = capacity - freed - unusedInCurrSlab;
+  return {active, static_cast<double>(active) / static_cast<double>(capacity)};
+}
+
 void AllocationClass::createSlabReleaseAllocMapLocked(const Slab* slab) {
   // Initialize slab free state
   // Each bit represents whether or not an alloc has already been freed
diff --git a/cachelib/allocator/memory/AllocationClass.h b/cachelib/allocator/memory/AllocationClass.h
index 5f5ebe1b9..9f20a5161 100644
--- a/cachelib/allocator/memory/AllocationClass.h
+++ b/cachelib/allocator/memory/AllocationClass.h
@@ -97,6 +97,10 @@ class AllocationClass {
   // fetch stats about this allocation class.
   ACStats getStats() const;
 
+  // @return pair of (1) the number of active allocs in this class and
+  //         (2) the approx usage as fraction of used allocs / total allocs
+  std::pair<size_t, double> getApproxUsage() const;
+
   // Whether the pool is full or free to allocate more in the current state.
   // This is only a hint and not a gurantee that subsequent allocate will
   // fail/succeed.
@@ -110,6 +114,13 @@ class AllocationClass {
   //          to this slab class to make further allocations out of it.
   void* allocate();
 
+  // allocates up to `batch` chunks of allocationSize_ from this class.
+  //
+  // @param batch  maximum number of allocations to hand out.
+  // @return  vector of pointers to the allocated chunks; shorter than batch
+  //          (possibly empty) if we don't have enough free memory.
+  std::vector<void*> allocateBatch(size_t batch);
+
   // @param ctx     release context for the slab owning this alloc
   // @param memory  memory to check
   //
@@ -212,6 +223,39 @@ class AllocationClass {
   // this slab class.
   void free(void* memory);
 
+  // releases the memory back to the class; caller must hold the AC lock
+  void freeLocked(const SlabHeader* header,
+                  const Slab* slab,
+                  void* memory,
+                  uintptr_t slabPtrVal);
+
+  // Frees every allocation in [begin, end) back to this class, taking the AC
+  // lock once for the whole batch rather than once per allocation.
+  // Returns the number of allocations freed.
+  template <typename It>
+  uint32_t freeBatch(It begin, It end) {
+    return lock_->lock_combine([this, begin, end]() -> uint32_t {
+      uint32_t numFreed = 0;
+      for (auto cur = begin; cur != end; ++cur, ++numFreed) {
+        void* mem = *cur;
+        const auto* hdr = slabAlloc_.getSlabHeader(mem);
+        auto* owner = slabAlloc_.getSlabForMemory(mem);
+        if (hdr == nullptr || hdr->classId != classId_) {
+          throw std::invalid_argument(folly::sformat(
+              "trying to free memory {} (with ClassId {}), not belonging to "
+              "this "
+              "AllocationClass (ClassId {})",
+              mem, hdr ? hdr->classId : Slab::kInvalidClassId,
+              classId_));
+        }
+
+        // freeLocked needs the slab pointer value to find release state
+        freeLocked(hdr, owner, mem, getSlabPtrValue(owner));
+      }
+      return numFreed;
+    });
+  }
+
   // acquires a new slab for this allocation class.
   // @param slab    a new slab to be added. This can NOT be nullptr.
   void addSlab(Slab* slab);
@@ -221,6 +265,12 @@ class AllocationClass {
   // @return  new allocation. This cannot fail.
   void* addSlabAndAllocate(Slab* slab);
 
+  // acquires a new slab and allocates a batch right away
+  // @param slab a new slab to be added.
+  // @param batch number of allocations to be made.
+  // @return  vector of pointers to the memory of new allocations
+  std::vector<void*> addSlabAndAllocateBatch(Slab* slab, size_t batch);
+
   // Releasing a slab is a two step process.
   // 1. Mark a slab for release, by calling `startSlabRelease`.
   // 2. Free all the activeAllocations
diff --git a/cachelib/allocator/memory/MemoryAllocator.cpp b/cachelib/allocator/memory/MemoryAllocator.cpp
index 5de65e4e1..0a81cc987 100644
--- a/cachelib/allocator/memory/MemoryAllocator.cpp
+++ b/cachelib/allocator/memory/MemoryAllocator.cpp
@@ -71,6 +71,13 @@ void* MemoryAllocator::allocate(PoolId id, uint32_t size) {
   return mp.allocate(size);
 }
 
+std::vector<void*> MemoryAllocator::allocateByCidBatch(PoolId id,
+                                                       ClassId cid,
+                                                       size_t batch) {
+  // look up the pool by id and let it perform the batch allocation
+  return memoryPoolManager_.getPoolById(id).allocateByCidBatch(cid, batch);
+}
+
 void* MemoryAllocator::allocateZeroedSlab(PoolId id) {
   if (!config_.enableZeroedSlabAllocs) {
     throw std::logic_error("Zeroed Slab allcoation is not enabled");
diff --git a/cachelib/allocator/memory/MemoryAllocator.h b/cachelib/allocator/memory/MemoryAllocator.h
index 105873a39..307540204 100644
--- a/cachelib/allocator/memory/MemoryAllocator.h
+++ b/cachelib/allocator/memory/MemoryAllocator.h
@@ -168,6 +168,15 @@ class MemoryAllocator {
   //        invalid.
   void* allocate(PoolId id, uint32_t size);
 
+  // allocates a batch of memory for the given class id
+  // @param id     the pool id to be used for this allocation.
+  // @param cid    the class id for the allocation.
+  // @param batch  the number of allocations to be made.
+  // @return a vector of pointers to the memory corresponding to the allocation.
+  // @throw std::invalid_argument if the poolId is invalid or the class id is
+  //       invalid.
+  std::vector<void*> allocateByCidBatch(PoolId id, ClassId cid, size_t batch);
+
   // Allocate a zeroed Slab
   //
   // This guarantees the content of the allocated slab is zero because when
@@ -186,6 +195,14 @@ class MemoryAllocator {
   //        allocation handed out by this allocator.
   void free(void* memory);
 
+  // frees the allocations in [begin, end) back to class `cid` of pool `pid`
+  // through one batched call, which avoids locking the AC for each free.
+  // NOTE(review): presumably throws if `pid` is not a valid pool - confirm.
+  template <typename It>
+  void freeBatch(It begin, It end, PoolId pid, ClassId cid) {
+    memoryPoolManager_.getPoolById(pid).freeBatch(begin, end, cid);
+  }
+
   // Memory pool interface. The memory pools must be established before the
   // first allocation happens. Currently we dont support adding / removing
   // pools dynamically.
diff --git a/cachelib/allocator/memory/MemoryPool.cpp b/cachelib/allocator/memory/MemoryPool.cpp
index 9614e79af..329469001 100644
--- a/cachelib/allocator/memory/MemoryPool.cpp
+++ b/cachelib/allocator/memory/MemoryPool.cpp
@@ -301,6 +301,68 @@ bool MemoryPool::provision(const std::vector<uint32_t>& slabsDistribution) {
   return true;
 }
 
+// Allocates up to `batch` chunks from allocation class `cid`. The request is
+// served in up to three stages and returns as soon as it is satisfied:
+//   1. from the class's existing free memory, without the pool lock;
+//   2. from the class again, this time holding the pool lock;
+//   3. by adding slabs to the class one at a time.
+//
+// @param cid    the allocation class to allocate from
+// @param batch  the maximum number of allocations to hand out
+// @return the allocations made; holds fewer than `batch` entries (possibly
+//         none) when the pool cannot acquire any more slabs or all of its
+//         slabs are already allocated.
+std::vector<void*> MemoryPool::allocateByCidBatch(ClassId cid, size_t batch) {
+  auto& ac = getAllocationClassFor(cid);
+  const auto allocSize = ac.getAllocSize();
+
+  // stage 1: fast path that does not take the pool lock.
+  auto allocs = ac.allocateBatch(batch);
+  currAllocSize_ += allocSize * allocs.size();
+  if (allocs.size() == batch) {
+    return allocs;
+  }
+
+  // atomically see if we can acquire a slab by checking if we have
+  // reached the limit by size. If not, then they can be acquired from
+  // either the slab allocator or our free list. It is important to check
+  // this before we grab it from the slab allocator or free list. Things
+  // that release slab, bump down the currSlabAllocSize_ after actually
+  // releasing and adding it to free list or slab allocator.
+  if (allSlabsAllocated()) {
+    return allocs;
+  }
+
+  // accounts for a freshly allocated chunk list and appends it to the result.
+  const auto collect = [&](const std::vector<void*>& chunk) {
+    currAllocSize_ += allocSize * chunk.size();
+    allocs.insert(allocs.end(), chunk.begin(), chunk.end());
+  };
+
+  // TODO: introduce a new sharded lock by allocation class id for this slow
+  // path. Currently this would also serialize the slow paths of two different
+  // allocation class ids that need slab to initiate an allocation.
+  LockHolder l(lock_);
+
+  // stage 2: retry under the lock before spending a slab.
+  collect(ac.allocateBatch(batch - allocs.size()));
+  if (allocs.size() == batch) {
+    return allocs;
+  }
+
+  // stage 3: feed slabs to the class until the batch is full. size_t
+  // arithmetic keeps the remaining count from being truncated.
+  while (allocs.size() < batch) {
+    auto slab = getSlabLocked();
+    if (slab == nullptr) {
+      // out of memory
+      break;
+    }
+    collect(ac.addSlabAndAllocateBatch(slab, batch - allocs.size()));
+  }
+  return allocs;
+}
+
 void* MemoryPool::allocate(uint32_t size) {
   auto& ac = getAllocationClassFor(size);
 
@@ -562,3 +624,8 @@ MPStats MemoryPool::getStats() const {
                  slabsUnAllocated,    nSlabResize_,       nSlabRebalance_,
                  curSlabsAdvised_};
 }
+
+// forwards to the allocation class that backs `cid`
+std::pair<size_t, double> MemoryPool::getApproxUsage(ClassId cid) const {
+  return getAllocationClassFor(cid).getApproxUsage();
+}
diff --git a/cachelib/allocator/memory/MemoryPool.h b/cachelib/allocator/memory/MemoryPool.h
index 6a11c6665..d7c84fd0d 100644
--- a/cachelib/allocator/memory/MemoryPool.h
+++ b/cachelib/allocator/memory/MemoryPool.h
@@ -133,6 +133,12 @@ class MemoryPool {
 
   MPStats getStats() const;
 
+  // gets the approximate class usage for the given class id.
+  //
+  // @param cid  the class id for which we want to get the usage.
+  // @return a pair of number of active allocations and the usage in the slab.
+  std::pair<size_t, double> getApproxUsage(ClassId cid) const;
+
   // Provision each allocation class with prescribed number of slabs.
   //
   // @param slabsDistribution   number of slabs in each AC
@@ -147,6 +153,14 @@ class MemoryPool {
   // @throw  std::invalid_argument if size is invalid.
   void* allocate(uint32_t size);
 
+  // allocates a batch of memory from the given allocation class.
+  //
+  // @param cid    the class id for the allocation.
+  // @param batch  the number of allocations to be made.
+  // @return a vector of pointers to the memory of the class
+  // @throw  std::invalid_argument if the class id is invalid.
+  std::vector<void*> allocateByCidBatch(ClassId cid, size_t batch);
+
   // Allocate a slab with zeroed memory
   //
   // @return pointer to allocation or nullptr on failure to allocate.
@@ -165,6 +179,19 @@ class MemoryPool {
   // @throw std::run_time_error if the slab class information is corrupted.
   void free(void* memory);
 
+  // frees a batch of memory back to the pool and lowers the pool's allocated
+  // size accordingly. throws if an allocation does not belong to class cid.
+  //
+  // @param  begin  iterator to the start of the batch
+  // @param  end    iterator to the end of the batch
+  // @param  cid    the allocation class id of the batch
+  template <typename It>
+  void freeBatch(It begin, It end, ClassId cid) {
+    auto& allocClass = getAllocationClassFor(cid);
+    const auto numFreed = allocClass.freeBatch(begin, end);
+    currAllocSize_ -= allocClass.getAllocSize() * numFreed;
+  }
+
   // resize the memory pool. This only adjusts the Pool size. It does not
   // release the slabs back to the SlabAllocator if the new size is less than
   // the current size. The caller is responsible for doing that through
diff --git a/cachelib/allocator/tests/AllocatorTypeTest.cpp b/cachelib/allocator/tests/AllocatorTypeTest.cpp
index 05f53eb4b..db1270bc0 100644
--- a/cachelib/allocator/tests/AllocatorTypeTest.cpp
+++ b/cachelib/allocator/tests/AllocatorTypeTest.cpp
@@ -405,6 +405,10 @@ TYPED_TEST(BaseAllocatorTest, SlabReleaseStuck) {
   this->testSlabReleaseStuck();
 }
 
+TYPED_TEST(BaseAllocatorTest, BackgroundEviction) {
+  this->testBackgroundEviction();
+}
+
 TYPED_TEST(BaseAllocatorTest, RateMap) { this->testRateMap(); }
 
 TYPED_TEST(BaseAllocatorTest, StatSnapshotTest) {
diff --git a/cachelib/allocator/tests/BaseAllocatorTest.h b/cachelib/allocator/tests/BaseAllocatorTest.h
index 105a7f9bd..a1e3fe5f1 100644
--- a/cachelib/allocator/tests/BaseAllocatorTest.h
+++ b/cachelib/allocator/tests/BaseAllocatorTest.h
@@ -6189,6 +6189,45 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
     ASSERT_EQ(0, alloc.getSlabReleaseStats().numSlabReleaseStuck);
   }
 
+  // Fills a single-tier pool until it evicts, then waits for the background
+  // mover to evict enough items to get back to the target free ratio.
+  void testBackgroundEviction() {
+    typename AllocatorT::Config config{};
+    const size_t cacheSize = 5 * Slab::kSize; // 20 MB
+    const double targetFree = 0.03; // 3% of the cache kept free
+    config.setCacheSize(cacheSize);
+    config.enableBackgroundMover(std::chrono::milliseconds{10000},
+                                 20, // just test eviction for single tier
+                                 0, targetFree, 1);
+    AllocatorT alloc(config);
+    const size_t numBytes = alloc.getCacheMemoryStats().ramCacheSize;
+    auto poolId = alloc.addPool("foobar", numBytes);
+    const unsigned int keyLen = 20;
+    const std::vector<unsigned int> size{500};
+    auto& pool = alloc.getPool(poolId);
+
+    this->fillUpPoolUntilEvictions(alloc, poolId, size, keyLen);
+    const auto classId = pool.getAllocationClassId(size[0]);
+    auto stats = alloc.getGlobalCacheStats();
+    auto [currItems, currUsage] = pool.getApproxUsage(classId);
+    ASSERT_GT(currUsage, 0.0); // maxItems below divides by the usage
+    const size_t maxItems = currItems / currUsage;
+    const size_t targetItems = maxItems * (1 - targetFree);
+    const size_t needed = currItems > targetItems ? currItems - targetItems : 0;
+    XLOGF(INFO, "Current usage: {:.2f}, Current items: {}", currUsage, currItems);
+    XLOGF(INFO, "Target items: {}, Approx evictions needed: {}", targetItems, needed);
+    // NOTE(review): unbounded wait; a stalled mover hangs the test - confirm ok
+    while (stats.moverStats[0].numEvictedItems < needed &&
+           currUsage > (1 - targetFree)) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+      stats = alloc.getGlobalCacheStats();
+      currUsage = pool.getApproxUsage(classId).second;
+    }
+    const auto evicted = stats.moverStats[0].numEvictedItems;
+    XLOGF(INFO, "Evictions needed: {}, Evictions performed: {}", needed, evicted);
+    ASSERT_GE(evicted, needed * 0.90); // mover should do >= 90% of evictions
+  }
+
   void testRateMap() {
     RateMap counters;
     counters.updateCount("stat1", 11);
diff --git a/cachelib/cachebench/cache/Cache.h b/cachelib/cachebench/cache/Cache.h
index 17a4dc155..07e997030 100644
--- a/cachelib/cachebench/cache/Cache.h
+++ b/cachelib/cachebench/cache/Cache.h
@@ -518,6 +518,13 @@ Cache<Allocator>::Cache(const CacheConfig& config,
       config_.getRebalanceStrategy(),
       std::chrono::seconds(config_.poolRebalanceIntervalSec));
 
+  allocatorConfig_.enableBackgroundMover(
+      std::chrono::milliseconds(config_.backgroundMoverIntervalMilSec),
+      config_.backgroundEvictionBatch,
+      config_.backgroundPromotionBatch,
+      config_.backgroundTargetFree,
+      config_.backgroundMoverThreads);
+
   if (config_.moveOnSlabRelease && movingSync != nullptr) {
     allocatorConfig_.enableMovingOnSlabRelease(
         [](Item& oldItem, Item& newItem, Item* parentPtr) {
@@ -1125,14 +1132,15 @@ Stats Cache<Allocator>::getStats() const {
     aggregate += poolStats;
   }
 
-  std::map<PoolId, std::map<ClassId, ACStats>> allocationClassStats{};
+  std::map<MemoryDescriptorType, ACStats> allocationClassStats{};
 
   for (size_t pid = 0; pid < pools_.size(); pid++) {
     PoolId poolId = static_cast<PoolId>(pid);
     auto poolStats = cache_->getPoolStats(poolId);
     auto cids = poolStats.getClassIds();
     for (auto [cid, stats] : poolStats.mpStats.acStats) {
-      allocationClassStats[poolId][cid] = stats;
+      MemoryDescriptorType md(poolId, cid);
+      allocationClassStats[md] = stats;
     }
   }
 
@@ -1141,21 +1149,14 @@ Stats Cache<Allocator>::getStats() const {
   const auto navyStats = cache_->getNvmCacheStatsMap().toMap();
 
   ret.allocationClassStats = allocationClassStats;
+  ret.backgroundMoverStats = cacheStats.moverStats;
   ret.numEvictions = aggregate.numEvictions();
   ret.numItems = aggregate.numItems();
   ret.evictAttempts = cacheStats.evictionAttempts;
   ret.allocAttempts = cacheStats.allocAttempts;
   ret.allocFailures = cacheStats.allocFailures;
 
-  ret.backgndEvicStats.nEvictedItems = cacheStats.evictionStats.numMovedItems;
-  ret.backgndEvicStats.nTraversals = cacheStats.evictionStats.runCount;
-  ret.backgndEvicStats.nClasses = cacheStats.evictionStats.totalClasses;
-  ret.backgndEvicStats.evictionSize = cacheStats.evictionStats.totalBytesMoved;
-
-  ret.backgndPromoStats.nPromotedItems =
-      cacheStats.promotionStats.numMovedItems;
-  ret.backgndPromoStats.nTraversals = cacheStats.promotionStats.runCount;
-
+  ret.backgroundMoverClasses = cache_->getBackgroundMoverClassStats();
   ret.numCacheGets = cacheStats.numCacheGets;
   ret.numCacheGetMiss = cacheStats.numCacheGetMiss;
   ret.numCacheEvictions = cacheStats.numCacheEvictions;
@@ -1203,11 +1204,6 @@ Stats Cache<Allocator>::getStats() const {
     ret.nvmCounters = cache_->getNvmCacheStatsMap().toMap();
   }
 
-  ret.backgroundEvictionClasses =
-      cache_->getBackgroundMoverClassStats(MoverDir::Evict);
-  ret.backgroundPromotionClasses =
-      cache_->getBackgroundMoverClassStats(MoverDir::Promote);
-
   // nvm stats from navy
   if (!isRamOnly() && !navyStats.empty()) {
     auto lookup = [&navyStats](const std::string& key) {
diff --git a/cachelib/cachebench/cache/CacheStats.h b/cachelib/cachebench/cache/CacheStats.h
index a0bb1e4dd..750d660ee 100644
--- a/cachelib/cachebench/cache/CacheStats.h
+++ b/cachelib/cachebench/cache/CacheStats.h
@@ -27,31 +27,8 @@ namespace facebook {
 namespace cachelib {
 namespace cachebench {
 
-struct BackgroundEvictionStats {
-  // the number of items this worker evicted by looking at pools/classes stats
-  uint64_t nEvictedItems{0};
-
-  // number of times we went executed the thread //TODO: is this def correct?
-  uint64_t nTraversals{0};
-
-  // number of classes
-  uint64_t nClasses{0};
-
-  // size of evicted items
-  uint64_t evictionSize{0};
-};
-
-struct BackgroundPromotionStats {
-  // the number of items this worker evicted by looking at pools/classes stats
-  uint64_t nPromotedItems{0};
-
-  // number of times we went executed the thread //TODO: is this def correct?
-  uint64_t nTraversals{0};
-};
-
 struct Stats {
-  BackgroundEvictionStats backgndEvicStats;
-  BackgroundPromotionStats backgndPromoStats;
+  std::vector<BackgroundMoverStats> backgroundMoverStats;
 
   uint64_t numEvictions{0};
   uint64_t numItems{0};
@@ -127,15 +104,17 @@ struct Stats {
   uint64_t invalidDestructorCount{0};
   int64_t unDestructedItemCount{0};
 
-  std::map<PoolId, std::map<ClassId, ACStats>> allocationClassStats;
+  std::map<MemoryDescriptorType, ACStats> allocationClassStats;
 
   // populate the counters related to nvm usage. Cache implementation can decide
   // what to populate since not all of those are interesting when running
   // cachebench.
   std::unordered_map<std::string, double> nvmCounters;
 
-  std::map<PoolId, std::map<ClassId, uint64_t>> backgroundEvictionClasses;
-  std::map<PoolId, std::map<ClassId, uint64_t>> backgroundPromotionClasses;
+  using ClassBgStatsType =
+      std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
+
+  ClassBgStatsType backgroundMoverClasses;
 
   // errors from the nvm engine.
   std::unordered_map<std::string, double> nvmErrors;
@@ -157,10 +136,9 @@ struct Stats {
     out << folly::sformat("RAM Evictions : {:,}", numEvictions) << std::endl;
 
     auto foreachAC = [](const auto& map, auto cb) {
-      for (auto& pidStat : map) {
-        for (auto& cidStat : pidStat.second) {
-          cb(pidStat.first, cidStat.first, cidStat.second);
-        }
+      for (const auto& [key, value] : map) {
+        auto [pid, cid] = key;
+        cb(pid, cid, value);
       }
     };
 
@@ -253,40 +231,50 @@ struct Stats {
       }
     }
 
-    if (!backgroundEvictionClasses.empty() &&
-        backgndEvicStats.nEvictedItems > 0) {
-      out << "== Class Background Eviction Counters Map ==" << std::endl;
-      foreachAC(backgroundEvictionClasses,
-                [&](auto pid, auto cid, auto evicted) {
-                  out << folly::sformat("pid{:2} cid{:4} evicted: {:4}", pid,
-                                        cid, evicted)
-                      << std::endl;
-                });
-
-      out << folly::sformat("Background Evicted Items : {:,}",
-                            backgndEvicStats.nEvictedItems)
-          << std::endl;
-      out << folly::sformat("Background Evictor Traversals : {:,}",
-                            backgndEvicStats.nTraversals)
-          << std::endl;
+    size_t bgId = 1;
+    size_t totalBgEvicted = 0;
+    size_t totalBgPromoted = 0;
+    for (auto& bgWorkerStats : backgroundMoverStats) {
+      if (bgWorkerStats.numEvictedItems > 0 ||
+          bgWorkerStats.numPromotedItems > 0) {
+        out << folly::sformat(" == Background Mover {} Threads ==", bgId)
+            << std::endl;
+        if (bgWorkerStats.numEvictedItems > 0) {
+          out << folly::sformat("Evicted Items: {:,}",
+                                bgWorkerStats.numEvictedItems)
+              << std::endl;
+        }
+        if (bgWorkerStats.numPromotedItems > 0) {
+          out << folly::sformat("Promoted Items: {:,}",
+                                bgWorkerStats.numPromotedItems)
+              << std::endl;
+        }
+        out << folly::sformat(
+                   "Traversals: {:,}\n"
+                   "Run Count: {:,}\n"
+                   "Avg Time Per Traversal in ns: {:,}\n"
+                   "Avg Items Evicted: {:.2f}",
+                   bgWorkerStats.numTraversals, bgWorkerStats.runCount,
+                   bgWorkerStats.avgTraversalTimeNs,
+                   (double)bgWorkerStats.numEvictedItems /
+                       (double)bgWorkerStats.numTraversals)
+            << std::endl;
+        totalBgEvicted += bgWorkerStats.numEvictedItems;
+        totalBgPromoted += bgWorkerStats.numPromotedItems;
+        bgId++;
+      }
     }
 
-    if (!backgroundPromotionClasses.empty() &&
-        backgndPromoStats.nPromotedItems > 0) {
-      out << "== Class Background Promotion Counters Map ==" << std::endl;
-      foreachAC(backgroundPromotionClasses,
-                [&](auto pid, auto cid, auto promoted) {
-                  out << folly::sformat("pid{:2} cid{:4} promoted: {:4}", pid,
-                                        cid, promoted)
-                      << std::endl;
-                });
-
-      out << folly::sformat("Background Promoted Items : {:,}",
-                            backgndPromoStats.nPromotedItems)
-          << std::endl;
-      out << folly::sformat("Background Promoter Traversals : {:,}",
-                            backgndPromoStats.nTraversals)
-          << std::endl;
+    if (!backgroundMoverClasses.empty() &&
+        (totalBgEvicted || totalBgPromoted)) {
+      out << "== Per Class Background Movers Counters ==" << std::endl;
+      foreachAC(backgroundMoverClasses, [&](auto pid, auto cid, auto pair) {
+        if (pair.first > 0 || pair.second > 0) {
+          out << folly::sformat("pid{:2} cid{:4} evicted: {:4} promoted: {:4}",
+                                pid, cid, pair.first, pair.second)
+              << std::endl;
+        }
+      });
     }
 
     if (numNvmGets > 0 || numNvmDeletes > 0 || numNvmPuts > 0) {
@@ -426,6 +414,10 @@ struct Stats {
     if (numCacheEvictions > 0) {
       out << folly::sformat("Total eviction executed {}", numCacheEvictions)
           << std::endl;
+      if (totalBgEvicted) {
+        out << folly::sformat("Total background eviction executed {}", totalBgEvicted)
+            << std::endl;
+      }
     }
   }
 
diff --git a/cachelib/cachebench/util/CacheConfig.cpp b/cachelib/cachebench/util/CacheConfig.cpp
index 6d8f40874..f54bc6e12 100644
--- a/cachelib/cachebench/util/CacheConfig.cpp
+++ b/cachelib/cachebench/util/CacheConfig.cpp
@@ -90,6 +90,13 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
   JSONSetVal(configJson, deviceMaxWriteSize);
   JSONSetVal(configJson, deviceEnableFDP);
 
+  // Background mover related configs
+  JSONSetVal(configJson, backgroundMoverIntervalMilSec);
+  JSONSetVal(configJson, backgroundMoverThreads);
+  JSONSetVal(configJson, backgroundTargetFree);
+  JSONSetVal(configJson, backgroundEvictionBatch);
+  JSONSetVal(configJson, backgroundPromotionBatch);
+
   JSONSetVal(configJson, memoryOnlyTTL);
 
   JSONSetVal(configJson, usePosixShm);
@@ -112,7 +119,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
   // if you added new fields to the configuration, update the JSONSetVal
   // to make them available for the json configs and increment the size
   // below
-  checkCorrectSize<CacheConfig, 760>();
+  checkCorrectSize<CacheConfig, 800>();
 
   if (numPools != poolSizes.size()) {
     throw std::invalid_argument(folly::sformat(
diff --git a/cachelib/cachebench/util/CacheConfig.h b/cachelib/cachebench/util/CacheConfig.h
index 0a1569615..0b59f823b 100644
--- a/cachelib/cachebench/util/CacheConfig.h
+++ b/cachelib/cachebench/util/CacheConfig.h
@@ -243,6 +243,23 @@ struct CacheConfig : public JSONConfig {
   // Memory tiers configs
   std::vector<MemoryTierCacheConfig> memoryTierConfigs{};
 
+  // time interval to sleep in ms between runs of the background mover
+  size_t backgroundMoverIntervalMilSec{0};
+
+  // number of threads used by the background mover
+  size_t backgroundMoverThreads{0};
+
+  // Fraction of the cache memory to keep free. This is used by the background
+  // mover to decide when to evict items.
+  double backgroundTargetFree{0.02};
+
+  // The number of items to evict in each batch in the background mover
+  size_t backgroundEvictionBatch{10};
+
+  // The number of items to promote in each batch in the background mover
+  // only available when there are multiple memory tiers
+  size_t backgroundPromotionBatch{10};
+
   // If enabled, we will use the timestamps from the trace file in the ticker
   // so that the cachebench will observe time based on timestamps from the trace
   // instead of the system time.