diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h index aee86a4e3..e951f1074 100644 --- a/cachelib/allocator/BackgroundMover.h +++ b/cachelib/allocator/BackgroundMover.h @@ -16,9 +16,8 @@ #pragma once -#include "cachelib/allocator/BackgroundMoverStrategy.h" +#include "cachelib/allocator/Cache.h" #include "cachelib/allocator/CacheStats.h" -#include "cachelib/common/AtomicCounter.h" #include "cachelib/common/PeriodicWorker.h" namespace facebook::cachelib { @@ -26,41 +25,54 @@ namespace facebook::cachelib { // needed for the cache api template struct BackgroundMoverAPIWrapper { - static size_t traverseAndEvictItems(C& cache, - unsigned int pid, - unsigned int cid, - size_t batch) { - return cache.traverseAndEvictItems(pid, cid, batch); + // traverse the cache and move items from one tier to another + // @param cache the cache interface + // @param pid the pool id to traverse + // @param cid the class id to traverse + // @param evictionBatch number of items to evict in one go + // @param promotionBatch number of items to promote in one go + // @return pair of number of items evicted and promoted + static std::pair traverseAndMoveItems(C& cache, + PoolId pid, + ClassId cid, + size_t evictionBatch, + size_t promotionBatch) { + return cache.traverseAndMoveItems(pid, cid, evictionBatch, promotionBatch); } - - static size_t traverseAndPromoteItems(C& cache, - unsigned int pid, - unsigned int cid, - size_t batch) { - return cache.traverseAndPromoteItems(pid, cid, batch); + static std::pair getApproxUsage(C& cache, + PoolId pid, + ClassId cid) { + const auto& pool = cache.getPool(pid); + // we wait until all slabs are allocated before we start evicting + if (!pool.allSlabsAllocated()) { + return {0, 0.0}; + } + return pool.getApproxUsage(cid); } }; -enum class MoverDir { Evict = 0, Promote }; - // Periodic worker that evicts items from tiers in batches // The primary aim is to reduce insertion times for new items in the // cache template 
class BackgroundMover : public PeriodicWorker { public: + using ClassBgStatsType = + std::map>; using Cache = CacheT; // @param cache the cache interface - // @param strategy the stragey class that defines how objects are - // moved (promoted vs. evicted and how much) + // @param evictionBatch number of items to evict in one go + // @param promotionBatch number of items to promote in one go + // @param targetFree target free percentage in the class BackgroundMover(Cache& cache, - std::shared_ptr strategy, - MoverDir direction_); + size_t evictionBatch, + size_t promotionBatch, + double targetFree); ~BackgroundMover() override; BackgroundMoverStats getStats() const noexcept; - std::map> getClassStats() const noexcept; + ClassBgStatsType getPerClassStats() const noexcept { return movesPerClass_; } void setAssignedMemory(std::vector&& assignedMemory); @@ -69,40 +81,75 @@ class BackgroundMover : public PeriodicWorker { static size_t workerId(PoolId pid, ClassId cid, size_t numWorkers); private: - std::map> movesPerClass_; + struct TraversalStats { + // record a traversal over all assigned classes + // and its time taken + void recordTraversalTime(uint64_t nsTaken); + + uint64_t getAvgTraversalTimeNs(uint64_t numTraversals) const; + uint64_t getMinTraversalTimeNs() const { return minTraversalTimeNs_; } + uint64_t getMaxTraversalTimeNs() const { return maxTraversalTimeNs_; } + uint64_t getLastTraversalTimeNs() const { return lastTraversalTimeNs_; } + + private: + // time it took us the last time to traverse the cache. 
+ uint64_t lastTraversalTimeNs_{0}; + uint64_t minTraversalTimeNs_{std::numeric_limits::max()}; + uint64_t maxTraversalTimeNs_{0}; + uint64_t totalTraversalTimeNs_{0}; + }; + + TraversalStats traversalStats_; // cache allocator's interface for evicting using Item = typename Cache::Item; Cache& cache_; - std::shared_ptr strategy_; - MoverDir direction_; - - std::function moverFunc; + uint8_t numTiers_{1}; // until we have multi-tier support + size_t evictionBatch_{0}; + size_t promotionBatch_{0}; + double targetFree_{0.03}; // implements the actual logic of running the background evictor void work() override final; void checkAndRun(); - AtomicCounter numMovedItems_{0}; - AtomicCounter numTraversals_{0}; - AtomicCounter totalBytesMoved_{0}; + // populates the toFree map for each class with the number of items to free + std::map getNumItemsToFree( + const std::vector& assignedMemory); + + uint64_t numEvictedItems_{0}; + uint64_t numPromotedItems_{0}; + uint64_t numTraversals_{0}; + + ClassBgStatsType movesPerClass_; std::vector assignedMemory_; folly::DistributedMutex mutex_; }; template -BackgroundMover::BackgroundMover( - Cache& cache, - std::shared_ptr strategy, - MoverDir direction) - : cache_(cache), strategy_(strategy), direction_(direction) { - if (direction_ == MoverDir::Evict) { - moverFunc = BackgroundMoverAPIWrapper::traverseAndEvictItems; - - } else if (direction_ == MoverDir::Promote) { - moverFunc = BackgroundMoverAPIWrapper::traverseAndPromoteItems; - } +BackgroundMover::BackgroundMover(Cache& cache, + size_t evictionBatch, + size_t promotionBatch, + double targetFree) + : cache_(cache), + evictionBatch_(evictionBatch), + promotionBatch_(promotionBatch), + targetFree_(targetFree) {} + +template +void BackgroundMover::TraversalStats::recordTraversalTime( + uint64_t nsTaken) { + lastTraversalTimeNs_ = nsTaken; + minTraversalTimeNs_ = std::min(minTraversalTimeNs_, nsTaken); + maxTraversalTimeNs_ = std::max(maxTraversalTimeNs_, nsTaken); + 
totalTraversalTimeNs_ += nsTaken; +} + +template +uint64_t BackgroundMover::TraversalStats::getAvgTraversalTimeNs( + uint64_t numTraversals) const { + return numTraversals ? totalTraversalTimeNs_ / numTraversals : 0; } template @@ -132,50 +179,89 @@ void BackgroundMover::setAssignedMemory( }); } -// Look for classes that exceed the target memory capacity -// and return those for eviction +template +std::map +BackgroundMover::getNumItemsToFree( + const std::vector& assignedMemory) { + std::map toFree; + for (const auto& md : assignedMemory) { + const auto [pid, cid] = md; + const auto& pool = cache_.getPool(pid); + const auto [activeItems, usage] = + BackgroundMoverAPIWrapper::getApproxUsage(cache_, pid, cid); + if (usage < 1 - targetFree_) { + toFree[md] = 0; + } else { + size_t maxItems = activeItems / usage; + size_t targetItems = maxItems * (1 - targetFree_); + size_t toFreeItems = + activeItems > targetItems ? activeItems - targetItems : 0; + toFree[md] = toFreeItems; + } + } + return toFree; +} + template void BackgroundMover::checkAndRun() { auto assignedMemory = mutex_.lock_combine([this] { return assignedMemory_; }); - - unsigned int moves = 0; - auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory); - - for (size_t i = 0; i < batches.size(); i++) { - const auto [pid, cid] = assignedMemory[i]; - const auto batch = batches[i]; - - if (batch == 0) { - continue; + auto toFree = getNumItemsToFree(assignedMemory); // calculate the number of + // items to free + while (true) { + bool allDone = true; + for (auto md : assignedMemory) { + const auto [pid, cid] = md; + size_t evictionBatch = evictionBatch_; + size_t promotionBatch = 0; // will enable with multi-tier support + if (toFree[md] == 0) { + // no eviction work to be done since there is already at least + // targetFree remaining in the class + evictionBatch = 0; + } else { + allDone = false; // we still have some items to free + } + if (promotionBatch + evictionBatch > 0) { + const auto begin 
= util::getCurrentTimeNs(); + // try moving BATCH items from the class in order to reach free target + auto moved = BackgroundMoverAPIWrapper::traverseAndMoveItems( + cache_, pid, cid, evictionBatch, promotionBatch); + numEvictedItems_ += moved.first; + toFree[md] > moved.first ? toFree[md] -= moved.first : toFree[md] = 0; + numPromotedItems_ += moved.second; + auto curr = movesPerClass_[md]; + curr.first += moved.first; + curr.second += moved.second; + movesPerClass_[md] = curr; + numTraversals_++; + auto end = util::getCurrentTimeNs(); + traversalStats_.recordTraversalTime(end > begin ? end - begin : 0); + } + } + if (shouldStopWork() || allDone) { + break; } - - // try moving BATCH items from the class in order to reach free target - auto moved = moverFunc(cache_, pid, cid, batch); - moves += moved; - movesPerClass_[pid][cid] += moved; - totalBytesMoved_.add(moved * cache_.getPool(pid).getAllocSizes()[cid]); } - - numTraversals_.inc(); - numMovedItems_.add(moves); } template BackgroundMoverStats BackgroundMover::getStats() const noexcept { BackgroundMoverStats stats; - stats.numMovedItems = numMovedItems_.get(); - stats.runCount = numTraversals_.get(); - stats.totalBytesMoved = totalBytesMoved_.get(); + stats.numEvictedItems = numEvictedItems_; + stats.numPromotedItems = numPromotedItems_; + stats.numTraversals = numTraversals_; + stats.runCount = getRunCount(); + stats.avgItemsMoved = + (double)(stats.numEvictedItems + stats.numPromotedItems) / + (double)numTraversals_; + stats.lastTraversalTimeNs = traversalStats_.getLastTraversalTimeNs(); + stats.avgTraversalTimeNs = + traversalStats_.getAvgTraversalTimeNs(numTraversals_); + stats.minTraversalTimeNs = traversalStats_.getMinTraversalTimeNs(); + stats.maxTraversalTimeNs = traversalStats_.getMaxTraversalTimeNs(); return stats; } -template -std::map> -BackgroundMover::getClassStats() const noexcept { - return movesPerClass_; -} - template size_t BackgroundMover::workerId(PoolId pid, ClassId cid, @@ -185,4 +271,4 
@@ size_t BackgroundMover::workerId(PoolId pid, // TODO: came up with some better sharding (use hashing?) return (pid + cid) % numWorkers; } -} // namespace facebook::cachelib +}; // namespace facebook::cachelib diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h deleted file mode 100644 index abf37edd1..000000000 --- a/cachelib/allocator/BackgroundMoverStrategy.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "cachelib/allocator/Cache.h" - -namespace facebook { -namespace cachelib { - -struct MemoryDescriptorType { - MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {} - PoolId pid_; - ClassId cid_; -}; - -// Base class for background eviction strategy. 
-class BackgroundMoverStrategy { - public: - // Calculate how many items should be moved by the background mover - // - // @param cache Cache allocator that implements CacheBase - // @param acVec vector of memory descriptors for which batch sizes should - // be calculated - // - // @return vector of batch sizes, where each element in the vector specifies - // batch size for the memory descriptor in acVec - virtual std::vector calculateBatchSizes( - const CacheBase& cache, std::vector acVec) = 0; - - virtual ~BackgroundMoverStrategy() = default; -}; - -} // namespace cachelib -} // namespace facebook diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt index 6103cdc82..f94c8c90c 100644 --- a/cachelib/allocator/CMakeLists.txt +++ b/cachelib/allocator/CMakeLists.txt @@ -35,7 +35,6 @@ add_library (cachelib_allocator CCacheManager.cpp ContainerTypes.cpp FreeMemStrategy.cpp - FreeThresholdStrategy.cpp HitsPerSlabStrategy.cpp LruTailAgeStrategy.cpp MarginalHitsOptimizeStrategy.cpp diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h index e225ba8a0..5144f4f20 100644 --- a/cachelib/allocator/Cache.h +++ b/cachelib/allocator/Cache.h @@ -73,6 +73,21 @@ enum class DestructorContext { kRemovedFromNVM }; +// a tuple that describes the memory pool and allocation class +struct MemoryDescriptorType { + MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {} + PoolId pid_; + ClassId cid_; + + bool operator<(const MemoryDescriptorType& rhs) const { + return std::make_tuple(pid_, cid_) < std::make_tuple(rhs.pid_, rhs.cid_); + } + + bool operator==(const MemoryDescriptorType& rhs) const { + return std::make_tuple(pid_, cid_) == std::make_tuple(rhs.pid_, rhs.cid_); + } +}; + // A base class of cache exposing members and status agnostic of template type. 
class CacheBase { public: diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 8238ae2fe..dd074d87f 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -352,6 +352,43 @@ class CacheAllocator : public CacheBase { // if user-supplied SyncObj can fail. e.g. if a lock can timeout. virtual bool isValid() const { return true; } }; + + // For background worker stats + using ClassBgStatsType = + std::map>; + + // Movement (eviction/promotion) related data returned from + // function executed under mmContainer lock + struct MoveData { + MoveData() = delete; + MoveData(Item* candidate_, + Item* toRecycle_, + Item* toRecycleParent_, + bool chainedItem_, + bool expired_, + typename NvmCacheT::PutToken token_, + WriteHandle candidateHandle_) + : candidate(candidate_), + toRecycle(toRecycle_), + toRecycleParent(toRecycleParent_), + expired(expired_), + chainedItem(chainedItem_), + token(std::move(token_)), + candidateHandle(std::move(candidateHandle_)) {} + + // item that is candidate for eviction + Item* candidate; + // acutal alloc that will be recycled + // back up to allocator + Item* toRecycle; + // possible parent ref + Item* toRecycleParent; + bool expired; // is item expired + bool chainedItem; // is it a chained item + typename NvmCacheT::PutToken token; // put token for NVM cache + WriteHandle candidateHandle; // hande in case we don't use moving bit + }; + using ChainedItemMovingSync = std::function(Key)>; using AccessContainer = typename Item::AccessContainer; @@ -1083,25 +1120,12 @@ class CacheAllocator : public CacheBase { bool startNewReaper(std::chrono::milliseconds interval, util::Throttler::Config reaperThrottleConfig); - // start background promoter, starting/stopping of this worker - // should not be done concurrently with addPool - // @param interval the period this worker fires - // @param strategy strategy to promote items - // @param threads number of threads used by the 
worker - bool startNewBackgroundPromoter( - std::chrono::milliseconds interval, - std::shared_ptr strategy, - size_t threads); - - // start background evictor, starting/stopping of this worker - // should not be done concurrently with addPool - // @param interval the period this worker fires - // @param strategy strategy to evict items - // @param threads number of threads used by the worker - bool startNewBackgroundEvictor( - std::chrono::milliseconds interval, - std::shared_ptr strategy, - size_t threads); + // start background mover + bool startNewBackgroundMover(std::chrono::milliseconds interval, + size_t evictionBatch, + size_t promotionBatch, + double targetFree, + size_t threads); // Stop existing workers with a timeout bool stopPoolRebalancer(std::chrono::seconds timeout = std::chrono::seconds{ @@ -1111,10 +1135,8 @@ class CacheAllocator : public CacheBase { 0}); bool stopMemMonitor(std::chrono::seconds timeout = std::chrono::seconds{0}); bool stopReaper(std::chrono::seconds timeout = std::chrono::seconds{0}); - bool stopBackgroundEvictor( - std::chrono::seconds timeout = std::chrono::seconds{0}); - bool stopBackgroundPromoter( - std::chrono::seconds timeout = std::chrono::seconds{0}); + bool stopBackgroundMover(std::chrono::seconds timeout = std::chrono::seconds{ + 0}); // Set pool optimization to either true or false // @@ -1199,6 +1221,30 @@ class CacheAllocator : public CacheBase { return stats; } + // returns the background mover stats per thread + std::vector getBackgroundMoverStats() const { + auto stats = std::vector(); + for (auto& bg : backgroundMover_) { + stats.push_back(bg->getStats()); + } + return stats; + } + + ClassBgStatsType getBackgroundMoverClassStats() const { + ClassBgStatsType stats; + auto record = [&](auto& bg) { + // gives a unique descriptor + auto classStats = bg->getPerClassStats(); + for (const auto& [key, value] : classStats) { + stats[key] = value; + } + }; + for (auto& bg : backgroundMover_) { + record(bg); + } + return 
stats; + } + // returns the pool rebalancer stats RebalancerStats getRebalancerStats() const { auto stats = @@ -1486,8 +1532,7 @@ class CacheAllocator : public CacheBase { Key key, uint32_t size, uint32_t creationTime, - uint32_t expiryTime, - bool fromBgThread = false); + uint32_t expiryTime); // Allocate a chained item // @@ -1573,10 +1618,15 @@ class CacheAllocator : public CacheBase { // // @param oldItem Reference to the item being moved // @param newItemHdl Reference to the handle of the new item being moved into + // @param skipAddInMMContainer so we can tell if we should add in mmContainer + // or wait + // to do in batch // // @return true If the move was completed, and the containers were updated // successfully. - bool moveRegularItem(Item& oldItem, WriteHandle& newItemHdl); + bool moveRegularItem(Item& oldItem, + WriteHandle& newItemHdl, + bool skipAddInMMContainer = false); // template class for viewAsChainedAllocs that takes either ReadHandle or // WriteHandle @@ -1762,6 +1812,11 @@ class CacheAllocator : public CacheBase { std::pair getNextCandidate(PoolId pid, ClassId cid, unsigned int& searchTries); + // similiar to the above method but returns a batch of evicted items + // as a pair of vectors + std::vector getNextCandidates(PoolId pid, + ClassId cid, + uint32_t batch); using EvictionIterator = typename MMContainer::LockedIterator; @@ -1898,18 +1953,45 @@ class CacheAllocator : public CacheBase { // exposed for the background evictor to iterate through the memory and evict // in batch. This should improve insertion path for tiered memory config - size_t traverseAndEvictItems(unsigned int /* pid */, - unsigned int /* cid */, - size_t /* batch */) { - throw std::runtime_error("Not supported yet!"); - } - - // exposed for the background promoter to iterate through the memory and - // promote in batch. 
This should improve find latency - size_t traverseAndPromoteItems(unsigned int /* pid */, - unsigned int /* cid */, - size_t /* batch */) { - throw std::runtime_error("Not supported yet!"); + // promotion batch only applies to tiered memory config + std::pair traverseAndMoveItems(PoolId pid, + ClassId cid, + size_t evictionBatch, + size_t promotionBatch) { + auto& mmContainer = getMMContainer(pid, cid); + uint32_t currItems = mmContainer.size(); + if (currItems < evictionBatch) { + evictionBatch = currItems; + } + if (evictionBatch == 0) { + return {0, 0}; + } + auto evictionData = getNextCandidates(pid, cid, evictionBatch); + // we now have a list of candidates and toRecycles, they should go back + // to the allocator and we will do this in batch to avoid AC lock contention + // note - for chained items - we can't do this in bulk + std::vector chainedIdx; + std::vector toRecycles; + size_t idx = 0; + for (auto& data : evictionData) { + if (data.chainedItem) { + chainedIdx.push_back(idx); + } else { + toRecycles.push_back(data.candidate); + } + idx++; + } + for (int i = 0; i < chainedIdx.size(); i++) { + auto& data = evictionData[chainedIdx[i]]; + releaseBackToAllocator(*data.candidate, RemoveContext::kNormal, false, + data.toRecycle); + evictionData.erase(evictionData.begin() + chainedIdx[i]); + (*stats_.chainedItemEvictions)[pid][cid].inc(); + } + allocator_->freeBatch(toRecycles.begin(), toRecycles.end(), pid, cid); + size_t evictions = toRecycles.size(); + (*stats_.regularItemEvictions)[pid][cid].add(evictions); + return {evictions, 0}; } // returns true if nvmcache is enabled and we should write this item to @@ -2087,44 +2169,6 @@ class CacheAllocator : public CacheBase { : false; } - // returns the background mover stats - BackgroundMoverStats getBackgroundMoverStats(MoverDir direction) const { - auto stats = BackgroundMoverStats{}; - if (direction == MoverDir::Evict) { - for (auto& bg : backgroundEvictor_) - stats += bg->getStats(); - } else if (direction == 
MoverDir::Promote) { - for (auto& bg : backgroundPromoter_) - stats += bg->getStats(); - } - return stats; - } - - std::map> getBackgroundMoverClassStats( - MoverDir direction) const { - std::map> stats; - - if (direction == MoverDir::Evict) { - for (auto& bg : backgroundEvictor_) { - for (auto& pid : bg->getClassStats()) { - for (auto& cid : pid.second) { - stats[pid.first][cid.first] += cid.second; - } - } - } - } else if (direction == MoverDir::Promote) { - for (auto& bg : backgroundPromoter_) { - for (auto& pid : bg->getClassStats()) { - for (auto& cid : pid.second) { - stats[pid.first][cid.first] += cid.second; - } - } - } - } - - return stats; - } - bool tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle); @@ -2285,9 +2329,8 @@ class CacheAllocator : public CacheBase { // free memory monitor std::unique_ptr memMonitor_; - // background evictor - std::vector>> backgroundEvictor_; - std::vector>> backgroundPromoter_; + // background data movement, for single tier, this just evicts + std::vector>> backgroundMover_; // check whether a pool is a slabs pool std::array isCompactCachePool_{}; @@ -2612,16 +2655,12 @@ void CacheAllocator::initWorkers() { config_.ccacheOptimizeStepSizePercent); } - if (config_.backgroundEvictorEnabled()) { - startNewBackgroundEvictor(config_.backgroundEvictorInterval, - config_.backgroundEvictorStrategy, - config_.backgroundEvictorThreads); - } - - if (config_.backgroundPromoterEnabled()) { - startNewBackgroundPromoter(config_.backgroundPromoterInterval, - config_.backgroundPromoterStrategy, - config_.backgroundPromoterThreads); + if (config_.backgroundMoverEnabled()) { + startNewBackgroundMover(config_.backgroundMoverInterval, + config_.backgroundEvictionBatch, + config_.backgroundPromotionBatch, + config_.backgroundTargetFree, + config_.backgroundMoverThreads); } } @@ -2719,8 +2758,7 @@ CacheAllocator::allocateInternal(PoolId pid, typename Item::Key key, uint32_t size, uint32_t creationTime, - uint32_t 
expiryTime, - bool fromBgThread) { + uint32_t expiryTime) { util::LatencyTracker tracker{stats().allocateLatency_}; SCOPE_FAIL { stats_.invalidAllocs.inc(); }; @@ -2735,13 +2773,6 @@ CacheAllocator::allocateInternal(PoolId pid, void* memory = allocator_->allocate(pid, requiredSize); - if (backgroundEvictor_.size() && !fromBgThread && - (memory == nullptr || shouldWakeupBgEvictor(pid, cid))) { - backgroundEvictor_[BackgroundMover::workerId( - pid, cid, backgroundEvictor_.size())] - ->wakeUp(); - } - if (memory == nullptr) { memory = findEviction(pid, cid); } @@ -3575,7 +3606,8 @@ void CacheAllocator::wakeUpWaiters(folly::StringPiece key, template bool CacheAllocator::moveRegularItem(Item& oldItem, - WriteHandle& newItemHdl) { + WriteHandle& newItemHdl, + bool skipAddInMMContainer) { XDCHECK(oldItem.isMoving()); // If an item is expired, proceed to eviction. if (oldItem.isExpired()) { @@ -3605,8 +3637,12 @@ bool CacheAllocator::moveRegularItem(Item& oldItem, // Adding the item to mmContainer has to succeed since no one can remove the // item auto& newContainer = getMMContainer(*newItemHdl); - auto mmContainerAdded = newContainer.add(*newItemHdl); - XDCHECK(mmContainerAdded); + if (!skipAddInMMContainer) { + // Adding the item to mmContainer has to succeed since no one can remove the + // item + auto mmContainerAdded = newContainer.add(*newItemHdl); + XDCHECK(mmContainerAdded); + } if (oldItem.hasChainedItem()) { XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString(); @@ -3808,6 +3844,116 @@ CacheAllocator::getNextCandidate(PoolId pid, return {candidate, toRecycle}; } +// Used for the background movers to get a batch of items +// to move/evict +template +std::vector::MoveData> +CacheAllocator::getNextCandidates(PoolId pid, + ClassId cid, + uint32_t batch) { + std::vector evictionData; + evictionData.reserve(batch); + + auto& mmContainer = getMMContainer(pid, cid); + unsigned int maxSearchTries = + std::max(config_.evictionSearchTries, batch * 2); + + 
mmContainer.withEvictionIterator([this, pid, cid, batch, maxSearchTries, + &evictionData, &mmContainer](auto&& itr) { + unsigned int searchTries = 0; + if (!itr) { + ++searchTries; + (*stats_.evictionAttempts)[pid][cid].inc(); + return; + } + + while ((config_.evictionSearchTries == 0 || maxSearchTries > searchTries) && + itr && evictionData.size() < batch) { + ++searchTries; + (*stats_.evictionAttempts)[pid][cid].inc(); + + auto* toRecycle_ = itr.get(); + bool isChained_ = toRecycle_->isChainedItem(); + auto* candidate_ = + isChained_ ? &toRecycle_->asChainedItem().getParentItem(compressor_) + : toRecycle_; + + typename NvmCacheT::PutToken putToken{}; + const bool evictToNvmCache = shouldWriteToNvmCache(*candidate_); + + auto markForEviction = [&candidate_, this]() { + auto markedForEviction = candidate_->markForEviction(); + if (!markedForEviction) { + if (candidate_->hasChainedItem()) { + stats_.evictFailParentAC.inc(); + } else { + stats_.evictFailAC.inc(); + } + return false; + } + return true; + }; + + if (evictToNvmCache) { + auto putTokenRv = nvmCache_->createPutToken( + candidate_->getKey(), + [&markForEviction]() { return markForEviction(); }); + + if (!putTokenRv) { + switch (putTokenRv.error()) { + case InFlightPuts::PutTokenError::TRY_LOCK_FAIL: + stats_.evictFailPutTokenLock.inc(); + break; + case InFlightPuts::PutTokenError::TOKEN_EXISTS: + stats_.evictFailConcurrentFill.inc(); + break; + case InFlightPuts::PutTokenError::CALLBACK_FAILED: + stats_.evictFailConcurrentAccess.inc(); + break; + } + ++itr; + continue; + } + putToken = std::move(*putTokenRv); + XDCHECK(putToken.isValid()); + } else { + if (!markForEviction()) { + ++itr; + continue; + } + } + + // markForEviction to make sure no other thead is evicting the item + // nor holding a handle to that item + + // Check if parent changed for chained items - if yes, we cannot + // remove the child from the mmContainer as we will not be evicting + // it. 
We could abort right here, but we need to cleanup in case + // unmarkForEviction() returns 0 - so just go through normal path. + if (!toRecycle_->isChainedItem() || + &toRecycle_->asChainedItem().getParentItem(compressor_) == + candidate_) { + mmContainer.remove(itr); + MoveData moveData(candidate_, toRecycle_, nullptr, isChained_, + candidate_->isExpired(), std::move(putToken), + nullptr); + evictionData.push_back(std::move(moveData)); + } + } + }); + + for (auto& moveData : evictionData) { + Item* candidate = moveData.candidate; + unlinkItemForEviction(*moveData.candidate); + if (moveData.token.isValid() && + shouldWriteToNvmCacheExclusive(*moveData.candidate)) { + nvmCache_->put(*moveData.candidate, std::move(moveData.token)); + } + } + + return evictionData; +} + template typename CacheAllocator::Item* CacheAllocator::findEviction(PoolId pid, ClassId cid) { @@ -4574,20 +4720,12 @@ PoolId CacheAllocator::addPool( setRebalanceStrategy(pid, std::move(rebalanceStrategy)); setResizeStrategy(pid, std::move(resizeStrategy)); - if (backgroundEvictor_.size()) { - auto memoryAssignments = - createBgWorkerMemoryAssignments(backgroundEvictor_.size()); - for (size_t id = 0; id < backgroundEvictor_.size(); id++) - backgroundEvictor_[id]->setAssignedMemory( - std::move(memoryAssignments[id])); - } - - if (backgroundPromoter_.size()) { + if (backgroundMover_.size()) { auto memoryAssignments = - createBgWorkerMemoryAssignments(backgroundPromoter_.size()); - for (size_t id = 0; id < backgroundPromoter_.size(); id++) - backgroundPromoter_[id]->setAssignedMemory( - std::move(memoryAssignments[id])); + createBgWorkerMemoryAssignments(backgroundMover_.size()); + for (size_t id = 0; id < backgroundMover_.size(); id++) { + backgroundMover_[id]->setAssignedMemory(std::move(memoryAssignments[id])); + } } return pid; @@ -5112,8 +5250,7 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { oldItem.getKey(), oldItem.getSize(), oldItem.getCreationTime(), - 
oldItem.getExpiryTime(), - false); + oldItem.getExpiryTime()); if (!newItemHdl) { return {}; } @@ -5434,8 +5571,7 @@ bool CacheAllocator::stopWorkers(std::chrono::seconds timeout) { success &= stopPoolResizer(timeout); success &= stopMemMonitor(timeout); success &= stopReaper(timeout); - success &= stopBackgroundEvictor(timeout); - success &= stopBackgroundPromoter(timeout); + success &= stopBackgroundMover(timeout); return success; } @@ -5694,8 +5830,7 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { ret.nvmCacheEnabled = nvmCache_ ? nvmCache_->isEnabled() : false; ret.reaperStats = getReaperStats(); ret.rebalancerStats = getRebalancerStats(); - ret.evictionStats = getBackgroundMoverStats(MoverDir::Evict); - ret.promotionStats = getBackgroundMoverStats(MoverDir::Promote); + ret.moverStats = getBackgroundMoverStats(); ret.numActiveHandles = getNumActiveHandles(); ret.isNewRamCache = cacheCreationTime_ == cacheInstanceCreationTime_; @@ -5881,47 +6016,23 @@ auto CacheAllocator::createBgWorkerMemoryAssignments( } template -bool CacheAllocator::startNewBackgroundEvictor( +bool CacheAllocator::startNewBackgroundMover( std::chrono::milliseconds interval, - std::shared_ptr strategy, + size_t evictionBatch, + size_t promotionBatch, + double targetFree, size_t threads) { XDCHECK(threads > 0); - backgroundEvictor_.resize(threads); + backgroundMover_.resize(threads); bool result = true; - auto memoryAssignments = createBgWorkerMemoryAssignments(threads); for (size_t i = 0; i < threads; i++) { - auto ret = startNewWorker("BackgroundEvictor" + std::to_string(i), - backgroundEvictor_[i], interval, *this, strategy, - MoverDir::Evict); + auto ret = startNewWorker("BackgroundMover" + std::to_string(i), + backgroundMover_[i], interval, *this, + evictionBatch, promotionBatch, targetFree); result = result && ret; - if (result) { - backgroundEvictor_[i]->setAssignedMemory(std::move(memoryAssignments[i])); - } - } - return result; -} - -template -bool 
CacheAllocator::startNewBackgroundPromoter( - std::chrono::milliseconds interval, - std::shared_ptr strategy, - size_t threads) { - XDCHECK(threads > 0); - backgroundPromoter_.resize(threads); - bool result = true; - - auto memoryAssignments = createBgWorkerMemoryAssignments(threads); - for (size_t i = 0; i < threads; i++) { - auto ret = startNewWorker("BackgroundPromoter" + std::to_string(i), - backgroundPromoter_[i], interval, *this, strategy, - MoverDir::Promote); - result = result && ret; - - if (result) { - backgroundPromoter_[i]->setAssignedMemory( - std::move(memoryAssignments[i])); + backgroundMover_[i]->setAssignedMemory(std::move(memoryAssignments[i])); } } return result; @@ -5976,23 +6087,11 @@ bool CacheAllocator::stopReaper(std::chrono::seconds timeout) { } template -bool CacheAllocator::stopBackgroundEvictor( - std::chrono::seconds timeout) { - bool result = true; - for (size_t i = 0; i < backgroundEvictor_.size(); i++) { - auto ret = stopWorker("BackgroundEvictor", backgroundEvictor_[i], timeout); - result = result && ret; - } - return result; -} - -template -bool CacheAllocator::stopBackgroundPromoter( +bool CacheAllocator::stopBackgroundMover( std::chrono::seconds timeout) { bool result = true; - for (size_t i = 0; i < backgroundPromoter_.size(); i++) { - auto ret = - stopWorker("BackgroundPromoter", backgroundPromoter_[i], timeout); + for (size_t i = 0; i < backgroundMover_.size(); i++) { + auto ret = stopWorker("BackgroundMover", backgroundMover_[i], timeout); result = result && ret; } return result; diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index 59d659f6f..34f90f9a6 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -27,7 +27,6 @@ #include #include -#include "cachelib/allocator/BackgroundMoverStrategy.h" #include "cachelib/allocator/Cache.h" #include "cachelib/allocator/MM2Q.h" #include "cachelib/allocator/MemoryMonitor.h" @@ -285,16 
+284,13 @@ class CacheAllocatorConfig { std::chrono::seconds ccacheInterval, uint32_t ccacheStepSizePercent); - // Enable the background evictor - scans a tier to look for objects - // to evict to the next tier - CacheAllocatorConfig& enableBackgroundEvictor( - std::shared_ptr backgroundMoverStrategy, - std::chrono::milliseconds regularInterval, - size_t threads); - - CacheAllocatorConfig& enableBackgroundPromoter( - std::shared_ptr backgroundMoverStrategy, + // Enable the background mover - scans a tier to look for objects + // to move to the next tier, or just to evict if there is a single tier. + CacheAllocatorConfig& enableBackgroundMover( std::chrono::milliseconds regularInterval, + size_t evictionBatch, + size_t promotionBatch, + double targetFree, size_t threads); // This enables an optimization for Pool rebalancing and resizing. @@ -371,15 +367,9 @@ class CacheAllocatorConfig { poolOptimizeStrategy != nullptr; } - // @return whether background evictor thread is enabled - bool backgroundEvictorEnabled() const noexcept { - return backgroundEvictorInterval.count() > 0 && - backgroundEvictorStrategy != nullptr; - } - - bool backgroundPromoterEnabled() const noexcept { - return backgroundPromoterInterval.count() > 0 && - backgroundPromoterStrategy != nullptr; + // @return whether background mover thread is enabled + bool backgroundMoverEnabled() const noexcept { + return backgroundMoverInterval.count() > 0 && backgroundMoverThreads > 0; } // @return whether memory monitor is enabled @@ -496,25 +486,21 @@ class CacheAllocatorConfig { // make any progress for the below threshold std::chrono::milliseconds slabReleaseStuckThreshold{std::chrono::seconds(60)}; - // the background eviction strategy to be used - std::shared_ptr backgroundEvictorStrategy{nullptr}; - - // the background promotion strategy to be used - std::shared_ptr backgroundPromoterStrategy{nullptr}; - - // time interval to sleep between runs of the background evictor - std::chrono::milliseconds
backgroundEvictorInterval{ + // time interval to sleep between runs of the background mover + std::chrono::milliseconds backgroundMoverInterval{ std::chrono::milliseconds{1000}}; - // time interval to sleep between runs of the background promoter - std::chrono::milliseconds backgroundPromoterInterval{ - std::chrono::milliseconds{1000}}; - - // number of thread used by background evictor - size_t backgroundEvictorThreads{1}; + // number of threads used by the background mover + size_t backgroundMoverThreads{1}; - // number of thread used by background promoter - size_t backgroundPromoterThreads{1}; + // How much to keep the cache memory free. This is used by the background + // mover to decide when to evict items. + double backgroundTargetFree{0.02}; + // The number of items to evict in each batch in the background mover + size_t backgroundEvictionBatch{10}; + // The number of items to promote in each batch in the background mover + // only available when there are multiple memory tiers + size_t backgroundPromotionBatch{0}; // time interval to sleep between iterations of pool size optimization, // for regular pools and compact caches @@ -1016,24 +1002,17 @@ CacheAllocatorConfig& CacheAllocatorConfig::enablePoolRebalancing( } template -CacheAllocatorConfig& CacheAllocatorConfig::enableBackgroundEvictor( - std::shared_ptr strategy, - std::chrono::milliseconds interval, - size_t evictorThreads) { - backgroundEvictorStrategy = strategy; - backgroundEvictorInterval = interval; - backgroundEvictorThreads = evictorThreads; - return *this; -} - -template -CacheAllocatorConfig& CacheAllocatorConfig::enableBackgroundPromoter( - std::shared_ptr strategy, +CacheAllocatorConfig& CacheAllocatorConfig::enableBackgroundMover( std::chrono::milliseconds interval, - size_t promoterThreads) { - backgroundPromoterStrategy = strategy; - backgroundPromoterInterval = interval; - backgroundPromoterThreads = promoterThreads; + size_t evictionBatch, + size_t promotionBatch, + double targetFree, +
size_t moverThreads) { + backgroundMoverInterval = interval; + backgroundEvictionBatch = evictionBatch; + backgroundPromotionBatch = promotionBatch; + backgroundTargetFree = targetFree; + backgroundMoverThreads = moverThreads; return *this; } diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h index 3692e55b4..d21400770 100644 --- a/cachelib/allocator/CacheStats.h +++ b/cachelib/allocator/CacheStats.h @@ -308,21 +308,29 @@ struct RebalancerStats { // Mover Stats struct BackgroundMoverStats { // the number of items this worker moved by looking at pools/classes stats - uint64_t numMovedItems{0}; - // number of times we went executed the thread //TODO: is this def correct? + uint64_t numEvictedItems{0}; + uint64_t numPromotedItems{0}; + + // number of times the thread was executed (by the periodic worker) uint64_t runCount{0}; - // total number of classes - uint64_t totalClasses{0}; - // eviction size - uint64_t totalBytesMoved{0}; - - BackgroundMoverStats& operator+=(const BackgroundMoverStats& rhs) { - numMovedItems += rhs.numMovedItems; - runCount += rhs.runCount; - totalClasses += rhs.totalClasses; - totalBytesMoved += rhs.totalBytesMoved; - return *this; - } + + // average number of items moved per run + double avgItemsMoved{0.0}; + + // number of times we actually traversed the mmContainer + uint64_t numTraversals{0}; + + // indicates the time in ns for the last iteration + uint64_t lastTraversalTimeNs{0}; + + // indicates the minimum of all traversals + uint64_t minTraversalTimeNs{0}; + + // indicates the maximum of all traversals + uint64_t maxTraversalTimeNs{0}; + + // indicates the average of all traversals + uint64_t avgTraversalTimeNs{0}; }; // CacheMetadata type to export @@ -345,10 +353,8 @@ struct Stats; // Stats that apply globally in cache and // the ones that are aggregated over all pools struct GlobalCacheStats { - // background eviction stats - BackgroundMoverStats evictionStats; - - BackgroundMoverStats promotionStats; +
// background mover stats per each mover thread + std::vector moverStats; // number of calls to CacheAllocator::find uint64_t numCacheGets{0}; diff --git a/cachelib/allocator/FreeThresholdStrategy.cpp b/cachelib/allocator/FreeThresholdStrategy.cpp deleted file mode 100644 index f4afbd78f..000000000 --- a/cachelib/allocator/FreeThresholdStrategy.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "cachelib/allocator/FreeThresholdStrategy.h" - -namespace facebook::cachelib { - -FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark, - double highEvictionAcWatermark, - uint64_t maxEvictionBatch, - uint64_t minEvictionBatch) - : lowEvictionAcWatermark(lowEvictionAcWatermark), - highEvictionAcWatermark(highEvictionAcWatermark), - maxEvictionBatch(maxEvictionBatch), - minEvictionBatch(minEvictionBatch) {} - -std::vector FreeThresholdStrategy::calculateBatchSizes( - const CacheBase& /* cache */, - std::vector /* acVec */) { - throw std::runtime_error("Not supported yet!"); -} - -} // namespace facebook::cachelib diff --git a/cachelib/allocator/FreeThresholdStrategy.h b/cachelib/allocator/FreeThresholdStrategy.h deleted file mode 100644 index 13a2ac40d..000000000 --- a/cachelib/allocator/FreeThresholdStrategy.h +++ /dev/null @@ -1,56 +0,0 @@ -// @lint-ignore-every CLANGTIDY clang-diagnostic-unused-private-field - -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "cachelib/allocator/BackgroundMoverStrategy.h" -#include "cachelib/allocator/Cache.h" - -namespace facebook { -namespace cachelib { - -// Free threshold strategy for background promotion worker. -// This strategy tries to keep certain percent of memory free -// at all times. 
-class FreeThresholdStrategy : public BackgroundMoverStrategy { - public: - FreeThresholdStrategy(double lowEvictionAcWatermark, - double highEvictionAcWatermark, - uint64_t maxEvictionBatch, - uint64_t minEvictionBatch); - ~FreeThresholdStrategy() {} - - std::vector calculateBatchSizes( - const CacheBase& cache, std::vector acVecs); - - private: -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-private-field" -#endif - double lowEvictionAcWatermark{2.0}; - double highEvictionAcWatermark{5.0}; - uint64_t maxEvictionBatch{40}; - uint64_t minEvictionBatch{5}; -#if defined(__clang__) -#pragma clang diagnostic pop -#endif -}; - -} // namespace cachelib -} // namespace facebook diff --git a/cachelib/allocator/MM2Q.h b/cachelib/allocator/MM2Q.h index cece17e0e..3bf76b018 100644 --- a/cachelib/allocator/MM2Q.h +++ b/cachelib/allocator/MM2Q.h @@ -66,6 +66,7 @@ class MM2Q { enum LruType { Warm, WarmTail, Hot, Cold, ColdTail, NumTypes }; // Config class for MM2Q + // TODO: implement support for useCombinedLockForIterators struct Config { // Create from serialized config explicit Config(SerializationConfigType configState) @@ -460,6 +461,18 @@ class MM2Q { // is unchanged. bool add(T& node) noexcept; + // helper function to add the node under the container lock + void addNodeLocked(T& node, const Time& currTime); + + // adds the given nodes into the container and marks each as being present + // in the container. The nodes are added to the head of the lru. + // + // @param vector of nodes The nodes to be added to the container. + // @return number of nodes added - it is up to user to verify all + // expected nodes have been added. + template + uint32_t addBatch(It begin, It end) noexcept; + // removes the node from the lru and sets it previous and next to nullptr. // // @param node The node to be removed from the container. 
@@ -500,6 +513,11 @@ class MM2Q { template void withEvictionIterator(F&& f); + // Execute provided function under container lock. Function gets + // iterator passed as parameter. + template + void withPromotionIterator(F&& f); + // Execute provided function under container lock. template void withContainerLock(F&& f); @@ -889,16 +907,41 @@ bool MM2Q::Container::add(T& node) noexcept { if (node.isInMMContainer()) { return false; } + addNodeLocked(node, currTime); + return true; + }); +} - markHot(node); - unmarkCold(node); - unmarkTail(node); - lru_.getList(LruType::Hot).linkAtHead(node); - rebalance(); +// adds the node to the list assuming not in +// container and holding container lock +template T::*HookPtr> +void MM2Q::Container::addNodeLocked(T& node, const Time& currTime) { + XDCHECK(!node.isInMMContainer()); + markHot(node); + unmarkCold(node); + unmarkTail(node); + lru_.getList(LruType::Hot).linkAtHead(node); + rebalance(); + + node.markInMMContainer(); + setUpdateTime(node, currTime); +} - node.markInMMContainer(); - setUpdateTime(node, currTime); - return true; +template T::*HookPtr> +template +uint32_t MM2Q::Container::addBatch(It begin, It end) noexcept { + const auto currTime = static_cast