From c43227abb2570db0bc7b0af52b440bd6c7b476a0 Mon Sep 17 00:00:00 2001 From: Helen Date: Tue, 20 Aug 2024 14:59:15 -0400 Subject: [PATCH 1/3] added sieve implementation and relevant config changes --- cachelib/allocator/CacheAllocator.cpp | 1 + cachelib/allocator/CacheAllocator.h | 8 + cachelib/allocator/CacheTraits.h | 6 + cachelib/allocator/MMSieve.h | 561 ++++++++++++++++++ cachelib/allocator/datastruct/SieveList.h | 448 ++++++++++++++ .../datastruct/serialize/objects.thrift | 7 + cachelib/allocator/serialize/objects.thrift | 19 + 7 files changed, 1050 insertions(+) create mode 100755 cachelib/allocator/MMSieve.h create mode 100755 cachelib/allocator/datastruct/SieveList.h diff --git a/cachelib/allocator/CacheAllocator.cpp b/cachelib/allocator/CacheAllocator.cpp index 010a1e9597..122ebe6c2e 100644 --- a/cachelib/allocator/CacheAllocator.cpp +++ b/cachelib/allocator/CacheAllocator.cpp @@ -21,4 +21,5 @@ template class CacheAllocator; template class CacheAllocator; template class CacheAllocator; template class CacheAllocator; +template class CacheAllocator; } // namespace facebook::cachelib diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 94c2fdf9e8..2164d0af4e 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -5980,4 +5980,12 @@ using Lru2QAllocator = CacheAllocator; // inserted items. And eventually it will onl admit items that are accessed // beyond a threshold into the warm cache. using TinyLFUAllocator = CacheAllocator; + + +// CacheAllocator with Sieve eviction policy +// It uses the access bit to keep track of an item's popularity +// During eviction, the hand ptr sweeps backward to find an item with its access bit turned off, +// and turns off access bit as it goes. 
+using SieveAllocator = CacheAllocator<SieveCacheTrait>; + } // namespace facebook::cachelib diff --git a/cachelib/allocator/CacheTraits.h b/cachelib/allocator/CacheTraits.h index 15e68b9dcc..9994a64436 100644 --- a/cachelib/allocator/CacheTraits.h +++ b/cachelib/allocator/CacheTraits.h @@ -55,5 +55,11 @@ struct TinyLFUCacheTrait { using AccessTypeLocks = SharedMutexBuckets; }; +struct SieveCacheTrait { + using MMType = MMSieve; + using AccessType = ChainedHashTable; + using AccessTypeLocks = SharedMutexBuckets; +}; + } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/MMSieve.h b/cachelib/allocator/MMSieve.h new file mode 100755 index 0000000000..2ed56573d3 --- /dev/null +++ b/cachelib/allocator/MMSieve.h @@ -0,0 +1,561 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#include +#pragma GCC diagnostic pop +#include +#include +#include + +#include "cachelib/allocator/Cache.h" +#include "cachelib/allocator/CacheStats.h" +#include "cachelib/allocator/Util.h" +#include "cachelib/allocator/datastruct/SieveList.h" +#include "cachelib/allocator/memory/serialize/gen-cpp2/objects_types.h" +#include "cachelib/common/CompilerUtils.h" +#include "cachelib/common/Mutex.h" +#include + + +namespace facebook::cachelib { + +class MMSieve { + public: + // unique identifier per MMType + static const int kId; + + // forward declaration;serialize/gen-cpp2/objects_types.h + template + using Hook = SieveListHook; + using SerializationType = serialization::MMSieveObject; + using SerializationConfigType = serialization::MMSieveConfig; + using SerializationTypeContainer = serialization::MMSieveCollection; + + // This is not applicable for MMSieve, just for compile of cache allocator + enum LruType { NumTypes }; + + // Config class for MMSieve + struct Config { + // create from serialized config + explicit Config(SerializationConfigType configState) + : Config( + *configState.updateOnWrite(), + *configState.updateOnRead() + ) {} + + // @param udpateOnW whether to set visit bit for the item on write + // @param updateOnR whether to set visit bit for the item on read + Config(bool updateOnW, bool updateOnR) + : updateOnWrite(updateOnW), + updateOnRead(updateOnR), + useCombinedLockForIterators(false) {} + + // @param udpateOnW whether to set visit bit for the item on write + // @param updateOnR whether to set visit bit for the item on read + // useCombinedLockForIterators Whether to use combined locking for + // withEvictionIterator + Config(bool updateOnW, + bool updateOnR, + bool useCombinedLockForIterators) + : updateOnWrite(updateOnW), + updateOnRead(updateOnR), + useCombinedLockForIterators(useCombinedLockForIterators) {} + + Config() 
= default; + Config(const Config& rhs) = default; + Config(Config&& rhs) = default; + + Config& operator=(const Config& rhs) = default; + Config& operator=(Config&& rhs) = default; + + template + void addExtraConfig(Args...) {} + + + // Sieve will always set visit bit on write as we there's no locking involved. + bool updateOnWrite{true}; + + // whether the sieve needs to be updated on reads for recordAccess. If + // false, accessing the cache for reads does not promote the cached item + // to the head of the sieve. + bool updateOnRead{true}; + + // Minimum interval between reconfigurations. If 0, reconfigure is never + // called. + std::chrono::seconds mmReconfigureIntervalSecs{0}; + + // Whether to use combined locking for withEvictionIterator. + bool useCombinedLockForIterators{false}; + }; + + // The container object which can be used to keep track of objects of type + // T. T must have a public member of type Hook. This object is wrapper + // around SieveList, is thread safe and can be accessed from multiple threads. + // The current implementation models an SIEVE using the above SieveList + // implementation. + template T::*HookPtr> + struct Container { + private: + using SIEVEList = SieveList; + using Mutex = folly::DistributedMutex; + using LockHolder = std::unique_lock; + using PtrCompressor = typename T::PtrCompressor; + using Time = typename Hook::Time; + using CompressedPtr = typename T::CompressedPtr; + using RefFlags = typename T::Flags; + + public: + Container() = default; + Container(Config c, PtrCompressor compressor) + : compressor_(std::move(compressor)), + queue_(compressor_), + config_(std::move(c)) { + } + Container(serialization::MMSieveObject object, PtrCompressor compressor); + + Container(const Container&) = delete; + Container& operator=(const Container&) = delete; + + using Iterator = typename SIEVEList::Iterator; + + // context for iterating the MM container. 
At any given point of time, + // there can be only one iterator active since we need to lock the LRU for + // iteration. we can support multiple iterators at same time, by using a + // shared ptr in the context for the lock holder in the future. + class LockedIterator : public Iterator { + public: + // noncopyable but movable. + LockedIterator(const LockedIterator&) = delete; + LockedIterator& operator=(const LockedIterator&) = delete; + + LockedIterator(LockedIterator&&) noexcept = default; + + private: + // private because it's easy to misuse and cause deadlock for MMSieve + LockedIterator& operator=(LockedIterator&&) noexcept = default; + + // create an sieve iterator with the lock being held. + LockedIterator(LockHolder l, const Iterator& iter) noexcept; + + // only the container can create iterators + friend Container; + + // lock protecting the validity of the iterator + LockHolder l_; + }; + + // records the information that the node was accessed. + // accessed node remains where they are, so no locking required. + // @param node node that we want to mark as relevant/accessed + // @param mode the mode for the access operation. + // + // @return True if the information is recorded and bumped the node + // to the head of the sieve, returns false otherwise + bool recordAccess(T& node, AccessMode mode) noexcept; + + // adds the given node into the container and marks it as being present in + // the container. The node is added to the head of the sieve. + // + // @param node The node to be added to the container. + // @return True if the node was successfully added to the container. False + // if the node was already in the contianer. On error state of node + // is unchanged. + bool add(T& node) noexcept; + + // removes the node from the sieve and sets it previous and next to nullptr. + // + // @param node The node to be removed from the container. + // @return True if the node was successfully removed from the container. 
+ // False if the node was not part of the container. On error, the + // state of node is unchanged. + bool remove(T& node) noexcept; + + // same as the above but uses an iterator context. The iterator is updated + // on removal of the corresponding node to point to the next node. The + // iterator context is responsible for locking. + // + // iterator will be advanced to the next node after removing the node + // + // @param it Iterator that will be removed + void remove(Iterator& it) noexcept; + + // replaces one node with another, at the same position + // + // @param oldNode node being replaced + // @param newNode node to replace oldNode with + // + // @return true If the replace was successful. Returns false if the + // destination node did not exist in the container, or if the + // source node already existed. + bool replace(T& oldNode, T& newNode) noexcept; + + // Obtain an iterator that start from the tail and can be used + // to search for evictions. This iterator holds a lock to this + // container and only one such iterator can exist at a time + LockedIterator getEvictionIterator() noexcept; + + // Execute provided function under container lock. Function gets + // iterator passed as parameter. + template + void withEvictionIterator(F&& f); + + // Execute provided function under container lock. + template + void withContainerLock(F&& f); + + // get copy of current config + Config getConfig() const; + + // override the existing config with the new one. + void setConfig(const Config& newConfig); + + bool isEmpty() const noexcept { return size() == 0; } + + // reconfigure the MMContainer: update refresh time according to current + // tail age + void reconfigureLocked(const Time& currTime); + + // returns the number of elements in the container + size_t size() const noexcept { + return sieveMutex_->lock_combine([this]() { return queue_.size(); }); + } + + // Returns the eviction age stats. 
See CacheStats.h for details + EvictionAgeStat getEvictionAgeStat(uint64_t projectedLength) const noexcept; + + // for saving the state of the sieve + // + // precondition: serialization must happen without any reader or writer + // present. Any modification of this object afterwards will result in an + // invalid, inconsistent state for the serialized data. + // + serialization::MMSieveObject saveState() const noexcept; + + // return the stats for this container. + MMContainerStat getStats() const noexcept; + + static LruType getLruType(const T& /* node */) noexcept { + return LruType{}; + } + + void inspectSieveList() noexcept; + + void inspectHand() noexcept; + + private: + EvictionAgeStat getEvictionAgeStatLocked( + uint64_t projectedLength) const noexcept; + + static Time getUpdateTime(const T& node) noexcept { + return (node.*HookPtr).getUpdateTime(); + } + + static void setUpdateTime(T& node, Time time) noexcept { + (node.*HookPtr).setUpdateTime(time); + } + + // remove node from sieve and adjust insertion points + // @param node node to remove + void removeLocked(T& node); + + // Bit MM_BIT_0 is used to record if the item is in tail. This + // is used to implement LRU insertion points + void markTail(T& node) noexcept { + node.template setFlag(); + } + + void unmarkTail(T& node) noexcept { + node.template unSetFlag(); + } + + bool isTail(T& node) const noexcept { + return node.template isFlagSet(); + } + + // Bit MM_BIT_1 is used to record if the item has been accessed since + // being written in cache. Unaccessed items are ignored when determining + // projected update time. + void markAccessed(T& node) noexcept { + queue_.setAsVisited(node); + } + + void unmarkAccessed(T& node) noexcept { + queue_.setAsUnvisited(node); + } + + bool isAccessed(const T& node) const noexcept { + return queue_.isVisited(node); + } + + // protects all operations on the sieve. We never really just read the state + // of the Sieve. 
Hence we dont really require a RW mutex at this point of + // time. + mutable folly::cacheline_aligned sieveMutex_; + + const PtrCompressor compressor_{}; + + // Sieve FIFO queue + SIEVEList queue_{}; + + // The next time to reconfigure the container. + std::atomic