diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt index 78cfa7ca06..eed4053db3 100644 --- a/cachelib/allocator/CMakeLists.txt +++ b/cachelib/allocator/CMakeLists.txt @@ -54,6 +54,7 @@ add_library (cachelib_allocator PoolOptimizeStrategy.cpp PoolRebalancer.cpp PoolResizer.cpp + PrivateMemoryManager.cpp RebalanceStrategy.cpp SlabReleaseStats.cpp TempShmMapping.cpp diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 1d89593268..afe8491d05 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -56,6 +56,9 @@ CacheAllocator::CacheAllocator( tempShm_(type == InitMemType::kNone && isOnShm_ ? std::make_unique(config_.size) : nullptr), + privMemManager_(type == InitMemType::kNone && !isOnShm_ + ? std::make_unique() + : nullptr), shmManager_(type != InitMemType::kNone ? std::make_unique(config_.cacheDir, config_.usePosixShm) @@ -116,6 +119,18 @@ ShmSegmentOpts CacheAllocator::createShmCacheOpts() { return opts; } +template +PrivateSegmentOpts CacheAllocator::createPrivateSegmentOpts() { + PrivateSegmentOpts opts; + opts.alignment = sizeof(Slab); + auto memoryTierConfigs = config_.getMemoryTierConfigs(); + // TODO: we support single tier so far + XDCHECK_EQ(memoryTierConfigs.size(), 1ul); + opts.memBindNumaNodes = memoryTierConfigs[0].getMemBind(); + + return opts; +} + template std::unique_ptr CacheAllocator::createNewMemoryAllocator() { @@ -245,8 +260,11 @@ std::unique_ptr CacheAllocator::initAllocator( return std::make_unique( getAllocatorConfig(config_), tempShm_->getAddr(), config_.size); } else { - return std::make_unique(getAllocatorConfig(config_), - config_.size); + return std::make_unique( + getAllocatorConfig(config_), + privMemManager_->createMapping(config_.size, + createPrivateSegmentOpts()), + config_.size); } } else if (type == InitMemType::kMemNew) { return createNewMemoryAllocator(); diff --git a/cachelib/allocator/CacheAllocator.h 
b/cachelib/allocator/CacheAllocator.h index ed0096390a..1d66ecfa0f 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -57,6 +57,7 @@ #include "cachelib/allocator/PoolOptimizer.h" #include "cachelib/allocator/PoolRebalancer.h" #include "cachelib/allocator/PoolResizer.h" +#include "cachelib/allocator/PrivateMemoryManager.h" #include "cachelib/allocator/ReadOnlySharedCacheView.h" #include "cachelib/allocator/Reaper.h" #include "cachelib/allocator/RebalanceStrategy.h" @@ -1869,6 +1870,7 @@ class CacheAllocator : public CacheBase { std::chrono::seconds timeout = std::chrono::seconds{0}); ShmSegmentOpts createShmCacheOpts(); + PrivateSegmentOpts createPrivateSegmentOpts(); std::unique_ptr createNewMemoryAllocator(); std::unique_ptr restoreMemoryAllocator(); std::unique_ptr restoreCCacheManager(); @@ -1990,6 +1992,8 @@ class CacheAllocator : public CacheBase { // is not persisted when cache process exits. std::unique_ptr tempShm_; + std::unique_ptr privMemManager_; + std::unique_ptr shmManager_; // Deserialize data to restore cache allocator. Used only while attaching to diff --git a/cachelib/allocator/MemoryTierCacheConfig.h b/cachelib/allocator/MemoryTierCacheConfig.h index a60fb64d3e..a9fd2448fa 100644 --- a/cachelib/allocator/MemoryTierCacheConfig.h +++ b/cachelib/allocator/MemoryTierCacheConfig.h @@ -16,11 +16,14 @@ #pragma once +#include "cachelib/common/Utils.h" #include "cachelib/shm/ShmCommon.h" namespace facebook { namespace cachelib { class MemoryTierCacheConfig { + using bitmask_type = util::NumaBitMask; + public: // Creates instance of MemoryTierCacheConfig for Posix/SysV Shared memory. 
static MemoryTierCacheConfig fromShm() { @@ -42,12 +45,12 @@ class MemoryTierCacheConfig { size_t getRatio() const noexcept { return ratio; } // Allocate memory only from specified NUMA nodes - MemoryTierCacheConfig& setMemBind(const NumaBitMask& _numaNodes) { + MemoryTierCacheConfig& setMemBind(const bitmask_type& _numaNodes) { numaNodes = _numaNodes; return *this; } - const NumaBitMask& getMemBind() const noexcept { return numaNodes; } + const bitmask_type& getMemBind() const noexcept { return numaNodes; } size_t calculateTierSize(size_t totalCacheSize, size_t partitionNum) { // TODO: Call this method when tiers are enabled in allocator @@ -74,7 +77,7 @@ class MemoryTierCacheConfig { size_t ratio{1}; // Numa node(s) to bind the tier - NumaBitMask numaNodes; + bitmask_type numaNodes; // TODO: introduce a container for tier settings when adding support for // file-mapped memory diff --git a/cachelib/allocator/PrivateMemoryManager.cpp b/cachelib/allocator/PrivateMemoryManager.cpp new file mode 100644 index 0000000000..afcf1b2202 --- /dev/null +++ b/cachelib/allocator/PrivateMemoryManager.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cachelib/allocator/PrivateMemoryManager.h" + +#include + +namespace facebook { +namespace cachelib { + +PrivateMemoryManager::~PrivateMemoryManager() { + for (auto& entry : mappings) { + util::munmapMemory(entry.first, entry.second); + } +} + +void* PrivateMemoryManager::createMapping(size_t size, + PrivateSegmentOpts opts) { + void* addr = util::mmapAlignedZeroedMemory(opts.alignment, size); + auto guard = folly::makeGuard([&]() { + util::munmapMemory(addr, size); + mappings.erase(addr); + }); + + XDCHECK_EQ(reinterpret_cast(addr) & (opts.alignment - 1), 0ULL); + + if (!opts.memBindNumaNodes.empty()) { + util::mbindMemory(addr, size, MPOL_BIND, opts.memBindNumaNodes, 0); + } + + mappings.emplace(addr, size); + + guard.dismiss(); + return addr; +} +} // namespace cachelib +} // namespace facebook \ No newline at end of file diff --git a/cachelib/allocator/PrivateMemoryManager.h b/cachelib/allocator/PrivateMemoryManager.h new file mode 100644 index 0000000000..7880ca928a --- /dev/null +++ b/cachelib/allocator/PrivateMemoryManager.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "cachelib/common/Utils.h" + +namespace facebook { +namespace cachelib { + +struct PrivateSegmentOpts { + size_t alignment{1}; // alignment for mapping. 
+ util::NumaBitMask memBindNumaNodes; +}; + +class PrivateMemoryManager { + public: + PrivateMemoryManager() {} + ~PrivateMemoryManager(); + + void* createMapping(size_t size, PrivateSegmentOpts opts); + + private: + std::unordered_map mappings; +}; + +} // namespace cachelib +} // namespace facebook \ No newline at end of file diff --git a/cachelib/cachebench/util/CacheConfig.h b/cachelib/cachebench/util/CacheConfig.h index a1b8f52011..8768e00683 100644 --- a/cachelib/cachebench/util/CacheConfig.h +++ b/cachelib/cachebench/util/CacheConfig.h @@ -51,7 +51,7 @@ struct MemoryTierConfig : public JSONConfig { MemoryTierCacheConfig getMemoryTierCacheConfig() { MemoryTierCacheConfig config = MemoryTierCacheConfig::fromShm(); config.setRatio(ratio); - config.setMemBind(NumaBitMask(memBindNodes)); + config.setMemBind(util::NumaBitMask(memBindNodes)); return config; } diff --git a/cachelib/common/CMakeLists.txt b/cachelib/common/CMakeLists.txt index 1e6d1a887c..212f421324 100644 --- a/cachelib/common/CMakeLists.txt +++ b/cachelib/common/CMakeLists.txt @@ -39,6 +39,7 @@ target_link_libraries(cachelib_common PUBLIC Folly::folly_exception_tracer Folly::folly_exception_tracer_base Folly::folly_exception_counter + numa ) install(TARGETS cachelib_common diff --git a/cachelib/common/Utils.cpp b/cachelib/common/Utils.cpp index 3e1494698f..daeb15c7e4 100644 --- a/cachelib/common/Utils.cpp +++ b/cachelib/common/Utils.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -181,6 +182,22 @@ void* mmapAlignedZeroedMemory(size_t alignment, throw std::system_error(errno, std::system_category(), "Cannot mmap"); } +void munmapMemory(void* addr, size_t size) { munmap(addr, size); } + +void mbindMemory(void* addr, + unsigned long len, + int mode, + const NumaBitMask& mask, + unsigned int flags) { + auto nodesMask = mask.getNativeBitmask(); + + long ret = mbind(addr, len, mode, nodesMask->maskp, nodesMask->size, flags); + if (ret != 0) { + util::throwSystemError( + 
errno, folly::sformat("mbind() failed: {}", std::strerror(errno))); + } +} + void setMaxLockMemory(uint64_t bytes) { struct rlimit rlim { bytes, bytes diff --git a/cachelib/common/Utils.h b/cachelib/common/Utils.h index c0b249de12..72193fc90f 100644 --- a/cachelib/common/Utils.h +++ b/cachelib/common/Utils.h @@ -18,6 +18,8 @@ #include #include +#include +#include #include @@ -35,6 +37,57 @@ namespace facebook { namespace cachelib { namespace util { +class NumaBitMask { + public: + using native_bitmask_type = struct bitmask*; + + NumaBitMask() { nodesMask = numa_allocate_nodemask(); } + + NumaBitMask(const NumaBitMask& other) { + nodesMask = numa_allocate_nodemask(); + copy_bitmask_to_bitmask(other.nodesMask, nodesMask); + } + + NumaBitMask(NumaBitMask&& other) { + nodesMask = other.nodesMask; + other.nodesMask = nullptr; + } + + NumaBitMask(const std::string& str) { + nodesMask = numa_parse_nodestring_all(str.c_str()); + } + + ~NumaBitMask() { + if (nodesMask) { + numa_bitmask_free(nodesMask); + } + } + + constexpr NumaBitMask& operator=(const NumaBitMask& other) { + if (this != &other) { + if (!nodesMask) { + nodesMask = numa_allocate_nodemask(); + } + copy_bitmask_to_bitmask(other.nodesMask, nodesMask); + } + return *this; + } + + native_bitmask_type getNativeBitmask() const noexcept { return nodesMask; } + + NumaBitMask& setBit(unsigned int n) { + numa_bitmask_setbit(nodesMask, n); + return *this; + } + + bool empty() const noexcept { + return numa_bitmask_equal(numa_no_nodes_ptr, nodesMask) == 1; + } + + protected: + native_bitmask_type nodesMask = nullptr; +}; + // A wrapper class for functions to collect counters. // It can be initialized by either // 1. 
folly::StringPiece, double -> void, or @@ -288,6 +341,25 @@ void* mmapAlignedZeroedMemory(size_t alignment, size_t numBytes, bool noAccess = false); +// destroy the mapping created by mmapAlignedZeroedMemory +// +// @param addr the pointer to the memory to unmap +// @param size size of the memory region +void munmapMemory(void* addr, size_t size); + +// binds memory to the NUMA nodes specified by mask. +// +// @param addr the pointer to the memory to bind. +// @param len length of the memory. +// @param mode mode supported by mbind call +// @param mask mask specifies node ids +// @param flags flags supported by mbind call +void mbindMemory(void* addr, + unsigned long len, + int mode, + const NumaBitMask& mask, + unsigned int flags); + // get the number of pages in the range which are resident in the process. // // @param mem memory start which is page aligned diff --git a/cachelib/shm/PosixShmSegment.cpp b/cachelib/shm/PosixShmSegment.cpp index 7d47d061d1..4c19e229fd 100644 --- a/cachelib/shm/PosixShmSegment.cpp +++ b/cachelib/shm/PosixShmSegment.cpp @@ -31,6 +31,8 @@ namespace facebook { namespace cachelib { +using NumaBitMask = util::NumaBitMask; + constexpr static mode_t kRWMode = 0666; typedef struct stat stat_t; diff --git a/cachelib/shm/ShmCommon.h b/cachelib/shm/ShmCommon.h index 8db8707515..bc451c46d1 100644 --- a/cachelib/shm/ShmCommon.h +++ b/cachelib/shm/ShmCommon.h @@ -15,8 +15,6 @@ */ #pragma once -#include -#include #include #include #include @@ -30,6 +28,8 @@ #include #pragma GCC diagnostic pop +#include "cachelib/common/Utils.h" + /* On Mac OS / FreeBSD, mmap(2) syscall does not support these flags */ #ifndef MAP_LOCKED #define MAP_LOCKED 0 #endif @@ -72,62 +72,11 @@ enum PageSizeT { ONE_GB, }; -class NumaBitMask { - public: - using native_bitmask_type = struct bitmask*; - - NumaBitMask() { nodesMask = numa_allocate_nodemask(); } - - NumaBitMask(const NumaBitMask& other) { - nodesMask = numa_allocate_nodemask(); - copy_bitmask_to_bitmask(other.nodesMask,
nodesMask); - } - - NumaBitMask(NumaBitMask&& other) { - nodesMask = other.nodesMask; - other.nodesMask = nullptr; - } - - NumaBitMask(const std::string& str) { - nodesMask = numa_parse_nodestring_all(str.c_str()); - } - - ~NumaBitMask() { - if (nodesMask) { - numa_bitmask_free(nodesMask); - } - } - - constexpr NumaBitMask& operator=(const NumaBitMask& other) { - if (this != &other) { - if (!nodesMask) { - nodesMask = numa_allocate_nodemask(); - } - copy_bitmask_to_bitmask(other.nodesMask, nodesMask); - } - return *this; - } - - native_bitmask_type getNativeBitmask() const noexcept { return nodesMask; } - - NumaBitMask& setBit(unsigned int n) { - numa_bitmask_setbit(nodesMask, n); - return *this; - } - - bool empty() const noexcept { - return numa_bitmask_equal(numa_no_nodes_ptr, nodesMask) == 1; - } - - protected: - native_bitmask_type nodesMask = nullptr; -}; - struct ShmSegmentOpts { PageSizeT pageSize{PageSizeT::NORMAL}; bool readOnly{false}; size_t alignment{1}; // alignment for mapping. 
- NumaBitMask memBindNumaNodes; + util::NumaBitMask memBindNumaNodes; explicit ShmSegmentOpts(PageSizeT p) : pageSize(p) {} explicit ShmSegmentOpts(PageSizeT p, bool ro) : pageSize(p), readOnly(ro) {} diff --git a/cachelib/shm/SysVShmSegment.cpp b/cachelib/shm/SysVShmSegment.cpp index 29485fa0c4..1cb28da70b 100644 --- a/cachelib/shm/SysVShmSegment.cpp +++ b/cachelib/shm/SysVShmSegment.cpp @@ -189,21 +189,6 @@ void shmCtlImpl(int shmid, int cmd, shmid_ds* buf) { } } -void mbindImpl(void* addr, - unsigned long len, - int mode, - - const NumaBitMask& memBindNumaNodes, - unsigned int flags) { - auto nodesMask = memBindNumaNodes.getNativeBitmask(); - - long ret = mbind(addr, len, mode, nodesMask->maskp, nodesMask->size, flags); - if (ret != 0) { - util::throwSystemError( - errno, folly::sformat("mbind() failed: {}", std::strerror(errno))); - } -} - } // namespace detail void ensureSizeforHugePage(size_t size) { @@ -300,7 +285,7 @@ void SysVShmSegment::memBind(void* addr) const { if (opts_.memBindNumaNodes.empty()) { return; } - detail::mbindImpl(addr, getSize(), MPOL_BIND, opts_.memBindNumaNodes, 0); + util::mbindMemory(addr, getSize(), MPOL_BIND, opts_.memBindNumaNodes, 0); } void SysVShmSegment::markForRemoval() { diff --git a/examples/single_tier_cache/main.cpp b/examples/single_tier_cache/main.cpp index de6373622c..9c19dfeea9 100644 --- a/examples/single_tier_cache/main.cpp +++ b/examples/single_tier_cache/main.cpp @@ -25,7 +25,7 @@ using CacheConfig = typename Cache::Config; using CacheKey = typename Cache::Key; using CacheReadHandle = typename Cache::ReadHandle; using MemoryTierCacheConfig = typename cachelib::MemoryTierCacheConfig; -using NumaBitMask = typename cachelib::NumaBitMask; +using NumaBitMask = typename cachelib::util::NumaBitMask; // Global cache object and a default cache pool std::unique_ptr gCache_;