Skip to content

Commit

Permalink
NUMA bindings support for private memory
Browse files Browse the repository at this point in the history
  • Loading branch information
vinser52 committed Feb 15, 2023
1 parent 3938082 commit 5f981f0
Show file tree
Hide file tree
Showing 14 changed files with 223 additions and 77 deletions.
1 change: 1 addition & 0 deletions cachelib/allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ add_library (cachelib_allocator
PoolOptimizeStrategy.cpp
PoolRebalancer.cpp
PoolResizer.cpp
PrivateMemoryManager.cpp
RebalanceStrategy.cpp
SlabReleaseStats.cpp
TempShmMapping.cpp
Expand Down
22 changes: 20 additions & 2 deletions cachelib/allocator/CacheAllocator-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ CacheAllocator<CacheTrait>::CacheAllocator(
tempShm_(type == InitMemType::kNone && isOnShm_
? std::make_unique<TempShmMapping>(config_.size)
: nullptr),
privMemManager_(type == InitMemType::kNone && !isOnShm_
? std::make_unique<PrivateMemoryManager>()
: nullptr),
shmManager_(type != InitMemType::kNone
? std::make_unique<ShmManager>(config_.cacheDir,
config_.usePosixShm)
Expand Down Expand Up @@ -116,6 +119,18 @@ ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts() {
return opts;
}

// Builds the options used to create the private (non-shared) memory mapping
// that backs the cache: the mapping is aligned to the slab size and bound to
// the NUMA nodes configured for the (single) memory tier.
//
// @return options to pass to PrivateMemoryManager::createMapping()
template <typename CacheTrait>
PrivateSegmentOpts CacheAllocator<CacheTrait>::createPrivateSegmentOpts() {
  PrivateSegmentOpts opts;
  opts.alignment = sizeof(Slab);

  // Bind by const reference: only the first tier's NUMA mask is read here, so
  // there is no need to copy the tier configs (each copy would also duplicate
  // the native NUMA bitmask it holds).
  const auto& memoryTierConfigs = config_.getMemoryTierConfigs();
  // TODO: we support single tier so far
  XDCHECK_EQ(memoryTierConfigs.size(), 1ul);
  opts.memBindNumaNodes = memoryTierConfigs[0].getMemBind();

  return opts;
}

template <typename CacheTrait>
std::unique_ptr<MemoryAllocator>
CacheAllocator<CacheTrait>::createNewMemoryAllocator() {
Expand Down Expand Up @@ -245,8 +260,11 @@ std::unique_ptr<MemoryAllocator> CacheAllocator<CacheTrait>::initAllocator(
return std::make_unique<MemoryAllocator>(
getAllocatorConfig(config_), tempShm_->getAddr(), config_.size);
} else {
return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
config_.size);
return std::make_unique<MemoryAllocator>(
getAllocatorConfig(config_),
privMemManager_->createMapping(config_.size,
createPrivateSegmentOpts()),
config_.size);
}
} else if (type == InitMemType::kMemNew) {
return createNewMemoryAllocator();
Expand Down
4 changes: 4 additions & 0 deletions cachelib/allocator/CacheAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "cachelib/allocator/PoolOptimizer.h"
#include "cachelib/allocator/PoolRebalancer.h"
#include "cachelib/allocator/PoolResizer.h"
#include "cachelib/allocator/PrivateMemoryManager.h"
#include "cachelib/allocator/ReadOnlySharedCacheView.h"
#include "cachelib/allocator/Reaper.h"
#include "cachelib/allocator/RebalanceStrategy.h"
Expand Down Expand Up @@ -1869,6 +1870,7 @@ class CacheAllocator : public CacheBase {
std::chrono::seconds timeout = std::chrono::seconds{0});

ShmSegmentOpts createShmCacheOpts();
PrivateSegmentOpts createPrivateSegmentOpts();
std::unique_ptr<MemoryAllocator> createNewMemoryAllocator();
std::unique_ptr<MemoryAllocator> restoreMemoryAllocator();
std::unique_ptr<CCacheManager> restoreCCacheManager();
Expand Down Expand Up @@ -1990,6 +1992,8 @@ class CacheAllocator : public CacheBase {
// is not persisted when cache process exits.
std::unique_ptr<TempShmMapping> tempShm_;

std::unique_ptr<PrivateMemoryManager> privMemManager_;

std::unique_ptr<ShmManager> shmManager_;

// Deserialize data to restore cache allocator. Used only while attaching to
Expand Down
9 changes: 6 additions & 3 deletions cachelib/allocator/MemoryTierCacheConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@

#pragma once

#include "cachelib/common/Utils.h"
#include "cachelib/shm/ShmCommon.h"

namespace facebook {
namespace cachelib {
class MemoryTierCacheConfig {
using bitmask_type = util::NumaBitMask;

public:
// Creates instance of MemoryTierCacheConfig for Posix/SysV Shared memory.
static MemoryTierCacheConfig fromShm() {
Expand All @@ -42,12 +45,12 @@ class MemoryTierCacheConfig {
size_t getRatio() const noexcept { return ratio; }

// Allocate memory only from specified NUMA nodes
MemoryTierCacheConfig& setMemBind(const NumaBitMask& _numaNodes) {
// Stores a copy of the given NUMA node mask as this tier's memory binding
// and returns *this so that setters can be chained.
MemoryTierCacheConfig& setMemBind(const bitmask_type& _numaNodes) {
  this->numaNodes = _numaNodes;
  return *this;
}

const NumaBitMask& getMemBind() const noexcept { return numaNodes; }
// Returns the NUMA node mask currently configured for this tier.
const bitmask_type& getMemBind() const noexcept {
  return this->numaNodes;
}

size_t calculateTierSize(size_t totalCacheSize, size_t partitionNum) {
// TODO: Call this method when tiers are enabled in allocator
Expand All @@ -74,7 +77,7 @@ class MemoryTierCacheConfig {
size_t ratio{1};

// Numa node(s) to bind the tier
NumaBitMask numaNodes;
bitmask_type numaNodes;

// TODO: introduce a container for tier settings when adding support for
// file-mapped memory
Expand Down
50 changes: 50 additions & 0 deletions cachelib/allocator/PrivateMemoryManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cachelib/allocator/PrivateMemoryManager.h"

#include <folly/ScopeGuard.h>

namespace facebook {
namespace cachelib {

// Unmaps every region that is still tracked in `mappings`
// (key: start address, value: size in bytes).
PrivateMemoryManager::~PrivateMemoryManager() {
  for (auto it = mappings.begin(); it != mappings.end(); ++it) {
    void* const regionStart = it->first;
    const size_t regionSize = it->second;
    util::munmapMemory(regionStart, regionSize);
  }
}

// Creates a zeroed mapping of `size` bytes aligned to `opts.alignment`,
// binds it to `opts.memBindNumaNodes` (when that mask is not empty) and
// records it in `mappings` so the destructor can unmap it.
//
// @param size  size of the mapping in bytes
// @param opts  alignment and NUMA binding for the mapping
// @return      start address of the new mapping
//
// If any step after the mmap throws, the scope guard unmaps the region and
// removes its entry from `mappings` before the exception propagates.
void* PrivateMemoryManager::createMapping(size_t size,
                                          PrivateSegmentOpts opts) {
  void* const region = util::mmapAlignedZeroedMemory(opts.alignment, size);
  auto rollback = folly::makeGuard([&]() {
    util::munmapMemory(region, size);
    mappings.erase(region);
  });

  // The low bits of the address must be clear for the requested alignment.
  XDCHECK_EQ(reinterpret_cast<uint64_t>(region) & (opts.alignment - 1), 0ULL);

  const bool bindRequested = !opts.memBindNumaNodes.empty();
  if (bindRequested) {
    util::mbindMemory(region, size, MPOL_BIND, opts.memBindNumaNodes, 0);
  }

  mappings.emplace(region, size);

  // Everything succeeded: keep the mapping alive.
  rollback.dismiss();
  return region;
}
} // namespace cachelib
} // namespace facebook
44 changes: 44 additions & 0 deletions cachelib/allocator/PrivateMemoryManager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstddef>
#include <unordered_map>

#include "cachelib/common/Utils.h"

namespace facebook {
namespace cachelib {

// Options describing how a private memory mapping should be created.
struct PrivateSegmentOpts {
  // Alignment for the mapping.
  size_t alignment = 1;

  // NUMA nodes the mapping is bound to; PrivateMemoryManager::createMapping()
  // skips the binding step when this mask is empty.
  util::NumaBitMask memBindNumaNodes;
};

// Owns private memory mappings created through createMapping() and unmaps
// all of them on destruction.
class PrivateMemoryManager {
 public:
  PrivateMemoryManager() = default;
  ~PrivateMemoryManager();

  // The manager owns the mapped regions. A copy would make two objects unmap
  // the same regions in their destructors, so copying is disabled.
  PrivateMemoryManager(const PrivateMemoryManager&) = delete;
  PrivateMemoryManager& operator=(const PrivateMemoryManager&) = delete;

  // Creates a new mapping and keeps track of it.
  //
  // @param size  size of the mapping in bytes
  // @param opts  alignment and NUMA binding for the mapping
  // @return      start address of the mapping; it stays valid until this
  //              manager is destroyed
  void* createMapping(size_t size, PrivateSegmentOpts opts);

 private:
  // start address of a mapping -> its size in bytes
  std::unordered_map<void*, size_t> mappings;
};

} // namespace cachelib
} // namespace facebook
2 changes: 1 addition & 1 deletion cachelib/cachebench/util/CacheConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct MemoryTierConfig : public JSONConfig {
MemoryTierCacheConfig getMemoryTierCacheConfig() {
MemoryTierCacheConfig config = MemoryTierCacheConfig::fromShm();
config.setRatio(ratio);
config.setMemBind(NumaBitMask(memBindNodes));
config.setMemBind(util::NumaBitMask(memBindNodes));
return config;
}

Expand Down
1 change: 1 addition & 0 deletions cachelib/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ target_link_libraries(cachelib_common PUBLIC
Folly::folly_exception_tracer
Folly::folly_exception_tracer_base
Folly::folly_exception_counter
numa
)

install(TARGETS cachelib_common
Expand Down
17 changes: 17 additions & 0 deletions cachelib/common/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <dirent.h>
#include <folly/experimental/exception_tracer/ExceptionTracer.h>
#include <numaif.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/shm.h>
Expand Down Expand Up @@ -181,6 +182,22 @@ void* mmapAlignedZeroedMemory(size_t alignment,
throw std::system_error(errno, std::system_category(), "Cannot mmap");
}

// Unmaps the region [addr, addr + size). The result of munmap() is
// intentionally not inspected.
void munmapMemory(void* addr, size_t size) {
  static_cast<void>(munmap(addr, size));
}

// Binds the memory range [addr, addr + len) to the NUMA nodes set in `mask`
// by calling mbind(2).
//
// @param addr   start of the memory range to bind
// @param len    length of the range in bytes
// @param mode   memory policy mode passed to mbind (e.g. MPOL_BIND)
// @param mask   NUMA nodes the policy applies to
// @param flags  flags passed to mbind
//
// Reports failures through util::throwSystemError(): EINVAL when `mask` holds
// no native bitmask, otherwise the errno set by mbind().
void mbindMemory(void* addr,
                 unsigned long len,
                 int mode,
                 const NumaBitMask& mask,
                 unsigned int flags) {
  const auto nodesMask = mask.getNativeBitmask();
  if (nodesMask == nullptr) {
    // e.g. a moved-from NumaBitMask; dereferencing it below would crash.
    util::throwSystemError(
        EINVAL, "mbind() failed: NUMA node mask is not initialized");
  }

  // The kernel only looks at the first (maxnode - 1) bits of the node mask,
  // so pass size + 1 to make every bit of the mask count. libnuma does the
  // same in its own mbind wrapper.
  const long ret = mbind(
      addr, len, mode, nodesMask->maskp, nodesMask->size + 1, flags);
  if (ret != 0) {
    // Capture errno right away: building the message may clobber it.
    const int err = errno;
    util::throwSystemError(
        err, folly::sformat("mbind() failed: {}", std::strerror(err)));
  }
}

void setMaxLockMemory(uint64_t bytes) {
struct rlimit rlim {
bytes, bytes
Expand Down
72 changes: 72 additions & 0 deletions cachelib/common/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <folly/Format.h>
#include <folly/Random.h>
#include <numa.h>
#include <numaif.h>

#include <stdexcept>
#include <string>
#include <unordered_map>

Expand All @@ -35,6 +37,57 @@ namespace facebook {
namespace cachelib {
namespace util {

class NumaBitMask {
public:
using native_bitmask_type = struct bitmask*;

NumaBitMask() { nodesMask = numa_allocate_nodemask(); }

NumaBitMask(const NumaBitMask& other) {
nodesMask = numa_allocate_nodemask();
copy_bitmask_to_bitmask(other.nodesMask, nodesMask);
}

NumaBitMask(NumaBitMask&& other) {
nodesMask = other.nodesMask;
other.nodesMask = nullptr;
}

NumaBitMask(const std::string& str) {
nodesMask = numa_parse_nodestring_all(str.c_str());
}

~NumaBitMask() {
if (nodesMask) {
numa_bitmask_free(nodesMask);
}
}

constexpr NumaBitMask& operator=(const NumaBitMask& other) {
if (this != &other) {
if (!nodesMask) {
nodesMask = numa_allocate_nodemask();
}
copy_bitmask_to_bitmask(other.nodesMask, nodesMask);
}
return *this;
}

native_bitmask_type getNativeBitmask() const noexcept { return nodesMask; }

NumaBitMask& setBit(unsigned int n) {
numa_bitmask_setbit(nodesMask, n);
return *this;
}

bool empty() const noexcept {
return numa_bitmask_equal(numa_no_nodes_ptr, nodesMask) == 1;
}

protected:
native_bitmask_type nodesMask = nullptr;
};

// A wrapper class for functions to collect counters.
// It can be initialized by either
// 1. folly::StringPiece, double -> void, or
Expand Down Expand Up @@ -288,6 +341,25 @@ void* mmapAlignedZeroedMemory(size_t alignment,
size_t numBytes,
bool noAccess = false);

// destroy the mapping created by mmapAlignedZeroedMemory
//
// @param addr the pointer to the memory to unmap
// @param size size of the memory region
void munmapMemory(void* addr, size_t size);

// binds memory to the NUMA nodes specified by mask.
//
// @param addr the pointer to the memory to bind.
// @param len length of the memory.
// @param mode mode supported by mbind call
// @param mask mask specifies node ids
// @param flags flags supported by mbind call
void mbindMemory(void* addr,
unsigned long len,
int mode,
const NumaBitMask& mask,
unsigned int flags);

// get the number of pages in the range which are resident in the process.
//
// @param mem memory start which is page aligned
Expand Down
2 changes: 2 additions & 0 deletions cachelib/shm/PosixShmSegment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
namespace facebook {
namespace cachelib {

using NumaBitMask = util::NumaBitMask;

constexpr static mode_t kRWMode = 0666;
typedef struct stat stat_t;

Expand Down
Loading

0 comments on commit 5f981f0

Please sign in to comment.