Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[upstream] NUMA bindings support for private memory #63

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cachelib/allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ add_library (cachelib_allocator
PoolOptimizeStrategy.cpp
PoolRebalancer.cpp
PoolResizer.cpp
PrivateMemoryManager.cpp
RebalanceStrategy.cpp
SlabReleaseStats.cpp
TempShmMapping.cpp
Expand Down
22 changes: 20 additions & 2 deletions cachelib/allocator/CacheAllocator-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ CacheAllocator<CacheTrait>::CacheAllocator(
tempShm_(type == InitMemType::kNone && isOnShm_
? std::make_unique<TempShmMapping>(config_.size)
: nullptr),
privMemManager_(type == InitMemType::kNone && !isOnShm_
? std::make_unique<PrivateMemoryManager>()
: nullptr),
shmManager_(type != InitMemType::kNone
? std::make_unique<ShmManager>(config_.cacheDir,
config_.usePosixShm)
Expand Down Expand Up @@ -116,6 +119,18 @@ ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts() {
return opts;
}

// Builds the options used when the cache is backed by a private (non-shared)
// memory mapping.
//
// The mapping is aligned to sizeof(Slab) and bound to the NUMA nodes
// configured on the first memory tier; only a single tier is supported so far.
//
// @return options to hand to PrivateMemoryManager::createMapping()
template <typename CacheTrait>
PrivateSegmentOpts CacheAllocator<CacheTrait>::createPrivateSegmentOpts() {
  // Bind by const reference so the tier configs are not copied just to read
  // one element. If the getter returns by value, the temporary's lifetime is
  // extended to the end of this scope, so this is safe either way.
  const auto& memoryTierConfigs = config_.getMemoryTierConfigs();
  // TODO: we support single tier so far
  XDCHECK_EQ(memoryTierConfigs.size(), 1ul);

  PrivateSegmentOpts opts;
  opts.alignment = sizeof(Slab);
  opts.memBindNumaNodes = memoryTierConfigs[0].getMemBind();
  return opts;
}

template <typename CacheTrait>
std::unique_ptr<MemoryAllocator>
CacheAllocator<CacheTrait>::createNewMemoryAllocator() {
Expand Down Expand Up @@ -245,8 +260,11 @@ std::unique_ptr<MemoryAllocator> CacheAllocator<CacheTrait>::initAllocator(
return std::make_unique<MemoryAllocator>(
getAllocatorConfig(config_), tempShm_->getAddr(), config_.size);
} else {
return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
config_.size);
return std::make_unique<MemoryAllocator>(
getAllocatorConfig(config_),
privMemManager_->createMapping(config_.size,
createPrivateSegmentOpts()),
config_.size);
}
} else if (type == InitMemType::kMemNew) {
return createNewMemoryAllocator();
Expand Down
4 changes: 4 additions & 0 deletions cachelib/allocator/CacheAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "cachelib/allocator/PoolOptimizer.h"
#include "cachelib/allocator/PoolRebalancer.h"
#include "cachelib/allocator/PoolResizer.h"
#include "cachelib/allocator/PrivateMemoryManager.h"
#include "cachelib/allocator/ReadOnlySharedCacheView.h"
#include "cachelib/allocator/Reaper.h"
#include "cachelib/allocator/RebalanceStrategy.h"
Expand Down Expand Up @@ -1869,6 +1870,7 @@ class CacheAllocator : public CacheBase {
std::chrono::seconds timeout = std::chrono::seconds{0});

ShmSegmentOpts createShmCacheOpts();
PrivateSegmentOpts createPrivateSegmentOpts();
std::unique_ptr<MemoryAllocator> createNewMemoryAllocator();
std::unique_ptr<MemoryAllocator> restoreMemoryAllocator();
std::unique_ptr<CCacheManager> restoreCCacheManager();
Expand Down Expand Up @@ -1990,6 +1992,8 @@ class CacheAllocator : public CacheBase {
// is not persisted when cache process exits.
std::unique_ptr<TempShmMapping> tempShm_;

std::unique_ptr<PrivateMemoryManager> privMemManager_;

std::unique_ptr<ShmManager> shmManager_;

// Deserialize data to restore cache allocator. Used only while attaching to
Expand Down
9 changes: 6 additions & 3 deletions cachelib/allocator/MemoryTierCacheConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@

#pragma once

#include "cachelib/common/Utils.h"
#include "cachelib/shm/ShmCommon.h"

namespace facebook {
namespace cachelib {
class MemoryTierCacheConfig {
using bitmask_type = util::NumaBitMask;

public:
// Creates instance of MemoryTierCacheConfig for Posix/SysV Shared memory.
static MemoryTierCacheConfig fromShm() {
Expand All @@ -42,12 +45,12 @@ class MemoryTierCacheConfig {
size_t getRatio() const noexcept { return ratio; }

// Allocate memory only from specified NUMA nodes
MemoryTierCacheConfig& setMemBind(const NumaBitMask& _numaNodes) {
MemoryTierCacheConfig& setMemBind(const bitmask_type& _numaNodes) {
numaNodes = _numaNodes;
return *this;
}

const NumaBitMask& getMemBind() const noexcept { return numaNodes; }
const bitmask_type& getMemBind() const noexcept { return numaNodes; }

size_t calculateTierSize(size_t totalCacheSize, size_t partitionNum) {
// TODO: Call this method when tiers are enabled in allocator
Expand All @@ -74,7 +77,7 @@ class MemoryTierCacheConfig {
size_t ratio{1};

// Numa node(s) to bind the tier
NumaBitMask numaNodes;
bitmask_type numaNodes;

// TODO: introduce a container for tier settings when adding support for
// file-mapped memory
Expand Down
50 changes: 50 additions & 0 deletions cachelib/allocator/PrivateMemoryManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cachelib/allocator/PrivateMemoryManager.h"

#include <folly/ScopeGuard.h>

namespace facebook {
namespace cachelib {

// Unmaps every region that is still tracked in `mappings` (address -> size).
PrivateMemoryManager::~PrivateMemoryManager() {
  for (auto it = mappings.begin(); it != mappings.end(); ++it) {
    void* const regionStart = it->first;
    const size_t regionBytes = it->second;
    util::munmapMemory(regionStart, regionBytes);
  }
}

// Creates a new private mapping of `size` bytes and starts tracking it so that
// the destructor can unmap it.
//
// The memory comes from util::mmapAlignedZeroedMemory() using opts.alignment.
// When opts.memBindNumaNodes is not empty, the region is additionally bound to
// those NUMA nodes with MPOL_BIND.
//
// @param size  size of the mapping in bytes
// @param opts  alignment and NUMA binding options for the mapping
// @return      start address of the new mapping
//
// If any step after the mmap fails by throwing, the scope guard unmaps the
// region again so nothing is leaked.
void* PrivateMemoryManager::createMapping(size_t size,
                                          PrivateSegmentOpts opts) {
  void* addr = util::mmapAlignedZeroedMemory(opts.alignment, size);
  // Roll back on any exception below: release the region and make sure it is
  // not left behind in the tracking table.
  auto guard = folly::makeGuard([&]() {
    util::munmapMemory(addr, size);
    mappings.erase(addr);
  });

  // The mask test assumes opts.alignment is a power of two.
  XDCHECK_EQ(reinterpret_cast<uint64_t>(addr) & (opts.alignment - 1), 0ULL);

  if (!opts.memBindNumaNodes.empty()) {
    util::mbindMemory(addr, size, MPOL_BIND, opts.memBindNumaNodes, 0);
  }

  mappings.emplace(addr, size);

  // Success: ownership now lives in `mappings`, so cancel the rollback.
  guard.dismiss();
  return addr;
}
} // namespace cachelib
} // namespace facebook
44 changes: 44 additions & 0 deletions cachelib/allocator/PrivateMemoryManager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstddef>
#include <unordered_map>

#include "cachelib/common/Utils.h"

namespace facebook {
namespace cachelib {

// Options controlling how PrivateMemoryManager::createMapping() sets up a
// private memory mapping.
struct PrivateSegmentOpts {
// Required alignment, in bytes, of the mapping's start address.
// createMapping() checks the address against (alignment - 1), so this is
// assumed to be a power of two — TODO confirm with callers.
size_t alignment{1}; // alignment for mapping.
// NUMA nodes the mapping is bound to. When this mask is empty() the mapping
// is created without any NUMA binding.
util::NumaBitMask memBindNumaNodes;
};

// Owns the private memory mappings it creates: every region returned by
// createMapping() is recorded and unmapped again in the destructor.
//
// The class is non-copyable. A copy would carry the same address table, and
// both destructors would then unmap the same regions.
class PrivateMemoryManager {
 public:
  PrivateMemoryManager() = default;
  ~PrivateMemoryManager();

  PrivateMemoryManager(const PrivateMemoryManager&) = delete;
  PrivateMemoryManager& operator=(const PrivateMemoryManager&) = delete;

  // Creates and tracks a new mapping.
  //
  // @param size  size of the mapping in bytes
  // @param opts  alignment and NUMA binding options
  // @return      start address of the mapping; it stays owned by this manager
  void* createMapping(size_t size, PrivateSegmentOpts opts);

 private:
  // Start address -> size in bytes of every live mapping.
  std::unordered_map<void*, size_t> mappings;
};

} // namespace cachelib
} // namespace facebook
2 changes: 1 addition & 1 deletion cachelib/cachebench/util/CacheConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct MemoryTierConfig : public JSONConfig {
MemoryTierCacheConfig getMemoryTierCacheConfig() {
MemoryTierCacheConfig config = MemoryTierCacheConfig::fromShm();
config.setRatio(ratio);
config.setMemBind(NumaBitMask(memBindNodes));
config.setMemBind(util::NumaBitMask(memBindNodes));
return config;
}

Expand Down
1 change: 1 addition & 0 deletions cachelib/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ target_link_libraries(cachelib_common PUBLIC
Folly::folly_exception_tracer
Folly::folly_exception_tracer_base
Folly::folly_exception_counter
numa
)

install(TARGETS cachelib_common
Expand Down
17 changes: 17 additions & 0 deletions cachelib/common/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <dirent.h>
#include <folly/experimental/exception_tracer/ExceptionTracer.h>
#include <numaif.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/shm.h>
Expand Down Expand Up @@ -181,6 +182,22 @@ void* mmapAlignedZeroedMemory(size_t alignment,
throw std::system_error(errno, std::system_category(), "Cannot mmap");
}

// Unmaps the `size` bytes starting at `addr`. The result of the underlying
// munmap() call is not inspected, so a failed unmap goes unreported.
void munmapMemory(void* addr, size_t size) {
  static_cast<void>(::munmap(addr, size));
}

// Applies a NUMA memory policy to the range [addr, addr + len) via mbind().
//
// @param addr   start of the memory range
// @param len    length of the range in bytes
// @param mode   mbind() policy mode (for example MPOL_BIND)
// @param mask   NUMA nodes the policy applies to
// @param flags  mbind() flags
//
// Failures are reported through util::throwSystemError(), both when `mask`
// carries no native bitmask and when mbind() itself returns non-zero.
void mbindMemory(void* addr,
                 unsigned long len,
                 int mode,
                 const NumaBitMask& mask,
                 unsigned int flags) {
  const auto nodesMask = mask.getNativeBitmask();
  if (nodesMask == nullptr) {
    // A NumaBitMask can hold a null native bitmask (e.g. after being moved
    // from). Fail with a clear error instead of dereferencing it below.
    util::throwSystemError(
        EINVAL, std::string("mbind() failed: NUMA node mask is not set"));
  }

  // NOTE(review): libnuma's own mbind wrapper appears to pass `size + 1` as
  // maxnode; confirm whether `size` is the intended value here.
  const long ret =
      mbind(addr, len, mode, nodesMask->maskp, nodesMask->size, flags);
  if (ret != 0) {
    // Capture errno first: building the message allocates and formats, and
    // either could overwrite it before it is reported.
    const int err = errno;
    util::throwSystemError(
        err, folly::sformat("mbind() failed: {}", std::strerror(err)));
  }
}

void setMaxLockMemory(uint64_t bytes) {
struct rlimit rlim {
bytes, bytes
Expand Down
72 changes: 72 additions & 0 deletions cachelib/common/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <folly/Format.h>
#include <folly/Random.h>
#include <numa.h>
#include <numaif.h>

#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>

Expand All @@ -35,6 +37,57 @@ namespace facebook {
namespace cachelib {
namespace util {

// Owning C++ wrapper around a libnuma node bitmask (`struct bitmask*`).
//
// The wrapped bitmask is released with numa_bitmask_free() on destruction.
// A moved-from instance holds no bitmask (nullptr). Such an instance reports
// empty(), returns nullptr from getNativeBitmask(), and can still be assigned
// to, copied from, or have bits set.
class NumaBitMask {
 public:
  using native_bitmask_type = struct bitmask*;

  // Creates a mask backed by a freshly allocated libnuma node mask.
  NumaBitMask() : nodesMask(numa_allocate_nodemask()) {}

  // Deep copy. Copying an instance that holds no bitmask yields another
  // instance that holds no bitmask.
  NumaBitMask(const NumaBitMask& other) : nodesMask(clone(other.nodesMask)) {}

  // Takes over other's bitmask and leaves `other` without one.
  NumaBitMask(NumaBitMask&& other) noexcept
      : nodesMask(std::exchange(other.nodesMask, nullptr)) {}

  // Parses a libnuma node string with numa_parse_nodestring_all().
  //
  // Kept non-explicit so existing implicit conversions from std::string keep
  // compiling.
  //
  // @throw std::invalid_argument if libnuma rejects the string (it returns a
  //        null bitmask in that case, which would otherwise crash on first use)
  NumaBitMask(const std::string& str)
      : nodesMask(numa_parse_nodestring_all(str.c_str())) {
    if (nodesMask == nullptr) {
      throw std::invalid_argument("Invalid NUMA node string: " + str);
    }
  }

  ~NumaBitMask() { reset(); }

  // Deep-copy assignment. Not constexpr: it calls libnuma functions that can
  // never be evaluated at compile time.
  NumaBitMask& operator=(const NumaBitMask& other) {
    if (this == &other) {
      return *this;
    }
    if (other.nodesMask == nullptr) {
      // Source holds no bitmask; mirror that instead of dereferencing null.
      reset();
      return *this;
    }
    if (nodesMask == nullptr) {
      nodesMask = numa_allocate_nodemask();
    }
    copy_bitmask_to_bitmask(other.nodesMask, nodesMask);
    return *this;
  }

  // Move assignment: frees the current bitmask and takes over other's.
  NumaBitMask& operator=(NumaBitMask&& other) noexcept {
    if (this != &other) {
      reset();
      nodesMask = std::exchange(other.nodesMask, nullptr);
    }
    return *this;
  }

  // Returns the underlying libnuma bitmask. It stays owned by this object and
  // is nullptr if this instance holds no bitmask.
  native_bitmask_type getNativeBitmask() const noexcept { return nodesMask; }

  // Marks node `n` in the mask, allocating a bitmask first if none is held.
  NumaBitMask& setBit(unsigned int n) {
    if (nodesMask == nullptr) {
      nodesMask = numa_allocate_nodemask();
    }
    numa_bitmask_setbit(nodesMask, n);
    return *this;
  }

  // True when no node is selected. An instance without a bitmask is empty.
  bool empty() const noexcept {
    return nodesMask == nullptr ||
           numa_bitmask_equal(numa_no_nodes_ptr, nodesMask) == 1;
  }

 private:
  // Returns a newly allocated copy of `src`, or nullptr when `src` is null.
  static native_bitmask_type clone(native_bitmask_type src) {
    if (src == nullptr) {
      return nullptr;
    }
    native_bitmask_type dst = numa_allocate_nodemask();
    copy_bitmask_to_bitmask(src, dst);
    return dst;
  }

  // Frees the held bitmask, if any, and leaves this instance without one.
  void reset() noexcept {
    if (nodesMask != nullptr) {
      numa_bitmask_free(nodesMask);
      nodesMask = nullptr;
    }
  }

 protected:
  native_bitmask_type nodesMask = nullptr;
};

// A wrapper class for functions to collect counters.
// It can be initialized by either
// 1. folly::StringPiece, double -> void, or
Expand Down Expand Up @@ -288,6 +341,25 @@ void* mmapAlignedZeroedMemory(size_t alignment,
size_t numBytes,
bool noAccess = false);

// Destroys a mapping created by mmapAlignedZeroedMemory. The definition
// ignores the result of the underlying munmap() call, so no error is
// reported if unmapping fails.
//
// @param addr the pointer to the memory to unmap
// @param size size of the memory region in bytes
void munmapMemory(void* addr, size_t size);

// Binds memory to the NUMA nodes specified by mask, using mbind().
// A non-zero result from mbind() is reported via util::throwSystemError.
//
// @param addr the pointer to the memory to bind.
// @param len length of the memory in bytes.
// @param mode policy mode supported by the mbind call (e.g. MPOL_BIND)
// @param mask mask specifying the NUMA node ids
// @param flags flags supported by the mbind call
void mbindMemory(void* addr,
unsigned long len,
int mode,
const NumaBitMask& mask,
unsigned int flags);

// get the number of pages in the range which are resident in the process.
//
// @param mem memory start which is page aligned
Expand Down
2 changes: 2 additions & 0 deletions cachelib/shm/PosixShmSegment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
namespace facebook {
namespace cachelib {

using NumaBitMask = util::NumaBitMask;

constexpr static mode_t kRWMode = 0666;
typedef struct stat stat_t;

Expand Down
Loading