Skip to content

Commit 2f5c0be

Browse files
committed
NUMA bindings support for private memory
1 parent b5d003c commit 2f5c0be

14 files changed

+223
-77
lines changed

cachelib/allocator/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ add_library (cachelib_allocator
5454
PoolOptimizeStrategy.cpp
5555
PoolRebalancer.cpp
5656
PoolResizer.cpp
57+
PrivateMemoryManager.cpp
5758
RebalanceStrategy.cpp
5859
SlabReleaseStats.cpp
5960
TempShmMapping.cpp

cachelib/allocator/CacheAllocator-inl.h

+20-2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ CacheAllocator<CacheTrait>::CacheAllocator(
5656
tempShm_(type == InitMemType::kNone && isOnShm_
5757
? std::make_unique<TempShmMapping>(config_.size)
5858
: nullptr),
59+
privMemManager_(type == InitMemType::kNone && !isOnShm_
60+
? std::make_unique<PrivateMemoryManager>()
61+
: nullptr),
5962
shmManager_(type != InitMemType::kNone
6063
? std::make_unique<ShmManager>(config_.cacheDir,
6164
config_.usePosixShm)
@@ -116,6 +119,18 @@ ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts() {
116119
return opts;
117120
}
118121

122+
template <typename CacheTrait>
123+
PrivateSegmentOpts CacheAllocator<CacheTrait>::createPrivateSegmentOpts() {
124+
PrivateSegmentOpts opts;
125+
opts.alignment = sizeof(Slab);
126+
auto memoryTierConfigs = config_.getMemoryTierConfigs();
127+
// TODO: we support single tier so far
128+
XDCHECK_EQ(memoryTierConfigs.size(), 1ul);
129+
opts.memBindNumaNodes = memoryTierConfigs[0].getMemBind();
130+
131+
return opts;
132+
}
133+
119134
template <typename CacheTrait>
120135
std::unique_ptr<MemoryAllocator>
121136
CacheAllocator<CacheTrait>::createNewMemoryAllocator() {
@@ -245,8 +260,11 @@ std::unique_ptr<MemoryAllocator> CacheAllocator<CacheTrait>::initAllocator(
245260
return std::make_unique<MemoryAllocator>(
246261
getAllocatorConfig(config_), tempShm_->getAddr(), config_.size);
247262
} else {
248-
return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
249-
config_.size);
263+
return std::make_unique<MemoryAllocator>(
264+
getAllocatorConfig(config_),
265+
privMemManager_->createMapping(config_.size,
266+
createPrivateSegmentOpts()),
267+
config_.size);
250268
}
251269
} else if (type == InitMemType::kMemNew) {
252270
return createNewMemoryAllocator();

cachelib/allocator/CacheAllocator.h

+4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
#include "cachelib/allocator/PoolOptimizer.h"
5858
#include "cachelib/allocator/PoolRebalancer.h"
5959
#include "cachelib/allocator/PoolResizer.h"
60+
#include "cachelib/allocator/PrivateMemoryManager.h"
6061
#include "cachelib/allocator/ReadOnlySharedCacheView.h"
6162
#include "cachelib/allocator/Reaper.h"
6263
#include "cachelib/allocator/RebalanceStrategy.h"
@@ -1876,6 +1877,7 @@ class CacheAllocator : public CacheBase {
18761877
std::chrono::seconds timeout = std::chrono::seconds{0});
18771878

18781879
ShmSegmentOpts createShmCacheOpts();
1880+
PrivateSegmentOpts createPrivateSegmentOpts();
18791881
std::unique_ptr<MemoryAllocator> createNewMemoryAllocator();
18801882
std::unique_ptr<MemoryAllocator> restoreMemoryAllocator();
18811883
std::unique_ptr<CCacheManager> restoreCCacheManager();
@@ -1997,6 +1999,8 @@ class CacheAllocator : public CacheBase {
19971999
// is not persisted when cache process exits.
19982000
std::unique_ptr<TempShmMapping> tempShm_;
19992001

2002+
std::unique_ptr<PrivateMemoryManager> privMemManager_;
2003+
20002004
std::unique_ptr<ShmManager> shmManager_;
20012005

20022006
// Deserialize data to restore cache allocator. Used only while attaching to

cachelib/allocator/MemoryTierCacheConfig.h

+6-3
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@
1616

1717
#pragma once
1818

19+
#include "cachelib/common/Utils.h"
1920
#include "cachelib/shm/ShmCommon.h"
2021

2122
namespace facebook {
2223
namespace cachelib {
2324
class MemoryTierCacheConfig {
25+
using bitmask_type = util::NumaBitMask;
26+
2427
public:
2528
// Creates instance of MemoryTierCacheConfig for Posix/SysV Shared memory.
2629
static MemoryTierCacheConfig fromShm() {
@@ -42,12 +45,12 @@ class MemoryTierCacheConfig {
4245
size_t getRatio() const noexcept { return ratio; }
4346

4447
// Allocate memory only from specified NUMA nodes
45-
MemoryTierCacheConfig& setMemBind(const NumaBitMask& _numaNodes) {
48+
// Stores a copy of the given NUMA node mask as this tier's memory binding.
//
// @param _numaNodes  mask of NUMA nodes to bind the tier to
// @return reference to this config, so that setter calls can be chained
MemoryTierCacheConfig& setMemBind(const bitmask_type& _numaNodes) {
4649
numaNodes = _numaNodes;
4750
return *this;
4851
}
4952

50-
const NumaBitMask& getMemBind() const noexcept { return numaNodes; }
53+
// Returns the NUMA node mask stored by setMemBind().
const bitmask_type& getMemBind() const noexcept { return numaNodes; }
5154

5255
size_t calculateTierSize(size_t totalCacheSize, size_t partitionNum) {
5356
// TODO: Call this method when tiers are enabled in allocator
@@ -74,7 +77,7 @@ class MemoryTierCacheConfig {
7477
size_t ratio{1};
7578

7679
// Numa node(s) to bind the tier
77-
NumaBitMask numaNodes;
80+
bitmask_type numaNodes;
7881

7982
// TODO: introduce a container for tier settings when adding support for
8083
// file-mapped memory
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "cachelib/allocator/PrivateMemoryManager.h"
18+
19+
#include <folly/ScopeGuard.h>
20+
21+
namespace facebook {
22+
namespace cachelib {
23+
24+
// Unmaps every mapping that is still tracked by this manager.
PrivateMemoryManager::~PrivateMemoryManager() {
  for (auto it = mappings.begin(); it != mappings.end(); ++it) {
    void* const mappedAddr = it->first;
    const size_t mappedSize = it->second;
    util::munmapMemory(mappedAddr, mappedSize);
  }
}
29+
30+
void* PrivateMemoryManager::createMapping(size_t size,
31+
PrivateSegmentOpts opts) {
32+
void* addr = util::mmapAlignedZeroedMemory(opts.alignment, size);
33+
auto guard = folly::makeGuard([&]() {
34+
util::munmapMemory(addr, size);
35+
mappings.erase(addr);
36+
});
37+
38+
XDCHECK_EQ(reinterpret_cast<uint64_t>(addr) & (opts.alignment - 1), 0ULL);
39+
40+
if (!opts.memBindNumaNodes.empty()) {
41+
util::mbindMemory(addr, size, MPOL_BIND, opts.memBindNumaNodes, 0);
42+
}
43+
44+
mappings.emplace(addr, size);
45+
46+
guard.dismiss();
47+
return addr;
48+
}
49+
} // namespace cachelib
50+
} // namespace facebook
+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <cstddef>
20+
#include <unordered_map>
21+
22+
#include "cachelib/common/Utils.h"
23+
24+
namespace facebook {
25+
namespace cachelib {
26+
27+
struct PrivateSegmentOpts {
28+
size_t alignment{1}; // alignment for mapping.
29+
util::NumaBitMask memBindNumaNodes;
30+
};
31+
32+
class PrivateMemoryManager {
33+
public:
34+
PrivateMemoryManager() {}
35+
~PrivateMemoryManager();
36+
37+
void* createMapping(size_t size, PrivateSegmentOpts opts);
38+
39+
private:
40+
std::unordered_map<void*, size_t> mappings;
41+
};
42+
43+
} // namespace cachelib
44+
} // namespace facebook

cachelib/cachebench/util/CacheConfig.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ struct MemoryTierConfig : public JSONConfig {
5151
MemoryTierCacheConfig getMemoryTierCacheConfig() {
5252
MemoryTierCacheConfig config = MemoryTierCacheConfig::fromShm();
5353
config.setRatio(ratio);
54-
config.setMemBind(NumaBitMask(memBindNodes));
54+
config.setMemBind(util::NumaBitMask(memBindNodes));
5555
return config;
5656
}
5757

cachelib/common/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ target_link_libraries(cachelib_common PUBLIC
3939
Folly::folly_exception_tracer
4040
Folly::folly_exception_tracer_base
4141
Folly::folly_exception_counter
42+
numa
4243
)
4344

4445
install(TARGETS cachelib_common

cachelib/common/Utils.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <dirent.h>
1818
#include <folly/experimental/exception_tracer/ExceptionTracer.h>
19+
#include <numaif.h>
1920
#include <sys/mman.h>
2021
#include <sys/resource.h>
2122
#include <sys/shm.h>
@@ -181,6 +182,22 @@ void* mmapAlignedZeroedMemory(size_t alignment,
181182
throw std::system_error(errno, std::system_category(), "Cannot mmap");
182183
}
183184

185+
// Unmaps the region [addr, addr + size). The result of munmap() is not
// inspected.
void munmapMemory(void* addr, size_t size) {
  static_cast<void>(munmap(addr, size));
}
186+
187+
void mbindMemory(void* addr,
188+
unsigned long len,
189+
int mode,
190+
const NumaBitMask& mask,
191+
unsigned int flags) {
192+
auto nodesMask = mask.getNativeBitmask();
193+
194+
long ret = mbind(addr, len, mode, nodesMask->maskp, nodesMask->size, flags);
195+
if (ret != 0) {
196+
util::throwSystemError(
197+
errno, folly::sformat("mbind() failed: {}", std::strerror(errno)));
198+
}
199+
}
200+
184201
void setMaxLockMemory(uint64_t bytes) {
185202
struct rlimit rlim {
186203
bytes, bytes

cachelib/common/Utils.h

+72
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
#include <folly/Format.h>
2020
#include <folly/Random.h>
21+
#include <numa.h>
22+
#include <numaif.h>
2123

2224
#include <unordered_map>
2325

@@ -35,6 +37,57 @@ namespace facebook {
3537
namespace cachelib {
3638
namespace util {
3739

40+
class NumaBitMask {
41+
public:
42+
using native_bitmask_type = struct bitmask*;
43+
44+
NumaBitMask() { nodesMask = numa_allocate_nodemask(); }
45+
46+
NumaBitMask(const NumaBitMask& other) {
47+
nodesMask = numa_allocate_nodemask();
48+
copy_bitmask_to_bitmask(other.nodesMask, nodesMask);
49+
}
50+
51+
NumaBitMask(NumaBitMask&& other) {
52+
nodesMask = other.nodesMask;
53+
other.nodesMask = nullptr;
54+
}
55+
56+
NumaBitMask(const std::string& str) {
57+
nodesMask = numa_parse_nodestring_all(str.c_str());
58+
}
59+
60+
~NumaBitMask() {
61+
if (nodesMask) {
62+
numa_bitmask_free(nodesMask);
63+
}
64+
}
65+
66+
constexpr NumaBitMask& operator=(const NumaBitMask& other) {
67+
if (this != &other) {
68+
if (!nodesMask) {
69+
nodesMask = numa_allocate_nodemask();
70+
}
71+
copy_bitmask_to_bitmask(other.nodesMask, nodesMask);
72+
}
73+
return *this;
74+
}
75+
76+
native_bitmask_type getNativeBitmask() const noexcept { return nodesMask; }
77+
78+
NumaBitMask& setBit(unsigned int n) {
79+
numa_bitmask_setbit(nodesMask, n);
80+
return *this;
81+
}
82+
83+
bool empty() const noexcept {
84+
return numa_bitmask_equal(numa_no_nodes_ptr, nodesMask) == 1;
85+
}
86+
87+
protected:
88+
native_bitmask_type nodesMask = nullptr;
89+
};
90+
3891
// A wrapper class for functions to collect counters.
3992
// It can be initialized by either
4093
// 1. folly::StringPiece, double -> void, or
@@ -295,6 +348,25 @@ void* mmapAlignedZeroedMemory(size_t alignment,
295348
size_t numBytes,
296349
bool noAccess = false);
297350

351+
// destroy the mapping created by mmapAlignedZeroedMemory
352+
//
353+
// @param addr the pointer to the memory to unmap
354+
// @param size size of the memory region
355+
void munmapMemory(void* addr, size_t size);
356+
357+
// binds memory to the NUMA nodes specified by mask.
358+
//
359+
// @param addr the pointer to the memory to bind.
360+
// @param len length of the memory.
361+
// @param mode   mode supported by mbind call
362+
// @param mask mask specifies node ids
363+
// @param flags  flags supported by mbind call
364+
void mbindMemory(void* addr,
365+
unsigned long len,
366+
int mode,
367+
const NumaBitMask& mask,
368+
unsigned int flags);
369+
298370
// get the number of pages in the range which are resident in the process.
299371
//
300372
// @param mem memory start which is page aligned

cachelib/shm/PosixShmSegment.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
namespace facebook {
3232
namespace cachelib {
3333

34+
using NumaBitMask = util::NumaBitMask;
35+
3436
constexpr static mode_t kRWMode = 0666;
3537
typedef struct stat stat_t;
3638

0 commit comments

Comments
 (0)