Commit 969d5c9

Added the ability for the compressed pointer to use the full 32 bits for addressing in single-tier mode and 31 bits for addressing in multi-tier mode.
1 parent 5b693ae commit 969d5c9

File tree: 8 files changed, +117 −42 lines

cachelib/allocator/CCacheAllocator.cpp

Lines changed: 6 additions & 2 deletions
@@ -36,7 +36,9 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator,
       currentChunksIndex_(0) {
   auto& currentChunks = chunks_[currentChunksIndex_];
   for (auto chunk : *object.chunks()) {
-    currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk)));
+    // TODO: pass multi-tier flag when compact cache supports multi-tier config
+    currentChunks.push_back(
+        allocator_.unCompress(CompressedPtr(chunk), false /* isMultiTier */));
   }
 }

@@ -97,7 +99,9 @@ CCacheAllocator::SerializationType CCacheAllocator::saveState() {

   std::lock_guard<std::mutex> guard(resizeLock_);
   for (auto chunk : getCurrentChunks()) {
-    object.chunks()->push_back(allocator_.compress(chunk).saveState());
+    // TODO: pass multi-tier flag when compact cache supports multi-tier config
+    object.chunks()->push_back(
+        allocator_.compress(chunk, false /* isMultiTier */).saveState());
   }
   return object;
 }

cachelib/allocator/memory/CompressedPtr.h

Lines changed: 71 additions & 25 deletions
@@ -27,18 +27,29 @@ namespace cachelib {

 class SlabAllocator;

-// the following are for pointer compression for the memory allocator. We
-// compress pointers by storing the slab index and the alloc index of the
-// allocation inside the slab. With slab worth kNumSlabBits of data, if we
-// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
-// bits for storing the alloc index. This leaves the remaining (32 -
-// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
-// of memory in slabs and index anything more than 64 byte allocations inside
-// the slab using a 32 bit representation.
-//
 // This CompressedPtr makes decompression fast by staying away from division and
 // modulo arithmetic and doing those during the compression time. We most often
-// decompress a CompressedPtr than compress a pointer while creating one.
+// decompress a CompressedPtr than compress a pointer while creating one. This
+// is used for pointer compression by the memory allocator.
+
+// We compress pointers by storing the tier index, slab index, and alloc index
+// of the allocation inside the slab.
+
+// In the original design (without memory tiers):
+// Each slab addresses 22 bits of allocations (kNumSlabBits). This is split
+// into allocation index and allocation size. With a min allocation size of
+// 64 bytes (kMinAllocPower = 6 bits), the remaining kNumSlabBits(22) -
+// kMinAllocPower(6) = 16 bits store the alloc index. This leaves
+// 32 - (kNumSlabBits - kMinAllocPower) = 16 bits for the slab index. Hence we
+// can index 256 GiB of memory.
+
+// In the multi-tier design:
+// kNumSlabBits and kMinAllocPower remain unchanged. The tier id occupies only
+// the 32nd bit since its value cannot exceed kMaxTiers(2). This leaves
+// 32 - (kNumSlabBits - kMinAllocPower) - 1 (tier id) = 15 bits for the slab
+// index. Hence we can index 128 GiB of memory per tier in a multi-tier
+// configuration.
+
 class CACHELIB_PACKED_ATTR CompressedPtr {
  public:
   using PtrType = uint32_t;
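
The arithmetic in the new header comment can be sanity-checked with a short standalone sketch (not part of this commit; the constant values are assumptions taken from the comment above):

#include <cstddef>

// Assumed values, per the comment: 4 MiB slabs, 64-byte minimum allocations.
constexpr unsigned int kNumSlabBits = 22;
constexpr unsigned int kMinAllocPower = 6;
constexpr unsigned int kNumAllocIdxBits = kNumSlabBits - kMinAllocPower; // 16

// Single tier: all 32 bits are available, leaving 16 bits of slab index.
// 2^(16 + 22) bytes = 256 GiB of addressable memory.
static_assert((std::size_t{1} << (32 - kNumAllocIdxBits + kNumSlabBits)) ==
                  (std::size_t{256} << 30),
              "single-tier: 256 GiB");

// Multi tier: the 32nd bit holds the tier id, leaving 15 bits of slab index.
// 2^(15 + 22) bytes = 128 GiB of addressable memory per tier.
static_assert((std::size_t{1} << (31 - kNumAllocIdxBits + kNumSlabBits)) ==
                  (std::size_t{128} << 30),
              "multi-tier: 128 GiB per tier");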
@@ -62,9 +73,10 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
     return static_cast<uint32_t>(1) << (Slab::kMinAllocPower);
   }

-  // maximum adressable memory for pointer compression to work.
+  // maximum addressable memory for pointer compression to work.
   static constexpr size_t getMaxAddressableSize() noexcept {
-    return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
+    return static_cast<size_t>(1)
+           << (numSlabIdxBits(false) + Slab::kNumSlabBits);
   }

   // default construct to nullptr.
@@ -89,8 +101,11 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   PtrType ptr_{kNull};

   // create a compressed pointer for a valid memory allocation.
-  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx)
-      : ptr_(compress(slabIdx, allocIdx)) {}
+  CompressedPtr(uint32_t slabIdx,
+                uint32_t allocIdx,
+                bool isMultiTiered,
+                TierId tid = 0)
+      : ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}

   constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}
@@ -100,33 +115,63 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   static constexpr unsigned int kNumAllocIdxBits =
       Slab::kNumSlabBits - Slab::kMinAllocPower;

+  // Use the 32nd bit position for the TierId.
+  static constexpr unsigned int kNumTierIdxOffset = 31;
+
   static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;

-  // Number of bits for the slab index. This will be the top 16 bits of the
-  // compressed ptr.
-  static constexpr unsigned int kNumSlabIdxBits =
-      NumBits<PtrType>::value - kNumAllocIdxBits;
+  // Mask for the tier id (most significant) bit.
+  static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;
+
+  // Number of bits for the slab index.
+  // If CacheLib is single-tiered, the slab index occupies the top 16 bits
+  // of the compressed ptr.
+  // If CacheLib is multi-tiered, the topmost (32nd) bit is reserved for the
+  // tier id and the following 15 bits are reserved for the slab index.
+  static constexpr unsigned int numSlabIdxBits(bool isMultiTiered) {
+    return kNumTierIdxOffset - kNumAllocIdxBits + (!isMultiTiered);
+  }

   // Compress the given slabIdx and allocIdx into a 32-bit compressed
   // pointer.
-  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx) noexcept {
+  static PtrType compress(uint32_t slabIdx,
+                          uint32_t allocIdx,
+                          bool isMultiTiered,
+                          TierId tid) noexcept {
     XDCHECK_LE(allocIdx, kAllocIdxMask);
-    XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
-    return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    XDCHECK_LT(slabIdx, (1u << numSlabIdxBits(isMultiTiered)) - 1);
+    if (!isMultiTiered) {
+      return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    }
+    return (static_cast<uint32_t>(tid) << kNumTierIdxOffset) +
+           (slabIdx << kNumAllocIdxBits) + allocIdx;
   }

   // Get the slab index of the compressed ptr
-  uint32_t getSlabIdx() const noexcept {
+  uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
     XDCHECK(!isNull());
-    return static_cast<uint32_t>(ptr_ >> kNumAllocIdxBits);
+    auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
+    return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
   }

   // Get the allocation index of the compressed ptr
   uint32_t getAllocIdx() const noexcept {
     XDCHECK(!isNull());
+    // Note: no tier id masking of ptr_ is required here since only the
+    // lower 16 bits are read.
     return static_cast<uint32_t>(ptr_ & kAllocIdxMask);
   }

+  uint32_t getTierId(bool isMultiTiered) const noexcept {
+    XDCHECK(!isNull());
+    return isMultiTiered ? static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
+  }
+
+  void setTierId(TierId tid) noexcept {
+    ptr_ += static_cast<uint32_t>(tid) << kNumTierIdxOffset;
+  }
+
   friend SlabAllocator;
 };
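
To make the bit layout concrete, here is a minimal self-contained sketch of the same packing and unpacking logic with the constants inlined (illustrative only, not the commit's code):

#include <cassert>
#include <cstdint>

int main() {
  constexpr uint32_t kNumAllocIdxBits = 16;  // kNumSlabBits - kMinAllocPower
  constexpr uint32_t kNumTierIdxOffset = 31; // tier id lives in the 32nd bit
  constexpr uint32_t kAllocIdxMask = (1u << kNumAllocIdxBits) - 1;
  constexpr uint32_t kTierIdxMask = 1u << kNumTierIdxOffset;

  // Pack tid = 1, slabIdx = 5, allocIdx = 10 in the multi-tier layout.
  const uint32_t ptr =
      (1u << kNumTierIdxOffset) + (5u << kNumAllocIdxBits) + 10u;

  // Unpack: mask off the tier bit before shifting out the slab index.
  assert((ptr >> kNumTierIdxOffset) == 1);                  // tier id
  assert(((ptr & ~kTierIdxMask) >> kNumAllocIdxBits) == 5); // slab index
  assert((ptr & kAllocIdxMask) == 10);                      // alloc index
  return 0;
}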

@@ -137,11 +182,12 @@ class PtrCompressor {
       : allocator_(allocator) {}

   const CompressedPtr compress(const PtrType* uncompressed) const {
-    return allocator_.compress(uncompressed);
+    return allocator_.compress(uncompressed, false /* isMultiTiered */);
   }

   PtrType* unCompress(const CompressedPtr compressed) const {
-    return static_cast<PtrType*>(allocator_.unCompress(compressed));
+    return static_cast<PtrType*>(
+        allocator_.unCompress(compressed, false /* isMultiTiered */));
   }

   bool operator==(const PtrCompressor& rhs) const noexcept {

cachelib/allocator/memory/MemoryAllocator.h

Lines changed: 6 additions & 4 deletions
@@ -534,8 +534,9 @@ class MemoryAllocator {
   // as the original pointer is valid.
   //
   // @throw std::invalid_argument if the ptr is invalid.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
-    return slabAllocator_.compress(ptr);
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr,
+                                         bool isMultiTiered) const {
+    return slabAllocator_.compress(ptr, isMultiTiered);
   }

   // retrieve the raw pointer corresponding to the compressed pointer. This is

@@ -546,8 +547,9 @@
   // @return the raw pointer corresponding to this compressed pointer.
   //
   // @throw std::invalid_argument if the compressed pointer is invalid.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr) const {
-    return slabAllocator_.unCompress(cPtr);
+  void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr,
+                                   bool isMultiTiered) const {
+    return slabAllocator_.unCompress(cPtr, isMultiTiered);
   }

   // a special implementation of pointer compression for benchmarking purposes.

cachelib/allocator/memory/Slab.h

Lines changed: 2 additions & 0 deletions
@@ -50,6 +50,8 @@ namespace cachelib {
  * independently by the SlabAllocator.
  */

+// identifier for the memory tier
+using TierId = int8_t;
 // identifier for the memory pool
 using PoolId = int8_t;
 // identifier for the allocation class

cachelib/allocator/memory/SlabAllocator.cpp

Lines changed: 0 additions & 1 deletion
@@ -48,7 +48,6 @@ using PtrType = CompressedPtr::PtrType;
 constexpr uint64_t SlabAllocator::kAddressMask;
 constexpr PtrType CompressedPtr::kAllocIdxMask;
 constexpr unsigned int CompressedPtr::kNumAllocIdxBits;
-constexpr unsigned int CompressedPtr::kNumSlabIdxBits;

 constexpr unsigned int SlabAllocator::kLockSleepMS;
 constexpr size_t SlabAllocator::kPagesPerStep;

cachelib/allocator/memory/SlabAllocator.h

Lines changed: 10 additions & 4 deletions
@@ -225,7 +225,8 @@ class SlabAllocator {
   // the corresponding memory allocator. trying to inline this just increases
   // the code size and does not move the needle on the benchmarks much.
   // Calling this with invalid input in optimized build is undefined behavior.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr,
+                                         bool isMultiTiered) const {
     if (ptr == nullptr) {
       return CompressedPtr{};
     }

@@ -246,18 +247,23 @@
         static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
                               reinterpret_cast<const uint8_t*>(slab)) /
         allocSize;
-    return CompressedPtr{slabIndex, allocIdx};
+    return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
   }

   // uncompress the pointer and return the raw ptr. This function never throws
   // in optimized build and assumes that the caller is responsible for calling
   // it with a valid compressed pointer.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
+  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr,
+                                   bool isMultiTiered) const {
     if (ptr.isNull()) {
       return nullptr;
     }

-    const SlabIdx slabIndex = ptr.getSlabIdx();
+    /* TODO: isMultiTiered is set to false by default. The multi-tiering flag
+       will have no impact until the rest of the multi-tiering changes are
+       merged. */
+    const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
     const uint32_t allocIdx = ptr.getAllocIdx();
     const Slab* slab = &slabMemoryStart_[slabIndex];
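
For context on why decompression stays fast: once getSlabIdx() and getAllocIdx() have extracted the indices, recovering the raw address takes only a shift, a multiply, and an add. A hypothetical, simplified sketch of that tail (the real unCompress() looks up the slab's allocation-class size; the names here are illustrative, not CacheLib's):

#include <cstddef>
#include <cstdint>

// Hypothetical sketch: slabMemoryStart is the base of the contiguous slab
// region, allocSize is the allocation-class size for this slab.
void* unCompressTail(uint8_t* slabMemoryStart,
                     uint32_t slabIndex,
                     uint32_t allocIdx,
                     uint32_t allocSize) {
  constexpr std::size_t kSlabSize = std::size_t{1} << 22; // 4 MiB slabs
  uint8_t* slab = slabMemoryStart + std::size_t{slabIndex} * kSlabSize;
  return slab + std::size_t{allocIdx} * allocSize; // no division or modulo
}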

cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp

Lines changed: 18 additions & 3 deletions
@@ -401,13 +401,28 @@ TEST_F(MemoryAllocatorTest, PointerCompression) {
   for (const auto& pool : poolAllocs) {
     const auto& allocs = pool.second;
     for (const auto* alloc : allocs) {
-      CompressedPtr ptr = m.compress(alloc);
+      CompressedPtr ptr = m.compress(alloc, false /* isMultiTiered */);
       ASSERT_FALSE(ptr.isNull());
-      ASSERT_EQ(alloc, m.unCompress(ptr));
+      ASSERT_EQ(alloc, m.unCompress(ptr, false /* isMultiTiered */));
     }
   }

-  ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr)));
+  ASSERT_EQ(nullptr,
+            m.unCompress(m.compress(nullptr, false /* isMultiTiered */),
+                         false /* isMultiTiered */));
+
+  // test pointer compression with multi-tier
+  for (const auto& pool : poolAllocs) {
+    const auto& allocs = pool.second;
+    for (const auto* alloc : allocs) {
+      CompressedPtr ptr = m.compress(alloc, true /* isMultiTiered */);
+      ASSERT_FALSE(ptr.isNull());
+      ASSERT_EQ(alloc, m.unCompress(ptr, true /* isMultiTiered */));
+    }
+  }
+
+  ASSERT_EQ(nullptr,
+            m.unCompress(m.compress(nullptr, true /* isMultiTiered */),
+                         true /* isMultiTiered */));
 }

 TEST_F(MemoryAllocatorTest, Restorable) {

cachelib/benchmarks/PtrCompressionBench.cpp

Lines changed: 4 additions & 3 deletions
@@ -61,7 +61,8 @@ void buildAllocs(size_t poolSize) {
     void* alloc = ma->allocate(pid, size);
     XDCHECK_GE(size, CompressedPtr::getMinAllocSize());
     if (alloc != nullptr) {
-      validAllocs.push_back({alloc, ma->compress(alloc)});
+      validAllocs.push_back(
+          {alloc, ma->compress(alloc, false /* isMultiTiered */)});
       validAllocsAlt.push_back({alloc, ma->compressAlt(alloc)});
       numAllocations++;
     }

@@ -83,7 +84,7 @@ BENCHMARK(CompressionAlt) {

 BENCHMARK_RELATIVE(Compression) {
   for (const auto& alloc : validAllocs) {
-    CompressedPtr c = m->compress(alloc.first);
+    CompressedPtr c = m->compress(alloc.first, false /* isMultiTiered */);
     folly::doNotOptimizeAway(c);
   }
 }

@@ -97,7 +98,7 @@ BENCHMARK(DeCompressAlt) {

 BENCHMARK_RELATIVE(DeCompress) {
   for (const auto& alloc : validAllocs) {
-    void* ptr = m->unCompress(alloc.second);
+    void* ptr = m->unCompress(alloc.second, false /* isMultiTiered */);
     folly::doNotOptimizeAway(ptr);
   }
 }
