diff --git a/.github/workflows/build-cachelib-centos-8-5.yml b/.github/workflows/build-cachelib-centos-8-5.yml
index 5dade56439..fcb3129b22 100644
--- a/.github/workflows/build-cachelib-centos-8-5.yml
+++ b/.github/workflows/build-cachelib-centos-8-5.yml
@@ -13,11 +13,6 @@
# limitations under the License.
name: build-cachelib-centos-8.5
on:
- push:
- tags:
- - 'v*'
- pull_request:
- workflow_dispatch:
schedule:
- cron: '0 9 * * *'
jobs:
diff --git a/.github/workflows/build-cachelib-centos-long.yml b/.github/workflows/build-cachelib-centos-long.yml
new file mode 100644
index 0000000000..92165f603b
--- /dev/null
+++ b/.github/workflows/build-cachelib-centos-long.yml
@@ -0,0 +1,39 @@
+name: build-cachelib-centos-latest
+on:
+ schedule:
+ - cron: '0 7 * * *'
+
+jobs:
+ build-cachelib-centos8-latest:
+ name: "CentOS/latest - Build CacheLib with all dependencies"
+ runs-on: ubuntu-latest
+ # Docker container image name
+ container: "centos:latest"
+ steps:
+ - name: "update packages"
+ run: dnf upgrade -y
+ - name: "install sudo,git"
+ run: dnf install -y sudo git cmake gcc
+ - name: "System Information"
+ run: |
+ echo === uname ===
+ uname -a
+ echo === /etc/os-release ===
+ cat /etc/os-release
+ echo === df -hl ===
+ df -hl
+ echo === free -h ===
+ free -h
+ echo === top ===
+ top -b -n1 -1 -Eg || timeout 1 top -b -n1
+ echo === env ===
+ env
+ echo === gcc -v ===
+ gcc -v
+ - name: "checkout sources"
+ uses: actions/checkout@v2
+ - name: "build CacheLib using build script"
+ run: ./contrib/build.sh -j -v -T
+ - name: "run tests"
+ timeout-minutes: 60
+ run: cd opt/cachelib/tests && ../../../run_tests.sh long
diff --git a/.github/workflows/build-cachelib-debian.yml b/.github/workflows/build-cachelib-debian.yml
new file mode 100644
index 0000000000..5bc3ad3c70
--- /dev/null
+++ b/.github/workflows/build-cachelib-debian.yml
@@ -0,0 +1,43 @@
+name: build-cachelib-debian-10
+on:
+ schedule:
+ - cron: '30 5 * * 0,3'
+
+jobs:
+ build-cachelib-debian-10:
+ name: "Debian/Buster - Build CacheLib with all dependencies"
+ runs-on: ubuntu-latest
+ # Docker container image name
+ container: "debian:buster-slim"
+ steps:
+ - name: "update packages"
+ run: apt-get update
+ - name: "upgrade packages"
+ run: apt-get -y upgrade
+ - name: "install sudo,git"
+ run: apt-get install -y sudo git procps
+ - name: "System Information"
+ run: |
+ echo === uname ===
+ uname -a
+ echo === /etc/os-release ===
+ cat /etc/os-release
+ echo === df -hl ===
+ df -hl
+ echo === free -h ===
+ free -h
+ echo === top ===
+ top -b -n1 -1 -Eg || timeout 1 top -b -n1 ; true
+ echo === env ===
+ env
+ echo === cc -v ===
+ cc -v || true
+ echo === g++ -v ===
+ g++ -v || true
+ - name: "checkout sources"
+ uses: actions/checkout@v2
+ - name: "build CacheLib using build script"
+ run: ./contrib/build.sh -j -v -T
+ - name: "run tests"
+ timeout-minutes: 60
+ run: cd opt/cachelib/tests && ../../../run_tests.sh
diff --git a/.github/workflows/build-cachelib-docker.yml b/.github/workflows/build-cachelib-docker.yml
new file mode 100644
index 0000000000..c5105cca06
--- /dev/null
+++ b/.github/workflows/build-cachelib-docker.yml
@@ -0,0 +1,50 @@
+name: build-cachelib-docker
+on:
+ push:
+ pull_request:
+
+jobs:
+ build-cachelib-docker:
+ name: "CentOS/latest - Build CacheLib with all dependencies"
+ runs-on: ubuntu-latest
+ env:
+ REPO: cachelib
+ GITHUB_REPO: intel/CacheLib
+ CONTAINER_REG: ghcr.io/pmem/cachelib
+ CONTAINER_REG_USER: ${{ secrets.GH_CR_USER }}
+ CONTAINER_REG_PASS: ${{ secrets.GH_CR_PAT }}
+ FORCE_IMAGE_ACTION: ${{ secrets.FORCE_IMAGE_ACTION }}
+ HOST_WORKDIR: ${{ github.workspace }}
+ WORKDIR: docker
+ IMG_VER: devel
+ strategy:
+ matrix:
+ CONFIG: ["OS=centos OS_VER=8streams PUSH_IMAGE=1"]
+ steps:
+ - name: "System Information"
+ run: |
+ echo === uname ===
+ uname -a
+ echo === /etc/os-release ===
+ cat /etc/os-release
+ echo === df -hl ===
+ df -hl
+ echo === free -h ===
+ free -h
+ echo === top ===
+ top -b -n1 -1 -Eg || timeout 1 top -b -n1
+ echo === env ===
+ env
+ echo === gcc -v ===
+ gcc -v
+ - name: "checkout sources"
+ uses: actions/checkout@v2
+ with:
+ submodules: recursive
+ fetch-depth: 0
+
+ - name: Pull the image or rebuild and push it
+ run: cd $WORKDIR && ${{ matrix.CONFIG }} ./pull-or-rebuild-image.sh rebuild
+
+ - name: Run the build
+ run: cd $WORKDIR && ${{ matrix.CONFIG }} ./build.sh
diff --git a/MultiTierDataMovement.md b/MultiTierDataMovement.md
new file mode 100644
index 0000000000..cccc14b947
--- /dev/null
+++ b/MultiTierDataMovement.md
@@ -0,0 +1,90 @@
+# Background Data Movement
+
+In order to reduce the number of online evictions and support asynchronous
+promotion, we have added two periodic workers to handle eviction and promotion.
+
+The diagram below shows a simplified version of how the background evictor
+thread (green) is integrated into the CacheLib architecture.
+
+
+## Background Evictors
+
+The background evictors scan each class to see if there are objects to move to the next (lower)
+tier using a given strategy. Here we document the general parameters and the parameters
+for the different strategies.
+
+- `backgroundEvictorIntervalMilSec`: The interval at which this thread runs. By default,
+the background evictor threads will wake up every 10 ms to scan the AllocationClasses. In addition,
+a background evictor thread will be woken up every time an allocation fails (in
+a request handling thread) and the current percentage of free memory for the
+AllocationClass is lower than `lowEvictionAcWatermark`. This can make the interval parameter
+less important when many allocations occur from request handling threads.
+
+- `evictorThreads`: The number of background evictors to run. Each thread is assigned
+a set of AllocationClasses to scan and evict objects from. Currently, each thread gets
+an equal number of classes to scan, but since the object size distribution may be unequal, future
+versions will attempt to balance the classes among threads. The range is `1` to the number of AllocationClasses.
+The default is `1`.
+
+- `maxEvictionBatch`: The number of objects to remove in a given eviction call. The
+default is `40`; the lower bound is `10` and the upper bound is `1000`. Too low and we might not
+remove objects at a reasonable rate; too high and it might increase contention with user threads.
+
+- `minEvictionBatch`: Minimum number of items to evict at any time (if there are any
+candidates).
+
+- `maxEvictionPromotionHotness`: Maximum number of candidates to consider for eviction. This is similar to `maxEvictionBatch`,
+but it specifies how many candidates will be taken into consideration, not the actual number of items to evict.
+This option can be used to configure the duration of the critical section on the LRU lock. A configuration sketch follows this list.
+
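+For illustration, the eviction workers might be enabled roughly like this.
+This is a minimal sketch: `enableBackgroundEvictor` and `FreeThresholdStrategy`
+mirror the allocator code in this PR, but treat the exact signatures as
+assumptions rather than a stable API.
+
+```cpp
+#include <chrono>
+#include <memory>
+
+#include "cachelib/allocator/CacheAllocator.h"
+
+using Cache = facebook::cachelib::LruAllocator;
+
+void configureBackgroundEviction(
+    facebook::cachelib::CacheAllocatorConfig<Cache>& config) {
+  config.enableBackgroundEvictor(
+      std::make_shared<facebook::cachelib::FreeThresholdStrategy>(
+          2.0 /* lowEvictionAcWatermark */, 5.0 /* highEvictionAcWatermark */,
+          40 /* maxEvictionBatch */, 10 /* minEvictionBatch */),
+      std::chrono::milliseconds{10} /* backgroundEvictorIntervalMilSec */,
+      1 /* evictorThreads */);
+}
+```
+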
+
+### FreeThresholdStrategy (default)
+
+- `lowEvictionAcWatermark`: Triggers the background eviction thread to run
+when this percentage of the AllocationClass is free.
+The default is `2.0`; to avoid wasting capacity, we don't set this above `10.0`.
+
+- `highEvictionAcWatermark`: Stops evictions from an AllocationClass when this
+percentage of the AllocationClass is free. The default is `5.0`; to avoid wasting capacity, we
+don't set this above `10.0`. A small sketch of this hysteresis follows.
+
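+In other words, the defaults form a small hysteresis loop per AllocationClass:
+eviction starts when less than 2% of the class is free and continues until 5%
+is free. A sketch of that decision with hypothetical helper names (not code
+from this PR):
+
+```cpp
+// Hysteresis per AllocationClass, using the default watermarks.
+bool shouldStartEvicting(double freePct) { return freePct < 2.0; } // lowEvictionAcWatermark
+bool shouldKeepEvicting(double freePct) { return freePct < 5.0; }  // highEvictionAcWatermark
+```
+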
+
+## Background Promoters
+
+The background promoters scan each class to see if there are objects to move to an upper (faster)
+tier using a given strategy. Here we document the general parameters and the parameters
+for the different strategies.
+
+- `backgroundPromoterIntervalMilSec`: The interval at which this thread runs. By default,
+the background promoter threads will wake up every 10 ms to scan the AllocationClasses for
+objects to promote.
+
+- `promoterThreads`: The number of background promoters to run. Each thread is assigned
+a set of AllocationClasses to scan and promote objects from. Currently, each thread gets
+an equal number of classes to scan, but since the object size distribution may be unequal, future
+versions will attempt to balance the classes among threads. The range is `1` to the number of AllocationClasses. The default is `1`.
+
+- `maxPromotionBatch`: The number of objects to promote in a given promotion call. The
+default is `40`; the lower bound is `10` and the upper bound is `1000`. Too low and we might not
+promote objects at a reasonable rate; too high and it might increase contention with user threads.
+
+- `minPromotionBatch`: Minimum number of items to promote at any time (if there are any
+candidates).
+
+- `numDuplicateElements`: This allows us to promote items that have existing read-only handles, since
+we won't need to modify the data while a user is still reading it. Therefore, for a short time
+the data can reside in both tiers, until it is evicted from its original tier. The default is to
+disallow this (`0`). Setting the value to `100` enables duplicate elements across tiers. A configuration sketch follows this list.
+
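+A matching sketch for the promotion workers; as above, `enableBackgroundPromoter`
+and `PromotionStrategy` follow this PR's allocator code, but the exact
+signatures are assumptions:
+
+```cpp
+void configureBackgroundPromotion(
+    facebook::cachelib::CacheAllocatorConfig<Cache>& config) {
+  config.enableBackgroundPromoter(
+      std::make_shared<facebook::cachelib::PromotionStrategy>(
+          4.0 /* promotionAcWatermark */, 10 /* maxPromotionBatch */,
+          1 /* minPromotionBatch */),
+      std::chrono::milliseconds{10} /* backgroundPromoterIntervalMilSec */,
+      1 /* promoterThreads */);
+}
+```
+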
+### Background Promotion Strategy (only one currently)
+
+- `promotionAcWatermark`: Promote items if at least this
+percentage of the AllocationClass is free. The promotion thread will attempt to move `maxPromotionBatch` objects
+to that tier. The objects are chosen from the head of the LRU. The default is `4.0`.
+This value should correlate with `lowEvictionAcWatermark`, `highEvictionAcWatermark`, `minAcAllocationWatermark`, and `maxAcAllocationWatermark`.
+- `maxPromotionBatch`: The number of objects to promote in a batch during background promotion. Analogous to
+`maxEvictionBatch`. Its value should be lower than `maxEvictionBatch` to decrease contention on hot items.
+
diff --git a/cachelib/CMakeLists.txt b/cachelib/CMakeLists.txt
index 506ba66bcf..32b2859e44 100644
--- a/cachelib/CMakeLists.txt
+++ b/cachelib/CMakeLists.txt
@@ -85,6 +85,11 @@ set(CMAKE_MODULE_PATH
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)
+if(COVERAGE_ENABLED)
+ # Add code coverage
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage -fprofile-arcs -ftest-coverage")
+endif()
+
# include(fb_cxx_flags)
message(STATUS "Update CXXFLAGS: ${CMAKE_CXX_FLAGS}")
diff --git a/cachelib/allocator/BackgroundMover-inl.h b/cachelib/allocator/BackgroundMover-inl.h
index 429dd6b65d..6d3c58ea44 100644
--- a/cachelib/allocator/BackgroundMover-inl.h
+++ b/cachelib/allocator/BackgroundMover-inl.h
@@ -49,8 +49,8 @@ template
void BackgroundMover<CacheT>::setAssignedMemory(
    std::vector<MemoryDescriptorType>&& assignedMemory) {
XLOG(INFO, "Class assigned to background worker:");
- for (auto [pid, cid] : assignedMemory) {
- XLOGF(INFO, "Pid: {}, Cid: {}", pid, cid);
+ for (auto [tid, pid, cid] : assignedMemory) {
+ XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid);
}
mutex_.lock_combine([this, &assignedMemory] {
@@ -65,25 +65,28 @@ void BackgroundMover::checkAndRun() {
auto assignedMemory = mutex_.lock_combine([this] { return assignedMemory_; });
unsigned int moves = 0;
+ std::set<ClassId> classes{};
auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory);
for (size_t i = 0; i < batches.size(); i++) {
- const auto [pid, cid] = assignedMemory[i];
+ const auto [tid, pid, cid] = assignedMemory[i];
const auto batch = batches[i];
if (batch == 0) {
continue;
}
-
+ classes.insert(cid);
+ const auto& mpStats = cache_.getPoolByTid(pid, tid).getStats();
// try moving BATCH items from the class in order to reach free target
- auto moved = moverFunc(cache_, pid, cid, batch);
+ auto moved = moverFunc(cache_, tid, pid, cid, batch);
moves += moved;
- movesPerClass_[pid][cid] += moved;
- totalBytesMoved_.add(moved * cache_.getPool(pid).getAllocSizes()[cid]);
+ moves_per_class_[tid][pid][cid] += moved;
+ totalBytesMoved_.add(moved * mpStats.acStats.at(cid).allocSize);
}
numTraversals_.inc();
numMovedItems_.add(moves);
+ totalClasses_.add(classes.size());
}
template <typename CacheT>
@@ -92,24 +95,26 @@ BackgroundMoverStats BackgroundMover::getStats() const noexcept {
stats.numMovedItems = numMovedItems_.get();
stats.runCount = numTraversals_.get();
stats.totalBytesMoved = totalBytesMoved_.get();
+ stats.totalClasses = totalClasses_.get();
return stats;
}
template <typename CacheT>
-std::map<PoolId, std::map<ClassId, uint64_t>>
+std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
BackgroundMover<CacheT>::getClassStats() const noexcept {
- return movesPerClass_;
+ return moves_per_class_;
}
template <typename CacheT>
-size_t BackgroundMover<CacheT>::workerId(PoolId pid,
+size_t BackgroundMover<CacheT>::workerId(TierId tid,
+ PoolId pid,
ClassId cid,
size_t numWorkers) {
XDCHECK(numWorkers);
// TODO: come up with some better sharding (use hashing?)
- return (pid + cid) % numWorkers;
+ return (tid + pid + cid) % numWorkers;
}
} // namespace cachelib
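The sharding above is a plain modular sum, so distinct (tid, pid, cid) tuples
can collide on the same worker; for example, (0, 1, 2) and (1, 2, 0) both map
to worker 3 % numWorkers. A self-contained sketch with plain integer stand-ins
for the id types (the real code uses TierId/PoolId/ClassId):

```cpp
#include <cassert>
#include <cstddef>

// Mirrors BackgroundMover<CacheT>::workerId above: sum the ids, wrap by the
// worker count. Hashing (the TODO above) would spread tuples more evenly.
size_t workerId(size_t tid, size_t pid, size_t cid, size_t numWorkers) {
  assert(numWorkers > 0);
  return (tid + pid + cid) % numWorkers;
}

int main() {
  // Two distinct (tier, pool, class) tuples land on the same worker.
  assert(workerId(0, 1, 2, 4) == workerId(1, 2, 0, 4));
  return 0;
}
```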
diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h
index a423129985..7fb88770c3 100644
--- a/cachelib/allocator/BackgroundMover.h
+++ b/cachelib/allocator/BackgroundMover.h
@@ -29,17 +29,19 @@ namespace cachelib {
template <typename C>
struct BackgroundMoverAPIWrapper {
static size_t traverseAndEvictItems(C& cache,
+ unsigned int tid,
unsigned int pid,
unsigned int cid,
size_t batch) {
- return cache.traverseAndEvictItems(pid, cid, batch);
+ return cache.traverseAndEvictItems(tid, pid, cid, batch);
}
static size_t traverseAndPromoteItems(C& cache,
+ unsigned int tid,
unsigned int pid,
unsigned int cid,
size_t batch) {
- return cache.traverseAndPromoteItems(pid, cid, batch);
+ return cache.traverseAndPromoteItems(tid, pid, cid, batch);
}
};
@@ -62,16 +64,18 @@ class BackgroundMover : public PeriodicWorker {
~BackgroundMover() override;
BackgroundMoverStats getStats() const noexcept;
- std::map<PoolId, std::map<ClassId, uint64_t>> getClassStats() const noexcept;
+ std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
+ getClassStats() const noexcept;
void setAssignedMemory(std::vector<MemoryDescriptorType>&& assignedMemory);
// return the id of the worker responsible for promoting/evicting from a particular
// pool and allocation class (the id is in range [0, numWorkers))
- static size_t workerId(PoolId pid, ClassId cid, size_t numWorkers);
+ static size_t workerId(TierId tid, PoolId pid, ClassId cid, size_t numWorkers);
private:
- std::map<PoolId, std::map<ClassId, uint64_t>> movesPerClass_;
+ std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
+ moves_per_class_;
// cache allocator's interface for evicting
using Item = typename Cache::Item;
@@ -79,7 +83,9 @@ class BackgroundMover : public PeriodicWorker {
std::shared_ptr<BackgroundMoverStrategy> strategy_;
MoverDir direction_;
- std::function<size_t(Cache&, unsigned int, unsigned int, size_t)> moverFunc;
+ std::function<size_t(Cache&, unsigned int, unsigned int, unsigned int, size_t)>
+ moverFunc;
// implements the actual logic of running the background evictor
void work() override final;
@@ -87,6 +93,7 @@ class BackgroundMover : public PeriodicWorker {
AtomicCounter numMovedItems_{0};
AtomicCounter numTraversals_{0};
+ AtomicCounter totalClasses_{0};
AtomicCounter totalBytesMoved_{0};
std::vector<MemoryDescriptorType> assignedMemory_;
diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h
index abf37edd13..14bde15908 100644
--- a/cachelib/allocator/BackgroundMoverStrategy.h
+++ b/cachelib/allocator/BackgroundMoverStrategy.h
@@ -22,7 +22,9 @@ namespace facebook {
namespace cachelib {
struct MemoryDescriptorType {
- MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {}
+ MemoryDescriptorType(TierId tid, PoolId pid, ClassId cid) :
+ tid_(tid), pid_(pid), cid_(cid) {}
+ TierId tid_;
PoolId pid_;
ClassId cid_;
};
diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt
index 6103cdc823..0f96a0cd7f 100644
--- a/cachelib/allocator/CMakeLists.txt
+++ b/cachelib/allocator/CMakeLists.txt
@@ -55,6 +55,7 @@ add_library (cachelib_allocator
PoolOptimizeStrategy.cpp
PoolRebalancer.cpp
PoolResizer.cpp
+ PrivateMemoryManager.cpp
RebalanceStrategy.cpp
SlabReleaseStats.cpp
TempShmMapping.cpp
diff --git a/cachelib/allocator/Cache.cpp b/cachelib/allocator/Cache.cpp
index 37bba99a67..8d958b3510 100644
--- a/cachelib/allocator/Cache.cpp
+++ b/cachelib/allocator/Cache.cpp
@@ -244,6 +244,7 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
statPrefix + "cache.size.configured",
memStats.configuredRamCacheSize + memStats.nvmCacheSize);
+ //TODO: add specific per-tier counters
const auto stats = getGlobalCacheStats();
// Eviction Stats
@@ -253,7 +254,8 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
// from both ram and nvm, this is counted as a single eviction from cache.
// Ram Evictions: item evicted from ram but it can be inserted into nvm
const std::string ramEvictionKey = statPrefix + "ram.evictions";
- counters_.updateDelta(ramEvictionKey, stats.numEvictions);
+ counters_.updateDelta(ramEvictionKey,
+ std::accumulate(stats.numEvictions.begin(), stats.numEvictions.end(), 0ULL));
// Nvm Evictions: item evicted from nvm but it can be still in ram
const std::string nvmEvictionKey = statPrefix + "nvm.evictions";
counters_.updateDelta(nvmEvictionKey, stats.numNvmEvictions);
@@ -295,11 +297,11 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
}
counters_.updateDelta(statPrefix + "cache.alloc_attempts",
- stats.allocAttempts);
+ std::accumulate(stats.allocAttempts.begin(), stats.allocAttempts.end(),0));
counters_.updateDelta(statPrefix + "cache.eviction_attempts",
- stats.evictionAttempts);
+ std::accumulate(stats.evictionAttempts.begin(),stats.evictionAttempts.end(),0));
counters_.updateDelta(statPrefix + "cache.alloc_failures",
- stats.allocFailures);
+ std::accumulate(stats.allocFailures.begin(),stats.allocFailures.end(),0));
counters_.updateDelta(statPrefix + "cache.invalid_allocs",
stats.invalidAllocs);
@@ -475,6 +477,10 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
visitEstimates(uploadStatsNanoToMicro, stats.allocateLatencyNs,
statPrefix + "allocate.latency_us");
+ visitEstimates(uploadStatsNanoToMicro, stats.bgEvictLatencyNs,
+ statPrefix + "background.eviction.latency_us");
+ visitEstimates(uploadStatsNanoToMicro, stats.bgPromoteLatencyNs,
+ statPrefix + "background.promotion.latency_us");
visitEstimates(uploadStatsNanoToMicro, stats.moveChainedLatencyNs,
statPrefix + "move.chained.latency_us");
visitEstimates(uploadStatsNanoToMicro, stats.moveRegularLatencyNs,
diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index e225ba8a01..515da3ac47 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -85,6 +85,9 @@ class CacheBase {
CacheBase(CacheBase&&) = default;
CacheBase& operator=(CacheBase&&) = default;
+ // TODO: come up with some reasonable number
+ static constexpr unsigned kMaxTiers = 2;
+
// Get a string referring to the cache name for this cache
virtual const std::string getCacheName() const = 0;
@@ -96,12 +99,24 @@ class CacheBase {
// @param poolId The pool id to query
virtual const MemoryPool& getPool(PoolId poolId) const = 0;
+ // Get the reference to a memory pool using a tier id, for stats purposes
+ //
+ // @param poolId The pool id to query
+ // @param tierId The tier of the pool id
+ virtual const MemoryPool& getPoolByTid(PoolId poolId, TierId tid) const = 0;
+
// Get Pool specific stats (regular pools). This includes stats from the
// Memory Pool and also the cache.
//
// @param poolId the pool id
virtual PoolStats getPoolStats(PoolId poolId) const = 0;
+ // Get Allocation Class specific stats.
+ //
+ // @param tierId the tier id
+ // @param poolId the pool id
+ // @param classId the class id
+ virtual ACStats getACStats(TierId tid, PoolId poolId, ClassId classId) const = 0;
+
// @param poolId the pool id
virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0;
diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index 75921ad11a..888a03dc40 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -16,6 +16,8 @@
#pragma once
+#include
+
namespace facebook {
namespace cachelib {
@@ -35,6 +37,7 @@ CacheAllocator::CacheAllocator(SharedMemNewT, Config config)
template <typename CacheTrait>
CacheAllocator<CacheTrait>::CacheAllocator(SharedMemAttachT, Config config)
: CacheAllocator(InitMemType::kMemAttach, config) {
+ /* TODO - per tier? */
for (auto pid : *metadata_.compactCachePools()) {
isCompactCachePool_[pid] = true;
}
@@ -56,6 +59,9 @@ CacheAllocator::CacheAllocator(
tempShm_(type == InitMemType::kNone && isOnShm_
? std::make_unique<TempShmMapping>(config_.getCacheSize())
: nullptr),
+ privMemManager_(type == InitMemType::kNone && !isOnShm_
+ ? std::make_unique<PrivateMemoryManager>()
+ : nullptr),
shmManager_(type != InitMemType::kNone
? std::make_unique<ShmManager>(config_.cacheDir,
config_.isUsingPosixShm())
@@ -67,12 +73,12 @@ CacheAllocator::CacheAllocator(
: serialization::CacheAllocatorMetadata{}},
allocator_(initAllocator(type)),
compactCacheManager_(type != InitMemType::kMemAttach
- ? std::make_unique<CCacheManager>(*allocator_)
- : restoreCCacheManager()),
+ ? std::make_unique<CCacheManager>(*allocator_[0] /* TODO: per tier */)
+ : restoreCCacheManager(0 /* TODO: per tier */)),
compressor_(createPtrCompressor()),
mmContainers_(type == InitMemType::kMemAttach
? deserializeMMContainers(*deserializer_, compressor_)
- : MMContainers{}),
+ : MMContainers{getNumTiers()}),
accessContainer_(initAccessContainer(
type, detail::kShmHashTableName, config.accessConfig)),
chainedItemAccessContainer_(
@@ -107,48 +113,115 @@ CacheAllocator::~CacheAllocator() {
}
template <typename CacheTrait>
-ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts() {
+ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts(TierId tid) {
ShmSegmentOpts opts;
opts.alignment = sizeof(Slab);
// TODO: we support up to two tiers so far
- if (config_.memoryTierConfigs.size() > 1) {
- throw std::invalid_argument("CacheLib only supports a single memory tier");
+ if (config_.memoryTierConfigs.size() > 2) {
+ throw std::invalid_argument("CacheLib only supports two memory tiers");
}
- opts.memBindNumaNodes = config_.memoryTierConfigs[0].getMemBind();
+ opts.memBindNumaNodes = config_.memoryTierConfigs[tid].getMemBind();
+ return opts;
+}
+
+template <typename CacheTrait>
+PrivateSegmentOpts CacheAllocator<CacheTrait>::createPrivateSegmentOpts(TierId tid) {
+ PrivateSegmentOpts opts;
+ opts.alignment = sizeof(Slab);
+ auto memoryTierConfigs = config_.getMemoryTierConfigs();
+ opts.memBindNumaNodes = memoryTierConfigs[tid].getMemBind();
+
return opts;
}
+template <typename CacheTrait>
+size_t CacheAllocator<CacheTrait>::memoryTierSize(TierId tid) const {
+ auto& memoryTierConfigs = config_.memoryTierConfigs;
+ auto partitions = std::accumulate(memoryTierConfigs.begin(), memoryTierConfigs.end(), 0UL,
+ [](const size_t i, const MemoryTierCacheConfig& config){
+ return i + config.getRatio();
+ });
+
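+ // Example (an assumption about calculateTierSize semantics): with tier
+ // ratios {1, 3} and an 8 GiB cache, partitions == 4, so tier 0 gets
+ // 2 GiB and tier 1 gets 6 GiB.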
+ return memoryTierConfigs[tid].calculateTierSize(config_.getCacheSize(), partitions);
+}
+
+template <typename CacheTrait>
+std::unique_ptr<MemoryAllocator>
+CacheAllocator<CacheTrait>::createPrivateAllocator(TierId tid) {
+ if (isOnShm_)
+ return std::make_unique<MemoryAllocator>(
+ getAllocatorConfig(config_),
+ tempShm_->getAddr(),
+ memoryTierSize(tid));
+ else
+ return std::make_unique<MemoryAllocator>(
+ getAllocatorConfig(config_),
+ privMemManager_->createMapping(config_.size, createPrivateSegmentOpts(tid)),
+ memoryTierSize(tid));
+}
+
template <typename CacheTrait>
std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::createNewMemoryAllocator() {
+CacheAllocator<CacheTrait>::createNewMemoryAllocator(TierId tid) {
+ size_t tierSize = memoryTierSize(tid);
return std::make_unique<MemoryAllocator>(
getAllocatorConfig(config_),
shmManager_
- ->createShm(detail::kShmCacheName, config_.getCacheSize(),
- config_.slabMemoryBaseAddr, createShmCacheOpts())
+ ->createShm(detail::kShmCacheName + std::to_string(tid),
+ tierSize, config_.slabMemoryBaseAddr,
+ createShmCacheOpts(tid))
.addr,
- config_.getCacheSize());
+ tierSize);
}
template <typename CacheTrait>
std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::restoreMemoryAllocator() {
+CacheAllocator<CacheTrait>::restoreMemoryAllocator(TierId tid) {
return std::make_unique<MemoryAllocator>(
deserializer_->deserialize(),
shmManager_
- ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr,
- createShmCacheOpts())
- .addr,
- config_.getCacheSize(),
+ ->attachShm(detail::kShmCacheName + std::to_string(tid),
+ config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr,
+ memoryTierSize(tid),
config_.disableFullCoredump);
}
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createPrivateAllocators() {
+ std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(createPrivateAllocator(tid));
+ }
+ return allocators;
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createAllocators() {
+ std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(createNewMemoryAllocator(tid));
+ }
+ return allocators;
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::restoreAllocators() {
+ std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+ for (int tid = 0; tid < getNumTiers(); tid++) {
+ allocators.emplace_back(restoreMemoryAllocator(tid));
+ }
+ return allocators;
+}
+
template <typename CacheTrait>
std::unique_ptr<CCacheManager>
-CacheAllocator<CacheTrait>::restoreCCacheManager() {
+CacheAllocator<CacheTrait>::restoreCCacheManager(TierId tid) {
return std::make_unique<CCacheManager>(
deserializer_->deserialize(),
- *allocator_);
+ *allocator_[tid]);
}
template <typename CacheTrait>
@@ -252,21 +325,15 @@ void CacheAllocator::initWorkers() {
}
template <typename CacheTrait>
-std::unique_ptr<MemoryAllocator> CacheAllocator<CacheTrait>::initAllocator(
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::initAllocator(
InitMemType type) {
if (type == InitMemType::kNone) {
- if (isOnShm_ == true) {
- return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
- tempShm_->getAddr(),
- config_.getCacheSize());
- } else {
- return std::make_unique<MemoryAllocator>(getAllocatorConfig(config_),
- config_.getCacheSize());
- }
+ return createPrivateAllocators();
} else if (type == InitMemType::kMemNew) {
- return createNewMemoryAllocator();
+ return createAllocators();
} else if (type == InitMemType::kMemAttach) {
- return restoreMemoryAllocator();
+ return restoreAllocators();
}
// Invalid type
@@ -334,42 +401,49 @@ CacheAllocator::allocate(PoolId poolId,
}
template <typename CacheTrait>
-bool CacheAllocator<CacheTrait>::shouldWakeupBgEvictor(PoolId /* pid */,
+bool CacheAllocator<CacheTrait>::shouldWakeupBgEvictor(TierId tid,
+ PoolId /* pid */,
ClassId /* cid */) {
return false;
}
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::WriteHandle
-CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
- typename Item::Key key,
- uint32_t size,
- uint32_t creationTime,
- uint32_t expiryTime,
- bool fromBgThread) {
- util::LatencyTracker tracker{stats().allocateLatency_};
-
+CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
+ PoolId pid,
+ typename Item::Key key,
+ uint32_t size,
+ uint32_t creationTime,
+ uint32_t expiryTime,
+ bool fromBgThread,
+ bool evict) {
+ util::LatencyTracker tracker{stats().allocateLatency_, static_cast(!fromBgThread)};
SCOPE_FAIL { stats_.invalidAllocs.inc(); };
// number of bytes required for this item
const auto requiredSize = Item::getRequiredSize(key, size);
// the allocation class in our memory allocator.
- const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+ const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
- (*stats_.allocAttempts)[pid][cid].inc();
+ util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]};
- void* memory = allocator_->allocate(pid, requiredSize);
+ (*stats_.allocAttempts)[tid][pid][cid].inc();
+
+ void* memory = allocator_[tid]->allocate(pid, requiredSize);
if (backgroundEvictor_.size() && !fromBgThread &&
- (memory == nullptr || shouldWakeupBgEvictor(pid, cid))) {
+ (memory == nullptr || shouldWakeupBgEvictor(tid, pid, cid))) {
backgroundEvictor_[BackgroundMover<CacheT>::workerId(
- pid, cid, backgroundEvictor_.size())]
+ tid, pid, cid, backgroundEvictor_.size())]
->wakeUp();
}
if (memory == nullptr) {
- memory = findEviction(pid, cid);
+ if (!evict) {
+ return {};
+ }
+ memory = findEviction(tid, pid, cid);
}
WriteHandle handle;
@@ -380,18 +454,18 @@ CacheAllocator::allocateInternal(PoolId pid,
// for example.
SCOPE_FAIL {
// free back the memory to the allocator since we failed.
- allocator_->free(memory);
+ allocator_[tid]->free(memory);
};
handle = acquire(new (memory) Item(key, size, creationTime, expiryTime));
if (handle) {
handle.markNascent();
- (*stats_.fragmentationSize)[pid][cid].add(
+ (*stats_.fragmentationSize)[tid][pid][cid].add(
util::getFragmentation(*this, *handle));
}
} else { // failed to allocate memory.
- (*stats_.allocFailures)[pid][cid].inc();
+ (*stats_.allocFailures)[tid][pid][cid].inc();
// wake up rebalancer
if (!config_.poolRebalancerDisableForcedWakeUp && poolRebalancer_) {
poolRebalancer_->wakeUp();
@@ -408,6 +482,23 @@ CacheAllocator::allocateInternal(PoolId pid,
return handle;
}
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
+ typename Item::Key key,
+ uint32_t size,
+ uint32_t creationTime,
+ uint32_t expiryTime,
+ bool fromBgThread) {
+ /* TODO: consult an admission policy to pick the starting tier */
+ for (TierId tid = 0; tid < getNumTiers(); ++tid) {
+ bool evict = !config_.insertToFirstFreeTier || tid == getNumTiers() - 1;
+ auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime, fromBgThread, evict);
+ if (handle) return handle;
+ }
+ return {};
+}
+
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::WriteHandle
CacheAllocator<CacheTrait>::allocateChainedItem(const ReadHandle& parent,
@@ -431,6 +522,19 @@ template
typename CacheAllocator<CacheTrait>::WriteHandle
CacheAllocator<CacheTrait>::allocateChainedItemInternal(const Item& parent,
uint32_t size) {
+ /* TODO: consult an admission policy to pick the starting tier */
+ for (TierId tid = 0; tid < getNumTiers(); ++tid) {
+ auto handle = allocateChainedItemInternalTier(parent, size, tid);
+ if (handle) return handle;
+ }
+ return {};
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::allocateChainedItemInternalTier(const Item& parent,
+ uint32_t size,
+ TierId tid) {
util::LatencyTracker tracker{stats().allocateLatency_};
SCOPE_FAIL { stats_.invalidAllocs.inc(); };
@@ -438,28 +542,32 @@ CacheAllocator::allocateChainedItemInternal(const Item& parent,
// number of bytes required for this item
const auto requiredSize = ChainedItem::getRequiredSize(size);
- const auto pid = allocator_->getAllocInfo(parent.getMemory()).poolId;
- const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+ const auto ptid = getTierId(parent); // it is okay because pools/classes are duplicated among the tiers
+ const auto pid = allocator_[ptid]->getAllocInfo(parent.getMemory()).poolId;
+ const auto cid = allocator_[ptid]->getAllocationClassId(pid, requiredSize);
- (*stats_.allocAttempts)[pid][cid].inc();
+ util::RollingLatencyTracker rollTracker{
+ (*stats_.classAllocLatency)[tid][pid][cid]};
+
+ (*stats_.allocAttempts)[tid][pid][cid].inc();
- void* memory = allocator_->allocate(pid, requiredSize);
+ void* memory = allocator_[tid]->allocate(pid, requiredSize);
if (memory == nullptr) {
- memory = findEviction(pid, cid);
+ memory = findEviction(tid, pid, cid);
}
if (memory == nullptr) {
- (*stats_.allocFailures)[pid][cid].inc();
+ (*stats_.allocFailures)[tid][pid][cid].inc();
return WriteHandle{};
}
- SCOPE_FAIL { allocator_->free(memory); };
+ SCOPE_FAIL { allocator_[tid]->free(memory); };
auto child = acquire(new (memory) ChainedItem(
compressor_.compress(&parent), size, util::getCurrentTimeSec()));
if (child) {
child.markNascent();
- (*stats_.fragmentationSize)[pid][cid].add(
+ (*stats_.fragmentationSize)[tid][pid][cid].add(
util::getFragmentation(*this, *child));
}
@@ -786,8 +894,8 @@ CacheAllocator::releaseBackToAllocator(Item& it,
throw std::runtime_error(
folly::sformat("cannot release this item: {}", it.toString()));
}
-
- const auto allocInfo = allocator_->getAllocInfo(it.getMemory());
+ const auto tid = getTierId(it);
+ const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory());
if (ctx == RemoveContext::kEviction) {
const auto timeNow = util::getCurrentTimeSec();
@@ -798,21 +906,23 @@ CacheAllocator::releaseBackToAllocator(Item& it,
stats_.perPoolEvictionAgeSecs_[allocInfo.poolId].trackValue(refreshTime);
}
- (*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
+ (*stats_.fragmentationSize)[tid][allocInfo.poolId][allocInfo.classId].sub(
util::getFragmentation(*this, it));
// Chained items can only end up in this place if the user has allocated
// memory for a chained item but has decided not to insert the chained item
// to a parent item and instead drop the chained item handle. In this case,
// we free the chained item directly without calling remove callback.
- if (it.isChainedItem()) {
+ //
+ // Except if we are moving a chained item between tiers -
+ // then it == toRecycle and we will want the normal recycle path
+ if (it.isChainedItem() && &it != toRecycle) {
if (toRecycle) {
throw std::runtime_error(
folly::sformat("Can not recycle a chained item {}, toRecyle",
it.toString(), toRecycle->toString()));
}
-
- allocator_->free(&it);
+ allocator_[tid]->free(&it);
return ReleaseRes::kReleased;
}
@@ -879,25 +989,28 @@ CacheAllocator::releaseBackToAllocator(Item& it,
while (head) {
auto next = head->getNext(compressor_);
-
+ const auto tid = getTierId(head);
const auto childInfo =
- allocator_->getAllocInfo(static_cast<const void*>(head));
- (*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub(
+ allocator_[tid]->getAllocInfo(static_cast<const void*>(head));
+ (*stats_.fragmentationSize)[tid][childInfo.poolId][childInfo.classId].sub(
util::getFragmentation(*this, *head));
removeFromMMContainer(*head);
+ XDCHECK(!head->isMoving());
// No other thread can access any of the chained items by this point,
// so the refcount for each chained item must be equal to 1. Since
// we use 1 to mark an item as being linked to a parent item.
const auto childRef = head->decRef();
XDCHECK_EQ(0u, childRef);
+ // Item is not moving and refcount is 0, so we can proceed to
+ // free it or recycle the memory
if (head == toRecycle) {
XDCHECK(ReleaseRes::kReleased != res);
res = ReleaseRes::kRecycled;
} else {
- allocator_->free(head);
+ allocator_[tid]->free(head);
}
stats_.numChainedChildItems.dec();
@@ -907,11 +1020,12 @@ CacheAllocator::releaseBackToAllocator(Item& it,
}
if (&it == toRecycle) {
+ XDCHECK_EQ(it.getRefCount(),0u);
XDCHECK(ReleaseRes::kReleased != res);
res = ReleaseRes::kRecycled;
} else {
XDCHECK(it.isDrained());
- allocator_->free(&it);
+ allocator_[tid]->free(&it);
}
return res;
@@ -1155,7 +1269,7 @@ CacheAllocator::insertOrReplace(const WriteHandle& handle) {
* Concurrent threads which are getting handle to the same key:
* 1. When a handle is created it checks if the moving flag is set
* 2. If so, Handle implementation creates waitContext and adds it to the
- * MoveCtx by calling handleWithWaitContextForMovingItem() method.
+ * MoveCtx by calling tryGetHandleWithWaitContextForMovingItem() method.
* 3. Wait until the moving thread will complete its job.
*/
template <typename CacheTrait>
@@ -1215,14 +1329,19 @@ bool CacheAllocator::moveRegularItem(Item& oldItem,
newItemHdl->markNvmClean();
}
- // Execute the move callback. We cannot make any guarantees about the
- // consistency of the old item beyond this point, because the callback can
- // do more than a simple memcpy() e.g. update external references. If there
- // are any remaining handles to the old item, it is the caller's
- // responsibility to invalidate them. The move can only fail after this
- // statement if the old item has been removed or replaced, in which case it
- // should be fine for it to be left in an inconsistent state.
- config_.moveCb(oldItem, *newItemHdl, nullptr);
+ if (config_.moveCb) {
+ // Execute the move callback. We cannot make any guarantees about the
+ // consistency of the old item beyond this point, because the callback can
+ // do more than a simple memcpy() e.g. update external references. If there
+ // are any remaining handles to the old item, it is the caller's
+ // responsibility to invalidate them. The move can only fail after this
+ // statement if the old item has been removed or replaced, in which case it
+ // should be fine for it to be left in an inconsistent state.
+ config_.moveCb(oldItem, *newItemHdl, nullptr);
+ } else {
+ std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(),
+ oldItem.getSize());
+ }
// Adding the item to mmContainer has to succeed since no one can remove the
// item
@@ -1270,14 +1389,19 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem,
auto parentPtr = &parentItem;
- // Execute the move callback. We cannot make any guarantees about the
- // consistency of the old item beyond this point, because the callback can
- // do more than a simple memcpy() e.g. update external references. If there
- // are any remaining handles to the old item, it is the caller's
- // responsibility to invalidate them. The move can only fail after this
- // statement if the old item has been removed or replaced, in which case it
- // should be fine for it to be left in an inconsistent state.
- config_.moveCb(oldItem, *newItemHdl, parentPtr);
+ if (config_.moveCb) {
+ // Execute the move callback. We cannot make any guarantees about the
+ // consistency of the old item beyond this point, because the callback can
+ // do more than a simple memcpy() e.g. update external references. If there
+ // are any remaining handles to the old item, it is the caller's
+ // responsibility to invalidate them. The move can only fail after this
+ // statement if the old item has been removed or replaced, in which case it
+ // should be fine for it to be left in an inconsistent state.
+ config_.moveCb(oldItem, *newItemHdl, parentPtr);
+ } else {
+ std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(),
+ oldItem.getSize());
+ }
// Replace the new item in the position of the old one before both in the
// parent's chain and the MMContainer.
@@ -1315,20 +1439,27 @@ void CacheAllocator::unlinkItemForEviction(Item& it) {
template <typename CacheTrait>
std::pair<typename CacheAllocator<CacheTrait>::Item*,
          typename CacheAllocator<CacheTrait>::Item*>
-CacheAllocator<CacheTrait>::getNextCandidate(PoolId pid,
+CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
+ PoolId pid,
ClassId cid,
unsigned int& searchTries) {
typename NvmCacheT::PutToken token;
Item* toRecycle = nullptr;
+ Item* toRecycleParent = nullptr;
Item* candidate = nullptr;
- auto& mmContainer = getMMContainer(pid, cid);
-
- mmContainer.withEvictionIterator([this, pid, cid, &candidate, &toRecycle,
- &searchTries, &mmContainer,
- &token](auto&& itr) {
+ bool isExpired = false;
+ bool chainedItem = false;
+ auto& mmContainer = getMMContainer(tid, pid, cid);
+ bool lastTier = tid+1 >= getNumTiers();
+
+ mmContainer.withEvictionIterator([this, tid, pid, cid, &candidate,
+ &toRecycle, &toRecycleParent,
+ &chainedItem,
+ &searchTries, &mmContainer, &lastTier,
+ &isExpired, &token](auto&& itr) {
if (!itr) {
++searchTries;
- (*stats_.evictionAttempts)[pid][cid].inc();
+ (*stats_.evictionAttempts)[tid][pid][cid].inc();
return;
}
@@ -1336,50 +1467,84 @@ CacheAllocator::getNextCandidate(PoolId pid,
config_.evictionSearchTries > searchTries) &&
itr) {
++searchTries;
- (*stats_.evictionAttempts)[pid][cid].inc();
+ (*stats_.evictionAttempts)[tid][pid][cid].inc();
auto* toRecycle_ = itr.get();
- auto* candidate_ =
- toRecycle_->isChainedItem()
+ bool chainedItem_ = toRecycle_->isChainedItem();
+ Item* toRecycleParent_ = chainedItem_
? &toRecycle_->asChainedItem().getParentItem(compressor_)
- : toRecycle_;
+ : nullptr;
+ // in order to safely check if the expected parent (toRecycleParent_) matches
+ // the current parent on the chained item, we need to take the chained
+ // item lock so we are sure that nobody else will be editing the chain
+ auto l_ = chainedItem_
+ ? chainedItemLocks_.tryLockExclusive(toRecycleParent_->getKey())
+ : decltype(chainedItemLocks_.tryLockExclusive(toRecycle_->getKey()))();
+
+ if (chainedItem_ &&
+ ( !l_ || &toRecycle_->asChainedItem().getParentItem(compressor_)
+ != toRecycleParent_) ) {
+ // Fail moving if we either couldn't acquire the chained item lock,
+ // or if the parent had already been replaced in the meanwhile.
+ ++itr;
+ continue;
+ }
+ Item* candidate_;
+ Item* syncItem_;
+ //sync on the parent item for chained items to move to next tier
+ if (!lastTier && chainedItem_) {
+ syncItem_ = toRecycleParent_;
+ candidate_ = toRecycle_;
+ } else if (lastTier && chainedItem_) {
+ candidate_ = toRecycleParent_;
+ syncItem_ = toRecycleParent_;
+ } else {
+ candidate_ = toRecycle_;
+ syncItem_ = toRecycle_;
+ }
+ // if it's last tier, the item will be evicted
+ // need to create put token before marking it exclusive
+ const bool evictToNvmCache = lastTier && shouldWriteToNvmCache(*candidate_);
- const bool evictToNvmCache = shouldWriteToNvmCache(*candidate_);
- auto putToken = evictToNvmCache
- ? nvmCache_->createPutToken(candidate_->getKey())
- : typename NvmCacheT::PutToken{};
+ auto token_ = evictToNvmCache
+ ? nvmCache_->createPutToken(candidate_->getKey())
+ : typename NvmCacheT::PutToken{};
- if (evictToNvmCache && !putToken.isValid()) {
+ if (evictToNvmCache && !token_.isValid()) {
stats_.evictFailConcurrentFill.inc();
++itr;
continue;
}
- auto markedForEviction = candidate_->markForEviction();
- if (!markedForEviction) {
+ auto marked = (lastTier || candidate_->isExpired()) ? syncItem_->markForEviction() : syncItem_->markMoving();
+ if (!marked) {
if (candidate_->hasChainedItem()) {
stats_.evictFailParentAC.inc();
} else {
stats_.evictFailAC.inc();
}
++itr;
+ XDCHECK_EQ(toRecycle,nullptr);
+ XDCHECK_EQ(candidate,nullptr);
continue;
}
+ XDCHECK(syncItem_->isMoving() || syncItem_->isMarkedForEviction());
+ toRecycleParent = toRecycleParent_;
+ chainedItem = chainedItem_;
// markForEviction to make sure no other thead is evicting the item
- // nor holding a handle to that item
+ // nor holding a handle to that item if this is last tier
+ // since we won't be moving the item to the next tier
toRecycle = toRecycle_;
candidate = candidate_;
- token = std::move(putToken);
-
- // Check if parent changed for chained items - if yes, we cannot
- // remove the child from the mmContainer as we will not be evicting
- // it. We could abort right here, but we need to cleanup in case
- // unmarkForEviction() returns 0 - so just go through normal path.
- if (!toRecycle_->isChainedItem() ||
- &toRecycle->asChainedItem().getParentItem(compressor_) == candidate) {
- mmContainer.remove(itr);
+ isExpired = candidate_->isExpired();
+ token = std::move(token_);
+ if (chainedItem) {
+ XDCHECK(l_);
+ XDCHECK_EQ(toRecycleParent,&toRecycle_->asChainedItem().getParentItem(compressor_));
}
+ mmContainer.remove(itr);
+
return;
}
});
@@ -1390,25 +1555,106 @@ CacheAllocator::getNextCandidate(PoolId pid,
XDCHECK(toRecycle);
XDCHECK(candidate);
- XDCHECK(candidate->isMarkedForEviction());
- unlinkItemForEviction(*candidate);
+ auto evictedToNext = (lastTier || isExpired) ? nullptr
+ : tryEvictToNextMemoryTier(*candidate, false);
+ if (!evictedToNext) {
+ // failed to move a chained item - so evict the entire chain
+ if (candidate->isChainedItem()) {
+ // candidate should be the parent now
+ XDCHECK(toRecycleParent->isMoving());
+ XDCHECK_EQ(candidate, toRecycle);
+ candidate = toRecycleParent; // we now evict the chain and, in
+ // doing so, recycle the child
+ }
+ // If insertOrReplace was called during the move, the candidate will not
+ // be accessible (the replace failed during tryEvict); that is why we
+ // failed to evict to the next tier, and insertOrReplace will remove the
+ // item from the NVM cache. However, if the candidate is still accessible,
+ // the allocation in the next tier must have failed, so we continue to
+ // evict the item to the NVM cache.
+ bool failedToReplace = !candidate->isAccessible();
+ if (!token.isValid() && !failedToReplace) {
+ token = createPutToken(*candidate);
+ }
+ // tryEvictToNextMemoryTier can fail if:
+ // a) allocation of the new item fails; in that case,
+ // it should still be possible to mark the item for eviction.
+ // b) another thread calls insertOrReplace and the item
+ // is no longer accessible
+ //
+ // in case we are on the last tier, we would have already marked the item
+ // as exclusive since we will not be moving it to the next tier
+ // but rather just evicting it altogether; no need to
+ // markForEvictionWhenMoving
+ auto ret = (lastTier || isExpired) ? true : candidate->markForEvictionWhenMoving();
+ XDCHECK(ret);
+
+ unlinkItemForEviction(*candidate);
+
+ if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)
+ && !failedToReplace) {
+ nvmCache_->put(*candidate, std::move(token));
+ }
+ // wake up any readers that wait for the move to complete
+ // it's safe to do now, as we have the item marked exclusive and
+ // no other reader can be added to the waiters list
+ wakeUpWaiters(candidate->getKey(), {});
- if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)) {
- nvmCache_->put(*candidate, std::move(token));
+ } else {
+ XDCHECK(!evictedToNext->isMarkedForEviction() && !evictedToNext->isMoving());
+ XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
+ XDCHECK(!candidate->isAccessible());
+ XDCHECK(candidate->getKey() == evictedToNext->getKey());
+
+ (*stats_.numWritebacks)[tid][pid][cid].inc();
+ if (chainedItem) {
+ XDCHECK(toRecycleParent->isMoving());
+ XDCHECK_EQ(evictedToNext->getRefCount(),2u);
+ (*stats_.chainedItemEvictions)[tid][pid][cid].inc();
+ // check if by releasing the item we intend to, we actually
+ // recycle the candidate.
+ auto ret = releaseBackToAllocator(*candidate, RemoveContext::kEviction,
+ /* isNascent */ false, toRecycle);
+ XDCHECK_EQ(ret,ReleaseRes::kRecycled);
+ evictedToNext.reset(); // once we unmark moving, threads will try to alloc;
+ // drop the handle now - the refcount will drop to 1
+ auto ref = toRecycleParent->unmarkMoving();
+ if (UNLIKELY(ref == 0)) {
+ wakeUpWaiters(toRecycleParent->getKey(),{});
+ const auto res =
+ releaseBackToAllocator(*toRecycleParent, RemoveContext::kNormal, false);
+ XDCHECK(res == ReleaseRes::kReleased);
+ } else {
+ auto parentHandle = acquire(toRecycleParent);
+ if (parentHandle) {
+ wakeUpWaiters(toRecycleParent->getKey(),std::move(parentHandle));
+ } // if the parent handle is null, some other thread
+ // would have called wakeUpWaiters with a null handle and released
+ // the parent back to the allocator
+ }
+ } else {
+ wakeUpWaiters(candidate->getKey(), std::move(evictedToNext));
+ }
}
+
+ XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
+
return {candidate, toRecycle};
}
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::Item*
-CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
+CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
// Keep searching for a candidate until we were able to evict it
// or until the search limit has been exhausted
unsigned int searchTries = 0;
while (config_.evictionSearchTries == 0 ||
config_.evictionSearchTries > searchTries) {
- auto [candidate, toRecycle] = getNextCandidate(pid, cid, searchTries);
+ auto [candidate, toRecycle] = getNextCandidate(tid, pid, cid, searchTries);
// Reached the end of the eviction queue but couldn't find a candidate,
// start again.
@@ -1419,9 +1665,9 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) {
// NULL. If `ref` == 0 then it means that we are the last holder of
// that item.
if (candidate->hasChainedItem()) {
- (*stats_.chainedItemEvictions)[pid][cid].inc();
+ (*stats_.chainedItemEvictions)[tid][pid][cid].inc();
} else {
- (*stats_.regularItemEvictions)[pid][cid].inc();
+ (*stats_.regularItemEvictions)[tid][pid][cid].inc();
}
if (auto eventTracker = getEventTracker()) {
@@ -1489,6 +1735,116 @@ bool CacheAllocator::shouldWriteToNvmCacheExclusive(
return true;
}
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
+ TierId tid, PoolId pid, Item& item, bool fromBgThread) {
+
+ TierId nextTier = tid; // TODO - calculate this based on some admission policy
+ while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers
+ // always evict item from the nextTier to make room for new item
+ bool evict = true;
+
+ // allocateInternal might trigger another eviction
+ WriteHandle newItemHdl{};
+ Item* parentItem;
+ bool chainedItem = false;
+ if(item.isChainedItem()) {
+ chainedItem = true;
+ parentItem = &item.asChainedItem().getParentItem(compressor_);
+ XDCHECK(parentItem->isMoving());
+ XDCHECK(item.isChainedItem() && item.getRefCount() == 1);
+ XDCHECK_EQ(0, parentItem->getRefCount());
+ newItemHdl = allocateChainedItemInternalTier(*parentItem,
+ item.getSize(),
+ nextTier);
+ } else {
+ // this assert can fail if parent changed
+ XDCHECK(item.isMoving());
+ XDCHECK(item.getRefCount() == 0);
+ newItemHdl = allocateInternalTier(nextTier, pid,
+ item.getKey(),
+ item.getSize(),
+ item.getCreationTime(),
+ item.getExpiryTime(),
+ fromBgThread,
+ evict);
+ }
+
+ if (newItemHdl) {
+ bool moveSuccess = chainedItem
+ ? moveChainedItem(item.asChainedItem(), newItemHdl)
+ : moveRegularItem(item, newItemHdl);
+ if (!moveSuccess) {
+ return WriteHandle{};
+ }
+ XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
+ if (!chainedItem) { // TODO: do we need it?
+ XDCHECK_EQ(newItemHdl->getKey(),item.getKey());
+ item.unmarkMoving();
+ }
+ return newItemHdl;
+ } else {
+ return WriteHandle{};
+ }
+ }
+
+ return {};
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(Item& item, bool fromBgThread) {
+ auto tid = getTierId(item);
+ auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId;
+ return tryEvictToNextMemoryTier(tid, pid, item, fromBgThread);
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
+ TierId tid, PoolId pid, Item& item, bool fromBgThread) {
+ if(item.isExpired()) { return {}; }
+ TierId nextTier = tid;
+ while (nextTier > 0) { // try to promote up to the next (faster) memory tier
+ auto toPromoteTier = nextTier - 1;
+ --nextTier;
+
+ // always evict item from the toPromoteTier to make room for new item
+ bool evict = true;
+
+ // allocateInternal might trigger another eviction
+ auto newItemHdl = allocateInternalTier(toPromoteTier, pid,
+ item.getKey(),
+ item.getSize(),
+ item.getCreationTime(),
+ item.getExpiryTime(),
+ fromBgThread,
+ true);
+
+ if (newItemHdl) {
+ XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
+ if (!moveRegularItem(item, newItemHdl)) {
+ return WriteHandle{};
+ }
+ item.unmarkMoving();
+ return newItemHdl;
+ } else {
+ return WriteHandle{};
+ }
+ }
+
+ return {};
+}
+
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::WriteHandle
+CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(Item& item, bool fromBgThread) {
+ auto tid = getTierId(item);
+ auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId;
+ return tryPromoteToNextMemoryTier(tid, pid, item, fromBgThread);
+}
+
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::RemoveRes
CacheAllocator<CacheTrait>::remove(typename Item::Key key) {
@@ -1689,21 +2045,57 @@ void CacheAllocator::invalidateNvm(Item& item) {
}
}
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTierId(const Item& item) const {
+ return getTierId(item.getMemory());
+}
+
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTierId(const void* ptr) const {
+ for (TierId tid = 0; tid < getNumTiers(); tid++) {
+ if (allocator_[tid]->isMemoryInAllocator(ptr))
+ return tid;
+ }
+
+ throw std::invalid_argument("Item does not belong to any tier!");
+}
+
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::MMContainer&
CacheAllocator<CacheTrait>::getMMContainer(const Item& item) const noexcept {
+ const auto tid = getTierId(item);
const auto allocInfo =
- allocator_->getAllocInfo(static_cast<const void*>(&item));
- return getMMContainer(allocInfo.poolId, allocInfo.classId);
+ allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
+ return getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
}
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::MMContainer&
-CacheAllocator<CacheTrait>::getMMContainer(PoolId pid,
+CacheAllocator<CacheTrait>::getMMContainer(TierId tid,
+ PoolId pid,
ClassId cid) const noexcept {
- XDCHECK_LT(static_cast<size_t>(pid), mmContainers_.size());
- XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[pid].size());
- return *mmContainers_[pid][cid];
+ XDCHECK_LT(static_cast<size_t>(tid), mmContainers_.size());
+ XDCHECK_LT(static_cast<size_t>(pid), mmContainers_[tid].size());
+ XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[tid][pid].size());
+ return *mmContainers_[tid][pid][cid];
+}
+
+template <typename CacheTrait>
+MMContainerStat CacheAllocator<CacheTrait>::getMMContainerStat(
+ TierId tid, PoolId pid, ClassId cid) const noexcept {
+ if (static_cast<size_t>(tid) >= mmContainers_.size()) {
+ return MMContainerStat{};
+ }
+ if (static_cast<size_t>(pid) >= mmContainers_[tid].size()) {
+ return MMContainerStat{};
+ }
+ if (static_cast<size_t>(cid) >= mmContainers_[tid][pid].size()) {
+ return MMContainerStat{};
+ }
+ return mmContainers_[tid][pid][cid] ? mmContainers_[tid][pid][cid]->getStats()
+ : MMContainerStat{};
}
template <typename CacheTrait>
@@ -1892,23 +2284,25 @@ void CacheAllocator::markUseful(const ReadHandle& handle,
template <typename CacheTrait>
bool CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
AccessMode mode) {
+ const auto tid = getTierId(item);
const auto allocInfo =
- allocator_->getAllocInfo(static_cast<const void*>(&item));
- (*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc();
+ allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
+ (*stats_.cacheHits)[tid][allocInfo.poolId][allocInfo.classId].inc();
// track recently accessed items if needed
if (UNLIKELY(config_.trackRecentItemsForDump)) {
ring_->trackItem(reinterpret_cast(&item), item.getSize());
}
- auto& mmContainer = getMMContainer(allocInfo.poolId, allocInfo.classId);
+ auto& mmContainer = getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
return mmContainer.recordAccess(item, mode);
}
template <typename CacheTrait>
uint32_t CacheAllocator<CacheTrait>::getUsableSize(const Item& item) const {
+ const auto tid = getTierId(item);
const auto allocSize =
- allocator_->getAllocInfo(static_cast<const void*>(&item)).allocSize;
+ allocator_[tid]->getAllocInfo(static_cast<const void*>(&item)).allocSize;
return item.isChainedItem()
? allocSize - ChainedItem::getRequiredSize(0)
: allocSize - Item::getRequiredSize(item.getKey(), 0);
@@ -1917,8 +2311,10 @@ uint32_t CacheAllocator::getUsableSize(const Item& item) const {
template <typename CacheTrait>
typename CacheAllocator<CacheTrait>::SampleItem
CacheAllocator<CacheTrait>::getSampleItem() {
- size_t nvmCacheSize = nvmCache_ ? nvmCache_->getUsableSize() : 0;
- size_t ramCacheSize = allocator_->getMemorySizeInclAdvised();
+ // TODO: is using random tier a good idea?
+ auto tid = folly::Random::rand32() % getNumTiers();
+ static size_t nvmCacheSize = nvmCache_ ? nvmCache_->getUsableSize() : 0;
+ static size_t ramCacheSize = allocator_[tid]->getMemorySizeInclAdvised();
bool fromNvm =
folly::Random::rand64(0, nvmCacheSize + ramCacheSize) >= ramCacheSize;
@@ -1927,19 +2323,18 @@ CacheAllocator::getSampleItem() {
}
// Sampling from DRAM cache
- auto item = reinterpret_cast<const Item*>(allocator_->getRandomAlloc());
+ auto item = reinterpret_cast<const Item*>(allocator_[tid]->getRandomAlloc());
if (!item || UNLIKELY(item->isExpired())) {
return SampleItem{false /* fromNvm */};
}
// Check that item returned is the same that was sampled
-
auto sharedHdl = std::make_shared<ReadHandle>(findInternal(item->getKey()));
if (sharedHdl->get() != item) {
return SampleItem{false /* fromNvm */};
}
- const auto allocInfo = allocator_->getAllocInfo(item->getMemory());
+ const auto allocInfo = allocator_[tid]->getAllocInfo(item->getMemory());
// Convert the Item to IOBuf to make SampleItem
auto iobuf = folly::IOBuf{
@@ -1958,28 +2353,33 @@ CacheAllocator::getSampleItem() {
template <typename CacheTrait>
std::vector<std::string> CacheAllocator<CacheTrait>::dumpEvictionIterator(
- PoolId pid, ClassId cid, size_t numItems) {
+ PoolId pid, ClassId cid, size_t numItems) {
if (numItems == 0) {
return {};
}
- if (static_cast<size_t>(pid) >= mmContainers_.size() ||
- static_cast<size_t>(cid) >= mmContainers_[pid].size()) {
+ // Always evict from the lowest layer.
+ int tid = getNumTiers() - 1;
+
+ if (static_cast<size_t>(tid) >= mmContainers_.size() ||
+ static_cast<size_t>(pid) >= mmContainers_[tid].size() ||
+ static_cast<size_t>(cid) >= mmContainers_[tid][pid].size()) {
throw std::invalid_argument(
- folly::sformat("Invalid PoolId: {} and ClassId: {}.", pid, cid));
+ folly::sformat("Invalid TierId: {} and PoolId: {} and ClassId: {}.", tid, pid, cid));
}
std::vector content;
- auto& mm = *mmContainers_[pid][cid];
- auto evictItr = mm.getEvictionIterator();
- size_t i = 0;
- while (evictItr && i < numItems) {
- content.push_back(evictItr->toString());
- ++evictItr;
- ++i;
+ while (tid >= 0) {
+ auto& mm = *mmContainers_[tid][pid][cid];
+ mm.withEvictionIterator([&content, numItems](auto&& itr) {
+ while (itr && content.size() < numItems) {
+ content.push_back(itr->toString());
+ ++itr;
+ }
+ });
+ --tid;
}
-
return content;
}
@@ -2155,14 +2555,34 @@ PoolId CacheAllocator::addPool(
std::shared_ptr resizeStrategy,
bool ensureProvisionable) {
std::unique_lock w(poolsResizeAndRebalanceLock_);
- auto pid = allocator_->addPool(name, size, allocSizes, ensureProvisionable);
+
+ PoolId pid = 0;
+ size_t totalCacheSize = 0;
+
+ for (TierId tid = 0; tid < getNumTiers(); tid++) {
+ totalCacheSize += allocator_[tid]->getMemorySize();
+ }
+
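+ // Split the requested pool size across tiers in proportion to each
+ // tier's share of the total cache memory.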
+ for (TierId tid = 0; tid < getNumTiers(); tid++) {
+ auto tierSizeRatio =
+ static_cast(allocator_[tid]->getMemorySize()) / totalCacheSize;
+ size_t tierPoolSize = static_cast(tierSizeRatio * size);
+
+ // TODO: what if we manage to add pool only in one tier?
+ // we should probably remove that on failure
+ auto res = allocator_[tid]->addPool(
+ name, tierPoolSize, allocSizes, ensureProvisionable);
+ XDCHECK(tid == 0 || res == pid);
+ pid = res;
+ }
+
createMMContainers(pid, std::move(config));
setRebalanceStrategy(pid, std::move(rebalanceStrategy));
setResizeStrategy(pid, std::move(resizeStrategy));
if (backgroundEvictor_.size()) {
auto memoryAssignments =
- createBgWorkerMemoryAssignments(backgroundEvictor_.size());
+ createBgWorkerMemoryAssignments(backgroundEvictor_.size(), 0);
for (size_t id = 0; id < backgroundEvictor_.size(); id++)
backgroundEvictor_[id]->setAssignedMemory(
std::move(memoryAssignments[id]));
@@ -2170,7 +2590,7 @@ PoolId CacheAllocator<CacheTrait>::addPool(
if (backgroundPromoter_.size()) {
auto memoryAssignments =
- createBgWorkerMemoryAssignments(backgroundPromoter_.size());
+ createBgWorkerMemoryAssignments(backgroundPromoter_.size(), 1);
for (size_t id = 0; id < backgroundPromoter_.size(); id++)
backgroundPromoter_[id]->setAssignedMemory(
std::move(memoryAssignments[id]));
@@ -2182,9 +2602,9 @@ PoolId CacheAllocator<CacheTrait>::addPool(
template <typename CacheTrait>
void CacheAllocator<CacheTrait>::overridePoolRebalanceStrategy(
PoolId pid, std::shared_ptr<RebalanceStrategy> rebalanceStrategy) {
- if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+ if (static_cast<size_t>(pid) >= mmContainers_[0].size()) {
throw std::invalid_argument(folly::sformat(
- "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+ "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size()));
}
setRebalanceStrategy(pid, std::move(rebalanceStrategy));
}
@@ -2192,9 +2612,9 @@ void CacheAllocator<CacheTrait>::overridePoolRebalanceStrategy(
template <typename CacheTrait>
void CacheAllocator<CacheTrait>::overridePoolResizeStrategy(
PoolId pid, std::shared_ptr<RebalanceStrategy> resizeStrategy) {
- if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+ if (static_cast<size_t>(pid) >= mmContainers_[0].size()) {
throw std::invalid_argument(folly::sformat(
- "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+ "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size()));
}
setResizeStrategy(pid, std::move(resizeStrategy));
}
@@ -2206,14 +2626,14 @@ void CacheAllocator<CacheTrait>::overridePoolOptimizeStrategy(
}
template <typename CacheTrait>
-void CacheAllocator<CacheTrait>::overridePoolConfig(PoolId pid,
+void CacheAllocator<CacheTrait>::overridePoolConfig(TierId tid, PoolId pid,
const MMConfig& config) {
- if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+ // TODO: add generic tier id checking
+ if (static_cast<size_t>(pid) >= mmContainers_[tid].size()) {
throw std::invalid_argument(folly::sformat(
- "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+ "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[tid].size()));
}
-
- auto& pool = allocator_->getPool(pid);
+ auto& pool = allocator_[tid]->getPool(pid);
for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) {
MMConfig mmConfig = config;
mmConfig.addExtraConfig(
@@ -2221,29 +2641,35 @@ void CacheAllocator<CacheTrait>::overridePoolConfig(TierId tid, PoolId pid,
? pool.getAllocationClass(static_cast<ClassId>(cid))
.getAllocsPerSlab()
: 0);
- DCHECK_NOTNULL(mmContainers_[pid][cid].get());
- mmContainers_[pid][cid]->setConfig(mmConfig);
+ DCHECK_NOTNULL(mmContainers_[tid][pid][cid].get());
+ mmContainers_[tid][pid][cid]->setConfig(mmConfig);
}
}
template <typename CacheTrait>
void CacheAllocator<CacheTrait>::createMMContainers(const PoolId pid,
MMConfig config) {
- auto& pool = allocator_->getPool(pid);
+ // Pools on each tier should have the same number of class ids, etc.
+ // TODO: think about deduplication
+ auto& pool = allocator_[0]->getPool(pid);
+
for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) {
config.addExtraConfig(
config_.trackTailHits
? pool.getAllocationClass(static_cast<ClassId>(cid))
.getAllocsPerSlab()
: 0);
- mmContainers_[pid][cid].reset(new MMContainer(config, compressor_));
+ for (TierId tid = 0; tid < getNumTiers(); tid++) {
+ mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_));
+ }
}
}
template <typename CacheTrait>
PoolId CacheAllocator<CacheTrait>::getPoolId(
folly::StringPiece name) const noexcept {
- return allocator_->getPoolId(name.str());
+ // Each tier has the same set of pools.
+ return allocator_[0]->getPoolId(name.str());
}
// The Function returns a consolidated vector of Release Slab
@@ -2286,7 +2712,9 @@ std::set<PoolId> CacheAllocator<CacheTrait>::filterCompactCachePools(
template <typename CacheTrait>
std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIds() const {
std::shared_lock r(poolsResizeAndRebalanceLock_);
- return filterCompactCachePools(allocator_->getPoolIds());
+ // TODO - get rid of the duplication - right now, each tier
+ // holds pool objects with mostly the same info
+ return filterCompactCachePools(allocator_[0]->getPoolIds());
}
template <typename CacheTrait>
@@ -2311,10 +2739,9 @@ std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIdsForResize()
// getAdvisedMemorySize - then pools may be overLimit even when
// all slabs are not allocated. Otherwise, pools may be overLimit
// only after all slabs are allocated.
- //
- return (allocator_->allSlabsAllocated()) ||
- (allocator_->getAdvisedMemorySize() != 0)
- ? filterCompactCachePools(allocator_->getPoolsOverLimit())
+ return (allocator_[currentTier()]->allSlabsAllocated()) ||
+ (allocator_[currentTier()]->getAdvisedMemorySize() != 0)
+ ? filterCompactCachePools(allocator_[currentTier()]->getPoolsOverLimit())
: std::set{};
}
@@ -2323,9 +2750,21 @@ const std::string CacheAllocator<CacheTrait>::getCacheName() const {
return config_.cacheName;
}
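+// Returns the pool's combined size across all tiers: addPool() creates one
+// slice of the pool per tier, so the per-tier slice sizes are summed here.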
+template <typename CacheTrait>
+size_t CacheAllocator<CacheTrait>::getPoolSize(PoolId poolId) const {
+ size_t poolSize = 0;
+ for (auto& allocator : allocator_) {
+ const auto& pool = allocator->getPool(poolId);
+ poolSize += pool.getPoolSize();
+ }
+ return poolSize;
+}
+
template <typename CacheTrait>
PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
- const auto& pool = allocator_->getPool(poolId);
+ // This pool ref is only used to get class ids, which are the same
+ // across tiers.
+ const auto& pool = allocator_[currentTier()]->getPool(poolId);
const auto& allocSizes = pool.getAllocSizes();
auto mpStats = pool.getStats();
const auto& classIds = mpStats.classIds;
@@ -2343,27 +2782,43 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
// TODO export evictions, numItems etc from compact cache directly.
if (!isCompactCache) {
for (const ClassId cid : classIds) {
- uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get();
- XDCHECK(mmContainers_[poolId][cid],
- folly::sformat("Pid {}, Cid {} not initialized.", poolId, cid));
+ uint64_t allocAttempts = 0, evictionAttempts = 0, allocFailures = 0,
+ fragmentationSize = 0, classHits = 0, chainedItemEvictions = 0,
+ regularItemEvictions = 0, numWritebacks = 0;
+ MMContainerStat mmContainerStats;
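+ // Sum each per-class counter across tiers; the stats_ arrays are now
+ // indexed by [tid][poolId][cid].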
+ for (TierId tid = 0; tid < getNumTiers(); tid++) {
+ allocAttempts += (*stats_.allocAttempts)[tid][poolId][cid].get();
+ evictionAttempts += (*stats_.evictionAttempts)[tid][poolId][cid].get();
+ allocFailures += (*stats_.allocFailures)[tid][poolId][cid].get();
+ fragmentationSize += (*stats_.fragmentationSize)[tid][poolId][cid].get();
+ classHits += (*stats_.cacheHits)[tid][poolId][cid].get();
+ chainedItemEvictions += (*stats_.chainedItemEvictions)[tid][poolId][cid].get();
+ regularItemEvictions += (*stats_.regularItemEvictions)[tid][poolId][cid].get();
+ numWritebacks += (*stats_.numWritebacks)[tid][poolId][cid].get();
+ mmContainerStats += getMMContainerStat(tid, poolId, cid);
+ XDCHECK(mmContainers_[tid][poolId][cid],
+ folly::sformat("Tid {}, Pid {}, Cid {} not initialized.", tid, poolId, cid));
+ }
cacheStats.insert(
{cid,
- {allocSizes[cid], (*stats_.allocAttempts)[poolId][cid].get(),
- (*stats_.evictionAttempts)[poolId][cid].get(),
- (*stats_.allocFailures)[poolId][cid].get(),
- (*stats_.fragmentationSize)[poolId][cid].get(), classHits,
- (*stats_.chainedItemEvictions)[poolId][cid].get(),
- (*stats_.regularItemEvictions)[poolId][cid].get(),
- mmContainers_[poolId][cid]->getStats()}
-
- });
+ {allocSizes[cid],
+ allocAttempts,
+ evictionAttempts,
+ allocFailures,
+ fragmentationSize,
+ classHits,
+ chainedItemEvictions,
+ regularItemEvictions,
+ numWritebacks,
+ mmContainerStats}});
totalHits += classHits;
}
}
PoolStats ret;
ret.isCompactCache = isCompactCache;
- ret.poolName = allocator_->getPoolName(poolId);
+ // Pool name is also shared among tiers.
+ ret.poolName = allocator_[currentTier()]->getPoolName(poolId);
ret.poolSize = pool.getPoolSize();
ret.poolUsableSize = pool.getPoolUsableSize();
ret.poolAdvisedSize = pool.getPoolAdvisedSize();
@@ -2375,22 +2830,86 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
return ret;
}
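+// Per-tier overload of getPoolStats(): reports stats for the given tier
+// only, with no cross-tier aggregation (unlike the PoolId-only overload
+// above).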
+template <typename CacheTrait>
+PoolStats CacheAllocator<CacheTrait>::getPoolStats(TierId tid, PoolId poolId) const {
+ const auto& pool = allocator_[tid]->getPool(poolId);
+ const auto& allocSizes = pool.getAllocSizes();
+ auto mpStats = pool.getStats();
+ const auto& classIds = mpStats.classIds;
+
+ // check if this is a compact cache.
+ bool isCompactCache = false;
+ {
+ std::shared_lock lock(compactCachePoolsLock_);
+ isCompactCache = isCompactCachePool_[poolId];
+ }
+
+ //std::unordered_map<ClassId, CacheStat> cacheStats;
+ folly::F14FastMap<ClassId, CacheStat> cacheStats;
+ uint64_t totalHits = 0;
+ // cacheStats is only meaningful for pools that are not compact caches.
+ // TODO export evictions, numItems etc from compact cache directly.
+ if (!isCompactCache) {
+ for (const ClassId cid : classIds) {
+ uint64_t classHits = (*stats_.cacheHits)[tid][poolId][cid].get();
+ XDCHECK(mmContainers_[tid][poolId][cid],
+ folly::sformat("Tid {}, Pid {}, Cid {} not initialized.", tid, poolId, cid));
+ cacheStats.insert(
+ {cid,
+ {allocSizes[cid],
+ (*stats_.allocAttempts)[tid][poolId][cid].get(),
+ (*stats_.evictionAttempts)[tid][poolId][cid].get(),
+ (*stats_.allocFailures)[tid][poolId][cid].get(),
+ (*stats_.fragmentationSize)[tid][poolId][cid].get(),
+ classHits,
+ (*stats_.chainedItemEvictions)[tid][poolId][cid].get(),
+ (*stats_.regularItemEvictions)[tid][poolId][cid].get(),
+ (*stats_.numWritebacks)[tid][poolId][cid].get(),
+ getMMContainerStat(tid, poolId, cid)}});
+ totalHits += classHits;
+ }
+ }
+
+ PoolStats ret;
+ ret.isCompactCache = isCompactCache;
+ ret.poolName = allocator_[tid]->getPoolName(poolId);
+ ret.poolSize = pool.getPoolSize();
+ ret.poolUsableSize = pool.getPoolUsableSize();
+ ret.poolAdvisedSize = pool.getPoolAdvisedSize();
+ ret.cacheStats = std::move(cacheStats);
+ ret.mpStats = std::move(mpStats);
+ ret.numPoolGetHits = totalHits;
+ ret.evictionAgeSecs = stats_.perPoolEvictionAgeSecs_[poolId].estimate();
+
+ return ret;
+}
+
+template <typename CacheTrait>