55#include " kv_cache_block_manager.hpp"
66
77#include < algorithm>
8- #include < iomanip>
9- #include < sstream>
108
119#include " logging.hpp"
1210#include " util.hpp"
@@ -38,7 +36,6 @@ KVCacheBlockManager::KVCacheBlockManager(uint32_t block_size,
3836 block.id = i;
3937 // block.tensor defaults to empty SoPtr (allocate on-demand)
4038 block.num_tokens = 0 ;
41- block.capacity = block_size;
4239 block.state = Block::State::FREE;
4340
4441 blocks_.push_back (std::move (block));
@@ -82,26 +79,6 @@ std::optional<uint32_t> KVCacheBlockManager::allocate_block() {
8279 return block_id;
8380}
8481
85- void KVCacheBlockManager::free_block (uint32_t block_id) {
86- validate_block_id (block_id);
87-
88- auto & block = blocks_[block_id];
89-
90- if (block.state == Block::State::FREE) {
91- LOG_WARN (" KVCacheBlockManager: Attempt to free already-free block " << block_id);
92- return ;
93- }
94-
95- // Reset block state (keep memory allocated for reuse)
96- block.num_tokens = 0 ;
97- block.state = Block::State::FREE;
98-
99- // Return to free pool
100- free_block_ids_.push (block_id);
101-
102- LOG_VERB (" KVCacheBlockManager: Freed block " << block_id << " (free blocks: " << free_block_ids_.size () << " )" );
103- }
104-
10582ov::SoPtr<ov::ITensor> KVCacheBlockManager::get_block_tensor (uint32_t block_id) {
10683 validate_block_id (block_id);
10784
@@ -117,18 +94,6 @@ ov::SoPtr<ov::ITensor> KVCacheBlockManager::get_block_tensor(uint32_t block_id)
11794 return block.tensor ;
11895}
11996
120- ov::SoPtr<const ov::ITensor> KVCacheBlockManager::get_block_tensor (uint32_t block_id) const {
121- validate_block_id (block_id);
122-
123- const auto & block = blocks_[block_id];
124-
125- if (!block.tensor ) {
126- OPENVINO_THROW (" KVCacheBlockManager: Block " , block_id, " has no allocated tensor." );
127- }
128-
129- return block.tensor ;
130- }
131-
13297void KVCacheBlockManager::update_block_tokens (uint32_t block_id, uint32_t num_tokens) {
13398 validate_block_id (block_id);
13499
@@ -160,16 +125,6 @@ uint32_t KVCacheBlockManager::get_block_tokens(uint32_t block_id) const {
160125 return blocks_[block_id].num_tokens ;
161126}
162127
163- std::optional<uint32_t > KVCacheBlockManager::find_unfilled_block () const {
164- // Search in reverse order (most recently allocated first)
165- for (auto it = blocks_.rbegin (); it != blocks_.rend (); ++it) {
166- if (it->state == Block::State::ALLOCATED && it->has_space ()) {
167- return it->id ;
168- }
169- }
170- return std::nullopt ;
171- }
172-
173128std::vector<uint32_t > KVCacheBlockManager::get_allocated_blocks () const {
174129 std::vector<uint32_t > allocated;
175130 allocated.reserve (max_blocks_);
@@ -205,63 +160,6 @@ void KVCacheBlockManager::clear_all() {
205160 LOG_DEBUG (" KVCacheBlockManager: All blocks cleared" );
206161}
207162
208- KVCacheBlockManager::Stats KVCacheBlockManager::get_stats () const {
209- Stats stats;
210- stats.total_blocks = max_blocks_;
211- stats.free_blocks = static_cast <uint32_t >(free_block_ids_.size ());
212- stats.allocated_blocks = 0 ;
213- stats.full_blocks = 0 ;
214- stats.total_tokens = 0 ;
215- stats.total_capacity = block_size_ * max_blocks_;
216-
217- uint32_t partially_filled_blocks = 0 ;
218-
219- for (const auto & block : blocks_) {
220- if (block.state != Block::State::FREE) {
221- stats.allocated_blocks ++;
222- stats.total_tokens += block.num_tokens ;
223-
224- if (block.state == Block::State::FULL) {
225- stats.full_blocks ++;
226- } else if (block.num_tokens > 0 && block.num_tokens < block_size_) {
227- partially_filled_blocks++;
228- }
229- }
230- }
231-
232- // Calculate utilization
233- if (stats.total_capacity > 0 ) {
234- stats.utilization = static_cast <float >(stats.total_tokens ) / static_cast <float >(stats.total_capacity );
235- } else {
236- stats.utilization = 0 .0f ;
237- }
238-
239- // Calculate fragmentation (ratio of partially filled blocks)
240- if (stats.allocated_blocks > 0 ) {
241- stats.fragmentation = static_cast <float >(partially_filled_blocks) / static_cast <float >(stats.allocated_blocks );
242- } else {
243- stats.fragmentation = 0 .0f ;
244- }
245-
246- return stats;
247- }
248-
249- void KVCacheBlockManager::print_stats () const {
250- auto stats = get_stats ();
251-
252- std::ostringstream oss;
253- oss << " KVCacheBlockManager Stats:\n "
254- << " Total Blocks: " << stats.total_blocks << " \n "
255- << " Allocated Blocks: " << stats.allocated_blocks << " \n "
256- << " Full Blocks: " << stats.full_blocks << " \n "
257- << " Free Blocks: " << stats.free_blocks << " \n "
258- << " Total Tokens: " << stats.total_tokens << " / " << stats.total_capacity << " \n "
259- << " Utilization: " << std::fixed << std::setprecision (1 ) << (stats.utilization * 100 .0f ) << " %\n "
260- << " Fragmentation: " << std::fixed << std::setprecision (1 ) << (stats.fragmentation * 100 .0f ) << " %" ;
261-
262- LOG_INFO (oss.str ());
263- }
264-
265163void KVCacheBlockManager::validate_block_id (uint32_t block_id) const {
266164 if (block_id >= max_blocks_) {
267165 OPENVINO_THROW (" KVCacheBlockManager: Invalid block ID " , block_id, " (valid range: 0-" , max_blocks_ - 1 , " )" );
0 commit comments