Skip to content

Commit fc05263

Browse files
committed
Clean up unused methods.
Signed-off-by: intelgaoxiong <xiong.gao@intel.com>
1 parent 6c75888 commit fc05263

File tree

2 files changed

+0
-181
lines changed

2 files changed

+0
-181
lines changed

src/plugins/intel_npu/src/plugin/npuw/kv_cache_block_manager.cpp

Lines changed: 0 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
#include "kv_cache_block_manager.hpp"
66

77
#include <algorithm>
8-
#include <iomanip>
9-
#include <sstream>
108

119
#include "logging.hpp"
1210
#include "util.hpp"
@@ -38,7 +36,6 @@ KVCacheBlockManager::KVCacheBlockManager(uint32_t block_size,
3836
block.id = i;
3937
// block.tensor defaults to empty SoPtr (allocate on-demand)
4038
block.num_tokens = 0;
41-
block.capacity = block_size;
4239
block.state = Block::State::FREE;
4340

4441
blocks_.push_back(std::move(block));
@@ -82,26 +79,6 @@ std::optional<uint32_t> KVCacheBlockManager::allocate_block() {
8279
return block_id;
8380
}
8481

85-
void KVCacheBlockManager::free_block(uint32_t block_id) {
86-
validate_block_id(block_id);
87-
88-
auto& block = blocks_[block_id];
89-
90-
if (block.state == Block::State::FREE) {
91-
LOG_WARN("KVCacheBlockManager: Attempt to free already-free block " << block_id);
92-
return;
93-
}
94-
95-
// Reset block state (keep memory allocated for reuse)
96-
block.num_tokens = 0;
97-
block.state = Block::State::FREE;
98-
99-
// Return to free pool
100-
free_block_ids_.push(block_id);
101-
102-
LOG_VERB("KVCacheBlockManager: Freed block " << block_id << " (free blocks: " << free_block_ids_.size() << ")");
103-
}
104-
10582
ov::SoPtr<ov::ITensor> KVCacheBlockManager::get_block_tensor(uint32_t block_id) {
10683
validate_block_id(block_id);
10784

@@ -117,18 +94,6 @@ ov::SoPtr<ov::ITensor> KVCacheBlockManager::get_block_tensor(uint32_t block_id)
11794
return block.tensor;
11895
}
11996

120-
ov::SoPtr<const ov::ITensor> KVCacheBlockManager::get_block_tensor(uint32_t block_id) const {
121-
validate_block_id(block_id);
122-
123-
const auto& block = blocks_[block_id];
124-
125-
if (!block.tensor) {
126-
OPENVINO_THROW("KVCacheBlockManager: Block ", block_id, " has no allocated tensor.");
127-
}
128-
129-
return block.tensor;
130-
}
131-
13297
void KVCacheBlockManager::update_block_tokens(uint32_t block_id, uint32_t num_tokens) {
13398
validate_block_id(block_id);
13499

@@ -160,16 +125,6 @@ uint32_t KVCacheBlockManager::get_block_tokens(uint32_t block_id) const {
160125
return blocks_[block_id].num_tokens;
161126
}
162127

163-
std::optional<uint32_t> KVCacheBlockManager::find_unfilled_block() const {
164-
// Search in reverse order (most recently allocated first)
165-
for (auto it = blocks_.rbegin(); it != blocks_.rend(); ++it) {
166-
if (it->state == Block::State::ALLOCATED && it->has_space()) {
167-
return it->id;
168-
}
169-
}
170-
return std::nullopt;
171-
}
172-
173128
std::vector<uint32_t> KVCacheBlockManager::get_allocated_blocks() const {
174129
std::vector<uint32_t> allocated;
175130
allocated.reserve(max_blocks_);
@@ -205,63 +160,6 @@ void KVCacheBlockManager::clear_all() {
205160
LOG_DEBUG("KVCacheBlockManager: All blocks cleared");
206161
}
207162

208-
KVCacheBlockManager::Stats KVCacheBlockManager::get_stats() const {
209-
Stats stats;
210-
stats.total_blocks = max_blocks_;
211-
stats.free_blocks = static_cast<uint32_t>(free_block_ids_.size());
212-
stats.allocated_blocks = 0;
213-
stats.full_blocks = 0;
214-
stats.total_tokens = 0;
215-
stats.total_capacity = block_size_ * max_blocks_;
216-
217-
uint32_t partially_filled_blocks = 0;
218-
219-
for (const auto& block : blocks_) {
220-
if (block.state != Block::State::FREE) {
221-
stats.allocated_blocks++;
222-
stats.total_tokens += block.num_tokens;
223-
224-
if (block.state == Block::State::FULL) {
225-
stats.full_blocks++;
226-
} else if (block.num_tokens > 0 && block.num_tokens < block_size_) {
227-
partially_filled_blocks++;
228-
}
229-
}
230-
}
231-
232-
// Calculate utilization
233-
if (stats.total_capacity > 0) {
234-
stats.utilization = static_cast<float>(stats.total_tokens) / static_cast<float>(stats.total_capacity);
235-
} else {
236-
stats.utilization = 0.0f;
237-
}
238-
239-
// Calculate fragmentation (ratio of partially filled blocks)
240-
if (stats.allocated_blocks > 0) {
241-
stats.fragmentation = static_cast<float>(partially_filled_blocks) / static_cast<float>(stats.allocated_blocks);
242-
} else {
243-
stats.fragmentation = 0.0f;
244-
}
245-
246-
return stats;
247-
}
248-
249-
void KVCacheBlockManager::print_stats() const {
250-
auto stats = get_stats();
251-
252-
std::ostringstream oss;
253-
oss << "KVCacheBlockManager Stats:\n"
254-
<< " Total Blocks: " << stats.total_blocks << "\n"
255-
<< " Allocated Blocks: " << stats.allocated_blocks << "\n"
256-
<< " Full Blocks: " << stats.full_blocks << "\n"
257-
<< " Free Blocks: " << stats.free_blocks << "\n"
258-
<< " Total Tokens: " << stats.total_tokens << " / " << stats.total_capacity << "\n"
259-
<< " Utilization: " << std::fixed << std::setprecision(1) << (stats.utilization * 100.0f) << "%\n"
260-
<< " Fragmentation: " << std::fixed << std::setprecision(1) << (stats.fragmentation * 100.0f) << "%";
261-
262-
LOG_INFO(oss.str());
263-
}
264-
265163
void KVCacheBlockManager::validate_block_id(uint32_t block_id) const {
266164
if (block_id >= max_blocks_) {
267165
OPENVINO_THROW("KVCacheBlockManager: Invalid block ID ", block_id, " (valid range: 0-", max_blocks_ - 1, ")");

src/plugins/intel_npu/src/plugin/npuw/kv_cache_block_manager.hpp

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -46,50 +46,13 @@ class KVCacheBlockManager {
4646
uint32_t id; ///< Unique block identifier
4747
ov::SoPtr<ov::ITensor> tensor; ///< Block memory tensor
4848
uint32_t num_tokens; ///< Number of tokens stored in this block
49-
uint32_t capacity; ///< Maximum tokens this block can hold
5049

5150
enum class State {
5251
FREE, ///< Block is free and available for allocation
5352
ALLOCATED, ///< Block is allocated but not yet filled
5453
FULL, ///< Block is completely filled
55-
SHARED ///< Block is shared across multiple requests (for prefix caching)
5654
};
5755
State state;
58-
59-
/**
60-
* @brief Check if the block is completely filled
61-
*/
62-
bool is_full() const {
63-
return num_tokens >= capacity;
64-
}
65-
66-
/**
67-
* @brief Check if the block has available space
68-
*/
69-
bool has_space() const {
70-
return num_tokens < capacity;
71-
}
72-
73-
/**
74-
* @brief Get remaining capacity
75-
*/
76-
uint32_t remaining_capacity() const {
77-
return capacity - num_tokens;
78-
}
79-
};
80-
81-
/**
82-
* @brief Statistics about block manager state
83-
*/
84-
struct Stats {
85-
uint32_t total_blocks; ///< Total number of blocks in pool
86-
uint32_t allocated_blocks; ///< Number of currently allocated blocks
87-
uint32_t full_blocks; ///< Number of completely filled blocks
88-
uint32_t free_blocks; ///< Number of free blocks
89-
uint32_t total_tokens; ///< Total tokens stored across all blocks
90-
uint32_t total_capacity; ///< Total capacity (blocks * block_size)
91-
float utilization; ///< Memory utilization ratio (0.0 - 1.0)
92-
float fragmentation; ///< Fragmentation ratio (partially filled blocks)
9356
};
9457

9558
/**
@@ -126,13 +89,6 @@ class KVCacheBlockManager {
12689
*/
12790
std::optional<uint32_t> allocate_block();
12891

129-
/**
130-
* @brief Free a previously allocated block
131-
*
132-
* @param block_id ID of the block to free
133-
*/
134-
void free_block(uint32_t block_id);
135-
13692
/**
13793
* @brief Get the tensor associated with a block
13894
*
@@ -141,11 +97,6 @@ class KVCacheBlockManager {
14197
*/
14298
ov::SoPtr<ov::ITensor> get_block_tensor(uint32_t block_id);
14399

144-
/**
145-
* @brief Get the tensor associated with a block (const version)
146-
*/
147-
ov::SoPtr<const ov::ITensor> get_block_tensor(uint32_t block_id) const;
148-
149100
/**
150101
* @brief Update the number of tokens stored in a block
151102
*
@@ -162,15 +113,6 @@ class KVCacheBlockManager {
162113
*/
163114
uint32_t get_block_tokens(uint32_t block_id) const;
164115

165-
/**
166-
* @brief Find the last allocated block that is not yet full
167-
*
168-
* Useful for appending new tokens to existing blocks during generate phase
169-
*
170-
* @return Block ID if found, std::nullopt otherwise
171-
*/
172-
std::optional<uint32_t> find_unfilled_block() const;
173-
174116
/**
175117
* @brief Get list of all currently allocated block IDs
176118
*
@@ -185,20 +127,6 @@ class KVCacheBlockManager {
185127
*/
186128
void clear_all();
187129

188-
/**
189-
* @brief Get current statistics
190-
*
191-
* @return Stats structure with current state
192-
*/
193-
Stats get_stats() const;
194-
195-
/**
196-
* @brief Print statistics to log
197-
*
198-
* For debugging and monitoring purposes
199-
*/
200-
void print_stats() const;
201-
202130
/**
203131
* @brief Get block size (tokens per block)
204132
*/
@@ -213,13 +141,6 @@ class KVCacheBlockManager {
213141
return max_blocks_;
214142
}
215143

216-
/**
217-
* @brief Get total capacity (tokens)
218-
*/
219-
uint32_t get_total_capacity() const {
220-
return block_size_ * max_blocks_;
221-
}
222-
223144
/**
224145
* @brief Pair of key/value block managers for one transformer layer
225146
*/

0 commit comments

Comments
 (0)