Clean up unused methods, fields, and types in KVCacheBlockManager.

intelgaoxiong · intelgaoxiong · commit fc052633befc · 2026-03-17T20:13:57.000-07:00
Signed-off-by: intelgaoxiong <xiong.gao@intel.com>
diff --git a/src/plugins/intel_npu/src/plugin/npuw/kv_cache_block_manager.cpp b/src/plugins/intel_npu/src/plugin/npuw/kv_cache_block_manager.cpp
@@ -5,8 +5,6 @@
 #include "kv_cache_block_manager.hpp"
 
 #include <algorithm>
-#include <iomanip>
-#include <sstream>
 
 #include "logging.hpp"
 #include "util.hpp"
@@ -38,7 +36,6 @@ KVCacheBlockManager::KVCacheBlockManager(uint32_t block_size,
         block.id = i;
         // block.tensor defaults to empty SoPtr (allocate on-demand)
         block.num_tokens = 0;
-        block.capacity = block_size;
         block.state = Block::State::FREE;
 
         blocks_.push_back(std::move(block));
@@ -82,26 +79,6 @@ std::optional<uint32_t> KVCacheBlockManager::allocate_block() {
     return block_id;
 }
 
-void KVCacheBlockManager::free_block(uint32_t block_id) {
-    validate_block_id(block_id);
-
-    auto& block = blocks_[block_id];
-
-    if (block.state == Block::State::FREE) {
-        LOG_WARN("KVCacheBlockManager: Attempt to free already-free block " << block_id);
-        return;
-    }
-
-    // Reset block state (keep memory allocated for reuse)
-    block.num_tokens = 0;
-    block.state = Block::State::FREE;
-
-    // Return to free pool
-    free_block_ids_.push(block_id);
-
-    LOG_VERB("KVCacheBlockManager: Freed block " << block_id << " (free blocks: " << free_block_ids_.size() << ")");
-}
-
 ov::SoPtr<ov::ITensor> KVCacheBlockManager::get_block_tensor(uint32_t block_id) {
     validate_block_id(block_id);
 
@@ -117,18 +94,6 @@ ov::SoPtr<ov::ITensor> KVCacheBlockManager::get_block_tensor(uint32_t block_id)
     return block.tensor;
 }
 
-ov::SoPtr<const ov::ITensor> KVCacheBlockManager::get_block_tensor(uint32_t block_id) const {
-    validate_block_id(block_id);
-
-    const auto& block = blocks_[block_id];
-
-    if (!block.tensor) {
-        OPENVINO_THROW("KVCacheBlockManager: Block ", block_id, " has no allocated tensor.");
-    }
-
-    return block.tensor;
-}
-
 void KVCacheBlockManager::update_block_tokens(uint32_t block_id, uint32_t num_tokens) {
     validate_block_id(block_id);
 
@@ -160,16 +125,6 @@ uint32_t KVCacheBlockManager::get_block_tokens(uint32_t block_id) const {
     return blocks_[block_id].num_tokens;
 }
 
-std::optional<uint32_t> KVCacheBlockManager::find_unfilled_block() const {
-    // Search in reverse order (most recently allocated first)
-    for (auto it = blocks_.rbegin(); it != blocks_.rend(); ++it) {
-        if (it->state == Block::State::ALLOCATED && it->has_space()) {
-            return it->id;
-        }
-    }
-    return std::nullopt;
-}
-
 std::vector<uint32_t> KVCacheBlockManager::get_allocated_blocks() const {
     std::vector<uint32_t> allocated;
     allocated.reserve(max_blocks_);
@@ -205,63 +160,6 @@ void KVCacheBlockManager::clear_all() {
     LOG_DEBUG("KVCacheBlockManager: All blocks cleared");
 }
 
-KVCacheBlockManager::Stats KVCacheBlockManager::get_stats() const {
-    Stats stats;
-    stats.total_blocks = max_blocks_;
-    stats.free_blocks = static_cast<uint32_t>(free_block_ids_.size());
-    stats.allocated_blocks = 0;
-    stats.full_blocks = 0;
-    stats.total_tokens = 0;
-    stats.total_capacity = block_size_ * max_blocks_;
-
-    uint32_t partially_filled_blocks = 0;
-
-    for (const auto& block : blocks_) {
-        if (block.state != Block::State::FREE) {
-            stats.allocated_blocks++;
-            stats.total_tokens += block.num_tokens;
-
-            if (block.state == Block::State::FULL) {
-                stats.full_blocks++;
-            } else if (block.num_tokens > 0 && block.num_tokens < block_size_) {
-                partially_filled_blocks++;
-            }
-        }
-    }
-
-    // Calculate utilization
-    if (stats.total_capacity > 0) {
-        stats.utilization = static_cast<float>(stats.total_tokens) / static_cast<float>(stats.total_capacity);
-    } else {
-        stats.utilization = 0.0f;
-    }
-
-    // Calculate fragmentation (ratio of partially filled blocks)
-    if (stats.allocated_blocks > 0) {
-        stats.fragmentation = static_cast<float>(partially_filled_blocks) / static_cast<float>(stats.allocated_blocks);
-    } else {
-        stats.fragmentation = 0.0f;
-    }
-
-    return stats;
-}
-
-void KVCacheBlockManager::print_stats() const {
-    auto stats = get_stats();
-
-    std::ostringstream oss;
-    oss << "KVCacheBlockManager Stats:\n"
-        << "  Total Blocks:     " << stats.total_blocks << "\n"
-        << "  Allocated Blocks: " << stats.allocated_blocks << "\n"
-        << "  Full Blocks:      " << stats.full_blocks << "\n"
-        << "  Free Blocks:      " << stats.free_blocks << "\n"
-        << "  Total Tokens:     " << stats.total_tokens << " / " << stats.total_capacity << "\n"
-        << "  Utilization:      " << std::fixed << std::setprecision(1) << (stats.utilization * 100.0f) << "%\n"
-        << "  Fragmentation:    " << std::fixed << std::setprecision(1) << (stats.fragmentation * 100.0f) << "%";
-
-    LOG_INFO(oss.str());
-}
-
 void KVCacheBlockManager::validate_block_id(uint32_t block_id) const {
     if (block_id >= max_blocks_) {
         OPENVINO_THROW("KVCacheBlockManager: Invalid block ID ", block_id, " (valid range: 0-", max_blocks_ - 1, ")");
diff --git a/src/plugins/intel_npu/src/plugin/npuw/kv_cache_block_manager.hpp b/src/plugins/intel_npu/src/plugin/npuw/kv_cache_block_manager.hpp
@@ -46,50 +46,13 @@ class KVCacheBlockManager {
         uint32_t id;                    ///< Unique block identifier
         ov::SoPtr<ov::ITensor> tensor;  ///< Block memory tensor
         uint32_t num_tokens;            ///< Number of tokens stored in this block
-        uint32_t capacity;              ///< Maximum tokens this block can hold
 
         enum class State {
             FREE,       ///< Block is free and available for allocation
             ALLOCATED,  ///< Block is allocated but not yet filled
             FULL,       ///< Block is completely filled
-            SHARED      ///< Block is shared across multiple requests (for prefix caching)
         };
         State state;
-
-        /**
-         * @brief Check if the block is completely filled
-         */
-        bool is_full() const {
-            return num_tokens >= capacity;
-        }
-
-        /**
-         * @brief Check if the block has available space
-         */
-        bool has_space() const {
-            return num_tokens < capacity;
-        }
-
-        /**
-         * @brief Get remaining capacity
-         */
-        uint32_t remaining_capacity() const {
-            return capacity - num_tokens;
-        }
-    };
-
-    /**
-     * @brief Statistics about block manager state
-     */
-    struct Stats {
-        uint32_t total_blocks;      ///< Total number of blocks in pool
-        uint32_t allocated_blocks;  ///< Number of currently allocated blocks
-        uint32_t full_blocks;       ///< Number of completely filled blocks
-        uint32_t free_blocks;       ///< Number of free blocks
-        uint32_t total_tokens;      ///< Total tokens stored across all blocks
-        uint32_t total_capacity;    ///< Total capacity (blocks * block_size)
-        float utilization;          ///< Memory utilization ratio (0.0 - 1.0)
-        float fragmentation;        ///< Fragmentation ratio (partially filled blocks)
     };
 
     /**
@@ -126,13 +89,6 @@ class KVCacheBlockManager {
      */
     std::optional<uint32_t> allocate_block();
 
-    /**
-     * @brief Free a previously allocated block
-     *
-     * @param block_id ID of the block to free
-     */
-    void free_block(uint32_t block_id);
-
     /**
      * @brief Get the tensor associated with a block
      *
@@ -141,11 +97,6 @@ class KVCacheBlockManager {
      */
     ov::SoPtr<ov::ITensor> get_block_tensor(uint32_t block_id);
 
-    /**
-     * @brief Get the tensor associated with a block (const version)
-     */
-    ov::SoPtr<const ov::ITensor> get_block_tensor(uint32_t block_id) const;
-
     /**
      * @brief Update the number of tokens stored in a block
      *
@@ -162,15 +113,6 @@ class KVCacheBlockManager {
      */
     uint32_t get_block_tokens(uint32_t block_id) const;
 
-    /**
-     * @brief Find the last allocated block that is not yet full
-     *
-     * Useful for appending new tokens to existing blocks during generate phase
-     *
-     * @return Block ID if found, std::nullopt otherwise
-     */
-    std::optional<uint32_t> find_unfilled_block() const;
-
     /**
      * @brief Get list of all currently allocated block IDs
      *
@@ -185,20 +127,6 @@ class KVCacheBlockManager {
      */
     void clear_all();
 
-    /**
-     * @brief Get current statistics
-     *
-     * @return Stats structure with current state
-     */
-    Stats get_stats() const;
-
-    /**
-     * @brief Print statistics to log
-     *
-     * For debugging and monitoring purposes
-     */
-    void print_stats() const;
-
     /**
      * @brief Get block size (tokens per block)
      */
@@ -213,13 +141,6 @@ class KVCacheBlockManager {
         return max_blocks_;
     }
 
-    /**
-     * @brief Get total capacity (tokens)
-     */
-    uint32_t get_total_capacity() const {
-        return block_size_ * max_blocks_;
-    }
-
     /**
      * @brief Pair of key/value block managers for one transformer layer
      */