Skip to content

Commit ba706b6

Browse files
[NPUW] LLMCompiledModel refactoring part 1 (#34290)
This PR is part of a series intended to refactor the monolithic structure of LLMCompiledModel, with the goal of supporting different model types and different compilation pipelines. To do so, all transformations done in LLMCompiledModel are wrapped into ModelPasses. The basic idea is to create a transformation pipeline that can be used as the base for all model pipelines. The current PR is mostly cosmetic and focuses on wrapping all the transformations into passes.
1 parent ce21640 commit ba706b6

File tree

11 files changed

+383
-174
lines changed

11 files changed

+383
-174
lines changed

src/plugins/intel_npu/src/plugin/npuw/embedding_infer_request.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2025 Intel Corporation
1+
// Copyright (C) 2018-2026 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

src/plugins/intel_npu/src/plugin/npuw/embedding_infer_request.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2025 Intel Corporation
1+
// Copyright (C) 2018-2026 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

src/plugins/intel_npu/src/plugin/npuw/embedding_model_utils.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -370,9 +370,10 @@ class ReConstructEmbeddingModel : public ov::pass::ModelPass {
370370

371371
} // namespace
372372

373-
void ov::npuw::util::prepare_text_embedding_model(std::shared_ptr<ov::Model> model, uint32_t seq_len_dim) {
373+
bool ov::npuw::util::PrepareTextEmbeddingModel::run_on_model(const std::shared_ptr<ov::Model>& model) {
374374
ov::pass::Manager manager("prepare-embedding");
375375
manager.set_per_pass_validation(true);
376-
manager.register_pass<ReConstructEmbeddingModel>(seq_len_dim);
377-
manager.run_passes(model);
376+
manager.register_pass<ReConstructEmbeddingModel>(m_seq_len_dim);
377+
378+
return manager.run_passes(model);
378379
}
Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2025 Intel Corporation
1+
// Copyright (C) 2018-2026 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

@@ -8,6 +8,15 @@
88

99
namespace ov ::npuw ::util {
1010

11-
void prepare_text_embedding_model(std::shared_ptr<ov::Model> model, uint32_t seq_len_dim);
11+
class PrepareTextEmbeddingModel : public ov::pass::ModelPass {
12+
uint32_t m_seq_len_dim;
13+
14+
public:
15+
OPENVINO_MODEL_PASS_RTTI("ov::npuw::PrepareTextEmbeddingModel");
16+
17+
explicit PrepareTextEmbeddingModel(uint32_t seq_len_dim) : m_seq_len_dim(seq_len_dim) {}
18+
19+
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
20+
};
1221

1322
} // namespace ov::npuw::util

0 commit comments

Comments
 (0)