Skip to content

Commit bba601b

Browse files
committed
Added possibility to mock ov::npuw::CompiledModel
1 parent 2990c30 commit bba601b

File tree

12 files changed

+600
-65
lines changed

12 files changed

+600
-65
lines changed

src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -135,13 +135,12 @@ void pre_load_transform(const std::shared_ptr<ov::Model>& model, const ov::AnyMa
135135
}
136136
} // anonymous namespace
137137

138-
std::shared_ptr<ov::npuw::ICompiledModel> ov::npuw::ICompiledModel::create(
139-
const std::shared_ptr<ov::Model>& model,
140-
const std::shared_ptr<const ov::IPlugin>& plugin,
141-
const ov::AnyMap& properties) {
138+
std::shared_ptr<ov::ICompiledModel> ov::npuw::create_compiled_model(const std::shared_ptr<ov::Model>& model,
139+
const std::shared_ptr<const ov::IPlugin>& plugin,
140+
const ov::AnyMap& properties) {
142141
LOG_INFO("Choosing which NPUW CompiledModel to create");
143142
LOG_BLOCK();
144-
std::shared_ptr<ov::npuw::ICompiledModel> compiled_model;
143+
std::shared_ptr<ov::ICompiledModel> compiled_model;
145144
auto use_llm_key = ov::intel_npu::npuw::llm::enabled.name();
146145
auto use_kokoro_key = ov::intel_npu::npuw::kokoro::enabled.name();
147146

@@ -165,10 +164,6 @@ std::shared_ptr<ov::npuw::ICompiledModel> ov::npuw::ICompiledModel::create(
165164
return compiled_model;
166165
}
167166

168-
ov::npuw::ICompiledModel::ICompiledModel(const std::shared_ptr<ov::Model>& model,
169-
const std::shared_ptr<const ov::IPlugin>& plugin)
170-
: ov::ICompiledModel(model, plugin) {}
171-
172167
ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
173168
const std::shared_ptr<const ov::IPlugin>& plugin,
174169
const ov::AnyMap& properties)
@@ -1508,6 +1503,14 @@ void ov::npuw::CompiledModel::reconstruct_closure() {
15081503
}
15091504
}
15101505

1506+
std::shared_ptr<ov::npuw::weights::Bank> ov::npuw::CompiledModel::get_weights_bank() const {
1507+
return m_weights_bank;
1508+
}
1509+
1510+
void ov::npuw::CompiledModel::set_weights_bank(std::shared_ptr<ov::npuw::weights::Bank> bank) {
1511+
m_weights_bank = bank;
1512+
}
1513+
15111514
void ov::npuw::CompiledModel::finalize_weights_bank() {
15121515
LOG_INFO("Finalizing weights bank...");
15131516
std::shared_future<void> weights_bank_evaluation = std::async(std::launch::async, [&]() {
@@ -1657,6 +1660,10 @@ std::string ov::npuw::CompiledModel::funcall_mem_device(const std::size_t idx) c
16571660
return *comp_model_desc.device_it;
16581661
}
16591662

1663+
std::size_t ov::npuw::CompiledModel::num_compiled_submodels() const {
1664+
return m_compiled_submodels.size();
1665+
}
1666+
16601667
void ov::npuw::CompiledModel::remove_long_output_names(const std::shared_ptr<ov::Model>& model) {
16611668
NPUW_ASSERT(model.get() != nullptr);
16621669
for (auto node : model->get_ordered_ops()) {

src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp

Lines changed: 37 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -31,12 +31,34 @@ class Plugin;
3131

3232
namespace ov {
3333
namespace npuw {
34+
std::shared_ptr<ov::ICompiledModel> create_compiled_model(const std::shared_ptr<ov::Model>& model,
35+
const std::shared_ptr<const ov::IPlugin>& plugin,
36+
const ov::AnyMap& properties);
37+
3438
class ICompiledModel : public ov::ICompiledModel {
3539
public:
36-
static std::shared_ptr<ov::npuw::ICompiledModel> create(const std::shared_ptr<ov::Model>& model,
37-
const std::shared_ptr<const ov::IPlugin>& plugin,
38-
const ov::AnyMap& properties);
39-
ICompiledModel(const std::shared_ptr<ov::Model>& model, const std::shared_ptr<const ov::IPlugin>& plugin);
40+
ICompiledModel(const std::shared_ptr<ov::Model>& model, const std::shared_ptr<const ov::IPlugin>& plugin)
41+
: ov::ICompiledModel(model, plugin) {}
42+
43+
// API for easily creating and managing NPUW infer-requests
44+
virtual std::shared_ptr<ov::npuw::IBaseInferRequest> create_base_infer_request() const = 0;
45+
46+
virtual std::shared_ptr<ov::IAsyncInferRequest> wrap_async_infer_request(
47+
std::shared_ptr<ov::npuw::IBaseInferRequest> internal_request) const = 0;
48+
49+
virtual std::string submodel_device(const std::size_t idx) const = 0;
50+
51+
virtual std::size_t num_compiled_submodels() const = 0;
52+
53+
virtual void serialize(std::ostream& stream, const ov::npuw::s11n::CompiledContext& ctx) const = 0;
54+
55+
virtual std::shared_ptr<weights::Bank> get_weights_bank() const = 0;
56+
57+
virtual void set_weights_bank(std::shared_ptr<weights::Bank> bank) = 0;
58+
59+
virtual void finalize_weights_bank() = 0;
60+
61+
virtual void reconstruct_closure() = 0;
4062
};
4163

4264
// Forward declarations
@@ -80,7 +102,7 @@ class CompiledModel : public ov::npuw::ICompiledModel {
80102
friend class UnfoldInferRequest;
81103
friend class MemAccessSim;
82104
friend class FuncMemMgr;
83-
friend class LLMCompiledModel;
105+
friend class DefaultNPUWCompiledModelFactory;
84106
friend class LLMInferRequest;
85107
friend class moe::MoEExecutor;
86108

@@ -100,7 +122,7 @@ class CompiledModel : public ov::npuw::ICompiledModel {
100122

101123
void report_io() const;
102124

103-
void serialize(std::ostream& stream, const ov::npuw::s11n::CompiledContext& ctx) const;
125+
void serialize(std::ostream& stream, const ov::npuw::s11n::CompiledContext& ctx) const override;
104126
static std::shared_ptr<CompiledModel> deserialize(std::istream& stream,
105127
const std::shared_ptr<const ov::IPlugin>& plugin,
106128
const ov::AnyMap& properties,
@@ -116,11 +138,11 @@ class CompiledModel : public ov::npuw::ICompiledModel {
116138
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
117139

118140
// API for easily creating and managing NPUW infer-requests
119-
std::shared_ptr<ov::npuw::IBaseInferRequest> create_base_infer_request() const;
141+
std::shared_ptr<ov::npuw::IBaseInferRequest> create_base_infer_request() const override;
120142
std::shared_ptr<ov::IAsyncInferRequest> wrap_async_infer_request(
121-
std::shared_ptr<ov::npuw::IBaseInferRequest> internal_request) const;
143+
std::shared_ptr<ov::npuw::IBaseInferRequest> internal_request) const override;
122144

123-
std::string submodel_device(const std::size_t idx) const;
145+
std::string submodel_device(const std::size_t idx) const override;
124146
bool is_gather_closure(const std::size_t idx, const std::size_t cidx) const;
125147
bool unpack_required(const std::size_t idx) const;
126148
bool unpack_required(const std::size_t idx, const std::size_t cidx) const;
@@ -131,15 +153,19 @@ class CompiledModel : public ov::npuw::ICompiledModel {
131153
bool should_use_quantized_host_gather(const std::shared_ptr<ov::Model>& model, const ov::AnyMap& properties) const;
132154

133155
// For full deserialization flow with weights
134-
void reconstruct_closure();
156+
void reconstruct_closure() override;
135157
// For weightless serialization flow
136158
void store_const_offsets(const std::shared_ptr<ov::Model>& model);
137159

138-
void finalize_weights_bank();
160+
std::shared_ptr<weights::Bank> get_weights_bank() const override;
161+
void set_weights_bank(std::shared_ptr<weights::Bank> bank) override;
162+
void finalize_weights_bank() override;
139163
void detach_memory();
140164
std::string global_mem_device() const;
141165
std::string funcall_mem_device(const std::size_t idx) const;
142166

167+
std::size_t num_compiled_submodels() const override;
168+
143169
std::shared_ptr<::intel_npu::OptionsDesc> m_options_desc;
144170
::intel_npu::Config m_cfg;
145171
GetPropertiesMap m_prop_to_opt;

0 commit comments

Comments
 (0)