@@ -31,12 +31,34 @@ class Plugin;
3131
3232namespace ov {
3333namespace npuw {
// Diff record: newly added namespace-scope declaration inside ov::npuw.
// It takes the same three parameters (model, plugin, properties) as the static
// ICompiledModel::create() that this diff removes from the class, but its return type is
// std::shared_ptr<ov::ICompiledModel> instead of std::shared_ptr<ov::npuw::ICompiledModel>.
// NOTE(review): presumably the replacement entry point for the removed create() - confirm against callers.
34+ std::shared_ptr<ov::ICompiledModel> create_compiled_model (const std::shared_ptr<ov::Model>& model,
35+ const std::shared_ptr<const ov::IPlugin>& plugin,
36+ const ov::AnyMap& properties);
37+
// Diff record for ov::npuw::ICompiledModel, which derives publicly from ov::ICompiledModel.
// Lines marked '-' are removed by the change, lines marked '+' are added by it.
3438class ICompiledModel : public ov ::ICompiledModel {
3539public:
// Removed: the static create() factory and the declaration-only constructor.
36- static std::shared_ptr<ov::npuw::ICompiledModel> create (const std::shared_ptr<ov::Model>& model,
37- const std::shared_ptr<const ov::IPlugin>& plugin,
38- const ov::AnyMap& properties);
39- ICompiledModel (const std::shared_ptr<ov::Model>& model, const std::shared_ptr<const ov::IPlugin>& plugin);
// Added: the constructor is now defined inline; it only forwards model and plugin
// to the ov::ICompiledModel base-class constructor.
40+ ICompiledModel (const std::shared_ptr<ov::Model>& model, const std::shared_ptr<const ov::IPlugin>& plugin)
41+ : ov::ICompiledModel(model, plugin) {}
42+
// Everything below is added as pure virtual (= 0), which makes this class abstract:
// a concrete subclass has to implement each member. The CompiledModel hunks later in
// this diff mark the members with these names as `override`.
43+ // API for easily creating and managing NPUW infer-requests
44+ virtual std::shared_ptr<ov::npuw::IBaseInferRequest> create_base_infer_request () const = 0;
45+
// Takes an IBaseInferRequest by shared_ptr and returns an ov::IAsyncInferRequest.
46+ virtual std::shared_ptr<ov::IAsyncInferRequest> wrap_async_infer_request (
47+ std::shared_ptr<ov::npuw::IBaseInferRequest> internal_request) const = 0;
48+
// NOTE(review): presumably the device name for the submodel at index idx - confirm in the implementation.
49+ virtual std::string submodel_device (const std::size_t idx) const = 0;
50+
// NOTE(review): presumably the valid index range for submodel_device() - confirm in the implementation.
51+ virtual std::size_t num_compiled_submodels () const = 0;
52+
// Writes to the given output stream; ctx is passed by const reference.
53+ virtual void serialize (std::ostream& stream, const ov::npuw::s11n::CompiledContext& ctx) const = 0;
54+
// Weights-bank accessors: the getter is const, the setter takes the bank by shared_ptr value.
55+ virtual std::shared_ptr<weights::Bank> get_weights_bank () const = 0;
56+
57+ virtual void set_weights_bank (std::shared_ptr<weights::Bank> bank) = 0;
58+
59+ virtual void finalize_weights_bank () = 0;
60+
// The CompiledModel declaration of this member carries the comment
// "For full deserialization flow with weights".
61+ virtual void reconstruct_closure () = 0;
4062};
4163
4264// Forward declarations
@@ -80,7 +102,7 @@ class CompiledModel : public ov::npuw::ICompiledModel {
// Diff record for CompiledModel's friend list: the friend declaration for LLMCompiledModel
// is removed ('-') and one for DefaultNPUWCompiledModelFactory is added ('+').
// The remaining friend declarations are unchanged context lines.
80102 friend class UnfoldInferRequest ;
81103 friend class MemAccessSim ;
82104 friend class FuncMemMgr ;
83- friend class LLMCompiledModel ;
105+ friend class DefaultNPUWCompiledModelFactory ;
84106 friend class LLMInferRequest ;
85107 friend class moe ::MoEExecutor;
86108
@@ -100,7 +122,7 @@ class CompiledModel : public ov::npuw::ICompiledModel {
100122
// Diff record: report_io() is an unchanged context line.
101123 void report_io () const ;
102124
// serialize() keeps its parameter list and const qualifier; the only change is the added
// `override`, so the compiler now checks it against a virtual declared in a base class
// (ov::npuw::ICompiledModel declares a pure-virtual serialize with this signature in this diff).
103- void serialize (std::ostream& stream, const ov::npuw::s11n::CompiledContext& ctx) const ;
125+ void serialize (std::ostream& stream, const ov::npuw::s11n::CompiledContext& ctx) const override ;
104126 static std::shared_ptr<CompiledModel> deserialize (std::istream& stream,
105127 const std::shared_ptr<const ov::IPlugin>& plugin,
106128 const ov::AnyMap& properties,
@@ -116,11 +138,11 @@ class CompiledModel : public ov::npuw::ICompiledModel {
// Diff record: create_sync_infer_request() was already an override and is unchanged context.
116138 std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request () const override ;
117139
// The three members below keep their signatures; each '-'/'+' pair differs only by the
// added `override`, matching the pure virtuals this diff adds to ov::npuw::ICompiledModel.
118140 // API for easily creating and managing NPUW infer-requests
119- std::shared_ptr<ov::npuw::IBaseInferRequest> create_base_infer_request () const ;
141+ std::shared_ptr<ov::npuw::IBaseInferRequest> create_base_infer_request () const override ;
120142 std::shared_ptr<ov::IAsyncInferRequest> wrap_async_infer_request (
121- std::shared_ptr<ov::npuw::IBaseInferRequest> internal_request) const ;
143+ std::shared_ptr<ov::npuw::IBaseInferRequest> internal_request) const override ;
122144
123- std::string submodel_device (const std::size_t idx) const ;
145+ std::string submodel_device (const std::size_t idx) const override ;
// Unchanged context: these queries are not marked `override` and have no counterpart
// in the ICompiledModel interface shown in this diff.
124146 bool is_gather_closure (const std::size_t idx, const std::size_t cidx) const ;
125147 bool unpack_required (const std::size_t idx) const ;
126148 bool unpack_required (const std::size_t idx, const std::size_t cidx) const ;
@@ -131,15 +153,19 @@ class CompiledModel : public ov::npuw::ICompiledModel {
// Diff record: unchanged context line.
131153 bool should_use_quantized_host_gather (const std::shared_ptr<ov::Model>& model, const ov::AnyMap& properties) const ;
132154
// reconstruct_closure() gains `override`; its signature is otherwise unchanged.
133155 // For full deserialization flow with weights
134- void reconstruct_closure ();
156+ void reconstruct_closure () override ;
135157 // For weightless serialization flow
136158 void store_const_offsets (const std::shared_ptr<ov::Model>& model);
137159
// finalize_weights_bank() gains `override`, and two new overriding members are added:
// a const getter and a setter for a std::shared_ptr<weights::Bank>.
138- void finalize_weights_bank ();
160+ std::shared_ptr<weights::Bank> get_weights_bank () const override ;
161+ void set_weights_bank (std::shared_ptr<weights::Bank> bank) override ;
162+ void finalize_weights_bank () override ;
// Unchanged context: not marked `override`.
139163 void detach_memory ();
140164 std::string global_mem_device () const ;
141165 std::string funcall_mem_device (const std::size_t idx) const ;
142166
// Newly added overriding member; ov::npuw::ICompiledModel declares it pure virtual in this diff.
167+ std::size_t num_compiled_submodels () const override ;
168+
// Unchanged context: data members of CompiledModel.
143169 std::shared_ptr<::intel_npu::OptionsDesc> m_options_desc;
144170 ::intel_npu::Config m_cfg;
145171 GetPropertiesMap m_prop_to_opt;
0 commit comments