@@ -192,3 +192,110 @@ TEST(propagate_constants, no_reselection_when_constants_are_static) {
192192 ASSERT_FALSE (impl_after->is_dynamic ());
193193 ASSERT_EQ (impl_before, impl_after);
194194}
195+
196+ // Verifies that build_implementations does not overwrite a reselected node's
197+ // pre-compiled kernel with a wrong (dynamic/shape-agnostic) kernel from another
198+ // node that shares the same kernel_impl_params key.
199+ //
200+ // This is the regression test for the kernel cache collision bug:
201+ // When propagate_constants reselects an impl from dynamic to static, the node
202+ // gets a static kernel compiled immediately. Meanwhile, another node with the
203+ // same kernel_impl_params (same layout/primitive config) but a dynamic impl
204+ // has a shape-agnostic (__sa) kernel. Without the fix, build_implementations'
205+ // add_kernels_source would register the dynamic node's __sa kernel first under
206+ // the shared params key, and init_kernels would then assign that __sa kernel to
207+ // the reselected static node — causing CL_INVALID_KERNEL_ARGS at runtime.
208+ //
209+ // Topology:
210+ // input_layout("input_static") -----------> eltwise("eltwise_reselected", sum) <-- transitions dyn→static
211+ // input_layout("input_dynamic") -----------> eltwise("eltwise_dynamic", sum) <-- stays dynamic
212+ // data("weights_a") ---+
213+ // eltwise("w_sum") --> (shared constant input for both)
214+ // data("weights_b") ---+
215+ //
216+ // Both eltwises have the same layout ({1,3,4,4} f32 bfyx) and same eltwise_mode,
217+ // so their kernel_impl_params are equal — triggering the cache key collision.
218+ // After propagate_constants + build_implementations, the reselected node must
219+ // have a non-__sa kernel and the dynamic node must have a __sa kernel.
220+ TEST (propagate_constants, build_implementations_preserves_reselected_kernel) {
221+ auto & engine = get_test_engine ();
222+
223+ auto static_input_layout = layout{{1 , 3 , 4 , 4 }, data_types::f32 , format::bfyx};
224+ auto dynamic_input_layout = layout{ov::PartialShape::dynamic (4 ), data_types::f32 , format::bfyx};
225+
226+ topology topology (
227+ input_layout (" input_static" , static_input_layout),
228+ input_layout (" input_dynamic" , dynamic_input_layout),
229+ data (" weights_a" , engine.allocate_memory (layout{{1 , 3 , 4 , 4 }, data_types::f32 , format::bfyx})),
230+ data (" weights_b" , engine.allocate_memory (layout{{1 , 3 , 4 , 4 }, data_types::f32 , format::bfyx})),
231+ eltwise (" w_sum" , input_info (" weights_a" ), input_info (" weights_b" ), eltwise_mode::sum),
232+ eltwise (" eltwise_reselected" , input_info (" input_static" ), input_info (" w_sum" ), eltwise_mode::sum),
233+ eltwise (" eltwise_dynamic" , input_info (" input_dynamic" ), input_info (" w_sum" ), eltwise_mode::sum)
234+ );
235+
236+ ExecutionConfig config = get_test_default_config (engine);
237+ config.set_property (ov::intel_gpu::optimize_data (true ));
238+ config.set_property (ov::intel_gpu::allow_new_shape_infer (true ));
239+
240+ auto prog = program::build_program (engine, topology, config, false , true );
241+
242+ // Simulate unresolved dynamic shape on the constant computation node.
243+ auto & w_sum_node = prog->get_node (" w_sum" );
244+ auto dyn_layout = layout{ov::PartialShape::dynamic (4 ), data_types::f32 , format::bfyx};
245+ w_sum_node.set_output_layout (dyn_layout, true );
246+
247+ program_wrapper::apply_opt_pass<compile_graph>(*prog);
248+
249+ // After compile_graph both eltwises should be dynamic (one input is dynamic).
250+ auto & reselected_node = prog->get_node (" eltwise_reselected" );
251+ auto & dynamic_node = prog->get_node (" eltwise_dynamic" );
252+ ASSERT_TRUE (reselected_node.get_selected_impl () == nullptr ||
253+ reselected_node.get_selected_impl ()->is_dynamic ());
254+ ASSERT_TRUE (dynamic_node.get_selected_impl () == nullptr ||
255+ dynamic_node.get_selected_impl ()->is_dynamic ());
256+
257+ // propagate_constants reselects eltwise_reselected to static impl and
258+ // immediately compiles its kernel.
259+ program_wrapper::apply_opt_pass<propagate_constants>(*prog);
260+
261+ auto reselected_impl = reselected_node.get_selected_impl ();
262+ ASSERT_NE (reselected_impl, nullptr );
263+ ASSERT_FALSE (reselected_impl->is_dynamic ());
264+
265+ // The reselected impl should already have compiled kernels (non-empty)
266+ // and its kernel sources should be reset (indicating pre-compilation).
267+ auto reselected_kernels_before = reselected_impl->get_kernels ();
268+ ASSERT_FALSE (reselected_kernels_before.empty ());
269+
270+ // Now run build_implementations — this is where the collision would occur
271+ // without the fix.
272+ program_wrapper::apply_opt_pass<build_implementations>(*prog);
273+
274+ // After build_implementations, verify the reselected node still has
275+ // a valid kernel that is NOT a shape-agnostic (__sa) kernel.
276+ auto reselected_kernels_after = reselected_impl->get_kernels ();
277+ ASSERT_FALSE (reselected_kernels_after.empty ());
278+ for (const auto & kernel : reselected_kernels_after) {
279+ auto kernel_id = kernel->get_id ();
280+ // A static impl's kernel must NOT have the __sa suffix
281+ ASSERT_EQ (kernel_id.find (" __sa" ), std::string::npos)
282+ << " Reselected static node got shape-agnostic kernel: " << kernel_id;
283+ }
284+
285+ // The dynamic node should have a shape-agnostic kernel (if it has an impl).
286+ auto dynamic_impl = dynamic_node.get_selected_impl ();
287+ if (dynamic_impl && dynamic_impl->is_dynamic ()) {
288+ auto dynamic_kernels = dynamic_impl->get_kernels ();
289+ if (!dynamic_kernels.empty ()) {
290+ bool has_sa_kernel = false ;
291+ for (const auto & kernel : dynamic_kernels) {
292+ if (kernel->get_id ().find (" __sa" ) != std::string::npos) {
293+ has_sa_kernel = true ;
294+ break ;
295+ }
296+ }
297+ ASSERT_TRUE (has_sa_kernel)
298+ << " Dynamic node should have a shape-agnostic (__sa) kernel" ;
299+ }
300+ }
301+ }
0 commit comments