Skip to content

Commit 78a5461

Browse files
committed
WA: OCL OUT_OF_RESOURCE when input token size < 8
1 parent 827a9f6 commit 78a5461

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

src/plugins/intel_gpu/src/graph/impls/ocl_v2/sdpa/paged_attention_opt.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1267,7 +1267,10 @@ class PagedAttentionOptImpl : public SDPAImplBase {
12671267

12681268
if (rt_params->stage == PagedAttentionStage::PREFILL) {
12691269
#ifdef ENABLE_ONEDNN_FOR_GPU
1270-
if (rt_params->use_micro_sdpa) {
1270+
// WA: avoid the "OCL OUT OF RESOURCE" issue when running qwen3_moe with an input token size < 8
1271+
// TODO: remove this limitation once the micro_sdpa kernel resolves this problem.
1272+
const auto query_len = params.get_input_layout(PagedAttentionInputIdx::QUERY).get_partial_shape()[0].get_length();
1273+
if (rt_params->use_micro_sdpa && query_len >= 8) {
12711274
res_event = {execute_stage(res_event, instance, pa_sdpa_micro)};
12721275
} else {
12731276
res_event = {execute_stage(res_event, instance, pa_sdpa_opt)};

src/plugins/intel_gpu/src/graph/impls/ocl_v2/sdpa/sdpa_gen_micro.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,8 @@ sdpa_config_t xehpg_q_h64_s64_2nd = {8, 8, 8, 8, 8, 2, 8, 2};
289289
sdpa_config_t xehpg_q_h64_s128_2nd = {16, 8, 8, 8, 8, 4, 8, 4};
290290
sdpa_config_t xehpg_q_h64_2nd = {16, 16, 8, 8, 16, 2, 8, 4};
291291

292-
sdpa_config_t xehpg_h128_pa = {16, 16, 16, 16, 8, 1, 8, 1};
293-
sdpa_config_t xehpg_h128 = {16, 16, 16, 16, 8, 1, 8, 1};
292+
sdpa_config_t xehpg_h128_pa = {16, 16, 16, 16, 8, 4, 8, 4};
293+
sdpa_config_t xehpg_h128 = {16, 16, 32, 8, 8, 4, 4, 8};
294294
sdpa_config_t xehpg_h128_s32 = {16, 16, 16, 8, 16, 2, 8, 4};
295295
sdpa_config_t xehpg_h128_2nd = {8, 16, 16, 8, 16, 1, 8, 2};
296296

0 commit comments

Comments
 (0)