|
16 | 16 | ret void
|
17 | 17 | }
|
18 | 18 |
|
| 19 | + define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2() #0 { ; empty IR stub; shares its name with a MIR function later in this file |
| 20 | + ret void |
| 21 | + } |
| 22 | + |
| 23 | + define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg() #0 { ; empty IR stub; shares its name with a MIR function later in this file |
| 24 | + ret void |
| 25 | + } |
| 26 | + |
19 | 27 | attributes #0 = { "amdgpu-wave-limiter"="true" "amdgpu-waves-per-eu"="8,8" }
|
20 | 28 | ...
|
21 | 29 |
|
@@ -311,3 +319,173 @@ body: |
|
311 | 319 | $agpr0 = COPY %0
|
312 | 320 |
|
313 | 321 | ...
|
| 322 | + |
| 323 | +# Non-mac variant (V_MFMA_F32_32X32X8F16_vgprcd_e64); src2 is the immediate 0 instead of a register. |
| 324 | +--- |
| 325 | +name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2 |
| 326 | +tracksRegLiveness: true |
| 327 | +machineFunctionInfo: |
| 328 | + isEntryFunction: true |
| 329 | + stackPtrOffsetReg: '$sgpr32' |
| 330 | + occupancy: 10 |
| 331 | + sgprForEXECCopy: '$sgpr100_sgpr101' |
| 332 | +body: | |
| 333 | + ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2 |
| 334 | + ; CHECK: bb.0: |
| 335 | + ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| 336 | + ; CHECK-NEXT: {{ $}} |
| 337 | + ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0 |
| 338 | + ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0 |
| 339 | + ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec |
| 340 | + ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0 |
| 341 | + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1 |
| 342 | + ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc |
| 343 | + ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8 |
| 344 | + ; CHECK-NEXT: {{ $}} |
| 345 | + ; CHECK-NEXT: bb.1: |
| 346 | + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| 347 | + ; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1 |
| 348 | + ; CHECK-NEXT: {{ $}} |
| 349 | + ; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| 350 | + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc |
| 351 | + ; CHECK-NEXT: S_BRANCH %bb.2 |
| 352 | + ; CHECK-NEXT: {{ $}} |
| 353 | + ; CHECK-NEXT: bb.2: |
| 354 | + ; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF |
| 355 | + ; CHECK-NEXT: {{ $}} |
| 356 | + ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 |
| 357 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| 358 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 |
| 359 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 |
| 360 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| 361 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 |
| 362 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 |
| 363 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 |
| 364 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 |
| 365 | + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec |
| 366 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| 367 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| 368 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| 369 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| 370 | + ; CHECK-NEXT: S_ENDPGM 0 |
| 371 | + bb.0: |
| 372 | + S_NOP 0, implicit-def $agpr0 |
| 373 | + renamable $sgpr0 = S_MOV_B32 0 |
| 374 | + undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec |
| 375 | + renamable $sgpr1 = COPY renamable $sgpr0 |
| 376 | + %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1 |
| 377 | + renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc |
| 378 | + %0.sub9:vreg_512_align2 = COPY %0.sub8 |
| 379 | +
|
| 380 | + bb.1: |
| 381 | + liveins: $vcc |
| 382 | +
|
| 383 | + %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, 0, 0, 0, 0, implicit $mode, implicit $exec |
| 384 | + S_CBRANCH_VCCNZ %bb.1, implicit $vcc |
| 385 | + S_BRANCH %bb.2 |
| 386 | +
|
| 387 | + bb.2: |
| 388 | + ; No VGPRs available for %0: v0-v63 are clobbered below while %0 is still live (the stores read it). |
| 389 | + S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| 390 | + S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 |
| 391 | + S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 |
| 392 | + S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| 393 | + S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 |
| 394 | + S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 |
| 395 | + S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 |
| 396 | + S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 |
| 397 | + %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| 398 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| 399 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| 400 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| 401 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| 402 | + S_ENDPGM 0 |
| 403 | +
|
| 404 | +... |
| 405 | + |
| 406 | +# Non-mac variant; src2 is the same virtual register as the result (%0), but a different subregister range (sub16-sub31 as src2, sub0-sub15 as the result). |
| 407 | +--- |
| 408 | +name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg |
| 409 | +tracksRegLiveness: true |
| 410 | +machineFunctionInfo: |
| 411 | + isEntryFunction: true |
| 412 | + stackPtrOffsetReg: '$sgpr32' |
| 413 | + occupancy: 10 |
| 414 | + sgprForEXECCopy: '$sgpr100_sgpr101' |
| 415 | +body: | |
| 416 | + ; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg |
| 417 | + ; CHECK: bb.0: |
| 418 | + ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| 419 | + ; CHECK-NEXT: {{ $}} |
| 420 | + ; CHECK-NEXT: S_NOP 0, implicit-def $agpr0 |
| 421 | + ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0 |
| 422 | + ; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec |
| 423 | + ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0 |
| 424 | + ; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1 |
| 425 | + ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc |
| 426 | + ; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8 |
| 427 | + ; CHECK-NEXT: {{ $}} |
| 428 | + ; CHECK-NEXT: bb.1: |
| 429 | + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| 430 | + ; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19 |
| 431 | + ; CHECK-NEXT: {{ $}} |
| 432 | + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1) |
| 433 | + ; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1) |
| 434 | + ; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec |
| 435 | + ; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 |
| 436 | + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc |
| 437 | + ; CHECK-NEXT: S_BRANCH %bb.2 |
| 438 | + ; CHECK-NEXT: {{ $}} |
| 439 | + ; CHECK-NEXT: bb.2: |
| 440 | + ; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31:0x00000000FFFFFFFF |
| 441 | + ; CHECK-NEXT: {{ $}} |
| 442 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| 443 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 |
| 444 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 |
| 445 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| 446 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 |
| 447 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 |
| 448 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 |
| 449 | + ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 |
| 450 | + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec |
| 451 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| 452 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| 453 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| 454 | + ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| 455 | + ; CHECK-NEXT: S_ENDPGM 0 |
| 456 | + bb.0: |
| 457 | + S_NOP 0, implicit-def $agpr0 |
| 458 | + renamable $sgpr0 = S_MOV_B32 0 |
| 459 | + undef %0.sub8:vreg_1024_align2 = V_MOV_B32_e32 0, implicit $exec |
| 460 | + renamable $sgpr1 = COPY renamable $sgpr0 |
| 461 | + %1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1 |
| 462 | + renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc |
| 463 | + %0.sub9:vreg_1024_align2 = COPY %0.sub8 |
| 464 | +
|
| 465 | + bb.1: |
| 466 | + liveins: $vcc |
| 467 | +
|
| 468 | + undef %0.sub0_sub1:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1) |
| 469 | + %0.sub16_sub17:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1) |
| 470 | + %0.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:vreg_1024_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31, 0, 0, 0, implicit $mode, implicit $exec |
| 471 | + S_CBRANCH_VCCNZ %bb.1, implicit $vcc |
| 472 | + S_BRANCH %bb.2 |
| 473 | +
|
| 474 | + bb.2: |
| 475 | + ; No VGPRs available for %0: v0-v63 are clobbered below while %0 is still live (the stores read it). |
| 476 | + S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| 477 | + S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 |
| 478 | + S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 |
| 479 | + S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 |
| 480 | + S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 |
| 481 | + S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 |
| 482 | + S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 |
| 483 | + S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 |
| 484 | + %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec |
| 485 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1) |
| 486 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1) |
| 487 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) |
| 488 | + GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1) |
| 489 | + S_ENDPGM 0 |
| 490 | +
|
| 491 | +... |
0 commit comments