Skip to content

Commit 548ca9e

Browse files
authored
AMDGPU: Fix asserting on non-register mfma src2 (#150552)
Also validate the subregister, although for the moment this is redundant since we currently only process full copies.
1 parent 236b315 commit 548ca9e

File tree

3 files changed

+270
-1
lines changed

3 files changed

+270
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,8 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
159159

160160
// If the inputs are tied and the same register, we can shortcut and
161161
// directly replace the register.
162-
if (Src2->getReg() != CopySrcReg) {
162+
if (!Src2->isReg() || Src2->getReg() != CopySrcReg ||
163+
Src2->getSubReg() != DefMI->getOperand(1).getSubReg()) {
163164
LLVM_DEBUG(
164165
dbgs()
165166
<< "Replacing untied VGPR MFMAs with AGPR form not yet handled\n");

llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-agpr-negative-tests.mir

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@
1616
ret void
1717
}
1818

19+
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2() #0 {
20+
ret void
21+
}
22+
23+
define amdgpu_kernel void @inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg() #0 {
24+
ret void
25+
}
26+
1927
attributes #0 = { "amdgpu-wave-limiter"="true" "amdgpu-waves-per-eu"="8,8" }
2028
...
2129

@@ -311,3 +319,173 @@ body: |
311319
$agpr0 = COPY %0
312320
313321
...
322+
323+
# Non-mac variant, src2 is an immediate.
324+
---
325+
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
326+
tracksRegLiveness: true
327+
machineFunctionInfo:
328+
isEntryFunction: true
329+
stackPtrOffsetReg: '$sgpr32'
330+
occupancy: 10
331+
sgprForEXECCopy: '$sgpr100_sgpr101'
332+
body: |
333+
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_imm_src2
334+
; CHECK: bb.0:
335+
; CHECK-NEXT: successors: %bb.1(0x80000000)
336+
; CHECK-NEXT: {{ $}}
337+
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
338+
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
339+
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
340+
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
341+
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
342+
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
343+
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
344+
; CHECK-NEXT: {{ $}}
345+
; CHECK-NEXT: bb.1:
346+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
347+
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
348+
; CHECK-NEXT: {{ $}}
349+
; CHECK-NEXT: early-clobber renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, 0, implicit $mode, implicit $exec
350+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
351+
; CHECK-NEXT: S_BRANCH %bb.2
352+
; CHECK-NEXT: {{ $}}
353+
; CHECK-NEXT: bb.2:
354+
; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17:0x00000000FFFFFFFF
355+
; CHECK-NEXT: {{ $}}
356+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
357+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
358+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
359+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
360+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
361+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
362+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
363+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
364+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
365+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
366+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
367+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
368+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
369+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
370+
; CHECK-NEXT: S_ENDPGM 0
371+
bb.0:
372+
S_NOP 0, implicit-def $agpr0
373+
renamable $sgpr0 = S_MOV_B32 0
374+
undef %0.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
375+
renamable $sgpr1 = COPY renamable $sgpr0
376+
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
377+
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
378+
%0.sub9:vreg_512_align2 = COPY %0.sub8
379+
380+
bb.1:
381+
liveins: $vcc
382+
383+
%0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, 0, 0, 0, 0, implicit $mode, implicit $exec
384+
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
385+
S_BRANCH %bb.2
386+
387+
bb.2:
388+
; No VGPRs available for %0
389+
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
390+
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
391+
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
392+
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
393+
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
394+
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
395+
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
396+
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
397+
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
398+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
399+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
400+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
401+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
402+
S_ENDPGM 0
403+
404+
...
405+
406+
# Non-mac variant, src2 is the same VGPR, but a different subregister.
407+
---
408+
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
409+
tracksRegLiveness: true
410+
machineFunctionInfo:
411+
isEntryFunction: true
412+
stackPtrOffsetReg: '$sgpr32'
413+
occupancy: 10
414+
sgprForEXECCopy: '$sgpr100_sgpr101'
415+
body: |
416+
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_vgprcd_e64_src2_different_subreg
417+
; CHECK: bb.0:
418+
; CHECK-NEXT: successors: %bb.1(0x80000000)
419+
; CHECK-NEXT: {{ $}}
420+
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
421+
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
422+
; CHECK-NEXT: renamable $vgpr8 = V_MOV_B32_e32 0, implicit $exec
423+
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
424+
; CHECK-NEXT: renamable $vgpr18_vgpr19 = COPY killed renamable $sgpr0_sgpr1
425+
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
426+
; CHECK-NEXT: dead renamable $vgpr9 = COPY renamable $vgpr8
427+
; CHECK-NEXT: {{ $}}
428+
; CHECK-NEXT: bb.1:
429+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
430+
; CHECK-NEXT: liveins: $vcc, $vgpr18_vgpr19
431+
; CHECK-NEXT: {{ $}}
432+
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
433+
; CHECK-NEXT: renamable $vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
434+
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr18_vgpr19, $vgpr18_vgpr19, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
435+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
436+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
437+
; CHECK-NEXT: S_BRANCH %bb.2
438+
; CHECK-NEXT: {{ $}}
439+
; CHECK-NEXT: bb.2:
440+
; CHECK-NEXT: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31:0x00000000FFFFFFFF
441+
; CHECK-NEXT: {{ $}}
442+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
443+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
444+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
445+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
446+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
447+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
448+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
449+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
450+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
451+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
452+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
453+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
454+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
455+
; CHECK-NEXT: S_ENDPGM 0
456+
bb.0:
457+
S_NOP 0, implicit-def $agpr0
458+
renamable $sgpr0 = S_MOV_B32 0
459+
undef %0.sub8:vreg_1024_align2 = V_MOV_B32_e32 0, implicit $exec
460+
renamable $sgpr1 = COPY renamable $sgpr0
461+
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
462+
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
463+
%0.sub9:vreg_1024_align2 = COPY %0.sub8
464+
465+
bb.1:
466+
liveins: $vcc
467+
468+
undef %0.sub0_sub1:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
469+
%0.sub16_sub17:vreg_1024_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
470+
%0.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:vreg_1024_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31, 0, 0, 0, implicit $mode, implicit $exec
471+
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
472+
S_BRANCH %bb.2
473+
474+
bb.2:
475+
; No VGPRs available for %0
476+
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
477+
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
478+
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
479+
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
480+
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
481+
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
482+
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
483+
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
484+
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
485+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
486+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
487+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
488+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
489+
S_ENDPGM 0
490+
491+
...

llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,3 +970,93 @@ body: |
970970
S_ENDPGM 0
971971
972972
...
973+
974+
---
975+
name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_same_subreg
976+
tracksRegLiveness: true
977+
machineFunctionInfo:
978+
isEntryFunction: true
979+
stackPtrOffsetReg: '$sgpr32'
980+
occupancy: 10
981+
sgprForEXECCopy: '$sgpr100_sgpr101'
982+
body: |
983+
; CHECK-LABEL: name: inflate_result_to_agpr__V_MFMA_F32_32X32X8F16_mac_vgprcd_e64_same_subreg
984+
; CHECK: bb.0:
985+
; CHECK-NEXT: successors: %bb.1(0x80000000)
986+
; CHECK-NEXT: {{ $}}
987+
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
988+
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 0
989+
; CHECK-NEXT: renamable $vgpr10 = V_MOV_B32_e32 0, implicit $exec
990+
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
991+
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr0_sgpr1
992+
; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
993+
; CHECK-NEXT: renamable $vgpr11 = COPY renamable $vgpr10
994+
; CHECK-NEXT: {{ $}}
995+
; CHECK-NEXT: bb.1:
996+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
997+
; CHECK-NEXT: liveins: $vcc, $vgpr0_vgpr1
998+
; CHECK-NEXT: {{ $}}
999+
; CHECK-NEXT: renamable $vgpr2_vgpr3_vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1000+
; CHECK-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1001+
; CHECK-NEXT: renamable $vgpr10_vgpr11_vgpr12_vgpr13 = GLOBAL_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1002+
; CHECK-NEXT: renamable $vgpr14_vgpr15_vgpr16_vgpr17 = GLOBAL_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1003+
; CHECK-NEXT: renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
1004+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
1005+
; CHECK-NEXT: S_BRANCH %bb.2
1006+
; CHECK-NEXT: {{ $}}
1007+
; CHECK-NEXT: bb.2:
1008+
; CHECK-NEXT: liveins: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33:0x00000000FFFFFFFF
1009+
; CHECK-NEXT: {{ $}}
1010+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17
1011+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
1012+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
1013+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
1014+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
1015+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
1016+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
1017+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
1018+
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
1019+
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
1020+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr8_agpr9_agpr10_agpr11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
1021+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr12_agpr13_agpr14_agpr15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
1022+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR renamable $vgpr0, renamable $agpr0_agpr1_agpr2_agpr3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
1023+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr0, killed renamable $agpr4_agpr5_agpr6_agpr7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
1024+
; CHECK-NEXT: S_ENDPGM 0
1025+
bb.0:
1026+
S_NOP 0, implicit-def $agpr0
1027+
renamable $sgpr0 = S_MOV_B32 0
1028+
undef %0.sub8:vreg_1024_align2 = V_MOV_B32_e32 0, implicit $exec
1029+
renamable $sgpr1 = COPY renamable $sgpr0
1030+
%1:vreg_64_align2 = COPY killed renamable $sgpr0_sgpr1
1031+
renamable $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
1032+
%0.sub9:vreg_1024_align2 = COPY %0.sub8
1033+
1034+
bb.1:
1035+
liveins: $vcc
1036+
1037+
%0.sub0_sub1_sub2_sub3:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1038+
%0.sub4_sub5_sub6_sub7:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1039+
%0.sub8_sub9_sub10_sub11:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1040+
%0.sub12_sub13_sub14_sub15:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s128), addrspace 1)
1041+
%0.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:vreg_1024_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, 0, 0, 0, implicit $mode, implicit $exec
1042+
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
1043+
S_BRANCH %bb.2
1044+
1045+
bb.2:
1046+
; No VGPRs available for %0
1047+
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
1048+
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
1049+
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
1050+
S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
1051+
S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
1052+
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
1053+
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
1054+
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
1055+
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
1056+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub8_sub9_sub10_sub11, undef $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
1057+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub12_sub13_sub14_sub15, undef $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
1058+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub0_sub1_sub2_sub3, undef $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
1059+
GLOBAL_STORE_DWORDX4_SADDR %2, %0.sub4_sub5_sub6_sub7, killed undef $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
1060+
S_ENDPGM 0
1061+
1062+
...

0 commit comments

Comments
 (0)