Skip to content

Commit d883d5f

Browse files
committed
AMDGPU: Add testcase with bad regalloc behavior
This demonstrates poor register allocation caused by the AV register classes not being ordered relative to the A and V classes.
1 parent 28208c8 commit d883d5f

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,2 -stop-after=virtregrewriter,2 -o - %s | FileCheck %s
#
# Purpose: demonstrates poor register allocation that results from the AV
# register classes not being ordered relative to the A and V classes. The
# autogenerated checks record the current (undesirable) assignment.
#
# What to look for in the expected output: %4 (av_128) ends up in
# $vgpr0_vgpr1_vgpr2_vgpr3. Its four lanes are copied out of AGPRs one at a
# time, and the assembled value is then copied back into
# $agpr0_agpr1_agpr2_agpr3 right before the last MFMA.
#
# The RUN line only exercises the pipeline from just before "greedy,2" through
# "virtregrewriter,2".
# NOTE(review): the ",2" suffix selects a specific instance of each pass;
# presumably this is the vector-register allocation run, since the SGPR
# operands in the input below are already physical registers - confirm.
3+
4+
---
5+
name: bad_ra
6+
tracksRegLiveness: true
# Virtual register table. %4 is the only av_128 register; the other 128-bit
# vector values are areg_128, except %6 which is vreg_128. Several entries
# carry preferred-register hints that point at another virtual register.
7+
registers:
8+
- { id: 0, class: sgpr_64, preferred-register: '$sgpr4_sgpr5' }
9+
- { id: 1, class: sgpr_128, preferred-register: '%2' }
10+
- { id: 2, class: areg_128, preferred-register: '%1' }
11+
- { id: 3, class: areg_128, preferred-register: '%4' }
12+
- { id: 4, class: av_128, preferred-register: '%3' }
13+
- { id: 5, class: areg_128, preferred-register: '%6' }
14+
- { id: 6, class: vreg_128, preferred-register: '%5' }
15+
- { id: 7, class: areg_128, preferred-register: '%4' }
16+
- { id: 8, class: vgpr_32 }
17+
- { id: 9, class: vgpr_32 }
18+
- { id: 10, class: vgpr_32 }
19+
- { id: 11, class: areg_128 }
20+
liveins:
21+
- { reg: '$sgpr4_sgpr5', virtual-reg: '%0' }
22+
frameInfo:
23+
maxAlignment: 1
24+
isCalleeSavedInfoValid: true
25+
machineFunctionInfo:
26+
isEntryFunction: true
27+
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
28+
stackPtrOffsetReg: '$sgpr32'
29+
occupancy: 10
30+
vgprForAGPRCopy: '$vgpr255'
31+
sgprForEXECCopy: '$sgpr100_sgpr101'
32+
body: |
33+
bb.0:
34+
liveins: $sgpr4_sgpr5
35+
36+
; CHECK-LABEL: name: bad_ra
37+
; CHECK: liveins: $sgpr4_sgpr5
38+
; CHECK-NEXT: {{ $}}
39+
; CHECK-NEXT: early-clobber renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM_ec renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
40+
; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 0, 0 :: ("amdgpu-noclobber" load (s128), addrspace 1)
41+
; CHECK-NEXT: renamable $vgpr4 = V_MOV_B32_e32 1065353216, implicit $exec
42+
; CHECK-NEXT: renamable $vgpr5 = V_MOV_B32_e32 0, implicit $exec
43+
; CHECK-NEXT: renamable $vgpr6 = V_MOV_B32_e32 1073741824, implicit $exec
44+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
45+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
46+
; CHECK-NEXT: renamable $vgpr1 = COPY renamable $agpr1
47+
; CHECK-NEXT: renamable $vgpr0 = COPY renamable $agpr0
48+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
49+
; CHECK-NEXT: renamable $vgpr3 = COPY renamable $agpr1
50+
; CHECK-NEXT: renamable $vgpr2 = COPY killed renamable $agpr0
51+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
52+
; CHECK-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 killed $vgpr4, killed $vgpr6, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
53+
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3
54+
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr5, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s128), addrspace 1)
55+
; CHECK-NEXT: S_ENDPGM 0
; Input instructions start here. The scalar loads already use physical
; SGPRs; only the vector values (%2 through %11) are still virtual.
56+
early-clobber renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM_ec killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
57+
renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 0, 0 :: ("amdgpu-noclobber" load (s128), addrspace 1)
58+
%8:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
59+
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
60+
%10:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
61+
%2:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
62+
%3:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %2, 0, 0, 0, implicit $mode, implicit $exec
; %4 (av_128) is assembled piecewise: sub1 and sub0 come from the first MFMA
; result (%3), sub3 and sub2 from sub1 and sub0 of the second MFMA result (%11).
63+
undef %4.sub1:av_128 = COPY %3.sub1
64+
%4.sub0:av_128 = COPY %3.sub0
65+
%11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %3, 0, 0, 0, implicit $mode, implicit $exec
66+
%4.sub3:av_128 = COPY %11.sub1
67+
%4.sub2:av_128 = COPY %11.sub0
; The assembled %4 is copied into an areg_128 (%7), which feeds the last MFMA.
; That result (%5) is copied to a vreg_128 (%6) for the store.
68+
%7:areg_128 = COPY %4
69+
%5:areg_128 = V_MFMA_F32_4X4X1F32_e64 %8, %10, %7, 0, 0, 0, implicit $mode, implicit $exec
70+
%6:vreg_128 = COPY %5
71+
GLOBAL_STORE_DWORDX4_SADDR %9, %6, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s128), addrspace 1)
72+
S_ENDPGM 0
73+
74+
...

0 commit comments

Comments
 (0)