Skip to content

Commit 02ed659

Browse files
authored
[AMDGPU] 4-align TTMP triples (#132759)
Follow up to e4284a7 "[AMDGPU] 4-align SGPR triples". Previously TTMP triples like ttmp[3:5] were aligned on a 3-TTMP boundary which has no basis in hardware. Aligning them on a 4-TTMP boundary matches what we do for SGPRs, which reduces the number of extra register classes synthesized by TableGen, bringing the total number down from 653 to 615.
1 parent 34fa037 commit 02ed659

File tree

4 files changed

+27
-27
lines changed

4 files changed

+27
-27
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
431431
def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
432432

433433
// Trap handler TMP 96-bit registers
434-
def TTMP_96Regs : SIRegisterTuples<getSubRegs<3>.ret, TTMP_32, 15, 3, 3, "ttmp">;
434+
def TTMP_96Regs : SIRegisterTuples<getSubRegs<3>.ret, TTMP_32, 15, 4, 3, "ttmp">;
435435

436436
// Trap handler TMP 128-bit registers
437437
def TTMP_128Regs : SIRegisterTuples<getSubRegs<4>.ret, TTMP_32, 15, 4, 4, "ttmp">;

llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,13 @@ body: |
129129
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
130130
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]]
131131
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY1]]
132-
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY2]]
132+
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY2]]
133133
; CHECK-NEXT: SI_RETURN
134134
%0:vreg_64 = COPY $vgpr0_vgpr1
135135
%1:vreg_64 = COPY $vgpr2_vgpr3
136136
undef %2.sub0_sub1:areg_128 = COPY %0
137137
%2.sub2_sub3:areg_128 = COPY %1
138-
INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %2
138+
INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, killed %2
139139
SI_RETURN
140140
141141
...
@@ -154,13 +154,13 @@ body: |
154154
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
155155
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]]
156156
; CHECK-NEXT: [[COPY2:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY1]]
157-
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY2]]
157+
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY2]]
158158
; CHECK-NEXT: SI_RETURN
159159
%0:vreg_64 = COPY $vgpr0_vgpr1
160160
%1:vreg_64 = COPY $vgpr2_vgpr3
161161
undef %2.sub0_sub1:areg_128_align2 = COPY %0
162162
%2.sub2_sub3:areg_128_align2 = COPY %1
163-
INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, %2
163+
INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %2
164164
SI_RETURN
165165
166166
...
@@ -399,14 +399,14 @@ body: |
399399
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128 = COPY [[COPY]]
400400
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128 = COPY [[COPY]]
401401
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128 = COPY [[COPY]]
402-
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY1]]
402+
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY1]]
403403
; CHECK-NEXT: SI_RETURN
404404
%0:vgpr_32 = COPY $vgpr0
405405
undef %1.sub0:areg_128 = COPY %0
406406
%1.sub1:areg_128 = COPY %0
407407
%1.sub2:areg_128 = COPY %0
408408
%1.sub3:areg_128 = COPY %0
409-
INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %1
409+
INLINEASM &"; use $0", 0 /* attdialect */, 6029321 /* reguse:AReg_128 */, killed %1
410410
SI_RETURN
411411
412412
...
@@ -426,14 +426,14 @@ body: |
426426
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128_align2 = COPY [[COPY]]
427427
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128_align2 = COPY [[COPY]]
428428
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128_align2 = COPY [[COPY]]
429-
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY1]]
429+
; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY1]]
430430
; CHECK-NEXT: SI_RETURN
431431
%0:vgpr_32 = COPY $vgpr0
432432
undef %1.sub0:areg_128_align2 = COPY %0
433433
%1.sub1:areg_128_align2 = COPY %0
434434
%1.sub2:areg_128_align2 = COPY %0
435435
%1.sub3:areg_128_align2 = COPY %0
436-
INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, %1
436+
INLINEASM &"; use $0", 0 /* attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %1
437437
SI_RETURN
438438
439439
...

llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@
88
define amdgpu_kernel void @s_input_output_i128() {
99
; GFX908-LABEL: name: s_input_output_i128
1010
; GFX908: bb.0 (%ir-block.0):
11-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7471114 /* regdef:SGPR_128 */, def %12
11+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %12
1212
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %12
13-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7471113 /* reguse:SGPR_128 */, [[COPY]]
13+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, [[COPY]]
1414
; GFX908-NEXT: S_ENDPGM 0
1515
;
1616
; GFX90A-LABEL: name: s_input_output_i128
1717
; GFX90A: bb.0 (%ir-block.0):
18-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7471114 /* regdef:SGPR_128 */, def %10
18+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %10
1919
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %10
20-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7471113 /* reguse:SGPR_128 */, [[COPY]]
20+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, [[COPY]]
2121
; GFX90A-NEXT: S_ENDPGM 0
2222
%val = tail call i128 asm sideeffect "; def $0", "=s"()
2323
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -27,16 +27,16 @@ define amdgpu_kernel void @s_input_output_i128() {
2727
define amdgpu_kernel void @v_input_output_i128() {
2828
; GFX908-LABEL: name: v_input_output_i128
2929
; GFX908: bb.0 (%ir-block.0):
30-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %12
30+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %12
3131
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %12
32-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:VReg_128 */, [[COPY]]
32+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128 */, [[COPY]]
3333
; GFX908-NEXT: S_ENDPGM 0
3434
;
3535
; GFX90A-LABEL: name: v_input_output_i128
3636
; GFX90A: bb.0 (%ir-block.0):
37-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %10
37+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %10
3838
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %10
39-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6619145 /* reguse:VReg_128_Align2 */, [[COPY]]
39+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:VReg_128_Align2 */, [[COPY]]
4040
; GFX90A-NEXT: S_ENDPGM 0
4141
%val = tail call i128 asm sideeffect "; def $0", "=v"()
4242
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -46,16 +46,16 @@ define amdgpu_kernel void @v_input_output_i128() {
4646
define amdgpu_kernel void @a_input_output_i128() {
4747
; GFX908-LABEL: name: a_input_output_i128
4848
; GFX908: bb.0 (%ir-block.0):
49-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:AReg_128 */, def %12
49+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128 */, def %12
5050
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %12
51-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:AReg_128 */, [[COPY]]
51+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY]]
5252
; GFX908-NEXT: S_ENDPGM 0
5353
;
5454
; GFX90A-LABEL: name: a_input_output_i128
5555
; GFX90A: bb.0 (%ir-block.0):
56-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6488074 /* regdef:AReg_128_Align2 */, def %10
56+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:AReg_128_Align2 */, def %10
5757
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %10
58-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6488073 /* reguse:AReg_128_Align2 */, [[COPY]]
58+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY]]
5959
; GFX90A-NEXT: S_ENDPGM 0
6060
%val = call i128 asm sideeffect "; def $0", "=a"()
6161
call void asm sideeffect "; use $0", "a"(i128 %val)

llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
1111
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
1212
; REGALLOC-GFX908-NEXT: {{ $}}
1313
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
14-
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %6
14+
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %6
1515
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %7
1616
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
1717
; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
@@ -20,8 +20,8 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
2020
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
2121
; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
2222
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
23-
; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
24-
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
23+
; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
24+
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
2525
; REGALLOC-GFX908-NEXT: S_ENDPGM 0
2626
;
2727
; PEI-GFX908-LABEL: name: partial_copy
@@ -32,7 +32,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
3232
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
3333
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
3434
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
35-
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
35+
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
3636
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
3737
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
3838
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
@@ -56,7 +56,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
5656
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
5757
; REGALLOC-GFX90A-NEXT: {{ $}}
5858
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
59-
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %6
59+
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %6
6060
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %7
6161
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
6262
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
@@ -76,7 +76,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
7676
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
7777
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
7878
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
79-
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
79+
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
8080
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
8181
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
8282
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)

0 commit comments

Comments
 (0)