Skip to content

Commit 54c7cab

Browse files
committed
Propagate Constants for Wave Reduction Intrinsics
1 parent a5486f1 commit 54c7cab

File tree

9 files changed

+755
-2109
lines changed

9 files changed

+755
-2109
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

100644 → 100755
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
16471647
case Intrinsic::amdgcn_perm:
16481648
case Intrinsic::amdgcn_wave_reduce_umin:
16491649
case Intrinsic::amdgcn_wave_reduce_umax:
1650+
case Intrinsic::amdgcn_wave_reduce_max:
1651+
case Intrinsic::amdgcn_wave_reduce_min:
1652+
case Intrinsic::amdgcn_wave_reduce_add:
1653+
case Intrinsic::amdgcn_wave_reduce_sub:
1654+
case Intrinsic::amdgcn_wave_reduce_and:
1655+
case Intrinsic::amdgcn_wave_reduce_or:
1656+
case Intrinsic::amdgcn_wave_reduce_xor:
16501657
case Intrinsic::amdgcn_s_wqm:
16511658
case Intrinsic::amdgcn_s_quadmask:
16521659
case Intrinsic::amdgcn_s_bitreplicate:
@@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
35203527
return ConstantInt::get(Ty, C0->abs());
35213528
case Intrinsic::amdgcn_wave_reduce_umin:
35223529
case Intrinsic::amdgcn_wave_reduce_umax:
3530+
case Intrinsic::amdgcn_wave_reduce_max:
3531+
case Intrinsic::amdgcn_wave_reduce_min:
3532+
case Intrinsic::amdgcn_wave_reduce_add:
3533+
case Intrinsic::amdgcn_wave_reduce_sub:
3534+
case Intrinsic::amdgcn_wave_reduce_and:
3535+
case Intrinsic::amdgcn_wave_reduce_or:
3536+
case Intrinsic::amdgcn_wave_reduce_xor:
35233537
return dyn_cast<Constant>(Operands[0]);
35243538
}
35253539

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll

Lines changed: 88 additions & 532 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll

Lines changed: 8 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -223,68 +223,34 @@ entry:
223223
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
224224
; GFX8DAGISEL-LABEL: poison_value:
225225
; GFX8DAGISEL: ; %bb.0: ; %entry
226-
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
227-
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
228-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
229-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
230-
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
231226
; GFX8DAGISEL-NEXT: s_endpgm
232227
;
233228
; GFX8GISEL-LABEL: poison_value:
234229
; GFX8GISEL: ; %bb.0: ; %entry
235-
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
236-
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
237-
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
238-
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
239-
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
240230
; GFX8GISEL-NEXT: s_endpgm
241231
;
242232
; GFX9DAGISEL-LABEL: poison_value:
243233
; GFX9DAGISEL: ; %bb.0: ; %entry
244-
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
245-
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
246-
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
247-
; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
248234
; GFX9DAGISEL-NEXT: s_endpgm
249235
;
250236
; GFX9GISEL-LABEL: poison_value:
251237
; GFX9GISEL: ; %bb.0: ; %entry
252-
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
253-
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
254-
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
255-
; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
256238
; GFX9GISEL-NEXT: s_endpgm
257239
;
258240
; GFX10DAGISEL-LABEL: poison_value:
259241
; GFX10DAGISEL: ; %bb.0: ; %entry
260-
; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
261-
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
262-
; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
263-
; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
264242
; GFX10DAGISEL-NEXT: s_endpgm
265243
;
266244
; GFX10GISEL-LABEL: poison_value:
267245
; GFX10GISEL: ; %bb.0: ; %entry
268-
; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
269-
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
270-
; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
271-
; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
272246
; GFX10GISEL-NEXT: s_endpgm
273247
;
274248
; GFX11DAGISEL-LABEL: poison_value:
275249
; GFX11DAGISEL: ; %bb.0: ; %entry
276-
; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
277-
; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
278-
; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
279-
; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
280250
; GFX11DAGISEL-NEXT: s_endpgm
281251
;
282252
; GFX11GISEL-LABEL: poison_value:
283253
; GFX11GISEL: ; %bb.0: ; %entry
284-
; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
285-
; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
286-
; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
287-
; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
288254
; GFX11GISEL-NEXT: s_endpgm
289255
entry:
290256
%result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1)
@@ -1113,11 +1079,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
11131079
; GFX9DAGISEL-LABEL: const_value_i64:
11141080
; GFX9DAGISEL: ; %bb.0: ; %entry
11151081
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1116-
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
11171082
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
1118-
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
1083+
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
11191084
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1120-
; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1085+
; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
11211086
; GFX9DAGISEL-NEXT: s_endpgm
11221087
;
11231088
; GFX9GISEL-LABEL: const_value_i64:
@@ -1133,11 +1098,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
11331098
; GFX10DAGISEL-LABEL: const_value_i64:
11341099
; GFX10DAGISEL: ; %bb.0: ; %entry
11351100
; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1136-
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
11371101
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0
1138-
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0
1102+
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
11391103
; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1140-
; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1104+
; GFX10DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
11411105
; GFX10DAGISEL-NEXT: s_endpgm
11421106
;
11431107
; GFX10GISEL-LABEL: const_value_i64:
@@ -1153,11 +1117,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
11531117
; GFX1164DAGISEL-LABEL: const_value_i64:
11541118
; GFX1164DAGISEL: ; %bb.0: ; %entry
11551119
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
1156-
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
11571120
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
1158-
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
1121+
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
11591122
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1160-
; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1123+
; GFX1164DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1]
11611124
; GFX1164DAGISEL-NEXT: s_endpgm
11621125
;
11631126
; GFX1164GISEL-LABEL: const_value_i64:
@@ -1173,10 +1136,9 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
11731136
; GFX1132DAGISEL-LABEL: const_value_i64:
11741137
; GFX1132DAGISEL: ; %bb.0: ; %entry
11751138
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
1176-
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
1177-
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
1139+
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
11781140
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1179-
; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1141+
; GFX1132DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1]
11801142
; GFX1132DAGISEL-NEXT: s_endpgm
11811143
;
11821144
; GFX1132GISEL-LABEL: const_value_i64:
@@ -1196,68 +1158,34 @@ entry:
11961158
define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
11971159
; GFX8DAGISEL-LABEL: poison_value_i64:
11981160
; GFX8DAGISEL: ; %bb.0: ; %entry
1199-
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1200-
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1201-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
1202-
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
1203-
; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
12041161
; GFX8DAGISEL-NEXT: s_endpgm
12051162
;
12061163
; GFX8GISEL-LABEL: poison_value_i64:
12071164
; GFX8GISEL: ; %bb.0: ; %entry
1208-
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1209-
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
1210-
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
1211-
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
1212-
; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
12131165
; GFX8GISEL-NEXT: s_endpgm
12141166
;
12151167
; GFX9DAGISEL-LABEL: poison_value_i64:
12161168
; GFX9DAGISEL: ; %bb.0: ; %entry
1217-
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1218-
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
1219-
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1220-
; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
12211169
; GFX9DAGISEL-NEXT: s_endpgm
12221170
;
12231171
; GFX9GISEL-LABEL: poison_value_i64:
12241172
; GFX9GISEL: ; %bb.0: ; %entry
1225-
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1226-
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
1227-
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
1228-
; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
12291173
; GFX9GISEL-NEXT: s_endpgm
12301174
;
12311175
; GFX10DAGISEL-LABEL: poison_value_i64:
12321176
; GFX10DAGISEL: ; %bb.0: ; %entry
1233-
; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1234-
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
1235-
; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1236-
; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
12371177
; GFX10DAGISEL-NEXT: s_endpgm
12381178
;
12391179
; GFX10GISEL-LABEL: poison_value_i64:
12401180
; GFX10GISEL: ; %bb.0: ; %entry
1241-
; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1242-
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
1243-
; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
1244-
; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
12451181
; GFX10GISEL-NEXT: s_endpgm
12461182
;
12471183
; GFX11DAGISEL-LABEL: poison_value_i64:
12481184
; GFX11DAGISEL: ; %bb.0: ; %entry
1249-
; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
1250-
; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
1251-
; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
1252-
; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
12531185
; GFX11DAGISEL-NEXT: s_endpgm
12541186
;
12551187
; GFX11GISEL-LABEL: poison_value_i64:
12561188
; GFX11GISEL: ; %bb.0: ; %entry
1257-
; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
1258-
; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
1259-
; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
1260-
; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
12611189
; GFX11GISEL-NEXT: s_endpgm
12621190
entry:
12631191
%result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1)

0 commit comments

Comments
 (0)