@@ -145,49 +145,29 @@ entry:
145145
146146; Test skipping the lower-32-bit addition if it is unnecessary: the offset u0x100000000 has all-zero low 32 bits, so the expected code only adds 1 to the high half (v1).
147147define ptr @huge_offset_low_32_unused (ptr %p ) {
148- ; GFX942_PTRADD-LABEL: huge_offset_low_32_unused:
149- ; GFX942_PTRADD: ; %bb.0:
150- ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151- ; GFX942_PTRADD-NEXT: s_mov_b32 s0, 0
152- ; GFX942_PTRADD-NEXT: s_mov_b32 s1, 1
153- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
154- ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
155- ;
156- ; GFX942_LEGACY-LABEL: huge_offset_low_32_unused:
157- ; GFX942_LEGACY: ; %bb.0:
158- ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159- ; GFX942_LEGACY-NEXT: v_add_u32_e32 v1, 1, v1
160- ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
148+ ; GFX942-LABEL: huge_offset_low_32_unused:
149+ ; GFX942: ; %bb.0:
150+ ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151+ ; GFX942-NEXT: v_add_u32_e32 v1, 1, v1
152+ ; GFX942-NEXT: s_setpc_b64 s[30:31]
161153 %gep = getelementptr inbounds i8 , ptr %p , i64 u0x100000000
162154 ret ptr %gep
163155}
164156
165157; Test that address computations are reassociated if that leads to more scalar operations.
166158define amdgpu_kernel void @reassoc_scalar_r (ptr addrspace (1 ) %out , ptr addrspace (1 ) %p , i64 %soffset ) {
167- ; GFX942_PTRADD-LABEL: reassoc_scalar_r:
168- ; GFX942_PTRADD: ; %bb.0: ; %entry
169- ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
170- ; GFX942_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
171- ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
172- ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
173- ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
174- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7]
175- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
176- ; GFX942_PTRADD-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
177- ; GFX942_PTRADD-NEXT: s_endpgm
178- ;
179- ; GFX942_LEGACY-LABEL: reassoc_scalar_r:
180- ; GFX942_LEGACY: ; %bb.0: ; %entry
181- ; GFX942_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
182- ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
183- ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 0
184- ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
185- ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
186- ; GFX942_LEGACY-NEXT: s_add_u32 s2, s2, s6
187- ; GFX942_LEGACY-NEXT: s_addc_u32 s3, s3, s7
188- ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
189- ; GFX942_LEGACY-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
190- ; GFX942_LEGACY-NEXT: s_endpgm
159+ ; GFX942-LABEL: reassoc_scalar_r:
160+ ; GFX942: ; %bb.0: ; %entry
161+ ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
162+ ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
163+ ; GFX942-NEXT: v_mov_b32_e32 v1, 0
164+ ; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
165+ ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
166+ ; GFX942-NEXT: s_add_u32 s2, s2, s6
167+ ; GFX942-NEXT: s_addc_u32 s3, s3, s7
168+ ; GFX942-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
169+ ; GFX942-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
170+ ; GFX942-NEXT: s_endpgm
191171entry:
192172 %voffset32 = call i32 @llvm.amdgcn.workitem.id.x ()
193173 %voffset = zext i32 %voffset32 to i64
@@ -198,30 +178,18 @@ entry:
198178}
199179
200180define amdgpu_kernel void @reassoc_scalar_l (ptr addrspace (1 ) %out , ptr addrspace (1 ) %p , i64 %soffset ) {
201- ; GFX942_PTRADD-LABEL: reassoc_scalar_l:
202- ; GFX942_PTRADD: ; %bb.0: ; %entry
203- ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
204- ; GFX942_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
205- ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v1, 0
206- ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
207- ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
208- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1]
209- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3]
210- ; GFX942_PTRADD-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
211- ; GFX942_PTRADD-NEXT: s_endpgm
212- ;
213- ; GFX942_LEGACY-LABEL: reassoc_scalar_l:
214- ; GFX942_LEGACY: ; %bb.0: ; %entry
215- ; GFX942_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
216- ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
217- ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v1, 0
218- ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
219- ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
220- ; GFX942_LEGACY-NEXT: s_add_u32 s2, s2, s6
221- ; GFX942_LEGACY-NEXT: s_addc_u32 s3, s3, s7
222- ; GFX942_LEGACY-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
223- ; GFX942_LEGACY-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
224- ; GFX942_LEGACY-NEXT: s_endpgm
181+ ; GFX942-LABEL: reassoc_scalar_l:
182+ ; GFX942: ; %bb.0: ; %entry
183+ ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
184+ ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
185+ ; GFX942-NEXT: v_mov_b32_e32 v1, 0
186+ ; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
187+ ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
188+ ; GFX942-NEXT: s_add_u32 s2, s2, s6
189+ ; GFX942-NEXT: s_addc_u32 s3, s3, s7
190+ ; GFX942-NEXT: v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1]
191+ ; GFX942-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
192+ ; GFX942-NEXT: s_endpgm
225193entry:
226194 %voffset32 = call i32 @llvm.amdgcn.workitem.id.x ()
227195 %voffset = zext i32 %voffset32 to i64
@@ -233,24 +201,14 @@ entry:
233201
234202; Tests the target-specific (ptradd x, shl(0 - y, k)) -> sub(x, shl(y, k)) fold.
235203define ptr addrspace (1 ) @shl_neg_offset (ptr addrspace (1 ) %p , i64 %noffset , i64 %shift ) {
236- ; GFX942_PTRADD-LABEL: shl_neg_offset:
237- ; GFX942_PTRADD: ; %bb.0:
238- ; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239- ; GFX942_PTRADD-NEXT: v_sub_co_u32_e32 v2, vcc, 0, v2
240- ; GFX942_PTRADD-NEXT: s_nop 1
241- ; GFX942_PTRADD-NEXT: v_subb_co_u32_e32 v3, vcc, 0, v3, vcc
242- ; GFX942_PTRADD-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3]
243- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
244- ; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
245- ;
246- ; GFX942_LEGACY-LABEL: shl_neg_offset:
247- ; GFX942_LEGACY: ; %bb.0:
248- ; GFX942_LEGACY-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249- ; GFX942_LEGACY-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3]
250- ; GFX942_LEGACY-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
251- ; GFX942_LEGACY-NEXT: s_nop 1
252- ; GFX942_LEGACY-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
253- ; GFX942_LEGACY-NEXT: s_setpc_b64 s[30:31]
204+ ; GFX942-LABEL: shl_neg_offset:
205+ ; GFX942: ; %bb.0:
206+ ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207+ ; GFX942-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3]
208+ ; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
209+ ; GFX942-NEXT: s_nop 1
210+ ; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
211+ ; GFX942-NEXT: s_setpc_b64 s[30:31]
254212 %offset = sub i64 0 , %noffset
255213 %x = shl i64 %offset , %shift
256214 %gep = getelementptr inbounds i8 , ptr addrspace (1 ) %p , i64 %x
@@ -268,10 +226,9 @@ define ptr addrspace(1) @complextype_global_gep(i64 %offset) {
268226; GFX942_PTRADD: ; %bb.0:
269227; GFX942_PTRADD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270228; GFX942_PTRADD-NEXT: s_getpc_b64 s[0:1]
271- ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, v0@rel32@lo+4
272- ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, v0@rel32@hi+12
229+ ; GFX942_PTRADD-NEXT: s_add_u32 s0, s0, v0@rel32@lo+14
230+ ; GFX942_PTRADD-NEXT: s_addc_u32 s1, s1, v0@rel32@hi+22
273231; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
274- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, 10
275232; GFX942_PTRADD-NEXT: s_setpc_b64 s[30:31]
276233;
277234; GFX942_LEGACY-LABEL: complextype_global_gep:
@@ -291,27 +248,15 @@ define ptr addrspace(1) @complextype_global_gep(i64 %offset) {
291248
292249; Tests the tryFoldToMad64_32 PTRADD combine.
293250define amdgpu_kernel void @fold_mad64 (ptr addrspace (1 ) %p ) {
294- ; GFX942_PTRADD-LABEL: fold_mad64:
295- ; GFX942_PTRADD: ; %bb.0:
296- ; GFX942_PTRADD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
297- ; GFX942_PTRADD-NEXT: v_and_b32_e32 v0, 0x3ff, v0
298- ; GFX942_PTRADD-NEXT: v_mul_hi_u32_u24_e32 v1, 12, v0
299- ; GFX942_PTRADD-NEXT: v_mul_u32_u24_e32 v0, 12, v0
300- ; GFX942_PTRADD-NEXT: v_mov_b32_e32 v2, 1.0
301- ; GFX942_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
302- ; GFX942_PTRADD-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
303- ; GFX942_PTRADD-NEXT: global_store_dword v[0:1], v2, off
304- ; GFX942_PTRADD-NEXT: s_endpgm
305- ;
306- ; GFX942_LEGACY-LABEL: fold_mad64:
307- ; GFX942_LEGACY: ; %bb.0:
308- ; GFX942_LEGACY-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
309- ; GFX942_LEGACY-NEXT: v_and_b32_e32 v0, 0x3ff, v0
310- ; GFX942_LEGACY-NEXT: v_mov_b32_e32 v2, 1.0
311- ; GFX942_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
312- ; GFX942_LEGACY-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
313- ; GFX942_LEGACY-NEXT: global_store_dword v[0:1], v2, off
314- ; GFX942_LEGACY-NEXT: s_endpgm
251+ ; GFX942-LABEL: fold_mad64:
252+ ; GFX942: ; %bb.0:
253+ ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
254+ ; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
255+ ; GFX942-NEXT: v_mov_b32_e32 v2, 1.0
256+ ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
257+ ; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[0:1]
258+ ; GFX942-NEXT: global_store_dword v[0:1], v2, off
259+ ; GFX942-NEXT: s_endpgm
315260 %voffset32 = call i32 @llvm.amdgcn.workitem.id.x ()
316261 %voffset = zext i32 %voffset32 to i64
317262 %p1 = getelementptr inbounds %S , ptr addrspace (1 ) %p , i64 %voffset , i32 0
0 commit comments