Skip to content

Commit 9a4f57a

Browse files
committed
Drop GISelShouldIgnore for the ptradd_commutative uses
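The GlobalISelMatchTable optimizer bug that made these GISelShouldIgnore settings necessary (the C++ predicate check was hoisted above the operand matching checks) was fixed in #159329. Dropping them improves GISel code generation for scaled offset computations and makes it more consistent with SDAG.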
1 parent 1ff8970 commit 9a4f57a

8 files changed

+64
-152
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@ def BITOP3_32 : ComplexPattern<i32, 4, "SelectBITOP3", [and, or, xor]>;
1010
def BITOP3_16 : ComplexPattern<i16, 4, "SelectBITOP3", [and, or, xor]>;
1111

1212
// Matches PTRADD as a commutative operation.
13-
// FIXME: Patterns using this PatFrag together with a C++ predicate must set
14-
// GISelShouldIgnore = 1 to avoid a bug in the GlobalISelMatchTable optimizer
15-
// that hoists the predicate check above the operand matching checks.
1613
def ptradd_commutative : PatFrags<(ops node:$src0, node:$src1),
1714
[(ptradd node:$src0, node:$src1), (ptradd node:$src1, node:$src0)]>;
1815

@@ -954,9 +951,7 @@ def : GCNPat<
954951
def : GCNPat <
955952
// (ptradd z, (shl x, y)) or (ptradd (shl x, y), z) -> ((x << y) + z)
956953
(ThreeOpFrag<shl_0_to_4, ptradd_commutative> i64:$src0, i32:$src1, i64:$src2),
957-
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)> {
958-
let GISelShouldIgnore = 1;
959-
}
954+
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)>;
960955
} // End SubtargetPredicate = HasLshlAddU64Inst
961956

962957
let SubtargetPredicate = HasAddMinMaxInsts in {
@@ -1041,9 +1036,7 @@ class IMAD32_Mul24_Pats_Impl<VOP3_Pseudo inst, SDPatternOperator AddOp> : GCNPat
10411036

10421037
multiclass IMAD32_Mul24_Pats<VOP3_Pseudo inst> {
10431038
def : IMAD32_Mul24_Pats_Impl<inst, add>;
1044-
def : IMAD32_Mul24_Pats_Impl<inst, ptradd_commutative> {
1045-
let GISelShouldIgnore = 1;
1046-
}
1039+
def : IMAD32_Mul24_Pats_Impl<inst, ptradd_commutative>;
10471040
}
10481041

10491042
// exclude pre-GFX9 where it was slow

llvm/test/CodeGen/AMDGPU/fold-gep-offset.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,7 @@ define i32 @flat_offset_maybe_oob(ptr %p, i32 %i) {
119119
; GFX942-GISEL: ; %bb.0:
120120
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121121
; GFX942-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
122-
; GFX942-GISEL-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
123-
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
124-
; GFX942-GISEL-NEXT: s_nop 1
125-
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
122+
; GFX942-GISEL-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
126123
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 12, v0
127124
; GFX942-GISEL-NEXT: s_nop 1
128125
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.load.async.to.lds.ll

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -270,29 +270,15 @@ entry:
270270
}
271271

272272
define amdgpu_ps void @cluster_load_async_to_lds_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 inreg %mask, i32 %idx) {
273-
; GFX1250-SDAG-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset:
274-
; GFX1250-SDAG: ; %bb.0: ; %entry
275-
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
276-
; GFX1250-SDAG-NEXT: s_mov_b32 m0, s2
277-
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
278-
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
279-
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
280-
; GFX1250-SDAG-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
281-
; GFX1250-SDAG-NEXT: s_endpgm
282-
;
283-
; GFX1250-GISEL-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset:
284-
; GFX1250-GISEL: ; %bb.0: ; %entry
285-
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1
286-
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
287-
; GFX1250-GISEL-NEXT: s_mov_b32 m0, s2
288-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
289-
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
290-
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
291-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
292-
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
293-
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
294-
; GFX1250-GISEL-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
295-
; GFX1250-GISEL-NEXT: s_endpgm
273+
; GFX1250-LABEL: cluster_load_async_to_lds_b64_saddr_no_scale_offset:
274+
; GFX1250: ; %bb.0: ; %entry
275+
; GFX1250-NEXT: v_mov_b32_e32 v2, v1
276+
; GFX1250-NEXT: s_mov_b32 m0, s2
277+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
278+
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
279+
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
280+
; GFX1250-NEXT: cluster_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
281+
; GFX1250-NEXT: s_endpgm
296282
entry:
297283
%idxprom = sext i32 %idx to i64
298284
%gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -160,27 +160,14 @@ entry:
160160
}
161161

162162
define amdgpu_ps void @global_load_async_to_lds_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 %idx) {
163-
; GFX1250-SDAG-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
164-
; GFX1250-SDAG: ; %bb.0: ; %entry
165-
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
166-
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
167-
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
168-
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
169-
; GFX1250-SDAG-NEXT: global_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
170-
; GFX1250-SDAG-NEXT: s_endpgm
171-
;
172-
; GFX1250-GISEL-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
173-
; GFX1250-GISEL: ; %bb.0: ; %entry
174-
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1
175-
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
176-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
177-
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
178-
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
179-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
180-
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
181-
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
182-
; GFX1250-GISEL-NEXT: global_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
183-
; GFX1250-GISEL-NEXT: s_endpgm
163+
; GFX1250-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
164+
; GFX1250: ; %bb.0: ; %entry
165+
; GFX1250-NEXT: v_mov_b32_e32 v2, v1
166+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
167+
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
168+
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
169+
; GFX1250-NEXT: global_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT
170+
; GFX1250-NEXT: s_endpgm
184171
entry:
185172
%idxprom = sext i32 %idx to i64
186173
%gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -160,27 +160,14 @@ entry:
160160
}
161161

162162
define amdgpu_ps void @global_store_async_from_lds_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %gaddr, ptr addrspace(3) %laddr, i32 %idx) {
163-
; GFX1250-SDAG-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
164-
; GFX1250-SDAG: ; %bb.0: ; %entry
165-
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, v1
166-
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
167-
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
168-
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
169-
; GFX1250-SDAG-NEXT: global_store_async_from_lds_b64 v[2:3], v0, off offset:16 th:TH_STORE_NT
170-
; GFX1250-SDAG-NEXT: s_endpgm
171-
;
172-
; GFX1250-GISEL-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
173-
; GFX1250-GISEL: ; %bb.0: ; %entry
174-
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, v1
175-
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
176-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
177-
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
178-
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
179-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
180-
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
181-
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
182-
; GFX1250-GISEL-NEXT: global_store_async_from_lds_b64 v[2:3], v0, off offset:16 th:TH_STORE_NT
183-
; GFX1250-GISEL-NEXT: s_endpgm
163+
; GFX1250-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
164+
; GFX1250: ; %bb.0: ; %entry
165+
; GFX1250-NEXT: v_mov_b32_e32 v2, v1
166+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
167+
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
168+
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
169+
; GFX1250-NEXT: global_store_async_from_lds_b64 v[2:3], v0, off offset:16 th:TH_STORE_NT
170+
; GFX1250-NEXT: s_endpgm
184171
entry:
185172
%idxprom = sext i32 %idx to i64
186173
%gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.load.monitor.gfx1250.ll

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -169,33 +169,22 @@ entry:
169169
}
170170

171171
define amdgpu_ps void @global_load_monitor_b64_saddr_no_scale_offset(ptr addrspace(1) inreg %addr, ptr addrspace(1) %use, i32 %idx) {
172-
; GFX1250-SDAG-LABEL: global_load_monitor_b64_saddr_no_scale_offset:
173-
; GFX1250-SDAG: ; %bb.0: ; %entry
174-
; GFX1250-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
175-
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
176-
; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
177-
; GFX1250-SDAG-NEXT: global_load_monitor_b64 v[2:3], v[2:3], off th:TH_LOAD_NT
178-
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
179-
; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
180-
; GFX1250-SDAG-NEXT: s_endpgm
181-
;
182-
; GFX1250-GISEL-LABEL: global_load_monitor_b64_saddr_no_scale_offset:
183-
; GFX1250-GISEL: ; %bb.0: ; %entry
184-
; GFX1250-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
185-
; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
186-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
187-
; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
188-
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
189-
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
190-
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, v5, v3, vcc_lo
191-
; GFX1250-GISEL-NEXT: global_load_monitor_b64 v[2:3], v[2:3], off th:TH_LOAD_NT
192-
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
193-
; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
194-
; GFX1250-GISEL-NEXT: s_endpgm
172+
; GFX1250-LABEL: global_load_monitor_b64_saddr_no_scale_offset:
173+
; GFX1250: ; %bb.0: ; %entry
174+
; GFX1250-NEXT: v_ashrrev_i32_e32 v3, 31, v2
175+
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
176+
; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
177+
; GFX1250-NEXT: global_load_monitor_b64 v[2:3], v[2:3], off th:TH_LOAD_NT
178+
; GFX1250-NEXT: s_wait_loadcnt 0x0
179+
; GFX1250-NEXT: global_store_b64 v[0:1], v[2:3], off
180+
; GFX1250-NEXT: s_endpgm
195181
entry:
196182
%idxprom = sext i32 %idx to i64
197183
%gep = getelementptr i32, ptr addrspace(1) %addr, i64 %idxprom
198184
%val = call <2 x i32> @llvm.amdgcn.global.load.monitor.b64.v2i32(ptr addrspace(1) %gep, i32 1)
199185
store <2 x i32> %val, ptr addrspace(1) %use
200186
ret void
201187
}
188+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
189+
; GFX1250-GISEL: {{.*}}
190+
; GFX1250-SDAG: {{.*}}

llvm/test/CodeGen/AMDGPU/scale-offset-flat.ll

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,14 @@ entry:
2828
}
2929

3030
define amdgpu_ps float @flat_load_b32_idxprom_wrong_stride(ptr align 4 inreg %p, i32 %idx) {
31-
; SDAG-LABEL: flat_load_b32_idxprom_wrong_stride:
32-
; SDAG: ; %bb.0: ; %entry
33-
; SDAG-NEXT: v_ashrrev_i32_e32 v1, 31, v0
34-
; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
35-
; SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 3, s[0:1]
36-
; SDAG-NEXT: flat_load_b32 v0, v[0:1]
37-
; SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
38-
; SDAG-NEXT: ; return to shader part epilog
39-
;
40-
; GISEL-LABEL: flat_load_b32_idxprom_wrong_stride:
41-
; GISEL: ; %bb.0: ; %entry
42-
; GISEL-NEXT: v_ashrrev_i32_e32 v1, 31, v0
43-
; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
44-
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
45-
; GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 3, v[0:1]
46-
; GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
47-
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
48-
; GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
49-
; GISEL-NEXT: flat_load_b32 v0, v[0:1]
50-
; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
51-
; GISEL-NEXT: ; return to shader part epilog
31+
; GCN-LABEL: flat_load_b32_idxprom_wrong_stride:
32+
; GCN: ; %bb.0: ; %entry
33+
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
34+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
35+
; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 3, s[0:1]
36+
; GCN-NEXT: flat_load_b32 v0, v[0:1]
37+
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
38+
; GCN-NEXT: ; return to shader part epilog
5239
entry:
5340
%idxprom = sext i32 %idx to i64
5441
%arrayidx = getelementptr inbounds <2 x float>, ptr %p, i64 %idxprom
@@ -380,16 +367,12 @@ define amdgpu_ps <2 x float> @flat_atomicrmw_b64_rtn_idxprom(ptr align 8 inreg %
380367
;
381368
; GISEL-LABEL: flat_atomicrmw_b64_rtn_idxprom:
382369
; GISEL: ; %bb.0: ; %entry
383-
; GISEL-NEXT: v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v6, src_flat_scratch_base_hi
384-
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1]
385-
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
386-
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
387-
; GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 3, v[2:3]
370+
; GISEL-NEXT: v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v0, src_flat_scratch_base_hi
388371
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
389-
; GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v4, v0
390-
; GISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v5, v1, vcc_lo
372+
; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v2
373+
; GISEL-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 3, s[0:1]
391374
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
392-
; GISEL-NEXT: v_xor_b32_e32 v0, v5, v6
375+
; GISEL-NEXT: v_xor_b32_e32 v0, v5, v0
393376
; GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 0x4000000, v0
394377
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
395378
; GISEL-NEXT: s_and_saveexec_b32 s2, vcc_lo
@@ -405,7 +388,7 @@ define amdgpu_ps <2 x float> @flat_atomicrmw_b64_rtn_idxprom(ptr align 8 inreg %
405388
; GISEL-NEXT: s_branch .LBB21_5
406389
; GISEL-NEXT: .LBB21_3: ; %atomicrmw.global
407390
; GISEL-NEXT: v_mov_b64_e32 v[0:1], 1
408-
; GISEL-NEXT: ; implicit-def: $vgpr4
391+
; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
409392
; GISEL-NEXT: flat_atomic_add_u64 v[0:1], v2, v[0:1], s[0:1] scale_offset th:TH_ATOMIC_RETURN scope:SCOPE_SYS
410393
; GISEL-NEXT: s_wait_xcnt 0x0
411394
; GISEL-NEXT: s_and_not1_saveexec_b32 s0, s2

llvm/test/CodeGen/AMDGPU/scale-offset-global.ll

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,14 @@ entry:
2828
}
2929

3030
define amdgpu_ps float @global_load_b32_idxprom_wrong_stride(ptr addrspace(1) align 4 inreg %p, i32 %idx) {
31-
; SDAG-LABEL: global_load_b32_idxprom_wrong_stride:
32-
; SDAG: ; %bb.0: ; %entry
33-
; SDAG-NEXT: v_ashrrev_i32_e32 v1, 31, v0
34-
; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
35-
; SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 3, s[0:1]
36-
; SDAG-NEXT: global_load_b32 v0, v[0:1], off
37-
; SDAG-NEXT: s_wait_loadcnt 0x0
38-
; SDAG-NEXT: ; return to shader part epilog
39-
;
40-
; GISEL-LABEL: global_load_b32_idxprom_wrong_stride:
41-
; GISEL: ; %bb.0: ; %entry
42-
; GISEL-NEXT: v_ashrrev_i32_e32 v1, 31, v0
43-
; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
44-
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
45-
; GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 3, v[0:1]
46-
; GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
47-
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
48-
; GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo
49-
; GISEL-NEXT: global_load_b32 v0, v[0:1], off
50-
; GISEL-NEXT: s_wait_loadcnt 0x0
51-
; GISEL-NEXT: ; return to shader part epilog
31+
; GCN-LABEL: global_load_b32_idxprom_wrong_stride:
32+
; GCN: ; %bb.0: ; %entry
33+
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
34+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
35+
; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 3, s[0:1]
36+
; GCN-NEXT: global_load_b32 v0, v[0:1], off
37+
; GCN-NEXT: s_wait_loadcnt 0x0
38+
; GCN-NEXT: ; return to shader part epilog
5239
entry:
5340
%idxprom = sext i32 %idx to i64
5441
%arrayidx = getelementptr inbounds <2 x float>, ptr addrspace(1) %p, i64 %idxprom
@@ -349,3 +336,6 @@ entry:
349336
}
350337

351338
!0 = !{i32 0, i32 1024}
339+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
340+
; GISEL: {{.*}}
341+
; SDAG: {{.*}}

0 commit comments

Comments
 (0)