|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
2 | 2 | ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-SDAG %s
|
3 | 3 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-GISEL %s
|
4 |
| -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-SDAG-STRESS %s |
5 |
| -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-GISEL-STRESS %s |
6 | 4 |
|
7 | 5 | declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32)
|
8 | 6 | declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32)
|
@@ -51,50 +49,6 @@ define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 {
|
51 | 49 | ; GFX942-GISEL-NEXT: s_nop 5
|
52 | 50 | ; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
|
53 | 51 | ; GFX942-GISEL-NEXT: s_endpgm
|
54 |
| -; |
55 |
| -; GFX942-SDAG-STRESS-LABEL: test_mfma_f32_16x16x8xf32: |
56 |
| -; GFX942-SDAG-STRESS: ; %bb.0: ; %bb |
57 |
| -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 |
58 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0 |
59 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0 |
60 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000 |
61 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0 |
62 |
| -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
63 |
| -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 |
64 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v4, 0 |
65 |
| -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
66 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
67 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
68 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
69 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
70 |
| -; GFX942-SDAG-STRESS-NEXT: s_nop 1 |
71 |
| -; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 |
72 |
| -; GFX942-SDAG-STRESS-NEXT: s_nop 6 |
73 |
| -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7] |
74 |
| -; GFX942-SDAG-STRESS-NEXT: s_endpgm |
75 |
| -; |
76 |
| -; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_16x16x8xf32: |
77 |
| -; GFX942-GISEL-STRESS: ; %bb.0: ; %bb |
78 |
| -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 |
79 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0 |
80 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s2, 0x40400000 |
81 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0 |
82 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s3, 4.0 |
83 |
| -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
84 |
| -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[2:3] |
85 |
| -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
86 |
| -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 |
87 |
| -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
88 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
89 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
90 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
91 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
92 |
| -; GFX942-GISEL-STRESS-NEXT: s_nop 1 |
93 |
| -; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 |
94 |
| -; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0 |
95 |
| -; GFX942-GISEL-STRESS-NEXT: s_nop 5 |
96 |
| -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] |
97 |
| -; GFX942-GISEL-STRESS-NEXT: s_endpgm |
98 | 52 | bb:
|
99 | 53 | %in.1 = load <4 x float>, ptr addrspace(1) %arg
|
100 | 54 | %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
|
@@ -178,82 +132,6 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
|
178 | 132 | ; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
|
179 | 133 | ; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
|
180 | 134 | ; GFX942-GISEL-NEXT: s_endpgm
|
181 |
| -; |
182 |
| -; GFX942-SDAG-STRESS-LABEL: test_mfma_f32_32x32x4xf32: |
183 |
| -; GFX942-SDAG-STRESS: ; %bb.0: ; %bb |
184 |
| -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 |
185 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0 |
186 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0 |
187 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000 |
188 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0 |
189 |
| -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
190 |
| -; GFX942-SDAG-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 |
191 |
| -; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
192 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
193 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
194 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
195 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
196 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a4, s4 |
197 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a5, s5 |
198 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a6, s6 |
199 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a7, s7 |
200 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a8, s8 |
201 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a9, s9 |
202 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a10, s10 |
203 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a11, s11 |
204 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a12, s12 |
205 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a13, s13 |
206 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a14, s14 |
207 |
| -; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a15, s15 |
208 |
| -; GFX942-SDAG-STRESS-NEXT: s_nop 1 |
209 |
| -; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 |
210 |
| -; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 0 |
211 |
| -; GFX942-SDAG-STRESS-NEXT: s_nop 7 |
212 |
| -; GFX942-SDAG-STRESS-NEXT: s_nop 1 |
213 |
| -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 |
214 |
| -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 |
215 |
| -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 |
216 |
| -; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] |
217 |
| -; GFX942-SDAG-STRESS-NEXT: s_endpgm |
218 |
| -; |
219 |
| -; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_32x32x4xf32: |
220 |
| -; GFX942-GISEL-STRESS: ; %bb.0: ; %bb |
221 |
| -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 |
222 |
| -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
223 |
| -; GFX942-GISEL-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 |
224 |
| -; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) |
225 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 |
226 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 |
227 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 |
228 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 |
229 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a4, s4 |
230 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a5, s5 |
231 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a6, s6 |
232 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a7, s7 |
233 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a8, s8 |
234 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a9, s9 |
235 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a10, s10 |
236 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a11, s11 |
237 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a12, s12 |
238 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a13, s13 |
239 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a14, s14 |
240 |
| -; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a15, s15 |
241 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0 |
242 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0 |
243 |
| -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1] |
244 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 0x40400000 |
245 |
| -; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 4.0 |
246 |
| -; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
247 |
| -; GFX942-GISEL-STRESS-NEXT: s_nop 1 |
248 |
| -; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 |
249 |
| -; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0 |
250 |
| -; GFX942-GISEL-STRESS-NEXT: s_nop 7 |
251 |
| -; GFX942-GISEL-STRESS-NEXT: s_nop 1 |
252 |
| -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] |
253 |
| -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 |
254 |
| -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 |
255 |
| -; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 |
256 |
| -; GFX942-GISEL-STRESS-NEXT: s_endpgm |
257 | 135 | bb:
|
258 | 136 | %in.1 = load <16 x float>, ptr addrspace(1) %arg
|
259 | 137 | %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
|
|
264 | 142 | attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
|
265 | 143 | ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
266 | 144 | ; GFX942: {{.*}}
|
267 |
| -; GFX942-STRESS: {{.*}} |
0 commit comments