@@ -127,6 +127,34 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
127
127
ret <1 x bfloat> %ret
128
128
}
129
129
130
+ define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind { ; Acquire atomic load of a <1 x ptr> vector; the CHECK3 and CHECK0 assertions both expect a call to ___atomic_load with 8 in %edi and the result read back from (%rsp) into %rax.
131
+ ; CHECK3-LABEL: atomic_vec1_ptr:
132
+ ; CHECK3: ## %bb.0:
133
+ ; CHECK3-NEXT: pushq %rax
134
+ ; CHECK3-NEXT: movq %rdi, %rsi
135
+ ; CHECK3-NEXT: movq %rsp, %rdx
136
+ ; CHECK3-NEXT: movl $8, %edi
137
+ ; CHECK3-NEXT: movl $2, %ecx
138
+ ; CHECK3-NEXT: callq ___atomic_load
139
+ ; CHECK3-NEXT: movq (%rsp), %rax
140
+ ; CHECK3-NEXT: popq %rcx
141
+ ; CHECK3-NEXT: retq
142
+ ;
143
+ ; CHECK0-LABEL: atomic_vec1_ptr:
144
+ ; CHECK0: ## %bb.0:
145
+ ; CHECK0-NEXT: pushq %rax
146
+ ; CHECK0-NEXT: movq %rdi, %rsi
147
+ ; CHECK0-NEXT: movl $8, %edi
148
+ ; CHECK0-NEXT: movq %rsp, %rdx
149
+ ; CHECK0-NEXT: movl $2, %ecx
150
+ ; CHECK0-NEXT: callq ___atomic_load
151
+ ; CHECK0-NEXT: movq (%rsp), %rax
152
+ ; CHECK0-NEXT: popq %rcx
153
+ ; CHECK0-NEXT: retq
154
+ %ret = load atomic <1 x ptr >, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 8 passed in %edi; presumably this under-alignment is why a libcall is expected -- confirm.
155
+ ret <1 x ptr > %ret
156
+ }
157
+
130
158
define <1 x half > @atomic_vec1_half (ptr %x ) {
131
159
; CHECK3-LABEL: atomic_vec1_half:
132
160
; CHECK3: ## %bb.0:
@@ -153,3 +181,214 @@ define <1 x float> @atomic_vec1_float(ptr %x) {
153
181
%ret = load atomic <1 x float >, ptr %x acquire , align 4
154
182
ret <1 x float > %ret
155
183
}
184
+
185
+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) nounwind { ; Acquire atomic load of a <1 x i64> vector; the CHECK3 and CHECK0 assertions both expect a call to ___atomic_load with 8 in %edi and the result read back from (%rsp) into %rax.
186
+ ; CHECK3-LABEL: atomic_vec1_i64:
187
+ ; CHECK3: ## %bb.0:
188
+ ; CHECK3-NEXT: pushq %rax
189
+ ; CHECK3-NEXT: movq %rdi, %rsi
190
+ ; CHECK3-NEXT: movq %rsp, %rdx
191
+ ; CHECK3-NEXT: movl $8, %edi
192
+ ; CHECK3-NEXT: movl $2, %ecx
193
+ ; CHECK3-NEXT: callq ___atomic_load
194
+ ; CHECK3-NEXT: movq (%rsp), %rax
195
+ ; CHECK3-NEXT: popq %rcx
196
+ ; CHECK3-NEXT: retq
197
+ ;
198
+ ; CHECK0-LABEL: atomic_vec1_i64:
199
+ ; CHECK0: ## %bb.0:
200
+ ; CHECK0-NEXT: pushq %rax
201
+ ; CHECK0-NEXT: movq %rdi, %rsi
202
+ ; CHECK0-NEXT: movl $8, %edi
203
+ ; CHECK0-NEXT: movq %rsp, %rdx
204
+ ; CHECK0-NEXT: movl $2, %ecx
205
+ ; CHECK0-NEXT: callq ___atomic_load
206
+ ; CHECK0-NEXT: movq (%rsp), %rax
207
+ ; CHECK0-NEXT: popq %rcx
208
+ ; CHECK0-NEXT: retq
209
+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 8 passed in %edi; presumably this under-alignment is why a libcall is expected -- confirm.
210
+ ret <1 x i64 > %ret
211
+ }
212
+
213
+ define <1 x double > @atomic_vec1_double (ptr %x ) nounwind { ; Acquire atomic load of a <1 x double> vector; the CHECK3 and CHECK0 assertions both expect a call to ___atomic_load with 8 in %edi, followed by a movsd reload into xmm0.
214
+ ; CHECK3-LABEL: atomic_vec1_double:
215
+ ; CHECK3: ## %bb.0:
216
+ ; CHECK3-NEXT: pushq %rax
217
+ ; CHECK3-NEXT: movq %rdi, %rsi
218
+ ; CHECK3-NEXT: movq %rsp, %rdx
219
+ ; CHECK3-NEXT: movl $8, %edi
220
+ ; CHECK3-NEXT: movl $2, %ecx
221
+ ; CHECK3-NEXT: callq ___atomic_load
222
+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
223
+ ; CHECK3-NEXT: popq %rax
224
+ ; CHECK3-NEXT: retq
225
+ ;
226
+ ; CHECK0-LABEL: atomic_vec1_double:
227
+ ; CHECK0: ## %bb.0:
228
+ ; CHECK0-NEXT: pushq %rax
229
+ ; CHECK0-NEXT: movq %rdi, %rsi
230
+ ; CHECK0-NEXT: movl $8, %edi
231
+ ; CHECK0-NEXT: movq %rsp, %rdx
232
+ ; CHECK0-NEXT: movl $2, %ecx
233
+ ; CHECK0-NEXT: callq ___atomic_load
234
+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
235
+ ; CHECK0-NEXT: popq %rax
236
+ ; CHECK0-NEXT: retq
237
+ %ret = load atomic <1 x double >, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 8 passed in %edi; presumably this under-alignment is why a libcall is expected -- confirm.
238
+ ret <1 x double > %ret
239
+ }
240
+
241
+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) nounwind { ; Acquire atomic load of a <2 x i32> vector; both assertion sets expect a call to ___atomic_load with 8 in %edi, then a reload into xmm0 (movsd under CHECK3, movq under CHECK0).
242
+ ; CHECK3-LABEL: atomic_vec2_i32:
243
+ ; CHECK3: ## %bb.0:
244
+ ; CHECK3-NEXT: pushq %rax
245
+ ; CHECK3-NEXT: movq %rdi, %rsi
246
+ ; CHECK3-NEXT: movq %rsp, %rdx
247
+ ; CHECK3-NEXT: movl $8, %edi
248
+ ; CHECK3-NEXT: movl $2, %ecx
249
+ ; CHECK3-NEXT: callq ___atomic_load
250
+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
251
+ ; CHECK3-NEXT: popq %rax
252
+ ; CHECK3-NEXT: retq
253
+ ;
254
+ ; CHECK0-LABEL: atomic_vec2_i32:
255
+ ; CHECK0: ## %bb.0:
256
+ ; CHECK0-NEXT: pushq %rax
257
+ ; CHECK0-NEXT: movq %rdi, %rsi
258
+ ; CHECK0-NEXT: movl $8, %edi
259
+ ; CHECK0-NEXT: movq %rsp, %rdx
260
+ ; CHECK0-NEXT: movl $2, %ecx
261
+ ; CHECK0-NEXT: callq ___atomic_load
262
+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
263
+ ; CHECK0-NEXT: popq %rax
264
+ ; CHECK0-NEXT: retq
265
+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 8 passed in %edi; presumably this under-alignment is why a libcall is expected -- confirm.
266
+ ret <2 x i32 > %ret
267
+ }
268
+
269
+ define <4 x float > @atomic_vec4_float (ptr %x ) nounwind { ; Acquire atomic load of a <4 x float> vector; the CHECK3 and CHECK0 assertions both expect a call to ___atomic_load with 16 in %edi, then a movaps reload from (%rsp) into xmm0.
270
+ ; CHECK3-LABEL: atomic_vec4_float:
271
+ ; CHECK3: ## %bb.0:
272
+ ; CHECK3-NEXT: subq $24, %rsp
273
+ ; CHECK3-NEXT: movq %rdi, %rsi
274
+ ; CHECK3-NEXT: movq %rsp, %rdx
275
+ ; CHECK3-NEXT: movl $16, %edi
276
+ ; CHECK3-NEXT: movl $2, %ecx
277
+ ; CHECK3-NEXT: callq ___atomic_load
278
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
279
+ ; CHECK3-NEXT: addq $24, %rsp
280
+ ; CHECK3-NEXT: retq
281
+ ;
282
+ ; CHECK0-LABEL: atomic_vec4_float:
283
+ ; CHECK0: ## %bb.0:
284
+ ; CHECK0-NEXT: subq $24, %rsp
285
+ ; CHECK0-NEXT: movq %rdi, %rsi
286
+ ; CHECK0-NEXT: movl $16, %edi
287
+ ; CHECK0-NEXT: movq %rsp, %rdx
288
+ ; CHECK0-NEXT: movl $2, %ecx
289
+ ; CHECK0-NEXT: callq ___atomic_load
290
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
291
+ ; CHECK0-NEXT: addq $24, %rsp
292
+ ; CHECK0-NEXT: retq
293
+ %ret = load atomic <4 x float >, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 16 passed in %edi; presumably the under-alignment and/or width is why a libcall is expected -- confirm.
294
+ ret <4 x float > %ret
295
+ }
296
+
297
+ define <8 x double > @atomic_vec8_double (ptr %x ) nounwind { ; Acquire atomic load of an <8 x double> vector; both assertion sets expect a call to ___atomic_load with 64 in %edi, then four stack reloads into xmm0-xmm3 (movaps under CHECK3, movapd under CHECK0).
298
+ ; CHECK3-LABEL: atomic_vec8_double:
299
+ ; CHECK3: ## %bb.0:
300
+ ; CHECK3-NEXT: subq $72, %rsp
301
+ ; CHECK3-NEXT: movq %rdi, %rsi
302
+ ; CHECK3-NEXT: movq %rsp, %rdx
303
+ ; CHECK3-NEXT: movl $64, %edi
304
+ ; CHECK3-NEXT: movl $2, %ecx
305
+ ; CHECK3-NEXT: callq ___atomic_load
306
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
307
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
308
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
309
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
310
+ ; CHECK3-NEXT: addq $72, %rsp
311
+ ; CHECK3-NEXT: retq
312
+ ;
313
+ ; CHECK0-LABEL: atomic_vec8_double:
314
+ ; CHECK0: ## %bb.0:
315
+ ; CHECK0-NEXT: subq $72, %rsp
316
+ ; CHECK0-NEXT: movq %rdi, %rsi
317
+ ; CHECK0-NEXT: movl $64, %edi
318
+ ; CHECK0-NEXT: movq %rsp, %rdx
319
+ ; CHECK0-NEXT: movl $2, %ecx
320
+ ; CHECK0-NEXT: callq ___atomic_load
321
+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
322
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
323
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
324
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
325
+ ; CHECK0-NEXT: addq $72, %rsp
326
+ ; CHECK0-NEXT: retq
327
+ %ret = load atomic <8 x double >, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 64 passed in %edi; presumably the under-alignment and/or width is why a libcall is expected -- confirm.
328
+ ret <8 x double > %ret
329
+ }
330
+
331
+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) nounwind { ; Acquire atomic load of a <16 x bfloat> vector; the CHECK3 and CHECK0 assertions both expect a call to ___atomic_load with 32 in %edi, then two movaps stack reloads into xmm0 and xmm1.
332
+ ; CHECK3-LABEL: atomic_vec16_bfloat:
333
+ ; CHECK3: ## %bb.0:
334
+ ; CHECK3-NEXT: subq $40, %rsp
335
+ ; CHECK3-NEXT: movq %rdi, %rsi
336
+ ; CHECK3-NEXT: movq %rsp, %rdx
337
+ ; CHECK3-NEXT: movl $32, %edi
338
+ ; CHECK3-NEXT: movl $2, %ecx
339
+ ; CHECK3-NEXT: callq ___atomic_load
340
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
341
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
342
+ ; CHECK3-NEXT: addq $40, %rsp
343
+ ; CHECK3-NEXT: retq
344
+ ;
345
+ ; CHECK0-LABEL: atomic_vec16_bfloat:
346
+ ; CHECK0: ## %bb.0:
347
+ ; CHECK0-NEXT: subq $40, %rsp
348
+ ; CHECK0-NEXT: movq %rdi, %rsi
349
+ ; CHECK0-NEXT: movl $32, %edi
350
+ ; CHECK0-NEXT: movq %rsp, %rdx
351
+ ; CHECK0-NEXT: movl $2, %ecx
352
+ ; CHECK0-NEXT: callq ___atomic_load
353
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
354
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
355
+ ; CHECK0-NEXT: addq $40, %rsp
356
+ ; CHECK0-NEXT: retq
357
+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 32 passed in %edi; presumably the under-alignment and/or width is why a libcall is expected -- confirm.
358
+ ret <16 x bfloat> %ret
359
+ }
360
+
361
+ define <32 x half > @atomic_vec32_half (ptr %x ) nounwind { ; Acquire atomic load of a <32 x half> vector; the CHECK3 and CHECK0 assertions both expect a call to ___atomic_load with 64 in %edi, then four movaps stack reloads into xmm0-xmm3.
362
+ ; CHECK3-LABEL: atomic_vec32_half:
363
+ ; CHECK3: ## %bb.0:
364
+ ; CHECK3-NEXT: subq $72, %rsp
365
+ ; CHECK3-NEXT: movq %rdi, %rsi
366
+ ; CHECK3-NEXT: movq %rsp, %rdx
367
+ ; CHECK3-NEXT: movl $64, %edi
368
+ ; CHECK3-NEXT: movl $2, %ecx
369
+ ; CHECK3-NEXT: callq ___atomic_load
370
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
371
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
372
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
373
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
374
+ ; CHECK3-NEXT: addq $72, %rsp
375
+ ; CHECK3-NEXT: retq
376
+ ;
377
+ ; CHECK0-LABEL: atomic_vec32_half:
378
+ ; CHECK0: ## %bb.0:
379
+ ; CHECK0-NEXT: subq $72, %rsp
380
+ ; CHECK0-NEXT: movq %rdi, %rsi
381
+ ; CHECK0-NEXT: movl $64, %edi
382
+ ; CHECK0-NEXT: movq %rsp, %rdx
383
+ ; CHECK0-NEXT: movl $2, %ecx
384
+ ; CHECK0-NEXT: callq ___atomic_load
385
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
386
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
387
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
388
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
389
+ ; CHECK0-NEXT: addq $72, %rsp
390
+ ; CHECK0-NEXT: retq
391
+ %ret = load atomic <32 x half >, ptr %x acquire , align 4 ; NOTE(review): align 4 is below the 64 passed in %edi; presumably the under-alignment and/or width is why a libcall is expected -- confirm.
392
+ ret <32 x half > %ret
393
+ }
394
+
0 commit comments