@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
110
110
ret <1 x bfloat> %ret
111
111
}
112
112
113
; Atomic acquire load of <1 x i64> with under-alignment (align 4 < natural 8):
; must be expanded to a __atomic_load libcall rather than a plain movq.
define <1 x i64> @atomic_vec1_i64(ptr %x) {
; CHECK-LABEL: atomic_vec1_i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $8, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movq (%rsp), %rax
; CHECK-NEXT:    popq %rcx
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec1_i64:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    pushq %rax
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $8, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movq (%rsp), %rax
; CHECK0-NEXT:    popq %rcx
; CHECK0-NEXT:    retq
  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
  ret <1 x i64> %ret
}
142
+
143
; Atomic acquire load of <1 x double>, under-aligned (align 4 < natural 8):
; expanded to a __atomic_load libcall; result reloaded from the stack slot
; into xmm0 with movsd.
define <1 x double> @atomic_vec1_double(ptr %x) {
; CHECK-LABEL: atomic_vec1_double:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $8, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec1_double:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    pushq %rax
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $8, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT:    popq %rax
; CHECK0-NEXT:    retq
  %ret = load atomic <1 x double>, ptr %x acquire, align 4
  ret <1 x double> %ret
}
172
+
173
; Atomic acquire load of <2 x i32> (8 bytes), under-aligned (align 4):
; expanded to a __atomic_load libcall. Note the O2/O0 reload differs
; (movsd vs movq) but both load 8 bytes into xmm0.
define <2 x i32> @atomic_vec2_i32(ptr %x) {
; CHECK-LABEL: atomic_vec2_i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $8, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec2_i32:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    pushq %rax
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $8, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT:    popq %rax
; CHECK0-NEXT:    retq
  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
  ret <2 x i32> %ret
}
202
+
203
; Atomic acquire load of <4 x float> (16 bytes), under-aligned (align 4):
; expanded to a __atomic_load libcall into a 16-byte-aligned stack slot,
; reloaded with a single movaps.
define <4 x float> @atomic_vec4_float(ptr %x) {
; CHECK-LABEL: atomic_vec4_float:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $16, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec4_float:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $24, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 32
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $16, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movaps (%rsp), %xmm0
; CHECK0-NEXT:    addq $24, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <4 x float>, ptr %x acquire, align 4
  ret <4 x float> %ret
}
232
+
233
; Atomic acquire load of <8 x double> (64 bytes), under-aligned (align 4):
; expanded to a __atomic_load libcall; the 64-byte result is returned in
; xmm0-xmm3 (reloaded as four 16-byte chunks from the stack slot).
define <8 x double> @atomic_vec8_double(ptr %x) {
; CHECK-LABEL: atomic_vec8_double:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $64, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec8_double:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $72, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 80
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $64, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movapd (%rsp), %xmm0
; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
; CHECK0-NEXT:    addq $72, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <8 x double>, ptr %x acquire, align 4
  ret <8 x double> %ret
}
268
+
269
; Atomic acquire load of <16 x bfloat> (32 bytes), under-aligned (align 4):
; expanded to a __atomic_load libcall; result returned in xmm0/xmm1.
define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) {
; CHECK-LABEL: atomic_vec16_bfloat:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $32, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec16_bfloat:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $40, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 48
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $32, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movaps (%rsp), %xmm0
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK0-NEXT:    addq $40, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
  ret <16 x bfloat> %ret
}
300
+
301
; Atomic acquire load of <32 x half> (64 bytes), under-aligned (align 4):
; expanded to a __atomic_load libcall; result returned in xmm0-xmm3.
define <32 x half> @atomic_vec32_half(ptr %x) {
; CHECK-LABEL: atomic_vec32_half:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $64, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec32_half:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $72, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 80
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $64, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movaps (%rsp), %xmm0
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK0-NEXT:    addq $72, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <32 x half>, ptr %x acquire, align 4
  ret <32 x half> %ret
}
0 commit comments