@@ -208,29 +208,12 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
208
208
define <2 x half > @atomic_vec2_half (ptr %x ) {
209
209
; CHECK3-LABEL: atomic_vec2_half:
210
210
; CHECK3: ## %bb.0:
211
- ; CHECK3-NEXT: movl (%rdi), %eax
212
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
213
- ; CHECK3-NEXT: shrl $16, %eax
214
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
215
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
211
+ ; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
216
212
; CHECK3-NEXT: retq
217
213
;
218
214
; CHECK0-LABEL: atomic_vec2_half:
219
215
; CHECK0: ## %bb.0:
220
- ; CHECK0-NEXT: movl (%rdi), %eax
221
- ; CHECK0-NEXT: movl %eax, %ecx
222
- ; CHECK0-NEXT: shrl $16, %ecx
223
- ; CHECK0-NEXT: movw %cx, %dx
224
- ; CHECK0-NEXT: ## implicit-def: $ecx
225
- ; CHECK0-NEXT: movw %dx, %cx
226
- ; CHECK0-NEXT: ## implicit-def: $xmm1
227
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
228
- ; CHECK0-NEXT: movw %ax, %cx
229
- ; CHECK0-NEXT: ## implicit-def: $eax
230
- ; CHECK0-NEXT: movw %cx, %ax
231
- ; CHECK0-NEXT: ## implicit-def: $xmm0
232
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
233
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
216
+ ; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
234
217
; CHECK0-NEXT: retq
235
218
%ret = load atomic <2 x half >, ptr %x acquire , align 4
236
219
ret <2 x half > %ret
@@ -239,29 +222,12 @@ define <2 x half> @atomic_vec2_half(ptr %x) {
239
222
define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
240
223
; CHECK3-LABEL: atomic_vec2_bfloat:
241
224
; CHECK3: ## %bb.0:
242
- ; CHECK3-NEXT: movl (%rdi), %eax
243
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
244
- ; CHECK3-NEXT: shrl $16, %eax
245
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
246
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
225
+ ; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
247
226
; CHECK3-NEXT: retq
248
227
;
249
228
; CHECK0-LABEL: atomic_vec2_bfloat:
250
229
; CHECK0: ## %bb.0:
251
- ; CHECK0-NEXT: movl (%rdi), %eax
252
- ; CHECK0-NEXT: movl %eax, %ecx
253
- ; CHECK0-NEXT: shrl $16, %ecx
254
- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
255
- ; CHECK0-NEXT: movw %ax, %dx
256
- ; CHECK0-NEXT: ## implicit-def: $eax
257
- ; CHECK0-NEXT: movw %dx, %ax
258
- ; CHECK0-NEXT: ## implicit-def: $xmm0
259
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
260
- ; CHECK0-NEXT: ## implicit-def: $eax
261
- ; CHECK0-NEXT: movw %cx, %ax
262
- ; CHECK0-NEXT: ## implicit-def: $xmm1
263
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
264
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
230
+ ; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
265
231
; CHECK0-NEXT: retq
266
232
%ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
267
233
ret <2 x bfloat> %ret
@@ -440,110 +406,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
440
406
}
441
407
442
408
; Test case: acquire-ordered atomic load of a <4 x half> vector (align 8) that
; is returned directly to the caller.
; This hunk removes the separate CHECK3 / CHECK0 expectations, which loaded the
; 64-bit value into a general-purpose register, peeled each 16-bit lane off with
; shifts + pinsrw, and rebuilt the vector with punpcklwd plus punpckldq (CHECK3)
; or unpcklps (CHECK0). They are replaced by one shared expectation: a single
; movq from (%rdi) into %xmm0 followed by retq.
; NOTE(review): CHECK3 / CHECK0 presumably map to the optimized and unoptimized
; RUN configurations; confirm against the RUN lines at the top of the test file.
define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
443
- ; CHECK3-LABEL: atomic_vec4_half:
444
- ; CHECK3: ## %bb.0:
445
- ; CHECK3-NEXT: movq (%rdi), %rax
446
- ; CHECK3-NEXT: movl %eax, %ecx
447
- ; CHECK3-NEXT: shrl $16, %ecx
448
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
449
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
450
- ; CHECK3-NEXT: movq %rax, %rcx
451
- ; CHECK3-NEXT: shrq $32, %rcx
452
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
453
- ; CHECK3-NEXT: shrq $48, %rax
454
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
455
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
456
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
457
- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
458
- ; CHECK3-NEXT: retq
459
- ;
460
- ; CHECK0-LABEL: atomic_vec4_half:
461
- ; CHECK0: ## %bb.0:
462
- ; CHECK0-NEXT: movq (%rdi), %rax
463
- ; CHECK0-NEXT: movl %eax, %ecx
464
- ; CHECK0-NEXT: shrl $16, %ecx
465
- ; CHECK0-NEXT: movw %cx, %dx
466
- ; CHECK0-NEXT: ## implicit-def: $ecx
467
- ; CHECK0-NEXT: movw %dx, %cx
468
- ; CHECK0-NEXT: ## implicit-def: $xmm2
469
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
470
- ; CHECK0-NEXT: movw %ax, %dx
471
- ; CHECK0-NEXT: ## implicit-def: $ecx
472
- ; CHECK0-NEXT: movw %dx, %cx
473
- ; CHECK0-NEXT: ## implicit-def: $xmm0
474
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
475
- ; CHECK0-NEXT: movq %rax, %rcx
476
- ; CHECK0-NEXT: shrq $32, %rcx
477
- ; CHECK0-NEXT: movw %cx, %dx
478
- ; CHECK0-NEXT: ## implicit-def: $ecx
479
- ; CHECK0-NEXT: movw %dx, %cx
480
- ; CHECK0-NEXT: ## implicit-def: $xmm1
481
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
482
- ; CHECK0-NEXT: shrq $48, %rax
483
- ; CHECK0-NEXT: movw %ax, %cx
484
- ; CHECK0-NEXT: ## implicit-def: $eax
485
- ; CHECK0-NEXT: movw %cx, %ax
486
- ; CHECK0-NEXT: ## implicit-def: $xmm3
487
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
488
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
489
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
490
- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
491
- ; CHECK0-NEXT: retq
409
+ ; CHECK-LABEL: atomic_vec4_half:
410
+ ; CHECK: ## %bb.0:
411
+ ; CHECK-NEXT: movq (%rdi), %xmm0
412
+ ; CHECK-NEXT: retq
492
413
%ret = load atomic <4 x half >, ptr %x acquire , align 8
493
414
ret <4 x half > %ret
494
415
}
495
416
496
417
; Test case: acquire-ordered atomic load of a <4 x bfloat> vector (align 8) that
; is returned directly to the caller.
; This hunk removes the separate CHECK3 / CHECK0 expectations, which loaded the
; 64-bit value into a general-purpose register, split it into four 16-bit lanes
; with shifts, inserted each lane with pinsrw, and recombined them with
; punpcklwd plus punpckldq (CHECK3) or unpcklps (CHECK0). They are replaced by
; one shared expectation: a single movq from (%rdi) into %xmm0 followed by retq.
; NOTE(review): CHECK3 / CHECK0 presumably map to the optimized and unoptimized
; RUN configurations; confirm against the RUN lines at the top of the test file.
define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
497
- ; CHECK3-LABEL: atomic_vec4_bfloat:
498
- ; CHECK3: ## %bb.0:
499
- ; CHECK3-NEXT: movq (%rdi), %rax
500
- ; CHECK3-NEXT: movq %rax, %rcx
501
- ; CHECK3-NEXT: movq %rax, %rdx
502
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
503
- ; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
504
- ; CHECK3-NEXT: shrl $16, %eax
505
- ; CHECK3-NEXT: shrq $32, %rcx
506
- ; CHECK3-NEXT: shrq $48, %rdx
507
- ; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
508
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
509
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
510
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
512
- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
513
- ; CHECK3-NEXT: retq
514
- ;
515
- ; CHECK0-LABEL: atomic_vec4_bfloat:
516
- ; CHECK0: ## %bb.0:
517
- ; CHECK0-NEXT: movq (%rdi), %rax
518
- ; CHECK0-NEXT: movl %eax, %ecx
519
- ; CHECK0-NEXT: shrl $16, %ecx
520
- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
521
- ; CHECK0-NEXT: movw %ax, %dx
522
- ; CHECK0-NEXT: movq %rax, %rsi
523
- ; CHECK0-NEXT: shrq $32, %rsi
524
- ; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
525
- ; CHECK0-NEXT: shrq $48, %rax
526
- ; CHECK0-NEXT: movw %ax, %di
527
- ; CHECK0-NEXT: ## implicit-def: $eax
528
- ; CHECK0-NEXT: movw %di, %ax
529
- ; CHECK0-NEXT: ## implicit-def: $xmm0
530
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
531
- ; CHECK0-NEXT: ## implicit-def: $eax
532
- ; CHECK0-NEXT: movw %si, %ax
533
- ; CHECK0-NEXT: ## implicit-def: $xmm1
534
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
535
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
536
- ; CHECK0-NEXT: ## implicit-def: $eax
537
- ; CHECK0-NEXT: movw %dx, %ax
538
- ; CHECK0-NEXT: ## implicit-def: $xmm0
539
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
540
- ; CHECK0-NEXT: ## implicit-def: $eax
541
- ; CHECK0-NEXT: movw %cx, %ax
542
- ; CHECK0-NEXT: ## implicit-def: $xmm2
543
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
544
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
545
- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
546
- ; CHECK0-NEXT: retq
418
+ ; CHECK-LABEL: atomic_vec4_bfloat:
419
+ ; CHECK: ## %bb.0:
420
+ ; CHECK-NEXT: movq (%rdi), %xmm0
421
+ ; CHECK-NEXT: retq
547
422
%ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
548
423
ret <4 x bfloat> %ret
549
424
}
0 commit comments