@@ -2368,25 +2368,57 @@ define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
 }

 define <4 x double> @blend_broadcasts_v1f64(ptr %p0, ptr %p1) {
-; ALL-LABEL: blend_broadcasts_v1f64:
-; ALL: # %bb.0:
-; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
-; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
-; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; ALL-NEXT: retq
+; AVX1-LABEL: blend_broadcasts_v1f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: blend_broadcasts_v1f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: blend_broadcasts_v1f64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX512VL-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
   %ld0 = load <1 x double>, ptr %p0, align 32
   %ld1 = load <1 x double>, ptr %p1, align 32
   %blend = shufflevector <1 x double> %ld0, <1 x double> %ld1, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
   ret <4 x double> %blend
 }

 define <4 x double> @blend_broadcasts_v1f64_4x(ptr %p0, ptr %p1) {
-; ALL-LABEL: blend_broadcasts_v1f64_4x:
-; ALL: # %bb.0:
-; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
-; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
-; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; ALL-NEXT: retq
+; AVX1-LABEL: blend_broadcasts_v1f64_4x:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: blend_broadcasts_v1f64_4x:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: blend_broadcasts_v1f64_4x:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX512VL-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
   %ld0 = load <1 x double>, ptr %p0, align 32
   %ld1 = load <1 x double>, ptr %p1, align 32
   %bcst0 = shufflevector <1 x double> %ld0, <1 x double> poison, <4 x i32> zeroinitializer
@@ -2396,12 +2428,28 @@ define <4 x double> @blend_broadcasts_v1f64_4x(ptr %p0, ptr %p1) {
 }

 define <4 x double> @blend_broadcasts_v1f64_2x(ptr %p0, ptr %p1) {
-; ALL-LABEL: blend_broadcasts_v1f64_2x:
-; ALL: # %bb.0:
-; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
-; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
-; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; ALL-NEXT: retq
+; AVX1-LABEL: blend_broadcasts_v1f64_2x:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: blend_broadcasts_v1f64_2x:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: blend_broadcasts_v1f64_2x:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX512VL-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
   %ld0 = load <1 x double>, ptr %p0, align 32
   %ld1 = load <1 x double>, ptr %p1, align 32
   %bcst0 = shufflevector <1 x double> %ld0, <1 x double> poison, <2 x i32> zeroinitializer