From a64fbf96ed73a5445bc016bdeb4c22367388a598 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 18:02:12 +0200 Subject: [PATCH 1/8] s390x: define `unpack_low` using `core::intrinsics::simd` --- crates/core_arch/src/s390x/vector.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 9927ff2c62..2969516c9a 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -169,13 +169,6 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple; #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple; - #[link_name = "llvm.s390.vuplb"] fn vuplb (a: vector_signed_char) -> vector_signed_short; - #[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int; - #[link_name = "llvm.s390.vuplf"] fn vuplf (a: vector_signed_int) -> vector_signed_long_long; - #[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short; - #[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int; - #[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long; - #[link_name = "llvm.s390.vavgb"] fn vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; #[link_name = "llvm.s390.vavgh"] fn vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; #[link_name = "llvm.s390.vavgf"] fn vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; @@ -2583,8 +2576,14 @@ mod sealed { unsafe fn vec_unpackl(self) -> Self::Result; } - // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like - // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576. + // NOTE: `vuplh` is used for "unpack logical high", hence `vuplhw`. + impl_vec_unpack!(unpack_low vuplb vector_signed_char i8x8 vector_signed_short 8); + impl_vec_unpack!(unpack_low vuplhw vector_signed_short i16x4 vector_signed_int 4); + impl_vec_unpack!(unpack_low vuplf vector_signed_int i32x2 vector_signed_long_long 2); + + impl_vec_unpack!(unpack_low vupllb vector_unsigned_char u8x8 vector_unsigned_short 8); + impl_vec_unpack!(unpack_low vupllh vector_unsigned_short u16x4 vector_unsigned_int 4); + impl_vec_unpack!(unpack_low vupllf vector_unsigned_int u32x2 vector_unsigned_long_long 2); impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplb (vector_signed_char) -> vector_signed_short} impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int} From 963823761b5bb232dae6ea83aaf88fab5083e733 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 18:03:04 +0200 Subject: [PATCH 2/8] s390x: add `assert_instr` for `vec_round` --- crates/core_arch/src/s390x/vector.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 2969516c9a..066ae16073 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -1194,10 +1194,8 @@ mod sealed { test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] } test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] } - // FIXME(llvm) llvm trunk already lowers roundeven to vfidb, but rust does not use it yet - // use https://godbolt.org/z/cWq95fexe to check, and enable the instruction test when it works - test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] } - test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] } + test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, "vector-enhancements-1" vfisb] } + test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, vfidb] } #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorRoundc { From 12125d51881285064ac952a9c8539baa4a62473d Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 18:03:41 +0200 Subject: [PATCH 3/8] s390x: add `assert_instr` for `vec_extend` --- crates/core_arch/src/s390x/vector.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 066ae16073..b010b7e382 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -3319,8 +3319,7 @@ mod sealed { #[inline] #[target_feature(enable = "vector")] - // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 - // #[cfg_attr(test, assert_instr(vsegb))] + #[cfg_attr(test, assert_instr(vsegb))] pub unsafe fn vsegb(a: vector_signed_char) -> vector_signed_long_long { simd_as(simd_shuffle::<_, _, i8x2>( a, @@ -3331,8 +3330,7 @@ mod sealed { #[inline] #[target_feature(enable = "vector")] - // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 - // #[cfg_attr(test, assert_instr(vsegh))] + #[cfg_attr(test, assert_instr(vsegh))] pub unsafe fn vsegh(a: vector_signed_short) -> vector_signed_long_long { simd_as(simd_shuffle::<_, _, i16x2>( a, @@ -3343,8 +3341,7 @@ mod sealed { #[inline] #[target_feature(enable = "vector")] - // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 - // #[cfg_attr(test, assert_instr(vsegf))] + #[cfg_attr(test, assert_instr(vsegf))] pub unsafe fn vsegf(a: vector_signed_int) -> vector_signed_long_long { simd_as(simd_shuffle::<_, _, i32x2>( a, From 0ec13ab37c3f42bf59c631b7a3d1f052e932394f Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 18:04:15 +0200 Subject: [PATCH 4/8] s390x: implement `vec_mule` using `core::intrinsics::simd` --- crates/core_arch/src/s390x/vector.rs | 85 +++++++++++++++------------- 1 file changed, 47 insertions(+), 38 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index b010b7e382..2af5edb9fb 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -181,14 +181,6 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; - #[link_name = "llvm.s390.vmeb"] fn vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; - #[link_name = "llvm.s390.vmeh"] fn vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; - #[link_name = "llvm.s390.vmef"] fn vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long; - - #[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; - #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; - #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; - #[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; #[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; #[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long; @@ -372,6 +364,19 @@ impl ShuffleMask { ShuffleMask(mask) } + const fn even() -> Self { + let mut mask = [0; N]; + let mut i = 0; + let mut index = 0; + while index < N { + mask[index] = i as u32; + + i += 2; + index += 1; + } + ShuffleMask(mask) + } + const fn pack() -> Self { let mut mask = [0; N]; let mut i = 1; @@ -2642,40 +2647,44 @@ mod sealed { unsafe fn vec_mule(self, b: Self) -> Result; } - // FIXME(llvm) sadly this does not yet work https://github.com/llvm/llvm-project/issues/129705 - // #[target_feature(enable = "vector")] - // #[cfg_attr(test, assert_instr(vmleh))] - // unsafe fn vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int { - // let even_a: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( - // a, - // a, - // const { ShuffleMask([0, 2, 4, 6]) }, - // )); - // - // let even_b: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( - // b, - // b, - // const { ShuffleMask([0, 2, 4, 6]) }, - // )); - // - // simd_mul(even_a, even_b) - // } + macro_rules! impl_vec_mule { + ($instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + unsafe fn $instr(a: $src, b: $src) -> $dst { + let even_a: $dst = simd_as(simd_shuffle::<_, _, $shuffled>( + a, + a, + const { ShuffleMask::<$width>::even() }, + )); + + let even_b: $dst = simd_as(simd_shuffle::<_, _, $shuffled>( + b, + b, + const { ShuffleMask::<$width>::even() }, + )); + + simd_mul(even_a, even_b) + } + }; + } - test_impl! { vec_vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmeb, vmeb ] } - test_impl! { vec_vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmeh, vmeh ] } - test_impl! { vec_vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmef, vmef ] } + impl_vec_mule! { vmeb vector_signed_char i8x8 vector_signed_short 8 } + impl_vec_mule! { vmeh vector_signed_short i16x4 vector_signed_int 4 } + impl_vec_mule! { vmef vector_signed_int i32x2 vector_signed_long_long 2 } - test_impl! { vec_vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmleb, vmleb ] } - test_impl! { vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmleh, vmleh ] } - test_impl! { vec_vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlef, vmlef ] } + impl_vec_mule! { vmleb vector_unsigned_char u8x8 vector_unsigned_short 8 } + impl_vec_mule! { vmleh vector_unsigned_short u16x4 vector_unsigned_int 4 } + impl_vec_mule! { vmlef vector_unsigned_int u32x2 vector_unsigned_long_long 2 } - impl_mul!([VectorMule vec_mule] vec_vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short ); - impl_mul!([VectorMule vec_mule] vec_vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int); - impl_mul!([VectorMule vec_mule] vec_vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + impl_mul!([VectorMule vec_mule] vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMule vec_mule] vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMule vec_mule] vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); - impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); - impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); - impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + impl_mul!([VectorMule vec_mule] vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMule vec_mule] vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMule vec_mule] vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorMulo { From a21bddbf95acc590db0ff32093f210acf1969fd2 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 21:18:39 +0200 Subject: [PATCH 5/8] s390x: implement `vec_mulo` using `core::intrinsics::simd` --- crates/core_arch/src/s390x/vector.rs | 68 +++++++++++++--------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 2af5edb9fb..6775225788 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -181,14 +181,6 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; - #[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; - #[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; - #[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long; - - #[link_name = "llvm.s390.vmlob"] fn vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; - #[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; - #[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; - #[link_name = "llvm.s390.vmhb"] fn vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; #[link_name = "llvm.s390.vmhh"] fn vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; #[link_name = "llvm.s390.vmhf"] fn vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; @@ -377,7 +369,7 @@ impl ShuffleMask { ShuffleMask(mask) } - const fn pack() -> Self { + const fn odd() -> Self { let mut mask = [0; N]; let mut i = 1; let mut index = 0; @@ -390,6 +382,10 @@ impl ShuffleMask { ShuffleMask(mask) } + const fn pack() -> Self { + Self::odd() + } + const fn unpack_low() -> Self { let mut mask = [0; N]; let mut i = 0; @@ -2647,36 +2643,36 @@ mod sealed { unsafe fn vec_mule(self, b: Self) -> Result; } - macro_rules! impl_vec_mule { - ($instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => { + macro_rules! impl_vec_mul_even_odd { + ($mask:ident $instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => { #[inline] #[target_feature(enable = "vector")] #[cfg_attr(test, assert_instr($instr))] unsafe fn $instr(a: $src, b: $src) -> $dst { - let even_a: $dst = simd_as(simd_shuffle::<_, _, $shuffled>( - a, + let elems_a: $dst = simd_as(simd_shuffle::<_, _, $shuffled>( a, - const { ShuffleMask::<$width>::even() }, + a, // this argument is ignored entirely. + const { ShuffleMask::<$width>::$mask() }, )); - let even_b: $dst = simd_as(simd_shuffle::<_, _, $shuffled>( + let elems_b: $dst = simd_as(simd_shuffle::<_, _, $shuffled>( b, - b, - const { ShuffleMask::<$width>::even() }, + b, // this argument is ignored entirely. + const { ShuffleMask::<$width>::$mask() }, )); - simd_mul(even_a, even_b) + simd_mul(elems_a, elems_b) } }; } - impl_vec_mule! { vmeb vector_signed_char i8x8 vector_signed_short 8 } - impl_vec_mule! { vmeh vector_signed_short i16x4 vector_signed_int 4 } - impl_vec_mule! { vmef vector_signed_int i32x2 vector_signed_long_long 2 } + impl_vec_mul_even_odd! { even vmeb vector_signed_char i8x8 vector_signed_short 8 } + impl_vec_mul_even_odd! { even vmeh vector_signed_short i16x4 vector_signed_int 4 } + impl_vec_mul_even_odd! { even vmef vector_signed_int i32x2 vector_signed_long_long 2 } - impl_vec_mule! { vmleb vector_unsigned_char u8x8 vector_unsigned_short 8 } - impl_vec_mule! { vmleh vector_unsigned_short u16x4 vector_unsigned_int 4 } - impl_vec_mule! { vmlef vector_unsigned_int u32x2 vector_unsigned_long_long 2 } + impl_vec_mul_even_odd! { even vmleb vector_unsigned_char u8x8 vector_unsigned_short 8 } + impl_vec_mul_even_odd! { even vmleh vector_unsigned_short u16x4 vector_unsigned_int 4 } + impl_vec_mul_even_odd! { even vmlef vector_unsigned_int u32x2 vector_unsigned_long_long 2 } impl_mul!([VectorMule vec_mule] vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short ); impl_mul!([VectorMule vec_mule] vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int); @@ -2691,21 +2687,21 @@ mod sealed { unsafe fn vec_mulo(self, b: Self) -> Result; } - test_impl! { vec_vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmob, vmob ] } - test_impl! { vec_vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmoh, vmoh ] } - test_impl! { vec_vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmof, vmof ] } + impl_vec_mul_even_odd! { odd vmob vector_signed_char i8x8 vector_signed_short 8 } + impl_vec_mul_even_odd! { odd vmoh vector_signed_short i16x4 vector_signed_int 4 } + impl_vec_mul_even_odd! { odd vmof vector_signed_int i32x2 vector_signed_long_long 2 } - test_impl! { vec_vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmlob, vmlob ] } - test_impl! { vec_vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmloh, vmloh ] } - test_impl! { vec_vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlof, vmlof ] } + impl_vec_mul_even_odd! { odd vmlob vector_unsigned_char u8x8 vector_unsigned_short 8 } + impl_vec_mul_even_odd! { odd vmloh vector_unsigned_short u16x4 vector_unsigned_int 4 } + impl_vec_mul_even_odd! { odd vmlof vector_unsigned_int u32x2 vector_unsigned_long_long 2 } - impl_mul!([VectorMulo vec_mulo] vec_vmob (vector_signed_char, vector_signed_char) -> vector_signed_short ); - impl_mul!([VectorMulo vec_mulo] vec_vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int); - impl_mul!([VectorMulo vec_mulo] vec_vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + impl_mul!([VectorMulo vec_mulo] vmob (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMulo vec_mulo] vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMulo vec_mulo] vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); - impl_mul!([VectorMulo vec_mulo] vec_vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); - impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); - impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + impl_mul!([VectorMulo vec_mulo] vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMulo vec_mulo] vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMulo vec_mulo] vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorMulh { From 52cc71920a260d05b39755f918df9bbcf7640f02 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 18:08:51 +0200 Subject: [PATCH 6/8] s390x: implement `vec_subc_u128` using `overflowing_sub` --- crates/core_arch/src/s390x/vector.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 6775225788..4e4161dbb7 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -4678,11 +4678,9 @@ pub unsafe fn vec_subc_u128( a: vector_unsigned_char, b: vector_unsigned_char, ) -> vector_unsigned_char { - // FIXME(llvm) sadly this does not work https://github.com/llvm/llvm-project/issues/129608 - // let a: u128 = transmute(a); - // let b: u128 = transmute(b); - // transmute(!a.overflowing_sub(b).1 as u128) - transmute(vscbiq(transmute(a), transmute(b))) + let a: u128 = transmute(a); + let b: u128 = transmute(b); + transmute(!a.overflowing_sub(b).1 as u128) } /// Vector Add Compute Carryout unsigned 128-bits @@ -4714,7 +4712,7 @@ pub unsafe fn vec_adde_u128( let a: u128 = transmute(a); let b: u128 = transmute(b); let c: u128 = transmute(c); - // FIXME(llvm) sadly this does not work + // FIXME(llvm) https://github.com/llvm/llvm-project/pull/153557 // let (d, _carry) = a.carrying_add(b, c & 1 != 0); // transmute(d) transmute(vacq(a, b, c)) From 1dcc3b0c072c5a0855fb52c01283bb54c1542389 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 20:00:22 +0200 Subject: [PATCH 7/8] s390x: implement `vec_sld` using `fshl` --- crates/core_arch/src/s390x/vector.rs | 55 +++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 4e4161dbb7..9c2941129c 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -94,8 +94,6 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vsrlb"] fn vsrlb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; #[link_name = "llvm.s390.vslb"] fn vslb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; - #[link_name = "llvm.s390.vsldb"] fn vsldb(a: i8x16, b: i8x16, c: u32) -> i8x16; - #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16; #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16; #[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char; @@ -3484,10 +3482,44 @@ mod sealed { unsafe fn vec_sldb(self, b: Self) -> Self; } - // FIXME(llvm) https://github.com/llvm/llvm-project/issues/129955 - // ideally we could implement this in terms of llvm.fshl.i128 - // #[link_name = "llvm.fshl.i128"] fn fshl_i128(a: u128, b: u128, c: u128) -> u128; - // transmute(fshl_i128(transmute(a), transmute(b), const { C * 8 } )) + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vsldb))] + unsafe fn test_vec_sld(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + a.vec_sld::<13>(b) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vsldb))] + unsafe fn test_vec_sldw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + a.vec_sldw::<3>(b) + } + + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + #[cfg_attr(test, assert_instr(vsld))] + unsafe fn test_vec_sldb(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + a.vec_sldb::<7>(b) + } + + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + #[cfg_attr(test, assert_instr(vsrd))] + unsafe fn test_vec_srdb(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { + a.vec_srdb::<7>(b) + } + + unsafe fn funnel_shl_u128(a: u128, b: u128, c: u128) -> u128 { + #[repr(simd)] + struct Single([u128; 1]); + + transmute(simd_funnel_shl::( + transmute(a), + transmute(b), + transmute(c), + )) + } macro_rules! impl_vec_sld { ($($ty:ident)*) => { @@ -3498,21 +3530,21 @@ mod sealed { #[target_feature(enable = "vector")] unsafe fn vec_sld(self, b: Self) -> Self { static_assert_uimm_bits!(C, 4); - transmute(vsldb(transmute(self), transmute(b), C)) + transmute(funnel_shl_u128(transmute(self), transmute(b), const { C as u128 * 8 })) } #[inline] #[target_feature(enable = "vector")] unsafe fn vec_sldw(self, b: Self) -> Self { static_assert_uimm_bits!(C, 2); - transmute(vsldb(transmute(self), transmute(b), const { 4 * C })) + transmute(funnel_shl_u128(transmute(self), transmute(b), const { C as u128 * 4 * 8 })) } #[inline] #[target_feature(enable = "vector-enhancements-2")] unsafe fn vec_sldb(self, b: Self) -> Self { static_assert_uimm_bits!(C, 3); - transmute(vsld(transmute(self), transmute(b), C)) + transmute(funnel_shl_u128(transmute(self), transmute(b), const { C as u128 })) } } @@ -3523,6 +3555,11 @@ mod sealed { unsafe fn vec_srdb(self, b: Self) -> Self { static_assert_uimm_bits!(C, 3); transmute(vsrd(transmute(self), transmute(b), C)) + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129955#issuecomment-3207488190 + // LLVM currently rewrites `fshr` to `fshl`, and the logic in the s390x + // backend cannot deal with that yet. + // #[link_name = "llvm.fshr.i128"] fn fshr_i128(a: u128, b: u128, c: u128) -> u128; + // transmute(fshr_i128(transmute(self), transmute(b), const { C as u128 })) } } )* From 1383b3bdc5d5b96a188aae9631a026d6c1bd5799 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Wed, 20 Aug 2025 21:12:52 +0200 Subject: [PATCH 8/8] s390x: link to a missed optimization --- crates/core_arch/src/s390x/vector.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 9c2941129c..f6d14c45e0 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -2352,6 +2352,9 @@ mod sealed { unsafe fn vec_packs(self, b: Other) -> Self::Result; } + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/153655 + // Other targets can use a min/max for the saturation + a truncation. + impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char } impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short }