[WASM] Add f64x2 operations and unzip (#37)

ajakubowicz-canva · web-flow · commit 3d1a77cfb451 · 2025-07-18T12:09:16.000+02:00
diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs
@@ -112,11 +112,11 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
-        todo!()
+        u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
-        todo!()
+        u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
@@ -136,19 +136,19 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn madd_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
-        self.add_f32x4(a, self.mul_f32x4(b, c))
+        a.add(b.mul(c))
     }
     #[inline(always)]
     fn msub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
-        self.sub_f32x4(a, self.mul_f32x4(b, c))
+        a.sub(b.mul(c))
     }
     #[inline(always)]
     fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
         f32x4_floor(a.into()).simd_into(self)
     }
     #[inline(always)]
     fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
-        self.sub_f32x4(a, self.trunc_f32x4(a))
+        a.sub(a.trunc())
     }
     #[inline(always)]
     fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
@@ -263,11 +263,19 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
-        todo!()
+        u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(
+            a.into(),
+            b.into(),
+        )
+        .simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
-        todo!()
+        u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(
+            a.into(),
+            b.into(),
+        )
+        .simd_into(self)
     }
     #[inline(always)]
     fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
@@ -370,11 +378,19 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
-        todo!()
+        u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(
+            a.into(),
+            b.into(),
+        )
+        .simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
-        todo!()
+        u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(
+            a.into(),
+            b.into(),
+        )
+        .simd_into(self)
     }
     #[inline(always)]
     fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
@@ -511,11 +527,11 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
-        todo!()
+        u16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
-        todo!()
+        u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
@@ -610,11 +626,11 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
-        todo!()
+        u16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
-        todo!()
+        u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
@@ -749,11 +765,11 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
-        todo!()
+        u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
-        todo!()
+        u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
@@ -852,11 +868,11 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
-        todo!()
+        u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
-        todo!()
+        u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
@@ -931,15 +947,15 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
-        todo!();
+        f64x2_abs(a.into()).simd_into(self)
     }
     #[inline(always)]
     fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
-        todo!();
+        f64x2_neg(a.into()).simd_into(self)
     }
     #[inline(always)]
     fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
-        todo!();
+        f64x2_sqrt(a.into()).simd_into(self)
     }
     #[inline(always)]
     fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
@@ -959,7 +975,10 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
-        todo!()
+        let sign_mask = f64x2_splat(-0.0_f64);
+        let sign_bits = v128_and(b.into(), sign_mask.into());
+        let magnitude = v128_andnot(a.into(), sign_mask.into());
+        v128_or(magnitude, sign_bits).simd_into(self)
     }
     #[inline(always)]
     fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
@@ -991,11 +1010,11 @@ impl Simd for WasmSimd128 {
     }
     #[inline(always)]
     fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
-        todo!()
+        u64x2_shuffle::<0, 2>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
-        todo!()
+        u64x2_shuffle::<1, 3>(a.into(), b.into()).simd_into(self)
     }
     #[inline(always)]
     fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
@@ -1014,24 +1033,24 @@ impl Simd for WasmSimd128 {
         f64x2_pmin(b.into(), a.into()).simd_into(self)
     }
     #[inline(always)]
-    fn madd_f64x2(self, _: f64x2<Self>, _: f64x2<Self>, _: f64x2<Self>) -> f64x2<Self> {
-        todo!()
+    fn madd_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
+        a.add(b.mul(c))
     }
     #[inline(always)]
-    fn msub_f64x2(self, _: f64x2<Self>, _: f64x2<Self>, _: f64x2<Self>) -> f64x2<Self> {
-        todo!()
+    fn msub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
+        a.sub(b.mul(c))
     }
     #[inline(always)]
     fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
-        todo!();
+        f64x2_floor(a.into()).simd_into(self)
     }
     #[inline(always)]
     fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
-        todo!();
+        a.sub(a.trunc())
     }
     #[inline(always)]
     fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
-        todo!();
+        f64x2_trunc(a.into()).simd_into(self)
     }
     #[inline(always)]
     fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs
@@ -9,7 +9,7 @@ use crate::{
     arch::{Arch, wasm::Wasm},
     generic::{generic_combine, generic_op, generic_split},
     ops::{OpSig, TyFlavor, ops_for_type},
-    types::{SIMD_TYPES, ScalarType, VecType, type_imports},
+    types::{SIMD_TYPES, ScalarType, type_imports},
 };
 
 #[derive(Clone, Copy)]
@@ -65,50 +65,43 @@ fn mk_simd_impl(level: Level) -> TokenStream {
                     }
                 }
                 OpSig::Unary => {
-                    if vec_ty.scalar_bits != 64 || vec_ty.scalar != ScalarType::Float {
-                        let args = [quote! { a.into() }];
-                        let expr = if matches!(method, "fract") {
-                            assert_eq!(ty_name, "f32x4", "only support fract_f32x4");
-                            quote! {
-                                self.sub_f32x4(a, self.trunc_f32x4(a))
-                            }
-                        } else {
-                            let expr = Wasm.expr(method, vec_ty, &args);
-                            quote! { #expr.simd_into(self) }
-                        };
+                    let args = [quote! { a.into() }];
+                    let expr = if matches!(method, "fract") {
+                        assert_eq!(
+                            vec_ty.scalar,
+                            ScalarType::Float,
+                            "only float supports fract"
+                        );
 
                         quote! {
-                            #[inline(always)]
-                            fn #method_ident(self, a: #ty<Self>) -> #ret_ty {
-                                #expr
-                            }
+                            a.sub(a.trunc())
                         }
                     } else {
-                        quote! {
-                            #[inline(always)]
-                            fn #method_ident(self, a: #ty<Self>) -> #ret_ty {
-                                todo!();
-                            }
+                        let expr = Wasm.expr(method, vec_ty, &args);
+                        quote! { #expr.simd_into(self) }
+                    };
+
+                    quote! {
+                        #[inline(always)]
+                        fn #method_ident(self, a: #ty<Self>) -> #ret_ty {
+                            #expr
                         }
                     }
                 }
                 OpSig::Binary if method == "copysign" => {
-                    if ty_name == "f32x4" {
-                        quote! {
-                            #[inline(always)]
-                            fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
-                                let sign_mask = f32x4_splat(-0.0_f32);
-                                let sign_bits = v128_and(b.into(), sign_mask.into());
-                                let magnitude = v128_andnot(a.into(), sign_mask.into());
-                                v128_or(magnitude, sign_bits).simd_into(self)
-                            }
-                        }
-                    } else {
-                        quote! {
-                            #[inline(always)]
-                            fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
-                                todo!()
-                            }
+                    let splat: Ident = format_ident!("{}_splat", vec_ty.rust_name());
+                    let sign_mask_literal = match vec_ty.scalar_bits {
+                        32 => quote! { -0.0_f32 },
+                        64 => quote! { -0.0_f64 },
+                        _ => unimplemented!(),
+                    };
+                    quote! {
+                        #[inline(always)]
+                        fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
+                            let sign_mask = #splat(#sign_mask_literal);
+                            let sign_bits = v128_and(b.into(), sign_mask.into());
+                            let magnitude = v128_andnot(a.into(), sign_mask.into());
+                            v128_or(magnitude, sign_bits).simd_into(self)
                         }
                     }
                 }
@@ -162,37 +155,18 @@ fn mk_simd_impl(level: Level) -> TokenStream {
                     }
                 }
                 OpSig::Ternary => {
-                    if vec_ty.scalar_bits == 64 && vec_ty.scalar == ScalarType::Float {
-                        quote! {
-                            #[inline(always)]
-                            fn #method_ident(self, _: #ty<Self>, _: #ty<Self>, _: #ty<Self>) -> #ret_ty {
-                                todo!()
-                            }
-                        }
-                    } else if matches!(method, "madd" | "msub") {
-                        let first_ident = {
-                            let str = if method == "madd" {
-                                "add_f32x4"
-                            } else {
-                                "sub_f32x4"
-                            };
-
-                            Ident::new(str, Span::call_site())
+                    if matches!(method, "madd" | "msub") {
+                        let first_ident = if method == "madd" {
+                            quote! {add}
+                        } else {
+                            quote! {sub}
                         };
 
-                        assert_eq!(
-                            vec_ty,
-                            &VecType {
-                                scalar: ScalarType::Float,
-                                scalar_bits: 32,
-                                len: 4,
-                            }
-                        );
                         // TODO: `relaxed-simd` has madd.
                         quote! {
                             #[inline(always)]
                             fn #method_ident(self, a: #ty<Self>, b: #ty<Self>, c: #ty<Self>) -> #ret_ty {
-                                self.#first_ident(a, self.mul_f32x4(b, c))
+                                a.#first_ident(b.mul(c))
                             }
                         }
                     } else {
@@ -265,10 +239,45 @@ fn mk_simd_impl(level: Level) -> TokenStream {
                     }
                 }
                 OpSig::Unzip(is_low) => {
+                    let (indices, shuffle_fn) = match vec_ty.scalar_bits {
+                        8 => {
+                            let indices = if is_low {
+                                quote! { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
+                            } else {
+                                quote! { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }
+                            };
+                            (indices, quote! { u8x16_shuffle })
+                        }
+                        16 => {
+                            let indices = if is_low {
+                                quote! { 0, 2, 4, 6, 8, 10, 12, 14 }
+                            } else {
+                                quote! { 1, 3, 5, 7, 9, 11, 13, 15 }
+                            };
+                            (indices, quote! { u16x8_shuffle })
+                        }
+                        32 => {
+                            let indices = if is_low {
+                                quote! { 0, 2, 4, 6 }
+                            } else {
+                                quote! { 1, 3, 5, 7 }
+                            };
+                            (indices, quote! { u32x4_shuffle })
+                        }
+                        64 => {
+                            let indices = if is_low {
+                                quote! { 0, 2 }
+                            } else {
+                                quote! { 1, 3 }
+                            };
+                            (indices, quote! { u64x2_shuffle })
+                        }
+                        _ => panic!("unsupported scalar_bits for unzip operation"),
+                    };
                     quote! {
                         #[inline(always)]
                         fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
-                            todo!()
+                            #shuffle_fn::<#indices>(a.into(), b.into()).simd_into(self)
                         }
                     }
                 }
diff --git a/fearless_simd_tests/tests/wasm.rs b/fearless_simd_tests/tests/wasm.rs

Original file line number	Diff line number	Diff line change
`@@ -112,11 +112,11 @@ impl Simd for WasmSimd128 {`
`112`	`112`	`}`
`113`	`113`	`#[inline(always)]`
`114`	`114`	`fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {`
`115`		`- todo!()`
	`115`	`+ u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)`
`116`	`116`	`}`
`117`	`117`	`#[inline(always)]`
`118`	`118`	`fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {`
`119`		`- todo!()`
	`119`	`+ u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)`
`120`	`120`	`}`
`121`	`121`	`#[inline(always)]`
`122`	`122`	`fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {`
`@@ -136,19 +136,19 @@ impl Simd for WasmSimd128 {`
`136`	`136`	`}`
`137`	`137`	`#[inline(always)]`
`138`	`138`	`fn madd_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {`
`139`		`- self.add_f32x4(a, self.mul_f32x4(b, c))`
	`139`	`+ a.add(b.mul(c))`
`140`	`140`	`}`
`141`	`141`	`#[inline(always)]`
`142`	`142`	`fn msub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {`
`143`		`- self.sub_f32x4(a, self.mul_f32x4(b, c))`
	`143`	`+ a.sub(b.mul(c))`
`144`	`144`	`}`
`145`	`145`	`#[inline(always)]`
`146`	`146`	`fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {`
`147`	`147`	`f32x4_floor(a.into()).simd_into(self)`
`148`	`148`	`}`
`149`	`149`	`#[inline(always)]`
`150`	`150`	`fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {`
`151`		`- self.sub_f32x4(a, self.trunc_f32x4(a))`
	`151`	`+ a.sub(a.trunc())`
`152`	`152`	`}`
`153`	`153`	`#[inline(always)]`
`154`	`154`	`fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {`
`@@ -263,11 +263,19 @@ impl Simd for WasmSimd128 {`
`263`	`263`	`}`
`264`	`264`	`#[inline(always)]`
`265`	`265`	`fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {`
`266`		`- todo!()`
	`266`	`+ u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(`
	`267`	`+ a.into(),`
	`268`	`+ b.into(),`
	`269`	`+ )`
	`270`	`+ .simd_into(self)`
`267`	`271`	`}`
`268`	`272`	`#[inline(always)]`
`269`	`273`	`fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {`
`270`		`- todo!()`
	`274`	`+ u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(`
	`275`	`+ a.into(),`
	`276`	`+ b.into(),`
	`277`	`+ )`
	`278`	`+ .simd_into(self)`
`271`	`279`	`}`
`272`	`280`	`#[inline(always)]`
`273`	`281`	`fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {`
`@@ -370,11 +378,19 @@ impl Simd for WasmSimd128 {`
`370`	`378`	`}`
`371`	`379`	`#[inline(always)]`
`372`	`380`	`fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {`
`373`		`- todo!()`
	`381`	`+ u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(`
	`382`	`+ a.into(),`
	`383`	`+ b.into(),`
	`384`	`+ )`
	`385`	`+ .simd_into(self)`
`374`	`386`	`}`
`375`	`387`	`#[inline(always)]`
`376`	`388`	`fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {`
`377`		`- todo!()`
	`389`	`+ u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(`
	`390`	`+ a.into(),`
	`391`	`+ b.into(),`
	`392`	`+ )`
	`393`	`+ .simd_into(self)`
`378`	`394`	`}`
`379`	`395`	`#[inline(always)]`
`380`	`396`	`fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {`
`@@ -511,11 +527,11 @@ impl Simd for WasmSimd128 {`
`511`	`527`	`}`
`512`	`528`	`#[inline(always)]`
`513`	`529`	`fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {`
`514`		`- todo!()`
	`530`	`+ u16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a.into(), b.into()).simd_into(self)`
`515`	`531`	`}`
`516`	`532`	`#[inline(always)]`
`517`	`533`	`fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {`
`518`		`- todo!()`
	`534`	`+ u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(a.into(), b.into()).simd_into(self)`
`519`	`535`	`}`
`520`	`536`	`#[inline(always)]`
`521`	`537`	`fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {`
`@@ -610,11 +626,11 @@ impl Simd for WasmSimd128 {`
`610`	`626`	`}`
`611`	`627`	`#[inline(always)]`
`612`	`628`	`fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {`
`613`		`- todo!()`
	`629`	`+ u16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a.into(), b.into()).simd_into(self)`
`614`	`630`	`}`
`615`	`631`	`#[inline(always)]`
`616`	`632`	`fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {`
`617`		`- todo!()`
	`633`	`+ u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(a.into(), b.into()).simd_into(self)`
`618`	`634`	`}`
`619`	`635`	`#[inline(always)]`
`620`	`636`	`fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {`
`@@ -749,11 +765,11 @@ impl Simd for WasmSimd128 {`
`749`	`765`	`}`
`750`	`766`	`#[inline(always)]`
`751`	`767`	`fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {`
`752`		`- todo!()`
	`768`	`+ u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)`
`753`	`769`	`}`
`754`	`770`	`#[inline(always)]`
`755`	`771`	`fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {`
`756`		`- todo!()`
	`772`	`+ u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)`
`757`	`773`	`}`
`758`	`774`	`#[inline(always)]`
`759`	`775`	`fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {`
`@@ -852,11 +868,11 @@ impl Simd for WasmSimd128 {`
`852`	`868`	`}`
`853`	`869`	`#[inline(always)]`
`854`	`870`	`fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {`
`855`		`- todo!()`
	`871`	`+ u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)`
`856`	`872`	`}`
`857`	`873`	`#[inline(always)]`
`858`	`874`	`fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {`
`859`		`- todo!()`
	`875`	`+ u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)`
`860`	`876`	`}`
`861`	`877`	`#[inline(always)]`
`862`	`878`	`fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {`
`@@ -931,15 +947,15 @@ impl Simd for WasmSimd128 {`
`931`	`947`	`}`
`932`	`948`	`#[inline(always)]`
`933`	`949`	`fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {`
`934`		`- todo!();`
	`950`	`+ f64x2_abs(a.into()).simd_into(self)`
`935`	`951`	`}`
`936`	`952`	`#[inline(always)]`
`937`	`953`	`fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {`
`938`		`- todo!();`
	`954`	`+ f64x2_neg(a.into()).simd_into(self)`
`939`	`955`	`}`
`940`	`956`	`#[inline(always)]`
`941`	`957`	`fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {`
`942`		`- todo!();`
	`958`	`+ f64x2_sqrt(a.into()).simd_into(self)`
`943`	`959`	`}`
`944`	`960`	`#[inline(always)]`
`945`	`961`	`fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {`
`@@ -959,7 +975,10 @@ impl Simd for WasmSimd128 {`
`959`	`975`	`}`
`960`	`976`	`#[inline(always)]`
`961`	`977`	`fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {`
`962`		`- todo!()`
	`978`	`+ let sign_mask = f64x2_splat(-0.0_f64);`
	`979`	`+ let sign_bits = v128_and(b.into(), sign_mask.into());`
	`980`	`+ let magnitude = v128_andnot(a.into(), sign_mask.into());`
	`981`	`+ v128_or(magnitude, sign_bits).simd_into(self)`
`963`	`982`	`}`
`964`	`983`	`#[inline(always)]`
`965`	`984`	`fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {`
`@@ -991,11 +1010,11 @@ impl Simd for WasmSimd128 {`
`991`	`1010`	`}`
`992`	`1011`	`#[inline(always)]`
`993`	`1012`	`fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {`
`994`		`- todo!()`
	`1013`	`+ u64x2_shuffle::<0, 2>(a.into(), b.into()).simd_into(self)`
`995`	`1014`	`}`
`996`	`1015`	`#[inline(always)]`
`997`	`1016`	`fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {`
`998`		`- todo!()`
	`1017`	`+ u64x2_shuffle::<1, 3>(a.into(), b.into()).simd_into(self)`
`999`	`1018`	`}`
`1000`	`1019`	`#[inline(always)]`
`1001`	`1020`	`fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {`
`@@ -1014,24 +1033,24 @@ impl Simd for WasmSimd128 {`
`1014`	`1033`	`f64x2_pmin(b.into(), a.into()).simd_into(self)`
`1015`	`1034`	`}`
`1016`	`1035`	`#[inline(always)]`
`1017`		`- fn madd_f64x2(self, _: f64x2<Self>, _: f64x2<Self>, _: f64x2<Self>) -> f64x2<Self> {`
`1018`		`- todo!()`
	`1036`	`+ fn madd_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {`
	`1037`	`+ a.add(b.mul(c))`
`1019`	`1038`	`}`
`1020`	`1039`	`#[inline(always)]`
`1021`		`- fn msub_f64x2(self, _: f64x2<Self>, _: f64x2<Self>, _: f64x2<Self>) -> f64x2<Self> {`
`1022`		`- todo!()`
	`1040`	`+ fn msub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {`
	`1041`	`+ a.sub(b.mul(c))`
`1023`	`1042`	`}`
`1024`	`1043`	`#[inline(always)]`
`1025`	`1044`	`fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {`
`1026`		`- todo!();`
	`1045`	`+ f64x2_floor(a.into()).simd_into(self)`
`1027`	`1046`	`}`
`1028`	`1047`	`#[inline(always)]`
`1029`	`1048`	`fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {`
`1030`		`- todo!();`
	`1049`	`+ a.sub(a.trunc())`
`1031`	`1050`	`}`
`1032`	`1051`	`#[inline(always)]`
`1033`	`1052`	`fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {`
`1034`		`- todo!();`
	`1053`	`+ f64x2_trunc(a.into()).simd_into(self)`
`1035`	`1054`	`}`
`1036`	`1055`	`#[inline(always)]`
`1037`	`1056`	`fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {`