Skip to content

Commit 3d1a77c

Browse files
[WASM] Add f64x2 operations and unzip (#37)
1 parent 5e700ca commit 3d1a77c

File tree

3 files changed

+371
-93
lines changed

3 files changed

+371
-93
lines changed

fearless_simd/src/generated/wasm.rs

Lines changed: 49 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -112,11 +112,11 @@ impl Simd for WasmSimd128 {
112112
}
113113
#[inline(always)]
114114
fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
115-
todo!()
115+
u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)
116116
}
117117
#[inline(always)]
118118
fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
119-
todo!()
119+
u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)
120120
}
121121
#[inline(always)]
122122
fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
@@ -136,19 +136,19 @@ impl Simd for WasmSimd128 {
136136
}
137137
#[inline(always)]
138138
fn madd_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
139-
self.add_f32x4(a, self.mul_f32x4(b, c))
139+
a.add(b.mul(c))
140140
}
141141
#[inline(always)]
142142
fn msub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
143-
self.sub_f32x4(a, self.mul_f32x4(b, c))
143+
a.sub(b.mul(c))
144144
}
145145
#[inline(always)]
146146
fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
147147
f32x4_floor(a.into()).simd_into(self)
148148
}
149149
#[inline(always)]
150150
fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
151-
self.sub_f32x4(a, self.trunc_f32x4(a))
151+
a.sub(a.trunc())
152152
}
153153
#[inline(always)]
154154
fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
@@ -263,11 +263,19 @@ impl Simd for WasmSimd128 {
263263
}
264264
#[inline(always)]
265265
fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
266-
todo!()
266+
u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(
267+
a.into(),
268+
b.into(),
269+
)
270+
.simd_into(self)
267271
}
268272
#[inline(always)]
269273
fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
270-
todo!()
274+
u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(
275+
a.into(),
276+
b.into(),
277+
)
278+
.simd_into(self)
271279
}
272280
#[inline(always)]
273281
fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
@@ -370,11 +378,19 @@ impl Simd for WasmSimd128 {
370378
}
371379
#[inline(always)]
372380
fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
373-
todo!()
381+
u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(
382+
a.into(),
383+
b.into(),
384+
)
385+
.simd_into(self)
374386
}
375387
#[inline(always)]
376388
fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
377-
todo!()
389+
u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(
390+
a.into(),
391+
b.into(),
392+
)
393+
.simd_into(self)
378394
}
379395
#[inline(always)]
380396
fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
@@ -511,11 +527,11 @@ impl Simd for WasmSimd128 {
511527
}
512528
#[inline(always)]
513529
fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
514-
todo!()
530+
u16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a.into(), b.into()).simd_into(self)
515531
}
516532
#[inline(always)]
517533
fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
518-
todo!()
534+
u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(a.into(), b.into()).simd_into(self)
519535
}
520536
#[inline(always)]
521537
fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
@@ -610,11 +626,11 @@ impl Simd for WasmSimd128 {
610626
}
611627
#[inline(always)]
612628
fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
613-
todo!()
629+
u16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a.into(), b.into()).simd_into(self)
614630
}
615631
#[inline(always)]
616632
fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
617-
todo!()
633+
u16x8_shuffle::<1, 3, 5, 7, 9, 11, 13, 15>(a.into(), b.into()).simd_into(self)
618634
}
619635
#[inline(always)]
620636
fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
@@ -749,11 +765,11 @@ impl Simd for WasmSimd128 {
749765
}
750766
#[inline(always)]
751767
fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
752-
todo!()
768+
u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)
753769
}
754770
#[inline(always)]
755771
fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
756-
todo!()
772+
u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)
757773
}
758774
#[inline(always)]
759775
fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
@@ -852,11 +868,11 @@ impl Simd for WasmSimd128 {
852868
}
853869
#[inline(always)]
854870
fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
855-
todo!()
871+
u32x4_shuffle::<0, 2, 4, 6>(a.into(), b.into()).simd_into(self)
856872
}
857873
#[inline(always)]
858874
fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
859-
todo!()
875+
u32x4_shuffle::<1, 3, 5, 7>(a.into(), b.into()).simd_into(self)
860876
}
861877
#[inline(always)]
862878
fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
@@ -931,15 +947,15 @@ impl Simd for WasmSimd128 {
931947
}
932948
#[inline(always)]
933949
fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
934-
todo!();
950+
f64x2_abs(a.into()).simd_into(self)
935951
}
936952
#[inline(always)]
937953
fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
938-
todo!();
954+
f64x2_neg(a.into()).simd_into(self)
939955
}
940956
#[inline(always)]
941957
fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
942-
todo!();
958+
f64x2_sqrt(a.into()).simd_into(self)
943959
}
944960
#[inline(always)]
945961
fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
@@ -959,7 +975,10 @@ impl Simd for WasmSimd128 {
959975
}
960976
#[inline(always)]
961977
fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
962-
todo!()
978+
let sign_mask = f64x2_splat(-0.0_f64);
979+
let sign_bits = v128_and(b.into(), sign_mask.into());
980+
let magnitude = v128_andnot(a.into(), sign_mask.into());
981+
v128_or(magnitude, sign_bits).simd_into(self)
963982
}
964983
#[inline(always)]
965984
fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
@@ -991,11 +1010,11 @@ impl Simd for WasmSimd128 {
9911010
}
9921011
#[inline(always)]
9931012
fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
994-
todo!()
1013+
u64x2_shuffle::<0, 2>(a.into(), b.into()).simd_into(self)
9951014
}
9961015
#[inline(always)]
9971016
fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
998-
todo!()
1017+
u64x2_shuffle::<1, 3>(a.into(), b.into()).simd_into(self)
9991018
}
10001019
#[inline(always)]
10011020
fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
@@ -1014,24 +1033,24 @@ impl Simd for WasmSimd128 {
10141033
f64x2_pmin(b.into(), a.into()).simd_into(self)
10151034
}
10161035
#[inline(always)]
1017-
fn madd_f64x2(self, _: f64x2<Self>, _: f64x2<Self>, _: f64x2<Self>) -> f64x2<Self> {
1018-
todo!()
1036+
fn madd_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
1037+
a.add(b.mul(c))
10191038
}
10201039
#[inline(always)]
1021-
fn msub_f64x2(self, _: f64x2<Self>, _: f64x2<Self>, _: f64x2<Self>) -> f64x2<Self> {
1022-
todo!()
1040+
fn msub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
1041+
a.sub(b.mul(c))
10231042
}
10241043
#[inline(always)]
10251044
fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
1026-
todo!();
1045+
f64x2_floor(a.into()).simd_into(self)
10271046
}
10281047
#[inline(always)]
10291048
fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
1030-
todo!();
1049+
a.sub(a.trunc())
10311050
}
10321051
#[inline(always)]
10331052
fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
1034-
todo!();
1053+
f64x2_trunc(a.into()).simd_into(self)
10351054
}
10361055
#[inline(always)]
10371056
fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {

fearless_simd_gen/src/mk_wasm.rs

Lines changed: 72 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ use crate::{
99
arch::{Arch, wasm::Wasm},
1010
generic::{generic_combine, generic_op, generic_split},
1111
ops::{OpSig, TyFlavor, ops_for_type},
12-
types::{SIMD_TYPES, ScalarType, VecType, type_imports},
12+
types::{SIMD_TYPES, ScalarType, type_imports},
1313
};
1414

1515
#[derive(Clone, Copy)]
@@ -65,50 +65,43 @@ fn mk_simd_impl(level: Level) -> TokenStream {
6565
}
6666
}
6767
OpSig::Unary => {
68-
if vec_ty.scalar_bits != 64 || vec_ty.scalar != ScalarType::Float {
69-
let args = [quote! { a.into() }];
70-
let expr = if matches!(method, "fract") {
71-
assert_eq!(ty_name, "f32x4", "only support fract_f32x4");
72-
quote! {
73-
self.sub_f32x4(a, self.trunc_f32x4(a))
74-
}
75-
} else {
76-
let expr = Wasm.expr(method, vec_ty, &args);
77-
quote! { #expr.simd_into(self) }
78-
};
68+
let args = [quote! { a.into() }];
69+
let expr = if matches!(method, "fract") {
70+
assert_eq!(
71+
vec_ty.scalar,
72+
ScalarType::Float,
73+
"only float supports fract"
74+
);
7975

8076
quote! {
81-
#[inline(always)]
82-
fn #method_ident(self, a: #ty<Self>) -> #ret_ty {
83-
#expr
84-
}
77+
a.sub(a.trunc())
8578
}
8679
} else {
87-
quote! {
88-
#[inline(always)]
89-
fn #method_ident(self, a: #ty<Self>) -> #ret_ty {
90-
todo!();
91-
}
80+
let expr = Wasm.expr(method, vec_ty, &args);
81+
quote! { #expr.simd_into(self) }
82+
};
83+
84+
quote! {
85+
#[inline(always)]
86+
fn #method_ident(self, a: #ty<Self>) -> #ret_ty {
87+
#expr
9288
}
9389
}
9490
}
9591
OpSig::Binary if method == "copysign" => {
96-
if ty_name == "f32x4" {
97-
quote! {
98-
#[inline(always)]
99-
fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
100-
let sign_mask = f32x4_splat(-0.0_f32);
101-
let sign_bits = v128_and(b.into(), sign_mask.into());
102-
let magnitude = v128_andnot(a.into(), sign_mask.into());
103-
v128_or(magnitude, sign_bits).simd_into(self)
104-
}
105-
}
106-
} else {
107-
quote! {
108-
#[inline(always)]
109-
fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
110-
todo!()
111-
}
92+
let splat: Ident = format_ident!("{}_splat", vec_ty.rust_name());
93+
let sign_mask_literal = match vec_ty.scalar_bits {
94+
32 => quote! { -0.0_f32 },
95+
64 => quote! { -0.0_f64 },
96+
_ => unimplemented!(),
97+
};
98+
quote! {
99+
#[inline(always)]
100+
fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
101+
let sign_mask = #splat(#sign_mask_literal);
102+
let sign_bits = v128_and(b.into(), sign_mask.into());
103+
let magnitude = v128_andnot(a.into(), sign_mask.into());
104+
v128_or(magnitude, sign_bits).simd_into(self)
112105
}
113106
}
114107
}
@@ -162,37 +155,18 @@ fn mk_simd_impl(level: Level) -> TokenStream {
162155
}
163156
}
164157
OpSig::Ternary => {
165-
if vec_ty.scalar_bits == 64 && vec_ty.scalar == ScalarType::Float {
166-
quote! {
167-
#[inline(always)]
168-
fn #method_ident(self, _: #ty<Self>, _: #ty<Self>, _: #ty<Self>) -> #ret_ty {
169-
todo!()
170-
}
171-
}
172-
} else if matches!(method, "madd" | "msub") {
173-
let first_ident = {
174-
let str = if method == "madd" {
175-
"add_f32x4"
176-
} else {
177-
"sub_f32x4"
178-
};
179-
180-
Ident::new(str, Span::call_site())
158+
if matches!(method, "madd" | "msub") {
159+
let first_ident = if method == "madd" {
160+
quote! {add}
161+
} else {
162+
quote! {sub}
181163
};
182164

183-
assert_eq!(
184-
vec_ty,
185-
&VecType {
186-
scalar: ScalarType::Float,
187-
scalar_bits: 32,
188-
len: 4,
189-
}
190-
);
191165
// TODO: `relaxed-simd` has madd.
192166
quote! {
193167
#[inline(always)]
194168
fn #method_ident(self, a: #ty<Self>, b: #ty<Self>, c: #ty<Self>) -> #ret_ty {
195-
self.#first_ident(a, self.mul_f32x4(b, c))
169+
a.#first_ident(b.mul(c))
196170
}
197171
}
198172
} else {
@@ -265,10 +239,45 @@ fn mk_simd_impl(level: Level) -> TokenStream {
265239
}
266240
}
267241
OpSig::Unzip(is_low) => {
242+
let (indices, shuffle_fn) = match vec_ty.scalar_bits {
243+
8 => {
244+
let indices = if is_low {
245+
quote! { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
246+
} else {
247+
quote! { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }
248+
};
249+
(indices, quote! { u8x16_shuffle })
250+
}
251+
16 => {
252+
let indices = if is_low {
253+
quote! { 0, 2, 4, 6, 8, 10, 12, 14 }
254+
} else {
255+
quote! { 1, 3, 5, 7, 9, 11, 13, 15 }
256+
};
257+
(indices, quote! { u16x8_shuffle })
258+
}
259+
32 => {
260+
let indices = if is_low {
261+
quote! { 0, 2, 4, 6 }
262+
} else {
263+
quote! { 1, 3, 5, 7 }
264+
};
265+
(indices, quote! { u32x4_shuffle })
266+
}
267+
64 => {
268+
let indices = if is_low {
269+
quote! { 0, 2 }
270+
} else {
271+
quote! { 1, 3 }
272+
};
273+
(indices, quote! { u64x2_shuffle })
274+
}
275+
_ => panic!("unsupported scalar_bits for unzip operation"),
276+
};
268277
quote! {
269278
#[inline(always)]
270279
fn #method_ident(self, a: #ty<Self>, b: #ty<Self>) -> #ret_ty {
271-
todo!()
280+
#shuffle_fn::<#indices>(a.into(), b.into()).simd_into(self)
272281
}
273282
}
274283
}

0 commit comments

Comments
 (0)