f64 wip

tgross35 · tgross35 · commit 0b159b2c4fef · 2025-01-06T11:14:42.000Z
diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
@@ -20,7 +20,6 @@ pub struct f8(u8);
 impl Float for f8 {
     type Int = u8;
     type SignedInt = i8;
-    type ExpInt = i8;
 
     const ZERO: Self = Self(0b0_0000_000);
     const NEG_ZERO: Self = Self(0b1_0000_000);
@@ -62,8 +61,8 @@ impl Float for f8 {
         self.0 & Self::SIGN_MASK != 0
     }
 
-    fn exp(self) -> Self::ExpInt {
-        unimplemented!()
+    fn exp(self) -> i32 {
+        ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32
     }
 
     fn from_bits(a: Self::Int) -> Self {
diff --git a/src/math/fma.rs b/src/math/fma.rs
@@ -42,6 +42,10 @@ fn mul(x: u64, y: u64) -> (u64, u64) {
 /// according to the rounding mode characterized by the value of FLT_ROUNDS.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    if true {
+        return super::generic::fma(x, y, z, scalbn);
+    }
+
     let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
     let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
 
diff --git a/src/math/fmaf128.rs b/src/math/fmaf128.rs
@@ -1,4 +1,6 @@
+#[expect(unused)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
-    super::generic::fma(x, y, z)
+    // super::generic::fma(x, y, z)
+    todo!()
 }
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
@@ -1,13 +1,216 @@
 #![allow(unused)]
 
+use core::ops::{Shl, Shr};
+
 use super::super::fenv::{
     FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
 };
+use super::super::support::{DInt, HInt, Int};
 use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
 
+/// Threshold on `Norm::e` used by `fma` to detect special operands.
+///
+/// `Norm::from_float` subtracts `0x3ff + 52 + 1` from the raw exponent field, so this is the
+/// value of `Norm::e` for an input whose raw exponent field is `0x7ff`. Inputs for which
+/// `from_float` substitutes `0x800` end up strictly above this value.
+///
+/// The literals are hard-coded rather than derived from `F`.
+const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
+
 /// Fused multiply add.
-pub fn fma<F: Float>(x: F, y: F, z: F) -> F {
-    todo!()
+///
+/// Splits each operand with `Norm::from_float`, multiplies the two significands into a
+/// double-width integer (`rhi:rlo`), aligns and adds/subtracts the significand of `z`, then
+/// converts the top bits back to `F` and hands the result to `scbn` for the final scaling.
+///
+/// # Parameters
+///
+/// * `x`, `y`, `z`: operands of `x * y + z`.
+/// * `scbn`: called once as `scbn(r, e)` on the normal return path; the `f64` entry point
+///   passes `scalbn`.
+///
+/// # Returns
+///
+/// * `x * y + z` evaluated directly when `x` or `y` trips the `ZEROINFNAN` check, when `z` is
+///   classified as zero, or when the integer sum is exactly zero.
+/// * `z` itself when `nz.e == ZEROINFNAN`.
+/// * `scbn(r, e)` otherwise.
+///
+/// # Panics
+///
+/// Two branches of the `e == -1022 - 63` case are still `todo!()` and panic when reached.
+///
+/// NOTE(review): the exponent thresholds (`-1022 - 62`, `-1022 - 63`), the shift `d = 10` and
+/// `ZEROINFNAN` are literal values, not derived from `F`.
+pub fn fma<F: Float>(x: F, y: F, z: F, scbn: impl FnOnce(F, i32) -> F) -> F
+where
+    F::Int: CastFrom<u32>,
+    F::Int: HInt,
+    F::Int: Shr<i32, Output = F::Int>,
+    F::Int: Shl<i32, Output = F::Int>,
+    F::SignedInt: CastInto<F>,
+    u32: CastInto<F::Int>,
+    bool: CastInto<F::Int>,
+{
+    let one = F::Int::ONE;
+    let zero = F::Int::ZERO;
+
+    let nx = Norm::from_float(x);
+    let ny = Norm::from_float(y);
+    let nz = Norm::from_float(z);
+
+    // Special operands: let the plain floating point expression produce the result.
+    if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
+        return x * y + z;
+    }
+    if nz.e >= ZEROINFNAN {
+        if nz.e > ZEROINFNAN {
+            /* z==0 */
+            return x * y + z;
+        }
+        return z;
+    }
+
+    let zhi: F::Int;
+    let zlo: F::Int;
+
+    /* mul: r = x*y, as a double-width integer split into low and high halves */
+    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
+
+    /* align exponents */
+    let mut e: i32 = nx.e + ny.e;
+    let mut d: i32 = nz.e - e;
+
+    let fbits = F::BITS as i32;
+
+    if d > 0 {
+        // z has the larger exponent: shift z left, and if that is not enough shift r right,
+        // folding any bits shifted out of r into a sticky low bit.
+        if d < fbits {
+            zlo = nz.m << d;
+            zhi = nz.m >> (fbits - d);
+        } else {
+            zlo = zero;
+            zhi = nz.m;
+            e = nz.e - fbits;
+            d -= fbits;
+            if d == 0 {
+            } else if d < fbits {
+                rlo = (rhi << (fbits - d)) | (rlo >> d) | ((rlo << (fbits - d)) != zero).cast();
+                rhi = rhi >> d;
+            } else {
+                rlo = one;
+                rhi = zero;
+            }
+        }
+    } else {
+        // r has the larger (or equal) exponent: shift z right with a sticky low bit.
+        zhi = zero;
+        d = -d;
+        if d == 0 {
+            zlo = nz.m;
+        } else if d < fbits {
+            zlo = (nz.m >> d) | ((nz.m << (fbits - d)) != zero).cast();
+        } else {
+            zlo = one;
+        }
+    }
+
+    /* add */
+    let mut neg: bool = nx.neg ^ ny.neg;
+    // The magnitudes add only when the product and `z` carry the SAME sign. The previous
+    // `neg ^ nz.neg` was true for differing signs, which swapped the two branches below.
+    let samesign: bool = neg == nz.neg;
+    let mut nonzero: i32 = 1;
+    if samesign {
+        /* r += z */
+        rlo = rlo.wrapping_add(zlo);
+        rhi += zhi + (rlo < zlo).cast();
+    } else {
+        /* r -= z */
+        let (res, borrow) = rlo.overflowing_sub(zlo);
+        rlo = res;
+        rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow.cast()));
+        if (rhi >> (F::BITS - 1)) != zero {
+            // The difference went negative: negate the double-width value and flip the sign.
+            rlo = (rlo.signed()).wrapping_neg().unsigned();
+            rhi = (rhi.signed()).wrapping_neg().unsigned() - (rlo != zero).cast();
+            neg = !neg;
+        }
+        nonzero = (rhi != zero) as i32;
+    }
+
+    /* set rhi to top 63bit of the result (last bit is sticky) */
+    if nonzero != 0 {
+        e += fbits;
+        d = rhi.leading_zeros() as i32 - 1;
+        /* note: d > 0 */
+        rhi = (rhi << d) | (rlo >> (fbits - d)) | ((rlo << d) != zero).cast();
+    } else if rlo != zero {
+        d = rlo.leading_zeros() as i32 - 1;
+        if d < 0 {
+            rhi = (rlo >> 1) | (rlo & one);
+        } else {
+            rhi = rlo << d;
+        }
+    } else {
+        /* exact +-0 */
+        return x * y + z;
+    }
+    e -= d;
+
+    /* convert to double */
+    let mut i: F::SignedInt = rhi.signed(); /* i is in [1<<62,(1<<63)-1] */
+    if neg {
+        i = -i;
+    }
+    let mut r: F = i.cast(); /* |r| is in [0x1p62,0x1p63] */
+
+    if e < -1022 - 62 {
+        /* result is subnormal before rounding */
+        if e == -1022 - 63 {
+            let mut c: F = foo::<F>();
+            if neg {
+                c = -c;
+            }
+            if r == c {
+                /* min normal after rounding, underflow depends
+                on arch behaviour which can be imitated by
+                a double to float conversion */
+                // let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
+                // return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
+                todo!()
+            }
+            /* one bit is lost when scaled, add another top bit to
+            only round once at conversion if it is inexact */
+            if (rhi << (F::SIG_BITS + 1)) != zero {
+                let tmp: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
+                i = tmp.signed();
+                if neg {
+                    i = -i;
+                }
+                r = i.cast();
+                r = (F::ONE + F::ONE) * r - c; /* remove top bit */
+
+                /* raise underflow portably, such that it
+                cannot be optimized away */
+                {
+                    // let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
+                    // r += (tiny * tiny) * (r - r);
+                    todo!()
+                }
+            }
+        } else {
+            /* only round once when scaled */
+            d = 10;
+            i = (((rhi >> d) | ((rhi << (fbits - d)) != zero).cast()) << d).signed();
+            if neg {
+                i = -i;
+            }
+            r = i.cast();
+        }
+    }
+
+    // Apply the accumulated exponent to the converted significand.
+    scbn(r, e)
+}
+
+/// A float split into the integer pieces the generic `fma` operates on.
+struct Norm<F: Float> {
+    /// Significand bits of the input with `F::IMPLICIT_BIT` set, shifted left by one bit.
+    m: F::Int,
+    /// Raw exponent field (rebased for inputs that had to be scaled, or `0x800` when the
+    /// exponent field is still zero after scaling) minus `0x3ff + 52 + 1`.
+    e: i32,
+    /// Sign of the input as reported by `is_sign_negative`.
+    neg: bool,
+}
+
+impl<F: Float> Norm<F> {
+    /// Decomposes `x` into significand, adjusted exponent and sign.
+    ///
+    /// Inputs whose raw exponent field is zero are first multiplied by `foo::<F>()`; the
+    /// exponent is then read from the *scaled* value and the scaling is subtracted back out.
+    /// If the exponent field is still zero after scaling, `0x800` is used so that the final
+    /// `e` exceeds `ZEROINFNAN`.
+    ///
+    /// NOTE(review): the compensation `F::BITS - 1` assumes `foo::<F>()` is 2^(F::BITS - 1);
+    /// `0x800` and `0x3ff + 52 + 1` are literal values, not derived from `F`.
+    fn from_float(x: F) -> Self
+    where
+        F::Int: CastFrom<u32>,
+        u32: CastInto<F::Int>,
+    {
+        let neg = x.is_sign_negative();
+        let mut ix = x.to_bits();
+        let mut e = x.exp();
+        if e == 0 {
+            // Read the exponent of the scaled value, not of `x` again: re-reading `x` always
+            // yields zero here and would classify every subnormal as zero.
+            let scaled = x * foo::<F>();
+            ix = scaled.to_bits();
+            e = scaled.exp();
+            e = if e != 0 { e - (F::BITS as i32 - 1) } else { 0x800 };
+        }
+        // Keep only the fraction bits, restore the implicit bit, and shift left once.
+        ix &= F::SIG_MASK;
+        ix |= F::IMPLICIT_BIT;
+        ix <<= 1;
+        e -= 0x3ff + 52 + 1;
+
+        Self { m: ix, e, neg }
+    }
+}
+
+// Scale constant shared by `Norm::from_float` (to scale inputs whose exponent field is zero)
+// and by `fma` (as the comparison value `c`). Originally labelled "1p63 magic number".
+//
+// Built with `F::from_parts` from a positive sign, an exponent argument of `F::BITS - 1` and a
+// zero significand.
+//
+// NOTE(review): `from_parts` is not visible here. If it expects the raw (biased) exponent
+// field, the bias must be added for this to equal 2^(F::BITS - 1) — confirm.
+fn foo<F: Float>() -> F
+where
+    u32: CastInto<F::Int>,
+{
+    F::from_parts(false, (F::BITS - 1).cast(), F::Int::ZERO)
+}
 
 /// FMA implementation when there is a larger float type available.
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
@@ -1,4 +1,5 @@
-use core::{fmt, mem, ops};
+use core::ops::{self, Neg};
+use core::{fmt, mem};
 
 use super::int_traits::{Int, MinInt};
 
@@ -23,10 +24,9 @@ pub trait Float:
     type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
-
-    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
-    type ExpInt: Int;
+    type SignedInt: Int
+        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
+        + Neg<Output = Self::SignedInt>;
 
     const ZERO: Self;
     const NEG_ZERO: Self;
@@ -98,7 +98,7 @@ pub trait Float:
     }
 
     /// Returns the exponent, not adjusting for bias.
-    fn exp(self) -> Self::ExpInt;
+    fn exp(self) -> i32;
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
     fn frac(self) -> Self::Int {
@@ -145,15 +145,13 @@ macro_rules! float_impl {
         $ty:ident,
         $ity:ident,
         $sity:ident,
-        $expty:ident,
         $bits:expr,
         $significand_bits:expr,
         $from_bits:path
     ) => {
         impl Float for $ty {
             type Int = $ity;
             type SignedInt = $sity;
-            type ExpInt = $expty;
 
             const ZERO: Self = 0.0;
             const NEG_ZERO: Self = -0.0;
@@ -190,8 +188,8 @@ macro_rules! float_impl {
             fn is_sign_negative(self) -> bool {
                 self.is_sign_negative()
             }
-            fn exp(self) -> Self::ExpInt {
-                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
+            fn exp(self) -> i32 {
+                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32
             }
             fn from_bits(a: Self::Int) -> Self {
                 Self::from_bits(a)
@@ -225,11 +223,11 @@ macro_rules! float_impl {
 }
 
 #[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits);
-float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits);
-float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits);
+float_impl!(f16, u16, i16, 16, 10, f16::from_bits);
+float_impl!(f32, u32, i32, 32, 23, f32_from_bits);
+float_impl!(f64, u64, i64, 64, 52, f64_from_bits);
 #[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits);
+float_impl!(f128, u128, i128, 128, 112, f128::from_bits);
 
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
@@ -82,6 +82,7 @@ pub trait Int:
     fn wrapping_shr(self, other: u32) -> Self;
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn overflowing_sub(self, other: Self) -> (Self, bool);
     fn leading_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
@@ -140,6 +141,10 @@ macro_rules! int_impl_common {
             <Self>::overflowing_add(self, other)
         }
 
+        // Delegates to `<Self>::overflowing_sub`, mirroring `overflowing_add` above; returns
+        // the result together with the `bool` flag produced by that call.
+        fn overflowing_sub(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_sub(self, other)
+        }
+
         fn leading_zeros(self) -> u32 {
             <Self>::leading_zeros(self)
         }
@@ -383,9 +388,16 @@ cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
 
+cast_into!(i64; f32);
+cast_into!(i64; f64);
 cast_into!(f32; f64);
 cast_into!(f64; f32);
 
+cast_into!(bool; u16);
+cast_into!(bool; u32);
+cast_into!(bool; u64);
+cast_into!(bool; u128);
+
 cfg_if! {
     if #[cfg(f16_enabled)] {
         cast_into!(f16; f32, f64);

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,6 @@`
	`1`	`+#[expect(unused)]`
`1`	`2`	`#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]`
`2`	`3`	`pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {`
`3`		`- super::generic::fma(x, y, z)`
	`4`	`+ // super::generic::fma(x, y, z)`
	`5`	`+ todo!()`
`4`	`6`	`}`