WIP f16 fma

tgross35 · tgross35 · commit 9ee41374ebd5 · 2025-01-11T20:56:40.000Z
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
@@ -92,6 +92,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["copysignf128"],
     ),
+    (
+        // `(f16, f16, f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::F16, Ty::F16], returns: &[Ty::F16] },
+        None,
+        &["fmaf16"],
+    ),
     (
         // `(f32, f32, f32) -> f32`
         FloatTy::F32,
diff --git a/crates/libm-test/src/f8_impl.rs b/crates/libm-test/src/f8_impl.rs
@@ -20,7 +20,6 @@ pub struct f8(u8);
 impl Float for f8 {
     type Int = u8;
     type SignedInt = i8;
-    type ExpInt = i8;
 
     const ZERO: Self = Self(0b0_0000_000);
     const NEG_ZERO: Self = Self(0b1_0000_000);
@@ -62,8 +61,8 @@ impl Float for f8 {
         self.0 & Self::SIGN_MASK != 0
     }
 
-    fn exp(self) -> Self::ExpInt {
-        unimplemented!()
+    fn exp(self) -> i32 {
+        ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32
     }
 
     fn from_bits(a: Self::Int) -> Self {
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
@@ -147,7 +147,7 @@ libm_macros::for_each_function! {
         expm1 | expm1f => exp_m1,
         fabs | fabsf => abs,
         fdim | fdimf => positive_diff,
-        fma | fmaf => mul_add,
+        fma | fmaf | fmaf16 => mul_add,
         fmax | fmaxf => max,
         fmin | fminf => min,
         lgamma | lgammaf => ln_gamma,
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
@@ -485,7 +485,12 @@ fn bessel_prec_dropoff<F: Float>(
     None
 }
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, f16, f16)> for SpecialCase {}
 impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
 impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
+
 impl MaybeOverride<(f32, i32)> for SpecialCase {}
 impl MaybeOverride<(f64, i32)> for SpecialCase {}
diff --git a/crates/libm-test/tests/multiprecision.rs b/crates/libm-test/tests/multiprecision.rs
@@ -122,6 +122,7 @@ libm_macros::for_each_function! {
         fdimf,
         fma,
         fmaf,
+        fmaf16,
         fmax,
         fmaxf,
         fmin,
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
@@ -328,6 +328,12 @@
         ],
         "type": "f32"
     },
+    "fmaf16": {
+        "sources": [
+            "src/math/fmaf16.rs"
+        ],
+        "type": "f16"
+    },
     "fmax": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
@@ -47,6 +47,7 @@ floor
 floorf
 fma
 fmaf
+fmaf16
 fmax
 fmaxf
 fmin
diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
@@ -47,6 +47,10 @@ use super::fenv::{
 /// according to the rounding mode characterized by the value of FLT_ROUNDS.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
+    if true {
+        return super::generic::fma_big::<f32, f64>(x, y, z);
+    }
+
     let xy: f64;
     let mut result: f64;
     let mut ui: u64;
diff --git a/src/math/fmaf16.rs b/src/math/fmaf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf16(x: f16, y: f16, z: f16) -> f16 {
+    super::generic::fma_big::<f16, f32>(x, y, z)
+}
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
@@ -0,0 +1,53 @@
+use super::super::fenv::{
+    FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
+};
+use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, IntTy, MinInt};
+
+/// Fused multiply-add `x * y + z`, computed via the wider float type `B` and narrowed to `F`.
+///
+/// The sum is formed in `B` and adjusted by one ULP when it sits on a halfway case for `F`
+/// and is inexact, so that the final narrowing does not round a second time.
+pub fn fma_big<F, B>(x: F, y: F, z: F) -> F
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let xy: B = x.widen() * y.widen();
+    let zb: B = z.widen();
+    let result: B = xy + zb;
+    let mut ui: B::Int = result.to_bits();
+    // `ui` holds the bits of a `B`, so the exponent must be extracted with `B`'s layout.
+    let e: i32 = i32::cast_from(ui >> B::SIG_BITS) & B::EXP_MAX as i32;
+
+    // Bits of `B`'s significand that do not fit in `F`, and the pattern for an exact tie.
+    let prec_diff = B::SIG_BITS - F::SIG_BITS;
+    let excess_prec = ui & ((one << prec_diff) - one);
+    let halfway = one << (prec_diff - 1);
+
+    // Common case: narrowing the wide result is already correctly rounded because it is not a
+    // halfway case, is NaN/infinite, is exact, or the rounding mode is not round-to-nearest.
+    if excess_prec != halfway
+        || e == B::EXP_MAX as i32
+        || (result - xy == zb && result - zb == xy)
+        || fegetround() != FE_TONEAREST
+    {
+        // TODO: feclearexcept
+        return result.narrow();
+    }
+
+    // Inexact halfway case: nudge the wide result by one ULP in the direction of the rounding
+    // error so that the narrowing conversion rounds the correct way.
+    let neg = ui & B::SIGN_MASK > IntTy::<B>::ZERO;
+    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    B::from_bits(ui).narrow()
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
@@ -1,5 +1,7 @@
 mod copysign;
 mod fabs;
+mod fma;
 
 pub use copysign::copysign;
 pub use fabs::fabs;
+pub use fma::fma_big;
diff --git a/src/math/mod.rs b/src/math/mod.rs
@@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2;
 use self::rem_pio2_large::rem_pio2_large;
 use self::rem_pio2f::rem_pio2f;
 #[allow(unused_imports)]
-use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
+use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt};
 
 // Public modules
 mod acos;
@@ -343,9 +343,11 @@ cfg_if! {
     if #[cfg(f16_enabled)] {
         mod copysignf16;
         mod fabsf16;
+        mod fmaf16;
 
         pub use self::copysignf16::copysignf16;
         pub use self::fabsf16::fabsf16;
+        pub use self::fmaf16::fmaf16;
     }
 }
 
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
@@ -1,4 +1,5 @@
-use core::{fmt, mem, ops};
+use core::ops::{self, Neg};
+use core::{fmt, mem};
 
 use super::int_traits::{Int, MinInt};
 
@@ -23,10 +24,9 @@ pub trait Float:
     type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
-
-    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
-    type ExpInt: Int;
+    type SignedInt: Int
+        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
+        + Neg<Output = Self::SignedInt>;
 
     const ZERO: Self;
     const NEG_ZERO: Self;
@@ -98,7 +98,7 @@ pub trait Float:
     }
 
     /// Returns the exponent, not adjusting for bias.
-    fn exp(self) -> Self::ExpInt;
+    fn exp(self) -> i32;
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
     fn frac(self) -> Self::Int {
@@ -138,23 +138,20 @@ pub trait Float:
 }
 
 /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
-#[allow(dead_code)]
 pub type IntTy<F> = <F as Float>::Int;
 
 macro_rules! float_impl {
     (
         $ty:ident,
         $ity:ident,
         $sity:ident,
-        $expty:ident,
         $bits:expr,
         $significand_bits:expr,
         $from_bits:path
     ) => {
         impl Float for $ty {
             type Int = $ity;
             type SignedInt = $sity;
-            type ExpInt = $expty;
 
             const ZERO: Self = 0.0;
             const NEG_ZERO: Self = -0.0;
@@ -191,8 +188,8 @@ macro_rules! float_impl {
             fn is_sign_negative(self) -> bool {
                 self.is_sign_negative()
             }
-            fn exp(self) -> Self::ExpInt {
-                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
+            fn exp(self) -> i32 {
+                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as i32
             }
             fn from_bits(a: Self::Int) -> Self {
                 Self::from_bits(a)
@@ -226,11 +223,11 @@ macro_rules! float_impl {
 }
 
 #[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, i8, 16, 10, f16::from_bits);
-float_impl!(f32, u32, i32, i16, 32, 23, f32_from_bits);
-float_impl!(f64, u64, i64, i16, 64, 52, f64_from_bits);
+float_impl!(f16, u16, i16, 16, 10, f16::from_bits);
+float_impl!(f32, u32, i32, 32, 23, f32_from_bits);
+float_impl!(f64, u64, i64, 64, 52, f64_from_bits);
 #[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, i16, 128, 112, f128::from_bits);
+float_impl!(f128, u128, i128, 128, 112, f128::from_bits);
 
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 
@@ -245,3 +242,63 @@ pub const fn f64_from_bits(bits: u64) -> f64 {
     // SAFETY: POD cast with no preconditions
     unsafe { mem::transmute::<u64, f64>(bits) }
 }
+
+/// Trait for floats twice the bit width of another float.
+#[allow(unused)]
+pub trait DFloat: Float {
+    /// Float that is half the bit width of the float this trait is implemented for.
+    type H: HFloat<D = Self>;
+
+    /// Narrow the float type.
+    fn narrow(self) -> Self::H;
+}
+
+/// Trait for floats half the bit width of another float.
+#[allow(unused)]
+pub trait HFloat: Float {
+    /// Float that is double the bit width of the float this trait is implemented for.
+    type D: DFloat<H = Self>;
+
+    /// Widen the float type.
+    fn widen(self) -> Self::D;
+}
+
+macro_rules! impl_d_float {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DFloat for $D {
+                type H = $X;
+
+                fn narrow(self) -> Self::H {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_float {
+    ($($H:ident $X:ident),*) => {
+        $(
+            impl HFloat for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+impl_d_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_d_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_d_float!(f64 f128);
+
+impl_h_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_h_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_h_float!(f64 f128);
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
@@ -82,6 +82,7 @@ pub trait Int:
     fn wrapping_shr(self, other: u32) -> Self;
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn overflowing_sub(self, other: Self) -> (Self, bool);
     fn leading_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
@@ -140,6 +141,10 @@ macro_rules! int_impl_common {
             <Self>::overflowing_add(self, other)
         }
 
+        fn overflowing_sub(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_sub(self, other)
+        }
+
         fn leading_zeros(self) -> u32 {
             <Self>::leading_zeros(self)
         }
@@ -382,3 +387,29 @@ cast_into!(u64);
 cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
+
+cast_into!(i64; f32);
+cast_into!(i64; f64);
+cast_into!(f32; f64);
+cast_into!(f64; f32);
+
+cast_into!(bool; u16);
+cast_into!(bool; u32);
+cast_into!(bool; u64);
+cast_into!(bool; u128);
+
+cfg_if! {
+    if #[cfg(f16_enabled)] {
+        cast_into!(f16; f32, f64);
+        cast_into!(f32; f16);
+        cast_into!(f64; f16);
+    }
+}
+
+cfg_if! {
+    if #[cfg(f128_enabled)] {
+        cast_into!(f128; f32, f64);
+        cast_into!(f32; f128);
+        cast_into!(f64; f128);
+    }
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
@@ -5,7 +5,7 @@ mod hex_float;
 mod int_traits;
 
 #[allow(unused_imports)]
-pub use float_traits::{Float, IntTy};
+pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};

-Original file line number
+Diff line change
         fdimf,
         fma,
         fmaf,
 +        fmaf16,
         fmax,
         fmaxf,
         fmin,