Merge pull request #622 from tgross35/f128-div

tgross35 · web-flow · commit e86ef273e85a · 2024-09-24T18:40:16.000+02:00
Add `__divtf3`
diff --git a/README.md b/README.md
@@ -222,7 +222,7 @@ of being added to Rust.
 
 - [x] addtf3.c
 - [x] comparetf2.c
-- [ ] divtf3.c
+- [x] divtf3.c
 - [x] extenddftf2.c
 - [x] extendhfsf2.c
 - [x] extendhftf2.c
diff --git a/build.rs b/build.rs
@@ -526,7 +526,6 @@ mod c {
                 ("__floatsitf", "floatsitf.c"),
                 ("__floatunditf", "floatunditf.c"),
                 ("__floatunsitf", "floatunsitf.c"),
-                ("__divtf3", "divtf3.c"),
                 ("__powitf2", "powitf2.c"),
                 ("__fe_getround", "fp_mode.c"),
                 ("__fe_raise_inexact", "fp_mode.c"),
diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs
@@ -256,6 +256,10 @@ mod intrinsics {
         a * b
     }
 
+    pub fn divtf(a: f128, b: f128) -> f128 {
+        a / b // presumably lowers to `__divtf3` (the symbol this commit adds) — confirm
+    }
+
     pub fn subtf(a: f128, b: f128) -> f128 {
         a - b
     }
@@ -440,6 +444,7 @@ fn run() {
     bb(aeabi_uldivmod(bb(2), bb(3)));
     bb(ashlti3(bb(2), bb(2)));
     bb(ashrti3(bb(2), bb(2)));
+    bb(divtf(bb(2.), bb(2.)));
     bb(divti3(bb(2), bb(2)));
     bb(eqtf(bb(2.), bb(2.)));
     bb(extendhfdf(bb(2.)));
diff --git a/src/float/div.rs b/src/float/div.rs
diff --git a/src/float/mod.rs b/src/float/mod.rs
@@ -31,10 +31,10 @@ pub(crate) trait Float:
     + ops::Rem<Output = Self>
 {
     /// A uint of the same width as the float
-    type Int: Int;
+    type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int;
+    type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
 
     /// An int capable of containing the exponent bits plus a sign bit. This is signed.
     type ExpInt: Int;
@@ -51,7 +51,7 @@ pub(crate) trait Float:
     /// The bitwidth of the exponent
     const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
 
-    /// The maximum value of the exponent
+    /// The saturated value of the exponent (infinite representation), in the rightmost position.
     const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
 
     /// The exponent bias value
@@ -83,7 +83,7 @@ pub(crate) trait Float:
     /// Returns true if the sign is negative
     fn is_sign_negative(self) -> bool;
 
-    /// Returns the exponent with bias
+    /// Returns the exponent, not adjusting for bias.
     fn exp(self) -> Self::ExpInt;
 
     /// Returns the significand with no implicit bit (or the "fractional" part)
@@ -175,7 +175,7 @@ macro_rules! float_impl {
             fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                 let shift = significand
                     .leading_zeros()
-                    .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros());
+                    .wrapping_sub(Self::EXPONENT_BITS);
                 (
                     1i32.wrapping_sub(shift as i32),
                     significand << shift as Self::Int,
diff --git a/src/int/big.rs b/src/int/big.rs
@@ -93,7 +93,7 @@ macro_rules! impl_common {
             type Output = Self;
 
             fn shl(self, rhs: u32) -> Self::Output {
-                todo!()
+                unimplemented!("only used to meet trait bounds")
             }
         }
     };
@@ -102,6 +102,41 @@ macro_rules! impl_common {
 impl_common!(i256);
 impl_common!(u256);
 
+impl ops::Shr<u32> for u256 {
+    type Output = Self;
+
+    /// Logical right shift by `rhs` bits. Panics if `rhs >= Self::BITS`.
+    fn shr(self, rhs: u32) -> Self::Output {
+        assert!(rhs < Self::BITS, "attempted to shift right with overflow");
+
+        if rhs == 0 {
+            return self;
+        }
+
+        let mut ret = self;
+        let word_shift = (rhs / 64) as usize;
+        let bit_shift = rhs % 64;
+
+        // Word 0 is the least significant. Output word `idx` only reads input words at
+        // indices `>= idx`, so `ret` can be rewritten in place from the bottom up.
+        for idx in 0..4 {
+            let src = idx + word_shift;
+            let Some(&low) = ret.0.get(src) else {
+                ret.0[idx] = 0;
+                continue;
+            };
+
+            // The next word supplies the bits shifted in from above. For a whole-word shift
+            // (`bit_shift == 0`) it supplies none: `checked_shl(64)` is `None`, whereas
+            // `overflowing_shl(64)` wraps the amount to 0 and would OR the word in unshifted.
+            let carry = ret.0.get(src + 1).and_then(|hi| hi.checked_shl(64 - bit_shift));
+            ret.0[idx] = (low >> bit_shift) | carry.unwrap_or(0);
+        }
+
+        ret
+    }
+}
+
 macro_rules! word {
     (1, $val:expr) => {
         (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
diff --git a/testcrate/benches/float_div.rs b/testcrate/benches/float_div.rs
@@ -1,5 +1,7 @@
+#![cfg_attr(f128_enabled, feature(f128))]
+
 use compiler_builtins::float::div;
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{criterion_main, Criterion};
 use testcrate::float_bench;
 
 float_bench! {
@@ -64,5 +66,28 @@ float_bench! {
     ],
 }
 
-criterion_group!(float_div, div_f32, div_f64);
+#[cfg(f128_enabled)]
+float_bench! {
+    name: div_f128,
+    sig: (a: f128, b: f128) -> f128,
+    crate_fn: div::__divtf3,
+    crate_fn_ppc: div::__divkf3,
+    sys_fn: __divtf3,
+    sys_fn_ppc: __divkf3,
+    sys_available: not(feature = "no-sys-f128"),
+    asm: []
+}
+
+/// Benchmark group entry point, written by hand in place of `criterion_group!` so that
+/// the `f128` benchmark can be gated on `cfg(f128_enabled)`.
+pub fn float_div() {
+    let mut c = Criterion::default().configure_from_args();
+
+    div_f32(&mut c);
+    div_f64(&mut c);
+
+    #[cfg(f128_enabled)]
+    div_f128(&mut c);
+}
+
 criterion_main!(float_div);
diff --git a/testcrate/src/bench.rs b/testcrate/src/bench.rs
@@ -30,13 +30,14 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
 
     // FIXME(f16_f128): system symbols have incorrect results
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
-    const X86_NO_SSE_SKIPPED: &[&str] =
-        &["add_f128", "sub_f128", "mul_f128", "powi_f32", "powi_f64"];
+    const X86_NO_SSE_SKIPPED: &[&str] = &[
+        "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
+    ];
 
     // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
     // uses `compiler-rt` version.
     // <https://github.com/llvm/llvm-project/issues/91840>
-    const AARCH64_SKIPPED: &[&str] = &["mul_f128"];
+    const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];
 
     // FIXME(llvm): system symbols have incorrect results on Windows
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
diff --git a/testcrate/tests/big.rs b/testcrate/tests/big.rs
@@ -59,3 +59,76 @@ fn widen_mul_u128() {
     }
     assert!(errors.is_empty());
 }
+
+#[test]
+fn not_u128() {
+    assert_eq!(!u256::ZERO, u256::MAX); // `!` applied to all-zero bits must yield all ones
+}
+
+/// Checks `u256 >> u32` against native `u128` shifts and against hand-computed vectors.
+#[test]
+fn shr_u128() {
+    let only_low = [
+        1,
+        u16::MAX.into(),
+        u32::MAX.into(),
+        u64::MAX.into(),
+        u128::MAX,
+    ];
+
+    let mut errors = Vec::new();
+    // Shifting a widened `u128` must match widening the result of the native `u128` shift.
+    for a in only_low {
+        for perturb in 0..10 {
+            let a = a.saturating_add(perturb);
+            for shift in 0..128 {
+                let res = a.widen() >> shift;
+                let expected = (a >> shift).widen();
+                if res != expected {
+                    errors.push((a.widen(), shift, res, expected));
+                }
+            }
+        }
+    }
+    // Fixed vectors around each 64-bit word boundary, plus one non-uniform carry check.
+    let check = [
+        (
+            u256::MAX,
+            1,
+            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]),
+        ),
+        (
+            u256::MAX,
+            5,
+            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]),
+        ),
+        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
+        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
+        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
+        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
+        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
+        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
+        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
+        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
+        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
+        (u256([0, 1, 0, 1]), 1, u256([1 << 63, 0, 1 << 63, 0])),
+        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
+        (u256::MAX, 255, u256([1, 0, 0, 0])),
+    ];
+    for (input, shift, expected) in check {
+        let res = input >> shift;
+        if res != expected {
+            errors.push((input, shift, res, expected));
+        }
+    }
+    // Print every collected mismatch before failing so all bad cases are visible at once.
+    for (a, b, res, expected) in &errors {
+        eprintln!(
+            "FAILURE: {} >> {b} = {} got {}",
+            hexu(*a),
+            hexu(*expected),
+            hexu(*res),
+        );
+    }
+    assert!(errors.is_empty());
+}
diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs
@@ -1,3 +1,4 @@
+#![feature(f128)]
 #![allow(unused_macros)]
 
 use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
@@ -115,7 +116,13 @@ macro_rules! float {
                 fuzz_float_2(N, |x: $f, y: $f| {
                     let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
                     let quo1: $f = $fn(x, y);
-                    #[cfg(not(target_arch = "arm"))]
+
+                    // ARM SIMD instructions always flush subnormals to zero
+                    if cfg!(target_arch = "arm") &&
+                        ((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) {
+                        return;
+                    }
+
                     if !Float::eq_repr(quo0, quo1) {
                         panic!(
                             "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
@@ -126,21 +133,6 @@ macro_rules! float {
                             quo1
                         );
                     }
-
-                    // ARM SIMD instructions always flush subnormals to zero
-                    #[cfg(target_arch = "arm")]
-                    if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) {
-                        if !Float::eq_repr(quo0, quo1) {
-                            panic!(
-                                "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
-                                stringify!($fn),
-                                x,
-                                y,
-                                quo0,
-                                quo1
-                            );
-                        }
-                    }
                 });
             }
         )*
@@ -155,4 +147,19 @@ mod float_div {
         f32, __divsf3, Single, all();
         f64, __divdf3, Double, all();
     }
+
+    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
+    float! {
+        f128, __divtf3, Quad,
+        // FIXME(llvm): there is a bug in LLVM rt.
+        // See <https://github.com/llvm/llvm-project/issues/91840>.
+        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
+    }
+
+    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
+    float! {
+        f128, __divkf3, Quad, not(feature = "no-sys-f128");
+    }
 }