rust-lang · tesuji · May 27, 2019 · Jul 7, 2019 · Jul 7, 2019 · May 28, 2019
diff --git a/src/math/cbrtf.rs b/src/math/cbrtf.rs
@@ -1,8 +1,7 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
-/*
+/* sf_cbrt.c -- float version of s_cbrt.c.
  * Conversion to float by Ian Lance Taylor, Cygnus Support, [email protected].
- * Debugged and optimized by Bruce D. Evans.
  */
+
 /*
  * ====================================================
  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@@ -12,65 +11,52 @@
  * software is freely granted, provided that this notice
  * is preserved.
  * ====================================================
+ *
  */
-/* cbrtf(x)
- * Return cube root of x
- */
 
-use core::f32;
+use super::fdlibm::{FLT_UWORD_IS_FINITE, FLT_UWORD_IS_SUBNORMAL, FLT_UWORD_IS_ZERO};
+
+const B1: u32 = 709_958_130; /* B1 = (84+2/3-0.03306235651)*2**23 */
+const B2: u32 = 642_849_266; /* B2 = (76+2/3-0.03306235651)*2**23 */
 
-const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
-const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
+const C: f32 = 5.428_571_701_0e-01; /* 19/35        = 0x3f0a_f8b0 */
+const D: f32 = -7.053_061_127_7e-01; /* -864/1225   = 0xbf34_8ef1 */
+const E: f32 = 1.414_285_659_8e+00; /* 99/70        = 0x3fb5_0750 */
+const F: f32 = 1.607_142_806_1e+00; /* 45/28        = 0x3fcd_b6db */
+const G: f32 = 3.571_428_656_6e-01; /* 5/14         = 0x3eb6_db6e */
 
 /// Cube root (f32)
 ///
 /// Computes the cube root of the argument.
 #[inline]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn cbrtf(x: f32) -> f32 {
-    let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
+pub extern "C" fn cbrtf(x: f32) -> f32 {
+    let hx: u32 = x.to_bits() & 0x7fff_ffff; /* |x| */
+    let sign: u32 = x.to_bits() & 0x8000_0000; /* sign = sign(x) */
 
-    let mut r: f64;
-    let mut t: f64;
-    let mut ui: u32 = x.to_bits();
-    let mut hx: u32 = ui & 0x7fffffff;
-
-    if hx >= 0x7f800000 {
-        /* cbrt(NaN,INF) is itself */
-        return x + x;
+    if !FLT_UWORD_IS_FINITE(hx) {
+        return x + x; /* cbrt(NaN,INF) is itself */
+    }
+    if FLT_UWORD_IS_ZERO(hx) {
+        return x; /* cbrt(0) is itself */
     }
 
+    let x = f32::from_bits(hx); /* x <- |x| */
     /* rough cbrt to 5 bits */
-    if hx < 0x00800000 {
-        /* zero or subnormal? */
-        if hx == 0 {
-            return x; /* cbrt(+-0) is itself */
-        }
-        ui = (x * x1p24).to_bits();
-        hx = ui & 0x7fffffff;
-        hx = hx / 3 + B2;
+    let mut t: f32 = if FLT_UWORD_IS_SUBNORMAL(hx) {
+        /* subnormal number */
+        const X1P24: f32 = 16777216f32; // 2 ** 24
+        let high: u32 = (x * X1P24).to_bits();
+        f32::from_bits((high / 3).wrapping_add(B2))
     } else {
-        hx = hx / 3 + B1;
-    }
-    ui &= 0x80000000;
-    ui |= hx;
-
-    /*
-     * First step Newton iteration (solving t*t-x/t == 0) to 16 bits.  In
-     * double precision so that its terms can be arranged for efficiency
-     * without causing overflow or underflow.
-     */
-    t = f32::from_bits(ui) as f64;
-    r = t * t * t;
-    t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r);
+        f32::from_bits((hx / 3).wrapping_add(B1))
+    };
 
-    /*
-     * Second step Newton iteration to 47 bits.  In double precision for
-     * efficiency and accuracy.
-     */
-    r = t * t * t;
-    t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r);
+    /* new cbrt to 23 bits */
+    let r: f32 = t * t / x;
+    let s: f32 = C + r * t;
+    t *= G + F / (s + E + D / s);
 
-    /* rounding to 24 bits is perfect in round-to-nearest mode */
-    t as f32
+    /* restore the sign bit */
+    f32::from_bits(t.to_bits() | sign)
 }
diff --git a/src/math/mod.rs b/src/math/mod.rs
@@ -344,3 +344,93 @@ fn with_set_low_word(f: f64, lo: u32) -> f64 {
 fn combine_words(hi: u32, lo: u32) -> f64 {
     f64::from_bits((hi as u64) << 32 | lo as u64)
 }
+
+mod fdlibm {
+    #![allow(dead_code)] // FIXME: remove it after other dependent codes merged
+    #![allow(non_snake_case)]
+
+    /* @(#)fdlibm.h 5.1 93/09/24 */
+    /*
+     * ====================================================
+     * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+     *
+     * Developed at SunPro, a Sun Microsystems, Inc. business.
+     * Permission to use, copy, modify, and distribute this
+     * software is freely granted, provided that this notice
+     * is preserved.
+     * ====================================================
+     */
+
+    /* Most routines need to check whether a float is finite, infinite, or not a
+    number, and many need to know whether the result of an operation will
+    overflow.  These conditions depend on whether the largest exponent is
+    used for NaNs & infinities, or whether it's used for finite numbers. */
+
+    /// True if a positive float with bitmask X is finite.
+    #[inline]
+    pub(crate) fn FLT_UWORD_IS_FINITE(x: u32) -> bool {
+        x < INFINITE
+    }
+
+    /// True if a positive float with bitmask X is not a number.
+    #[inline]
+    pub(crate) fn FLT_UWORD_IS_NAN(x: u32) -> bool {
+        x > INFINITE
+    }
+
+    /// True if a positive float with bitmask X is +infinity.
+    #[inline]
+    pub(crate) fn FLT_UWORD_IS_INFINITE(x: u32) -> bool {
+        x == INFINITE
+    }
+
+    const INFINITE: u32 = 0x7f80_0000;
+
+    /// The bitmask of FLT_MAX.
+    pub(crate) const FLT_UWORD_MAX: u32 = 0x7f7f_ffff;
+    /// The bitmask of FLT_MAX/2.
+    pub(crate) const FLT_UWORD_HALF_MAX: u32 = FLT_UWORD_MAX - (1 << 23);
+    /// The bitmask of the largest finite exponent (129 if the largest
+    /// exponent is used for finite numbers, 128 otherwise).
+    pub(crate) const FLT_UWORD_EXP_MAX: u32 = 0x4300_0000;
+    /// The bitmask of log(FLT_MAX), rounded down.  This value is the largest
+    /// input that can be passed to exp() without producing overflow.
+    pub(crate) const FLT_UWORD_LOG_MAX: u32 = 0x42b1_7217;
+    /// The bitmask of log(2*FLT_MAX), rounded down.  This value is the
+    /// largest input than can be passed to cosh() without producing
+    /// overflow.
+    pub(crate) const FLT_UWORD_LOG_2MAX: u32 = 0x42b2_d4fc;
+    /// The largest biased exponent that can be used for finite numbers
+    /// (255 if the largest exponent is used for finite numbers, 254
+    /// otherwise)
+    pub(crate) const FLT_LARGEST_EXP: u32 = FLT_UWORD_MAX >> 23;
+    pub(crate) const HUGE: f32 = 3.402_823_466_385_288_60e+38;
+
+    /* Many routines check for zero and subnormal numbers.  Such things depend
+    on whether the target supports denormals or not */
+
+    /// True if a positive float with bitmask X is +0.  Without denormals,
+    /// any float with a zero exponent is a +0 representation.  With
+    /// denormals, the only +0 representation is a 0 bitmask.
+    #[inline]
+    pub(crate) fn FLT_UWORD_IS_ZERO(x: u32) -> bool {
+        x == 0
+    }
+
+    /// True if a non-zero positive float with bitmask X is subnormal.
+    /// (Routines should check for zeros first.)
+    #[inline]
+    pub(crate) fn FLT_UWORD_IS_SUBNORMAL(x: u32) -> bool {
+        x < 0x0080_0000
+    }
+
+    /// The bitmask of the smallest float above +0.  Call this number REAL_FLT_MIN...
+    pub(crate) const FLT_UWORD_MIN: u32 = 0x0000_0001;
+    /// The bitmask of the float representation of REAL_FLT_MIN's exponent.
+    pub(crate) const FLT_UWORD_EXP_MIN: u32 = 0x4316_0000;
+    /// The bitmask of |log(REAL_FLT_MIN)|, rounding down.
+    pub(crate) const FLT_UWORD_LOG_MIN: u32 = 0x42cf_f1b5;
+    /// REAL_FLT_MIN's exponent - EXP_BIAS (1 if denormals are not supported,
+    /// -22 if they are).
+    pub(crate) const FLT_SMALLEST_EXP: i32 = -22;
+}