diff --git a/src/math/cbrtf.rs b/src/math/cbrtf.rs index 6e589c099..a8a426e54 100644 --- a/src/math/cbrtf.rs +++ b/src/math/cbrtf.rs @@ -1,8 +1,7 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ -/* +/* sf_cbrt.c -- float version of s_cbrt.c. * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Debugged and optimized by Bruce D. Evans. */ + /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. @@ -12,65 +11,52 @@ * software is freely granted, provided that this notice * is preserved. * ==================================================== + * */ -/* cbrtf(x) - * Return cube root of x - */ -use core::f32; +use super::fdlibm::{FLT_UWORD_IS_FINITE, FLT_UWORD_IS_SUBNORMAL, FLT_UWORD_IS_ZERO}; + +const B1: u32 = 709_958_130; /* B1 = (84+2/3-0.03306235651)*2**23 */ +const B2: u32 = 642_849_266; /* B2 = (76+2/3-0.03306235651)*2**23 */ -const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ -const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ +const C: f32 = 5.428_571_701_0e-01; /* 19/35 = 0x3f0a_f8b0 */ +const D: f32 = -7.053_061_127_7e-01; /* -864/1225 = 0xbf34_8ef1 */ +const E: f32 = 1.414_285_659_8e+00; /* 99/70 = 0x3fb5_0750 */ +const F: f32 = 1.607_142_806_1e+00; /* 45/28 = 0x3fcd_b6db */ +const G: f32 = 3.571_428_656_6e-01; /* 5/14 = 0x3eb6_db6e */ /// Cube root (f32) /// /// Computes the cube root of the argument. #[inline] #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn cbrtf(x: f32) -> f32 { - let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 +pub extern "C" fn cbrtf(x: f32) -> f32 { + let hx: u32 = x.to_bits() & 0x7fff_ffff; /* |x| */ + let sign: u32 = x.to_bits() & 0x8000_0000; /* sign = sign(x) */ - let mut r: f64; - let mut t: f64; - let mut ui: u32 = x.to_bits(); - let mut hx: u32 = ui & 0x7fffffff; - - if hx >= 0x7f800000 { - /* cbrt(NaN,INF) is itself */ - return x + x; + if !FLT_UWORD_IS_FINITE(hx) { + return x + x; /* cbrt(NaN,INF) is itself */ + } + if FLT_UWORD_IS_ZERO(hx) { + return x; /* cbrt(0) is itself */ } + let x = f32::from_bits(hx); /* x <- |x| */ /* rough cbrt to 5 bits */ - if hx < 0x00800000 { - /* zero or subnormal? */ - if hx == 0 { - return x; /* cbrt(+-0) is itself */ - } - ui = (x * x1p24).to_bits(); - hx = ui & 0x7fffffff; - hx = hx / 3 + B2; + let mut t: f32 = if FLT_UWORD_IS_SUBNORMAL(hx) { + /* subnormal number */ + const X1P24: f32 = 16777216f32; // 2 ** 24 + let high: u32 = (x * X1P24).to_bits(); + f32::from_bits((high / 3).wrapping_add(B2)) } else { - hx = hx / 3 + B1; - } - ui &= 0x80000000; - ui |= hx; - - /* - * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In - * double precision so that its terms can be arranged for efficiency - * without causing overflow or underflow. - */ - t = f32::from_bits(ui) as f64; - r = t * t * t; - t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + f32::from_bits((hx / 3).wrapping_add(B1)) + }; - /* - * Second step Newton iteration to 47 bits. In double precision for - * efficiency and accuracy. - */ - r = t * t * t; - t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); + /* new cbrt to 23 bits */ + let r: f32 = t * t / x; + let s: f32 = C + r * t; + t *= G + F / (s + E + D / s); - /* rounding to 24 bits is perfect in round-to-nearest mode */ - t as f32 + /* restore the sign bit */ + f32::from_bits(t.to_bits() | sign) } diff --git a/src/math/mod.rs b/src/math/mod.rs index 48b400a92..3c7d05111 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -344,3 +344,93 @@ fn with_set_low_word(f: f64, lo: u32) -> f64 { fn combine_words(hi: u32, lo: u32) -> f64 { f64::from_bits((hi as u64) << 32 | lo as u64) } + +mod fdlibm { + #![allow(dead_code)] // FIXME: remove it after other dependent codes merged + #![allow(non_snake_case)] + + /* @(#)fdlibm.h 5.1 93/09/24 */ + /* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + + /* Most routines need to check whether a float is finite, infinite, or not a + number, and many need to know whether the result of an operation will + overflow. These conditions depend on whether the largest exponent is + used for NaNs & infinities, or whether it's used for finite numbers. */ + + /// True if a positive float with bitmask X is finite. + #[inline] + pub(crate) fn FLT_UWORD_IS_FINITE(x: u32) -> bool { + x < INFINITE + } + + /// True if a positive float with bitmask X is not a number. + #[inline] + pub(crate) fn FLT_UWORD_IS_NAN(x: u32) -> bool { + x > INFINITE + } + + /// True if a positive float with bitmask X is +infinity. + #[inline] + pub(crate) fn FLT_UWORD_IS_INFINITE(x: u32) -> bool { + x == INFINITE + } + + const INFINITE: u32 = 0x7f80_0000; + + /// The bitmask of FLT_MAX. + pub(crate) const FLT_UWORD_MAX: u32 = 0x7f7f_ffff; + /// The bitmask of FLT_MAX/2. + pub(crate) const FLT_UWORD_HALF_MAX: u32 = FLT_UWORD_MAX - (1 << 23); + /// The bitmask of the largest finite exponent (129 if the largest + /// exponent is used for finite numbers, 128 otherwise). + pub(crate) const FLT_UWORD_EXP_MAX: u32 = 0x4300_0000; + /// The bitmask of log(FLT_MAX), rounded down. This value is the largest + /// input that can be passed to exp() without producing overflow. + pub(crate) const FLT_UWORD_LOG_MAX: u32 = 0x42b1_7217; + /// The bitmask of log(2*FLT_MAX), rounded down. This value is the + /// largest input than can be passed to cosh() without producing + /// overflow. + pub(crate) const FLT_UWORD_LOG_2MAX: u32 = 0x42b2_d4fc; + /// The largest biased exponent that can be used for finite numbers + /// (255 if the largest exponent is used for finite numbers, 254 + /// otherwise) + pub(crate) const FLT_LARGEST_EXP: u32 = FLT_UWORD_MAX >> 23; + pub(crate) const HUGE: f32 = 3.402_823_466_385_288_60e+38; + + /* Many routines check for zero and subnormal numbers. Such things depend + on whether the target supports denormals or not */ + + /// True if a positive float with bitmask X is +0. Without denormals, + /// any float with a zero exponent is a +0 representation. With + /// denormals, the only +0 representation is a 0 bitmask. + #[inline] + pub(crate) fn FLT_UWORD_IS_ZERO(x: u32) -> bool { + x == 0 + } + + /// True if a non-zero positive float with bitmask X is subnormal. + /// (Routines should check for zeros first.) + #[inline] + pub(crate) fn FLT_UWORD_IS_SUBNORMAL(x: u32) -> bool { + x < 0x0080_0000 + } + + /// The bitmask of the smallest float above +0. Call this number REAL_FLT_MIN... + pub(crate) const FLT_UWORD_MIN: u32 = 0x0000_0001; + /// The bitmask of the float representation of REAL_FLT_MIN's exponent. + pub(crate) const FLT_UWORD_EXP_MIN: u32 = 0x4316_0000; + /// The bitmask of |log(REAL_FLT_MIN)|, rounding down. + pub(crate) const FLT_UWORD_LOG_MIN: u32 = 0x42cf_f1b5; + /// REAL_FLT_MIN's exponent - EXP_BIAS (1 if denormals are not supported, + /// -22 if they are). + pub(crate) const FLT_SMALLEST_EXP: i32 = -22; +}