rust-lang · andy-thomason · Feb 14, 2022 · Feb 14, 2022 · Feb 14, 2022 · Feb 14, 2022
diff --git a/crates/std_float/benches/bench_libm.rs b/crates/std_float/benches/bench_libm.rs
@@ -0,0 +1,177 @@
+#![feature(test)]
+#![feature(portable_simd)]
+#![feature(concat_idents)]
+
+extern crate test;
+use std_float::StdLibm;
+
+use test::{black_box, Bencher};
+
+use core_simd::{f32x16, f32x4, f64x4, f64x8};
+
+const N: usize = 1024;
+
+fn init_f32x4() -> Vec<f32x4> {
+    vec![f32x4::splat(black_box(0.5)); N / 4]
+}
+
+fn init_f32x16() -> Vec<f32x16> {
+    vec![f32x16::splat(black_box(0.5)); N / 16]
+}
+
+fn init_f32() -> Vec<f32> {
+    vec![black_box(0.5); N]
+}
+
+fn init_f64x4() -> Vec<f64x4> {
+    vec![f64x4::splat(black_box(0.5)); N / 4]
+}
+
+fn init_f64x8() -> Vec<f64x8> {
+    vec![f64x8::splat(black_box(0.5)); N / 8]
+}
+
+fn init_f64() -> Vec<f64> {
+    vec![black_box(0.5); N] // `N` scalar inputs of 0.5, same seed as every other `init_*` buffer
+}
+
+// These functions are not inlined to make it easier to check the asm.
+//
+// Build with:
+//
+// RUSTFLAGS="-C target-cpu=native --emit asm" cargo bench
+
+// Expands each `name, closure, init` triple into a `#[bench]` function. The
+// generated benchmark applies the closure to every element of one `init`
+// buffer and writes the results into a second buffer of the same type.
+macro_rules! benchmark_libm {
+    (functions ($($names:ident, $functions:expr, $init:expr)*)) => {
+        $(
+            #[bench]
+            #[inline(never)]
+            fn $names(b: &mut Bencher) {
+                let x = $init;
+                let mut y = $init;
+                b.iter(|| {
+                    // Hide the input so the loop-invariant math is not hoisted
+                    // out of the timed closure.
+                    for (src, dst) in black_box(&x).iter().zip(y.iter_mut()) {
+                        *dst = ($functions)(*src);
+                    }
+                    // Nothing else reads `y`; keep its stores observable.
+                    black_box(&mut y);
+                })
+            }
+        )*
+    }
+}
+
+benchmark_libm! {
+    functions (
+        sin_f32x4,  |x : f32x4| x.sin(),  init_f32x4()
+        sin_f32x16, |x : f32x16| x.sin(), init_f32x16()
+        sin_f32,    |x : f32| x.sin(),    init_f32()
+        sin_f64x4,  |x : f64x4| x.sin(),  init_f64x4()
+        sin_f64x8,  |x : f64x8| x.sin(),  init_f64x8()
+        sin_f64,    |x : f64| x.sin(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        cos_f32x4,  |x : f32x4| x.cos(),  init_f32x4()
+        cos_f32x16, |x : f32x16| x.cos(), init_f32x16()
+        cos_f32,    |x : f32| x.cos(),    init_f32()
+        cos_f64x4,  |x : f64x4| x.cos(),  init_f64x4()
+        cos_f64x8,  |x : f64x8| x.cos(),  init_f64x8()
+        cos_f64,    |x : f64| x.cos(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        tan_f32x4,  |x : f32x4| x.tan(),  init_f32x4()
+        tan_f32x16, |x : f32x16| x.tan(), init_f32x16()
+        tan_f32,    |x : f32| x.tan(),    init_f32()
+        tan_f64x4,  |x : f64x4| x.tan(),  init_f64x4()
+        tan_f64x8,  |x : f64x8| x.tan(),  init_f64x8()
+        tan_f64,    |x : f64| x.tan(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        asin_f32x4,  |x : f32x4| x.asin(),  init_f32x4()
+        asin_f32x16, |x : f32x16| x.asin(), init_f32x16()
+        asin_f32,    |x : f32| x.asin(),    init_f32()
+        asin_f64x4,  |x : f64x4| x.asin(),  init_f64x4()
+        asin_f64x8,  |x : f64x8| x.asin(),  init_f64x8()
+        asin_f64,    |x : f64| x.asin(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        acos_f32x4,  |x : f32x4| x.acos(),  init_f32x4()
+        acos_f32x16, |x : f32x16| x.acos(), init_f32x16()
+        acos_f32,    |x : f32| x.acos(),    init_f32()
+        acos_f64x4,  |x : f64x4| x.acos(),  init_f64x4()
+        acos_f64x8,  |x : f64x8| x.acos(),  init_f64x8()
+        acos_f64,    |x : f64| x.acos(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        atan_f32x4,  |x : f32x4| x.atan(),  init_f32x4()
+        atan_f32x16, |x : f32x16| x.atan(), init_f32x16()
+        atan_f32,    |x : f32| x.atan(),    init_f32()
+        atan_f64x4,  |x : f64x4| x.atan(),  init_f64x4()
+        atan_f64x8,  |x : f64x8| x.atan(),  init_f64x8()
+        atan_f64,    |x : f64| x.atan(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        exp2_f32x4,  |x : f32x4| x.exp2(),  init_f32x4()
+        exp2_f32x16, |x : f32x16| x.exp2(), init_f32x16()
+        exp2_f32,    |x : f32| x.exp2(),    init_f32()
+        exp2_f64x4,  |x : f64x4| x.exp2(),  init_f64x4()
+        exp2_f64x8,  |x : f64x8| x.exp2(),  init_f64x8()
+        exp2_f64,    |x : f64| x.exp2(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        exp_f32x4,  |x : f32x4| x.exp(),  init_f32x4()
+        exp_f32x16, |x : f32x16| x.exp(), init_f32x16()
+        exp_f32,    |x : f32| x.exp(),    init_f32()
+        exp_f64x4,  |x : f64x4| x.exp(),  init_f64x4()
+        exp_f64x8,  |x : f64x8| x.exp(),  init_f64x8()
+        exp_f64,    |x : f64| x.exp(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        log2_f32x4,  |x : f32x4| x.log2(),  init_f32x4()
+        log2_f32x16, |x : f32x16| x.log2(), init_f32x16()
+        log2_f32,    |x : f32| x.log2(),    init_f32()
+        log2_f64x4,  |x : f64x4| x.log2(),  init_f64x4()
+        log2_f64x8,  |x : f64x8| x.log2(),  init_f64x8()
+        log2_f64,    |x : f64| x.log2(),    init_f64()
+    )
+}
+
+benchmark_libm! {
+    functions (
+        ln_f32x4,  |x : f32x4| x.ln(),  init_f32x4()
+        ln_f32x16, |x : f32x16| x.ln(), init_f32x16()
+        ln_f32,    |x : f32| x.ln(),    init_f32()
+        ln_f64x4,  |x : f64x4| x.ln(),  init_f64x4()
+        ln_f64x8,  |x : f64x8| x.ln(),  init_f64x8()
+        ln_f64,    |x : f64| x.ln(),    init_f64()
+    )
+}
diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs
@@ -11,6 +11,12 @@ use core_simd::simd;
 
 use simd::{LaneCount, Simd, SupportedLaneCount};
 
+mod libm32;
+mod libm64;
+
+#[cfg(test)]
+mod test_libm;
+
 #[cfg(feature = "as_crate")]
 mod experimental {
     pub trait Sealed {}
@@ -117,6 +123,102 @@ pub trait StdFloat: Sealed + Sized {
     fn fract(self) -> Self;
 }
 
+pub trait StdLibm: StdFloat {
+    /// Signed integer type with the same number of bits as this floating point type.
+    type IntType;
+
+    /// Unsigned integer type with the same number of bits as this floating point type.
+    type UintType;
+
+    /// Computes the sine of a number (in radians).
+    fn sin(self) -> Self;
+
+    /// Computes the cosine of a number (in radians).
+    fn cos(self) -> Self;
+
+    /// Computes the tangent of a number (in radians).
+    fn tan(self) -> Self;
+
+    /// Computes the arcsine of a number. Return value is in radians in
+    /// the range [-pi/2, pi/2] or NaN if the number is outside the range
+    /// [-1, 1].
+    fn asin(self) -> Self;
+
+    /// Computes the arccosine of a number. Return value is in radians in
+    /// the range [0, pi] or NaN if the number is outside the range
+    /// [-1, 1].
+    fn acos(self) -> Self;
+
+    /// Computes the arctangent of a number. Return value is in radians in the
+    /// range [-pi/2, pi/2].
+    fn atan(self) -> Self;
+
+    /// Computes the four quadrant arctangent of `self` (`y`) and `x` in radians.
+    ///
+    /// * `x = 0`, `y = 0`: `0`
+    /// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
+    /// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
+    /// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`
+    fn atan2(self, x: Self) -> Self;
+
+    /// Returns `2^(self)`.
+    fn exp2(self) -> Self;
+
+    /// Returns `e^(self)`, (the exponential function).
+    fn exp(self) -> Self;
+
+    /// Returns `e^(self) - 1` in a way that is accurate even if the
+    /// number is close to zero.
+    fn exp_m1(self) -> Self;
+
+    /// Returns the base 2 logarithm of the number.
+    fn log2(self) -> Self;
+
+    /// Returns `ln(1 + self)` (natural logarithm) more accurately than if
+    /// the operations were performed separately.
+    fn ln_1p(self) -> Self;
+
+    /// Returns the natural logarithm of the number.
+    fn ln(self) -> Self;
+
+    /// Returns the base 10 logarithm of the number.
+    fn log10(self) -> Self;
+
+    /// Returns the logarithm of the number with respect to an arbitrary base.
+    fn log(self, base: Self) -> Self;
+
+    /// Raises a number to a floating point power.
+    fn powf(self, y: Self) -> Self;
+
+    /// Raises a number to an integer power.
+    fn powi(self, y: Self::IntType) -> Self;
+
+    /// Hyperbolic sine function.
+    fn sinh(self) -> Self;
+
+    /// Hyperbolic cosine function.
+    fn cosh(self) -> Self;
+
+    /// Hyperbolic tangent function.
+    fn tanh(self) -> Self;
+
+    /// Inverse hyperbolic sine function.
+    fn asinh(self) -> Self;
+
+    /// Inverse hyperbolic cosine function.
+    fn acosh(self) -> Self;
+
+    /// Inverse hyperbolic tangent function.
+    fn atanh(self) -> Self;
+
+    /// Returns the cube root of a number.
+    fn cbrt(self) -> Self;
+
+    /// Calculates the length of the hypotenuse of a right-angle triangle given
+    /// legs of length `self` and `other`.
+    fn hypot(self, other: Self) -> Self;
+}
+
 impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
 impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
 
@@ -161,5 +263,6 @@ mod tests {
         let _xfma = x.mul_add(x, x);
         let _xsqrt = x.sqrt();
         let _ = x2.abs() * x2;
+        let _ = x.sin();
     }
 }