Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LIBM - sin, cos, ln, exp and friends. #126

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
177 changes: 177 additions & 0 deletions crates/std_float/benches/bench_libm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// Nightly features this benchmark actually uses: `test` for `Bencher` /
// `black_box`, and `portable_simd` for the `core_simd` vector types.
#![feature(test)]
#![feature(portable_simd)]

extern crate test;
use std_float::StdLibm;

use test::{black_box, Bencher};

use core_simd::{f32x16, f32x4, f64x4, f64x8};

// Number of scalar elements in each benchmark buffer. The vector
// initializers divide this by their lane count (`N / 4`, `N / 8`, `N / 16`).
const N: usize = 1024;

/// Builds the `f32x4` benchmark buffer: `N / 4` vectors with every lane 0.5.
///
/// The seed value goes through `black_box` before being splatted.
fn init_f32x4() -> Vec<f32x4> {
    let lanes = f32x4::splat(black_box(0.5));
    vec![lanes; N / 4]
}

/// Builds the `f32x16` benchmark buffer: `N / 16` vectors with every lane 0.5.
///
/// The seed value goes through `black_box` before being splatted.
fn init_f32x16() -> Vec<f32x16> {
    let lanes = f32x16::splat(black_box(0.5));
    vec![lanes; N / 16]
}

/// Builds the scalar `f32` benchmark buffer: `N` copies of 0.5.
///
/// The seed value goes through `black_box` before the buffer is filled.
fn init_f32() -> Vec<f32> {
    let seed: f32 = black_box(0.5);
    vec![seed; N]
}

/// Builds the `f64x4` benchmark buffer: `N / 4` vectors with every lane 0.5.
///
/// The seed value goes through `black_box` before being splatted.
fn init_f64x4() -> Vec<f64x4> {
    let lanes = f64x4::splat(black_box(0.5));
    vec![lanes; N / 4]
}

/// Builds the `f64x8` benchmark buffer: `N / 8` vectors with every lane 0.5.
///
/// The seed value goes through `black_box` before being splatted.
fn init_f64x8() -> Vec<f64x8> {
    let lanes = f64x8::splat(black_box(0.5));
    vec![lanes; N / 8]
}

/// Builds the scalar `f64` benchmark buffer: `N` copies of 0.5.
///
/// Uses the same 0.5 seed as the other initializers so the scalar `f64`
/// numbers are measured on the same input as the `f64x4` / `f64x8` ones.
/// The seed value goes through `black_box` before the buffer is filled.
fn init_f64() -> Vec<f64> {
    vec![black_box(0.5); N]
}

// These functions are not inlined to make it easier to check the asm.
//
// Build with:
//
// RUSTFLAGS="-C target-cpu=native --emit asm" cargo bench

// Generates one `#[bench]` function per `name, function, init` triple.
//
// Each generated benchmark evaluates `$init` twice (an input buffer and an
// output buffer of the same type), then times a loop that applies
// `$functions` to every input element and stores the result in the matching
// output slot. Triples are listed back to back with no separator.
macro_rules! benchmark_libm {
    (
        functions ($(
            $names : ident,
            $functions : expr,
            $init : expr
        )*)
    ) => {

        $(
            #[bench]
            #[inline(never)]
            fn $names(b: &mut Bencher) {
                let x = $init;
                let mut y = $init;
                b.iter(|| {
                    // Pass the input through `black_box` so the optimizer
                    // cannot assume anything about its contents and hoist
                    // the computation out of the timed loop.
                    for (x, y) in black_box(&x).iter().zip(y.iter_mut()) {
                        *y = ($functions)(*x);
                    }
                    // Make the results observable; otherwise the stores to
                    // `y` are dead and the measured work may be removed.
                    black_box(&y);
                })
            }
        )*
    }
}

// `sin` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        sin_f32x4, |v: f32x4| v.sin(), init_f32x4()
        sin_f32x16, |v: f32x16| v.sin(), init_f32x16()
        sin_f32, |v: f32| v.sin(), init_f32()
        sin_f64x4, |v: f64x4| v.sin(), init_f64x4()
        sin_f64x8, |v: f64x8| v.sin(), init_f64x8()
        sin_f64, |v: f64| v.sin(), init_f64()
    )
}

// `cos` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        cos_f32x4, |v: f32x4| v.cos(), init_f32x4()
        cos_f32x16, |v: f32x16| v.cos(), init_f32x16()
        cos_f32, |v: f32| v.cos(), init_f32()
        cos_f64x4, |v: f64x4| v.cos(), init_f64x4()
        cos_f64x8, |v: f64x8| v.cos(), init_f64x8()
        cos_f64, |v: f64| v.cos(), init_f64()
    )
}

// `tan` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        tan_f32x4, |v: f32x4| v.tan(), init_f32x4()
        tan_f32x16, |v: f32x16| v.tan(), init_f32x16()
        tan_f32, |v: f32| v.tan(), init_f32()
        tan_f64x4, |v: f64x4| v.tan(), init_f64x4()
        tan_f64x8, |v: f64x8| v.tan(), init_f64x8()
        tan_f64, |v: f64| v.tan(), init_f64()
    )
}

// `asin` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        asin_f32x4, |v: f32x4| v.asin(), init_f32x4()
        asin_f32x16, |v: f32x16| v.asin(), init_f32x16()
        asin_f32, |v: f32| v.asin(), init_f32()
        asin_f64x4, |v: f64x4| v.asin(), init_f64x4()
        asin_f64x8, |v: f64x8| v.asin(), init_f64x8()
        asin_f64, |v: f64| v.asin(), init_f64()
    )
}

// `acos` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        acos_f32x4, |v: f32x4| v.acos(), init_f32x4()
        acos_f32x16, |v: f32x16| v.acos(), init_f32x16()
        acos_f32, |v: f32| v.acos(), init_f32()
        acos_f64x4, |v: f64x4| v.acos(), init_f64x4()
        acos_f64x8, |v: f64x8| v.acos(), init_f64x8()
        acos_f64, |v: f64| v.acos(), init_f64()
    )
}

// `atan` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        atan_f32x4, |v: f32x4| v.atan(), init_f32x4()
        atan_f32x16, |v: f32x16| v.atan(), init_f32x16()
        atan_f32, |v: f32| v.atan(), init_f32()
        atan_f64x4, |v: f64x4| v.atan(), init_f64x4()
        atan_f64x8, |v: f64x8| v.atan(), init_f64x8()
        atan_f64, |v: f64| v.atan(), init_f64()
    )
}

// `exp2` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        exp2_f32x4, |v: f32x4| v.exp2(), init_f32x4()
        exp2_f32x16, |v: f32x16| v.exp2(), init_f32x16()
        exp2_f32, |v: f32| v.exp2(), init_f32()
        exp2_f64x4, |v: f64x4| v.exp2(), init_f64x4()
        exp2_f64x8, |v: f64x8| v.exp2(), init_f64x8()
        exp2_f64, |v: f64| v.exp2(), init_f64()
    )
}

// `exp` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        exp_f32x4, |v: f32x4| v.exp(), init_f32x4()
        exp_f32x16, |v: f32x16| v.exp(), init_f32x16()
        exp_f32, |v: f32| v.exp(), init_f32()
        exp_f64x4, |v: f64x4| v.exp(), init_f64x4()
        exp_f64x8, |v: f64x8| v.exp(), init_f64x8()
        exp_f64, |v: f64| v.exp(), init_f64()
    )
}

// `log2` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        log2_f32x4, |v: f32x4| v.log2(), init_f32x4()
        log2_f32x16, |v: f32x16| v.log2(), init_f32x16()
        log2_f32, |v: f32| v.log2(), init_f32()
        log2_f64x4, |v: f64x4| v.log2(), init_f64x4()
        log2_f64x8, |v: f64x8| v.log2(), init_f64x8()
        log2_f64, |v: f64| v.log2(), init_f64()
    )
}

// `ln` benchmarks: f32 and f64 vectors at two widths each, plus scalars.
benchmark_libm! {
    functions (
        ln_f32x4, |v: f32x4| v.ln(), init_f32x4()
        ln_f32x16, |v: f32x16| v.ln(), init_f32x16()
        ln_f32, |v: f32| v.ln(), init_f32()
        ln_f64x4, |v: f64x4| v.ln(), init_f64x4()
        ln_f64x8, |v: f64x8| v.ln(), init_f64x8()
        ln_f64, |v: f64| v.ln(), init_f64()
    )
}
103 changes: 103 additions & 0 deletions crates/std_float/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ use core_simd::simd;

use simd::{LaneCount, Simd, SupportedLaneCount};

mod libm32;
mod libm64;

#[cfg(test)]
mod test_libm;

#[cfg(feature = "as_crate")]
mod experimental {
pub trait Sealed {}
Expand Down Expand Up @@ -117,6 +123,102 @@ pub trait StdFloat: Sealed + Sized {
fn fract(self) -> Self;
}

/// Trigonometric, exponential, logarithmic, power, hyperbolic and related
/// math functions for types that already implement [`StdFloat`].
///
/// Every method takes `self` by value and returns `Self`.
// NOTE(review): presumably applied lane-wise when implemented for SIMD
// vectors — confirm against the implementations.
pub trait StdLibm: StdFloat {
/// Signed integer type with the same number of bits as this floating point type.
type IntType;

/// Unsigned integer type with the same number of bits as this floating point type.
type UintType;

/// Computes the sine of a number (in radians).
fn sin(self) -> Self;

/// Computes the cosine of a number (in radians).
fn cos(self) -> Self;

/// Computes the tangent of a number (in radians).
fn tan(self) -> Self;

/// Computes the arcsine of a number. Return value is in radians in
/// the range [-pi/2, pi/2] or NaN if the number is outside the range
/// [-1, 1].
fn asin(self) -> Self;

/// Computes the arccosine of a number. Return value is in radians in
/// the range [0, pi] or NaN if the number is outside the range
/// [-1, 1].
fn acos(self) -> Self;

/// Computes the arctangent of a number. Return value is in radians in the
/// range [-pi/2, pi/2].
fn atan(self) -> Self;

/// Computes the four quadrant arctangent of `self` (`y`) and `x` in radians.
///
/// * `x = 0`, `y = 0`: `0`
/// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
/// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
/// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`
fn atan2(self, x: Self) -> Self;

/// Returns `2^(self)`.
fn exp2(self) -> Self;

/// Returns `e^(self)`, (the exponential function).
fn exp(self) -> Self;

/// Returns `e^(self) - 1` in a way that is accurate even if the
/// number is close to zero.
fn exp_m1(self) -> Self;

/// Returns the base 2 logarithm of the number.
fn log2(self) -> Self;

/// Returns `ln(1+n)` (natural logarithm) more accurately than if
/// the operations were performed separately.
fn ln_1p(self) -> Self;

/// Returns the natural logarithm of the number.
fn ln(self) -> Self;

/// Returns the base 10 logarithm of the number.
fn log10(self) -> Self;

/// Returns the logarithm of the number with respect to an arbitrary base.
fn log(self, base: Self) -> Self;

/// Raises a number to a floating point power.
fn powf(self, y: Self) -> Self;

/// Raises a number to an integer power.
fn powi(self, y: Self::IntType) -> Self;

/// Hyperbolic sine function.
fn sinh(self) -> Self;

/// Hyperbolic cosine function.
fn cosh(self) -> Self;

/// Hyperbolic tangent function.
fn tanh(self) -> Self;

/// Inverse hyperbolic sine function.
fn asinh(self) -> Self;

/// Inverse hyperbolic cosine function.
fn acosh(self) -> Self;

/// Inverse hyperbolic tangent function.
fn atanh(self) -> Self;

/// Returns the cube root of a number.
fn cbrt(self) -> Self;

/// Calculates the length of the hypotenuse of a right-angle triangle given
/// legs of length `self` and `other`.
fn hypot(self, other: Self) -> Self;
}

// Marks `Simd<f32, N>` as `Sealed` for every lane count `N` that satisfies
// `SupportedLaneCount`.
impl<const N: usize> Sealed for Simd<f32, N>
where
    LaneCount<N>: SupportedLaneCount,
{
}
// Marks `Simd<f64, N>` as `Sealed` for every lane count `N` that satisfies
// `SupportedLaneCount`.
impl<const N: usize> Sealed for Simd<f64, N>
where
    LaneCount<N>: SupportedLaneCount,
{
}

Expand Down Expand Up @@ -161,5 +263,6 @@ mod tests {
let _xfma = x.mul_add(x, x);
let _xsqrt = x.sqrt();
let _ = x2.abs() * x2;
let _ = x.sin();
}
}
Loading