More inverse trig: add atan2 and implement asin via atan2.

andy-thomason · andy-thomason · commit a067a6ffbf1d · 2022-01-30T14:38:56.000Z
diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs
@@ -133,6 +133,8 @@ pub trait StdLibm : StdFloat {
     fn acos(self) -> Self;
 
     fn atan(self) -> Self;
+
+    fn atan2(self, x: Self) -> Self;
 }
 
 impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
diff --git a/crates/std_float/src/libm32.rs b/crates/std_float/src/libm32.rs
@@ -11,66 +11,79 @@ where
 {
     #[inline]
     fn asin(self) -> Self {
-        let PI_BY_2 = Self::splat(1.5707964f32);
-        let LIM: Self = Self::splat(0.9f32);
-        let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
-        let s: Self =
-            ((self).lanes_lt(Self::splat(0f32))).select(-Self::splat(1f32), Self::splat(1f32));
-        let x: Self = ((self * self).lanes_lt(LIM * LIM))
-            .select(self, (Self::splat(1f32) - self * self).sqrt());
-        let y: Self = (Self::splat(4374.977f32))
-            .mul_add(x * x, -Self::splat(13781.558f32))
-            .mul_add(x * x, Self::splat(17105.695f32))
-            .mul_add(x * x, -Self::splat(10486.649f32))
-            .mul_add(x * x, Self::splat(3231.7603f32))
-            .mul_add(x * x, -Self::splat(447.56482f32))
-            .mul_add(x * x, Self::splat(21.78206f32))
-            .mul_add(x * x, Self::splat(0.84158415f32))
-            * x;
-        ((self * self).lanes_lt(LIM * LIM)).select(y, c - y * s)
+        let arg = self;
+        arg.atan2((Self::splat(1f32) - arg * arg).sqrt())
     }
     #[inline]
     fn acos(self) -> Self {
         let PI_BY_2 = Self::splat(1.5707964f32);
         let PI = Self::splat(3.1415927f32);
+        let arg = self;
         let LIM: Self = Self::splat(0.9f32);
-        let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(PI, Self::splat(0f32));
+        let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(PI, Self::splat(0f32));
         let s: Self =
-            ((self).lanes_lt(Self::splat(0f32))).select(Self::splat(1f32), -Self::splat(1f32));
-        let x: Self = ((self * self).lanes_lt(LIM * LIM))
-            .select(self, (Self::splat(1f32) - self * self).sqrt());
-        let y: Self = (Self::splat(4374.977f32))
-            .mul_add(x * x, -Self::splat(13781.558f32))
-            .mul_add(x * x, Self::splat(17105.695f32))
-            .mul_add(x * x, -Self::splat(10486.649f32))
-            .mul_add(x * x, Self::splat(3231.7603f32))
-            .mul_add(x * x, -Self::splat(447.56482f32))
-            .mul_add(x * x, Self::splat(21.78206f32))
-            .mul_add(x * x, Self::splat(0.84158415f32))
+            ((arg).lanes_lt(Self::splat(0f32))).select(Self::splat(1f32), -Self::splat(1f32));
+        let x: Self =
+            ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1f32) - arg * arg).sqrt());
+        let y: Self = (Self::splat(1.3740137f32))
+            .mul_add(x * x, -Self::splat(3.1993167f32))
+            .mul_add(x * x, Self::splat(3.103398f32))
+            .mul_add(x * x, -Self::splat(1.4533828f32))
+            .mul_add(x * x, Self::splat(0.41395915f32))
+            .mul_add(x * x, Self::splat(0.03113007f32))
+            .mul_add(x * x, Self::splat(0.16861732f32))
+            .mul_add(x * x, Self::splat(0.99998593f32))
             * x;
-        ((self * self).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
+        ((arg * arg).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
     }
     #[inline]
     fn atan(self) -> Self {
         let PI_BY_2 = Self::splat(1.5707964f32);
+        let arg = self;
         let LIM: Self = Self::splat(1f32);
-        let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
-        let x: Self = ((self.abs()).lanes_lt(LIM)).select(self, self.recip());
-        let y: Self = (-Self::splat(95.70126f32))
-            .mul_add(x * x, Self::splat(424.99908f32))
-            .mul_add(x * x, -Self::splat(767.4826f32))
-            .mul_add(x * x, Self::splat(714.51953f32))
-            .mul_add(x * x, -Self::splat(354.32654f32))
-            .mul_add(x * x, Self::splat(83.9618f32))
-            .mul_add(x * x, -Self::splat(6.2395816f32))
-            .mul_add(x * x, Self::splat(1.0549852f32))
+        let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
+        let x: Self = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
+        let y: Self = (-Self::splat(0.0039602574f32))
+            .mul_add(x * x, Self::splat(0.021659138f32))
+            .mul_add(x * x, -Self::splat(0.05587457f32))
+            .mul_add(x * x, Self::splat(0.09664151f32))
+            .mul_add(x * x, -Self::splat(0.13930209f32))
+            .mul_add(x * x, Self::splat(0.19954468f32))
+            .mul_add(x * x, -Self::splat(0.33331004f32))
+            .mul_add(x * x, Self::splat(0.9999998f32))
             * x;
-        ((self.abs()).lanes_lt(LIM)).select(y, c - y)
+        ((arg.abs()).lanes_lt(LIM)).select(y, c - y)
+    }
+    #[inline]
+    fn atan2(self, x: Self) -> Self {
+        let PI_BY_2 = Self::splat(1.5707964f32);
+        let PI = Self::splat(3.1415927f32);
+        let y = self;
+        let offset180: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI, PI);
+        let x1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-x, x);
+        let y1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-y, y);
+        let offset1: Self = ((x).lanes_lt(Self::splat(0f32))).select(offset180, Self::splat(0f32));
+        let offset90: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
+        let x2: Self = ((y1.abs()).lanes_gt(x1)).select(y1, x1);
+        let y2: Self = ((y1.abs()).lanes_gt(x1)).select(-x1, y1);
+        let offset2: Self = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1);
+        let x3: Self = y2 / x2;
+        let y3: Self = (-Self::splat(0.0039602574f32))
+            .mul_add(x3 * x3, Self::splat(0.021659138f32))
+            .mul_add(x3 * x3, -Self::splat(0.05587457f32))
+            .mul_add(x3 * x3, Self::splat(0.09664151f32))
+            .mul_add(x3 * x3, -Self::splat(0.13930209f32))
+            .mul_add(x3 * x3, Self::splat(0.19954468f32))
+            .mul_add(x3 * x3, -Self::splat(0.33331004f32))
+            .mul_add(x3 * x3, Self::splat(0.9999998f32))
+            * x3;
+        y3 + offset2
     }
     #[inline]
     fn sin(self) -> Self {
         let RECIP_2PI = Self::splat(0.15915494f32);
-        let scaled: Self = self * RECIP_2PI;
+        let arg = self;
+        let scaled: Self = arg * RECIP_2PI;
         let x: Self = scaled - scaled.round();
         (-Self::splat(12.26886f32))
             .mul_add(x * x, Self::splat(41.21624f32))
@@ -83,7 +96,8 @@ where
     #[inline]
     fn cos(self) -> Self {
         let RECIP_2PI = Self::splat(0.15915494f32);
-        let scaled: Self = self * RECIP_2PI;
+        let arg = self;
+        let scaled: Self = arg * RECIP_2PI;
         let x: Self = scaled - scaled.round();
         (Self::splat(6.5286584f32))
             .mul_add(x * x, -Self::splat(25.973276f32))
@@ -96,7 +110,8 @@ where
     #[inline]
     fn tan(self) -> Self {
         let RECIP_PI = Self::splat(0.31830987f32);
-        let scaled: Self = self * RECIP_PI;
+        let arg = self;
+        let scaled: Self = arg * RECIP_PI;
         let x: Self = scaled - scaled.round();
         let recip: Self = Self::splat(1f32) / (x * x - Self::splat(0.25f32));
         let y: Self = (Self::splat(0.014397301f32))
diff --git a/crates/std_float/src/test_libm32.rs b/crates/std_float/src/test_libm32.rs
@@ -158,3 +158,49 @@ fn tan_f32() {
         vector_type: f32x4,
     );
 }
+
+#[test]
+fn asin_f32() {
+    use core_simd::f32x4;
+    use crate::StdLibm;
+
+    let one_ulp = (2.0_f32).powi(-23);
+
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: one_ulp * 8.0,
+        scalar_fn: |x : f32| x.asin(),
+        vector_fn: |x : f32x4| x.asin(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+}
+
+#[test]
+fn atan_f32() {
+    use core_simd::f32x4;
+    use crate::StdLibm;
+
+    let one_ulp = (2.0_f32).powi(-23);
+
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: one_ulp * 8.0,
+        scalar_fn: |x : f32| x.atan(),
+        vector_fn: |x : f32x4| x.atan(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+
+    test_range!(
+        min: -1.0,
+        max: 1.0,
+        limit: one_ulp * 8.0,
+        scalar_fn: |x : f32| x.recip().atan(),
+        vector_fn: |x : f32x4| x.recip().atan(),
+        scalar_type: f32,
+        vector_type: f32x4,
+    );
+}

Original file line number	Diff line number	Diff line change
`@@ -133,6 +133,8 @@ pub trait StdLibm : StdFloat {`
`133`	`133`	`fn acos(self) -> Self;`
`134`	`134`
`135`	`135`	`fn atan(self) -> Self;`
	`136`	`+`
	`137`	`+ fn atan2(self, x: Self) -> Self;`
`136`	`138`	`}`
`137`	`139`
`138`	`140`	`impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}`