Skip to content

Commit a067a6f

Browse files
author
andy-thomason
committed
More inv trig.
1 parent 8e3dbcf commit a067a6f

File tree

3 files changed

+107
-44
lines changed

3 files changed

+107
-44
lines changed

crates/std_float/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ pub trait StdLibm : StdFloat {
133133
fn acos(self) -> Self;
134134

135135
fn atan(self) -> Self;
136+
137+
fn atan2(self, x: Self) -> Self;
136138
}
137139

138140
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}

crates/std_float/src/libm32.rs

+59-44
Original file line numberDiff line numberDiff line change
@@ -11,66 +11,79 @@ where
1111
{
1212
#[inline]
1313
fn asin(self) -> Self {
14-
let PI_BY_2 = Self::splat(1.5707964f32);
15-
let LIM: Self = Self::splat(0.9f32);
16-
let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
17-
let s: Self =
18-
((self).lanes_lt(Self::splat(0f32))).select(-Self::splat(1f32), Self::splat(1f32));
19-
let x: Self = ((self * self).lanes_lt(LIM * LIM))
20-
.select(self, (Self::splat(1f32) - self * self).sqrt());
21-
let y: Self = (Self::splat(4374.977f32))
22-
.mul_add(x * x, -Self::splat(13781.558f32))
23-
.mul_add(x * x, Self::splat(17105.695f32))
24-
.mul_add(x * x, -Self::splat(10486.649f32))
25-
.mul_add(x * x, Self::splat(3231.7603f32))
26-
.mul_add(x * x, -Self::splat(447.56482f32))
27-
.mul_add(x * x, Self::splat(21.78206f32))
28-
.mul_add(x * x, Self::splat(0.84158415f32))
29-
* x;
30-
((self * self).lanes_lt(LIM * LIM)).select(y, c - y * s)
14+
let arg = self;
15+
arg.atan2((Self::splat(1f32) - arg * arg).sqrt())
3116
}
3217
#[inline]
3318
fn acos(self) -> Self {
3419
let PI_BY_2 = Self::splat(1.5707964f32);
3520
let PI = Self::splat(3.1415927f32);
21+
let arg = self;
3622
let LIM: Self = Self::splat(0.9f32);
37-
let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(PI, Self::splat(0f32));
23+
let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(PI, Self::splat(0f32));
3824
let s: Self =
39-
((self).lanes_lt(Self::splat(0f32))).select(Self::splat(1f32), -Self::splat(1f32));
40-
let x: Self = ((self * self).lanes_lt(LIM * LIM))
41-
.select(self, (Self::splat(1f32) - self * self).sqrt());
42-
let y: Self = (Self::splat(4374.977f32))
43-
.mul_add(x * x, -Self::splat(13781.558f32))
44-
.mul_add(x * x, Self::splat(17105.695f32))
45-
.mul_add(x * x, -Self::splat(10486.649f32))
46-
.mul_add(x * x, Self::splat(3231.7603f32))
47-
.mul_add(x * x, -Self::splat(447.56482f32))
48-
.mul_add(x * x, Self::splat(21.78206f32))
49-
.mul_add(x * x, Self::splat(0.84158415f32))
25+
((arg).lanes_lt(Self::splat(0f32))).select(Self::splat(1f32), -Self::splat(1f32));
26+
let x: Self =
27+
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1f32) - arg * arg).sqrt());
28+
let y: Self = (Self::splat(1.3740137f32))
29+
.mul_add(x * x, -Self::splat(3.1993167f32))
30+
.mul_add(x * x, Self::splat(3.103398f32))
31+
.mul_add(x * x, -Self::splat(1.4533828f32))
32+
.mul_add(x * x, Self::splat(0.41395915f32))
33+
.mul_add(x * x, Self::splat(0.03113007f32))
34+
.mul_add(x * x, Self::splat(0.16861732f32))
35+
.mul_add(x * x, Self::splat(0.99998593f32))
5036
* x;
51-
((self * self).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
37+
((arg * arg).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
5238
}
5339
#[inline]
5440
fn atan(self) -> Self {
5541
let PI_BY_2 = Self::splat(1.5707964f32);
42+
let arg = self;
5643
let LIM: Self = Self::splat(1f32);
57-
let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
58-
let x: Self = ((self.abs()).lanes_lt(LIM)).select(self, self.recip());
59-
let y: Self = (-Self::splat(95.70126f32))
60-
.mul_add(x * x, Self::splat(424.99908f32))
61-
.mul_add(x * x, -Self::splat(767.4826f32))
62-
.mul_add(x * x, Self::splat(714.51953f32))
63-
.mul_add(x * x, -Self::splat(354.32654f32))
64-
.mul_add(x * x, Self::splat(83.9618f32))
65-
.mul_add(x * x, -Self::splat(6.2395816f32))
66-
.mul_add(x * x, Self::splat(1.0549852f32))
44+
let c: Self = ((arg).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
45+
let x: Self = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
46+
let y: Self = (-Self::splat(0.0039602574f32))
47+
.mul_add(x * x, Self::splat(0.021659138f32))
48+
.mul_add(x * x, -Self::splat(0.05587457f32))
49+
.mul_add(x * x, Self::splat(0.09664151f32))
50+
.mul_add(x * x, -Self::splat(0.13930209f32))
51+
.mul_add(x * x, Self::splat(0.19954468f32))
52+
.mul_add(x * x, -Self::splat(0.33331004f32))
53+
.mul_add(x * x, Self::splat(0.9999998f32))
6754
* x;
68-
((self.abs()).lanes_lt(LIM)).select(y, c - y)
55+
((arg.abs()).lanes_lt(LIM)).select(y, c - y)
56+
}
57+
#[inline]
58+
fn atan2(self, x: Self) -> Self {
59+
let PI_BY_2 = Self::splat(1.5707964f32);
60+
let PI = Self::splat(3.1415927f32);
61+
let y = self;
62+
let offset180: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI, PI);
63+
let x1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-x, x);
64+
let y1: Self = ((x).lanes_lt(Self::splat(0f32))).select(-y, y);
65+
let offset1: Self = ((x).lanes_lt(Self::splat(0f32))).select(offset180, Self::splat(0f32));
66+
let offset90: Self = ((y).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
67+
let x2: Self = ((y1.abs()).lanes_gt(x1)).select(y1, x1);
68+
let y2: Self = ((y1.abs()).lanes_gt(x1)).select(-x1, y1);
69+
let offset2: Self = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1);
70+
let x3: Self = y2 / x2;
71+
let y3: Self = (-Self::splat(0.0039602574f32))
72+
.mul_add(x3 * x3, Self::splat(0.021659138f32))
73+
.mul_add(x3 * x3, -Self::splat(0.05587457f32))
74+
.mul_add(x3 * x3, Self::splat(0.09664151f32))
75+
.mul_add(x3 * x3, -Self::splat(0.13930209f32))
76+
.mul_add(x3 * x3, Self::splat(0.19954468f32))
77+
.mul_add(x3 * x3, -Self::splat(0.33331004f32))
78+
.mul_add(x3 * x3, Self::splat(0.9999998f32))
79+
* x3;
80+
y3 + offset2
6981
}
7082
#[inline]
7183
fn sin(self) -> Self {
7284
let RECIP_2PI = Self::splat(0.15915494f32);
73-
let scaled: Self = self * RECIP_2PI;
85+
let arg = self;
86+
let scaled: Self = arg * RECIP_2PI;
7487
let x: Self = scaled - scaled.round();
7588
(-Self::splat(12.26886f32))
7689
.mul_add(x * x, Self::splat(41.21624f32))
@@ -83,7 +96,8 @@ where
8396
#[inline]
8497
fn cos(self) -> Self {
8598
let RECIP_2PI = Self::splat(0.15915494f32);
86-
let scaled: Self = self * RECIP_2PI;
99+
let arg = self;
100+
let scaled: Self = arg * RECIP_2PI;
87101
let x: Self = scaled - scaled.round();
88102
(Self::splat(6.5286584f32))
89103
.mul_add(x * x, -Self::splat(25.973276f32))
@@ -96,7 +110,8 @@ where
96110
#[inline]
97111
fn tan(self) -> Self {
98112
let RECIP_PI = Self::splat(0.31830987f32);
99-
let scaled: Self = self * RECIP_PI;
113+
let arg = self;
114+
let scaled: Self = arg * RECIP_PI;
100115
let x: Self = scaled - scaled.round();
101116
let recip: Self = Self::splat(1f32) / (x * x - Self::splat(0.25f32));
102117
let y: Self = (Self::splat(0.014397301f32))

crates/std_float/src/test_libm32.rs

+46
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,49 @@ fn tan_f32() {
158158
vector_type: f32x4,
159159
);
160160
}
161+
162+
/// Compares the vector `asin` on `f32x4` with the scalar `f32::asin` for
/// inputs between -1 and 1, passing a limit of eight times 2^-23.
// NOTE(review): `test_range!` is defined outside this view; presumably it
// sweeps [min, max] and checks the scalar/vector difference against
// `limit` -- confirm against the macro definition.
#[test]
fn asin_f32() {
    use crate::StdLibm;
    use core_simd::f32x4;

    // `f32::EPSILON` is exactly 2^-23, i.e. one ulp at 1.0.
    let tolerance = f32::EPSILON * 8.0;

    test_range!(
        min: -1.0,
        max: 1.0,
        limit: tolerance,
        scalar_fn: |x: f32| x.asin(),
        vector_fn: |x: f32x4| x.asin(),
        scalar_type: f32,
        vector_type: f32x4,
    );
}
179+
180+
/// Compares the vector `atan` on `f32x4` with the scalar `f32::atan`,
/// first directly on inputs between -1 and 1 and then on their
/// reciprocals, passing a limit of eight times 2^-23 in both cases.
// NOTE(review): `test_range!` is defined outside this view; presumably it
// sweeps [min, max] and checks the scalar/vector difference against
// `limit` -- confirm against the macro definition.
#[test]
fn atan_f32() {
    use crate::StdLibm;
    use core_simd::f32x4;

    // `f32::EPSILON` is exactly 2^-23, i.e. one ulp at 1.0.
    let tolerance = f32::EPSILON * 8.0;

    // Arguments with magnitude at most one.
    test_range!(
        min: -1.0,
        max: 1.0,
        limit: tolerance,
        scalar_fn: |x: f32| x.atan(),
        vector_fn: |x: f32x4| x.atan(),
        scalar_type: f32,
        vector_type: f32x4,
    );

    // Taking the reciprocal first feeds `atan` arguments with magnitude
    // of at least one.
    test_range!(
        min: -1.0,
        max: 1.0,
        limit: tolerance,
        scalar_fn: |x: f32| x.recip().atan(),
        vector_fn: |x: f32x4| x.recip().atan(),
        scalar_type: f32,
        vector_type: f32x4,
    );
}

0 commit comments

Comments
 (0)