Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 8e3dbcf

Browse files
author
andy-thomason
committed Jan 27, 2022
Fully auto-generated trig functions.
1 parent 8db3885 commit 8e3dbcf

File tree

3 files changed

+277
-288
lines changed

3 files changed

+277
-288
lines changed
 

‎crates/std_float/src/lib.rs

+6-288
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,10 @@ use core_simd::simd;
1111

1212
use simd::{LaneCount, Simd, SupportedLaneCount};
1313

14+
mod libm32;
15+
#[cfg(test)]
16+
mod test_libm32;
17+
1418
#[cfg(feature = "as_crate")]
1519
mod experimental {
1620
pub trait Sealed {}
@@ -115,7 +119,9 @@ pub trait StdFloat: Sealed + Sized {
115119
/// Returns the floating point's fractional value, with its integer part removed.
116120
#[must_use = "method returns a new vector and does not mutate the original value"]
117121
fn fract(self) -> Self;
122+
}
118123

124+
pub trait StdLibm : StdFloat {
119125
fn sin(self) -> Self;
120126

121127
fn cos(self) -> Self;
@@ -143,112 +149,6 @@ where
143149
fn fract(self) -> Self {
144150
self - self.trunc()
145151
}
146-
147-
/// Calculate the sine of the angle
148-
/// Note: this is hand-edited from generated scalar code.
149-
/// In an ideal world, we would generate this directly by code transformation.
150-
#[inline]
151-
fn sin(self) -> Self {
152-
#[allow(non_snake_case)]
153-
let RECIP_2PI = Self::splat(0.15915494);
154-
155-
let scaled = self * RECIP_2PI;
156-
let x = scaled - scaled.round();
157-
Self::splat(-12.26885994095919635608)
158-
.mul_add(x * x, Self::splat(41.21624105096574396575))
159-
.mul_add(x * x, Self::splat(-76.58672703333290836700))
160-
.mul_add(x * x, Self::splat(81.59746095374827019356))
161-
.mul_add(x * x, Self::splat(-41.34151143437582891705))
162-
.mul_add(x * x, Self::splat(6.28318452581127506328))
163-
* x
164-
}
165-
166-
fn cos(self) -> Self {
167-
#[allow(non_snake_case)]
168-
let RECIP_2PI = Self::splat(0.15915494);
169-
170-
let scaled = self * RECIP_2PI;
171-
let x = scaled - scaled.round();
172-
Self::splat(6.52865816174499269880)
173-
.mul_add(x * x, Self::splat(-25.97327546890330396608))
174-
.mul_add(x * x, Self::splat(60.17118230812820383560))
175-
.mul_add(x * x, Self::splat(-85.45091743827674607508))
176-
.mul_add(x * x, Self::splat(64.93918704099473042873))
177-
.mul_add(x * x, Self::splat(-19.73920667935656472596))
178-
.mul_add(x * x, Self::splat(1.00000000000000000000))
179-
}
180-
181-
fn tan(self) -> Self {
182-
use core::f32::consts::PI;
183-
let scaled: Self = self * Self::splat(1.0 / PI);
184-
let x: Self = scaled - scaled.round();
185-
let recip: Self = (x * x - Self::splat(0.25)).recip();
186-
let y: Self = Self::splat(0.01439730036301634345)
187-
.mul_add(x * x, Self::splat(0.02101734538976238579))
188-
.mul_add(x * x, Self::splat(0.05285888255895108345))
189-
.mul_add(x * x, Self::splat(0.13475448281475060771))
190-
.mul_add(x * x, Self::splat(0.55773663386075044866))
191-
.mul_add(x * x, Self::splat(-0.78539816491781455948))
192-
* x;
193-
y * recip
194-
}
195-
196-
fn asin(self) -> Self {
197-
use core::f32::consts::PI;
198-
let lim: Self = Self::splat(0.9);
199-
let c: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(-PI / 2.0), Self::splat(PI / 2.0));
200-
let s: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(-1.0), Self::splat(1.0));
201-
let x: Self = (self * self).lanes_lt(lim * lim).select(self, (Self::splat(1.0) - self * self).sqrt());
202-
let y: Self = Self::splat(4374.97702992533695457424)
203-
.mul_add(x * x, Self::splat(-13781.55764426881951685974))
204-
.mul_add(x * x, Self::splat(17105.69475701115952774357))
205-
.mul_add(x * x, Self::splat(-10486.64894150265898388567))
206-
.mul_add(x * x, Self::splat(3231.76028705607279348342))
207-
.mul_add(x * x, Self::splat(-447.56480696327035255708))
208-
.mul_add(x * x, Self::splat(21.78206149264184872939))
209-
.mul_add(x * x, Self::splat(0.84158415752395745675))
210-
* x;
211-
(self * self).lanes_lt(lim * lim).select(y, c - y * s)
212-
}
213-
214-
fn acos(self) -> Self {
215-
use core::f32::consts::PI;
216-
let lim: Self = Self::splat(0.9);
217-
let c: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(PI), Self::splat(0.0));
218-
let s: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(1.0), Self::splat(-1.0));
219-
let x: Self = (self * self).lanes_lt(lim * lim).select(self, (Self::splat(1.0) - self * self).sqrt());
220-
// let c: Self = select(self < 0.0, PI, 0.0);
221-
// let s: Self = select(self < 0.0, 1.0, -1.0);
222-
// let x: Self = select(self * self < lim * lim, self, (1.0 - self * self).sqrt());
223-
let y: Self = Self::splat(4374.97702992533695457424)
224-
.mul_add(x * x, Self::splat(-13781.55764426881951685974))
225-
.mul_add(x * x, Self::splat(17105.69475701115952774357))
226-
.mul_add(x * x, Self::splat(-10486.64894150265898388567))
227-
.mul_add(x * x, Self::splat(3231.76028705607279348342))
228-
.mul_add(x * x, Self::splat(-447.56480696327035255708))
229-
.mul_add(x * x, Self::splat(21.78206149264184872939))
230-
.mul_add(x * x, Self::splat(0.84158415752395745675))
231-
* x;
232-
(self * self).lanes_lt(lim * lim).select(y, c - y * s)
233-
}
234-
235-
fn atan(self) -> Self {
236-
use core::f32::consts::PI;
237-
let lim: Self = Self::splat(1.0);
238-
let c: Self = self.lanes_lt(Self::splat(0.0)).select(Self::splat(-PI / 2.0), Self::splat(PI / 2.0));
239-
let small = self.abs().lanes_lt(lim);
240-
let x: Self = small.select(self, self.recip());
241-
let y: Self = Self::splat(95.70126383842530559360)
242-
.mul_add(x * x, Self::splat(424.99907022806059540464))
243-
.mul_add(x * x, Self::splat(-767.48259680040570156003))
244-
.mul_add(x * x, Self::splat(714.51953012224223415829))
245-
.mul_add(x * x, Self::splat(-354.32654395426962592865))
246-
.mul_add(x * x, Self::splat(83.96179897148539189638))
247-
.mul_add(x * x, Self::splat(-6.23958170715441509270))
248-
.mul_add(x * x, Self::splat(1.05498514186427524914))
249-
* x;
250-
small.select(y, c - y)
251-
}
252152
}
253153

254154
impl<const N: usize> StdFloat for Simd<f64, N>
@@ -261,36 +161,6 @@ where
261161
fn fract(self) -> Self {
262162
self - self.trunc()
263163
}
264-
265-
#[inline]
266-
fn sin(self) -> Self {
267-
self
268-
}
269-
270-
#[inline]
271-
fn cos(self) -> Self {
272-
self
273-
}
274-
275-
#[inline]
276-
fn tan(self) -> Self {
277-
self
278-
}
279-
280-
#[inline]
281-
fn asin(self) -> Self {
282-
self
283-
}
284-
285-
#[inline]
286-
fn acos(self) -> Self {
287-
self
288-
}
289-
290-
#[inline]
291-
fn atan(self) -> Self {
292-
self
293-
}
294164
}
295165

296166
#[cfg(test)]
@@ -311,156 +181,4 @@ mod tests {
311181
let _ = x2.abs() * x2;
312182
let _ = x.sin();
313183
}
314-
315-
const NUM_ITER: usize = 0x10000;
316-
317-
macro_rules! test_range {
318-
(
319-
min: $min: expr,
320-
max: $max: expr,
321-
limit: $limit: expr,
322-
scalar_fn: $scalar_fn: expr,
323-
vector_fn: $vector_fn: expr,
324-
scalar_type: $scalar_type: ty,
325-
vector_type: $vector_type: ty,
326-
) => {{
327-
let limit = <$vector_type>::splat($limit);
328-
let b = (($max) - ($min)) * (1.0 / NUM_ITER as $scalar_type);
329-
let a = $min;
330-
let sf = $scalar_fn;
331-
let vf = $vector_fn;
332-
for i in (0..NUM_ITER / 4) {
333-
let fi = (i * 4) as $scalar_type;
334-
let x = <$vector_type>::from_array([
335-
(fi + 0.0) * b + a,
336-
(fi + 1.0) * b + a,
337-
(fi + 2.0) * b + a,
338-
(fi + 3.0) * b + a,
339-
]);
340-
let yref = <$vector_type>::from_array([sf(x[0]), sf(x[1]), sf(x[2]), sf(x[3])]);
341-
let y = vf(x);
342-
let e = (y - yref);
343-
if !(e.abs().lanes_le(limit)).all() {
344-
panic!("\nx ={:20.16?}\ne ={:20.16?}\nlimit ={:20.16?}\nvector={:20.16?}\nscalar={:20.16?}\nvector_fn={}", x, e, limit, y, yref, stringify!($vector_fn));
345-
}
346-
}
347-
}};
348-
}
349-
350-
#[test]
351-
fn sin_f32() {
352-
use core::f32::consts::PI;
353-
let one_ulp = (2.0_f32).powi(-23);
354-
355-
test_range!(
356-
min: -PI/4.0,
357-
max: PI/4.0,
358-
limit: one_ulp * 1.0,
359-
scalar_fn: |x : f32| x.sin(),
360-
vector_fn: |x : f32x4| x.sin(),
361-
scalar_type: f32,
362-
vector_type: f32x4,
363-
);
364-
365-
test_range!(
366-
min: -PI/2.0,
367-
max: PI/2.0,
368-
limit: one_ulp * 2.0,
369-
scalar_fn: |x : f32| x.sin(),
370-
vector_fn: |x : f32x4| x.sin(),
371-
scalar_type: f32,
372-
vector_type: f32x4,
373-
);
374-
375-
test_range!(
376-
min: -PI,
377-
max: PI,
378-
limit: one_ulp * 8.0,
379-
scalar_fn: |x : f32| x.sin(),
380-
vector_fn: |x : f32x4| x.sin(),
381-
scalar_type: f32,
382-
vector_type: f32x4,
383-
);
384-
}
385-
386-
#[test]
387-
fn cos_f32() {
388-
use core::f32::consts::PI;
389-
let one_ulp = (2.0_f32).powi(-23);
390-
391-
// In the range +/- pi/4 the input has 1 ulp of error.
392-
test_range!(
393-
min: -PI/4.0,
394-
max: PI/4.0,
395-
limit: one_ulp * 1.0,
396-
scalar_fn: |x : f32| x.cos(),
397-
vector_fn: |x : f32x4| x.cos(),
398-
scalar_type: f32,
399-
vector_type: f32x4,
400-
);
401-
402-
// In the range +/- pi/2 the input and output has 2 ulp of error.
403-
test_range!(
404-
min: -PI/2.0,
405-
max: PI/2.0,
406-
limit: one_ulp * 2.0,
407-
scalar_fn: |x : f32| x.cos(),
408-
vector_fn: |x : f32x4| x.cos(),
409-
scalar_type: f32,
410-
vector_type: f32x4,
411-
);
412-
413-
// In the range +/- pi the input has 4 ulp of error and the output has 5.
414-
// Note that the scalar cos also has this error but the implementation
415-
// is different.
416-
test_range!(
417-
min: -PI,
418-
max: PI,
419-
limit: one_ulp * 8.0,
420-
scalar_fn: |x : f32| x.cos(),
421-
vector_fn: |x : f32x4| x.cos(),
422-
scalar_type: f32,
423-
vector_type: f32x4,
424-
);
425-
}
426-
427-
#[test]
428-
fn tan_f32() {
429-
use core::f32::consts::PI;
430-
let one_ulp = (2.0_f32).powi(-23);
431-
432-
// For the outsides, reciprocal accuracy is important.
433-
// Note that the vector function correctly gets -inf for -PI/2
434-
// but the scalar function does not.
435-
test_range!(
436-
min: -PI/2.0 + 0.00001,
437-
max: -PI/4.0,
438-
limit: one_ulp * 3.0,
439-
scalar_fn: |x : f32| x.tan().recip(),
440-
vector_fn: |x : f32x4| x.tan().recip(),
441-
scalar_type: f32,
442-
vector_type: f32x4,
443-
);
444-
445-
// For the insides, absolute accuracy is important.
446-
test_range!(
447-
min: -PI/4.0,
448-
max: PI/4.0,
449-
limit: one_ulp * 2.0,
450-
scalar_fn: |x : f32| x.tan(),
451-
vector_fn: |x : f32x4| x.tan(),
452-
scalar_type: f32,
453-
vector_type: f32x4,
454-
);
455-
456-
test_range!(
457-
min: PI/4.0,
458-
max: PI/2.0 - 0.00001,
459-
limit: one_ulp * 3.0,
460-
scalar_fn: |x : f32| x.tan().recip(),
461-
vector_fn: |x : f32x4| x.tan().recip(),
462-
scalar_type: f32,
463-
vector_type: f32x4,
464-
);
465-
}
466184
}

‎crates/std_float/src/libm32.rs

+111
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,111 @@
1+
#![allow(non_snake_case)]
2+
use super::StdLibm;
3+
4+
use super::StdFloat;
5+
6+
use super::simd::{LaneCount, Simd, SupportedLaneCount};
7+
8+
impl<const N: usize> StdLibm for Simd<f32, N>
9+
where
10+
LaneCount<N>: SupportedLaneCount,
11+
{
12+
#[inline]
13+
fn asin(self) -> Self {
14+
let PI_BY_2 = Self::splat(1.5707964f32);
15+
let LIM: Self = Self::splat(0.9f32);
16+
let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
17+
let s: Self =
18+
((self).lanes_lt(Self::splat(0f32))).select(-Self::splat(1f32), Self::splat(1f32));
19+
let x: Self = ((self * self).lanes_lt(LIM * LIM))
20+
.select(self, (Self::splat(1f32) - self * self).sqrt());
21+
let y: Self = (Self::splat(4374.977f32))
22+
.mul_add(x * x, -Self::splat(13781.558f32))
23+
.mul_add(x * x, Self::splat(17105.695f32))
24+
.mul_add(x * x, -Self::splat(10486.649f32))
25+
.mul_add(x * x, Self::splat(3231.7603f32))
26+
.mul_add(x * x, -Self::splat(447.56482f32))
27+
.mul_add(x * x, Self::splat(21.78206f32))
28+
.mul_add(x * x, Self::splat(0.84158415f32))
29+
* x;
30+
((self * self).lanes_lt(LIM * LIM)).select(y, c - y * s)
31+
}
32+
#[inline]
33+
fn acos(self) -> Self {
34+
let PI_BY_2 = Self::splat(1.5707964f32);
35+
let PI = Self::splat(3.1415927f32);
36+
let LIM: Self = Self::splat(0.9f32);
37+
let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(PI, Self::splat(0f32));
38+
let s: Self =
39+
((self).lanes_lt(Self::splat(0f32))).select(Self::splat(1f32), -Self::splat(1f32));
40+
let x: Self = ((self * self).lanes_lt(LIM * LIM))
41+
.select(self, (Self::splat(1f32) - self * self).sqrt());
42+
let y: Self = (Self::splat(4374.977f32))
43+
.mul_add(x * x, -Self::splat(13781.558f32))
44+
.mul_add(x * x, Self::splat(17105.695f32))
45+
.mul_add(x * x, -Self::splat(10486.649f32))
46+
.mul_add(x * x, Self::splat(3231.7603f32))
47+
.mul_add(x * x, -Self::splat(447.56482f32))
48+
.mul_add(x * x, Self::splat(21.78206f32))
49+
.mul_add(x * x, Self::splat(0.84158415f32))
50+
* x;
51+
((self * self).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
52+
}
53+
#[inline]
54+
fn atan(self) -> Self {
55+
let PI_BY_2 = Self::splat(1.5707964f32);
56+
let LIM: Self = Self::splat(1f32);
57+
let c: Self = ((self).lanes_lt(Self::splat(0f32))).select(-PI_BY_2, PI_BY_2);
58+
let x: Self = ((self.abs()).lanes_lt(LIM)).select(self, self.recip());
59+
let y: Self = (-Self::splat(95.70126f32))
60+
.mul_add(x * x, Self::splat(424.99908f32))
61+
.mul_add(x * x, -Self::splat(767.4826f32))
62+
.mul_add(x * x, Self::splat(714.51953f32))
63+
.mul_add(x * x, -Self::splat(354.32654f32))
64+
.mul_add(x * x, Self::splat(83.9618f32))
65+
.mul_add(x * x, -Self::splat(6.2395816f32))
66+
.mul_add(x * x, Self::splat(1.0549852f32))
67+
* x;
68+
((self.abs()).lanes_lt(LIM)).select(y, c - y)
69+
}
70+
#[inline]
71+
fn sin(self) -> Self {
72+
let RECIP_2PI = Self::splat(0.15915494f32);
73+
let scaled: Self = self * RECIP_2PI;
74+
let x: Self = scaled - scaled.round();
75+
(-Self::splat(12.26886f32))
76+
.mul_add(x * x, Self::splat(41.21624f32))
77+
.mul_add(x * x, -Self::splat(76.58672f32))
78+
.mul_add(x * x, Self::splat(81.59746f32))
79+
.mul_add(x * x, -Self::splat(41.34151f32))
80+
.mul_add(x * x, Self::splat(6.2831845f32))
81+
* x
82+
}
83+
#[inline]
84+
fn cos(self) -> Self {
85+
let RECIP_2PI = Self::splat(0.15915494f32);
86+
let scaled: Self = self * RECIP_2PI;
87+
let x: Self = scaled - scaled.round();
88+
(Self::splat(6.5286584f32))
89+
.mul_add(x * x, -Self::splat(25.973276f32))
90+
.mul_add(x * x, Self::splat(60.17118f32))
91+
.mul_add(x * x, -Self::splat(85.45092f32))
92+
.mul_add(x * x, Self::splat(64.939186f32))
93+
.mul_add(x * x, -Self::splat(19.739206f32))
94+
.mul_add(x * x, Self::splat(1f32))
95+
}
96+
#[inline]
97+
fn tan(self) -> Self {
98+
let RECIP_PI = Self::splat(0.31830987f32);
99+
let scaled: Self = self * RECIP_PI;
100+
let x: Self = scaled - scaled.round();
101+
let recip: Self = Self::splat(1f32) / (x * x - Self::splat(0.25f32));
102+
let y: Self = (Self::splat(0.014397301f32))
103+
.mul_add(x * x, Self::splat(0.021017345f32))
104+
.mul_add(x * x, Self::splat(0.05285888f32))
105+
.mul_add(x * x, Self::splat(0.13475448f32))
106+
.mul_add(x * x, Self::splat(0.55773664f32))
107+
.mul_add(x * x, -Self::splat(0.7853982f32))
108+
* x;
109+
y * recip
110+
}
111+
}

‎crates/std_float/src/test_libm32.rs

+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
/// Number of evenly spaced sample points taken across each tested interval.
const NUM_ITER: usize = 0x10000;

/// Compares a four-lane vector function against a scalar reference.
///
/// `NUM_ITER` points are sampled from `min` upwards in steps of
/// `(max - min) / NUM_ITER`, four at a time. The scalar function is applied
/// to each lane to build the reference vector. The macro panics, printing the
/// inputs, error, limit and both results, as soon as any lane's absolute
/// error is not `<= limit`.
macro_rules! test_range {
    (
        min: $min: expr,
        max: $max: expr,
        limit: $limit: expr,
        scalar_fn: $scalar_fn: expr,
        vector_fn: $vector_fn: expr,
        scalar_type: $scalar_type: ty,
        vector_type: $vector_type: ty,
    ) => {{
        let tolerance = <$vector_type>::splat($limit);
        let step = (($max) - ($min)) * (1.0 / NUM_ITER as $scalar_type);
        let start = $min;
        let scalar_fn = $scalar_fn;
        let vector_fn = $vector_fn;
        // Each iteration covers four consecutive sample indices.
        for first in (0..NUM_ITER).step_by(4) {
            let first = first as $scalar_type;
            let x = <$vector_type>::from_array([
                (first + 0.0) * step + start,
                (first + 1.0) * step + start,
                (first + 2.0) * step + start,
                (first + 3.0) * step + start,
            ]);
            let expected = <$vector_type>::from_array([
                scalar_fn(x[0]),
                scalar_fn(x[1]),
                scalar_fn(x[2]),
                scalar_fn(x[3]),
            ]);
            let actual = vector_fn(x);
            let error = actual - expected;
            // Written as a negated `all` so that a NaN error also fails.
            if !error.abs().lanes_le(tolerance).all() {
                panic!("\nx ={:20.16?}\ne ={:20.16?}\nlimit ={:20.16?}\nvector={:20.16?}\nscalar={:20.16?}\nvector_fn={}", x, error, tolerance, actual, expected, stringify!($vector_fn));
            }
        }
    }};
}
35+
36+
/// Checks the vector `sin` against the scalar `f32::sin` over three
/// symmetric intervals, with a looser tolerance for each wider interval.
#[test]
fn sin_f32() {
    use crate::StdLibm;
    use core::f32::consts::PI;
    use core_simd::f32x4;

    // 2^-23: the gap between 1.0 and the next representable f32.
    let ulp = f32::EPSILON;

    // (half-width of the interval around zero, tolerance in ulps)
    for (half_width, ulps) in [(PI / 4.0, 1.0), (PI / 2.0, 2.0), (PI, 8.0)] {
        test_range!(
            min: -half_width,
            max: half_width,
            limit: ulp * ulps,
            scalar_fn: |x : f32| x.sin(),
            vector_fn: |x : f32x4| x.sin(),
            scalar_type: f32,
            vector_type: f32x4,
        );
    }
}
74+
75+
/// Checks the vector `cos` against the scalar `f32::cos` over three
/// symmetric intervals, with a looser tolerance for each wider interval.
#[test]
fn cos_f32() {
    use crate::StdLibm;
    use core::f32::consts::PI;
    use core_simd::f32x4;

    // 2^-23: the gap between 1.0 and the next representable f32.
    let ulp = f32::EPSILON;

    // (half-width of the interval around zero, tolerance in ulps).
    // The tolerance grows with the interval: 1 ulp within ±π/4, 2 ulp within
    // ±π/2 and 8 ulp within ±π, where the scalar `cos` also carries error
    // despite being implemented differently.
    for (half_width, ulps) in [(PI / 4.0, 1.0), (PI / 2.0, 2.0), (PI, 8.0)] {
        test_range!(
            min: -half_width,
            max: half_width,
            limit: ulp * ulps,
            scalar_fn: |x : f32| x.cos(),
            vector_fn: |x : f32x4| x.cos(),
            scalar_type: f32,
            vector_type: f32x4,
        );
    }
}
118+
119+
/// Checks the vector `tan` against the scalar `f32::tan`.
///
/// The central interval compares `tan` directly. The two outer intervals,
/// which approach ±π/2, compare the reciprocal of `tan` instead.
#[test]
fn tan_f32() {
    use crate::StdLibm;
    use core::f32::consts::PI;
    use core_simd::f32x4;

    // 2^-23: the gap between 1.0 and the next representable f32.
    let ulp = f32::EPSILON;

    let half_pi = PI / 2.0;
    let quarter_pi = PI / 4.0;
    // Keeps the outer intervals just short of ±π/2.
    let margin = 0.00001_f32;

    // Lower outer interval: reciprocal accuracy is what matters here.
    // The vector function yields -inf at -π/2 while the scalar one does not,
    // so the interval stops `margin` short of that point.
    test_range!(
        min: -half_pi + margin,
        max: -quarter_pi,
        limit: ulp * 3.0,
        scalar_fn: |x : f32| x.tan().recip(),
        vector_fn: |x : f32x4| x.tan().recip(),
        scalar_type: f32,
        vector_type: f32x4,
    );

    // Central interval: absolute accuracy is what matters here.
    test_range!(
        min: -quarter_pi,
        max: quarter_pi,
        limit: ulp * 2.0,
        scalar_fn: |x : f32| x.tan(),
        vector_fn: |x : f32x4| x.tan(),
        scalar_type: f32,
        vector_type: f32x4,
    );

    // Upper outer interval: reciprocal accuracy again.
    test_range!(
        min: quarter_pi,
        max: half_pi - margin,
        limit: ulp * 3.0,
        scalar_fn: |x : f32| x.tan().recip(),
        vector_fn: |x : f32x4| x.tan().recip(),
        scalar_type: f32,
        vector_type: f32x4,
    );
}

0 commit comments

Comments
 (0)
Please sign in to comment.