You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
let b = (($max) - ($min)) * (1.0 / NUM_ITER as $scalar_type);
329
-
let a = $min;
330
-
let sf = $scalar_fn;
331
-
let vf = $vector_fn;
332
-
for i in (0..NUM_ITER / 4){
333
-
let fi = (i * 4) as $scalar_type;
334
-
let x = <$vector_type>::from_array([
335
-
(fi + 0.0)* b + a,
336
-
(fi + 1.0)* b + a,
337
-
(fi + 2.0)* b + a,
338
-
(fi + 3.0)* b + a,
339
-
]);
340
-
let yref = <$vector_type>::from_array([sf(x[0]), sf(x[1]), sf(x[2]), sf(x[3])]);
341
-
let y = vf(x);
342
-
let e = (y - yref);
343
-
if !(e.abs().lanes_le(limit)).all(){
344
-
panic!("\nx ={:20.16?}\ne ={:20.16?}\nlimit ={:20.16?}\nvector={:20.16?}\nscalar={:20.16?}\nvector_fn={}", x, e, limit, y, yref, stringify!($vector_fn));
345
-
}
346
-
}
347
-
}};
348
-
}
349
-
350
-
#[test]
351
-
fn sin_f32() {
352
-
use core::f32::consts::PI;
353
-
let one_ulp = (2.0_f32).powi(-23);
354
-
355
-
test_range!(
356
-
min: -PI/4.0,
357
-
max:PI/4.0,
358
-
limit: one_ulp *1.0,
359
-
scalar_fn: |x :f32| x.sin(),
360
-
vector_fn: |x : f32x4| x.sin(),
361
-
scalar_type:f32,
362
-
vector_type: f32x4,
363
-
);
364
-
365
-
test_range!(
366
-
min: -PI/2.0,
367
-
max:PI/2.0,
368
-
limit: one_ulp *2.0,
369
-
scalar_fn: |x :f32| x.sin(),
370
-
vector_fn: |x : f32x4| x.sin(),
371
-
scalar_type:f32,
372
-
vector_type: f32x4,
373
-
);
374
-
375
-
test_range!(
376
-
min: -PI,
377
-
max:PI,
378
-
limit: one_ulp *8.0,
379
-
scalar_fn: |x :f32| x.sin(),
380
-
vector_fn: |x : f32x4| x.sin(),
381
-
scalar_type:f32,
382
-
vector_type: f32x4,
383
-
);
384
-
}
385
-
386
-
#[test]
387
-
fn cos_f32() {
388
-
use core::f32::consts::PI;
389
-
let one_ulp = (2.0_f32).powi(-23);
390
-
391
-
// In the range +/- pi/4 the input has 1 ulp of error.
392
-
test_range!(
393
-
min: -PI/4.0,
394
-
max:PI/4.0,
395
-
limit: one_ulp *1.0,
396
-
scalar_fn: |x :f32| x.cos(),
397
-
vector_fn: |x : f32x4| x.cos(),
398
-
scalar_type:f32,
399
-
vector_type: f32x4,
400
-
);
401
-
402
-
// In the range +/- pi/2 the input and output have 2 ulp of error.
403
-
test_range!(
404
-
min: -PI/2.0,
405
-
max:PI/2.0,
406
-
limit: one_ulp *2.0,
407
-
scalar_fn: |x :f32| x.cos(),
408
-
vector_fn: |x : f32x4| x.cos(),
409
-
scalar_type:f32,
410
-
vector_type: f32x4,
411
-
);
412
-
413
-
// In the range +/- pi the input has 4 ulp of error and the output has 5.
414
-
// Note that the scalar cos also has this error but the implementation
415
-
// is different.
416
-
test_range!(
417
-
min: -PI,
418
-
max:PI,
419
-
limit: one_ulp *8.0,
420
-
scalar_fn: |x :f32| x.cos(),
421
-
vector_fn: |x : f32x4| x.cos(),
422
-
scalar_type:f32,
423
-
vector_type: f32x4,
424
-
);
425
-
}
426
-
427
-
#[test]
428
-
fn tan_f32() {
429
-
use core::f32::consts::PI;
430
-
let one_ulp = (2.0_f32).powi(-23);
431
-
432
-
// For the outsides, reciprocal accuracy is important.
433
-
// Note that the vector function correctly gets -inf for -PI/2
434
-
// but the scalar function does not.
435
-
test_range!(
436
-
min: -PI/2.0 + 0.00001,
437
-
max: -PI/4.0,
438
-
limit: one_ulp *3.0,
439
-
scalar_fn: |x :f32| x.tan().recip(),
440
-
vector_fn: |x : f32x4| x.tan().recip(),
441
-
scalar_type:f32,
442
-
vector_type: f32x4,
443
-
);
444
-
445
-
// For the insides, absolute accuracy is important.
let b = (($max) - ($min)) * (1.0 / NUM_ITER as $scalar_type);
15
+
let a = $min;
16
+
let sf = $scalar_fn;
17
+
let vf = $vector_fn;
18
+
for i in (0..NUM_ITER / 4){
19
+
let fi = (i * 4) as $scalar_type;
20
+
let x = <$vector_type>::from_array([
21
+
(fi + 0.0)* b + a,
22
+
(fi + 1.0)* b + a,
23
+
(fi + 2.0)* b + a,
24
+
(fi + 3.0)* b + a,
25
+
]);
26
+
let yref = <$vector_type>::from_array([sf(x[0]), sf(x[1]), sf(x[2]), sf(x[3])]);
27
+
let y = vf(x);
28
+
let e = (y - yref);
29
+
if !(e.abs().lanes_le(limit)).all(){
30
+
panic!("\nx ={:20.16?}\ne ={:20.16?}\nlimit ={:20.16?}\nvector={:20.16?}\nscalar={:20.16?}\nvector_fn={}", x, e, limit, y, yref, stringify!($vector_fn));
31
+
}
32
+
}
33
+
}};
34
+
}
35
+
36
+
#[test]
37
+
fn sin_f32() {
38
+
use core::f32::consts::PI;
39
+
use core_simd::f32x4;
40
+
use crate::StdLibm;
41
+
42
+
let one_ulp = (2.0_f32).powi(-23);
43
+
44
+
test_range!(
45
+
min: -PI/4.0,
46
+
max:PI/4.0,
47
+
limit: one_ulp *1.0,
48
+
scalar_fn: |x :f32| x.sin(),
49
+
vector_fn: |x : f32x4| x.sin(),
50
+
scalar_type:f32,
51
+
vector_type: f32x4,
52
+
);
53
+
54
+
test_range!(
55
+
min: -PI/2.0,
56
+
max:PI/2.0,
57
+
limit: one_ulp *2.0,
58
+
scalar_fn: |x :f32| x.sin(),
59
+
vector_fn: |x : f32x4| x.sin(),
60
+
scalar_type:f32,
61
+
vector_type: f32x4,
62
+
);
63
+
64
+
test_range!(
65
+
min: -PI,
66
+
max:PI,
67
+
limit: one_ulp *8.0,
68
+
scalar_fn: |x :f32| x.sin(),
69
+
vector_fn: |x : f32x4| x.sin(),
70
+
scalar_type:f32,
71
+
vector_type: f32x4,
72
+
);
73
+
}
74
+
75
+
#[test]
76
+
fn cos_f32() {
77
+
use core::f32::consts::PI;
78
+
use core_simd::f32x4;
79
+
use crate::StdLibm;
80
+
81
+
let one_ulp = (2.0_f32).powi(-23);
82
+
83
+
// In the range +/- pi/4 the input has 1 ulp of error.
84
+
test_range!(
85
+
min: -PI/4.0,
86
+
max:PI/4.0,
87
+
limit: one_ulp *1.0,
88
+
scalar_fn: |x :f32| x.cos(),
89
+
vector_fn: |x : f32x4| x.cos(),
90
+
scalar_type:f32,
91
+
vector_type: f32x4,
92
+
);
93
+
94
+
// In the range +/- pi/2 the input and output have 2 ulp of error.
95
+
test_range!(
96
+
min: -PI/2.0,
97
+
max:PI/2.0,
98
+
limit: one_ulp *2.0,
99
+
scalar_fn: |x :f32| x.cos(),
100
+
vector_fn: |x : f32x4| x.cos(),
101
+
scalar_type:f32,
102
+
vector_type: f32x4,
103
+
);
104
+
105
+
// In the range +/- pi the input has 4 ulp of error and the output has 5.
106
+
// Note that the scalar cos also has this error but the implementation
107
+
// is different.
108
+
test_range!(
109
+
min: -PI,
110
+
max:PI,
111
+
limit: one_ulp *8.0,
112
+
scalar_fn: |x :f32| x.cos(),
113
+
vector_fn: |x : f32x4| x.cos(),
114
+
scalar_type:f32,
115
+
vector_type: f32x4,
116
+
);
117
+
}
118
+
119
+
#[test]
120
+
fn tan_f32() {
121
+
use core::f32::consts::PI;
122
+
use core_simd::f32x4;
123
+
use crate::StdLibm;
124
+
125
+
let one_ulp = (2.0_f32).powi(-23);
126
+
127
+
// For the outsides, reciprocal accuracy is important.
128
+
// Note that the vector function correctly gets -inf for -PI/2
129
+
// but the scalar function does not.
130
+
test_range!(
131
+
min: -PI/2.0 + 0.00001,
132
+
max: -PI/4.0,
133
+
limit: one_ulp *3.0,
134
+
scalar_fn: |x :f32| x.tan().recip(),
135
+
vector_fn: |x : f32x4| x.tan().recip(),
136
+
scalar_type:f32,
137
+
vector_type: f32x4,
138
+
);
139
+
140
+
// For the insides, absolute accuracy is important.
0 commit comments