Skip to content

Commit e29bd9b

Browse files
author
andy-thomason
committed
Fix cast issues.
1 parent f6a47fa commit e29bd9b

File tree

1 file changed

+79
-49
lines changed

1 file changed

+79
-49
lines changed

crates/std_float/src/libm32.rs

+79 -49
Original file line number | Diff line number | Diff line change
@@ -16,16 +16,19 @@ where
1616
fn asin(self) -> Self {
1717
let PI_BY_2 = Self::splat(1.57079632679489661923);
1818
let arg = self;
19-
let LIM: Self = Self::splat(0.70710678118654752440);
20-
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
21-
let s: Self =
22-
((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0));
23-
let x: Self =
19+
let LIM = Self::splat(0.70710678118654752440);
20+
let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
21+
let s = ((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0));
22+
let x =
2423
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
25-
let y: Self = (Self::splat(0.11644821f32))
26-
.mul_add(x * x, Self::splat(0.04343228f32))
27-
.mul_add(x * x, Self::splat(0.17078044f32))
28-
.mul_add(x * x, Self::splat(0.99991643f32))
24+
let y = (Self::splat(0.12778643f32))
25+
.mul_add(x * x, -Self::splat(0.12145509f32))
26+
.mul_add(x * x, Self::splat(0.09684546f32))
27+
.mul_add(x * x, Self::splat(0.009571692f32))
28+
.mul_add(x * x, Self::splat(0.047712374f32))
29+
.mul_add(x * x, Self::splat(0.07478066f32))
30+
.mul_add(x * x, Self::splat(0.1666726f32))
31+
.mul_add(x * x, Self::splat(1f32))
2932
* x;
3033
((arg * arg).lanes_lt(LIM * LIM)).select(y, c - y * s)
3134
}
@@ -34,31 +37,30 @@ where
3437
let PI_BY_2 = Self::splat(1.57079632679489661923);
3538
let PI = Self::splat(3.14159265358979323846);
3639
let arg = self;
37-
let LIM: Self = Self::splat(0.9);
38-
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0));
39-
let s: Self =
40-
((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0));
41-
let x: Self =
40+
let LIM = Self::splat(0.70710678118654752440);
41+
let c = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0));
42+
let s = ((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0));
43+
let x =
4244
((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt());
43-
let y: Self = (Self::splat(1.3740137f32))
44-
.mul_add(x * x, -Self::splat(3.1993167f32))
45-
.mul_add(x * x, Self::splat(3.103398f32))
46-
.mul_add(x * x, -Self::splat(1.4533828f32))
47-
.mul_add(x * x, Self::splat(0.41395915f32))
48-
.mul_add(x * x, Self::splat(0.03113007f32))
49-
.mul_add(x * x, Self::splat(0.16861732f32))
50-
.mul_add(x * x, Self::splat(0.99998593f32))
45+
let y = (Self::splat(0.12778643f32))
46+
.mul_add(x * x, -Self::splat(0.12145509f32))
47+
.mul_add(x * x, Self::splat(0.09684546f32))
48+
.mul_add(x * x, Self::splat(0.009571692f32))
49+
.mul_add(x * x, Self::splat(0.047712374f32))
50+
.mul_add(x * x, Self::splat(0.07478066f32))
51+
.mul_add(x * x, Self::splat(0.1666726f32))
52+
.mul_add(x * x, Self::splat(1f32))
5153
* x;
5254
((arg * arg).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s)
5355
}
5456
#[inline]
5557
fn atan(self) -> Self {
5658
let PI_BY_2 = Self::splat(1.57079632679489661923);
5759
let arg = self;
58-
let LIM: Self = Self::splat(1.0);
59-
let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
60-
let x: Self = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
61-
let y: Self = (-Self::splat(0.0039602574f32))
60+
let LIM = Self::splat(1.0);
61+
let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
62+
let x = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip());
63+
let y = (-Self::splat(0.0039602574f32))
6264
.mul_add(x * x, Self::splat(0.021659138f32))
6365
.mul_add(x * x, -Self::splat(0.05587457f32))
6466
.mul_add(x * x, Self::splat(0.09664151f32))
@@ -74,16 +76,16 @@ where
7476
let PI_BY_2 = Self::splat(1.57079632679489661923);
7577
let PI = Self::splat(3.14159265358979323846);
7678
let y = self;
77-
let offset180: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI, PI);
78-
let x1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-x, x);
79-
let y1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-y, y);
80-
let offset1: Self = ((x).lanes_lt(Self::splat(0.0))).select(offset180, Self::splat(0.0));
81-
let offset90: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
82-
let x2: Self = ((y1.abs()).lanes_gt(x1)).select(y1, x1);
83-
let y2: Self = ((y1.abs()).lanes_gt(x1)).select(-x1, y1);
84-
let offset2: Self = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1);
85-
let x3: Self = y2 / x2;
86-
let y3: Self = (-Self::splat(0.0039602574f32))
79+
let offset180 = ((y).lanes_lt(Self::splat(0.0))).select(-PI, PI);
80+
let x1 = ((x).lanes_lt(Self::splat(0.0))).select(-x, x);
81+
let y1 = ((x).lanes_lt(Self::splat(0.0))).select(-y, y);
82+
let offset1 = ((x).lanes_lt(Self::splat(0.0))).select(offset180, Self::splat(0.0));
83+
let offset90 = ((y).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2);
84+
let x2 = ((y1.abs()).lanes_gt(x1)).select(y1, x1);
85+
let y2 = ((y1.abs()).lanes_gt(x1)).select(-x1, y1);
86+
let offset2 = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1);
87+
let x3 = y2 / x2;
88+
let y3 = (-Self::splat(0.0039602574f32))
8789
.mul_add(x3 * x3, Self::splat(0.021659138f32))
8890
.mul_add(x3 * x3, -Self::splat(0.05587457f32))
8991
.mul_add(x3 * x3, Self::splat(0.09664151f32))
@@ -96,12 +98,12 @@ where
9698
}
9799
#[inline]
98100
fn exp2(self) -> Self {
101+
let EXP2_SCALE = Self::splat(8388608.0f32);
102+
let EXP2_ONE = Self::splat(1065353216.0f32);
99103
let arg = self;
100-
let r: Self = arg.round();
101-
let mul: Self = Self::from_bits(unsafe {
102-
(r.mul_add(Self::splat(8388608.0f32), Self::splat(1065353216.0f32))).to_uint_unchecked()
103-
});
104-
let x: Self = arg - r;
104+
let r = arg.round();
105+
let mul = Self::from_bits((r.mul_add(EXP2_SCALE, EXP2_ONE)).cast::<u32>());
106+
let x = arg - r;
105107
(Self::splat(0.000015310081f32))
106108
.mul_add(x, Self::splat(0.0001547802f32))
107109
.mul_add(x, Self::splat(0.0013333454f32))
@@ -119,11 +121,39 @@ where
119121
(arg * LOG2_E).exp2()
120122
}
121123
#[inline]
124+
fn log2(self) -> Self {
125+
let ONE_BITS = Self::UintType::splat(0x3f800000_u32);
126+
let ONE_MASK = Self::UintType::splat(0x007fffff_u32);
127+
let LOG2_OFFSET = Self::IntType::splat(127_i32);
128+
let LOG2_SHIFT = Self::IntType::splat(23_i32);
129+
let arg = self;
130+
let arg_bits = arg.to_bits();
131+
let exponent = (arg_bits.cast::<i32>() >> LOG2_SHIFT) - LOG2_OFFSET;
132+
let x = Self::from_bits((arg_bits & ONE_MASK) | ONE_BITS) - Self::splat(1.5);
133+
let y = (Self::splat(0.00033940058f32))
134+
.mul_add(x, -Self::splat(0.0005435155f32))
135+
.mul_add(x, Self::splat(0.00051382656f32))
136+
.mul_add(x, -Self::splat(0.0008369385f32))
137+
.mul_add(x, Self::splat(0.0015296092f32))
138+
.mul_add(x, -Self::splat(0.0025230509f32))
139+
.mul_add(x, Self::splat(0.0041680275f32))
140+
.mul_add(x, -Self::splat(0.007033716f32))
141+
.mul_add(x, Self::splat(0.012062632f32))
142+
.mul_add(x, -Self::splat(0.021109587f32))
143+
.mul_add(x, Self::splat(0.037996903f32))
144+
.mul_add(x, -Self::splat(0.071244195f32))
145+
.mul_add(x, Self::splat(0.1424884f32))
146+
.mul_add(x, -Self::splat(0.3205989f32))
147+
.mul_add(x, Self::splat(0.9617967f32))
148+
.mul_add(x, Self::splat(0.5849625f32));
149+
y + (exponent.cast::<f32>())
150+
}
151+
#[inline]
122152
fn sin(self) -> Self {
123153
let RECIP_2PI = Self::splat(0.15915494309189533577);
124154
let arg = self;
125-
let scaled: Self = arg * RECIP_2PI;
126-
let x: Self = scaled - scaled.round();
155+
let scaled = arg * RECIP_2PI;
156+
let x = scaled - scaled.round();
127157
(-Self::splat(12.26886f32))
128158
.mul_add(x * x, Self::splat(41.21624f32))
129159
.mul_add(x * x, -Self::splat(76.58672f32))
@@ -136,8 +166,8 @@ where
136166
fn cos(self) -> Self {
137167
let RECIP_2PI = Self::splat(0.15915494309189533577);
138168
let arg = self;
139-
let scaled: Self = arg * RECIP_2PI;
140-
let x: Self = scaled - scaled.round();
169+
let scaled = arg * RECIP_2PI;
170+
let x = scaled - scaled.round();
141171
(Self::splat(6.5286584f32))
142172
.mul_add(x * x, -Self::splat(25.973276f32))
143173
.mul_add(x * x, Self::splat(60.17118f32))
@@ -150,10 +180,10 @@ where
150180
fn tan(self) -> Self {
151181
let RECIP_PI = Self::splat(0.31830988618379067154);
152182
let arg = self;
153-
let scaled: Self = arg * RECIP_PI;
154-
let x: Self = scaled - scaled.round();
155-
let recip: Self = Self::splat(1.0) / (x * x - Self::splat(0.25));
156-
let y: Self = (Self::splat(0.014397301f32))
183+
let scaled = arg * RECIP_PI;
184+
let x = scaled - scaled.round();
185+
let recip = Self::splat(1.0) / (x * x - Self::splat(0.25));
186+
let y = (Self::splat(0.014397301f32))
157187
.mul_add(x * x, Self::splat(0.021017345f32))
158188
.mul_add(x * x, Self::splat(0.05285888f32))
159189
.mul_add(x * x, Self::splat(0.13475448f32))

0 commit comments

Comments
 (0)