Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 370831d

Browse files
authored
Merge pull request #391 from rust-lang/tgross35/f16-f128-scalbn
Add `ldexpf16`, `ldexpf128`, `scalbnf16`, and `scalbnf128`
2 parents 41d2216 + e12dae1 commit 370831d

File tree

16 files changed

+200
-64
lines changed

16 files changed

+200
-64
lines changed

libm/crates/libm-macros/src/shared.rs

+14
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
134134
None,
135135
&["jn", "yn"],
136136
),
137+
(
138+
// `(f16, i32) -> f16`
139+
FloatTy::F16,
140+
Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] },
141+
None,
142+
&["scalbnf16", "ldexpf16"],
143+
),
137144
(
138145
// `(f32, i32) -> f32`
139146
FloatTy::F32,
@@ -148,6 +155,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
148155
None,
149156
&["scalbn", "ldexp"],
150157
),
158+
(
159+
// `(f128, i32) -> f128`
160+
FloatTy::F128,
161+
Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] },
162+
None,
163+
&["scalbnf128", "ldexpf128"],
164+
),
151165
(
152166
// `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
153167
FloatTy::F32,

libm/crates/libm-test/benches/icount.rs

+4
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ main!(
131131
icount_bench_jn_group,
132132
icount_bench_jnf_group,
133133
icount_bench_ldexp_group,
134+
icount_bench_ldexpf128_group,
135+
icount_bench_ldexpf16_group,
134136
icount_bench_ldexpf_group,
135137
icount_bench_lgamma_group,
136138
icount_bench_lgamma_r_group,
@@ -163,6 +165,8 @@ main!(
163165
icount_bench_roundf16_group,
164166
icount_bench_roundf_group,
165167
icount_bench_scalbn_group,
168+
icount_bench_scalbnf128_group,
169+
icount_bench_scalbnf16_group,
166170
icount_bench_scalbnf_group,
167171
icount_bench_sin_group,
168172
icount_bench_sinf_group,

libm/crates/libm-test/benches/random.rs

+4
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,14 @@ libm_macros::for_each_function! {
133133
| fminf16
134134
| fmodf128
135135
| fmodf16
136+
| ldexpf128
137+
| ldexpf16
136138
| rintf128
137139
| rintf16
138140
| roundf128
139141
| roundf16
142+
| scalbnf128
143+
| scalbnf16
140144
| sqrtf128
141145
| sqrtf16
142146
| truncf128

libm/crates/libm-test/src/mpfloat.rs

+33-28
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ libm_macros::for_each_function! {
159159
jnf,
160160
ldexp,
161161
ldexpf,
162+
ldexpf128,
163+
ldexpf16,
162164
lgamma_r,
163165
lgammaf_r,
164166
modf,
@@ -178,6 +180,8 @@ libm_macros::for_each_function! {
178180
roundf16,
179181
scalbn,
180182
scalbnf,
183+
scalbnf128,
184+
scalbnf16,
181185
sincos,sincosf,
182186
trunc,
183187
truncf,
@@ -351,34 +355,6 @@ macro_rules! impl_op_for_ty {
351355
}
352356
}
353357

354-
// `ldexp` and `scalbn` are the same for binary floating point, so just forward all
355-
// methods.
356-
impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
357-
type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;
358-
359-
fn new_mp() -> Self::MpTy {
360-
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
361-
}
362-
363-
fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
364-
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
365-
}
366-
}
367-
368-
impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
369-
type MpTy = MpFloat;
370-
371-
fn new_mp() -> Self::MpTy {
372-
new_mpfloat::<Self::FTy>()
373-
}
374-
375-
fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
376-
this.assign(input.0);
377-
*this <<= input.1;
378-
prep_retval::<Self::FTy>(this, Ordering::Equal)
379-
}
380-
}
381-
382358
impl MpOp for crate::op::[<sincos $suffix>]::Routine {
383359
type MpTy = (MpFloat, MpFloat);
384360

@@ -464,6 +440,35 @@ macro_rules! impl_op_for_ty_all {
464440
this.1.assign(input.1);
465441
let ord = this.0.rem_assign_round(&this.1, Nearest);
466442
prep_retval::<Self::RustRet>(&mut this.0, ord)
443+
444+
}
445+
}
446+
447+
// `ldexp` and `scalbn` are the same for binary floating point, so just forward all
448+
// methods.
449+
impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
450+
type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;
451+
452+
fn new_mp() -> Self::MpTy {
453+
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
454+
}
455+
456+
fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
457+
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
458+
}
459+
}
460+
461+
impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
462+
type MpTy = MpFloat;
463+
464+
fn new_mp() -> Self::MpTy {
465+
new_mpfloat::<Self::FTy>()
466+
}
467+
468+
fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
469+
this.assign(input.0);
470+
*this <<= input.1;
471+
prep_retval::<Self::FTy>(this, Ordering::Equal)
467472
}
468473
}
469474
}

libm/crates/libm-test/src/precision.rs

+4
Original file line numberDiff line numberDiff line change
@@ -551,8 +551,12 @@ fn int_float_common<F1: Float, F2: Float>(
551551
DEFAULT
552552
}
553553

554+
#[cfg(f16_enabled)]
555+
impl MaybeOverride<(f16, i32)> for SpecialCase {}
554556
impl MaybeOverride<(f32, i32)> for SpecialCase {}
555557
impl MaybeOverride<(f64, i32)> for SpecialCase {}
558+
#[cfg(f128_enabled)]
559+
impl MaybeOverride<(f128, i32)> for SpecialCase {}
556560

557561
impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
558562
fn check_float<F: Float>(

libm/crates/libm-test/tests/compare_built_musl.rs

+4
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,14 @@ libm_macros::for_each_function! {
9595
fminf16,
9696
fmodf128,
9797
fmodf16,
98+
ldexpf128,
99+
ldexpf16,
98100
rintf128,
99101
rintf16,
100102
roundf128,
101103
roundf16,
104+
scalbnf128,
105+
scalbnf16,
102106
sqrtf128,
103107
sqrtf16,
104108
truncf128,

libm/crates/util/src/main.rs

+4
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
102102
| fminf16
103103
| fmodf128
104104
| fmodf16
105+
| ldexpf128
106+
| ldexpf16
105107
| rintf128
106108
| rintf16
107109
| roundf128
108110
| roundf16
111+
| scalbnf128
112+
| scalbnf16
109113
| sqrtf128
110114
| sqrtf16
111115
| truncf128

libm/etc/function-definitions.json

+26
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,18 @@
554554
],
555555
"type": "f32"
556556
},
557+
"ldexpf128": {
558+
"sources": [
559+
"src/math/ldexpf128.rs"
560+
],
561+
"type": "f128"
562+
},
563+
"ldexpf16": {
564+
"sources": [
565+
"src/math/ldexpf16.rs"
566+
],
567+
"type": "f16"
568+
},
557569
"lgamma": {
558570
"sources": [
559571
"src/libm_helper.rs",
@@ -774,6 +786,20 @@
774786
],
775787
"type": "f32"
776788
},
789+
"scalbnf128": {
790+
"sources": [
791+
"src/math/generic/scalbn.rs",
792+
"src/math/scalbnf128.rs"
793+
],
794+
"type": "f128"
795+
},
796+
"scalbnf16": {
797+
"sources": [
798+
"src/math/generic/scalbn.rs",
799+
"src/math/scalbnf16.rs"
800+
],
801+
"type": "f16"
802+
},
777803
"sin": {
778804
"sources": [
779805
"src/libm_helper.rs",

libm/etc/function-list.txt

+4
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ jn
7979
jnf
8080
ldexp
8181
ldexpf
82+
ldexpf128
83+
ldexpf16
8284
lgamma
8385
lgamma_r
8486
lgammaf
@@ -111,6 +113,8 @@ roundf128
111113
roundf16
112114
scalbn
113115
scalbnf
116+
scalbnf128
117+
scalbnf16
114118
sin
115119
sincos
116120
sincosf

libm/src/math/generic/scalbn.rs

+74-11
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,27 @@ where
3131
let exp_max: i32 = F::EXP_BIAS as i32;
3232
let exp_min = -(exp_max - 1);
3333

34-
// 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64)
34+
// 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
3535
let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
3636

37-
// 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64)
37+
// 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
3838
let f_exp_min = F::from_parts(false, 1, zero);
3939

40-
// 2 ^ sig_total_bits, representation of what can be accounted for with subnormals
41-
let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
40+
// 2 ^ sig_total_bits, multiplier to normalize subnormals (0x1p53 for f64)
41+
let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
42+
43+
/*
44+
* The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
45+
* where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
46+
* `n = -Emin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of
47+
* the final scale operation by prescaling by the max/min power representable by `F`.
48+
*/
4249

4350
if n > exp_max {
51+
// Worst case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`.
52+
// This can be reached by three scaling multiplications (two here and one final).
53+
debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3);
54+
4455
x *= f_exp_max;
4556
n -= exp_max;
4657
if n > exp_max {
@@ -51,21 +62,61 @@ where
5162
}
5263
}
5364
} else if n < exp_min {
54-
let mul = f_exp_min * f_exp_subnorm;
55-
let add = (exp_max - 1) - sig_total_bits as i32;
65+
// When scaling toward 0, the prescaling is limited to a value that does not allow `x` to
66+
// go subnormal. This avoids double rounding.
67+
if F::BITS > 16 {
68+
// `mul` s.t. `!(x * mul).is_subnormal() ∀ x`
69+
let mul = f_exp_min * f_pow_subnorm;
70+
let add = -exp_min - sig_total_bits as i32;
71+
72+
// Worst case negative `n`: `x` is the maximum positive value, the result is `F::MIN`.
73+
// This must be reachable by three scaling multiplications (two here and one final).
74+
debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min);
5675

57-
x *= mul;
58-
n += add;
59-
if n < exp_min {
6076
x *= mul;
6177
n += add;
78+
6279
if n < exp_min {
63-
n = exp_min;
80+
x *= mul;
81+
n += add;
82+
83+
if n < exp_min {
84+
n = exp_min;
85+
}
86+
}
87+
} else {
88+
// `f16` is unique compared to other float types in that the difference between the
89+
// minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
90+
// small, only three. The above method depends on decrementing `n` by `add` two times;
91+
// for other float types this works out because `add` is a substantial fraction of
92+
// the exponent range. For `f16`, however, 3 is relatively small compared to the
93+
// exponent range (which is 39), so that requires ~10 prescale rounds rather than two.
94+
//
95+
// Work around this by using a different algorithm that calculates the prescale
96+
// dynamically based on the maximum possible value. This adds more operations per round
97+
// since it needs to construct the scale, but works better in the general case.
98+
let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
99+
let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
100+
101+
x *= mul;
102+
n += add;
103+
104+
if n < exp_min {
105+
let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
106+
let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);
107+
108+
x *= mul;
109+
n += add;
110+
111+
if n < exp_min {
112+
n = exp_min;
113+
}
64114
}
65115
}
66116
}
67117

68-
x * F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero)
118+
let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero);
119+
x * scale
69120
}
70121

71122
#[cfg(test)]
@@ -111,6 +162,12 @@ mod tests {
111162
assert!(scalbn(-F::NAN, -10).is_nan());
112163
}
113164

165+
#[test]
166+
#[cfg(f16_enabled)]
167+
fn spec_test_f16() {
168+
spec_test::<f16>();
169+
}
170+
114171
#[test]
115172
fn spec_test_f32() {
116173
spec_test::<f32>();
@@ -120,4 +177,10 @@ mod tests {
120177
fn spec_test_f64() {
121178
spec_test::<f64>();
122179
}
180+
181+
#[test]
182+
#[cfg(f128_enabled)]
183+
fn spec_test_f128() {
184+
spec_test::<f128>();
185+
}
123186
}

libm/src/math/ldexpf128.rs

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
2+
pub fn ldexpf128(x: f128, n: i32) -> f128 {
3+
super::scalbnf128(x, n)
4+
}

libm/src/math/ldexpf16.rs

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
2+
pub fn ldexpf16(x: f16, n: i32) -> f16 {
3+
super::scalbnf16(x, n)
4+
}

0 commit comments

Comments
 (0)