Skip to content

Commit e86ef27

Browse files
authored
Merge pull request #622 from tgross35/f128-div
Add `__divtf3`
2 parents 4797774 + 5c153cf commit e86ef27

File tree

10 files changed

+550
-697
lines changed

10 files changed

+550
-697
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ of being added to Rust.
222222

223223
- [x] addtf3.c
224224
- [x] comparetf2.c
225-
- [ ] divtf3.c
225+
- [x] divtf3.c
226226
- [x] extenddftf2.c
227227
- [x] extendhfsf2.c
228228
- [x] extendhftf2.c

build.rs

-1
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,6 @@ mod c {
526526
("__floatsitf", "floatsitf.c"),
527527
("__floatunditf", "floatunditf.c"),
528528
("__floatunsitf", "floatunsitf.c"),
529-
("__divtf3", "divtf3.c"),
530529
("__powitf2", "powitf2.c"),
531530
("__fe_getround", "fp_mode.c"),
532531
("__fe_raise_inexact", "fp_mode.c"),

examples/intrinsics.rs

+5
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,10 @@ mod intrinsics {
256256
a * b
257257
}
258258

259+
pub fn divtf(a: f128, b: f128) -> f128 {
260+
a / b
261+
}
262+
259263
pub fn subtf(a: f128, b: f128) -> f128 {
260264
a - b
261265
}
@@ -440,6 +444,7 @@ fn run() {
440444
bb(aeabi_uldivmod(bb(2), bb(3)));
441445
bb(ashlti3(bb(2), bb(2)));
442446
bb(ashrti3(bb(2), bb(2)));
447+
bb(divtf(bb(2.), bb(2.)));
443448
bb(divti3(bb(2), bb(2)));
444449
bb(eqtf(bb(2.), bb(2.)));
445450
bb(extendhfdf(bb(2.)));

src/float/div.rs

+376-668
Large diffs are not rendered by default.

src/float/mod.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ pub(crate) trait Float:
3131
+ ops::Rem<Output = Self>
3232
{
3333
/// A uint of the same width as the float
34-
type Int: Int;
34+
type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
3535

3636
/// A int of the same width as the float
37-
type SignedInt: Int;
37+
type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
3838

3939
/// An int capable of containing the exponent bits plus a sign bit. This is signed.
4040
type ExpInt: Int;
@@ -51,7 +51,7 @@ pub(crate) trait Float:
5151
/// The bitwidth of the exponent
5252
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
5353

54-
/// The maximum value of the exponent
54+
/// The saturated value of the exponent (infinite representation), in the rightmost position.
5555
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
5656

5757
/// The exponent bias value
@@ -83,7 +83,7 @@ pub(crate) trait Float:
8383
/// Returns true if the sign is negative
8484
fn is_sign_negative(self) -> bool;
8585

86-
/// Returns the exponent with bias
86+
/// Returns the exponent, not adjusting for bias.
8787
fn exp(self) -> Self::ExpInt;
8888

8989
/// Returns the significand with no implicit bit (or the "fractional" part)
@@ -175,7 +175,7 @@ macro_rules! float_impl {
175175
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
176176
let shift = significand
177177
.leading_zeros()
178-
.wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros());
178+
.wrapping_sub(Self::EXPONENT_BITS);
179179
(
180180
1i32.wrapping_sub(shift as i32),
181181
significand << shift as Self::Int,

src/int/big.rs

+36-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ macro_rules! impl_common {
9393
type Output = Self;
9494

9595
fn shl(self, rhs: u32) -> Self::Output {
96-
todo!()
96+
unimplemented!("only used to meet trait bounds")
9797
}
9898
}
9999
};
@@ -102,6 +102,41 @@ macro_rules! impl_common {
102102
impl_common!(i256);
103103
impl_common!(u256);
104104

105+
impl ops::Shr<u32> for u256 {
106+
type Output = Self;
107+
108+
fn shr(self, rhs: u32) -> Self::Output {
109+
assert!(rhs < Self::BITS, "attempted to shift right with overflow");
110+
111+
if rhs == 0 {
112+
return self;
113+
}
114+
115+
let mut ret = self;
116+
let byte_shift = rhs / 64;
117+
let bit_shift = rhs % 64;
118+
119+
for idx in 0..4 {
120+
let base_idx = idx + byte_shift as usize;
121+
122+
let Some(base) = ret.0.get(base_idx) else {
123+
ret.0[idx] = 0;
124+
continue;
125+
};
126+
127+
let mut new_val = base >> bit_shift;
128+
129+
if let Some(new) = ret.0.get(base_idx + 1) {
130+
new_val |= new.overflowing_shl(64 - bit_shift).0;
131+
}
132+
133+
ret.0[idx] = new_val;
134+
}
135+
136+
ret
137+
}
138+
}
139+
105140
macro_rules! word {
106141
(1, $val:expr) => {
107142
(($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64

testcrate/benches/float_div.rs

+27-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
#![cfg_attr(f128_enabled, feature(f128))]
2+
13
use compiler_builtins::float::div;
2-
use criterion::{criterion_group, criterion_main, Criterion};
4+
use criterion::{criterion_main, Criterion};
35
use testcrate::float_bench;
46

57
float_bench! {
@@ -64,5 +66,28 @@ float_bench! {
6466
],
6567
}
6668

67-
criterion_group!(float_div, div_f32, div_f64);
69+
#[cfg(f128_enabled)]
70+
float_bench! {
71+
name: div_f128,
72+
sig: (a: f128, b: f128) -> f128,
73+
crate_fn: div::__divtf3,
74+
crate_fn_ppc: div::__divkf3,
75+
sys_fn: __divtf3,
76+
sys_fn_ppc: __divkf3,
77+
sys_available: not(feature = "no-sys-f128"),
78+
asm: []
79+
}
80+
81+
pub fn float_div() {
82+
let mut criterion = Criterion::default().configure_from_args();
83+
84+
div_f32(&mut criterion);
85+
div_f64(&mut criterion);
86+
87+
#[cfg(f128_enabled)]
88+
{
89+
div_f128(&mut criterion);
90+
}
91+
}
92+
6893
criterion_main!(float_div);

testcrate/src/bench.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,14 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
3030

3131
// FIXME(f16_f128): system symbols have incorrect results
3232
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
33-
const X86_NO_SSE_SKIPPED: &[&str] =
34-
&["add_f128", "sub_f128", "mul_f128", "powi_f32", "powi_f64"];
33+
const X86_NO_SSE_SKIPPED: &[&str] = &[
34+
"add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
35+
];
3536

3637
// FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
3738
// uses `compiler-rt` version.
3839
// <https://github.com/llvm/llvm-project/issues/91840>
39-
const AARCH64_SKIPPED: &[&str] = &["mul_f128"];
40+
const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];
4041

4142
// FIXME(llvm): system symbols have incorrect results on Windows
4243
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>

testcrate/tests/big.rs

+73
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,76 @@ fn widen_mul_u128() {
5959
}
6060
assert!(errors.is_empty());
6161
}
62+
63+
#[test]
64+
fn not_u128() {
65+
assert_eq!(!u256::ZERO, u256::MAX);
66+
}
67+
68+
#[test]
69+
fn shr_u128() {
70+
let only_low = [
71+
1,
72+
u16::MAX.into(),
73+
u32::MAX.into(),
74+
u64::MAX.into(),
75+
u128::MAX,
76+
];
77+
78+
let mut errors = Vec::new();
79+
80+
for a in only_low {
81+
for perturb in 0..10 {
82+
let a = a.saturating_add(perturb);
83+
for shift in 0..128 {
84+
let res = a.widen() >> shift;
85+
let expected = (a >> shift).widen();
86+
if res != expected {
87+
errors.push((a.widen(), shift, res, expected));
88+
}
89+
}
90+
}
91+
}
92+
93+
let check = [
94+
(
95+
u256::MAX,
96+
1,
97+
u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]),
98+
),
99+
(
100+
u256::MAX,
101+
5,
102+
u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]),
103+
),
104+
(u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
105+
(u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
106+
(u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
107+
(u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
108+
(u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
109+
(u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
110+
(u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
111+
(u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
112+
(u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
113+
(u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
114+
(u256::MAX, 254, u256([0b11, 0, 0, 0])),
115+
(u256::MAX, 255, u256([1, 0, 0, 0])),
116+
];
117+
118+
for (input, shift, expected) in check {
119+
let res = input >> shift;
120+
if res != expected {
121+
errors.push((input, shift, res, expected));
122+
}
123+
}
124+
125+
for (a, b, res, expected) in &errors {
126+
eprintln!(
127+
"FAILURE: {} >> {b} = {} got {}",
128+
hexu(*a),
129+
hexu(*expected),
130+
hexu(*res),
131+
);
132+
}
133+
assert!(errors.is_empty());
134+
}

testcrate/tests/div_rem.rs

+23-16
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![feature(f128)]
12
#![allow(unused_macros)]
23

34
use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
@@ -115,7 +116,13 @@ macro_rules! float {
115116
fuzz_float_2(N, |x: $f, y: $f| {
116117
let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
117118
let quo1: $f = $fn(x, y);
118-
#[cfg(not(target_arch = "arm"))]
119+
120+
// ARM SIMD instructions always flush subnormals to zero
121+
if cfg!(target_arch = "arm") &&
122+
((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) {
123+
return;
124+
}
125+
119126
if !Float::eq_repr(quo0, quo1) {
120127
panic!(
121128
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
@@ -126,21 +133,6 @@ macro_rules! float {
126133
quo1
127134
);
128135
}
129-
130-
// ARM SIMD instructions always flush subnormals to zero
131-
#[cfg(target_arch = "arm")]
132-
if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) {
133-
if !Float::eq_repr(quo0, quo1) {
134-
panic!(
135-
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
136-
stringify!($fn),
137-
x,
138-
y,
139-
quo0,
140-
quo1
141-
);
142-
}
143-
}
144136
});
145137
}
146138
)*
@@ -155,4 +147,19 @@ mod float_div {
155147
f32, __divsf3, Single, all();
156148
f64, __divdf3, Double, all();
157149
}
150+
151+
#[cfg(not(feature = "no-f16-f128"))]
152+
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
153+
float! {
154+
f128, __divtf3, Quad,
155+
// FIXME(llvm): there is a bug in LLVM rt.
156+
// See <https://github.com/llvm/llvm-project/issues/91840>.
157+
not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
158+
}
159+
160+
#[cfg(not(feature = "no-f16-f128"))]
161+
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
162+
float! {
163+
f128, __divkf3, Quad, not(feature = "no-sys-f128");
164+
}
158165
}

0 commit comments

Comments
 (0)