From 2683197c25d223cda7c2d4dfa552fda4267c8a43 Mon Sep 17 00:00:00 2001 From: quaternic <57393910+quaternic@users.noreply.github.com> Date: Sun, 20 Jul 2025 22:27:01 +0300 Subject: [PATCH 1/2] substitute truncation for flooring in rempio2_large --- libm/src/math/rem_pio2_large.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs index 6d679bbe9..20d58c41a 100644 --- a/libm/src/math/rem_pio2_large.rs +++ b/libm/src/math/rem_pio2_large.rs @@ -11,7 +11,7 @@ * ==================================================== */ -use super::{floor, scalbn}; +use super::{scalbn, trunc}; // initial value for jk const INIT_JK: [usize; 4] = [3, 4, 4, 6]; @@ -290,7 +290,7 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> /* compute n */ z = scalbn(z, q0); /* actual value of z */ - z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ + z -= 8.0 * trunc(z * 0.125); /* trim off integer >= 8 */ n = z as i32; z -= n as f64; ih = 0; From 37b4f278a2e57cd75d03468e1e33d5b375f33364 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 13 Jul 2025 13:26:02 +0200 Subject: [PATCH 2/2] implement `floor` and `ceil` with inline assembly on `i586` --- libm-test/src/precision.rs | 22 ---------- libm/src/math/arch/i586.rs | 85 +++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 51 deletions(-) diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs index 32825b15d..3fb8c1b37 100644 --- a/libm-test/src/precision.rs +++ b/libm-test/src/precision.rs @@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase { impl MaybeOverride<(f64,)> for SpecialCase { fn check_float(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { - if cfg!(x86_no_sse) - && ctx.base_name == BaseName::Ceil - && ctx.basis == CheckBasis::Musl - && input.0 < 0.0 - && input.0 > -1.0 - && expected == F::ZERO - && actual == F::ZERO - { - // musl returns -0.0, we return +0.0 - return XFAIL("i586 ceil signed zero"); - } - if cfg!(x86_no_sse) && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven) && (expected - actual).abs() <= F::ONE @@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase { return XFAIL("i586 rint rounding mode"); } - if cfg!(x86_no_sse) - && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor) - && expected.eq_repr(F::NEG_ZERO) - && actual.eq_repr(F::ZERO) - { - // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0. - // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955 - return XFAIL("i586 ceil/floor signed zero"); - } - if cfg!(x86_no_sse) && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) { diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs index f92b9a2af..b897bd231 100644 --- a/libm/src/math/arch/i586.rs +++ b/libm/src/math/arch/i586.rs @@ -1,37 +1,64 @@ //! Architecture-specific support for x86-32 without SSE2 +//! +//! We use an alternative implementation on x86, because the +//! main implementation fails with the x87 FPU used by +//! debian i386, probably due to excess precision issues. +//! +//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these +//! functions are implemented in this way. -use super::super::fabs; +// FIXME: when the MSRV allows, use naked functions instead. -/// Use an alternative implementation on x86, because the -/// main implementation fails with the x87 FPU used by -/// debian i386, probably due to excess precision issues. -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. -pub fn ceil(x: f64) -> f64 { - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated < x { - return truncated + 1.0; - } else { - return truncated; - } - } else { - return x; +pub extern "C" fn ceil(mut x: f64) -> f64 { + unsafe { + core::arch::asm!( + "fld qword ptr [{x}]", + // Save the FPU control word, using `x` as scratch space. + "fstcw [{x}]", + // Set rounding control to 0b10 (+∞). + "mov word ptr [{x} + 2], 0x0b7f", + "fldcw [{x} + 2]", + // Round. + "frndint", + // Restore FPU control word. + "fldcw [{x}]", + // Save rounded value to memory. + "fstp qword ptr [{x}]", + x = in(reg) &mut x, + // All the x87 FPU stack is used, all registers must be clobbered + out("st(0)") _, out("st(1)") _, + out("st(2)") _, out("st(3)") _, + out("st(4)") _, out("st(5)") _, + out("st(6)") _, out("st(7)") _, + options(nostack), + ); } + x } -/// Use an alternative implementation on x86, because the -/// main implementation fails with the x87 FPU used by -/// debian i386, probably due to excess precision issues. -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. -pub fn floor(x: f64) -> f64 { - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated > x { - return truncated - 1.0; - } else { - return truncated; - } - } else { - return x; +pub extern "C" fn floor(mut x: f64) -> f64 { + unsafe { + core::arch::asm!( + "fld qword ptr [{x}]", + // Save the FPU control word, using `x` as scratch space. + "fstcw [{x}]", + // Set rounding control to 0b01 (-∞). + "mov word ptr [{x} + 2], 0x077f", + "fldcw [{x} + 2]", + // Round. + "frndint", + // Restore FPU control word. + "fldcw [{x}]", + // Save rounded value to memory. + "fstp qword ptr [{x}]", + x = in(reg) &mut x, + // All the x87 FPU stack is used, all registers must be clobbered + out("st(0)") _, out("st(1)") _, + out("st(2)") _, out("st(3)") _, + out("st(4)") _, out("st(5)") _, + out("st(6)") _, out("st(7)") _, + options(nostack), + ); } + x }