diff --git a/libm/src/lib.rs b/libm/src/lib.rs
index 85ed5e2c9..e44e19130 100644
--- a/libm/src/lib.rs
+++ b/libm/src/lib.rs
@@ -23,6 +23,7 @@
 #![allow(clippy::unreadable_literal)]
 #![allow(clippy::zero_divided_by_zero)]
 #![forbid(unsafe_op_in_unsafe_fn)]
+#![feature(funnel_shifts)]
 
 mod libm_helper;
 mod math;
diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs
index c316d93f5..379342eb2 100644
--- a/libm/src/math/support/big.rs
+++ b/libm/src/math/support/big.rs
@@ -6,8 +6,13 @@ mod tests;
 use core::{fmt, ops};
 
 use super::{DInt, HInt, Int, MinInt};
+use crate::support::Word;
 
 const U128_LO_MASK: u128 = u64::MAX as u128;
+/// Number of [`Word`]s needed to hold a `u128`.
+const U128_WORDS: usize = (u128::BITS / Word::BITS) as usize;
+/// Number of [`Word`]s needed to hold a [`u256`].
+const U256_WORDS: usize = U128_WORDS * 2;
 
 /// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
@@ -31,6 +36,29 @@ impl u256 {
             hi: self.hi as i128,
         }
     }
+
+    /// Split into words, with the least significant word first.
+    fn to_words(self) -> [Word; U256_WORDS] {
+        // The result with 64-bit words will be: [lo.lo(), lo.hi(), hi.lo(), hi.hi()].
+        let mut ret = [Word::ZERO; U256_WORDS];
+        for i in 0..U128_WORDS {
+            let shift = i as u32 * Word::BITS;
+            ret[i] = (self.lo >> shift) as Word;
+            ret[i + U128_WORDS] = (self.hi >> shift) as Word;
+        }
+        ret
+    }
+
+    /// Perform the opposite of [`Self::to_words`].
+    fn from_words(words: [Word; U256_WORDS]) -> Self {
+        let mut ret = u256::ZERO;
+        for i in 0..U128_WORDS {
+            let shift = i as u32 * Word::BITS;
+            ret.lo |= (words[i] as u128) << shift;
+            ret.hi |= (words[i + U128_WORDS] as u128) << shift;
+        }
+        ret
+    }
 }
 
 /// A 256-bit signed integer represented as two 128-bit native-endian limbs.
@@ -58,6 +86,16 @@ impl i256 {
             hi: self.hi as u128,
         }
     }
+
+    /// Split into words, with the least significant word first.
+    fn to_words(self) -> [Word; U256_WORDS] {
+        self.unsigned().to_words()
+    }
+
+    /// Perform the opposite of [`Self::to_words`].
+    fn from_words(words: [Word; U256_WORDS]) -> Self {
+        u256::from_words(words).signed()
+    }
 }
 
 impl MinInt for u256 {
@@ -129,60 +167,101 @@ macro_rules! impl_common {
                 Self { lo, hi }
             }
         }
+    };
+}
 
-        impl ops::Shl<u32> for $ty {
-            type Output = Self;
+impl ops::Shr<u32> for u256 {
+    type Output = Self;
 
-            fn shl(mut self, rhs: u32) -> Self::Output {
-                debug_assert!(rhs < Self::BITS, "attempt to shift left with overflow");
+    fn shr(self, rhs: u32) -> Self::Output {
+        debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
 
-                let half_bits = Self::BITS / 2;
-                let low_mask = half_bits - 1;
-                let s = rhs & low_mask;
+        // Shifts below 128 funnel bits from `hi` down into `lo`; larger shifts only keep `hi`.
+        if rhs < 128 {
+            let lo = u128::funnel_shr(self.hi, self.lo, rhs);
+            let hi = self.hi >> rhs;
+            Self { lo, hi }
+        } else {
+            let lo = self.hi >> (rhs - 128);
+            Self { lo, hi: 0 }
+        }
+    }
+}
 
-                let lo = self.lo;
-                let hi = self.hi;
+impl ops::Shr<u32> for i256 {
+    type Output = Self;
 
-                self.lo = lo << s;
+    fn shr(self, rhs: u32) -> Self::Output {
+        debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
 
-                if rhs & half_bits == 0 {
-                    self.hi = (lo >> (low_mask ^ s) >> 1) as _;
-                    self.hi |= hi << s;
-                } else {
-                    self.hi = self.lo as _;
-                    self.lo = 0;
-                }
-                self
+        // Set up an array with the input in the low half, zeros in the upper half
+        let mut words = [Word::ZERO; U256_WORDS * 2];
+        words[..U256_WORDS].copy_from_slice(&self.to_words());
+
+        if i256::SIGNED {
+            // For i256, branchlessly set the upper words to all ones if the input
+            // is negative.
+            let top_word = words[U256_WORDS - 1].signed() >> (Word::BITS - 1);
+            for x in &mut words[U256_WORDS..] {
+                *x = top_word.unsigned();
             }
         }
 
-        impl ops::Shr<u32> for $ty {
-            type Output = Self;
-
-            fn shr(mut self, rhs: u32) -> Self::Output {
-                debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
+        let shift = rhs & 255; // limit to 255 in cases of overflow
+        let word_shift = (shift / Word::BITS) as usize;
+        let bit_shift = shift % Word::BITS;
+
+        let mut ret = [Word::ZERO; U256_WORDS];
+
+        // Each output word is a coarse (word-sized) shift plus a small bit shift. Note that
+        // these loops get unrolled.
+        for i in 0..U256_WORDS {
+            if i < (U256_WORDS - 1) {
+                let hi = words[word_shift + i + 1];
+                let lo = words[word_shift + i];
+
+                ret[i] = Word::funnel_shr(hi, lo, bit_shift);
+            } else if i256::SIGNED {
+                // The upper word doesn't get any sign bits via a funnel shift, so we need
+                // an arithmetic shift to preserve sign.
+                let mut x = words[word_shift + i].signed();
+                x >>= bit_shift;
+                ret[i] = x.unsigned();
+            } else {
+                ret[i] = words[word_shift + i] >> bit_shift;
+            }
+        }
 
-                let half_bits = Self::BITS / 2;
-                let low_mask = half_bits - 1;
-                let s = rhs & low_mask;
+        i256::from_words(ret)
+    }
+}
 
-                let lo = self.lo;
-                let hi = self.hi;
+impl ops::Shl<u32> for u256 {
+    type Output = Self;
 
-                self.hi = hi >> s;
+    #[inline(never)]
+    fn shl(self, rhs: u32) -> Self::Output {
+        debug_assert!(rhs < Self::BITS, "attempt to shift left with overflow");
 
-                #[allow(unused_comparisons)]
-                if rhs & half_bits == 0 {
-                    self.lo = (hi << (low_mask ^ s) << 1) as _;
-                    self.lo |= lo >> s;
-                } else {
-                    self.lo = self.hi as _;
-                    self.hi = if hi < 0 { !0 } else { 0 };
-                }
-                self
-            }
+        // Shifts below 128 funnel bits from `lo` up into `hi`; larger shifts only keep `lo`.
+        if rhs < 128 {
+            let hi = u128::funnel_shl(self.hi, self.lo, rhs);
+            let lo = self.lo << rhs;
+            Self { lo, hi }
+        } else {
+            let hi = self.lo << (rhs - 128);
+            Self { lo: 0, hi }
         }
-    };
+    }
+}
+
+impl ops::Shl<u32> for i256 {
+    type Output = Self;
+
+    fn shl(self, rhs: u32) -> Self::Output {
+        // A left shift produces the same bits for signed and unsigned, so reuse `u256`.
+        (self.unsigned() << rhs).signed()
+    }
 }
 
 impl_common!(i256);
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
index f113f9d62..52a7a2280 100644
--- a/libm/src/math/support/int_traits.rs
+++ b/libm/src/math/support/int_traits.rs
@@ -347,6 +347,26 @@ pub trait HInt: Int {
     fn zero_widen_mul(self, rhs: Self) -> Self::D;
     /// Widening multiplication. This cannot overflow.
     fn widen_mul(self, rhs: Self) -> Self::D;
+
+    // FIXME(msrv): Use funnel shifts from `core` as a trait on `Int` when available.
+
+    /// Concatenate `self` and `right`, shift by `shift`, and return the upper half.
+    #[allow(unused)]
+    fn funnel_shl(self, right: Self, shift: u32) -> Self {
+        assert!(!Self::SIGNED, "unsupported for signed integers");
+        assert!(shift < Self::BITS, "attempt to funnel shift with overflow");
+        let n = Self::D::from_lo_hi(right, self);
+        (n << shift).hi()
+    }
+
+    /// Concatenate `self` and `right`, shift by `shift`, and return the lower half.
+    #[allow(unused)]
+    fn funnel_shr(self, right: Self, shift: u32) -> Self {
+        assert!(!Self::SIGNED, "unsupported for signed integers");
+        assert!(shift < Self::BITS, "attempt to funnel shift with overflow");
+        let n = Self::D::from_lo_hi(right, self);
+        (n >> shift).lo()
+    }
 }
 
 macro_rules! impl_d_int {
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index f28c02104..f55e62a4a 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -30,6 +30,21 @@ pub use hex_float::{DisplayHex, Hex, hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt, NarrowingDiv};
 pub use modular::linear_mul_reduction;
 
+cfg_if! {
+    if #[cfg(target_pointer_width = "16")] {
+        /// Concrete sized integer compatible with `usize` (exists for using `DInt`/`HInt`).
+        pub type Word = u16;
+    } else if #[cfg(target_pointer_width = "32")] {
+        /// Concrete sized integer compatible with `usize` (exists for using `DInt`/`HInt`).
+        pub type Word = u32;
+    } else if #[cfg(target_pointer_width = "64")] {
+        /// Concrete sized integer compatible with `usize` (exists for using `DInt`/`HInt`).
+        pub type Word = u64;
+    } else {
+        compile_error!("unsupported pointer width");
+    }
+}
+
 /// Hint to the compiler that the current path is cold.
 pub fn cold_path() {
     #[cfg(intrinsics_enabled)]
@@ -68,3 +83,14 @@ pub unsafe fn unchecked_div_isize(x: isize, y: isize) -> isize {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn word_size() {
+        assert_eq!(size_of::<Word>(), size_of::<usize>());
+        assert_eq!(align_of::<Word>(), align_of::<usize>());
+    }
+}