diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca1b92e7..a6a3d50f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ env: # If the compilation fails, then the version specified here needs to be bumped up to reality. # Be sure to also update the rust-version property in the workspace Cargo.toml file, # plus all the README.md files of the affected packages. - RUST_MIN_VER: "1.86" + RUST_MIN_VER: "1.88" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. RUST_MIN_VER_PKGS: "-p fearless_simd" diff --git a/CHANGELOG.md b/CHANGELOG.md index 34ac3b4a..79e51879 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,20 @@ You can find its changes [documented below](#020-2025-10-10). ## [Unreleased] -This release has an [MSRV][] of 1.86. +This release has an [MSRV][] of 1.88. + +### Changed + +- Breaking change: `Level::fallback` has been removed, replaced with `Level::baseline`. ([#105][] by [@DJMcNab][]) + This corresponds with a change to avoid compiling in support for the fallback level on compilation targets which don't + require it; this is most impactful for binary size on WASM, Apple Silicon Macs or Android. + A consequence of this is that the available variants on `Level` are now dependent on the target features you are compiling with. + The fallback level can be restored with the `force_support_fallback` cargo feature. We don't expect this to be necessary outside + of tests. + +### Removed + +- Breaking change: The (deprecated) `simd_dispatch!` macro. ([#105][] by [@DJMcNab][]) ## [0.3.0][] (2025-10-10) @@ -85,6 +98,7 @@ No changelog was kept for this release. 
[#93]: https://github.com/linebender/fearless_simd/pull/93 [#96]: https://github.com/linebender/fearless_simd/pull/96 [#99]: https://github.com/linebender/fearless_simd/pull/99 +[#105]: https://github.com/linebender/fearless_simd/pull/105 [Unreleased]: https://github.com/linebender/fearless_simd/compare/v0.3.0...HEAD [0.3.0]: https://github.com/linebender/fearless_simd/compare/v0.3.0...v0.2.0 diff --git a/Cargo.toml b/Cargo.toml index 2b307961..81395978 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. -rust-version = "1.86" +rust-version = "1.88" [workspace.lints] diff --git a/README.md b/README.md index 60c2ef07..7c3d95fe 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.86** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/check_targets.sh b/check_targets.sh new file mode 100644 index 00000000..90b09fb7 --- /dev/null +++ b/check_targets.sh @@ -0,0 +1,41 @@ +# A script to run cargo check for fearless_simd on each supported platform. +# We currently don't run this in CI, as we expect it would take too long. 
+# Before using, you must run: +# rustup target add aarch64-linux-android x86_64-unknown-linux-gnu i686-pc-windows-msvc wasm32-unknown-unknown riscv64gc-unknown-linux-gnu + +# Run using `sh ./check_targets.sh` +# TODO: Make into an xtask like thing so that windows users can use easily. + +# aarch64, both with and without neon support. +# Note that doing `-neon` is not sound due to the standard library's ABI, but the +# binary is never executed (nor indeed is it even created), and it's still a useful sanity check. +RUSTFLAGS=-Ctarget-feature=-neon cargo check -p fearless_simd --target aarch64-linux-android +RUSTFLAGS=-Ctarget-feature=-neon cargo check -p fearless_simd --target aarch64-linux-android --features force_support_fallback +cargo check -p fearless_simd --target aarch64-linux-android --features force_support_fallback +cargo check -p fearless_simd --target aarch64-linux-android + +# x86_64, at all supported static SIMD levels. +RUSTFLAGS=-Ctarget-feature=+avx2,+fma cargo check -p fearless_simd --target x86_64-unknown-linux-gnu +RUSTFLAGS=-Ctarget-feature=+avx2,+fma cargo check -p fearless_simd --target x86_64-unknown-linux-gnu --features force_support_fallback +RUSTFLAGS=-Ctarget-feature=+sse4.2 cargo check -p fearless_simd --target x86_64-unknown-linux-gnu +RUSTFLAGS=-Ctarget-feature=+sse4.2 cargo check -p fearless_simd --target x86_64-unknown-linux-gnu --features force_support_fallback +cargo check -p fearless_simd --target x86_64-unknown-linux-gnu +cargo check -p fearless_simd --target x86_64-unknown-linux-gnu --features force_support_fallback + +# x86 (i.e. 32 bit) at all supported static SIMD levels. 
+RUSTFLAGS=-Ctarget-feature=+avx2,+fma cargo check -p fearless_simd --target i686-pc-windows-msvc +RUSTFLAGS=-Ctarget-feature=+avx2,+fma cargo check -p fearless_simd --target i686-pc-windows-msvc --features force_support_fallback +RUSTFLAGS=-Ctarget-feature=+sse4.2 cargo check -p fearless_simd --target i686-pc-windows-msvc +RUSTFLAGS=-Ctarget-feature=+sse4.2 cargo check -p fearless_simd --target i686-pc-windows-msvc --features force_support_fallback +cargo check -p fearless_simd --target i686-pc-windows-msvc +cargo check -p fearless_simd --target i686-pc-windows-msvc --features force_support_fallback + +# Wasm, both with and without SIMD. +cargo check -p fearless_simd --target wasm32-unknown-unknown +cargo check -p fearless_simd --target wasm32-unknown-unknown --features force_support_fallback +RUSTFLAGS=-Ctarget-feature=+simd128 cargo check -p fearless_simd --target wasm32-unknown-unknown +RUSTFLAGS=-Ctarget-feature=+simd128 cargo check -p fearless_simd --target wasm32-unknown-unknown --features force_support_fallback + +# riscv64, which is importantly a target we don't support any SIMD levels for. 
+cargo check -p fearless_simd --target riscv64gc-unknown-linux-gnu +cargo check -p fearless_simd --target riscv64gc-unknown-linux-gnu --features force_support_fallback diff --git a/fearless_simd/Cargo.toml b/fearless_simd/Cargo.toml index 5f613979..33f82c6a 100644 --- a/fearless_simd/Cargo.toml +++ b/fearless_simd/Cargo.toml @@ -30,6 +30,10 @@ libm = ["dep:libm"] # beyond the basic SIMD operations abstracted on all platforms safe_wrappers = [] +# Force the "fallback" SIMD level to be supported +# This is primarily used for tests +force_support_fallback = [] + [lints] workspace = true diff --git a/fearless_simd/README.md b/fearless_simd/README.md index fc38deb2..ef074efa 100644 --- a/fearless_simd/README.md +++ b/fearless_simd/README.md @@ -115,6 +115,7 @@ The following crate [feature flags](https://doc.rust-lang.org/cargo/reference/fe - `libm`: Use floating point implementations from [libm]. - `safe_wrappers`: Include safe wrappers for (some) target feature specific intrinsics, beyond the basic SIMD operations abstracted on all platforms. +- `force_support_fallback`: Force the scalar fallback level to be supported, even if your compilation target has a better baseline. At least one of `std` and `libm` is required; `std` overrides `libm`. @@ -124,7 +125,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`. ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.86** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. 
diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index 6aea21d1..8e7d1db5 100644 --- a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -83,7 +83,10 @@ impl Simd for Fallback { type mask32s = mask32x4; #[inline(always)] fn level(self) -> Level { - Level::Fallback(self) + #[cfg(feature = "force_support_fallback")] + return Level::Fallback(self); + #[cfg(not(feature = "force_support_fallback"))] + Level::baseline() } #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index 6e2541fc..6496ecb1 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -52,7 +52,12 @@ impl Simd for Sse4_2 { type mask32s = mask32x4; #[inline(always)] fn level(self) -> Level { - Level::Sse4_2(self) + #[cfg(not(all(target_feature = "avx2", target_feature = "fma")))] + return Level::Sse4_2(self); + #[cfg(all(target_feature = "avx2", target_feature = "fma"))] + { + Level::baseline() + } } #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { diff --git a/fearless_simd/src/lib.rs b/fearless_simd/src/lib.rs index e3c1c738..17437000 100644 --- a/fearless_simd/src/lib.rs +++ b/fearless_simd/src/lib.rs @@ -77,6 +77,7 @@ //! - `libm`: Use floating point implementations from [libm]. //! - `safe_wrappers`: Include safe wrappers for (some) target feature specific intrinsics, //! beyond the basic SIMD operations abstracted on all platforms. +//! - `force_support_fallback`: Force the scalar fallback level to be supported, even if your compilation target has a better baseline. //! //! At least one of `std` and `libm` is required; `std` overrides `libm`. //! @@ -149,7 +150,25 @@ pub enum Level { /// Scalar fallback level, i.e. no supported SIMD features are to be used. /// /// This can be created with [`Level::fallback`]. - // TODO: Allow not compiling this in (probably only on web, but maybe elsewhere?) 
+ // We only want to compile the fallback implementation if: + // - We're on a supported architecture, but don't statically support the lowest alternative level; OR + // - We're on an unsupported architecture; OR + // - The fallback is forcibly enabled + #[cfg(any( + all(target_arch = "aarch64", not(target_feature = "neon")), + all( + any(target_arch = "x86", target_arch = "x86_64"), + not(target_feature = "sse4.2") + ), + all(target_arch = "wasm32", not(target_feature = "simd128")), + not(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "wasm32" + )), + feature = "force_support_fallback" + ))] Fallback(Fallback), /// The Neon instruction set on 64 bit ARM. #[cfg(target_arch = "aarch64")] @@ -158,7 +177,11 @@ pub enum Level { #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] WasmSimd128(WasmSimd128), /// The SSE4.2 instruction set on (32 and 64 bit) x86. - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + // We don't need to support this if the compilation target definitely supports something better. + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + not(all(target_feature = "avx2", target_feature = "fma")) + ))] Sse4_2(Sse4_2), /// The AVX2 and FMA instruction set on (32 and 64 bit) x86. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -204,8 +227,6 @@ impl Level { // WASM always either has the SIMD feature compiled in or not. 
#[cfg(target_feature = "simd128")] return Level::WasmSimd128(WasmSimd128::new_unchecked()); - #[cfg(not(target_feature = "simd128"))] - return Level::fallback(); } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { @@ -214,11 +235,34 @@ impl Level { { return unsafe { Level::Avx2(Avx2::new_unchecked()) }; } else if std::arch::is_x86_feature_detected!("sse4.2") { + #[cfg(not(all(target_feature = "avx2", target_feature = "fma")))] return unsafe { Level::Sse4_2(Sse4_2::new_unchecked()) }; } } - #[cfg(not(target_arch = "wasm32"))] - Self::fallback() + #[cfg(any( + all(target_arch = "aarch64", not(target_feature = "neon")), + all( + any(target_arch = "x86", target_arch = "x86_64"), + not(target_feature = "sse4.2") + ), + all(target_arch = "wasm32", not(target_feature = "simd128")), + not(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "wasm32" + )), + ))] + { + return Level::Fallback(Fallback::new()); + } + #[allow( + unreachable_code, + reason = "`is_x86_feature_detected` or equivalents will have returned `true`, or Fallback was used." + )] + { + unreachable!() + } } /// Get the target feature level suitable for this run. @@ -246,6 +290,10 @@ impl Level { #[cfg(target_arch = "aarch64")] #[inline] pub fn as_neon(self) -> Option { + #[allow( + unreachable_patterns, + reason = "On machines which statically support `neon`, there is only one variant." + )] match self { Level::Neon(neon) => Some(neon), _ => None, @@ -263,6 +311,10 @@ impl Level { #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] #[inline] pub fn as_wasm_simd128(self) -> Option { + #[allow( + unreachable_patterns, + reason = "On machines which statically support `simd128`, there is only one variant." 
+ )] match self { Level::WasmSimd128(simd128) => Some(simd128), _ => None, @@ -281,7 +333,16 @@ impl Level { #[inline] pub fn as_sse4_2(self) -> Option<Sse4_2> { match self { + // Safety: The Avx2 struct represents the `avx2` and `fma` target features being enabled. + // The `avx2` target feature *also* implicitly enables the "sse4.2" target feature, which is + // the only target feature required to make our Sse4_2 token. + Level::Avx2(_avx) => unsafe { Some(Sse4_2::new_unchecked()) }, + #[cfg(not(all(target_feature = "avx2", target_feature = "fma")))] Level::Sse4_2(sse42) => Some(sse42), + #[allow( + unreachable_patterns, + reason = "This arm is reachable on baseline x86/x86_64." + )] _ => None, } } @@ -297,16 +358,86 @@ impl Level { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[inline] pub fn as_avx2(self) -> Option<Avx2> { + #[allow( + unreachable_patterns, + reason = "On machines which statically support `avx2`, there is only one variant." + )] match self { Level::Avx2(avx2) => Some(avx2), _ => None, } } + /// Get the strongest statically supported SIMD level. + /// + /// That is, if your compilation run ambiently declares that a target feature is enabled, + /// this method will take that into account. + /// In most cases, you should use [`Level::new`] or [`Level::try_detect`]. + /// This method is mainly useful for libraries, where: + /// + /// 1) Your crate features request that you not use the standard library, i.e. they don't enable + /// your `"std"` crate feature (so you can't use [`Level::new`] and + /// [`Level::try_detect`] returns `None`); AND + /// 2) Your caller does not provide a [`Level`]; AND + /// 3) The library doesn't want to panic when it can't find a SIMD level. + /// + /// Note that in these cases, the library should clearly inform the integrator + /// that it is using a fallback and so not getting optimal performance (e.g. by panicking if + /// `debug_assertions` are enabled, and emitting a log with the "error" level otherwise). 
+ /// The messages given should also provide actionable fixes, such as pointing to the + /// entry-point which provides a `Level`, or your `"std"` feature. + /// + /// Note that this is unaffected by the `force_support_fallback` feature. + /// Instead, you should use [`Level::fallback`] (only available with that feature enabled) if you require the fallback level. + pub const fn baseline() -> Self { + // TODO: How do we possibly test that this method works in all cases? + // Note that you can use the `check_targets.sh` script to at least ensure that it compiles in all reasonable cases. + #[cfg(not(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "wasm32" + )))] + { + return Level::Fallback(Fallback::new()); + } + #[cfg(target_arch = "aarch64")] + { + #[cfg(target_feature = "neon")] + return unsafe { Level::Neon(Neon::new_unchecked()) }; + #[cfg(not(target_feature = "neon"))] + return Level::Fallback(Fallback::new()); + } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + #[cfg(all(target_feature = "avx2", target_feature = "fma"))] + return unsafe { Level::Avx2(Avx2::new_unchecked()) }; + #[cfg(all( + target_feature = "sse4.2", + not(all(target_feature = "avx2", target_feature = "fma")) + ))] + return unsafe { Level::Sse4_2(Sse4_2::new_unchecked()) }; + #[cfg(not(target_feature = "sse4.2"))] + return Level::Fallback(Fallback::new()); + } + #[cfg(target_arch = "wasm32")] + { + #[cfg(target_feature = "simd128")] + return Level::WasmSimd128(WasmSimd128::new_unchecked()); + #[cfg(not(target_feature = "simd128"))] + return Level::Fallback(Fallback::new()); + } + } + /// Create a scalar fallback level, which uses no SIMD instructions. /// - /// This is primarily intended for tests; most users should prefer [`Level::new`]. + /// This is primarily intended for tests; most users should prefer [`Level::new`] or [`Level::baseline`]. 
+ /// + /// Note that enabling the scalar fallback does *not* mean that the fallback branch will not + /// contain SIMD instructions. This is because the "ambient" compilation environment has SIMD + /// instructions available, which may be utilised by LLVM to auto-vectorise that path. #[inline] + #[cfg(feature = "force_support_fallback")] pub const fn fallback() -> Self { Self::Fallback(Fallback::new()) } @@ -328,50 +459,12 @@ impl Level { /// /// [enabled]: https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute #[inline] + #[expect( + unreachable_patterns, + reason = "Level is `non_exhaustive`, but we are in the crate it's defined." + )] pub fn dispatch(self, f: W) -> W::Output { - #[cfg(target_arch = "aarch64")] - #[target_feature(enable = "neon")] - #[inline] - fn dispatch_neon(f: W, neon: Neon) -> W::Output { - f.with_simd(neon) - } - - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] - #[inline] - fn dispatch_simd128(f: W, simd128: WasmSimd128) -> W::Output { - f.with_simd(simd128) - } - - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - #[target_feature(enable = "sse4.2")] - #[inline] - fn dispatch_sse4_2(f: W, sse4_2: Sse4_2) -> W::Output { - f.with_simd(sse4_2) - } - - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - #[target_feature(enable = "avx2,fma")] - #[inline] - fn dispatch_avx2(f: W, avx2: Avx2) -> W::Output { - f.with_simd(avx2) - } - - #[inline] - fn dispatch_fallback(f: W, fallback: Fallback) -> W::Output { - f.with_simd(fallback) - } - - match self { - #[cfg(target_arch = "aarch64")] - Level::Neon(neon) => unsafe { dispatch_neon(f, neon) }, - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] - Level::WasmSimd128(simd128) => dispatch_simd128(f, simd128), - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - Level::Sse4_2(sse4_2) => unsafe { dispatch_sse4_2(f, sse4_2) }, - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - Level::Avx2(avx2) => unsafe 
{ dispatch_avx2(f, avx2) }, - Level::Fallback(fallback) => dispatch_fallback(f, fallback), - } + dispatch!(self, simd => f.with_simd(simd)) } } diff --git a/fearless_simd/src/macros.rs b/fearless_simd/src/macros.rs index 76bdd505..1c25948d 100644 --- a/fearless_simd/src/macros.rs +++ b/fearless_simd/src/macros.rs @@ -3,117 +3,6 @@ //! Macros publicly exported -/// Defines a new function which dispatches to a SIMD-generic function, enabling the correct -/// target features. -/// -/// The `fn` token in the definition can be prefixed with a visibility (e.g. `pub`), -/// to set the visibility of the outer function. -/// We recommend that the implementation function remains private, and -/// should only be called through the dispatch function. -/// (The exact patterns for SIMD functions using Fearleess SIMD have not -/// yet been designed/enumerated). -/// -/// The implementation function (which is outside of this macro) *should* have the -/// `#[inline(always)]` attribute. -/// There are likely to be severe performance consequences if this is not the case, as -/// Rust will be unable to inline SIMD intrinsics in that case. -/// -/// The `fn` token in the definition can be prefixed with `unsafe`, to allow an unsafe inner function. -/// The safety comment added by you in the call to `simd_dispatch` the function must have -/// the preconditions required to call the inner function. -/// -/// # Examples -/// -/// ```rust -/// use fearless_simd::{Simd, simd_dispatch}; -/// -/// #[inline(always)] -/// fn sigmoid_impl(simd: S, x: &[f32], out: &mut [f32]) { /* ... */ } -/// -/// simd_dispatch!(fn sigmoid(level, x: &[f32], out: &mut [f32]) = sigmoid_impl); -/// ``` -/// -/// The signature of the generated function will be: -/// -/// ```rust -/// use fearless_simd::Level; -/// fn sigmoid(level: Level, x: &[f32], out: &mut [f32]) { /* ... */ } -/// ``` -#[macro_export] -#[deprecated = "use dispatch!(level, simd => operation) instead"] -macro_rules! 
simd_dispatch { - ( - $( #[$meta:meta] )* $vis:vis - unsafe fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )? - = $inner:ident - ) => { - simd_dispatch!{@impl => $(#[$meta])* $vis (unsafe) fn $func (level, $(,$arg:$ty,)*) $(->$ret)? = $inner} - }; - ( - $( #[$meta:meta] )* $vis:vis - fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )? - = $inner:ident - ) => { - simd_dispatch!{@impl => $(#[$meta])* $vis () fn $func (level $(,$arg:$ty)*) $(->$ret)? = $inner} - }; - ( - @impl => $( #[$meta:meta] )* $vis:vis - ($($unsafe: ident)?) fn $func:ident ( level $( , $arg:ident : $ty:ty $(,)? )* ) $( -> $ret:ty )? - = $inner:ident - ) => { - $( #[$meta] )* $vis - $($unsafe)? fn $func(level: $crate::Level $(, $arg: $ty )*) $( -> $ret )? { - #[cfg(target_arch = "aarch64")] - #[target_feature(enable = "neon")] - #[inline] - $($unsafe)? fn inner_neon(neon: $crate::aarch64::Neon $( , $arg: $ty )* ) $( -> $ret )? { - $($unsafe)? { - $inner( neon $( , $arg )* ) - } - } - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] - #[inline] - $($unsafe)? fn inner_wasm_simd128(simd128: $crate::wasm32::WasmSimd128 $( , $arg: $ty )* ) $( -> $ret )? { - $($unsafe)? { - $inner( simd128 $( , $arg )* ) - } - } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - #[target_feature(enable = "sse4.2")] - #[inline] - $($unsafe)? fn inner_sse4_2(sse4_2: $crate::x86::Sse4_2 $( , $arg: $ty )* ) $( -> $ret )? { - $($unsafe)? { - $inner( sse4_2 $( , $arg )* ) - } - } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - #[target_feature(enable = "avx2,fma")] - #[inline] - $($unsafe)? fn inner_avx2(avx2: $crate::x86::Avx2 $( , $arg: $ty )* ) $( -> $ret )? { - $($unsafe)? { - $inner( avx2 $( , $arg )* ) - } - } - match level { - $crate::Level::Fallback(fb) => { - $($unsafe)? 
{ - $inner(fb $( , $arg )* ) - } - }, - #[cfg(target_arch = "aarch64")] - $crate::Level::Neon(neon) => unsafe { inner_neon (neon $( , $arg )* ) } - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] - $crate::Level::WasmSimd128(wasm) => unsafe { inner_wasm_simd128 (wasm $( , $arg )* ) } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - $crate::Level::Sse4_2(sse4_2) => unsafe { inner_sse4_2(sse4_2 $( , $arg)* ) } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - $crate::Level::Avx2(avx2) => unsafe { inner_avx2(avx2 $( , $arg)* ) } - _ => unreachable!() - } - } - }; -} - /// Access the applicable [`Simd`] for a given `level`, and perform an operation using it. /// /// This macro is the root of how any explicitly written SIMD functions in this crate are @@ -158,24 +47,19 @@ macro_rules! simd_dispatch { /// [`Simd`]: crate::Simd #[macro_export] macro_rules! dispatch { - ($level:expr, $simd:pat => $op:expr) => {{ + // This falls through to the next branch, but with `forced_fallback_arm` turned into a boolean literal + // indicating whether or not the `force_support_fallback` crate feature is enabled. + ($level:expr, $simd:pat => $op:expr) => {{ $crate::internal_unstable_dispatch_inner!($level, $simd => $op) }}; + (@impl $level:expr, $simd:pat => $op:expr; $forced_fallback_arm: literal) => {{ /// Convert the `Simd` value into an `impl Simd`, which enforces that /// it is correctly handled. + // TODO: Just make into a `pub` function in fearless_simd itself? #[inline(always)] fn launder(x: S) -> impl $crate::Simd { x } match $level { - $crate::Level::Fallback(fb) => { - let $simd = launder(fb); - // This vectorize call does nothing, but it is reasonable to be consistent here. - $crate::Simd::vectorize( - fb, - #[inline(always)] - || $op, - ) - } #[cfg(target_arch = "aarch64")] $crate::Level::Neon(neon) => { let $simd = launder(neon); @@ -194,7 +78,11 @@ macro_rules! 
dispatch { || $op, ) } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + // This fallthrough logic is documented at the definition site of `Level`. + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + not(all(target_feature = "avx2", target_feature = "fma")) + ))] $crate::Level::Sse4_2(sse4_2) => { let $simd = launder(sse4_2); $crate::Simd::vectorize( @@ -212,11 +100,62 @@ macro_rules! dispatch { || $op, ) } + #[cfg(any( + all(target_arch = "aarch64", not(target_feature = "neon")), + all( + any(target_arch = "x86", target_arch = "x86_64"), + not(target_feature = "sse4.2") + ), + all(target_arch = "wasm32", not(target_feature = "simd128")), + not(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "wasm32" + )), + $forced_fallback_arm + ))] + $crate::Level::Fallback(fb) => { + let $simd = launder(fb); + // This vectorize call does nothing, but it is reasonable to be consistent here. + $crate::Simd::vectorize( + fb, + #[inline(always)] + || $op, + ) + } _ => unreachable!(), } }}; } +// This macro turns whether the `force_support_fallback` crate feature is enabled into a boolean literal +// in `dispatch`, which allows it to be used correctly cross-crate. +// This trickery is required because macros are expanded in the context of the calling crate, including for +// evaluating `cfg`s. + +/// Implementation detail of [`crate::dispatch`]; this is not public API. +#[macro_export] +#[doc(hidden)] +#[cfg(feature = "force_support_fallback")] +macro_rules! internal_unstable_dispatch_inner { + ($level:expr, $simd:pat => $op:expr) => { + $crate::dispatch!( + @impl $level, $simd => $op; true + ) + }; +} + +/// Implementation detail of [`crate::dispatch`]; this is not public API. +#[macro_export] +#[doc(hidden)] +#[cfg(not(feature = "force_support_fallback"))] +macro_rules! 
internal_unstable_dispatch_inner { + ($level:expr, $simd:pat => $op:expr) => { + $crate::dispatch!(@impl $level, $simd => $op; false) + }; +} + #[cfg(test)] // This expect also validates that we haven't missed any levels! #[expect( diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index a69eca0f..1a94b986 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -407,7 +407,10 @@ fn mk_simd_impl() -> TokenStream { type mask32s = mask32x4; #[inline(always)] fn level(self) -> Level { - Level::#level_tok(self) + #[cfg(feature = "force_support_fallback")] + return Level::#level_tok(self); + #[cfg(not(feature = "force_support_fallback"))] + Level::baseline() } #[inline] diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 00ed79e4..3baf0b55 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -115,7 +115,12 @@ fn mk_simd_impl() -> TokenStream { type mask32s = mask32x4; #[inline(always)] fn level(self) -> Level { - Level::#level_tok(self) + #[cfg(not(all(target_feature = "avx2", target_feature = "fma")))] + return Level::#level_tok(self); + #[cfg(all(target_feature = "avx2", target_feature = "fma"))] + { + Level::baseline() + } } #[inline]