Skip to content

Commit 36fd3f5

Browse files
committed
x86: make SSE2 required for i686 targets and use it to pass SIMD types
1 parent 13f3924 commit 36fd3f5

16 files changed

+245
-84
lines changed

compiler/rustc_target/src/callconv/mod.rs

+66-43
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::abi::{
1010
TyAndLayout,
1111
};
1212
use crate::spec::abi::Abi as SpecAbi;
13-
use crate::spec::{self, HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
13+
use crate::spec::{self, HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustAbi, WasmCAbi};
1414

1515
mod aarch64;
1616
mod amdgpu;
@@ -736,14 +736,30 @@ impl<'a, Ty> FnAbi<'a, Ty> {
736736
C: HasDataLayout + HasTargetSpec,
737737
{
738738
let spec = cx.target_spec();
739-
match &spec.arch[..] {
739+
match &*spec.arch {
740740
"x86" => x86::compute_rust_abi_info(cx, self, abi),
741741
"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
742742
"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
743743
"aarch64" => aarch64::compute_rust_abi_info(cx, self),
744744
_ => {}
745745
};
746746

747+
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
748+
// May only return `true` if the target will always pass those arguments the same way,
749+
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
750+
// target features are required to pass a SIMD value in registers must be listed in
751+
// the `abi_required_features` for the current target and ABI.
752+
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
753+
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
754+
// to 128-bit-sized vectors.
755+
"x86" if spec.rust_abi == Some(RustAbi::X86Sse2) => arg.layout.size.bits() <= 128,
756+
"x86_64" if spec.rust_abi != Some(RustAbi::X86Softfloat) => {
757+
arg.layout.size.bits() <= 128
758+
}
759+
// So far, we haven't implemented this logic for any other target.
760+
_ => false,
761+
};
762+
747763
for (arg_idx, arg) in self
748764
.args
749765
.iter_mut()
@@ -755,7 +771,10 @@ impl<'a, Ty> FnAbi<'a, Ty> {
755771
continue;
756772
}
757773

758-
if arg_idx.is_none() && arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2 {
774+
if arg_idx.is_none()
775+
&& arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
776+
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
777+
{
759778
// Return values larger than 2 registers using a return area
760779
// pointer. LLVM and Cranelift disagree about how to return
761780
// values that don't fit in the registers designated for return
@@ -794,53 +813,57 @@ impl<'a, Ty> FnAbi<'a, Ty> {
794813
// rustc_target already ensure any return value which doesn't
795814
// fit in the available amount of return registers is passed in
796815
// the right way for the current target.
816+
// The adjustment is also neither necessary nor desired for types with
817+
// a vector representation; those are handled below.
797818
arg.make_indirect();
798819
continue;
799820
}
800821

801822
match arg.layout.backend_repr {
802-
BackendRepr::Memory { .. } => {}
803-
804-
// This is a fun case! The gist of what this is doing is
805-
// that we want callers and callees to always agree on the
806-
// ABI of how they pass SIMD arguments. If we were to *not*
807-
// make these arguments indirect then they'd be immediates
808-
// in LLVM, which means that they'd used whatever the
809-
// appropriate ABI is for the callee and the caller. That
810-
// means, for example, if the caller doesn't have AVX
811-
// enabled but the callee does, then passing an AVX argument
812-
// across this boundary would cause corrupt data to show up.
813-
//
814-
// This problem is fixed by unconditionally passing SIMD
815-
// arguments through memory between callers and callees
816-
// which should get them all to agree on ABI regardless of
817-
// target feature sets. Some more information about this
818-
// issue can be found in #44367.
819-
//
820-
// Note that the intrinsic ABI is exempt here as
821-
// that's how we connect up to LLVM and it's unstable
822-
// anyway, we control all calls to it in libstd.
823-
BackendRepr::Vector { .. }
824-
if abi != SpecAbi::RustIntrinsic && spec.simd_types_indirect =>
825-
{
826-
arg.make_indirect();
827-
continue;
823+
BackendRepr::Memory { .. } => {
824+
// Compute `Aggregate` ABI.
825+
826+
let is_indirect_not_on_stack =
827+
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
828+
assert!(is_indirect_not_on_stack);
829+
830+
let size = arg.layout.size;
831+
if arg.layout.is_sized() && size <= Pointer(AddressSpace::DATA).size(cx) {
832+
// We want to pass small aggregates as immediates, but using
833+
// an LLVM aggregate type for this leads to bad optimizations,
834+
// so we pick an appropriately sized integer type instead.
835+
arg.cast_to(Reg { kind: RegKind::Integer, size });
836+
}
828837
}
829838

830-
_ => continue,
831-
}
832-
// Compute `Aggregate` ABI.
833-
834-
let is_indirect_not_on_stack =
835-
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
836-
assert!(is_indirect_not_on_stack);
837-
838-
let size = arg.layout.size;
839-
if !arg.layout.is_unsized() && size <= Pointer(AddressSpace::DATA).size(cx) {
840-
// We want to pass small aggregates as immediates, but using
841-
// an LLVM aggregate type for this leads to bad optimizations,
842-
// so we pick an appropriately sized integer type instead.
843-
arg.cast_to(Reg { kind: RegKind::Integer, size });
839+
BackendRepr::Vector { .. } => {
840+
// This is a fun case! The gist of what this is doing is
841+
// that we want callers and callees to always agree on the
842+
// ABI of how they pass SIMD arguments. If we were to *not*
843+
// make these arguments indirect then they'd be immediates
844+
// in LLVM, which means that they'd use whatever the
845+
// appropriate ABI is for the callee and the caller. That
846+
// means, for example, if the caller doesn't have AVX
847+
// enabled but the callee does, then passing an AVX argument
848+
// across this boundary would cause corrupt data to show up.
849+
//
850+
// This problem is fixed by unconditionally passing SIMD
851+
// arguments through memory between callers and callees
852+
// which should get them all to agree on ABI regardless of
853+
// target feature sets. Some more information about this
854+
// issue can be found in #44367.
855+
//
856+
// Note that the intrinsic ABI is exempt here as those are not
857+
// real functions anyway, and LLVM expects certain types.
858+
if abi != SpecAbi::RustIntrinsic
859+
&& spec.simd_types_indirect
860+
&& !can_pass_simd_directly(arg)
861+
{
862+
arg.make_indirect();
863+
}
864+
}
865+
866+
_ => {}
844867
}
845868
}
846869
}

compiler/rustc_target/src/spec/json.rs

+15
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,19 @@ impl Target {
128128
Some(Ok(()))
129129
})).unwrap_or(Ok(()))
130130
} );
131+
($key_name:ident, RustAbi) => ( {
132+
let name = (stringify!($key_name)).replace("_", "-");
133+
obj.remove(&name).and_then(|o| o.as_str().and_then(|s| {
134+
match s.parse::<super::RustAbi>() {
135+
Ok(rust_abi) => base.$key_name = Some(rust_abi),
136+
_ => return Some(Err(format!(
137+
"'{s}' is not a valid value for rust-abi. \
138+
Use 'x86-softfloat' or 'x86-sse2'."
139+
))),
140+
}
141+
Some(Ok(()))
142+
})).unwrap_or(Ok(()))
143+
} );
131144
($key_name:ident, RelocModel) => ( {
132145
let name = (stringify!($key_name)).replace("_", "-");
133146
obj.remove(&name).and_then(|o| o.as_str().and_then(|s| {
@@ -611,6 +624,7 @@ impl Target {
611624
key!(llvm_mcount_intrinsic, optional);
612625
key!(llvm_abiname);
613626
key!(llvm_floatabi, FloatAbi)?;
627+
key!(rust_abi, RustAbi)?;
614628
key!(relax_elf_relocations, bool);
615629
key!(llvm_args, list);
616630
key!(use_ctors_section, bool);
@@ -786,6 +800,7 @@ impl ToJson for Target {
786800
target_option_val!(llvm_mcount_intrinsic);
787801
target_option_val!(llvm_abiname);
788802
target_option_val!(llvm_floatabi);
803+
target_option_val!(rust_abi);
789804
target_option_val!(relax_elf_relocations);
790805
target_option_val!(llvm_args);
791806
target_option_val!(use_ctors_section);

compiler/rustc_target/src/spec/mod.rs

+54-4
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,37 @@ impl ToJson for FloatAbi {
11141114
}
11151115
}
11161116

1117+
/// The Rust-specific variant of the ABI used for this target.
1118+
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
1119+
pub enum RustAbi {
1120+
/// On x86-32 only: make use of SSE and SSE2 for ABI purposes.
1121+
X86Sse2,
1122+
/// On x86-32/64 only: do not use any FPU or SIMD registers for the ABI.
1123+
X86Softfloat,
1124+
}
1125+
1126+
impl FromStr for RustAbi {
1127+
type Err = ();
1128+
1129+
fn from_str(s: &str) -> Result<RustAbi, ()> {
1130+
Ok(match s {
1131+
"x86-sse2" => RustAbi::X86Sse2,
1132+
"x86-softfloat" => RustAbi::X86Softfloat,
1133+
_ => return Err(()),
1134+
})
1135+
}
1136+
}
1137+
1138+
impl ToJson for RustAbi {
1139+
fn to_json(&self) -> Json {
1140+
match *self {
1141+
RustAbi::X86Sse2 => "x86-sse2",
1142+
RustAbi::X86Softfloat => "x86-softfloat",
1143+
}
1144+
.to_json()
1145+
}
1146+
}
1147+
11171148
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
11181149
pub enum TlsModel {
11191150
GeneralDynamic,
@@ -2493,6 +2524,12 @@ pub struct TargetOptions {
24932524
/// If not provided, LLVM will infer the float ABI from the target triple (`llvm_target`).
24942525
pub llvm_floatabi: Option<FloatAbi>,
24952526

2527+
/// Picks a specific ABI for this target. This is *not* just for "Rust" ABI functions,
2528+
/// it can also affect "C" ABI functions; the point is that this flag is interpreted by
2529+
/// rustc and not forwarded to LLVM.
2530+
/// So far, this is only used on x86.
2531+
pub rust_abi: Option<RustAbi>,
2532+
24962533
/// Whether or not RelaxElfRelocation flag will be passed to the linker
24972534
pub relax_elf_relocations: bool,
24982535

@@ -2652,10 +2689,6 @@ impl TargetOptions {
26522689
.collect();
26532690
}
26542691
}
2655-
2656-
pub(crate) fn has_feature(&self, search_feature: &str) -> bool {
2657-
self.features.split(',').any(|f| f.strip_prefix('+').is_some_and(|f| f == search_feature))
2658-
}
26592692
}
26602693

26612694
impl Default for TargetOptions {
@@ -2761,6 +2794,7 @@ impl Default for TargetOptions {
27612794
llvm_mcount_intrinsic: None,
27622795
llvm_abiname: "".into(),
27632796
llvm_floatabi: None,
2797+
rust_abi: None,
27642798
relax_elf_relocations: false,
27652799
llvm_args: cvs![],
27662800
use_ctors_section: false,
@@ -3221,6 +3255,22 @@ impl Target {
32213255
_ => {}
32223256
}
32233257

3258+
// Check consistency of Rust ABI declaration.
3259+
if let Some(rust_abi) = self.rust_abi {
3260+
match rust_abi {
3261+
RustAbi::X86Sse2 => check_matches!(
3262+
&*self.arch,
3263+
"x86",
3264+
"`x86-sse2` ABI is only valid for x86-32 targets"
3265+
),
3266+
RustAbi::X86Softfloat => check_matches!(
3267+
&*self.arch,
3268+
"x86" | "x86_64",
3269+
"`x86-softfloat` ABI is only valid for x86 targets"
3270+
),
3271+
}
3272+
}
3273+
32243274
// Check that the given target-features string makes some basic sense.
32253275
if !self.features.is_empty() {
32263276
let mut features_enabled = FxHashSet::default();

compiler/rustc_target/src/spec/targets/i586_unknown_linux_gnu.rs

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::spec::Target;
22

33
pub(crate) fn target() -> Target {
44
let mut base = super::i686_unknown_linux_gnu::target();
5+
base.rust_abi = None;
56
base.cpu = "pentium".into();
67
base.llvm_target = "i586-unknown-linux-gnu".into();
78
base

compiler/rustc_target/src/spec/targets/i586_unknown_linux_musl.rs

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::spec::Target;
22

33
pub(crate) fn target() -> Target {
44
let mut base = super::i686_unknown_linux_musl::target();
5+
base.rust_abi = None;
56
base.cpu = "pentium".into();
67
base.llvm_target = "i586-unknown-linux-musl".into();
78
// FIXME(compiler-team#422): musl targets should be dynamically linked by default.

compiler/rustc_target/src/spec/targets/i686_unknown_linux_gnu.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1-
use crate::spec::{Cc, LinkerFlavor, Lld, SanitizerSet, StackProbeType, Target, base};
1+
use crate::spec::{Cc, LinkerFlavor, Lld, RustAbi, SanitizerSet, StackProbeType, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::linux_gnu::opts();
5+
base.rust_abi = Some(RustAbi::X86Sse2);
6+
// Dear distribution packager, if you are changing the base CPU model with the goal of removing
7+
// the SSE2 requirement, make sure to also set the `rust_abi` to `None` above or else SSE2 will
8+
// still be effectively required.
9+
// Also note that x86 without SSE2 is *not* considered a Tier 1 target by the Rust project.
510
base.cpu = "pentium4".into();
611
base.max_atomic_width = Some(64);
712
base.supported_sanitizers = SanitizerSet::ADDRESS;

compiler/rustc_target/src/spec/targets/i686_unknown_linux_musl.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, StackProbeType, Target, base};
1+
use crate::spec::{Cc, FramePointer, LinkerFlavor, Lld, RustAbi, StackProbeType, Target, base};
22

33
pub(crate) fn target() -> Target {
44
let mut base = base::linux_musl::opts();
5+
base.rust_abi = Some(RustAbi::X86Sse2);
56
base.cpu = "pentium4".into();
67
base.max_atomic_width = Some(64);
78
base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32", "-Wl,-melf_i386"]);

compiler/rustc_target/src/spec/targets/i686_unknown_uefi.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// The cdecl ABI is used. It differs from the stdcall or fastcall ABI.
66
// "i686-unknown-windows" is used to get the minimal subset of windows-specific features.
77

8-
use crate::spec::{Target, base};
8+
use crate::spec::{RustAbi, Target, base};
99

1010
pub(crate) fn target() -> Target {
1111
let mut base = base::uefi_msvc::opts();
@@ -22,6 +22,7 @@ pub(crate) fn target() -> Target {
2222
// If you initialize FP units yourself, you can override these flags with custom linker
2323
// arguments, thus giving you access to full MMX/SSE acceleration.
2424
base.features = "-mmx,-sse,+soft-float".into();
25+
base.rust_abi = Some(RustAbi::X86Softfloat);
2526

2627
// Use -GNU here, because of the reason below:
2728
// Background and Problem:

compiler/rustc_target/src/spec/targets/x86_64_unknown_none.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
// features.
66

77
use crate::spec::{
8-
Cc, CodeModel, LinkerFlavor, Lld, PanicStrategy, RelroLevel, SanitizerSet, StackProbeType,
9-
Target, TargetOptions,
8+
Cc, CodeModel, LinkerFlavor, Lld, PanicStrategy, RelroLevel, RustAbi, SanitizerSet,
9+
StackProbeType, Target, TargetOptions,
1010
};
1111

1212
pub(crate) fn target() -> Target {
@@ -20,6 +20,7 @@ pub(crate) fn target() -> Target {
2020
relro_level: RelroLevel::Full,
2121
linker_flavor: LinkerFlavor::Gnu(Cc::No, Lld::Yes),
2222
linker: Some("rust-lld".into()),
23+
rust_abi: Some(RustAbi::X86Softfloat),
2324
features: "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,+soft-float".into(),
2425
supported_sanitizers: SanitizerSet::KCFI | SanitizerSet::KERNELADDRESS,
2526
disable_redzone: true,

compiler/rustc_target/src/spec/targets/x86_64_unknown_uefi.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// LLVM. "x86_64-unknown-windows" is used to get the minimal subset of windows-specific features.
77

88
use crate::abi::call::Conv;
9-
use crate::spec::{Target, base};
9+
use crate::spec::{RustAbi, Target, base};
1010

1111
pub(crate) fn target() -> Target {
1212
let mut base = base::uefi_msvc::opts();
@@ -26,6 +26,7 @@ pub(crate) fn target() -> Target {
2626
// If you initialize FP units yourself, you can override these flags with custom linker
2727
// arguments, thus giving you access to full MMX/SSE acceleration.
2828
base.features = "-mmx,-sse,+soft-float".into();
29+
base.rust_abi = Some(RustAbi::X86Softfloat);
2930

3031
Target {
3132
llvm_target: "x86_64-unknown-windows".into(),

0 commit comments

Comments
 (0)