Skip to content

Commit 6f24f49

Browse files
committed
x86-sse2 ABI: use SSE registers for floats and SIMD
1 parent 18b9ee7 commit 6f24f49

File tree

10 files changed

+218
-122
lines changed

10 files changed

+218
-122
lines changed

compiler/rustc_target/src/callconv/mod.rs

+74-44
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use rustc_abi::{
88
use rustc_macros::HashStable_Generic;
99
use rustc_span::Symbol;
1010

11-
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
11+
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
1212

1313
mod aarch64;
1414
mod amdgpu;
@@ -387,6 +387,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
387387
/// Pass this argument directly instead. Should NOT be used!
388388
/// Only exists because of past ABI mistakes that will take time to fix
389389
/// (see <https://github.com/rust-lang/rust/issues/115666>).
390+
#[track_caller]
390391
pub fn make_direct_deprecated(&mut self) {
391392
match self.mode {
392393
PassMode::Indirect { .. } => {
@@ -399,6 +400,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
399400

400401
/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
401402
/// This is valid for both sized and unsized arguments.
403+
#[track_caller]
402404
pub fn make_indirect(&mut self) {
403405
match self.mode {
404406
PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -413,6 +415,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
413415

414416
/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
415417
/// ZSTs indirectly.
418+
#[track_caller]
416419
pub fn make_indirect_from_ignore(&mut self) {
417420
match self.mode {
418421
PassMode::Ignore => {
@@ -736,27 +739,46 @@ impl<'a, Ty> FnAbi<'a, Ty> {
736739
C: HasDataLayout + HasTargetSpec,
737740
{
738741
let spec = cx.target_spec();
739-
match &spec.arch[..] {
742+
match &*spec.arch {
740743
"x86" => x86::compute_rust_abi_info(cx, self, abi),
741744
"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
742745
"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
743746
"aarch64" => aarch64::compute_rust_abi_info(cx, self),
744747
_ => {}
745748
};
746749

750+
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
751+
// May only return `true` if the target will always pass those arguments the same way,
752+
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
753+
// target features are required to pass a SIMD value in registers must be listed in
754+
// the `abi_required_features` for the current target and ABI.
755+
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
756+
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
757+
// to 128-bit-sized vectors.
758+
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
759+
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
760+
arg.layout.size.bits() <= 128
761+
}
762+
// So far, we haven't implemented this logic for any other target.
763+
_ => false,
764+
};
765+
747766
for (arg_idx, arg) in self
748767
.args
749768
.iter_mut()
750769
.enumerate()
751770
.map(|(idx, arg)| (Some(idx), arg))
752771
.chain(iter::once((None, &mut self.ret)))
753772
{
754-
if arg.is_ignore() {
773+
// If the logic above already picked a specific type to cast the argument to, leave that
774+
// in place.
775+
if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) {
755776
continue;
756777
}
757778

758779
if arg_idx.is_none()
759780
&& arg.layout.size > Primitive::Pointer(AddressSpace::DATA).size(cx) * 2
781+
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
760782
{
761783
// Return values larger than 2 registers using a return area
762784
// pointer. LLVM and Cranelift disagree about how to return
@@ -766,7 +788,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
766788
// return value independently and decide to pass it in a
767789
// register or not, which would result in the return value
768790
// being passed partially in registers and partially through a
769-
// return area pointer.
791+
// return area pointer. For large IR-level values such as `i128`,
792+
// Cranelift will even split up the value into smaller chunks.
770793
//
771794
// While Cranelift may need to be fixed as the LLVM behavior is
772795
// generally more correct with respect to the surface language,
@@ -796,53 +819,60 @@ impl<'a, Ty> FnAbi<'a, Ty> {
796819
// rustc_target already ensure any return value which doesn't
797820
// fit in the available amount of return registers is passed in
798821
// the right way for the current target.
822+
//
823+
// The adjustment is not necessary nor desired for types with a vector
824+
// representation; those are handled below.
799825
arg.make_indirect();
800826
continue;
801827
}
802828

803829
match arg.layout.backend_repr {
804-
BackendRepr::Memory { .. } => {}
805-
806-
// This is a fun case! The gist of what this is doing is
807-
// that we want callers and callees to always agree on the
808-
// ABI of how they pass SIMD arguments. If we were to *not*
809-
// make these arguments indirect then they'd be immediates
810-
// in LLVM, which means that they'd use whatever the
811-
// appropriate ABI is for the callee and the caller. That
812-
// means, for example, if the caller doesn't have AVX
813-
// enabled but the callee does, then passing an AVX argument
814-
// across this boundary would cause corrupt data to show up.
815-
//
816-
// This problem is fixed by unconditionally passing SIMD
817-
// arguments through memory between callers and callees
818-
// which should get them all to agree on ABI regardless of
819-
// target feature sets. Some more information about this
820-
// issue can be found in #44367.
821-
//
822-
// Note that the intrinsic ABI is exempt here as
823-
// that's how we connect up to LLVM and it's unstable
824-
// anyway, we control all calls to it in libstd.
825-
BackendRepr::Vector { .. }
826-
if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect =>
827-
{
828-
arg.make_indirect();
829-
continue;
830+
BackendRepr::Memory { .. } => {
831+
// Compute `Aggregate` ABI.
832+
833+
let is_indirect_not_on_stack =
834+
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
835+
assert!(is_indirect_not_on_stack);
836+
837+
let size = arg.layout.size;
838+
if arg.layout.is_sized()
839+
&& size <= Primitive::Pointer(AddressSpace::DATA).size(cx)
840+
{
841+
// We want to pass small aggregates as immediates, but using
842+
// an LLVM aggregate type for this leads to bad optimizations,
843+
// so we pick an appropriately sized integer type instead.
844+
arg.cast_to(Reg { kind: RegKind::Integer, size });
845+
}
830846
}
831847

832-
_ => continue,
833-
}
834-
// Compute `Aggregate` ABI.
835-
836-
let is_indirect_not_on_stack =
837-
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
838-
assert!(is_indirect_not_on_stack);
839-
840-
let size = arg.layout.size;
841-
if !arg.layout.is_unsized() && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
842-
// We want to pass small aggregates as immediates, but using
843-
// an LLVM aggregate type for this leads to bad optimizations,
844-
// so we pick an appropriately sized integer type instead.
845-
arg.cast_to(Reg { kind: RegKind::Integer, size });
848+
BackendRepr::Vector { .. } => {
849+
// This is a fun case! The gist of what this is doing is
850+
// that we want callers and callees to always agree on the
851+
// ABI of how they pass SIMD arguments. If we were to *not*
852+
// make these arguments indirect then they'd be immediates
853+
// in LLVM, which means that they'd use whatever the
854+
// appropriate ABI is for the callee and the caller. That
855+
// means, for example, if the caller doesn't have AVX
856+
// enabled but the callee does, then passing an AVX argument
857+
// across this boundary would cause corrupt data to show up.
858+
//
859+
// This problem is fixed by unconditionally passing SIMD
860+
// arguments through memory between callers and callees
861+
// which should get them all to agree on ABI regardless of
862+
// target feature sets. Some more information about this
863+
// issue can be found in #44367.
864+
//
865+
// Note that the intrinsic ABI is exempt here as those are not
866+
// real functions anyway, and the backend expects very specific types.
867+
if abi != ExternAbi::RustIntrinsic
868+
&& spec.simd_types_indirect
869+
&& !can_pass_simd_directly(arg)
870+
{
871+
arg.make_indirect();
872+
}
873+
}
874+
875+
_ => {}
846876
}
847877
}
848878
}

compiler/rustc_target/src/callconv/x86.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use rustc_abi::{
44
};
55

66
use crate::callconv::{ArgAttribute, FnAbi, PassMode};
7-
use crate::spec::HasTargetSpec;
7+
use crate::spec::{HasTargetSpec, RustcAbi};
88

99
#[derive(PartialEq)]
1010
pub(crate) enum Flavor {
@@ -236,8 +236,16 @@ where
236236
_ => false, // anyway not passed via registers on x86
237237
};
238238
if has_float {
239-
if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
240-
// Same size or smaller than pointer, return in a register.
239+
if cx.target_spec().rustc_abi == Some(RustcAbi::X86Sse2)
240+
&& fn_abi.ret.layout.backend_repr.is_scalar()
241+
&& fn_abi.ret.layout.size.bits() <= 128
242+
{
243+
// This is a single scalar that fits into an SSE register, and the target uses the
244+
// SSE ABI. We prefer this over integer registers as float scalars need to be in SSE
245+
// registers for float operations, so that's the best place to pass them around.
246+
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
247+
} else if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
248+
// Same size or smaller than pointer, return in an integer register.
241249
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
242250
} else {
243251
// Larger than a pointer, return indirectly.

tests/assembly/closure-inherit-target-feature.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 {
1515
// check that _mm_blend_ps is not being inlined into the closure
1616
// CHECK-LABEL: {{sse41_blend_nofeature.*closure.*:}}
1717
// CHECK-NOT: blendps
18-
// CHECK: {{call .*_mm_blend_ps.*}}
18+
// CHECK: {{jmp .*_mm_blend_ps.*}}
1919
// CHECK-NOT: blendps
20-
// CHECK: ret
2120
#[inline(never)]
2221
|x, y| _mm_blend_ps(x, y, 0b0101)
2322
};

0 commit comments

Comments
 (0)