Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 8bfa689

Browse files
committed Jan 12, 2025 ·
x86: use SSE registers to return float values
1 parent bbc7084 commit 8bfa689

File tree

3 files changed

+74
-58
lines changed

3 files changed

+74
-58
lines changed
 

‎compiler/rustc_target/src/callconv/mod.rs

+5 -4
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
389389
/// Pass this argument directly instead. Should NOT be used!
390390
/// Only exists because of past ABI mistakes that will take time to fix
391391
/// (see <https://github.com/rust-lang/rust/issues/115666>).
392+
#[track_caller]
392393
pub fn make_direct_deprecated(&mut self) {
393394
match self.mode {
394395
PassMode::Indirect { .. } => {
@@ -401,6 +402,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
401402

402403
/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
403404
/// This is valid for both sized and unsized arguments.
405+
#[track_caller]
404406
pub fn make_indirect(&mut self) {
405407
match self.mode {
406408
PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -415,6 +417,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
415417

416418
/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
417419
/// ZSTs indirectly.
420+
#[track_caller]
418421
pub fn make_indirect_from_ignore(&mut self) {
419422
match self.mode {
420423
PassMode::Ignore => {
@@ -773,9 +776,9 @@ impl<'a, Ty> FnAbi<'a, Ty> {
773776

774777
if arg_idx.is_none()
775778
&& arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
776-
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
779+
&& arg.layout.is_aggregate()
777780
{
778-
// Return values larger than 2 registers using a return area
781+
// Return aggregate values larger than 2 registers using a return area
779782
// pointer. LLVM and Cranelift disagree about how to return
780783
// values that don't fit in the registers designated for return
781784
// values. LLVM will force the entire return value to be passed
@@ -813,8 +816,6 @@ impl<'a, Ty> FnAbi<'a, Ty> {
813816
// rustc_target already ensure any return value which doesn't
814817
// fit in the available amount of return registers is passed in
815818
// the right way for the current target.
816-
// The adjustment is also not necessary nor desired for types with
817-
// a vector representation; those are handled below.
818819
arg.make_indirect();
819820
continue;
820821
}

‎compiler/rustc_target/src/callconv/x86.rs

+9 -3
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ use crate::abi::call::{ArgAttribute, FnAbi, PassMode, Reg, RegKind};
22
use crate::abi::{
33
AddressSpace, Align, BackendRepr, Float, HasDataLayout, Pointer, TyAbiInterface, TyAndLayout,
44
};
5-
use crate::spec::HasTargetSpec;
65
use crate::spec::abi::Abi as SpecAbi;
6+
use crate::spec::{HasTargetSpec, RustAbi};
77

88
#[derive(PartialEq)]
99
pub(crate) enum Flavor {
@@ -234,8 +234,14 @@ where
234234
_ => false, // anyway not passed via registers on x86
235235
};
236236
if has_float {
237-
if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
238-
// Same size or smaller than pointer, return in a register.
237+
if cx.target_spec().rust_abi == Some(RustAbi::X86Sse2)
238+
&& fn_abi.ret.layout.backend_repr.is_scalar()
239+
&& fn_abi.ret.layout.size.bits() <= 128
240+
{
241+
// This is a single scalar that fits into an SSE register.
242+
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
243+
} else if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
244+
// Same size or smaller than pointer, return in an integer register.
239245
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
240246
} else {
241247
// Larger than a pointer, return indirectly.

‎tests/assembly/x86-return-float.rs

+60 -51
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,31 @@
11
//@ assembly-output: emit-asm
2-
//@ only-x86
3-
// FIXME(#114479): LLVM miscompiles loading and storing `f32` and `f64` when SSE is disabled.
4-
// There's no compiletest directive to ignore a test on i586 only, so just always explicitly enable
5-
// SSE2.
6-
// Use the same target CPU as `i686` so that LLVM orders the instructions in the same order.
7-
//@ compile-flags: -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
2+
//@ revisions: sse nosse
3+
//@[sse] compile-flags: --target i686-unknown-linux-gnu
4+
//@[sse] needs-llvm-components: x86
5+
// We make SSE available but don't use it for the ABI.
6+
//@[nosse] compile-flags: --target i586-unknown-linux-gnu -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
7+
//@[nosse] needs-llvm-components: x86
8+
89
// Force frame pointers to make ASM more consistent between targets
910
//@ compile-flags: -O -C force-frame-pointers
1011
//@ filecheck-flags: --implicit-check-not fld --implicit-check-not fst
11-
//@ revisions: normal win
12-
//@[normal] ignore-windows
13-
//@[win] only-windows
1412

15-
#![crate_type = "lib"]
1613
#![feature(f16, f128)]
14+
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
15+
#![no_core]
16+
#![crate_type = "lib"]
17+
18+
#[lang = "sized"]
19+
trait Sized {}
20+
21+
#[lang = "copy"]
22+
trait Copy {}
23+
24+
impl Copy for f16 {}
25+
impl Copy for f32 {}
26+
impl Copy for f64 {}
27+
impl Copy for f128 {}
28+
impl Copy for usize {}
1729

1830
// Tests that returning `f32` and `f64` with the "Rust" ABI on 32-bit x86 doesn't use the x87
1931
// floating point stack, as loading and storing `f32`s and `f64`s to and from the x87 stack quietens
@@ -24,7 +36,8 @@
2436
// CHECK-LABEL: return_f32:
2537
#[no_mangle]
2638
pub fn return_f32(x: f32) -> f32 {
27-
// CHECK: movl {{.*}}(%ebp), %eax
39+
// sse: movss {{.*}}(%ebp), %xmm0
40+
// nosse: movl {{.*}}(%ebp), %eax
2841
// CHECK-NOT: ax
2942
// CHECK: retl
3043
x
@@ -33,9 +46,11 @@ pub fn return_f32(x: f32) -> f32 {
3346
// CHECK-LABEL: return_f64:
3447
#[no_mangle]
3548
pub fn return_f64(x: f64) -> f64 {
36-
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
37-
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
38-
// CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
49+
// nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
50+
// nosse-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
51+
// nosse-NEXT: movsd %[[VAL]], (%[[PTR]])
52+
// sse: movsd {{.*}}(%ebp), %xmm0
53+
// sse-NOT: ax
3954
// CHECK: retl
4055
x
4156
}
@@ -148,7 +163,8 @@ pub unsafe fn call_f32(x: &mut f32) {
148163
}
149164
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
150165
// CHECK: calll {{()|_}}get_f32
151-
// CHECK-NEXT: movl %eax, (%[[PTR]])
166+
// sse-NEXT: movss %xmm0, (%[[PTR]])
167+
// nosse-NEXT: movl %eax, (%[[PTR]])
152168
*x = get_f32();
153169
}
154170

@@ -160,8 +176,9 @@ pub unsafe fn call_f64(x: &mut f64) {
160176
}
161177
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
162178
// CHECK: calll {{()|_}}get_f64
163-
// CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
164-
// CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
179+
// sse: movlps %xmm0, (%[[PTR]])
180+
// nosse: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
181+
// nosse-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
165182
*x = get_f64();
166183
}
167184

@@ -190,10 +207,8 @@ pub unsafe fn call_f64_f64(x: &mut (f64, f64)) {
190207
}
191208
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
192209
// CHECK: calll {{()|_}}get_f64_f64
193-
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
194-
// normal-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
195-
// win: movsd (%esp), %[[VAL1:.*]]
196-
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
210+
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
211+
// CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
197212
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
198213
// CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
199214
*x = get_f64_f64();
@@ -207,13 +222,10 @@ pub unsafe fn call_f32_f64(x: &mut (f32, f64)) {
207222
}
208223
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
209224
// CHECK: calll {{()|_}}get_f32_f64
210-
// normal: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
211-
// normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
212-
// win: movss (%esp), %[[VAL1:.*]]
213-
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
225+
// CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
226+
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
214227
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
215-
// normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
216-
// win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
228+
// CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
217229
*x = get_f32_f64();
218230
}
219231

@@ -225,10 +237,8 @@ pub unsafe fn call_f64_f32(x: &mut (f64, f32)) {
225237
}
226238
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
227239
// CHECK: calll {{()|_}}get_f64_f32
228-
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
229-
// normal-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
230-
// win: movsd (%esp), %[[VAL1:.*]]
231-
// win-NEXT: movss 8(%esp), %[[VAL2:.*]]
240+
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
241+
// CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
232242
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
233243
// CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
234244
*x = get_f64_f32();
@@ -257,10 +267,8 @@ pub unsafe fn call_f64_other(x: &mut (f64, usize)) {
257267
}
258268
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
259269
// CHECK: calll {{()|_}}get_f64_other
260-
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
261-
// normal-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
262-
// win: movsd (%esp), %[[VAL1:.*]]
263-
// win-NEXT: movl 8(%esp), %[[VAL2:.*]]
270+
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
271+
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
264272
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
265273
// CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
266274
*x = get_f64_other();
@@ -289,13 +297,10 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
289297
}
290298
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
291299
// CHECK: calll {{()|_}}get_other_f64
292-
// normal: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
293-
// normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
294-
// win: movl (%esp), %[[VAL1:.*]]
295-
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
300+
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
301+
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
296302
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
297-
// normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
298-
// win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
303+
// CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
299304
*x = get_other_f64();
300305
}
301306

@@ -307,7 +312,8 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
307312
pub fn return_f16(x: f16) -> f16 {
308313
// CHECK: pushl %ebp
309314
// CHECK: movl %esp, %ebp
310-
// CHECK: movzwl 8(%ebp), %eax
315+
// nosse: movzwl 8(%ebp), %eax
316+
// sse: pinsrw $0, 8(%ebp), %xmm0
311317
// CHECK: popl %ebp
312318
// CHECK: retl
313319
x
@@ -316,15 +322,18 @@ pub fn return_f16(x: f16) -> f16 {
316322
// CHECK-LABEL: return_f128:
317323
#[no_mangle]
318324
pub fn return_f128(x: f128) -> f128 {
319-
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
320-
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
321-
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
322-
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
323-
// CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
324-
// CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
325-
// CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
326-
// CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
327-
// CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
325+
// CHECK: pushl %ebp
326+
// sse: movaps [[#%d,OFFSET:]](%ebp), %xmm0
327+
// nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
328+
// nosse-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
329+
// nosse-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
330+
// nosse-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
331+
// nosse-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
332+
// nosse-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
333+
// nosse-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
334+
// nosse-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
335+
// nosse-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
336+
// CHECK: popl %ebp
328337
// CHECK: retl
329338
x
330339
}

0 commit comments

Comments
 (0)
Please sign in to comment.