Skip to content

Commit 1c0655d

Browse files
committed
x86: use SSE registers to return float values
1 parent 1a6422e commit 1c0655d

File tree

3 files changed

+73
-58
lines changed

3 files changed

+73
-58
lines changed

compiler/rustc_target/src/callconv/mod.rs

+5-4
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
389389
/// Pass this argument directly instead. Should NOT be used!
390390
/// Only exists because of past ABI mistakes that will take time to fix
391391
/// (see <https://github.com/rust-lang/rust/issues/115666>).
392+
#[track_caller]
392393
pub fn make_direct_deprecated(&mut self) {
393394
match self.mode {
394395
PassMode::Indirect { .. } => {
@@ -401,6 +402,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
401402

402403
/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
403404
/// This is valid for both sized and unsized arguments.
405+
#[track_caller]
404406
pub fn make_indirect(&mut self) {
405407
match self.mode {
406408
PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -415,6 +417,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
415417

416418
/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
417419
/// ZSTs indirectly.
420+
#[track_caller]
418421
pub fn make_indirect_from_ignore(&mut self) {
419422
match self.mode {
420423
PassMode::Ignore => {
@@ -773,9 +776,9 @@ impl<'a, Ty> FnAbi<'a, Ty> {
773776

774777
if arg_idx.is_none()
775778
&& arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2
776-
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
779+
&& arg.layout.is_aggregate()
777780
{
778-
// Return values larger than 2 registers using a return area
781+
// Return aggregate values larger than 2 registers using a return area
779782
// pointer. LLVM and Cranelift disagree about how to return
780783
// values that don't fit in the registers designated for return
781784
// values. LLVM will force the entire return value to be passed
@@ -813,8 +816,6 @@ impl<'a, Ty> FnAbi<'a, Ty> {
813816
// rustc_target already ensure any return value which doesn't
814817
// fit in the available amount of return registers is passed in
815818
// the right way for the current target.
816-
// The adjustment is also not necessary nor desired for types with
817-
// a vector representation; those are handled below.
818819
arg.make_indirect();
819820
continue;
820821
}

compiler/rustc_target/src/callconv/x86.rs

+9-3
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ use crate::abi::call::{ArgAttribute, FnAbi, PassMode, Reg, RegKind};
22
use crate::abi::{
33
AddressSpace, Align, BackendRepr, Float, HasDataLayout, Pointer, TyAbiInterface, TyAndLayout,
44
};
5-
use crate::spec::HasTargetSpec;
65
use crate::spec::abi::Abi as SpecAbi;
6+
use crate::spec::{HasTargetSpec, RustAbi};
77

88
#[derive(PartialEq)]
99
pub(crate) enum Flavor {
@@ -234,8 +234,14 @@ where
234234
_ => false, // anyway not passed via registers on x86
235235
};
236236
if has_float {
237-
if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
238-
// Same size or smaller than pointer, return in a register.
237+
if cx.target_spec().rust_abi == Some(RustAbi::X86Sse2)
238+
&& fn_abi.ret.layout.backend_repr.is_scalar()
239+
&& fn_abi.ret.layout.size.bits() <= 128
240+
{
241+
// This is a single scalar that fits into an SSE register.
242+
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
243+
} else if fn_abi.ret.layout.size <= Pointer(AddressSpace::DATA).size(cx) {
244+
// Same size or smaller than pointer, return in an integer register.
239245
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
240246
} else {
241247
// Larger than a pointer, return indirectly.

tests/assembly/x86-return-float.rs

+59-51
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,30 @@
11
//@ assembly-output: emit-asm
2-
//@ only-x86
3-
// FIXME(#114479): LLVM miscompiles loading and storing `f32` and `f64` when SSE is disabled.
4-
// There's no compiletest directive to ignore a test on i586 only, so just always explicitly enable
5-
// SSE2.
6-
// Use the same target CPU as `i686` so that LLVM orders the instructions in the same order.
7-
//@ compile-flags: -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
2+
//@ needs-llvm-components: x86
3+
//@ revisions: sse nosse
4+
//@[sse] compile-flags: --target i686-unknown-linux-gnu
5+
// We make SSE available but don't use it for the ABI.
6+
//@[nosse] compile-flags: --target i586-unknown-linux-gnu -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
7+
88
// Force frame pointers to make ASM more consistent between targets
99
//@ compile-flags: -O -C force-frame-pointers
1010
//@ filecheck-flags: --implicit-check-not fld --implicit-check-not fst
11-
//@ revisions: normal win
12-
//@[normal] ignore-windows
13-
//@[win] only-windows
1411

15-
#![crate_type = "lib"]
1612
#![feature(f16, f128)]
13+
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
14+
#![no_core]
15+
#![crate_type = "lib"]
16+
17+
#[lang = "sized"]
18+
trait Sized {}
19+
20+
#[lang = "copy"]
21+
trait Copy {}
22+
23+
impl Copy for f16 {}
24+
impl Copy for f32 {}
25+
impl Copy for f64 {}
26+
impl Copy for f128 {}
27+
impl Copy for usize {}
1728

1829
// Tests that returning `f32` and `f64` with the "Rust" ABI on 32-bit x86 doesn't use the x87
1930
// floating point stack, as loading and storing `f32`s and `f64`s to and from the x87 stack quietens
@@ -24,7 +35,8 @@
2435
// CHECK-LABEL: return_f32:
2536
#[no_mangle]
2637
pub fn return_f32(x: f32) -> f32 {
27-
// CHECK: movl {{.*}}(%ebp), %eax
38+
// sse: movss {{.*}}(%ebp), %xmm0
39+
// nosse: movl {{.*}}(%ebp), %eax
2840
// CHECK-NOT: ax
2941
// CHECK: retl
3042
x
@@ -33,9 +45,11 @@ pub fn return_f32(x: f32) -> f32 {
3345
// CHECK-LABEL: return_f64:
3446
#[no_mangle]
3547
pub fn return_f64(x: f64) -> f64 {
36-
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
37-
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
38-
// CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
48+
// nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
49+
// nosse-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
50+
// nosse-NEXT: movsd %[[VAL]], (%[[PTR]])
51+
// sse: movsd {{.*}}(%ebp), %xmm0
52+
// sse-NOT: ax
3953
// CHECK: retl
4054
x
4155
}
@@ -148,7 +162,8 @@ pub unsafe fn call_f32(x: &mut f32) {
148162
}
149163
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
150164
// CHECK: calll {{()|_}}get_f32
151-
// CHECK-NEXT: movl %eax, (%[[PTR]])
165+
// sse-NEXT: movss %xmm0, (%[[PTR]])
166+
// nosse-NEXT: movl %eax, (%[[PTR]])
152167
*x = get_f32();
153168
}
154169

@@ -160,8 +175,9 @@ pub unsafe fn call_f64(x: &mut f64) {
160175
}
161176
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
162177
// CHECK: calll {{()|_}}get_f64
163-
// CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
164-
// CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
178+
// sse: movlps %xmm0, (%[[PTR]])
179+
// nosse: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
180+
// nosse-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
165181
*x = get_f64();
166182
}
167183

@@ -190,10 +206,8 @@ pub unsafe fn call_f64_f64(x: &mut (f64, f64)) {
190206
}
191207
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
192208
// CHECK: calll {{()|_}}get_f64_f64
193-
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
194-
// normal-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
195-
// win: movsd (%esp), %[[VAL1:.*]]
196-
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
209+
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
210+
// CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
197211
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
198212
// CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
199213
*x = get_f64_f64();
@@ -207,13 +221,10 @@ pub unsafe fn call_f32_f64(x: &mut (f32, f64)) {
207221
}
208222
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
209223
// CHECK: calll {{()|_}}get_f32_f64
210-
// normal: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
211-
// normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
212-
// win: movss (%esp), %[[VAL1:.*]]
213-
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
224+
// CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
225+
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
214226
// CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
215-
// normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
216-
// win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
227+
// CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
217228
*x = get_f32_f64();
218229
}
219230

@@ -225,10 +236,8 @@ pub unsafe fn call_f64_f32(x: &mut (f64, f32)) {
225236
}
226237
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
227238
// CHECK: calll {{()|_}}get_f64_f32
228-
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
229-
// normal-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
230-
// win: movsd (%esp), %[[VAL1:.*]]
231-
// win-NEXT: movss 8(%esp), %[[VAL2:.*]]
239+
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
240+
// CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
232241
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
233242
// CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
234243
*x = get_f64_f32();
@@ -257,10 +266,8 @@ pub unsafe fn call_f64_other(x: &mut (f64, usize)) {
257266
}
258267
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
259268
// CHECK: calll {{()|_}}get_f64_other
260-
// normal: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
261-
// normal-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
262-
// win: movsd (%esp), %[[VAL1:.*]]
263-
// win-NEXT: movl 8(%esp), %[[VAL2:.*]]
269+
// CHECK: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
270+
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
264271
// CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
265272
// CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
266273
*x = get_f64_other();
@@ -289,13 +296,10 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
289296
}
290297
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
291298
// CHECK: calll {{()|_}}get_other_f64
292-
// normal: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
293-
// normal-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
294-
// win: movl (%esp), %[[VAL1:.*]]
295-
// win-NEXT: movsd 8(%esp), %[[VAL2:.*]]
299+
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
300+
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
296301
// CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
297-
// normal-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
298-
// win-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
302+
// CHECK-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
299303
*x = get_other_f64();
300304
}
301305

@@ -307,7 +311,8 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
307311
pub fn return_f16(x: f16) -> f16 {
308312
// CHECK: pushl %ebp
309313
// CHECK: movl %esp, %ebp
310-
// CHECK: movzwl 8(%ebp), %eax
314+
// nosse: movzwl 8(%ebp), %eax
315+
// sse: pinsrw $0, 8(%ebp), %xmm0
311316
// CHECK: popl %ebp
312317
// CHECK: retl
313318
x
@@ -316,15 +321,18 @@ pub fn return_f16(x: f16) -> f16 {
316321
// CHECK-LABEL: return_f128:
317322
#[no_mangle]
318323
pub fn return_f128(x: f128) -> f128 {
319-
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
320-
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
321-
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
322-
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
323-
// CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
324-
// CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
325-
// CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
326-
// CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
327-
// CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
324+
// CHECK: pushl %ebp
325+
// sse: movaps [[#%d,OFFSET:]](%ebp), %xmm0
326+
// nosse: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
327+
// nosse-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
328+
// nosse-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
329+
// nosse-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
330+
// nosse-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
331+
// nosse-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
332+
// nosse-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
333+
// nosse-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
334+
// nosse-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
335+
// CHECK: popl %ebp
328336
// CHECK: retl
329337
x
330338
}

0 commit comments

Comments (0)