Skip to content

Commit 38593bf

Browse files
authored
Unrolled build for rust-lang#129536
Rollup merge of rust-lang#129536 - beetrees:f16-f128-inline-asm-aarch64, r=Amanieu Add `f16` and `f128` inline ASM support for `aarch64` Adds `f16` and `f128` inline ASM support for `aarch64`. SIMD vector types are taken from [the ARM intrinsics list](https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiesreturnbasetype=[float]&f:@navigationhierarchieselementbitsize=[16]&f:@navigationhierarchiesarchitectures=[A64]). Based on the work of `@lengrongfu` in rust-lang#127043. Relevant issue: rust-lang#125398 Tracking issue: rust-lang#116909 `@rustbot` label +F-f16_and_f128 try-job: aarch64-gnu try-job: aarch64-apple
2 parents bf662eb + abd44fc commit 38593bf

File tree

5 files changed

+154
-15
lines changed

5 files changed

+154
-15
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -913,8 +913,10 @@ fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Ty
913913
Primitive::Int(Integer::I16, _) => cx.type_i16(),
914914
Primitive::Int(Integer::I32, _) => cx.type_i32(),
915915
Primitive::Int(Integer::I64, _) => cx.type_i64(),
916+
Primitive::Float(Float::F16) => cx.type_f16(),
916917
Primitive::Float(Float::F32) => cx.type_f32(),
917918
Primitive::Float(Float::F64) => cx.type_f64(),
919+
Primitive::Float(Float::F128) => cx.type_f128(),
918920
// FIXME(erikdesjardins): handle non-default addrspace ptr sizes
919921
Primitive::Pointer(_) => cx.type_from_integer(dl.ptr_sized_integer()),
920922
_ => unreachable!(),
@@ -948,7 +950,9 @@ fn llvm_fixup_input<'ll, 'tcx>(
948950
value
949951
}
950952
}
951-
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
953+
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
954+
if s.primitive() != Primitive::Float(Float::F128) =>
955+
{
952956
let elem_ty = llvm_asm_scalar_type(bx.cx, s);
953957
let count = 16 / layout.size.bytes();
954958
let vec_ty = bx.cx.type_vector(elem_ty, count);
@@ -1090,7 +1094,9 @@ fn llvm_fixup_output<'ll, 'tcx>(
10901094
value
10911095
}
10921096
}
1093-
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
1097+
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
1098+
if s.primitive() != Primitive::Float(Float::F128) =>
1099+
{
10941100
value = bx.extract_element(value, bx.const_i32(0));
10951101
if let Primitive::Pointer(_) = s.primitive() {
10961102
value = bx.inttoptr(value, layout.llvm_type(bx.cx));
@@ -1222,7 +1228,9 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12221228
layout.llvm_type(cx)
12231229
}
12241230
}
1225-
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
1231+
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
1232+
if s.primitive() != Primitive::Float(Float::F128) =>
1233+
{
12261234
let elem_ty = llvm_asm_scalar_type(cx, s);
12271235
let count = 16 / layout.size.bytes();
12281236
cx.type_vector(elem_ty, count)

compiler/rustc_target/src/asm/aarch64.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,11 @@ impl AArch64InlineAsmRegClass {
5959
_arch: InlineAsmArch,
6060
) -> &'static [(InlineAsmType, Option<Symbol>)] {
6161
match self {
62-
Self::reg => types! { _: I8, I16, I32, I64, F32, F64; },
62+
Self::reg => types! { _: I8, I16, I32, I64, F16, F32, F64; },
6363
Self::vreg | Self::vreg_low16 => types! {
64-
neon: I8, I16, I32, I64, F32, F64,
65-
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2), VecF64(1),
66-
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
64+
neon: I8, I16, I32, I64, F16, F32, F64, F128,
65+
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2), VecF64(1),
66+
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
6767
},
6868
Self::preg => &[],
6969
}

tests/assembly/asm/aarch64-types.rs

+111-5
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
//@ [arm64ec] compile-flags: --target arm64ec-pc-windows-msvc
66
//@ [arm64ec] needs-llvm-components: aarch64
77

8-
#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch)]
8+
#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16, f128)]
99
#![crate_type = "rlib"]
1010
#![no_core]
1111
#![allow(asm_sub_register, non_camel_case_types)]
12+
// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
13+
#![feature(auto_traits)]
1214

1315
#[rustc_builtin_macro]
1416
macro_rules! asm {
@@ -39,6 +41,8 @@ pub struct i32x2(i32, i32);
3941
#[repr(simd)]
4042
pub struct i64x1(i64);
4143
#[repr(simd)]
44+
pub struct f16x4(f16, f16, f16, f16);
45+
#[repr(simd)]
4246
pub struct f32x2(f32, f32);
4347
#[repr(simd)]
4448
pub struct f64x1(f64);
@@ -51,30 +55,42 @@ pub struct i32x4(i32, i32, i32, i32);
5155
#[repr(simd)]
5256
pub struct i64x2(i64, i64);
5357
#[repr(simd)]
58+
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
59+
#[repr(simd)]
5460
pub struct f32x4(f32, f32, f32, f32);
5561
#[repr(simd)]
5662
pub struct f64x2(f64, f64);
5763

5864
impl Copy for i8 {}
5965
impl Copy for i16 {}
66+
impl Copy for f16 {}
6067
impl Copy for i32 {}
6168
impl Copy for f32 {}
6269
impl Copy for i64 {}
6370
impl Copy for f64 {}
71+
impl Copy for f128 {}
6472
impl Copy for ptr {}
6573
impl Copy for i8x8 {}
6674
impl Copy for i16x4 {}
6775
impl Copy for i32x2 {}
6876
impl Copy for i64x1 {}
77+
impl Copy for f16x4 {}
6978
impl Copy for f32x2 {}
7079
impl Copy for f64x1 {}
7180
impl Copy for i8x16 {}
7281
impl Copy for i16x8 {}
7382
impl Copy for i32x4 {}
7483
impl Copy for i64x2 {}
84+
impl Copy for f16x8 {}
7585
impl Copy for f32x4 {}
7686
impl Copy for f64x2 {}
7787

88+
// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
89+
#[lang = "freeze"]
90+
unsafe auto trait Freeze {}
91+
#[lang = "unpin"]
92+
auto trait Unpin {}
93+
7894
extern "C" {
7995
fn extern_func();
8096
static extern_static: u8;
@@ -111,38 +127,44 @@ pub unsafe fn issue_75761() {
111127

112128
macro_rules! check {
113129
($func:ident $ty:ident $class:ident $mov:literal $modifier:literal) => {
130+
// FIXME(f16_f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
131+
// `f16` and `f128` without LLVM erroring.
132+
// LLVM issue: <https://github.com/llvm/llvm-project/issues/94434>
114133
#[no_mangle]
115-
pub unsafe fn $func(x: $ty) -> $ty {
134+
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
116135
// Hack to avoid function merging
117136
extern "Rust" {
118137
fn dont_merge(s: &str);
119138
}
120139
dont_merge(stringify!($func));
121140

141+
let x = *inp;
122142
let y;
123143
asm!(
124144
concat!($mov, " {:", $modifier, "}, {:", $modifier, "}"),
125145
out($class) y,
126146
in($class) x
127147
);
128-
y
148+
*out = y;
129149
}
130150
};
131151
}
132152

133153
macro_rules! check_reg {
134154
($func:ident $ty:ident $reg:tt $mov:literal) => {
155+
// FIXME(f16_f128): See FIXME in `check!`
135156
#[no_mangle]
136-
pub unsafe fn $func(x: $ty) -> $ty {
157+
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
137158
// Hack to avoid function merging
138159
extern "Rust" {
139160
fn dont_merge(s: &str);
140161
}
141162
dont_merge(stringify!($func));
142163

164+
let x = *inp;
143165
let y;
144166
asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x);
145-
y
167+
*out = y;
146168
}
147169
};
148170
}
@@ -159,6 +181,12 @@ check!(reg_i8 i8 reg "mov" "");
159181
// CHECK: //NO_APP
160182
check!(reg_i16 i16 reg "mov" "");
161183

184+
// CHECK-LABEL: {{("#)?}}reg_f16{{"?}}
185+
// CHECK: //APP
186+
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
187+
// CHECK: //NO_APP
188+
check!(reg_f16 f16 reg "mov" "");
189+
162190
// CHECK-LABEL: {{("#)?}}reg_i32{{"?}}
163191
// CHECK: //APP
164192
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
@@ -201,6 +229,12 @@ check!(vreg_i8 i8 vreg "fmov" "s");
201229
// CHECK: //NO_APP
202230
check!(vreg_i16 i16 vreg "fmov" "s");
203231

232+
// CHECK-LABEL: {{("#)?}}vreg_f16{{"?}}
233+
// CHECK: //APP
234+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
235+
// CHECK: //NO_APP
236+
check!(vreg_f16 f16 vreg "fmov" "s");
237+
204238
// CHECK-LABEL: {{("#)?}}vreg_i32{{"?}}
205239
// CHECK: //APP
206240
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -225,6 +259,12 @@ check!(vreg_i64 i64 vreg "fmov" "s");
225259
// CHECK: //NO_APP
226260
check!(vreg_f64 f64 vreg "fmov" "s");
227261

262+
// CHECK-LABEL: {{("#)?}}vreg_f128{{"?}}
263+
// CHECK: //APP
264+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
265+
// CHECK: //NO_APP
266+
check!(vreg_f128 f128 vreg "fmov" "s");
267+
228268
// CHECK-LABEL: {{("#)?}}vreg_ptr{{"?}}
229269
// CHECK: //APP
230270
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -255,6 +295,12 @@ check!(vreg_i32x2 i32x2 vreg "fmov" "s");
255295
// CHECK: //NO_APP
256296
check!(vreg_i64x1 i64x1 vreg "fmov" "s");
257297

298+
// CHECK-LABEL: {{("#)?}}vreg_f16x4{{"?}}
299+
// CHECK: //APP
300+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
301+
// CHECK: //NO_APP
302+
check!(vreg_f16x4 f16x4 vreg "fmov" "s");
303+
258304
// CHECK-LABEL: {{("#)?}}vreg_f32x2{{"?}}
259305
// CHECK: //APP
260306
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -291,6 +337,12 @@ check!(vreg_i32x4 i32x4 vreg "fmov" "s");
291337
// CHECK: //NO_APP
292338
check!(vreg_i64x2 i64x2 vreg "fmov" "s");
293339

340+
// CHECK-LABEL: {{("#)?}}vreg_f16x8{{"?}}
341+
// CHECK: //APP
342+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
343+
// CHECK: //NO_APP
344+
check!(vreg_f16x8 f16x8 vreg "fmov" "s");
345+
294346
// CHECK-LABEL: {{("#)?}}vreg_f32x4{{"?}}
295347
// CHECK: //APP
296348
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -315,6 +367,12 @@ check!(vreg_low16_i8 i8 vreg_low16 "fmov" "s");
315367
// CHECK: //NO_APP
316368
check!(vreg_low16_i16 i16 vreg_low16 "fmov" "s");
317369

370+
// CHECK-LABEL: {{("#)?}}vreg_low16_f16{{"?}}
371+
// CHECK: //APP
372+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
373+
// CHECK: //NO_APP
374+
check!(vreg_low16_f16 f16 vreg_low16 "fmov" "s");
375+
318376
// CHECK-LABEL: {{("#)?}}vreg_low16_f32{{"?}}
319377
// CHECK: //APP
320378
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -333,6 +391,12 @@ check!(vreg_low16_i64 i64 vreg_low16 "fmov" "s");
333391
// CHECK: //NO_APP
334392
check!(vreg_low16_f64 f64 vreg_low16 "fmov" "s");
335393

394+
// CHECK-LABEL: {{("#)?}}vreg_low16_f128{{"?}}
395+
// CHECK: //APP
396+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
397+
// CHECK: //NO_APP
398+
check!(vreg_low16_f128 f128 vreg_low16 "fmov" "s");
399+
336400
// CHECK-LABEL: {{("#)?}}vreg_low16_ptr{{"?}}
337401
// CHECK: //APP
338402
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -363,6 +427,12 @@ check!(vreg_low16_i32x2 i32x2 vreg_low16 "fmov" "s");
363427
// CHECK: //NO_APP
364428
check!(vreg_low16_i64x1 i64x1 vreg_low16 "fmov" "s");
365429

430+
// CHECK-LABEL: {{("#)?}}vreg_low16_f16x4{{"?}}
431+
// CHECK: //APP
432+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
433+
// CHECK: //NO_APP
434+
check!(vreg_low16_f16x4 f16x4 vreg_low16 "fmov" "s");
435+
366436
// CHECK-LABEL: {{("#)?}}vreg_low16_f32x2{{"?}}
367437
// CHECK: //APP
368438
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -399,6 +469,12 @@ check!(vreg_low16_i32x4 i32x4 vreg_low16 "fmov" "s");
399469
// CHECK: //NO_APP
400470
check!(vreg_low16_i64x2 i64x2 vreg_low16 "fmov" "s");
401471

472+
// CHECK-LABEL: {{("#)?}}vreg_low16_f16x8{{"?}}
473+
// CHECK: //APP
474+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
475+
// CHECK: //NO_APP
476+
check!(vreg_low16_f16x8 f16x8 vreg_low16 "fmov" "s");
477+
402478
// CHECK-LABEL: {{("#)?}}vreg_low16_f32x4{{"?}}
403479
// CHECK: //APP
404480
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -423,6 +499,12 @@ check_reg!(x0_i8 i8 "x0" "mov");
423499
// CHECK: //NO_APP
424500
check_reg!(x0_i16 i16 "x0" "mov");
425501

502+
// CHECK-LABEL: {{("#)?}}x0_f16{{"?}}
503+
// CHECK: //APP
504+
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
505+
// CHECK: //NO_APP
506+
check_reg!(x0_f16 f16 "x0" "mov");
507+
426508
// CHECK-LABEL: {{("#)?}}x0_i32{{"?}}
427509
// CHECK: //APP
428510
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
@@ -465,6 +547,12 @@ check_reg!(v0_i8 i8 "s0" "fmov");
465547
// CHECK: //NO_APP
466548
check_reg!(v0_i16 i16 "s0" "fmov");
467549

550+
// CHECK-LABEL: {{("#)?}}v0_f16{{"?}}
551+
// CHECK: //APP
552+
// CHECK: fmov s0, s0
553+
// CHECK: //NO_APP
554+
check_reg!(v0_f16 f16 "s0" "fmov");
555+
468556
// CHECK-LABEL: {{("#)?}}v0_i32{{"?}}
469557
// CHECK: //APP
470558
// CHECK: fmov s0, s0
@@ -489,6 +577,12 @@ check_reg!(v0_i64 i64 "s0" "fmov");
489577
// CHECK: //NO_APP
490578
check_reg!(v0_f64 f64 "s0" "fmov");
491579

580+
// CHECK-LABEL: {{("#)?}}v0_f128{{"?}}
581+
// CHECK: //APP
582+
// CHECK: fmov s0, s0
583+
// CHECK: //NO_APP
584+
check_reg!(v0_f128 f128 "s0" "fmov");
585+
492586
// CHECK-LABEL: {{("#)?}}v0_ptr{{"?}}
493587
// CHECK: //APP
494588
// CHECK: fmov s0, s0
@@ -519,6 +613,12 @@ check_reg!(v0_i32x2 i32x2 "s0" "fmov");
519613
// CHECK: //NO_APP
520614
check_reg!(v0_i64x1 i64x1 "s0" "fmov");
521615

616+
// CHECK-LABEL: {{("#)?}}v0_f16x4{{"?}}
617+
// CHECK: //APP
618+
// CHECK: fmov s0, s0
619+
// CHECK: //NO_APP
620+
check_reg!(v0_f16x4 f16x4 "s0" "fmov");
621+
522622
// CHECK-LABEL: {{("#)?}}v0_f32x2{{"?}}
523623
// CHECK: //APP
524624
// CHECK: fmov s0, s0
@@ -555,6 +655,12 @@ check_reg!(v0_i32x4 i32x4 "s0" "fmov");
555655
// CHECK: //NO_APP
556656
check_reg!(v0_i64x2 i64x2 "s0" "fmov");
557657

658+
// CHECK-LABEL: {{("#)?}}v0_f16x8{{"?}}
659+
// CHECK: //APP
660+
// CHECK: fmov s0, s0
661+
// CHECK: //NO_APP
662+
check_reg!(v0_f16x8 f16x8 "s0" "fmov");
663+
558664
// CHECK-LABEL: {{("#)?}}v0_f32x4{{"?}}
559665
// CHECK: //APP
560666
// CHECK: fmov s0, s0

tests/ui/asm/aarch64/type-check-3.stderr

+3-3
Original file line numberDiff line numberDiff line change
@@ -95,23 +95,23 @@ error: type `i128` cannot be used with this register class
9595
LL | asm!("{}", in(reg) 0i128);
9696
| ^^^^^
9797
|
98-
= note: register class `reg` supports these types: i8, i16, i32, i64, f32, f64
98+
= note: register class `reg` supports these types: i8, i16, i32, i64, f16, f32, f64
9999

100100
error: type `float64x2_t` cannot be used with this register class
101101
--> $DIR/type-check-3.rs:75:28
102102
|
103103
LL | asm!("{}", in(reg) f64x2);
104104
| ^^^^^
105105
|
106-
= note: register class `reg` supports these types: i8, i16, i32, i64, f32, f64
106+
= note: register class `reg` supports these types: i8, i16, i32, i64, f16, f32, f64
107107

108108
error: type `Simd256bit` cannot be used with this register class
109109
--> $DIR/type-check-3.rs:77:29
110110
|
111111
LL | asm!("{}", in(vreg) f64x4);
112112
| ^^^^^
113113
|
114-
= note: register class `vreg` supports these types: i8, i16, i32, i64, f32, f64, i8x8, i16x4, i32x2, i64x1, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2
114+
= note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, f128, i8x8, i16x4, i32x2, i64x1, f16x4, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2
115115

116116
error: incompatible types for asm inout argument
117117
--> $DIR/type-check-3.rs:88:33

0 commit comments

Comments
 (0)