Skip to content

Commit 4b492a1

Browse files
committed
Add f16 inline ASM support for 32-bit ARM
1 parent 12b33d3 commit 4b492a1

File tree

3 files changed

+384
-183
lines changed

3 files changed

+384
-183
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

+58
Original file line number | Diff line number | Diff line change
@@ -1020,6 +1020,26 @@ fn llvm_fixup_input<'ll, 'tcx>(
10201020
value
10211021
}
10221022
}
1023+
(
1024+
InlineAsmRegClass::Arm(
1025+
ArmInlineAsmRegClass::dreg
1026+
| ArmInlineAsmRegClass::dreg_low8
1027+
| ArmInlineAsmRegClass::dreg_low16,
1028+
),
1029+
Abi::Vector { element, count: 4 },
1030+
) if element.primitive() == Primitive::Float(Float::F16) => {
1031+
bx.bitcast(value, bx.type_f64())
1032+
}
1033+
(
1034+
InlineAsmRegClass::Arm(
1035+
ArmInlineAsmRegClass::qreg
1036+
| ArmInlineAsmRegClass::qreg_low4
1037+
| ArmInlineAsmRegClass::qreg_low8,
1038+
),
1039+
Abi::Vector { element, count: 8 },
1040+
) if element.primitive() == Primitive::Float(Float::F16) => {
1041+
bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))
1042+
}
10231043
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
10241044
match s.primitive() {
10251045
// MIPS only supports register-length arithmetics.
@@ -1130,6 +1150,26 @@ fn llvm_fixup_output<'ll, 'tcx>(
11301150
value
11311151
}
11321152
}
1153+
(
1154+
InlineAsmRegClass::Arm(
1155+
ArmInlineAsmRegClass::dreg
1156+
| ArmInlineAsmRegClass::dreg_low8
1157+
| ArmInlineAsmRegClass::dreg_low16,
1158+
),
1159+
Abi::Vector { element, count: 4 },
1160+
) if element.primitive() == Primitive::Float(Float::F16) => {
1161+
bx.bitcast(value, bx.type_vector(bx.type_f16(), 4))
1162+
}
1163+
(
1164+
InlineAsmRegClass::Arm(
1165+
ArmInlineAsmRegClass::qreg
1166+
| ArmInlineAsmRegClass::qreg_low4
1167+
| ArmInlineAsmRegClass::qreg_low8,
1168+
),
1169+
Abi::Vector { element, count: 8 },
1170+
) if element.primitive() == Primitive::Float(Float::F16) => {
1171+
bx.bitcast(value, bx.type_vector(bx.type_f16(), 8))
1172+
}
11331173
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
11341174
match s.primitive() {
11351175
// MIPS only supports register-length arithmetics.
@@ -1233,6 +1273,24 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12331273
layout.llvm_type(cx)
12341274
}
12351275
}
1276+
(
1277+
InlineAsmRegClass::Arm(
1278+
ArmInlineAsmRegClass::dreg
1279+
| ArmInlineAsmRegClass::dreg_low8
1280+
| ArmInlineAsmRegClass::dreg_low16,
1281+
),
1282+
Abi::Vector { element, count: 4 },
1283+
) if element.primitive() == Primitive::Float(Float::F16) => cx.type_f64(),
1284+
(
1285+
InlineAsmRegClass::Arm(
1286+
ArmInlineAsmRegClass::qreg
1287+
| ArmInlineAsmRegClass::qreg_low4
1288+
| ArmInlineAsmRegClass::qreg_low8,
1289+
),
1290+
Abi::Vector { element, count: 8 },
1291+
) if element.primitive() == Primitive::Float(Float::F16) => {
1292+
cx.type_vector(cx.type_i16(), 8)
1293+
}
12361294
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
12371295
match s.primitive() {
12381296
// MIPS only supports register-length arithmetics.

compiler/rustc_target/src/asm/arm.rs

+7 -5
Original file line number | Diff line number | Diff line change
@@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass {
4747
_arch: InlineAsmArch,
4848
) -> &'static [(InlineAsmType, Option<Symbol>)] {
4949
match self {
50-
Self::reg => types! { _: I8, I16, I32, F32; },
51-
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; },
50+
Self::reg => types! { _: I8, I16, I32, F16, F32; },
51+
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
5252
Self::dreg_low16 | Self::dreg_low8 => types! {
53-
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
53+
vfp2: I64, F64;
54+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5455
},
5556
Self::dreg => types! {
56-
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
57+
d32: I64, F64;
58+
neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5759
},
5860
Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
59-
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4);
61+
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
6062
},
6163
}
6264
}

0 commit comments

Comments
 (0)