From 753fb070bb2962fc6f909c6d7adf568b6b28bbb5 Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Sun, 16 Jun 2024 16:56:39 +0100
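Subject: [PATCH] Add `f16` inline ASM support for 32-bit ARM

Accept `f16` as an inline-asm operand type for the `reg`, `sreg` and
`sreg_low16` register classes, and `f16x4`/`f16x8` vectors for the `dreg*`
and `qreg*` register classes. In LLVM codegen, `f16` vectors passed to
these classes are bitcast to `i16` vectors of the same length on input and
bitcast back to `f16` vectors on output.

The vector types accepted by the `dreg*` classes are now gated on `neon`
instead of `vfp2`/`d32`; `I64` and `F64` keep their previous gating. The
assembly test is split into `base`, `d32` and `neon` revisions to match.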
---
 compiler/rustc_codegen_llvm/src/asm.rs |  39 ++
 compiler/rustc_target/src/asm/arm.rs   |  12 +-
 tests/assembly/asm/arm-types.rs        | 497 ++++++++++++++++---------
 3 files changed, 365 insertions(+), 183 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 60e63b956db6e..b3df9470b3ad5 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -1020,6 +1020,19 @@ fn llvm_fixup_input<'ll, 'tcx>(
                 value
             }
         }
+        (
+            InlineAsmRegClass::Arm(
+                ArmInlineAsmRegClass::dreg
+                | ArmInlineAsmRegClass::dreg_low8
+                | ArmInlineAsmRegClass::dreg_low16
+                | ArmInlineAsmRegClass::qreg
+                | ArmInlineAsmRegClass::qreg_low4
+                | ArmInlineAsmRegClass::qreg_low8,
+            ),
+            Abi::Vector { element, count: count @ (4 | 8) },
+        ) if element.primitive() == Primitive::Float(Float::F16) => {
+            bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
+        }
         (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
             match s.primitive() {
                 // MIPS only supports register-length arithmetics.
@@ -1130,6 +1143,19 @@ fn llvm_fixup_output<'ll, 'tcx>(
                 value
             }
         }
+        (
+            InlineAsmRegClass::Arm(
+                ArmInlineAsmRegClass::dreg
+                | ArmInlineAsmRegClass::dreg_low8
+                | ArmInlineAsmRegClass::dreg_low16
+                | ArmInlineAsmRegClass::qreg
+                | ArmInlineAsmRegClass::qreg_low4
+                | ArmInlineAsmRegClass::qreg_low8,
+            ),
+            Abi::Vector { element, count: count @ (4 | 8) },
+        ) if element.primitive() == Primitive::Float(Float::F16) => {
+            bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
+        }
         (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
             match s.primitive() {
                 // MIPS only supports register-length arithmetics.
@@ -1233,6 +1259,19 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
                 layout.llvm_type(cx)
             }
         }
+        (
+            InlineAsmRegClass::Arm(
+                ArmInlineAsmRegClass::dreg
+                | ArmInlineAsmRegClass::dreg_low8
+                | ArmInlineAsmRegClass::dreg_low16
+                | ArmInlineAsmRegClass::qreg
+                | ArmInlineAsmRegClass::qreg_low4
+                | ArmInlineAsmRegClass::qreg_low8,
+            ),
+            Abi::Vector { element, count: count @ (4 | 8) },
+        ) if element.primitive() == Primitive::Float(Float::F16) => {
+            cx.type_vector(cx.type_i16(), count)
+        }
         (InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
             match s.primitive() {
                 // MIPS only supports register-length arithmetics.
diff --git a/compiler/rustc_target/src/asm/arm.rs b/compiler/rustc_target/src/asm/arm.rs
index 37184393a730b..9d79faadd619f 100644
--- a/compiler/rustc_target/src/asm/arm.rs
+++ b/compiler/rustc_target/src/asm/arm.rs
@@ -47,16 +47,18 @@ impl ArmInlineAsmRegClass {
         _arch: InlineAsmArch,
     ) -> &'static [(InlineAsmType, Option<Symbol>)] {
         match self {
-            Self::reg => types! { _: I8, I16, I32, F32; },
-            Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; },
+            Self::reg => types! { _: I8, I16, I32, F16, F32; },
+            Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
             Self::dreg_low16 | Self::dreg_low8 => types! {
-                vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
+                vfp2: I64, F64;
+                neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
             },
             Self::dreg => types! {
-                d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
+                d32: I64, F64;
+                neon: VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
             },
             Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
-                neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4);
+                neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
             },
         }
     }
diff --git a/tests/assembly/asm/arm-types.rs b/tests/assembly/asm/arm-types.rs
index 280b6d4a2280b..eeff1a070b492 100644
--- a/tests/assembly/asm/arm-types.rs
+++ b/tests/assembly/asm/arm-types.rs
@@ -1,10 +1,13 @@
+//@ revisions: base d32 neon
 //@ assembly-output: emit-asm
 //@ compile-flags: --target armv7-unknown-linux-gnueabihf
-//@ compile-flags: -C target-feature=+neon
 //@ compile-flags: -C opt-level=0
+//@[d32] compile-flags: -C target-feature=+d32
+//@[neon] compile-flags: -C target-feature=+neon --cfg d32
+//@[neon] filecheck-flags: --check-prefix d32
 //@ needs-llvm-components: arm
 
-#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
+#![feature(no_core, lang_items, rustc_attrs, repr_simd, f16)]
 #![crate_type = "rlib"]
 #![no_core]
 #![allow(asm_sub_register, non_camel_case_types)]
@@ -38,6 +41,8 @@ pub struct i32x2(i32, i32);
 #[repr(simd)]
 pub struct i64x1(i64);
 #[repr(simd)]
+pub struct f16x4(f16, f16, f16, f16);
+#[repr(simd)]
 pub struct f32x2(f32, f32);
 #[repr(simd)]
 pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8);
@@ -48,11 +53,14 @@ pub struct i32x4(i32, i32, i32, i32);
 #[repr(simd)]
 pub struct i64x2(i64, i64);
 #[repr(simd)]
+pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
+#[repr(simd)]
 pub struct f32x4(f32, f32, f32, f32);
 
 impl Copy for i8 {}
 impl Copy for i16 {}
 impl Copy for i32 {}
+impl Copy for f16 {}
 impl Copy for f32 {}
 impl Copy for i64 {}
 impl Copy for f64 {}
@@ -61,11 +69,13 @@ impl Copy for i8x8 {}
 impl Copy for i16x4 {}
 impl Copy for i32x2 {}
 impl Copy for i64x1 {}
+impl Copy for f16x4 {}
 impl Copy for f32x2 {}
 impl Copy for i8x16 {}
 impl Copy for i16x8 {}
 impl Copy for i32x4 {}
 impl Copy for i64x2 {}
+impl Copy for f16x8 {}
 impl Copy for f32x4 {}
 
 extern "C" {
@@ -152,6 +162,12 @@ check!(reg_i16 i16 reg "mov");
 // CHECK: @NO_APP
 check!(reg_i32 i32 reg "mov");
 
+// CHECK-LABEL: reg_f16:
+// CHECK: @APP
+// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
+// CHECK: @NO_APP
+check!(reg_f16 f16 reg "mov");
+
 // CHECK-LABEL: reg_f32:
 // CHECK: @APP
 // CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
@@ -170,6 +186,12 @@ check!(reg_ptr ptr reg "mov");
 // CHECK: @NO_APP
 check!(sreg_i32 i32 sreg "vmov.f32");
 
+// CHECK-LABEL: sreg_f16:
+// CHECK: @APP
+// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: @NO_APP
+check!(sreg_f16 f16 sreg "vmov.f32");
+
 // CHECK-LABEL: sreg_f32:
 // CHECK: @APP
 // CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
@@ -188,52 +210,72 @@ check!(sreg_ptr ptr sreg "vmov.f32");
 // CHECK: @NO_APP
 check!(sreg_low16_i32 i32 sreg_low16 "vmov.f32");
 
-// CHECK-LABEL: sreg_low16_f32:
+// CHECK-LABEL: sreg_low16_f16:
 // CHECK: @APP
 // CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: @NO_APP
-check!(sreg_low16_f32 f32 sreg_low16 "vmov.f32");
+check!(sreg_low16_f16 f16 sreg_low16 "vmov.f32");
 
-// CHECK-LABEL: dreg_i64:
+// CHECK-LABEL: sreg_low16_f32:
 // CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: @NO_APP
+check!(sreg_low16_f32 f32 sreg_low16 "vmov.f32");
+
+// d32-LABEL: dreg_i64:
+// d32: @APP
+// d32: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// d32: @NO_APP
+#[cfg(d32)]
 check!(dreg_i64 i64 dreg "vmov.f64");
 
-// CHECK-LABEL: dreg_f64:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// d32-LABEL: dreg_f64:
+// d32: @APP
+// d32: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// d32: @NO_APP
+#[cfg(d32)]
 check!(dreg_f64 f64 dreg "vmov.f64");
 
-// CHECK-LABEL: dreg_i8x8:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_i8x8:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_i8x8 i8x8 dreg "vmov.f64");
 
-// CHECK-LABEL: dreg_i16x4:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_i16x4:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_i16x4 i16x4 dreg "vmov.f64");
 
-// CHECK-LABEL: dreg_i32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_i32x2:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_i32x2 i32x2 dreg "vmov.f64");
 
-// CHECK-LABEL: dreg_i64x1:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_i64x1:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_i64x1 i64x1 dreg "vmov.f64");
 
-// CHECK-LABEL: dreg_f32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_f16x4:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
+check!(dreg_f16x4 f16x4 dreg "vmov.f64");
+
+// neon-LABEL: dreg_f32x2:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_f32x2 f32x2 dreg "vmov.f64");
 
 // CHECK-LABEL: dreg_low16_i64:
@@ -248,34 +290,46 @@ check!(dreg_low16_i64 i64 dreg_low16 "vmov.f64");
 // CHECK: @NO_APP
 check!(dreg_low16_f64 f64 dreg_low16 "vmov.f64");
 
-// CHECK-LABEL: dreg_low16_i8x8:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low16_i8x8:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low16_i8x8 i8x8 dreg_low16 "vmov.f64");
 
-// CHECK-LABEL: dreg_low16_i16x4:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low16_i16x4:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low16_i16x4 i16x4 dreg_low16 "vmov.f64");
 
-// CHECK-LABEL: dreg_low16_i32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low16_i32x2:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low16_i32x2 i32x2 dreg_low16 "vmov.f64");
 
-// CHECK-LABEL: dreg_low16_i64x1:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low16_i64x1:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low16_i64x1 i64x1 dreg_low16 "vmov.f64");
 
-// CHECK-LABEL: dreg_low16_f32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low16_f16x4:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
+check!(dreg_low16_f16x4 f16x4 dreg_low16 "vmov.f64");
+
+// neon-LABEL: dreg_low16_f32x2:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low16_f32x2 f32x2 dreg_low16 "vmov.f64");
 
 // CHECK-LABEL: dreg_low8_i64:
@@ -290,124 +344,172 @@ check!(dreg_low8_i64 i64 dreg_low8 "vmov.f64");
 // CHECK: @NO_APP
 check!(dreg_low8_f64 f64 dreg_low8 "vmov.f64");
 
-// CHECK-LABEL: dreg_low8_i8x8:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low8_i8x8:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low8_i8x8 i8x8 dreg_low8 "vmov.f64");
 
-// CHECK-LABEL: dreg_low8_i16x4:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low8_i16x4:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low8_i16x4 i16x4 dreg_low8 "vmov.f64");
 
-// CHECK-LABEL: dreg_low8_i32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low8_i32x2:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low8_i32x2 i32x2 dreg_low8 "vmov.f64");
 
-// CHECK-LABEL: dreg_low8_i64x1:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low8_i64x1:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low8_i64x1 i64x1 dreg_low8 "vmov.f64");
 
-// CHECK-LABEL: dreg_low8_f32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: dreg_low8_f16x4:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
+check!(dreg_low8_f16x4 f16x4 dreg_low8 "vmov.f64");
+
+// neon-LABEL: dreg_low8_f32x2:
+// neon: @APP
+// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(dreg_low8_f32x2 f32x2 dreg_low8 "vmov.f64");
 
-// CHECK-LABEL: qreg_i8x16:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_i8x16:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_i8x16 i8x16 qreg "vmov");
 
-// CHECK-LABEL: qreg_i16x8:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_i16x8:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_i16x8 i16x8 qreg "vmov");
 
-// CHECK-LABEL: qreg_i32x4:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_i32x4:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_i32x4 i32x4 qreg "vmov");
 
-// CHECK-LABEL: qreg_i64x2:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_i64x2:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_i64x2 i64x2 qreg "vmov");
 
-// CHECK-LABEL: qreg_f32x4:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_f16x8:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
+check!(qreg_f16x8 f16x8 qreg "vmov");
+
+// neon-LABEL: qreg_f32x4:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_f32x4 f32x4 qreg "vmov");
 
-// CHECK-LABEL: qreg_low8_i8x16:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low8_i8x16:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low8_i8x16 i8x16 qreg_low8 "vmov");
 
-// CHECK-LABEL: qreg_low8_i16x8:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low8_i16x8:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low8_i16x8 i16x8 qreg_low8 "vmov");
 
-// CHECK-LABEL: qreg_low8_i32x4:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low8_i32x4:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low8_i32x4 i32x4 qreg_low8 "vmov");
 
-// CHECK-LABEL: qreg_low8_i64x2:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low8_i64x2:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low8_i64x2 i64x2 qreg_low8 "vmov");
 
-// CHECK-LABEL: qreg_low8_f32x4:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low8_f16x8:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
+check!(qreg_low8_f16x8 f16x8 qreg_low8 "vmov");
+
+// neon-LABEL: qreg_low8_f32x4:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low8_f32x4 f32x4 qreg_low8 "vmov");
 
-// CHECK-LABEL: qreg_low4_i8x16:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low4_i8x16:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low4_i8x16 i8x16 qreg_low4 "vmov");
 
-// CHECK-LABEL: qreg_low4_i16x8:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low4_i16x8:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low4_i16x8 i16x8 qreg_low4 "vmov");
 
-// CHECK-LABEL: qreg_low4_i32x4:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low4_i32x4:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low4_i32x4 i32x4 qreg_low4 "vmov");
 
-// CHECK-LABEL: qreg_low4_i64x2:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low4_i64x2:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low4_i64x2 i64x2 qreg_low4 "vmov");
 
-// CHECK-LABEL: qreg_low4_f32x4:
-// CHECK: @APP
-// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// CHECK: @NO_APP
+// neon-LABEL: qreg_low4_f16x8:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
+check!(qreg_low4_f16x8 f16x8 qreg_low4 "vmov");
+
+// neon-LABEL: qreg_low4_f32x4:
+// neon: @APP
+// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// neon: @NO_APP
+#[cfg(neon)]
 check!(qreg_low4_f32x4 f32x4 qreg_low4 "vmov");
 
 // CHECK-LABEL: r0_i8:
@@ -428,6 +530,12 @@ check_reg!(r0_i16 i16 "r0" "mov");
 // CHECK: @NO_APP
 check_reg!(r0_i32 i32 "r0" "mov");
 
+// CHECK-LABEL: r0_f16:
+// CHECK: @APP
+// CHECK: mov r0, r0
+// CHECK: @NO_APP
+check_reg!(r0_f16 f16 "r0" "mov");
+
 // CHECK-LABEL: r0_f32:
 // CHECK: @APP
 // CHECK: mov r0, r0
@@ -446,6 +554,12 @@ check_reg!(r0_ptr ptr "r0" "mov");
 // CHECK: @NO_APP
 check_reg!(s0_i32 i32 "s0" "vmov.f32");
 
+// CHECK-LABEL: s0_f16:
+// CHECK: @APP
+// CHECK: vmov.f32 s0, s0
+// CHECK: @NO_APP
+check_reg!(s0_f16 f16 "s0" "vmov.f32");
+
 // CHECK-LABEL: s0_f32:
 // CHECK: @APP
 // CHECK: vmov.f32 s0, s0
@@ -458,74 +572,101 @@ check_reg!(s0_f32 f32 "s0" "vmov.f32");
 // CHECK: @NO_APP
 check_reg!(s0_ptr ptr "s0" "vmov.f32");
 
-// CHECK-LABEL: d0_i64:
-// CHECK: @APP
-// CHECK: vmov.f64 d0, d0
-// CHECK: @NO_APP
+// FIXME(#126797): "d0" should work with `i64` and `f64` even when `d32` is disabled.
+// d32-LABEL: d0_i64:
+// d32: @APP
+// d32: vmov.f64 d0, d0
+// d32: @NO_APP
+#[cfg(d32)]
 check_reg!(d0_i64 i64 "d0" "vmov.f64");
 
-// CHECK-LABEL: d0_f64:
-// CHECK: @APP
-// CHECK: vmov.f64 d0, d0
-// CHECK: @NO_APP
+// d32-LABEL: d0_f64:
+// d32: @APP
+// d32: vmov.f64 d0, d0
+// d32: @NO_APP
+#[cfg(d32)]
 check_reg!(d0_f64 f64 "d0" "vmov.f64");
 
-// CHECK-LABEL: d0_i8x8:
-// CHECK: @APP
-// CHECK: vmov.f64 d0, d0
-// CHECK: @NO_APP
+// neon-LABEL: d0_i8x8:
+// neon: @APP
+// neon: vmov.f64 d0, d0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(d0_i8x8 i8x8 "d0" "vmov.f64");
 
-// CHECK-LABEL: d0_i16x4:
-// CHECK: @APP
-// CHECK: vmov.f64 d0, d0
-// CHECK: @NO_APP
+// neon-LABEL: d0_i16x4:
+// neon: @APP
+// neon: vmov.f64 d0, d0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(d0_i16x4 i16x4 "d0" "vmov.f64");
 
-// CHECK-LABEL: d0_i32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d0, d0
-// CHECK: @NO_APP
+// neon-LABEL: d0_i32x2:
+// neon: @APP
+// neon: vmov.f64 d0, d0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(d0_i32x2 i32x2 "d0" "vmov.f64");
 
-// CHECK-LABEL: d0_i64x1:
-// CHECK: @APP
-// CHECK: vmov.f64 d0, d0
-// CHECK: @NO_APP
+// neon-LABEL: d0_i64x1:
+// neon: @APP
+// neon: vmov.f64 d0, d0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(d0_i64x1 i64x1 "d0" "vmov.f64");
 
-// CHECK-LABEL: d0_f32x2:
-// CHECK: @APP
-// CHECK: vmov.f64 d0, d0
-// CHECK: @NO_APP
+// neon-LABEL: d0_f16x4:
+// neon: @APP
+// neon: vmov.f64 d0, d0
+// neon: @NO_APP
+#[cfg(neon)]
+check_reg!(d0_f16x4 f16x4 "d0" "vmov.f64");
+
+// neon-LABEL: d0_f32x2:
+// neon: @APP
+// neon: vmov.f64 d0, d0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(d0_f32x2 f32x2 "d0" "vmov.f64");
 
-// CHECK-LABEL: q0_i8x16:
-// CHECK: @APP
-// CHECK: vorr q0, q0, q0
-// CHECK: @NO_APP
+// neon-LABEL: q0_i8x16:
+// neon: @APP
+// neon: vorr q0, q0, q0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(q0_i8x16 i8x16 "q0" "vmov");
 
-// CHECK-LABEL: q0_i16x8:
-// CHECK: @APP
-// CHECK: vorr q0, q0, q0
-// CHECK: @NO_APP
+// neon-LABEL: q0_i16x8:
+// neon: @APP
+// neon: vorr q0, q0, q0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(q0_i16x8 i16x8 "q0" "vmov");
 
-// CHECK-LABEL: q0_i32x4:
-// CHECK: @APP
-// CHECK: vorr q0, q0, q0
-// CHECK: @NO_APP
+// neon-LABEL: q0_i32x4:
+// neon: @APP
+// neon: vorr q0, q0, q0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(q0_i32x4 i32x4 "q0" "vmov");
 
-// CHECK-LABEL: q0_i64x2:
-// CHECK: @APP
-// CHECK: vorr q0, q0, q0
-// CHECK: @NO_APP
+// neon-LABEL: q0_i64x2:
+// neon: @APP
+// neon: vorr q0, q0, q0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(q0_i64x2 i64x2 "q0" "vmov");
 
-// CHECK-LABEL: q0_f32x4:
-// CHECK: @APP
-// CHECK: vorr q0, q0, q0
-// CHECK: @NO_APP
+// neon-LABEL: q0_f16x8:
+// neon: @APP
+// neon: vorr q0, q0, q0
+// neon: @NO_APP
+#[cfg(neon)]
+check_reg!(q0_f16x8 f16x8 "q0" "vmov");
+
+// neon-LABEL: q0_f32x4:
+// neon: @APP
+// neon: vorr q0, q0, q0
+// neon: @NO_APP
+#[cfg(neon)]
 check_reg!(q0_f32x4 f32x4 "q0" "vmov");