From fae5b016c4387fa96094dffc7373667d396d89dd Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 28 Jan 2025 14:39:04 +0000 Subject: [PATCH] Add fp8 version of vget_lane intrinsic --- neon_intrinsics/advsimd.md | 4 +++- tools/intrinsic_db/advsimd.csv | 4 +++- tools/intrinsic_db/advsimd_classification.csv | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/neon_intrinsics/advsimd.md b/neon_intrinsics/advsimd.md index e2aa7a5f..9094b1d4 100644 --- a/neon_intrinsics/advsimd.md +++ b/neon_intrinsics/advsimd.md @@ -3411,6 +3411,8 @@ The intrinsics in this section are guarded by the macro ``__ARM_NEON``. | int64_t vget_lane_s64(
     int64x1_t v,
     const int lane)
| `lane==0`
`v -> Vn.1D` | `UMOV Rd,Vn.D[lane]` | `Rd -> result` | `v7/A32/A64` | | poly8_t vget_lane_p8(
     poly8x8_t v,
     const int lane)
| `0<=lane<=7`
`v -> Vn.8B` | `UMOV Rd,Vn.B[lane]` | `Rd -> result` | `v7/A32/A64` | | poly16_t vget_lane_p16(
     poly16x4_t v,
     const int lane)
| `0<=lane<=3`
`v -> Vn.4H` | `UMOV Rd,Vn.H[lane]` | `Rd -> result` | `v7/A32/A64` | +| mfloat8_t vget_lane_mf8(
     mfloat8x8_t v,
     const int lane)
| `0<=lane<=7`
`v -> Vn.8B` | `DUP Bd,Vn.B[lane]` | `Bd -> result` | `v7/A32/A64` | +| float16_t vget_lane_f16(
     float16x4_t v,
     const int lane)
| `0<=lane<=3`
`v -> Vn.4H` | `DUP Hd,Vn.H[lane]` | `Hd -> result` | `v7/A32/A64` | | float32_t vget_lane_f32(
     float32x2_t v,
     const int lane)
| `0<=lane<=1`
`v -> Vn.2S` | `DUP Sd,Vn.S[lane]` | `Sd -> result` | `v7/A32/A64` | | float64_t vget_lane_f64(
     float64x1_t v,
     const int lane)
| `lane==0`
`v -> Vn.1D` | `DUP Dd,Vn.D[lane]` | `Dd -> result` | `A64` | | uint8_t vgetq_lane_u8(
     uint8x16_t v,
     const int lane)
| `0<=lane<=15`
`v -> Vn.16B` | `UMOV Rd,Vn.B[lane]` | `Rd -> result` | `v7/A32/A64` | @@ -3424,7 +3426,7 @@ The intrinsics in this section are guarded by the macro ``__ARM_NEON``. | int64_t vgetq_lane_s64(
     int64x2_t v,
     const int lane)
| `0<=lane<=1`
`v -> Vn.2D` | `UMOV Rd,Vn.D[lane]` | `Rd -> result` | `v7/A32/A64` | | poly8_t vgetq_lane_p8(
     poly8x16_t v,
     const int lane)
| `0<=lane<=15`
`v -> Vn.16B` | `UMOV Rd,Vn.B[lane]` | `Rd -> result` | `v7/A32/A64` | | poly16_t vgetq_lane_p16(
     poly16x8_t v,
     const int lane)
| `0<=lane<=7`
`v -> Vn.8H` | `UMOV Rd,Vn.H[lane]` | `Rd -> result` | `v7/A32/A64` | -| float16_t vget_lane_f16(
     float16x4_t v,
     const int lane)
| `0<=lane<=3`
`v -> Vn.4H` | `DUP Hd,Vn.H[lane]` | `Hd -> result` | `v7/A32/A64` | +| mfloat8_t vgetq_lane_mf8(
     mfloat8x16_t v,
     const int lane)
| `0<=lane<=15`
`v -> Vn.16B` | `DUP Bd,Vn.B[lane]` | `Bd -> result` | `v7/A32/A64` | | float16_t vgetq_lane_f16(
     float16x8_t v,
     const int lane)
| `0<=lane<=7`
`v -> Vn.8H` | `DUP Hd,Vn.H[lane]` | `Hd -> result` | `v7/A32/A64` | | float32_t vgetq_lane_f32(
     float32x4_t v,
     const int lane)
| `0<=lane<=3`
`v -> Vn.4S` | `DUP Sd,Vn.S[lane]` | `Sd -> result` | `v7/A32/A64` | | float64_t vgetq_lane_f64(
     float64x2_t v,
     const int lane)
| `0<=lane<=1`
`v -> Vn.2D` | `DUP Dd,Vn.D[lane]` | `Dd -> result` | `A64` | diff --git a/tools/intrinsic_db/advsimd.csv b/tools/intrinsic_db/advsimd.csv index a800d90d..7b51c965 100644 --- a/tools/intrinsic_db/advsimd.csv +++ b/tools/intrinsic_db/advsimd.csv @@ -3349,6 +3349,8 @@ int32_t vget_lane_s32(int32x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> V int64_t vget_lane_s64(int64x1_t v, __builtin_constant_p(lane)) lane==0;v -> Vn.1D UMOV Rd,Vn.D[lane] Rd -> result v7/A32/A64 poly8_t vget_lane_p8(poly8x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8B UMOV Rd,Vn.B[lane] Rd -> result v7/A32/A64 poly16_t vget_lane_p16(poly16x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4H UMOV Rd,Vn.H[lane] Rd -> result v7/A32/A64 +mfloat8_t vget_lane_mf8(mfloat8x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8B DUP Bd,Vn.B[lane] Bd -> result v7/A32/A64 +float16_t vget_lane_f16(float16x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4H DUP Hd,Vn.H[lane] Hd -> result v7/A32/A64 float32_t vget_lane_f32(float32x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> Vn.2S DUP Sd,Vn.S[lane] Sd -> result v7/A32/A64 float64_t vget_lane_f64(float64x1_t v, __builtin_constant_p(lane)) lane==0;v -> Vn.1D DUP Dd,Vn.D[lane] Dd -> result A64 uint8_t vgetq_lane_u8(uint8x16_t v, __builtin_constant_p(lane)) 0<=lane<=15;v -> Vn.16B UMOV Rd,Vn.B[lane] Rd -> result v7/A32/A64 @@ -3362,7 +3364,7 @@ int32_t vgetq_lane_s32(int32x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> int64_t vgetq_lane_s64(int64x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> Vn.2D UMOV Rd,Vn.D[lane] Rd -> result v7/A32/A64 poly8_t vgetq_lane_p8(poly8x16_t v, __builtin_constant_p(lane)) 0<=lane<=15;v -> Vn.16B UMOV Rd,Vn.B[lane] Rd -> result v7/A32/A64 poly16_t vgetq_lane_p16(poly16x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8H UMOV Rd,Vn.H[lane] Rd -> result v7/A32/A64 -float16_t vget_lane_f16(float16x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4H DUP Hd,Vn.H[lane] Hd -> result v7/A32/A64 +mfloat8_t vgetq_lane_mf8(mfloat8x16_t v, __builtin_constant_p(lane)) 0<=lane<=15;v -> Vn.16B DUP Bd,Vn.B[lane] Bd -> result v7/A32/A64 float16_t vgetq_lane_f16(float16x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8H DUP Hd,Vn.H[lane] Hd -> result v7/A32/A64 float32_t vgetq_lane_f32(float32x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4S DUP Sd,Vn.S[lane] Sd -> result v7/A32/A64 float64_t vgetq_lane_f64(float64x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> Vn.2D DUP Dd,Vn.D[lane] Dd -> result A64 diff --git a/tools/intrinsic_db/advsimd_classification.csv b/tools/intrinsic_db/advsimd_classification.csv index e0e79f25..a5e75510 100644 --- a/tools/intrinsic_db/advsimd_classification.csv +++ b/tools/intrinsic_db/advsimd_classification.csv @@ -3348,6 +3348,8 @@ vget_lane_s32 Vector manipulation|Extract one element from vector vget_lane_s64 Vector manipulation|Extract one element from vector vget_lane_p8 Vector manipulation|Extract one element from vector vget_lane_p16 Vector manipulation|Extract one element from vector +vget_lane_mf8 Vector manipulation|Extract one element from vector +vget_lane_f16 Vector manipulation|Extract one element from vector vget_lane_f32 Vector manipulation|Extract one element from vector vget_lane_f64 Vector manipulation|Extract one element from vector vgetq_lane_u8 Vector manipulation|Extract one element from vector @@ -3361,7 +3363,7 @@ vgetq_lane_s32 Vector manipulation|Extract one element from vector vgetq_lane_s64 Vector manipulation|Extract one element from vector vgetq_lane_p8 Vector manipulation|Extract one element from vector vgetq_lane_p16 Vector manipulation|Extract one element from vector -vget_lane_f16 Vector manipulation|Extract one element from vector +vgetq_lane_mf8 Vector manipulation|Extract one element from vector vgetq_lane_f16 Vector manipulation|Extract one element from vector vgetq_lane_f32 Vector manipulation|Extract one element from vector vgetq_lane_f64 Vector manipulation|Extract one element from vector