Skip to content

Commit

Permalink
Add fp8 (mfloat8) versions of the vget_lane and vgetq_lane intrinsics
Browse files: browse the repository at this point in the history
  • Loading branch information
Lukacma committed Jan 28, 2025
1 parent 3a5295a commit fae5b01
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 3 deletions.
4 changes: 3 additions & 1 deletion neon_intrinsics/advsimd.md
Original file line number Diff line number Diff line change
Expand Up @@ -3411,6 +3411,8 @@ The intrinsics in this section are guarded by the macro ``__ARM_NEON``.
| <code>int64_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_s64" target="_blank">vget_lane_s64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; int64x1_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `lane==0`<br>`v -> Vn.1D` | `UMOV Rd,Vn.D[lane]` | `Rd -> result` | `v7/A32/A64` |
| <code>poly8_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p8" target="_blank">vget_lane_p8</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; poly8x8_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=7`<br>`v -> Vn.8B` | `UMOV Rd,Vn.B[lane]` | `Rd -> result` | `v7/A32/A64` |
| <code>poly16_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_p16" target="_blank">vget_lane_p16</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; poly16x4_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=3`<br>`v -> Vn.4H` | `UMOV Rd,Vn.H[lane]` | `Rd -> result` | `v7/A32/A64` |
| <code>mfloat8_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_mf8" target="_blank">vget_lane_mf8</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; mfloat8x8_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=7`<br>`v -> Vn.8B` | `DUP Bd,Vn.B[lane]` | `Bd -> result` | `v7/A32/A64` |
| <code>float16_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16" target="_blank">vget_lane_f16</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float16x4_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=3`<br>`v -> Vn.4H` | `DUP Hd,Vn.H[lane]` | `Hd -> result` | `v7/A32/A64` |
| <code>float32_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f32" target="_blank">vget_lane_f32</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float32x2_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=1`<br>`v -> Vn.2S` | `DUP Sd,Vn.S[lane]` | `Sd -> result` | `v7/A32/A64` |
| <code>float64_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f64" target="_blank">vget_lane_f64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float64x1_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `lane==0`<br>`v -> Vn.1D` | `DUP Dd,Vn.D[lane]` | `Dd -> result` | `A64` |
| <code>uint8_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_u8" target="_blank">vgetq_lane_u8</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; uint8x16_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=15`<br>`v -> Vn.16B` | `UMOV Rd,Vn.B[lane]` | `Rd -> result` | `v7/A32/A64` |
Expand All @@ -3424,7 +3426,7 @@ The intrinsics in this section are guarded by the macro ``__ARM_NEON``.
| <code>int64_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_s64" target="_blank">vgetq_lane_s64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; int64x2_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=1`<br>`v -> Vn.2D` | `UMOV Rd,Vn.D[lane]` | `Rd -> result` | `v7/A32/A64` |
| <code>poly8_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p8" target="_blank">vgetq_lane_p8</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; poly8x16_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=15`<br>`v -> Vn.16B` | `UMOV Rd,Vn.B[lane]` | `Rd -> result` | `v7/A32/A64` |
| <code>poly16_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_p16" target="_blank">vgetq_lane_p16</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; poly16x8_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=7`<br>`v -> Vn.8H` | `UMOV Rd,Vn.H[lane]` | `Rd -> result` | `v7/A32/A64` |
| <code>float16_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16" target="_blank">vget_lane_f16</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float16x4_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=3`<br>`v -> Vn.4H` | `DUP Hd,Vn.H[lane]` | `Hd -> result` | `v7/A32/A64` |
| <code>mfloat8_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_mf8" target="_blank">vgetq_lane_mf8</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; mfloat8x16_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=15`<br>`v -> Vn.16B` | `DUP Bd,Vn.B[lane]` | `Bd -> result` | `v7/A32/A64` |
| <code>float16_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16" target="_blank">vgetq_lane_f16</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float16x8_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=7`<br>`v -> Vn.8H` | `DUP Hd,Vn.H[lane]` | `Hd -> result` | `v7/A32/A64` |
| <code>float32_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f32" target="_blank">vgetq_lane_f32</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float32x4_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=3`<br>`v -> Vn.4S` | `DUP Sd,Vn.S[lane]` | `Sd -> result` | `v7/A32/A64` |
| <code>float64_t <a href="https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64" target="_blank">vgetq_lane_f64</a>(<br>&nbsp;&nbsp;&nbsp;&nbsp; float64x2_t v,<br>&nbsp;&nbsp;&nbsp;&nbsp; const int lane)</code> | `0<=lane<=1`<br>`v -> Vn.2D` | `DUP Dd,Vn.D[lane]` | `Dd -> result` | `A64` |
Expand Down
4 changes: 3 additions & 1 deletion tools/intrinsic_db/advsimd.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3349,6 +3349,8 @@ int32_t vget_lane_s32(int32x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> V
int64_t vget_lane_s64(int64x1_t v, __builtin_constant_p(lane)) lane==0;v -> Vn.1D UMOV Rd,Vn.D[lane] Rd -> result v7/A32/A64
poly8_t vget_lane_p8(poly8x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8B UMOV Rd,Vn.B[lane] Rd -> result v7/A32/A64
poly16_t vget_lane_p16(poly16x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4H UMOV Rd,Vn.H[lane] Rd -> result v7/A32/A64
mfloat8_t vget_lane_mf8(mfloat8x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8B DUP Bd,Vn.B[lane] Bd -> result v7/A32/A64
float16_t vget_lane_f16(float16x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4H DUP Hd,Vn.H[lane] Hd -> result v7/A32/A64
float32_t vget_lane_f32(float32x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> Vn.2S DUP Sd,Vn.S[lane] Sd -> result v7/A32/A64
float64_t vget_lane_f64(float64x1_t v, __builtin_constant_p(lane)) lane==0;v -> Vn.1D DUP Dd,Vn.D[lane] Dd -> result A64
uint8_t vgetq_lane_u8(uint8x16_t v, __builtin_constant_p(lane)) 0<=lane<=15;v -> Vn.16B UMOV Rd,Vn.B[lane] Rd -> result v7/A32/A64
Expand All @@ -3362,7 +3364,7 @@ int32_t vgetq_lane_s32(int32x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v ->
int64_t vgetq_lane_s64(int64x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> Vn.2D UMOV Rd,Vn.D[lane] Rd -> result v7/A32/A64
poly8_t vgetq_lane_p8(poly8x16_t v, __builtin_constant_p(lane)) 0<=lane<=15;v -> Vn.16B UMOV Rd,Vn.B[lane] Rd -> result v7/A32/A64
poly16_t vgetq_lane_p16(poly16x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8H UMOV Rd,Vn.H[lane] Rd -> result v7/A32/A64
float16_t vget_lane_f16(float16x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4H DUP Hd,Vn.H[lane] Hd -> result v7/A32/A64
mfloat8_t vgetq_lane_mf8(mfloat8x16_t v, __builtin_constant_p(lane)) 0<=lane<=15;v -> Vn.16B DUP Bd,Vn.B[lane] Bd -> result v7/A32/A64
float16_t vgetq_lane_f16(float16x8_t v, __builtin_constant_p(lane)) 0<=lane<=7;v -> Vn.8H DUP Hd,Vn.H[lane] Hd -> result v7/A32/A64
float32_t vgetq_lane_f32(float32x4_t v, __builtin_constant_p(lane)) 0<=lane<=3;v -> Vn.4S DUP Sd,Vn.S[lane] Sd -> result v7/A32/A64
float64_t vgetq_lane_f64(float64x2_t v, __builtin_constant_p(lane)) 0<=lane<=1;v -> Vn.2D DUP Dd,Vn.D[lane] Dd -> result A64
Expand Down
4 changes: 3 additions & 1 deletion tools/intrinsic_db/advsimd_classification.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3348,6 +3348,8 @@ vget_lane_s32 Vector manipulation|Extract one element from vector
vget_lane_s64 Vector manipulation|Extract one element from vector
vget_lane_p8 Vector manipulation|Extract one element from vector
vget_lane_p16 Vector manipulation|Extract one element from vector
vget_lane_mf8 Vector manipulation|Extract one element from vector
vget_lane_f16 Vector manipulation|Extract one element from vector
vget_lane_f32 Vector manipulation|Extract one element from vector
vget_lane_f64 Vector manipulation|Extract one element from vector
vgetq_lane_u8 Vector manipulation|Extract one element from vector
Expand All @@ -3361,7 +3363,7 @@ vgetq_lane_s32 Vector manipulation|Extract one element from vector
vgetq_lane_s64 Vector manipulation|Extract one element from vector
vgetq_lane_p8 Vector manipulation|Extract one element from vector
vgetq_lane_p16 Vector manipulation|Extract one element from vector
vget_lane_f16 Vector manipulation|Extract one element from vector
vgetq_lane_mf8 Vector manipulation|Extract one element from vector
vgetq_lane_f16 Vector manipulation|Extract one element from vector
vgetq_lane_f32 Vector manipulation|Extract one element from vector
vgetq_lane_f64 Vector manipulation|Extract one element from vector
Expand Down

0 comments on commit fae5b01

Please sign in to comment.