Skip to content

Commit f5edbc8

Browse files
committed
Add add_mul to vector
1 parent 4231f44 commit f5edbc8

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

include/kernel_float/prelude.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,14 @@ KERNEL_FLOAT_TYPE_ALIAS(f64x, double)
6161
KERNEL_FLOAT_TYPE_ALIAS(float64x, double)
6262

6363
#if KERNEL_FLOAT_FP16_AVAILABLE
64-
KERNEL_FLOAT_TYPE_ALIAS(half, __half)
65-
KERNEL_FLOAT_TYPE_ALIAS(f16x, __half)
66-
KERNEL_FLOAT_TYPE_ALIAS(float16x, __half)
64+
KERNEL_FLOAT_TYPE_ALIAS(half, half_t)
65+
KERNEL_FLOAT_TYPE_ALIAS(f16x, half_t)
66+
KERNEL_FLOAT_TYPE_ALIAS(float16x, half_t)
6767
#endif
6868

6969
#if KERNEL_FLOAT_BF16_AVAILABLE
70-
KERNEL_FLOAT_TYPE_ALIAS(bfloat16x, __bfloat16)
71-
KERNEL_FLOAT_TYPE_ALIAS(bf16x, __bfloat16)
70+
KERNEL_FLOAT_TYPE_ALIAS(bfloat16x, bfloat16_t)
71+
KERNEL_FLOAT_TYPE_ALIAS(bf16x, bfloat16_t)
7272
#endif
7373

7474
#if KERNEL_FLOAT_BF8_AVAILABLE

include/kernel_float/vector.h

+10-1
Original file line numberDiff line numberDiff line change
@@ -287,11 +287,20 @@ struct vector: public S {
287287
}
288288

289289
/**
290-
* Returns the result of `*this + lhs * rhs`.
290+
* Returns the result of `this + lhs * rhs`.
291291
*
292292
* The operation is performed using a single `kernel_float::fma` call, which may be faster than performing
293293
* the addition and multiplication separately.
294294
*/
295+
template<
296+
typename L,
297+
typename R,
298+
typename T2 = promote_t<T, vector_value_type<L>, vector_value_type<R>>,
299+
typename E2 = broadcast_extent<E, vector_extent_type<L>, vector_extent_type<R>>>
300+
KERNEL_FLOAT_INLINE vector<T2, E2> add_mul(const L& lhs, const R& rhs) const {
301+
return ::kernel_float::fma(lhs, rhs, *this);
302+
}
303+
295304
template<
296305
typename L,
297306
typename R,

0 commit comments

Comments
 (0)