diff --git a/include/ccmath/internal/math/runtime/pp/CMakeLists.txt b/include/ccmath/internal/math/runtime/pp/CMakeLists.txt index 6470e45..9bc2740 100644 --- a/include/ccmath/internal/math/runtime/pp/CMakeLists.txt +++ b/include/ccmath/internal/math/runtime/pp/CMakeLists.txt @@ -1,13 +1,17 @@ ccm_add_headers( - simd.hpp + arm_detail.hpp + assume_aligned.hpp const_eval.hpp + constexpr_wrapper.hpp debug_print.hpp detail.hpp - may_alias.hpp - simd_intrinsic.hpp - trap.hpp - constexpr_wrapper.hpp + flags.hpp fwddecl.hpp + may_alias.hpp + simd.hpp + simd_config.hpp simd_meta.hpp + trap.hpp + vec_detail.hpp x86_include.hpp ) diff --git a/include/ccmath/internal/math/runtime/pp/arm_detail.hpp b/include/ccmath/internal/math/runtime/pp/arm_detail.hpp new file mode 100644 index 0000000..b6393fb --- /dev/null +++ b/include/ccmath/internal/math/runtime/pp/arm_detail.hpp @@ -0,0 +1,2 @@ + +#pragma once \ No newline at end of file diff --git a/include/ccmath/internal/math/runtime/pp/assume_aligned.hpp b/include/ccmath/internal/math/runtime/pp/assume_aligned.hpp new file mode 100644 index 0000000..1c01c3d --- /dev/null +++ b/include/ccmath/internal/math/runtime/pp/assume_aligned.hpp @@ -0,0 +1,27 @@ + +#pragma once + +#include + +#include "ccmath/internal/predef/attributes/always_inline.hpp" + +#if defined(_MSC_VER) && !defined(__clang__) + #include +#endif + +namespace ccm::pp +{ + template + CCM_ALWAYS_INLINE void * assume_aligned(void * ptr) + { + static_assert((Alignment & (Alignment - 1)) == 0, "Alignment must be a power of 2"); // TODO: Might remove this check not sure if it's necessary +#if defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER)) + return __builtin_assume_aligned(ptr, Alignment); +#elif defined(_MSC_VER) + __assume((reinterpret_cast(ptr) & (Alignment - 1)) == 0); + return ptr; +#else + return ptr; +#endif + } +} // namespace ccm::pp \ No newline at end of file diff --git a/include/ccmath/internal/math/runtime/pp/constexpr_wrapper.hpp b/include/ccmath/internal/math/runtime/pp/constexpr_wrapper.hpp index 99ddf32..7a537a4 100644 --- a/include/ccmath/internal/math/runtime/pp/constexpr_wrapper.hpp +++ b/include/ccmath/internal/math/runtime/pp/constexpr_wrapper.hpp @@ -1,2 +1,11 @@ +/* + * Copyright (c) Ian Pike + * Copyright (c) CCMath contributors + * + * CCMath is provided under the Apache-2.0 License WITH LLVM-exception. + * See LICENSE for more information. + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ #pragma once diff --git a/include/ccmath/internal/math/runtime/pp/detail.hpp b/include/ccmath/internal/math/runtime/pp/detail.hpp index 3f8617f..f2ba8eb 100644 --- a/include/ccmath/internal/math/runtime/pp/detail.hpp +++ b/include/ccmath/internal/math/runtime/pp/detail.hpp @@ -20,7 +20,6 @@ #include "const_eval.hpp" #include "debug_print.hpp" #include "may_alias.hpp" -#include "simd_intrinsic.hpp" #include "trap.hpp" #include "ccmath/internal/predef/attributes/always_inline.hpp" @@ -57,7 +56,7 @@ namespace ccm::pp::detail inline constexpr PrivateInit private_init = PrivateInit{}; template - CCM_ATTR_SIMD_INTRINSIC static void bit_iteration(T mask, F && func) + CCM_SIMD_INTRINSIC static void bit_iteration(T mask, F && func) { static_assert(sizeof(0ULL) >= sizeof(T)); using ConditionalType = std::conditional_t; @@ -70,7 +69,7 @@ namespace ccm::pp::detail } template - CCM_ATTR_SIMD_INTRINSIC static void bit_iteration(BitMask mask, F && func) + CCM_SIMD_INTRINSIC static void bit_iteration(BitMask mask, F && func) { bit_iteration(mask.sanitized().to_bits(), func); } @@ -270,13 +269,13 @@ namespace ccm::pp::detail }(); template , int> = 0> - CCM_ATTR_SIMD_INTRINSIC SimdSizeType lowest_bit(Integral bits) + CCM_SIMD_INTRINSIC SimdSizeType lowest_bit(Integral bits) { return ccm::support::ctz(bits); } template , int> = 0> - CCM_ATTR_SIMD_INTRINSIC SimdSizeType highest_bit(Integral bits) + CCM_SIMD_INTRINSIC SimdSizeType highest_bit(Integral bits) { if constexpr (sizeof(bits) <= sizeof(int)) { return sizeof(int) * CHAR_BIT - 1 - ccm::support::countl_zero(bits); } else if constexpr (sizeof(bits) <= sizeof(long)) { return sizeof(long) * CHAR_BIT - 1 - ccm::support::countl_zero(bits); } @@ -403,25 +402,25 @@ namespace ccm::pp::detail SimdSizeType index; Up & obj; - CCM_ATTR_SIMD_INTRINSIC constexpr ValueType read() const noexcept { return Accessor::get(obj, index); } + CCM_SIMD_INTRINSIC constexpr ValueType read() const noexcept { return Accessor::get(obj, index); } template - CCM_ATTR_SIMD_INTRINSIC constexpr void write(Tp && x) const + CCM_SIMD_INTRINSIC constexpr void write(Tp && x) const { Accessor::set(obj, index, std::forward(x)); } public: - CCM_ATTR_SIMD_INTRINSIC constexpr SmartReference(Up & o, SimdSizeType i) noexcept : index(i), obj(o) {} + CCM_SIMD_INTRINSIC constexpr SmartReference(Up & o, SimdSizeType i) noexcept : index(i), obj(o) {} using value_type = ValueType; SmartReference(const SmartReference &) = delete; - CCM_ATTR_SIMD_INTRINSIC constexpr operator value_type() const noexcept { return read(); } + CCM_SIMD_INTRINSIC constexpr operator value_type() const noexcept { return read(); } template - CCM_ATTR_SIMD_INTRINSIC constexpr SmartReference operator=(Tp && x) && + CCM_SIMD_INTRINSIC constexpr SmartReference operator=(Tp && x) && { write(std::forward(x)); return {obj, index}; @@ -429,7 +428,7 @@ namespace ccm::pp::detail #define CCM_SIMD_OP(op) \ template \ - CCM_ATTR_SIMD_INTRINSIC constexpr SmartReference operator op##=(Tp && x) && \ + CCM_SIMD_INTRINSIC constexpr SmartReference operator op##=(Tp && x) && \ { \ const value_type & lhs = read(); \ write(lhs op std::forward(x)); \ @@ -450,7 +449,7 @@ namespace ccm::pp::detail #undef CCM_SIMD_OP template &>())> - CCM_ATTR_SIMD_INTRINSIC constexpr SmartReference operator++() && + CCM_SIMD_INTRINSIC constexpr SmartReference operator++() && { value_type x = read(); write(++x); @@ -458,7 +457,7 @@ namespace ccm::pp::detail } template &>()++)> - CCM_ATTR_SIMD_INTRINSIC constexpr value_type operator++(int) && + CCM_SIMD_INTRINSIC constexpr value_type operator++(int) && { const value_type r = read(); value_type x = r; @@ -467,7 +466,7 @@ namespace ccm::pp::detail } template &>())> - CCM_ATTR_SIMD_INTRINSIC constexpr SmartReference operator--() && + CCM_SIMD_INTRINSIC constexpr SmartReference operator--() && { value_type x = read(); write(--x); @@ -475,7 +474,7 @@ namespace ccm::pp::detail } template &>()--)> - CCM_ATTR_SIMD_INTRINSIC constexpr value_type operator--(int) && + CCM_SIMD_INTRINSIC constexpr value_type operator--(int) && { const value_type r = read(); value_type x = r; @@ -483,7 +482,7 @@ namespace ccm::pp::detail return r; } - CCM_ATTR_SIMD_INTRINSIC friend constexpr void swap(SmartReference && a, SmartReference && b) noexcept( + CCM_SIMD_INTRINSIC friend constexpr void swap(SmartReference && a, SmartReference && b) noexcept( std::conjunction_v, std::is_nothrow_assignable>) { value_type tmp = std::forward(a); @@ -491,7 +490,7 @@ namespace ccm::pp::detail std::forward(b) = std::move(tmp); } - CCM_ATTR_SIMD_INTRINSIC friend constexpr void swap(value_type & a, SmartReference && b) noexcept( + CCM_SIMD_INTRINSIC friend constexpr void swap(value_type & a, SmartReference && b) noexcept( std::conjunction_v, std::is_nothrow_assignable, std::is_nothrow_assignable>) { @@ -500,7 +499,7 @@ namespace ccm::pp::detail std::forward(b) = std::move(tmp); } - CCM_ATTR_SIMD_INTRINSIC friend constexpr void swap(SmartReference && a, value_type & b) noexcept( + CCM_SIMD_INTRINSIC friend constexpr void swap(SmartReference && a, value_type & b) noexcept( std::conjunction_v, std::is_nothrow_assignable, std::is_nothrow_assignable>) { diff --git a/include/ccmath/internal/math/runtime/pp/flags.hpp b/include/ccmath/internal/math/runtime/pp/flags.hpp new file mode 100644 index 0000000..aed0674 --- /dev/null +++ b/include/ccmath/internal/math/runtime/pp/flags.hpp @@ -0,0 +1,179 @@ +/* + * Copyright (c) Ian Pike + * Copyright (c) CCMath contributors + * + * CCMath is provided under the Apache-2.0 License WITH LLVM-exception. + * See LICENSE for more information. + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#pragma once + +#include "ccmath/internal/predef/attributes/always_inline.hpp" +#include "ccmath/internal/support/bits.hpp" +#include "ccmath/internal/support/floating_point_traits.hpp" + +// ReSharper disable once CppUnusedIncludeDirective +#include "assume_aligned.hpp" +#include "fwddecl.hpp" +#include "simd_config.hpp" + +namespace ccm::pp +{ + namespace detail + { + struct LoadStoreTag + { + }; + + struct Convert : LoadStoreTag + { + }; + + template + struct ConvertTo : LoadStoreTag + { + using type = T; + }; + + struct Aligned : LoadStoreTag + { + template + CCM_SIMD_INTRINSIC static constexpr U * adjust_pointer(U * ptr) + { + return static_cast(pp::assume_aligned>(ptr)); + } + }; + + template + struct Overaligned : LoadStoreTag + { + static_assert(support::has_single_bit(N)); + + template + CCM_SIMD_INTRINSIC static constexpr U * adjust_pointer(U * ptr) + { + return static_cast(pp::assume_aligned(ptr)); + } + }; + + struct Streaming : LoadStoreTag + { + }; + + template + struct Prefetch : LoadStoreTag + { + template + CCM_ALWAYS_INLINE static U * adjust_pointer(U * ptr) + { + // one read: 0, 0 + // L1: 0, 1 + // L2: 0, 2 + // L3: 0, 3 + // (exclusive cache line) for writing: 1, 0 / 1, 1 + /* constexpr int write = 1; + constexpr int level = 0-3; + __builtin_prefetch(ptr, write, level) + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_T0); + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_T1); + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_T2); + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_ET0); + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_ET1); + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_NTA);*/ + return ptr; + } + }; + + template + using is_loadstore_tag = std::is_base_of; + } // namespace detail + + template + struct simd_flags + { + static_assert((detail::is_loadstore_tag::value && ...), "All Flags must derive from LoadStoreTag"); + + // ReSharper disable once CppMemberFunctionMayBeStatic + CCM_CONSTEVAL bool is_equal(simd_flags) const { return true; } + + template + CCM_CONSTEVAL bool is_equal([[maybe_unused]] simd_flags other) const + { + return std::is_same_v, decltype(xor_flags(other))>; + } + + template + CCM_CONSTEVAL bool test(simd_flags other) const noexcept + { + return other.is_equal(and_flags(other)); + } + + friend CCM_CONSTEVAL auto operator|(simd_flags, simd_flags<>) { return simd_flags{}; } + + template + friend CCM_CONSTEVAL auto operator|(simd_flags, simd_flags) + { + if constexpr ((std::is_same_v || ...)) { return simd_flags{} | simd_flags{}; } + else { return simd_flags{} | simd_flags{}; } + } + + // ReSharper disable once CppMemberFunctionMayBeStatic + CCM_CONSTEVAL auto and_flags(simd_flags<>) const { return simd_flags<>{}; } + + template + CCM_CONSTEVAL auto and_flags(simd_flags) const + { + if constexpr ((std::is_same_v || ...)) { return simd_flags{} | (simd_flags{}.and_flags(simd_flags{})); } + else { return simd_flags{}.and_flags(simd_flags{}); } + } + + CCM_CONSTEVAL auto xor_flags(simd_flags<>) const { return simd_flags{}; } + + template + CCM_CONSTEVAL auto xor_flags(simd_flags) const + { + if constexpr ((std::is_same_v || ...)) + { + constexpr auto removed = (std::conditional_t, simd_flags<>, simd_flags>{} | ...); + return removed.xor_flags(simd_flags{}); + } + else { return (simd_flags{} | simd_flags{}).xor_flags(simd_flags{}); } + } + + template + static constexpr void apply_adjust_pointer(Ptr & ptr) + { + if constexpr (std::is_same_v(ptr)), void>) { ptr = F0::template adjust_pointer(ptr); } + } + + template + static constexpr Up * adjust_pointer(Up * ptr) + { + (apply_adjust_pointer(ptr), ...); + return ptr; + } + }; + + // [simd.flags] + inline constexpr simd_flags<> simd_flag_default; + + inline constexpr simd_flags simd_flag_convert; + + inline constexpr simd_flags simd_flag_aligned; + + template + requires(support::has_single_bit(N)) + inline constexpr simd_flags> simd_flag_overaligned; + + // extensions + template + inline constexpr simd_flags> simd_flag_convert_to; + + inline constexpr simd_flags simd_flag_streaming; + + template + inline constexpr simd_flags> simd_flag_prefetch; + +} // namespace ccm::pp \ No newline at end of file diff --git a/include/ccmath/internal/math/runtime/pp/fwddecl.hpp b/include/ccmath/internal/math/runtime/pp/fwddecl.hpp index a3d0c10..b69b83a 100644 --- a/include/ccmath/internal/math/runtime/pp/fwddecl.hpp +++ b/include/ccmath/internal/math/runtime/pp/fwddecl.hpp @@ -1,9 +1,17 @@ +/* + * Copyright (c) Ian Pike + * Copyright (c) CCMath contributors + * + * CCMath is provided under the Apache-2.0 License WITH LLVM-exception. + * See LICENSE for more information. + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ #pragma once -// include config - #include "const_eval.hpp" +#include "simd_config.hpp" // ReSharper disable once CppUnusedIncludeDirective #include "ccmath/internal/predef/attributes/always_inline.hpp" diff --git a/include/ccmath/internal/math/runtime/pp/simd_config.hpp b/include/ccmath/internal/math/runtime/pp/simd_config.hpp new file mode 100644 index 0000000..bc9e65c --- /dev/null +++ b/include/ccmath/internal/math/runtime/pp/simd_config.hpp @@ -0,0 +1,319 @@ +/* + * Copyright (c) Ian Pike + * Copyright (c) CCMath contributors + * + * CCMath is provided under the Apache-2.0 License WITH LLVM-exception. + * See LICENSE for more information. + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#pragma once + +#include // For std::integer_sequence + +// x86 macros + +#ifdef __MMX__ + #define CCMATH_SIMD_HAVE_MMX 1 +#else + #define CCMATH_SIMD_HAVE_MMX 0 +#endif + +#if defined __SSE__ || defined __x86_64__ + #define CCMATH_SIMD_HAVE_SSE 1 +#else + #define CCMATH_SIMD_HAVE_SSE 0 +#endif + +#if defined __SSE2__ || defined __x86_64__ + #define CCMATH_SIMD_HAVE_SSE2 1 +#else + #define CCMATH_SIMD_HAVE_SSE2 0 +#endif + +#ifdef __SSE3__ + #define CCMATH_SIMD_HAVE_SSE3 1 +#else + #define CCMATH_SIMD_HAVE_SSE3 0 +#endif + +#ifdef __SSSE3__ + #define CCMATH_SIMD_HAVE_SSSE3 1 +#else + #define CCMATH_SIMD_HAVE_SSSE3 0 +#endif + +#ifdef __SSE4_1__ + #define CCMATH_SIMD_HAVE_SSE4_1 1 +#else + #define CCMATH_SIMD_HAVE_SSE4_1 0 +#endif + +#ifdef __SSE4_2__ + #define CCMATH_SIMD_HAVE_SSE4_2 1 +#else + #define CCMATH_SIMD_HAVE_SSE4_2 0 +#endif + +#ifdef __XOP__ + #define CCMATH_SIMD_HAVE_XOP 1 +#else + #define CCMATH_SIMD_HAVE_XOP 0 +#endif + +#ifdef __AVX__ + #define CCMATH_SIMD_HAVE_AVX 1 +#else + #define CCMATH_SIMD_HAVE_AVX 0 +#endif + +#ifdef __AVX2__ + #define CCMATH_SIMD_HAVE_AVX2 1 +#else + #define CCMATH_SIMD_HAVE_AVX2 0 +#endif + +#ifdef __BMI__ + #define CCMATH_SIMD_HAVE_BMI 1 +#else + #define CCMATH_SIMD_HAVE_BMI 0 +#endif + +#ifdef __BMI2__ + #define CCMATH_SIMD_HAVE_BMI2 1 +#else + #define CCMATH_SIMD_HAVE_BMI2 0 +#endif + +#ifdef __LZCNT__ + #define CCMATH_SIMD_HAVE_LZCNT 1 +#else + #define CCMATH_SIMD_HAVE_LZCNT 0 +#endif + +#ifdef __SSE4A__ + #define CCMATH_SIMD_HAVE_SSE4A 1 +#else + #define CCMATH_SIMD_HAVE_SSE4A 0 +#endif + +#ifdef __FMA__ + #define CCMATH_SIMD_HAVE_FMA 1 +#else + #define CCMATH_SIMD_HAVE_FMA 0 +#endif + +#ifdef __FMA4__ + #define CCMATH_SIMD_HAVE_FMA4 1 +#else + #define CCMATH_SIMD_HAVE_FMA4 0 +#endif + +#ifdef __F16C__ + #define CCMATH_SIMD_HAVE_F16C 1 +#else + #define CCMATH_SIMD_HAVE_F16C 0 +#endif + +#ifdef __POPCNT__ + #define CCMATH_SIMD_HAVE_POPCNT 1 +#else + #define CCMATH_SIMD_HAVE_POPCNT 0 +#endif + +#ifdef __AVX512F__ + #define CCMATH_SIMD_HAVE_AVX512F 1 +#else + #define CCMATH_SIMD_HAVE_AVX512F 0 +#endif + +#ifdef __AVX512DQ__ + #define CCMATH_SIMD_HAVE_AVX512DQ 1 +#else + #define CCMATH_SIMD_HAVE_AVX512DQ 0 +#endif + +#ifdef __AVX512VL__ + #define CCMATH_SIMD_HAVE_AVX512VL 1 +#else + #define CCMATH_SIMD_HAVE_AVX512VL 0 +#endif + +#ifdef __AVX512BW__ + #define CCMATH_SIMD_HAVE_AVX512BW 1 +#else + #define CCMATH_SIMD_HAVE_AVX512BW 0 +#endif + +#ifdef __AVX512BITALG__ + #define CCMATH_SIMD_HAVE_AVX512BITALG 1 +#else + #define CCMATH_SIMD_HAVE_AVX512BITALG 0 +#endif + +#ifdef __AVX512VBMI2__ + #define CCMATH_SIMD_HAVE_AVX512VBMI2 1 +#else + #define CCMATH_SIMD_HAVE_AVX512VBMI2 0 +#endif + +#ifdef __AVX512VBMI__ + #define CCMATH_SIMD_HAVE_AVX512VBMI 1 +#else + #define CCMATH_SIMD_HAVE_AVX512VBMI 0 +#endif + +#ifdef __AVX512IFMA__ + #define CCMATH_SIMD_HAVE_AVX512IFMA 1 +#else + #define CCMATH_SIMD_HAVE_AVX512IFMA 0 +#endif + +#ifdef __AVX512CD__ + #define CCMATH_SIMD_HAVE_AVX512CD 1 +#else + #define CCMATH_SIMD_HAVE_AVX512CD 0 +#endif + +#ifdef __AVX512VNNI__ + #define CCMATH_SIMD_HAVE_AVX512VNNI 1 +#else + #define CCMATH_SIMD_HAVE_AVX512VNNI 0 +#endif + +#ifdef __AVX512VPOPCNTDQ__ + #define CCMATH_SIMD_HAVE_AVX512VPOPCNTDQ 1 +#else + #define CCMATH_SIMD_HAVE_AVX512VPOPCNTDQ 0 +#endif + +#ifdef __AVX512VP2INTERSECT__ + #define CCMATH_SIMD_HAVE_AVX512VP2INTERSECT 1 +#else + #define CCMATH_SIMD_HAVE_AVX512VP2INTERSECT 0 +#endif + +#ifdef __AVX512FP16__ + #define CCMATH_SIMD_HAVE_AVX512FP16 1 +#else + #define CCMATH_SIMD_HAVE_AVX512FP16 0 +#endif + +#if CCMATH_SIMD_HAVE_SSE + #define CCMATH_SIMD_HAVE_SSE_ABI 1 +#else + #define CCMATH_SIMD_HAVE_SSE_ABI 0 +#endif + +#if CCMATH_SIMD_HAVE_SSE2 + #define CCMATH_SIMD_HAVE_FULL_SSE_ABI 1 +#else + #define CCMATH_SIMD_HAVE_FULL_SSE_ABI 0 +#endif + +#if CCMATH_SIMD_HAVE_AVX + #define CCMATH_SIMD_HAVE_AVX_ABI 1 +#else + #define CCMATH_SIMD_HAVE_AVX_ABI 0 +#endif + +#if CCMATH_SIMD_HAVE_AVX2 + #define CCMATH_SIMD_HAVE_FULL_AVX_ABI 1 +#else + #define CCMATH_SIMD_HAVE_FULL_AVX_ABI 0 +#endif + +#if CCMATH_SIMD_HAVE_AVX512F + #define CCMATH_SIMD_HAVE_AVX512_ABI 1 +#else + #define CCMATH_SIMD_HAVE_AVX512_ABI 0 +#endif + +#if CCMATH_SIMD_HAVE_AVX512BW + #define CCMATH_SIMD_HAVE_FULL_AVX512_ABI 1 +#else + #define CCMATH_SIMD_HAVE_FULL_AVX512_ABI 0 +#endif + +// Intel Short Vector Math Library (SVML) +// As far as I am aware, there is no reliable way to detect SVML support at compile-time. +#if defined(CCM_CONFIG_RT_SIMD_HAS_SVML) + #ifndef CCMATH_HAS_SIMD + #define CCMATH_HAS_SIMD 1 + #endif + #define CCMATH_HAS_SIMD_SVML 1 +#endif + +#if defined __x86_64__ && !CCMATH_SIMD_HAVE_SSE2 + #error "Use of SSE2 is required on AMD64" +#endif + +// ARM macros +#if defined __ARM_NEON + #define CCMATH_SIMD_HAVE_NEON 1 +#else + #define CCMATH_SIMD_HAVE_NEON 0 +#endif +#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__) + #define CCMATH_SIMD_HAVE_NEON_A32 1 +#else + #define CCMATH_SIMD_HAVE_NEON_A32 0 +#endif +#if defined __ARM_NEON && defined __aarch64__ + #define CCMATH_SIMD_HAVE_NEON_A64 1 +#else + #define CCMATH_SIMD_HAVE_NEON_A64 0 +#endif +#if (__ARM_FEATURE_SVE_BITS > 0 && __ARM_FEATURE_SVE_VECTOR_OPERATORS == 1) + #define CCMATH_SIMD_HAVE_SVE 1 +#else + #define CCMATH_SIMD_HAVE_SVE 0 +#endif + +// CCM_SIMD_INTRINSIC +#if defined(CCM_CONFIG_NO_SIMD_INLINE) + #define CCM_SIMD_INTRINSIC +#elif defined(CCM_CONFIG_NO_FORCED_SIMD_INLINE) + #define CCM_SIMD_INTRINSIC inline +#elif defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER)) + #define CCM_SIMD_INTRINSIC __attribute__((always_inline, artificial)) inline +#elif defined(_MSC_VER) + #define CCM_SIMD_INTRINSIC __forceinline +#else + #define CCM_SIMD_INTRINSIC inline +#endif + +// CCM_SIMD_ENFORCED_ALWAYS_INLINE +#if defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER)) + #define CCM_SIMD_ENFORCED_ALWAYS_INLINE __attribute__((always_inline)) inline +#elif defined(_MSC_VER) + #define CCM_SIMD_ENFORCED_ALWAYS_INLINE __forceinline +#else + #define CCM_SIMD_ENFORCED_ALWAYS_INLINE inline +#endif + +#ifndef CCMATH_SIMD_LIST_BINARY + #define CCMATH_SIMD_LIST_BINARY(MACRO) MACRO(|) MACRO(&) MACRO(^) + #define CCMATH_SIMD_LIST_SHIFTS(MACRO) MACRO(<<) MACRO(>>) + #define CCMATH_SIMD_LIST_ARITHMETICS(MACRO) MACRO(+) MACRO(-) MACRO(*) MACRO(/) MACRO(%) + + #define CCMATH_SIMD_ALL_BINARY(MACRO) CCMATH_SIMD_LIST_BINARY(MACRO) static_assert(true) + #define CCMATH_SIMD_ALL_SHIFTS(MACRO) CCMATH_SIMD_LIST_SHIFTS(MACRO) static_assert(true) + #define CCMATH_SIMD_ALL_ARITHMETICS(MACRO) CCMATH_SIMD_LIST_ARITHMETICS(MACRO) static_assert(true) +#endif + +#if defined __GXX_CONDITIONAL_IS_OVERLOADABLE__ && SIMD_CONDITIONAL_OPERATOR + #define simd_select_impl operator?: +#endif + +namespace ccm::pp::config::detail +{ + template + CCM_SIMD_ENFORCED_ALWAYS_INLINE constexpr void simd_int_pack(std::integer_sequence, F && code) + { + code(std::integer_sequence{}); + } +} // namespace ccm::pp::config::detail + +#define CCMATH_SIMD_INT_PACK(N, CODE) ccm::pp::config::detail::simd_int_pack(std::make_integer_sequence{}, CODE) diff --git a/include/ccmath/internal/math/runtime/pp/simd_intrinsic.hpp b/include/ccmath/internal/math/runtime/pp/simd_intrinsic.hpp deleted file mode 100644 index 2a9918e..0000000 --- a/include/ccmath/internal/math/runtime/pp/simd_intrinsic.hpp +++ /dev/null @@ -1,14 +0,0 @@ - -#pragma once - -#if defined(CCM_CONFIG_NO_SIMD_INLINE) - #define CCM_ATTR_SIMD_INTRINSIC -#elif defined(CCM_CONFIG_NO_FORCED_SIMD_INLINE) - #define CCM_ATTR_SIMD_INTRINSIC inline -#elif defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER)) - #define CCM_ATTR_SIMD_INTRINSIC __attribute__((always_inline, artificial)) inline -#elif defined(_MSC_VER) - #define CCM_ATTR_SIMD_INTRINSIC __forceinline -#else - #define CCM_ATTR_SIMD_INTRINSIC inline -#endif diff --git a/include/ccmath/internal/math/runtime/pp/simd_meta.hpp b/include/ccmath/internal/math/runtime/pp/simd_meta.hpp index 5c7d78d..32a31b6 100644 --- a/include/ccmath/internal/math/runtime/pp/simd_meta.hpp +++ b/include/ccmath/internal/math/runtime/pp/simd_meta.hpp @@ -1,3 +1,12 @@ +/* + * Copyright (c) Ian Pike + * Copyright (c) CCMath contributors + * + * CCMath is provided under the Apache-2.0 License WITH LLVM-exception. + * See LICENSE for more information. + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ #pragma once diff --git a/include/ccmath/internal/math/runtime/pp/trap.hpp b/include/ccmath/internal/math/runtime/pp/trap.hpp index ab4b499..963f506 100644 --- a/include/ccmath/internal/math/runtime/pp/trap.hpp +++ b/include/ccmath/internal/math/runtime/pp/trap.hpp @@ -1,4 +1,3 @@ - /* * Copyright (c) Ian Pike * Copyright (c) CCMath contributors diff --git a/include/ccmath/internal/math/runtime/pp/vec_detail.hpp b/include/ccmath/internal/math/runtime/pp/vec_detail.hpp new file mode 100644 index 0000000..b6393fb --- /dev/null +++ b/include/ccmath/internal/math/runtime/pp/vec_detail.hpp @@ -0,0 +1,2 @@ + +#pragma once \ No newline at end of file diff --git a/include/ccmath/internal/math/runtime/pp/x86_include.hpp b/include/ccmath/internal/math/runtime/pp/x86_include.hpp index b6393fb..ac0a777 100644 --- a/include/ccmath/internal/math/runtime/pp/x86_include.hpp +++ b/include/ccmath/internal/math/runtime/pp/x86_include.hpp @@ -1,2 +1,13 @@ +/* + * Copyright (c) Ian Pike + * Copyright (c) CCMath contributors + * + * CCMath is provided under the Apache-2.0 License WITH LLVM-exception. + * See LICENSE for more information. + * + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ -#pragma once \ No newline at end of file +#pragma once + +#include "simd_meta.hpp" \ No newline at end of file