From 80fd31af8283bc222e97db356b9cef18b3fc3c4c Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Thu, 14 Nov 2024 16:33:26 -0800 Subject: [PATCH 1/3] Fix up more SSE implementations for nontrapping-fp Fixes lto2.test_sse1 and test_sse2 with checks similar to --- system/include/compat/emmintrin.h | 8 +++++--- system/include/compat/xmmintrin.h | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/system/include/compat/emmintrin.h b/system/include/compat/emmintrin.h index 0ee70b14abed5..61ac5074563e7 100644 --- a/system/include/compat/emmintrin.h +++ b/system/include/compat/emmintrin.h @@ -7,6 +7,7 @@ #ifndef __emscripten_emmintrin_h__ #define __emscripten_emmintrin_h__ +#include #ifndef __SSE2__ #error "SSE2 instruction set not enabled" #endif @@ -1008,9 +1009,10 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__)) _mm_cvtsd_si64(__m128d __a) { // TODO: optimize - if (isnan(__a[0]) || isinf(__a[0])) return 0x8000000000000000LL; - long long x = llrint(__a[0]); - if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabs(__a[0]) < 2.f)) + double e = __a[0]; + if (isnan(e) || isinf(e)) return 0x8000000000000000LL; + long long x = llrint(e); + if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabs(e) < 2.f) && e <= LLONG_MAX && e >= LLONG_MIN) return x; else return 0x8000000000000000LL; diff --git a/system/include/compat/xmmintrin.h b/system/include/compat/xmmintrin.h index 692770c17e5f0..bd958f5a61b33 100644 --- a/system/include/compat/xmmintrin.h +++ b/system/include/compat/xmmintrin.h @@ -596,8 +596,9 @@ _mm_cvtsi32_ss(__m128 __a, int __b) static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvtss_si32(__m128 __a) { - int x = lrint(((__f32x4)__a)[0]); - if (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f) + float e = ((__f32x4)__a)[0]; + int x = lrint(e); + if ((x != 0 || fabsf(e)) < 2.f && !isnan(e) && e <= (float)INT_MAX && e >= INT_MIN) return x; else return (int)0x80000000; @@ -627,9 +628,10 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvtss_si64(__m128 __a) { - if (isnan(((__f32x4)__a)[0]) || isinf(((__f32x4)__a)[0])) return 0x8000000000000000LL; - long long x = llrintf(((__f32x4)__a)[0]); - if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(((__f32x4)__a)[0]) < 2.f)) + float e = ((__f32x4)__a)[0]; + if (isnan(e) || isinf(e)) return 0x8000000000000000LL; + long long x = llrintf(e); + if ((x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) && e <= (float)LLONG_MAX && e >= LLONG_MIN) return x; else return 0x8000000000000000LL; From 75dab8b51ad62e202d1dde7a74fe9572d10766e3 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Thu, 14 Nov 2024 16:42:03 -0800 Subject: [PATCH 2/3] remove auto-added include --- system/include/compat/emmintrin.h | 1 - 1 file changed, 1 deletion(-) diff --git a/system/include/compat/emmintrin.h b/system/include/compat/emmintrin.h index 61ac5074563e7..43f19ea091539 100644 --- a/system/include/compat/emmintrin.h +++ b/system/include/compat/emmintrin.h @@ -7,7 +7,6 @@ #ifndef __emscripten_emmintrin_h__ #define __emscripten_emmintrin_h__ -#include #ifndef __SSE2__ #error "SSE2 instruction set not enabled" #endif From 3eadffde7eaf9ae5f7e89cef0d0ba0d6acc1bfd9 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Fri, 15 Nov 2024 09:07:26 -0800 Subject: [PATCH 3/3] make style consistent --- system/include/compat/emmintrin.h | 3 +-- system/include/compat/xmmintrin.h | 11 ++++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/system/include/compat/emmintrin.h b/system/include/compat/emmintrin.h index 43f19ea091539..b9edbb4e19b1a 100644 --- a/system/include/compat/emmintrin.h +++ b/system/include/compat/emmintrin.h @@ -449,8 +449,7 @@ _mm_cvttsd_si32(__m128d __a) { // TODO: OPTIMIZE! float elem = __a[0]; - if (isnan(elem) || elem > INT_MAX || elem < INT_MIN) return (int)0x80000000; - if (lrint(elem) != 0 || fabs(elem) < 2.0) + if ((lrint(elem) != 0 || fabs(elem) < 2.0) && !isnanf(elem) && elem <= INT_MAX && elem >= INT_MIN) // Use the trapping instruction here since we have explicit bounds checks // above. return __builtin_wasm_trunc_s_i32_f32(elem); diff --git a/system/include/compat/xmmintrin.h b/system/include/compat/xmmintrin.h index bd958f5a61b33..5f3cefa3a19a0 100644 --- a/system/include/compat/xmmintrin.h +++ b/system/include/compat/xmmintrin.h @@ -598,7 +598,7 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SL { float e = ((__f32x4)__a)[0]; int x = lrint(e); - if ((x != 0 || fabsf(e)) < 2.f && !isnan(e) && e <= (float)INT_MAX && e >= INT_MIN) + if ((x != 0 || fabsf(e)) < 2.f && !isnan(e) && e <= INT_MAX && e >= INT_MIN) return x; else return (int)0x80000000; @@ -608,9 +608,8 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SL static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si32(__m128 __a) { float e = ((__f32x4)__a)[0]; - if (isnanf(e) || e > INT_MAX || e < INT_MIN) return (int)0x80000000; int x = lrint(e); - if ((x != 0 || fabsf(e) < 2.f)) + if ((x != 0 || fabsf(e) < 2.f) && !isnanf(e) && e <= INT_MAX && e >= INT_MIN) return (int)e; else return (int)0x80000000; @@ -629,9 +628,8 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGN _mm_cvtss_si64(__m128 __a) { float e = ((__f32x4)__a)[0]; - if (isnan(e) || isinf(e)) return 0x8000000000000000LL; long long x = llrintf(e); - if ((x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) && e <= (float)LLONG_MAX && e >= LLONG_MIN) + if ((x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) && !isnanf(e) && e <= LLONG_MAX && e >= LLONG_MIN) return x; else return 0x8000000000000000LL; @@ -641,9 +639,8 @@ static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGN _mm_cvttss_si64(__m128 __a) { float e = ((__f32x4)__a)[0]; - if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL; long long x = llrintf(e); - if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) + if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f) && !isnanf(e) && e <= LLONG_MAX && e >= LLONG_MIN) return (long long)e; else return 0x8000000000000000LL;