 // We define a translation layer for both x86 and ARM for the ease of use and
 // most performance gains.

-// This implementation requires 64-bit CRC instructions (part of SSE 4.2) and
-// PCLMULQDQ instructions. 32-bit builds with SSE 4.2 do exist, so the
-// __x86_64__ condition is necessary.
-#if defined(__x86_64__) && defined(__SSE4_2__) && defined(__PCLMUL__)
+// This implementation requires CRC instructions (part of SSE 4.2) and
+// PCLMULQDQ instructions.
+#if defined(__SSE4_2__) && defined(__PCLMUL__)

 #include <x86intrin.h>
 #define ABSL_CRC_INTERNAL_HAVE_X86_SIMD

-#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__) && \
-    defined(_M_AMD64)
+#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__)

 // MSVC AVX (/arch:AVX) implies SSE 4.2 and PCLMULQDQ.
 #include <intrin.h>
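Note (not part of the patch): the guard above now accepts 32-bit x86 builds as long as the compiler advertises SSE 4.2 and PCLMULQDQ. As a point of reference, GCC and Clang define `__SSE4_2__` under `-msse4.2` and `__PCLMUL__` under `-mpclmul` (or `-march=native` on capable hardware), while MSVC's `/arch:AVX` implies both, which is why that branch only checks `__AVX__`. A minimal standalone sketch mirroring the same detection logic:

```cpp
// sketch.cc -- hypothetical example, not part of the patch.
#include <cstdio>

int main() {
#if (defined(__SSE4_2__) && defined(__PCLMUL__)) || \
    (defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__))
  std::puts("SSE 4.2 CRC32 + PCLMULQDQ path would be compiled");
#else
  std::puts("portable fallback path would be compiled");
#endif
  return 0;
}
```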
@@ -143,7 +141,13 @@ inline uint32_t CRC32_u32(uint32_t crc, uint32_t v) {
 }

 inline uint32_t CRC32_u64(uint32_t crc, uint64_t v) {
+#if defined(__x86_64__) || defined(_M_X64)
   return static_cast<uint32_t>(_mm_crc32_u64(crc, v));
+#else
+  uint32_t v_lo = static_cast<uint32_t>(v);
+  uint32_t v_hi = static_cast<uint32_t>(v >> 32);
+  return _mm_crc32_u32(_mm_crc32_u32(crc, v_lo), v_hi);
+#endif
 }

 inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
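Note (not part of the patch): the new `#else` branch works because the CRC32C instruction consumes its operand bytes from least to most significant, so one 64-bit step is equivalent to two 32-bit steps over the low half and then the high half. A minimal standalone check of that equivalence, assuming a 64-bit x86 build with SSE 4.2 enabled (e.g. `-msse4.2`) so both forms are available:

```cpp
// check_crc_split.cc -- hypothetical standalone test, not part of the patch.
#include <cassert>
#include <cstdint>
#include <nmmintrin.h>  // SSE 4.2 CRC32 intrinsics

int main() {
  const uint32_t crc = 0xFFFFFFFFu;
  const uint64_t v = 0x0123456789ABCDEFull;
  // One 64-bit step (the #if branch, 64-bit targets only).
  const uint32_t full = static_cast<uint32_t>(_mm_crc32_u64(crc, v));
  // Two 32-bit steps over the low then the high half (the #else branch).
  const uint32_t split = _mm_crc32_u32(
      _mm_crc32_u32(crc, static_cast<uint32_t>(v)),
      static_cast<uint32_t>(v >> 32));
  assert(full == split);
  return 0;
}
```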
@@ -191,7 +195,15 @@ inline uint64_t V128_Extract64(const V128 l) {
   return static_cast<uint64_t>(_mm_extract_epi64(l, imm));
 }

-inline int64_t V128_Low64(const V128 l) { return _mm_cvtsi128_si64(l); }
+inline int64_t V128_Low64(const V128 l) {
+#if defined(__x86_64__) || defined(_M_X64)
+  return _mm_cvtsi128_si64(l);
+#else
+  uint32_t r_lo = static_cast<uint32_t>(_mm_extract_epi32(l, 0));
+  uint32_t r_hi = static_cast<uint32_t>(_mm_extract_epi32(l, 1));
+  return static_cast<int64_t>((static_cast<uint64_t>(r_hi) << 32) | r_lo);
+#endif
+}

 inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
   return _mm_sll_epi64(l, r);
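Note (not part of the patch): `_mm_cvtsi128_si64` is unavailable on 32-bit targets, so the new branch reassembles the low 64 bits of the vector from two 32-bit lane extracts (`_mm_extract_epi32` is SSE 4.1, which the SSE 4.2 requirement already implies). A minimal standalone check, assuming a 64-bit x86 build so both forms can be compared; the vector constant is arbitrary:

```cpp
// check_low64.cc -- hypothetical standalone test, not part of the patch.
#include <cassert>
#include <cstdint>
#include <smmintrin.h>  // _mm_extract_epi32 (SSE 4.1) plus SSE2 helpers

int main() {
  const __m128i v = _mm_set_epi64x(
      0x1122334455667788ll, static_cast<int64_t>(0x99AABBCCDDEEFF00ull));
  const int64_t direct = _mm_cvtsi128_si64(v);  // low 64 bits, 64-bit targets
  const uint32_t lo = static_cast<uint32_t>(_mm_extract_epi32(v, 0));
  const uint32_t hi = static_cast<uint32_t>(_mm_extract_epi32(v, 1));
  const int64_t rebuilt =
      static_cast<int64_t>((static_cast<uint64_t>(hi) << 32) | lo);
  assert(direct == rebuilt);
  return 0;
}
```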