Skip to content

Commit 82b4ac8

Browse files
committed
Move the mask into a const, eliminating the repeated transmutes and ensuring const evaluation, so that the mask uses the alignment of the destination type
1 parent 677a3b3 commit 82b4ac8

File tree

1 file changed

+5
-15
lines changed

1 file changed

+5
-15
lines changed

src/simd/x86.rs

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,7 @@ impl ValueOps for Value {
4848
// SAFETY: This is only implemented if the target supports sse2, sse4.1, and pclmulqdq
4949
unsafe {
5050
Self(arch::_mm_xor_si128(
51-
arch::_mm_clmulepi64_si128(
52-
arch::_mm_and_si128(self.0, mem::transmute((1u128 << 32) - 1)),
53-
x_mod_p.0,
54-
0x00,
55-
),
51+
arch::_mm_clmulepi64_si128(arch::_mm_and_si128(self.0, MASK), x_mod_p.0, 0x00),
5652
arch::_mm_srli_si128(self.0, 4),
5753
))
5854
}
@@ -62,17 +58,11 @@ impl ValueOps for Value {
6258
fn barret_reduction_32(self, px_u: Self) -> u32 {
6359
// SAFETY: This is only implemented if the target supports sse2, sse4.1, and pclmulqdq
6460
unsafe {
65-
let t1 = arch::_mm_clmulepi64_si128(
66-
arch::_mm_and_si128(self.0, mem::transmute((1u128 << 32) - 1)),
67-
px_u.0,
68-
0x10,
69-
);
70-
let t2 = arch::_mm_clmulepi64_si128(
71-
arch::_mm_and_si128(t1, mem::transmute((1u128 << 32) - 1)),
72-
px_u.0,
73-
0x00,
74-
);
61+
let t1 = arch::_mm_clmulepi64_si128(arch::_mm_and_si128(self.0, MASK), px_u.0, 0x10);
62+
let t2 = arch::_mm_clmulepi64_si128(arch::_mm_and_si128(t1, MASK), px_u.0, 0x00);
7563
arch::_mm_extract_epi32(arch::_mm_xor_si128(self.0, t2), 1) as u32
7664
}
7765
}
7866
}
67+
68+
const MASK: arch::__m128i = unsafe { mem::transmute((1u128 << 32) - 1) };

0 commit comments

Comments
 (0)