Skip to content

Commit c49bba0

Browse files
valpackettsrijs
authored and committed
Add support for AArch64 CRC32 instructions
1 parent 5371d23 commit c49bba0

File tree

5 files changed

+91
-0
lines changed

5 files changed

+91
-0
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ rand = "0.4"
2222
[features]
2323
default = ["std"]
2424
std = []
25+
nightly = []
2526

2627
[[bench]]
2728
name = "bench"

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ This crate contains multiple CRC32 implementations:
2727

2828
- A fast baseline implementation which processes up to 16 bytes per iteration
2929
- An optimized implementation for modern `x86` using `sse` and `pclmulqdq` instructions
30+
- An optimized implementation for `aarch64` using `crc32` instructions
3031

3132
Calling the `Hasher::new` constructor at runtime will perform feature detection to select the most
3233
optimal implementation for the current CPU feature set.

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
//! optimal implementation for the current CPU feature set.
2020
2121
#![cfg_attr(not(feature = "std"), no_std)]
22+
#![cfg_attr(all(feature = "nightly", target_arch = "aarch64"), feature(stdsimd, aarch64_target_feature))]
2223

2324
#[deny(missing_docs)]
2425
#[cfg(test)]

src/specialized/aarch64.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
use std::arch::aarch64 as arch;
2+
3+
#[derive(Clone)]
4+
pub struct State {
5+
state: u32,
6+
}
7+
8+
impl State {
9+
pub fn new() -> Option<Self> {
10+
if is_aarch64_feature_detected!("crc") {
11+
// SAFETY: The conditions above ensure that all
12+
// required instructions are supported by the CPU.
13+
Some(Self { state: 0 })
14+
} else {
15+
None
16+
}
17+
}
18+
19+
pub fn update(&mut self, buf: &[u8]) {
20+
// SAFETY: The `State::new` constructor ensures that all
21+
// required instructions are supported by the CPU.
22+
self.state = unsafe { calculate(self.state, buf) }
23+
}
24+
25+
pub fn finalize(self) -> u32 {
26+
self.state
27+
}
28+
29+
pub fn reset(&mut self) {
30+
self.state = 0;
31+
}
32+
33+
pub fn combine(&mut self, other: u32, amount: u64) {
34+
self.state = ::combine::combine(self.state, other, amount);
35+
}
36+
}
37+
38+
// target_feature is necessary to allow rustc to inline the crc32* wrappers
39+
#[target_feature(enable = "crc")]
40+
pub unsafe fn calculate(crc: u32, data: &[u8]) -> u32 {
41+
let mut c32 = !crc;
42+
let (pre_quad, quads, post_quad) = data.align_to::<u64>();
43+
44+
c32 = pre_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));
45+
46+
// unrolling increases performance by a lot
47+
let mut quad_iter = quads.chunks_exact(8);
48+
for chunk in &mut quad_iter {
49+
c32 = arch::__crc32d(c32, chunk[0]);
50+
c32 = arch::__crc32d(c32, chunk[1]);
51+
c32 = arch::__crc32d(c32, chunk[2]);
52+
c32 = arch::__crc32d(c32, chunk[3]);
53+
c32 = arch::__crc32d(c32, chunk[4]);
54+
c32 = arch::__crc32d(c32, chunk[5]);
55+
c32 = arch::__crc32d(c32, chunk[6]);
56+
c32 = arch::__crc32d(c32, chunk[7]);
57+
}
58+
c32 = quad_iter.remainder().iter().fold(c32, |acc, &q| arch::__crc32d(acc, q));
59+
60+
c32 = post_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));
61+
62+
!c32
63+
}
64+
65+
#[cfg(test)]
mod test {
    quickcheck! {
        // Property: feeding the same sequence of slices to the baseline and the
        // aarch64 implementation yields the same final checksum.
        fn check_against_baseline(chunks: Vec<(Vec<u8>, usize)>) -> bool {
            let mut expected = super::super::super::baseline::State::new();
            let mut actual = super::State::new().expect("not supported");
            for (chunk, offset) in chunks {
                // simulate random alignments by offsetting the slice by up to 15 bytes
                let skip = offset & 0xF;
                // chunks that are not longer than the offset are hashed whole
                let start = if skip < chunk.len() { skip } else { 0 };
                let window = &chunk[start..];
                expected.update(window);
                actual.update(window);
            }
            actual.finalize() == expected.finalize()
        }
    }
}

src/specialized/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ cfg_if! {
55
))] {
66
mod pclmulqdq;
77
pub use self::pclmulqdq::State;
8+
} else if #[cfg(all(feature = "nightly", target_arch = "aarch64"))] {
9+
mod aarch64;
10+
pub use self::aarch64::State;
811
} else {
912
#[derive(Clone)]
1013
pub enum State {}

0 commit comments

Comments
 (0)