Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions lib/common/zstd_internal.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use core::mem::MaybeUninit;

use libc::size_t;

const fn const_max(a: usize, b: usize) -> usize {
Expand Down Expand Up @@ -68,12 +70,28 @@ pub(crate) static OF_defaultNorm: [i16; 29] = [
pub(crate) const OF_DEFAULTNORMLOG: u32 = 5;
pub(crate) static OF_defaultNormLog: u32 = OF_DEFAULTNORMLOG;

#[inline]
pub(crate) unsafe fn ZSTD_copy16(dst: *mut u8, src: *const u8) {
// We use `copy` instead of `copy_nonoverlapping` here because the literal buffer can now
// be located within the dst buffer. In circumstances where the op "catches up" to where the
// literal buffer is, there can be partial overlaps in this call on the final
// copy if the literal is being shifted by less than 16 bytes.
core::ptr::copy(src, dst, 16)
crate::cfg_select!(
target_arch = "x86_64" => {
#[repr(C)]
struct V {
x: i64,
y: i64,
}

// This generates the same instructions, but surrounding code optimizes better.
let v = core::ptr::read_unaligned(src as *const MaybeUninit<V>);
core::ptr::write_unaligned(dst as *mut MaybeUninit<V>, v);
}
_ => {
core::ptr::copy(src, dst, 16);
}
);
}

pub(crate) const WILDCOPY_OVERLENGTH: usize = 32;
Expand Down Expand Up @@ -126,7 +144,7 @@ pub(crate) unsafe fn ZSTD_wildcopy(
// probabilities. Since it is almost certain to be short, only do
// one 16-byte copy in the first call. Then, do two calls per loop since
// at that point it is more likely to have a high trip count.
core::ptr::copy(ip, op, 16);
ZSTD_copy16(op, ip);

if 16 >= length {
return;
Expand All @@ -136,11 +154,11 @@ pub(crate) unsafe fn ZSTD_wildcopy(
ip = ip.add(16);

loop {
core::ptr::copy(ip, op, 16);
ZSTD_copy16(op, ip);
op = op.add(16);
ip = ip.add(16);

core::ptr::copy(ip, op, 16);
ZSTD_copy16(op, ip);
op = op.add(16);
ip = ip.add(16);

Expand Down
12 changes: 8 additions & 4 deletions test-libzstd-rs-sys/examples/decompress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@ fn main() {

match variant.as_str() {
"c" => {
let err = c(&input);
assert_eq!(err, 0);
for _ in 0..20 {
let err = c(&input);
assert_eq!(err, 0);
}
}
"rs" => {
let err = rs(&input);
assert_eq!(err, 0);
for _ in 0..20 {
let err = rs(&input);
assert_eq!(err, 0);
}
}
bad => panic!("invalid command {bad}"),
}
Expand Down