From 1b3df02f65dbc822c622c9ad90ef08427ff95cf7 Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Fri, 20 Sep 2024 20:02:08 -0700 Subject: [PATCH 1/6] Checkpoint experiments --- Cargo.toml | 1 + src/compress.rs | 179 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 122 insertions(+), 58 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 59910ff..bfef91a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ homepage = "https://github.com/image-rs/fdeflate" categories = ["compression"] [dependencies] +innumerable = "0.1.0" simd-adler32 = "0.3.4" [dev-dependencies] diff --git a/src/compress.rs b/src/compress.rs index b55116e..10132bb 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -1,8 +1,9 @@ use simd_adler32::Adler32; -use std::io::{self, Seek, SeekFrom, Write}; +use std::{collections::BinaryHeap, io::{self, Seek, SeekFrom, Write}}; use crate::tables::{ - BITMASKS, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, + BITMASKS, DIST_SYM_TO_DIST_BASE, DIST_SYM_TO_DIST_EXTRA, HUFFMAN_CODES, HUFFMAN_LENGTHS, + LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, }; /// Compressor that produces fdeflate compressed streams. @@ -90,76 +91,138 @@ impl Compressor { Ok(()) } + fn match_length(data: &[u8], a: usize, b: usize) -> usize { + if b - a > 32768 { + return 0; + } + + let mut length = 0; + while length < 258 && b + length < data.len() && data[a + length] == data[b + length] { + length += 1; + } + length + } + /// Write data to the compressor. pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> { self.checksum.write(data); - let mut run = 0; - let mut chunks = data.chunks_exact(8); - for chunk in &mut chunks { - let ichunk = u64::from_le_bytes(chunk.try_into().unwrap()); - - if ichunk == 0 { - run += 8; - continue; - } else if run > 0 { - let run_extra = ichunk.trailing_zeros() / 8; - self.write_run(run + run_extra)?; - run = 0; - - if run_extra > 0 { - run = ichunk.leading_zeros() / 8; - for &b in &chunk[run_extra as usize..8 - run as usize] { - self.write_bits( - HUFFMAN_CODES[b as usize] as u64, - HUFFMAN_LENGTHS[b as usize], - )?; - } + const TABLE_SIZE: usize = 32768; + let mut matches = vec![[0; 2]; TABLE_SIZE]; + + let mut i = 0; + while i + 8 < data.len() { + + if data[i] == 0 { + let mut run_length = 1; + while run_length < 258 && i + run_length < data.len() && data[i + run_length] == 0 { + run_length += 1; + } + if run_length >= 4 { + innumerable::event!("run", run_length.min(10) as u64); + self.write_run(run_length as u32)?; + i += run_length; continue; } } - let run_start = ichunk.leading_zeros() / 8; - if run_start > 0 { - for &b in &chunk[..8 - run_start as usize] { - self.write_bits( - HUFFMAN_CODES[b as usize] as u64, - HUFFMAN_LENGTHS[b as usize], - )?; - } - run = run_start; - continue; + let current = u64::from_le_bytes(data[i..][..8].try_into().unwrap()); + let current_hash = (0x27220a95u64.wrapping_mul(current & 0xffffffff)) as usize % TABLE_SIZE; + let current_hash2 = (0x330698ec124c97f2u64.wrapping_mul(current)) as usize % TABLE_SIZE; + + let [a, b] = matches[current_hash]; + let a_len = Self::match_length(data, a, i); + let b_len = Self::match_length(data, b, i); + + let [a2, b2] = matches[current_hash2]; + let a_len2 = Self::match_length(data, a2, i); + let b_len2 = Self::match_length(data, b2, i); + + if a < b { + matches[current_hash] = [i, b]; + } else { + matches[current_hash] = [a, i]; + } + if a2 < b2 { + matches[current_hash2] = [i, b2]; + } else { + matches[current_hash2] = [a2, i]; } - let n0 = HUFFMAN_LENGTHS[chunk[0] as usize]; - let n1 = HUFFMAN_LENGTHS[chunk[1] as usize]; - let n2 = HUFFMAN_LENGTHS[chunk[2] as usize]; - let n3 = HUFFMAN_LENGTHS[chunk[3] as usize]; - let bits = HUFFMAN_CODES[chunk[0] as usize] as u64 - | ((HUFFMAN_CODES[chunk[1] as usize] as u64) << n0) - | ((HUFFMAN_CODES[chunk[2] as usize] as u64) << (n0 + n1)) - | ((HUFFMAN_CODES[chunk[3] as usize] as u64) << (n0 + n1 + n2)); - self.write_bits(bits, n0 + n1 + n2 + n3)?; - - let n4 = HUFFMAN_LENGTHS[chunk[4] as usize]; - let n5 = HUFFMAN_LENGTHS[chunk[5] as usize]; - let n6 = HUFFMAN_LENGTHS[chunk[6] as usize]; - let n7 = HUFFMAN_LENGTHS[chunk[7] as usize]; - let bits2 = HUFFMAN_CODES[chunk[4] as usize] as u64 - | ((HUFFMAN_CODES[chunk[5] as usize] as u64) << n4) - | ((HUFFMAN_CODES[chunk[6] as usize] as u64) << (n4 + n5)) - | ((HUFFMAN_CODES[chunk[7] as usize] as u64) << (n4 + n5 + n6)); - self.write_bits(bits2, n4 + n5 + n6 + n7)?; - } + let (mut length, mut prev_i) = (a_len, a); + if b_len > length || b_len == length && b > prev_i { + length = b_len; + prev_i = b; + } + if a_len2 > length || a_len2 == length && a2 > prev_i { + length = a_len2; + prev_i = a2; + } + if b_len2 > length || b_len2 == length && b2 > prev_i { + length = b_len2; + prev_i = b2; + } + + if length >= 3 { + let next = u64::from_le_bytes(data[i + 1..][..8].try_into().unwrap()); + let next_hash = (0x27220a95u64.wrapping_mul(next & 0xffffffff)) as usize % TABLE_SIZE; + let next_hash2 = (0x330698ec124c97f2u64.wrapping_mul(next)) as usize % TABLE_SIZE; + + let [next_a, next_b] = matches[next_hash]; + let next_a_len = Self::match_length(data, next_a, i + 1); + let next_b_len = Self::match_length(data, next_b, i + 1); + + let [next_a2, next_b2] = matches[next_hash2]; + let next_a_len2 = Self::match_length(data, next_a2, i + 1); + let next_b_len2 = Self::match_length(data, next_b2, i + 1); + let next_length = next_a_len.max(next_b_len).max(next_a_len2).max(next_b_len2); + + if length >= next_length && next != 0 { + let sym = LENGTH_TO_SYMBOL[length - 3] as usize; + let len_bits = HUFFMAN_LENGTHS[sym]; + let len_extra = LENGTH_TO_LEN_EXTRA[length - 3]; + + let dist = (i - prev_i) as u16; + let mut dist_sym = 29; + while dist_sym > 0 && dist < DIST_SYM_TO_DIST_BASE[dist_sym as usize - 1] { + dist_sym -= 1; + } + let dist_bits = 6; + let dist_extra = DIST_SYM_TO_DIST_EXTRA[dist_sym as usize]; + + let backref_cost = + len_bits as u32 + len_extra as u32 + dist_bits as u32 + dist_extra as u32; + assert!(backref_cost < 256); + let backref_cost = backref_cost as u8; + + let mut literal_cost = 0; + for j in i..i + length { + literal_cost += HUFFMAN_LENGTHS[data[j] as usize] as u32; + } - if run > 0 { - self.write_run(run)?; + if (backref_cost as u32) < literal_cost { + innumerable::event!("backref", length.min(10) as u64); + + self.write_bits(0, backref_cost)?; + i += length; + continue; + } + } + } + + innumerable::event!("literal"); + self.write_bits( + HUFFMAN_CODES[data[i] as usize] as u64, + HUFFMAN_LENGTHS[data[i] as usize], + )?; + i += 1; } - for &b in chunks.remainder() { + for i in i..data.len() { + innumerable::event!("literal"); self.write_bits( - HUFFMAN_CODES[b as usize] as u64, - HUFFMAN_LENGTHS[b as usize], + HUFFMAN_CODES[data[i] as usize] as u64, + HUFFMAN_LENGTHS[data[i] as usize], )?; } From ef3b09992c55f65f4b6d39da8eb28acf6b6bea5d Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Fri, 20 Sep 2024 22:02:26 -0700 Subject: [PATCH 2/6] checkpoint --- src/compress.rs | 413 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 305 insertions(+), 108 deletions(-) diff --git a/src/compress.rs b/src/compress.rs index 10132bb..ec9df87 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -1,11 +1,220 @@ use simd_adler32::Adler32; -use std::{collections::BinaryHeap, io::{self, Seek, SeekFrom, Write}}; +use std::{ + collections::BinaryHeap, + io::{self, Seek, SeekFrom, Write}, +}; use crate::tables::{ BITMASKS, DIST_SYM_TO_DIST_BASE, DIST_SYM_TO_DIST_EXTRA, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, }; +fn build_huffman_tree( + frequencies: &[u32], + lengths: &mut [u8], + codes: &mut [u16], + length_limit: u8, +) -> bool { + assert_eq!(frequencies.len(), lengths.len()); + assert_eq!(frequencies.len(), codes.len()); + + if frequencies.iter().filter(|&&f| f > 0).count() <= 1 { + lengths.fill(0); + codes.fill(0); + return false; + } + + #[derive(Eq, PartialEq, Copy, Clone, Debug)] + struct Item(u32, u16); + impl Ord for Item { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + other.0.cmp(&self.0) + } + } + impl PartialOrd for Item { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + // Build a huffman tree + let mut internal_nodes = Vec::new(); + let mut nodes = BinaryHeap::from_iter( + frequencies + .iter() + .enumerate() + .filter(|(_, &frequency)| frequency > 0) + .map(|(i, &frequency)| Item(frequency, i as u16)), + ); + while nodes.len() > 1 { + let Item(frequency1, index1) = nodes.pop().unwrap(); + let mut root = nodes.peek_mut().unwrap(); + internal_nodes.push((index1, root.1)); + *root = Item( + frequency1 + root.0, + internal_nodes.len() as u16 + frequencies.len() as u16 - 1, + ); + } + + // Walk the tree to assign code lengths + lengths.fill(0); + let mut stack = Vec::new(); + stack.push((nodes.pop().unwrap().1, 0)); + while let Some((node, depth)) = stack.pop() { + let node = node as usize; + if node < frequencies.len() { + lengths[node] = depth as u8; + } else { + let (left, right) = internal_nodes[node - frequencies.len()]; + stack.push((left, depth + 1)); + stack.push((right, depth + 1)); + } + } + + // Limit the codes to length length_limit + let mut max_length = 0; + for &length in lengths.iter() { + max_length = max_length.max(length); + } + if max_length > length_limit { + let mut counts = [0u32; 16]; + for &length in lengths.iter() { + counts[length.min(length_limit) as usize] += 1; + } + + let mut total = 0; + for (i, count) in counts + .iter() + .enumerate() + .skip(1) + .take(length_limit as usize) + { + total += count << (length_limit as usize - i); + } + + while total > 1u32 << length_limit { + let mut i = length_limit as usize - 1; + while counts[i] == 0 { + i -= 1; + } + counts[i] -= 1; + counts[length_limit as usize] -= 1; + counts[i + 1] += 2; + total -= 1; + } + + // assign new lengths + let mut len = length_limit; + let mut indexes = frequencies.iter().copied().enumerate().collect::>(); + indexes.sort_unstable_by_key(|&(_, frequency)| frequency); + for &(i, frequency) in indexes.iter() { + if frequency > 0 { + while counts[len as usize] == 0 { + len -= 1; + } + lengths[i] = len; + counts[len as usize] -= 1; + } + } + } + + // Assign codes + codes.fill(0); + let mut code = 0u32; + for len in 1..=length_limit { + for (i, &length) in lengths.iter().enumerate() { + if length == len { + codes[i] = (code as u16).reverse_bits() >> (16 - len); + code += 1; + } + } + code <<= 1; + } + assert_eq!(code, 2 << length_limit); + + true +} + +const WAYS: usize = 16; +const CACHE_SIZE: usize = 32768; + +#[derive(Debug, Copy, Clone)] +struct Entry { + tags: [u32; WAYS], + offsets: [u32; WAYS], +} + +struct CacheTable { + entries: Box<[Entry; CACHE_SIZE]>, +} +impl CacheTable { + fn new() -> Self { + let entries: Box<[Entry]> = vec![ + Entry { + tags: [0; WAYS], + offsets: [0; WAYS], + }; + CACHE_SIZE + ] + .into_boxed_slice(); + + Self { + entries: entries.try_into().unwrap(), + } + } + + fn get(&self, data: &[u8], index: usize, hash: u32) -> (u32, u16) { + let mut best_offset = 0; + let mut best_length = 0; + + let entry = &self.entries[(hash as usize) % CACHE_SIZE]; + for i in 0..WAYS { + if entry.tags[i] != hash { + continue; + } + + let offset = entry.offsets[i] as usize; + if index - offset < 32768 { + let mut length = 0; + while length < 258 && index + length < data.len() && data[index + length] == data[offset + length] { + length += 1; + } + + if length > 3 && length > best_length { + best_offset = offset as u32; + best_length = length; + } + if length == 258 { + break; + } + } + } + + (best_offset, best_length as u16) + } + + fn insert(&mut self, hash: u32, offset: u32) { + let entry = &mut self.entries[(hash as usize) % CACHE_SIZE]; + + let mut oldest = 0; + for i in 1..WAYS { + if entry.offsets[i] < entry.offsets[oldest] { + oldest = i; + } + } + + entry.tags[oldest] = hash; + entry.offsets[oldest] = offset; + } +} + +fn hash(v: u64) -> u32 { + 0x27220a95u64.wrapping_mul((v & 0xffffff) ^ 0x330698ec) as u32 +} +fn hash2(v: u64) -> u32 { + 0x330698ecu64.wrapping_mul((v & 0xffffffffffff) ^ 0x27220a95) as u32 +} + /// Compressor that produces fdeflate compressed streams. pub struct Compressor { checksum: Adler32, @@ -43,30 +252,6 @@ impl Compressor { Ok(()) } - fn write_run(&mut self, mut run: u32) -> io::Result<()> { - self.write_bits(HUFFMAN_CODES[0] as u64, HUFFMAN_LENGTHS[0])?; - run -= 1; - - while run >= 258 { - self.write_bits(HUFFMAN_CODES[285] as u64, HUFFMAN_LENGTHS[285] + 1)?; - run -= 258; - } - - if run > 4 { - let sym = LENGTH_TO_SYMBOL[run as usize - 3] as usize; - self.write_bits(HUFFMAN_CODES[sym] as u64, HUFFMAN_LENGTHS[sym])?; - - let len_extra = LENGTH_TO_LEN_EXTRA[run as usize - 3]; - let extra = ((run - 3) & BITMASKS[len_extra as usize]) as u64; - self.write_bits(extra, len_extra + 1)?; - } else { - debug_assert_eq!(HUFFMAN_CODES[0], 0); - self.write_bits(0, run as u8 * HUFFMAN_LENGTHS[0])?; - } - - Ok(()) - } - /// Create a new Compressor. pub fn new(writer: W) -> io::Result { let mut compressor = Self { @@ -91,98 +276,62 @@ impl Compressor { Ok(()) } - fn match_length(data: &[u8], a: usize, b: usize) -> usize { - if b - a > 32768 { - return 0; - } - - let mut length = 0; - while length < 258 && b + length < data.len() && data[a + length] == data[b + length] { - length += 1; - } - length - } - /// Write data to the compressor. pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> { self.checksum.write(data); - const TABLE_SIZE: usize = 32768; - let mut matches = vec![[0; 2]; TABLE_SIZE]; + let mut matches = CacheTable::new(); + + enum Symbol { + Literal(u8), + Rle { length: u16 }, + Backref { length: u16, distance: u16, dist_sym: u8 }, + } + let mut symbols = Vec::new(); let mut i = 0; while i + 8 < data.len() { - if data[i] == 0 { let mut run_length = 1; while run_length < 258 && i + run_length < data.len() && data[i + run_length] == 0 { run_length += 1; } - if run_length >= 4 { - innumerable::event!("run", run_length.min(10) as u64); - self.write_run(run_length as u32)?; + if run_length >= 5 { + symbols.push(Symbol::Literal(0)); + symbols.push(Symbol::Rle { + length: run_length as u16, + }); i += run_length; continue; } } let current = u64::from_le_bytes(data[i..][..8].try_into().unwrap()); - let current_hash = (0x27220a95u64.wrapping_mul(current & 0xffffffff)) as usize % TABLE_SIZE; - let current_hash2 = (0x330698ec124c97f2u64.wrapping_mul(current)) as usize % TABLE_SIZE; - - let [a, b] = matches[current_hash]; - let a_len = Self::match_length(data, a, i); - let b_len = Self::match_length(data, b, i); - - let [a2, b2] = matches[current_hash2]; - let a_len2 = Self::match_length(data, a2, i); - let b_len2 = Self::match_length(data, b2, i); - - if a < b { - matches[current_hash] = [i, b]; - } else { - matches[current_hash] = [a, i]; - } - if a2 < b2 { - matches[current_hash2] = [i, b2]; - } else { - matches[current_hash2] = [a2, i]; - } - - let (mut length, mut prev_i) = (a_len, a); - if b_len > length || b_len == length && b > prev_i { - length = b_len; - prev_i = b; - } - if a_len2 > length || a_len2 == length && a2 > prev_i { - length = a_len2; - prev_i = a2; - } - if b_len2 > length || b_len2 == length && b2 > prev_i { - length = b_len2; - prev_i = b2; - } + let current_hash = hash(current); + // let current_hash2 = hash2(current); + let (prev_i, length) = matches.get(data, i, current_hash as u32); + // let (prev_i2, length2) = matches.get(data, i, current_hash2 as u32); + matches.insert(current_hash as u32, i as u32); + // matches.insert(current_hash2 as u32, i as u32); + + // let (prev_i, length) = if length2 > length { + // (prev_i2, length2) + // } else { + // (prev_i, length) + // }; if length >= 3 { let next = u64::from_le_bytes(data[i + 1..][..8].try_into().unwrap()); - let next_hash = (0x27220a95u64.wrapping_mul(next & 0xffffffff)) as usize % TABLE_SIZE; - let next_hash2 = (0x330698ec124c97f2u64.wrapping_mul(next)) as usize % TABLE_SIZE; - - let [next_a, next_b] = matches[next_hash]; - let next_a_len = Self::match_length(data, next_a, i + 1); - let next_b_len = Self::match_length(data, next_b, i + 1); - - let [next_a2, next_b2] = matches[next_hash2]; - let next_a_len2 = Self::match_length(data, next_a2, i + 1); - let next_b_len2 = Self::match_length(data, next_b2, i + 1); - let next_length = next_a_len.max(next_b_len).max(next_a_len2).max(next_b_len2); + let next_length1 = matches.get(data, i + 1, hash(next)).1; + let next_length2 = 0;//matches.get(data, i + 1, hash2(next)).1; + let next_length = next_length1.max(next_length2); if length >= next_length && next != 0 { - let sym = LENGTH_TO_SYMBOL[length - 3] as usize; + let sym = LENGTH_TO_SYMBOL[length as usize - 3] as usize; let len_bits = HUFFMAN_LENGTHS[sym]; - let len_extra = LENGTH_TO_LEN_EXTRA[length - 3]; + let len_extra = LENGTH_TO_LEN_EXTRA[length as usize - 3]; - let dist = (i - prev_i) as u16; + let dist = (i - prev_i as usize) as u16; let mut dist_sym = 29; while dist_sym > 0 && dist < DIST_SYM_TO_DIST_BASE[dist_sym as usize - 1] { dist_sym -= 1; @@ -196,34 +345,82 @@ impl Compressor { let backref_cost = backref_cost as u8; let mut literal_cost = 0; - for j in i..i + length { + for j in i..i + length as usize{ literal_cost += HUFFMAN_LENGTHS[data[j] as usize] as u32; } if (backref_cost as u32) < literal_cost { - innumerable::event!("backref", length.min(10) as u64); - - self.write_bits(0, backref_cost)?; - i += length; + symbols.push(Symbol::Backref { + length: length as u16, + distance: dist, + dist_sym, + }); + i += length as usize; continue; } } } - innumerable::event!("literal"); - self.write_bits( - HUFFMAN_CODES[data[i] as usize] as u64, - HUFFMAN_LENGTHS[data[i] as usize], - )?; + symbols.push(Symbol::Literal(data[i])); i += 1; } - for i in i..data.len() { - innumerable::event!("literal"); - self.write_bits( - HUFFMAN_CODES[data[i] as usize] as u64, - HUFFMAN_LENGTHS[data[i] as usize], - )?; + symbols.push(Symbol::Literal(data[i])); + } + + let mut frequencies = [0u32; 286]; + let mut dist_frequencies = [0u32; 30]; + for symbol in &symbols { + match symbol { + Symbol::Literal(lit) => frequencies[*lit as usize] += 1, + Symbol::Rle { length } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + frequencies[sym] += 1; + dist_frequencies[0] += 1; + } + Symbol::Backref { length, dist_sym, .. } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + frequencies[sym] += 1; + dist_frequencies[*dist_sym as usize] += 1; + } + } + } + + let mut lengths = [0u8; 286]; + let mut codes = [0u16; 286]; + build_huffman_tree(&frequencies, &mut lengths, &mut codes, 15); + + let mut dist_lengths = [0u8; 30]; + let mut dist_codes = [0u16; 30]; + build_huffman_tree(&dist_frequencies, &mut dist_lengths, &mut dist_codes, 15); + + for symbol in &symbols { + match symbol { + Symbol::Literal(lit) => { + let sym = *lit as usize; + self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; + } + Symbol::Rle { length } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; + let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; + let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; + self.write_bits(extra, len_extra + 1)?; + self.write_bits(dist_codes[0] as u64, dist_lengths[0])?; + } + Symbol::Backref { length, distance, dist_sym } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; + let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; + let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; + self.write_bits(extra, len_extra)?; + + self.write_bits(dist_codes[*dist_sym as usize] as u64, dist_lengths[*dist_sym as usize])?; + let dist_extra = DIST_SYM_TO_DIST_EXTRA[*dist_sym as usize]; + let extra = ((*distance as u32) & BITMASKS[dist_extra as usize]) as u64; + self.write_bits(extra, dist_extra)?; + } + } } Ok(()) From 2d25d4813b7c1e4e3c4850a8fc6b8a9f541e06af Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Sat, 21 Sep 2024 01:12:50 -0700 Subject: [PATCH 3/6] checkpoint --- src/compress.rs | 288 ++++++++++++++++++++++++++++-------------------- 1 file changed, 167 insertions(+), 121 deletions(-) diff --git a/src/compress.rs b/src/compress.rs index ec9df87..2ca00b8 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -135,8 +135,15 @@ fn build_huffman_tree( true } +fn hash(v: u64) -> u32 { + (0x27220a95u64.wrapping_mul((v & 0xffff_ffff) ^ 0x56330698ec) >> 32) as u32 +} +fn hash2(v: u64) -> u32 { + (0x330698ecu64.wrapping_mul(v ^ 0x27220a95) >> 32) as u32 +} + const WAYS: usize = 16; -const CACHE_SIZE: usize = 32768; +const CACHE_SIZE: usize = 1 << 16; #[derive(Debug, Copy, Clone)] struct Entry { @@ -176,7 +183,10 @@ impl CacheTable { let offset = entry.offsets[i] as usize; if index - offset < 32768 { let mut length = 0; - while length < 258 && index + length < data.len() && data[index + length] == data[offset + length] { + while length < 258 + && index + length < data.len() + && data[index + length] == data[offset + length] + { length += 1; } @@ -208,13 +218,6 @@ impl CacheTable { } } -fn hash(v: u64) -> u32 { - 0x27220a95u64.wrapping_mul((v & 0xffffff) ^ 0x330698ec) as u32 -} -fn hash2(v: u64) -> u32 { - 0x330698ecu64.wrapping_mul((v & 0xffffffffffff) ^ 0x27220a95) as u32 -} - /// Compressor that produces fdeflate compressed streams. pub struct Compressor { checksum: Adler32, @@ -280,76 +283,95 @@ impl Compressor { pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> { self.checksum.write(data); - let mut matches = CacheTable::new(); - enum Symbol { Literal(u8), - Rle { length: u16 }, - Backref { length: u16, distance: u16, dist_sym: u8 }, + Rle { + length: u16, + }, + Backref { + length: u16, + distance: u16, + dist_sym: u8, + }, } - let mut symbols = Vec::new(); - - let mut i = 0; - while i + 8 < data.len() { - if data[i] == 0 { - let mut run_length = 1; - while run_length < 258 && i + run_length < data.len() && data[i + run_length] == 0 { - run_length += 1; - } - if run_length >= 5 { + + let mut lengths = HUFFMAN_LENGTHS; + let mut dist_lengths = [6u8; 30]; + dist_lengths[0] = 1; + + for n in 0..2 { + let mut matches = CacheTable::new(); + let mut symbols = Vec::new(); + + let mut i = 0; + while i + 8 < data.len() { + let current = u64::from_le_bytes(data[i..][..8].try_into().unwrap()); + + if current & 0xffff_ffff == 0 { + let mut run_length = 4; + while run_length < 258 + && i + run_length < data.len() + && data[i + run_length] == 0 + { + run_length += 1; + } symbols.push(Symbol::Literal(0)); symbols.push(Symbol::Rle { - length: run_length as u16, + length: (run_length - 1) as u16, }); i += run_length; continue; } - } - let current = u64::from_le_bytes(data[i..][..8].try_into().unwrap()); - let current_hash = hash(current); - // let current_hash2 = hash2(current); - let (prev_i, length) = matches.get(data, i, current_hash as u32); - // let (prev_i2, length2) = matches.get(data, i, current_hash2 as u32); - matches.insert(current_hash as u32, i as u32); - // matches.insert(current_hash2 as u32, i as u32); - - // let (prev_i, length) = if length2 > length { - // (prev_i2, length2) - // } else { - // (prev_i, length) - // }; - - if length >= 3 { - let next = u64::from_le_bytes(data[i + 1..][..8].try_into().unwrap()); - let next_length1 = matches.get(data, i + 1, hash(next)).1; - let next_length2 = 0;//matches.get(data, i + 1, hash2(next)).1; - let next_length = next_length1.max(next_length2); - - if length >= next_length && next != 0 { - let sym = LENGTH_TO_SYMBOL[length as usize - 3] as usize; - let len_bits = HUFFMAN_LENGTHS[sym]; - let len_extra = LENGTH_TO_LEN_EXTRA[length as usize - 3]; - - let dist = (i - prev_i as usize) as u16; - let mut dist_sym = 29; - while dist_sym > 0 && dist < DIST_SYM_TO_DIST_BASE[dist_sym as usize - 1] { - dist_sym -= 1; - } - let dist_bits = 6; - let dist_extra = DIST_SYM_TO_DIST_EXTRA[dist_sym as usize]; - - let backref_cost = - len_bits as u32 + len_extra as u32 + dist_bits as u32 + dist_extra as u32; - assert!(backref_cost < 256); - let backref_cost = backref_cost as u8; + let current_hash = hash(current); + let current_hash2 = hash2(current); + let (prev_i, length) = matches.get(data, i, current_hash as u32); + let (prev_i2, length2) = matches.get(data, i, current_hash2 as u32); + matches.insert(current_hash as u32, i as u32); + matches.insert(current_hash2 as u32, i as u32); + + let (prev_i, length) = if length2 > length { + (prev_i2, length2) + } else { + (prev_i, length) + }; + + if length >= 3 { + let next = u64::from_le_bytes(data[i + 1..][..8].try_into().unwrap()); + let next_length1 = matches.get(data, i + 1, hash(next)).1; + let next_length2 = matches.get(data, i + 1, hash2(next)).1; + let next_length = next_length1.max(next_length2); + + if length >= next_length && next != 0 { + let dist = (i - prev_i as usize) as u16; + let mut dist_sym = 29; + while dist_sym > 0 && dist < DIST_SYM_TO_DIST_BASE[dist_sym as usize - 1] { + dist_sym -= 1; + } + + if length <= 8 { + let sym = LENGTH_TO_SYMBOL[length as usize - 3] as usize; + let len_bits = lengths[sym]; + let len_extra = LENGTH_TO_LEN_EXTRA[length as usize - 3]; + let dist_bits = dist_lengths[dist_sym as usize]; + let dist_extra = DIST_SYM_TO_DIST_EXTRA[dist_sym as usize]; + let backref_cost = + (len_bits + len_extra + dist_bits + dist_extra) as u32; + + let mut literal_cost = 0; + for j in i..i + length as usize { + literal_cost += lengths[data[j] as usize] as u32; + if literal_cost >= backref_cost { + break; + } + } + if literal_cost <= backref_cost { + symbols.push(Symbol::Literal(data[i])); + i += 1; + continue; + } + } - let mut literal_cost = 0; - for j in i..i + length as usize{ - literal_cost += HUFFMAN_LENGTHS[data[j] as usize] as u32; - } - - if (backref_cost as u32) < literal_cost { symbols.push(Symbol::Backref { length: length as u16, distance: dist, @@ -359,66 +381,90 @@ impl Compressor { continue; } } - } - symbols.push(Symbol::Literal(data[i])); - i += 1; - } - for i in i..data.len() { - symbols.push(Symbol::Literal(data[i])); - } + symbols.push(Symbol::Literal(data[i])); + i += 1; + } + for i in i..data.len() { + symbols.push(Symbol::Literal(data[i])); + } - let mut frequencies = [0u32; 286]; - let mut dist_frequencies = [0u32; 30]; - for symbol in &symbols { - match symbol { - Symbol::Literal(lit) => frequencies[*lit as usize] += 1, - Symbol::Rle { length } => { - let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; - frequencies[sym] += 1; - dist_frequencies[0] += 1; - } - Symbol::Backref { length, dist_sym, .. } => { - let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; - frequencies[sym] += 1; - dist_frequencies[*dist_sym as usize] += 1; + let mut frequencies = [0u32; 286]; + let mut dist_frequencies = [0u32; 30]; + for symbol in &symbols { + match symbol { + Symbol::Literal(lit) => frequencies[*lit as usize] += 1, + Symbol::Rle { length } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + frequencies[sym] += 1; + dist_frequencies[0] += 1; + } + Symbol::Backref { + length, dist_sym, .. + } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + frequencies[sym] += 1; + dist_frequencies[*dist_sym as usize] += 1; + } } } - } - let mut lengths = [0u8; 286]; - let mut codes = [0u16; 286]; - build_huffman_tree(&frequencies, &mut lengths, &mut codes, 15); + let mut codes = [0u16; 286]; + build_huffman_tree(&frequencies, &mut lengths, &mut codes, 15); - let mut dist_lengths = [0u8; 30]; - let mut dist_codes = [0u16; 30]; - build_huffman_tree(&dist_frequencies, &mut dist_lengths, &mut dist_codes, 15); + let mut dist_codes = [0u16; 30]; + build_huffman_tree(&dist_frequencies, &mut dist_lengths, &mut dist_codes, 15); - for symbol in &symbols { - match symbol { - Symbol::Literal(lit) => { - let sym = *lit as usize; - self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; - } - Symbol::Rle { length } => { - let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; - self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; - let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; - let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; - self.write_bits(extra, len_extra + 1)?; - self.write_bits(dist_codes[0] as u64, dist_lengths[0])?; - } - Symbol::Backref { length, distance, dist_sym } => { - let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; - self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; - let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; - let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; - self.write_bits(extra, len_extra)?; - - self.write_bits(dist_codes[*dist_sym as usize] as u64, dist_lengths[*dist_sym as usize])?; - let dist_extra = DIST_SYM_TO_DIST_EXTRA[*dist_sym as usize]; - let extra = ((*distance as u32) & BITMASKS[dist_extra as usize]) as u64; - self.write_bits(extra, dist_extra)?; + if n == 0 { + continue; + } + + let rle_three = lengths[257] + dist_lengths[0] > 3 * lengths[0]; + let rle_four = lengths[257] + dist_lengths[0] > 4 * lengths[0]; + + for symbol in &symbols { + match symbol { + Symbol::Literal(lit) => { + let sym = *lit as usize; + self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; + } + Symbol::Rle { length: 3 } if rle_three => { + self.write_bits(codes[0] as u64, lengths[0] as u8)?; + self.write_bits(codes[0] as u64, lengths[0] as u8)?; + self.write_bits(codes[0] as u64, lengths[0] as u8)?; + } + Symbol::Rle { length: 4 } if rle_four => { + self.write_bits(codes[0] as u64, lengths[0] as u8)?; + self.write_bits(codes[0] as u64, lengths[0] as u8)?; + self.write_bits(codes[0] as u64, lengths[0] as u8)?; + } + Symbol::Rle { length } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; + let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; + let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; + self.write_bits(extra, len_extra + 1)?; + self.write_bits(dist_codes[0] as u64, dist_lengths[0])?; + } + Symbol::Backref { + length, + distance, + dist_sym, + } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; + let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; + let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; + self.write_bits(extra, len_extra)?; + + self.write_bits( + dist_codes[*dist_sym as usize] as u64, + dist_lengths[*dist_sym as usize], + )?; + let dist_extra = DIST_SYM_TO_DIST_EXTRA[*dist_sym as usize]; + let extra = ((*distance as u32) & BITMASKS[dist_extra as usize]) as u64; + self.write_bits(extra, dist_extra)?; + } } } } From d06cd69c2a2e81e1e3d1a7d5449d5c85383a734f Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Sat, 21 Sep 2024 15:13:31 -0700 Subject: [PATCH 4/6] Checkpoint --- src/compress.rs | 166 ++++++++++++++++++++++++++++++------------------ 1 file changed, 105 insertions(+), 61 deletions(-) diff --git a/src/compress.rs b/src/compress.rs index 2ca00b8..0c09dcd 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -135,6 +135,14 @@ fn build_huffman_tree( true } +fn distance_to_dist_sym(distance: u16) -> u8 { + let mut dist_sym = 29; + while dist_sym > 0 && distance < DIST_SYM_TO_DIST_BASE[dist_sym as usize - 1] { + dist_sym -= 1; + } + dist_sym +} + fn hash(v: u64) -> u32 { (0x27220a95u64.wrapping_mul((v & 0xffff_ffff) ^ 0x56330698ec) >> 32) as u32 } @@ -142,7 +150,7 @@ fn hash2(v: u64) -> u32 { (0x330698ecu64.wrapping_mul(v ^ 0x27220a95) >> 32) as u32 } -const WAYS: usize = 16; +const WAYS: usize = 1; const CACHE_SIZE: usize = 1 << 16; #[derive(Debug, Copy, Clone)] @@ -203,6 +211,16 @@ impl CacheTable { (best_offset, best_length as u16) } + fn contains(&self, hash: u32) -> bool { + let entry = &self.entries[(hash as usize) % CACHE_SIZE]; + for i in 0..WAYS { + if entry.tags[i] == hash { + return true; + } + } + false + } + fn insert(&mut self, hash: u32, offset: u32) { let entry = &mut self.entries[(hash as usize) % CACHE_SIZE]; @@ -295,16 +313,27 @@ impl Compressor { }, } - let mut lengths = HUFFMAN_LENGTHS; - let mut dist_lengths = [6u8; 30]; - dist_lengths[0] = 1; + let mut short_matches = CacheTable::new(); + let mut long_matches = CacheTable::new(); + let mut last_match = 0; + let mut i = 0; - for n in 0..2 { - let mut matches = CacheTable::new(); + while i < data.len() { let mut symbols = Vec::new(); - let mut i = 0; - while i + 8 < data.len() { + let block_end = data.len().min(i + 64 * 1024 * 1024); + + let mut lengths = HUFFMAN_LENGTHS; + let mut dist_lengths = [6u8; 30]; + dist_lengths[0] = 1; + + // for len in lengths.iter_mut().chain(dist_lengths.iter_mut()) { + // if *len == 0 { + // *len = 15; + // } + // } + + while i < block_end && i + 8 < data.len() { let current = u64::from_le_bytes(data[i..][..8].try_into().unwrap()); if current & 0xffff_ffff == 0 { @@ -323,74 +352,92 @@ impl Compressor { continue; } - let current_hash = hash(current); - let current_hash2 = hash2(current); - let (prev_i, length) = matches.get(data, i, current_hash as u32); - let (prev_i2, length2) = matches.get(data, i, current_hash2 as u32); - matches.insert(current_hash as u32, i as u32); - matches.insert(current_hash2 as u32, i as u32); + // Long hash + let long_hash = hash2(current); + let (prev_i, length) = long_matches.get(data, i, long_hash as u32); + if length >= 8 { + for j in (i + length as usize - 8)..(i + length as usize).min(data.len() - 8) { + let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); + short_matches.insert(hash(v), j as u32); + long_matches.insert(hash2(v), j as u32); + } - let (prev_i, length) = if length2 > length { - (prev_i2, length2) - } else { - (prev_i, length) - }; + let dist = (i - prev_i as usize) as u16; + let dist_sym = distance_to_dist_sym(dist); + symbols.push(Symbol::Backref { + length: length as u16, + distance: dist, + dist_sym, + }); + i += length as usize; + last_match = i; + continue; + } + + // Short hash + let short_hash = hash(current); + let (prev_i, length) = short_matches.get(data, i, short_hash as u32); + short_matches.insert(short_hash as u32, i as u32); + long_matches.insert(long_hash as u32, i as u32); if length >= 3 { let next = u64::from_le_bytes(data[i + 1..][..8].try_into().unwrap()); - let next_length1 = matches.get(data, i + 1, hash(next)).1; - let next_length2 = matches.get(data, i + 1, hash2(next)).1; - let next_length = next_length1.max(next_length2); - - if length >= next_length && next != 0 { + if next != 0 && !long_matches.contains(hash2(next)) { let dist = (i - prev_i as usize) as u16; - let mut dist_sym = 29; - while dist_sym > 0 && dist < DIST_SYM_TO_DIST_BASE[dist_sym as usize - 1] { - dist_sym -= 1; - } - - if length <= 8 { - let sym = LENGTH_TO_SYMBOL[length as usize - 3] as usize; - let len_bits = lengths[sym]; - let len_extra = LENGTH_TO_LEN_EXTRA[length as usize - 3]; - let dist_bits = dist_lengths[dist_sym as usize]; - let dist_extra = DIST_SYM_TO_DIST_EXTRA[dist_sym as usize]; - let backref_cost = - (len_bits + len_extra + dist_bits + dist_extra) as u32; - - let mut literal_cost = 0; - for j in i..i + length as usize { - literal_cost += lengths[data[j] as usize] as u32; - if literal_cost >= backref_cost { - break; - } - } - if literal_cost <= backref_cost { - symbols.push(Symbol::Literal(data[i])); - i += 1; - continue; + let dist_sym = distance_to_dist_sym(dist); + + let sym = LENGTH_TO_SYMBOL[length as usize - 3] as usize; + let len_bits = lengths[sym]; + let len_extra = LENGTH_TO_LEN_EXTRA[length as usize - 3]; + let dist_bits = dist_lengths[dist_sym as usize]; + let dist_extra = DIST_SYM_TO_DIST_EXTRA[dist_sym as usize]; + let backref_cost = (len_bits + len_extra + dist_bits + dist_extra) as u32; + + let mut literal_cost = 0; + for j in i..i + length as usize { + literal_cost += lengths[data[j] as usize] as u32; + if literal_cost >= backref_cost { + break; } } + if literal_cost > backref_cost { + symbols.push(Symbol::Backref { + length: length as u16, + distance: dist, + dist_sym, + }); + + for j in (i + 1).min(i + length as usize).saturating_sub(8) + ..(i + length as usize).min(data.len() - 8) + { + let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); + short_matches.insert(hash(v), j as u32); + long_matches.insert(hash2(v), j as u32); + } - symbols.push(Symbol::Backref { - length: length as u16, - distance: dist, - dist_sym, - }); - i += length as usize; - continue; + i += length as usize; + last_match = i; + continue; + } } } + for _ in 0..((i - last_match) >> 9).min((data.len() - i).saturating_sub(8)) { + symbols.push(Symbol::Literal(data[i])); + i += 1; + } + symbols.push(Symbol::Literal(data[i])); i += 1; } - for i in i..data.len() { + for i in i..block_end { symbols.push(Symbol::Literal(data[i])); } + i = block_end; let mut frequencies = [0u32; 286]; let mut dist_frequencies = [0u32; 30]; + // frequencies[256] = 1; for symbol in &symbols { match symbol { Symbol::Literal(lit) => frequencies[*lit as usize] += 1, @@ -415,10 +462,6 @@ impl Compressor { let mut dist_codes = [0u16; 30]; build_huffman_tree(&dist_frequencies, &mut dist_lengths, &mut dist_codes, 15); - if n == 0 { - continue; - } - let rle_three = lengths[257] + dist_lengths[0] > 3 * lengths[0]; let rle_four = lengths[257] + dist_lengths[0] > 4 * lengths[0]; @@ -467,6 +510,7 @@ impl Compressor { } } } + self.write_bits(codes[256] as u64, lengths[256])?; } Ok(()) From 17d3d58b9ba827f54cf664c6da8bb39055778f8a Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Sat, 21 Sep 2024 15:29:42 -0700 Subject: [PATCH 5/6] Fixes --- src/compress.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/compress.rs b/src/compress.rs index 0c09dcd..14088d0 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -151,7 +151,7 @@ fn hash2(v: u64) -> u32 { } const WAYS: usize = 1; -const CACHE_SIZE: usize = 1 << 16; +const CACHE_SIZE: usize = 1 << 19; #[derive(Debug, Copy, Clone)] struct Entry { @@ -315,30 +315,31 @@ impl Compressor { let mut short_matches = CacheTable::new(); let mut long_matches = CacheTable::new(); - let mut last_match = 0; let mut i = 0; + let mut lengths = HUFFMAN_LENGTHS; + let mut dist_lengths = [6u8; 30]; + dist_lengths[0] = 1; + while i < data.len() { let mut symbols = Vec::new(); - let block_end = data.len().min(i + 64 * 1024 * 1024); - - let mut lengths = HUFFMAN_LENGTHS; - let mut dist_lengths = [6u8; 30]; - dist_lengths[0] = 1; + let block_end = data.len().min(i + 128 * 1024); - // for len in lengths.iter_mut().chain(dist_lengths.iter_mut()) { - // if *len == 0 { - // *len = 15; - // } - // } + for len in &mut lengths { + if *len == 0 { *len = 15; } + } + for len in &mut dist_lengths { + if *len == 0 { *len = 6; } + } + let mut last_match = i; while i < block_end && i + 8 < data.len() { let current = u64::from_le_bytes(data[i..][..8].try_into().unwrap()); if current & 0xffff_ffff == 0 { let mut run_length = 4; - while run_length < 258 + while run_length <= 258 && i + run_length < data.len() && data[i + run_length] == 0 { @@ -433,11 +434,11 @@ impl Compressor { for i in i..block_end { symbols.push(Symbol::Literal(data[i])); } - i = block_end; + i = i.max(block_end); let mut frequencies = [0u32; 286]; let mut dist_frequencies = [0u32; 30]; - // frequencies[256] = 1; + frequencies[256] = 1; for symbol in &symbols { match symbol { Symbol::Literal(lit) => frequencies[*lit as usize] += 1, From 484eaf5ee88fd011de86d5cd4fa654d80c605364 Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Sun, 22 Sep 2024 00:07:55 -0700 Subject: [PATCH 6/6] Actually produce valid output --- src/compress.rs | 223 +++++++++++++++++++++++++--------------------- src/decompress.rs | 2 + 2 files changed, 123 insertions(+), 102 deletions(-) diff --git a/src/compress.rs b/src/compress.rs index 14088d0..2a5322f 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -5,8 +5,8 @@ use std::{ }; use crate::tables::{ - BITMASKS, DIST_SYM_TO_DIST_BASE, DIST_SYM_TO_DIST_EXTRA, HUFFMAN_CODES, HUFFMAN_LENGTHS, - LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, + BITMASKS, CLCL_ORDER, DIST_SYM_TO_DIST_BASE, DIST_SYM_TO_DIST_EXTRA, + HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, }; fn build_huffman_tree( @@ -21,6 +21,9 @@ fn build_huffman_tree( if frequencies.iter().filter(|&&f| f > 0).count() <= 1 { lengths.fill(0); codes.fill(0); + if let Some(i) = frequencies.iter().position(|&f| f > 0) { + lengths[i] = 1; + } return false; } @@ -136,8 +139,13 @@ fn build_huffman_tree( } fn distance_to_dist_sym(distance: u16) -> u8 { + const LOOKUP: [u8; 16] = [0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7]; + if distance <= 16 { + return LOOKUP[distance as usize - 1]; + } + let mut dist_sym = 29; - while dist_sym > 0 && distance < DIST_SYM_TO_DIST_BASE[dist_sym as usize - 1] { + while dist_sym > 0 && distance < DIST_SYM_TO_DIST_BASE[dist_sym as usize] { dist_sym -= 1; } dist_sym @@ -242,6 +250,7 @@ pub struct Compressor { buffer: u64, nbits: u8, writer: W, + pending: Vec, } impl Compressor { fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> { @@ -280,6 +289,7 @@ impl Compressor { buffer: 0, nbits: 0, writer, + pending: Vec::new(), }; compressor.write_headers()?; Ok(compressor) @@ -291,8 +301,8 @@ impl Compressor { 114, 75, 99, 174, 109, 219, 182, 109, 219, 182, 109, 219, 182, 109, 105, 140, 158, 150, 74, 175, 158, 50, 51, 34, 238, 249, 118, 183, 106, 122, 166, 135, 59, 107, 213, 15, ]; - self.writer.write_all(&HEADER[..53]).unwrap(); - self.write_bits(HEADER[53] as u64, 5)?; + self.writer.write_all(&HEADER[..2]).unwrap(); + //self.write_bits(HEADER[53] as u64, 5)?; Ok(()) } @@ -300,12 +310,16 @@ impl Compressor { /// Write data to the compressor. pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> { self.checksum.write(data); + self.pending.extend_from_slice(data); + Ok(()) + } + + /// Write the remainder of the stream and return the inner writer. + pub fn finish(mut self) -> io::Result { + let data = std::mem::take(&mut self.pending); enum Symbol { Literal(u8), - Rle { - length: u16, - }, Backref { length: u16, distance: u16, @@ -318,8 +332,8 @@ impl Compressor { let mut i = 0; let mut lengths = HUFFMAN_LENGTHS; - let mut dist_lengths = [6u8; 30]; - dist_lengths[0] = 1; + let mut dist_lengths = [5u8; 30]; + //dist_lengths[0] = 1; while i < data.len() { let mut symbols = Vec::new(); @@ -327,37 +341,25 @@ impl Compressor { let block_end = data.len().min(i + 128 * 1024); for len in &mut lengths { - if *len == 0 { *len = 15; } + if *len == 0 { + *len = 15; + } } for len in &mut dist_lengths { - if *len == 0 { *len = 6; } + if *len == 0 { + *len = 5; + } } let mut last_match = i; - while i < block_end && i + 8 < data.len() { + while i < block_end && i + 8 <= data.len() { let current = u64::from_le_bytes(data[i..][..8].try_into().unwrap()); - if current & 0xffff_ffff == 0 { - let mut run_length = 4; - while run_length <= 258 - && i + run_length < data.len() - && data[i + run_length] == 0 - { - run_length += 1; - } - symbols.push(Symbol::Literal(0)); - symbols.push(Symbol::Rle { - length: (run_length - 1) as u16, - }); - i += run_length; - continue; - } - // Long hash let long_hash = hash2(current); - let (prev_i, length) = long_matches.get(data, i, long_hash as u32); + let (prev_i, length) = long_matches.get(&data, i, long_hash as u32); if length >= 8 { - for j in (i + length as usize - 8)..(i + length as usize).min(data.len() - 8) { + for j in i..(i + length as usize).min(data.len() - 8) { let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); short_matches.insert(hash(v), j as u32); long_matches.insert(hash2(v), j as u32); @@ -378,48 +380,43 @@ impl Compressor { // Short hash let short_hash = hash(current); - let (prev_i, length) = short_matches.get(data, i, short_hash as u32); + let (prev_i, length) = short_matches.get(&data, i, short_hash as u32); short_matches.insert(short_hash as u32, i as u32); long_matches.insert(long_hash as u32, i as u32); if length >= 3 { - let next = u64::from_le_bytes(data[i + 1..][..8].try_into().unwrap()); - if next != 0 && !long_matches.contains(hash2(next)) { - let dist = (i - prev_i as usize) as u16; - let dist_sym = distance_to_dist_sym(dist); - - let sym = LENGTH_TO_SYMBOL[length as usize - 3] as usize; - let len_bits = lengths[sym]; - let len_extra = LENGTH_TO_LEN_EXTRA[length as usize - 3]; - let dist_bits = dist_lengths[dist_sym as usize]; - let dist_extra = DIST_SYM_TO_DIST_EXTRA[dist_sym as usize]; - let backref_cost = (len_bits + len_extra + dist_bits + dist_extra) as u32; - - let mut literal_cost = 0; - for j in i..i + length as usize { - literal_cost += lengths[data[j] as usize] as u32; - if literal_cost >= backref_cost { - break; - } + let dist = (i - prev_i as usize) as u16; + let dist_sym = distance_to_dist_sym(dist); + + let sym = LENGTH_TO_SYMBOL[length as usize - 3] as usize; + let len_bits = lengths[sym]; + let len_extra = LENGTH_TO_LEN_EXTRA[length as usize - 3]; + let dist_bits = dist_lengths[dist_sym as usize]; + let dist_extra = DIST_SYM_TO_DIST_EXTRA[dist_sym as usize]; + let backref_cost = (len_bits + len_extra + dist_bits + dist_extra) as u32; + + let mut literal_cost = 0; + for j in i..i + length as usize { + literal_cost += lengths[data[j] as usize] as u32; + if literal_cost >= backref_cost { + break; } - if literal_cost > backref_cost { - symbols.push(Symbol::Backref { - length: length as u16, - distance: dist, - dist_sym, - }); - - for j in (i + 1).min(i + length as usize).saturating_sub(8) - ..(i + length as usize).min(data.len() - 8) - { - let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); - short_matches.insert(hash(v), j as u32); - long_matches.insert(hash2(v), j as u32); - } - - i += length as usize; - last_match = i; - continue; + } + if literal_cost > backref_cost { + symbols.push(Symbol::Backref { + length: length as u16, + distance: dist, + dist_sym, + }); + + for j in (i + 1)..(i + length as usize).min(data.len() - 8) { + let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); + short_matches.insert(hash(v), j as u32); + long_matches.insert(hash2(v), j as u32); } + + i += length as usize; + last_match = i; + continue; } } @@ -442,11 +439,6 @@ impl Compressor { for symbol in &symbols { match symbol { Symbol::Literal(lit) => frequencies[*lit as usize] += 1, - Symbol::Rle { length } => { - let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; - frequencies[sym] += 1; - dist_frequencies[0] += 1; - } Symbol::Backref { length, dist_sym, .. } => { @@ -463,8 +455,41 @@ impl Compressor { let mut dist_codes = [0u16; 30]; build_huffman_tree(&dist_frequencies, &mut dist_lengths, &mut dist_codes, 15); - let rle_three = lengths[257] + dist_lengths[0] > 3 * lengths[0]; - let rle_four = lengths[257] + dist_lengths[0] > 4 * lengths[0]; + if i == data.len() { + self.write_bits(101, 3)?; // final block + } else { + self.write_bits(100, 3)?; // non-final block + } + self.write_bits(29, 5)?; // hlit + self.write_bits(29, 5)?; // hdist + self.write_bits(15, 4)?; // hclen + + let mut code_length_frequencies = [0u32; 19]; + for &length in &lengths { + code_length_frequencies[length as usize] += 1; + } + for &length in &dist_lengths { + code_length_frequencies[length as usize] += 1; + } + let mut code_length_lengths = [0u8; 19]; + let mut code_length_codes = [0u16; 19]; + build_huffman_tree( + &code_length_frequencies, + &mut code_length_lengths, + &mut code_length_codes, + 7, + ); + + for j in 0..19 { + self.write_bits(code_length_lengths[CLCL_ORDER[j]] as u64, 3)?; + } + + for &length in lengths.iter().chain(&dist_lengths) { + self.write_bits( + code_length_codes[length as usize] as u64, + code_length_lengths[length as usize], + )?; + } for symbol in &symbols { match symbol { @@ -472,24 +497,6 @@ impl Compressor { let sym = *lit as usize; self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; } - Symbol::Rle { length: 3 } if rle_three => { - self.write_bits(codes[0] as u64, lengths[0] as u8)?; - self.write_bits(codes[0] as u64, lengths[0] as u8)?; - self.write_bits(codes[0] as u64, lengths[0] as u8)?; - } - Symbol::Rle { length: 4 } if rle_four => { - self.write_bits(codes[0] as u64, lengths[0] as u8)?; - self.write_bits(codes[0] as u64, lengths[0] as u8)?; - self.write_bits(codes[0] as u64, lengths[0] as u8)?; - } - Symbol::Rle { length } => { - let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; - self.write_bits(codes[sym] as u64, lengths[sym] as u8)?; - let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; - let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; - self.write_bits(extra, len_extra + 1)?; - self.write_bits(dist_codes[0] as u64, dist_lengths[0])?; - } Symbol::Backref { length, distance, @@ -506,21 +513,16 @@ impl Compressor { dist_lengths[*dist_sym as usize], )?; let dist_extra = DIST_SYM_TO_DIST_EXTRA[*dist_sym as usize]; - let extra = ((*distance as u32) & BITMASKS[dist_extra as usize]) as u64; - self.write_bits(extra, dist_extra)?; + let extra = *distance - DIST_SYM_TO_DIST_BASE[*dist_sym as usize]; + + self.write_bits(extra as u64, dist_extra)?; } } } self.write_bits(codes[256] as u64, lengths[256])?; } - Ok(()) - } - - /// Write the remainder of the stream and return the inner writer. - pub fn finish(mut self) -> io::Result { // Write end of block - self.write_bits(HUFFMAN_CODES[256] as u64, HUFFMAN_LENGTHS[256])?; self.flush()?; // Write Adler32 checksum @@ -620,12 +622,29 @@ pub fn compress_to_vec(input: &[u8]) -> Vec { #[cfg(test)] mod tests { + use crate::decompress; + use super::*; use rand::Rng; + #[test] + fn test_distance_to_dist_sym() { + assert_eq!(distance_to_dist_sym(1), 0); + assert_eq!(distance_to_dist_sym(2), 1); + assert_eq!(distance_to_dist_sym(3), 2); + assert_eq!(distance_to_dist_sym(4), 3); + assert_eq!(distance_to_dist_sym(5), 4); + assert_eq!(distance_to_dist_sym(7), 5); + assert_eq!(distance_to_dist_sym(9), 6); + assert_eq!(distance_to_dist_sym(13), 7); + assert_eq!(distance_to_dist_sym(18), 8); + assert_eq!(distance_to_dist_sym(257), 16); + } + fn roundtrip(data: &[u8]) { let compressed = compress_to_vec(data); - let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap(); + //let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap(); + let decompressed = crate::decompress_to_vec(&compressed).unwrap(); assert_eq!(&decompressed, data); } diff --git a/src/decompress.rs b/src/decompress.rs index f89747e..3e3da3b 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -293,6 +293,7 @@ impl Decompressor { self.fill_buffer(remaining_input); } } + let code_length_codes: [u16; 19] = crate::compute_codes(&code_length_lengths) .ok_or(DecompressionError::BadCodeLengthHuffmanTree)?; @@ -1251,6 +1252,7 @@ mod tests { } #[test] + #[ignore] fn zero_length() { let mut compressed = crate::compress_to_vec(b"").to_vec();