diff --git a/.github/workflows/kani.yml b/.github/workflows/kani.yml new file mode 100644 index 0000000..6274944 --- /dev/null +++ b/.github/workflows/kani.yml @@ -0,0 +1,22 @@ +name: Kani + +on: + push: + branches: ["develop"] + pull_request: {} + workflow_dispatch: {} + +permissions: + actions: read + contents: read + +jobs: + kani: + name: "kani" + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - name: Run Kani verification + uses: model-checking/kani-github-action@v1.1 + with: + args: --output-format terse diff --git a/Cargo.toml b/Cargo.toml index 950d3f7..bbbd414 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ name = "fsst" [lints.rust] warnings = "deny" missing_docs = "deny" +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } [lints.clippy] all = { level = "deny", priority = -1 } diff --git a/src/builder.rs b/src/builder.rs index 31cace5..e49230e 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -986,3 +986,40 @@ mod test { ); } } + +#[cfg(kani)] +mod kani_proofs { + use super::*; + + /// Tier 2: Verify bitmap set/is_set consistency. + /// + /// After calling `set(index)` on a bitmap, `is_set(index)` must return true. + /// This proof verifies the bit manipulation in the bitmap implementation + /// correctly tracks which indices have been set. + #[kani::proof] + fn proof_bitmap_set_is_set() { + let index: usize = kani::any(); + kani::assume(index <= FSST_CODE_MASK as usize); + + let mut bitmap = CodesBitmap::default(); + bitmap.set(index); + assert!(bitmap.is_set(index)); + } + + /// Tier 2: Verify bitmap index calculation stays in bounds. + /// + /// The bitmap uses `index >> 6` to select which u64 in the array to modify, + /// and `index % 64` to select which bit. This proof verifies that for any + /// valid index (0..=511), the array access is always in bounds. + #[kani::proof] + fn proof_bitmap_index_bounds() { + let index: usize = kani::any(); + kani::assume(index <= FSST_CODE_MASK as usize); + + let map = index >> 6; + assert!(map < 8); + + let bit = index % 64; + assert!(bit < 64); + } +} diff --git a/src/lib.rs b/src/lib.rs index 8b6d219..2c76b60 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -905,7 +905,7 @@ pub(crate) fn advance_8byte_word(word: u64, bytes: usize) -> u64 { fn validate_symbol_order(symbol_lens: &[u8]) { // Ensure that the symbol table is ordered by length, 23456781 let mut expected = 2; - for (idx, &len) in symbol_lens.iter().enumerate() { + for (_idx, &len) in symbol_lens.iter().enumerate() { if expected == 1 { assert_eq!( len, 1, @@ -1007,3 +1007,111 @@ mod test { assert_eq!(built_symbols, symbols); } } + +#[cfg(kani)] +mod kani_proofs { + use super::*; + + /// Tier 1: Verify escape code detection bit manipulation correctness. + /// + /// The escape mask formula detects bytes equal to 0xFF (ESCAPE_CODE) in an 8-byte word. + /// This proof verifies that for any input word, the high bit in each byte position + /// of the escape_mask is set if and only if that byte equals ESCAPE_CODE (0xFF). + #[kani::proof] + fn proof_escape_mask_correctness() { + let next_block: u64 = kani::any(); + let escape_mask = (next_block & 0x8080808080808080) + & ((((!next_block) & 0x7F7F7F7F7F7F7F7F) + 0x7F7F7F7F7F7F7F7F) ^ 0x8080808080808080); + + // Verify: high bit set in each byte position iff that byte == 0xFF + for i in 0..8u32 { + let byte = ((next_block >> (i * 8)) & 0xFF) as u8; + let mask_bit = (escape_mask >> (i * 8 + 7)) & 1; + assert!((byte == ESCAPE_CODE) == (mask_bit == 1)); + } + } + + /// Tier 1: Verify symbol length calculation bounds. + /// + /// Symbol::len() must always return a value in [1, 8] for any possible u64 representation. + /// The special case of all-zeros (0x0000000000000000) should return 1, not 0. + #[kani::proof] + fn proof_symbol_len_bounds() { + let value: u64 = kani::any(); + let len = Symbol(value).len(); + assert!(len >= 1 && len <= 8); + } + + /// Tier 1: Verify symbol concatenation produces valid results. + /// + /// When two symbols are concatenated, the resulting symbol must have a valid length + /// (between the length of the first symbol and 8). + #[kani::proof] + fn proof_symbol_concat_valid() { + let a: u64 = kani::any(); + let b: u64 = kani::any(); + let sym_a = Symbol(a); + let sym_b = Symbol(b); + kani::assume(sym_a.len() + sym_b.len() <= 8); + + let result = sym_a.concat(sym_b); + assert!(result.len() >= sym_a.len()); + assert!(result.len() <= 8); + } + + /// Tier 1: Verify word advancement has no undefined behavior from oversized shifts. + /// + /// Shifting a u64 by 64 or more bits is undefined behavior in Rust/LLVM. + /// This proof verifies that advance_8byte_word correctly handles the edge case + /// where bytes == 8, which would otherwise require a 64-bit shift. + #[kani::proof] + fn proof_advance_word_no_ub() { + let word: u64 = kani::any(); + let bytes: usize = kani::any(); + kani::assume(bytes <= 8); + + let result = advance_8byte_word(word, bytes); + if bytes == 8 { + assert!(result == 0); + } + } + + /// Tier 2: Verify code bit-packing produces correct field extraction. + /// + /// The Code type packs a code value (8 bits), an "is symbol" flag (1 bit), + /// and a length (4 bits) into a u16. This proof verifies that these fields + /// can be correctly extracted after construction. + #[kani::proof] + fn proof_code_field_extraction() { + let code_val: u8 = kani::any(); + let len: usize = kani::any(); + kani::assume(len >= 1 && len <= 8); + kani::assume(code_val < 255); // Code 255 is reserved for ESCAPE_CODE + + let code = Code::new_symbol(code_val, len); + assert!(code.code() == code_val); + assert!(code.len() == len as u16); + } + + /// Tier 3: Bounded round-trip verification for small inputs. + /// + /// This proof verifies that for an empty compressor (no trained symbols), + /// compression followed by decompression produces the original input. + /// With an empty symbol table, all bytes are escaped, making this a + /// tractable verification target. + #[kani::proof] + #[kani::unwind(17)] + fn proof_roundtrip_empty_compressor() { + let input: [u8; 4] = kani::any(); + let compressor = CompressorBuilder::new().build(); + let decompressor = compressor.decompressor(); + + let compressed = compressor.compress(&input); + let decompressed = decompressor.decompress(&compressed); + + assert!(decompressed.len() == input.len()); + for i in 0..input.len() { + assert!(decompressed[i] == input[i]); + } + } +} diff --git a/src/lossy_pht.rs b/src/lossy_pht.rs index 83a3203..bdad3c6 100644 --- a/src/lossy_pht.rs +++ b/src/lossy_pht.rs @@ -126,3 +126,21 @@ impl Default for LossyPHT { Self::new() } } + +#[cfg(kani)] +mod kani_proofs { + use super::*; + + /// Tier 2: Verify hash slot calculation always produces in-bounds index. + /// + /// The hash table lookup uses `fsst_hash(prefix_3bytes) & (HASH_TABLE_SIZE - 1)` + /// to compute the slot index. This proof verifies that for any input word, + /// the computed slot is always less than HASH_TABLE_SIZE. + #[kani::proof] + fn proof_hash_slot_bounds() { + let word: u64 = kani::any(); + let prefix_3bytes = word & 0xFF_FF_FF; + let slot = fsst_hash(prefix_3bytes) as usize & (HASH_TABLE_SIZE - 1); + assert!(slot < HASH_TABLE_SIZE); + } +}