From c9fe47e4ff7859e8c52b10cdc7fd972cb2e5fc36 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Thu, 6 Mar 2025 20:55:21 +0100 Subject: [PATCH 01/11] perf: replace partition_point in block index and disjoint level --- src/level_manifest/level.rs | 23 ++++++++++++++++++----- src/segment/block_index/mod.rs | 23 +++++++++++++++++++---- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/level_manifest/level.rs b/src/level_manifest/level.rs index e3203333..f29881cf 100644 --- a/src/level_manifest/level.rs +++ b/src/level_manifest/level.rs @@ -176,15 +176,28 @@ impl<'a> DisjointLevel<'a> { /// Returns the segment that possibly contains the key. pub fn get_segment_containing_key(&self, key: &[u8]) -> Option { let level = &self.0; + let segments = &level.segments; - let idx = level - .segments - .partition_point(|x| &*x.metadata.key_range.1 < key); + // NOTE: PERF: For some reason, hand-rolling a binary search is + // faster than using slice::partition_point + let mut left = 0; + let mut right = segments.len(); + + while left < right { + let mid = (left + right) / 2; + let segment = segments.get(mid).expect("should exist"); + + if segment.metadata.key_range.max() < &key { + left = mid + 1; + } else { + right = mid; + } + } level .segments - .get(idx) - .filter(|x| x.is_key_in_key_range(key)) + .get(left) + .filter(|x| x.metadata.key_range.min() <= &key) .cloned() } diff --git a/src/segment/block_index/mod.rs b/src/segment/block_index/mod.rs index a539fcc5..a6247471 100644 --- a/src/segment/block_index/mod.rs +++ b/src/segment/block_index/mod.rs @@ -44,8 +44,23 @@ impl KeyedBlockIndex for [KeyedBlockHandle] { key: &[u8], _: CachePolicy, ) -> crate::Result> { - let idx = self.partition_point(|x| &*x.end_key < key); - Ok(self.get(idx)) + // NOTE: PERF: For some reason, hand-rolling a binary search is + // faster than using slice::partition_point + let mut left = 0; + let mut right = self.len(); + + while left < right { + let mid = (left + right) / 2; + let item 
= self.get(mid).expect("should exist"); + + if item.end_key < key { + left = mid + 1; + } else { + right = mid; + } + } + + Ok(self.get(left)) } fn get_last_block_containing_key( @@ -129,10 +144,10 @@ pub enum BlockIndexImpl { #[allow(clippy::expect_used)] mod tests { use super::*; - use crate::{segment::value_block::BlockOffset, Slice}; + use crate::{segment::value_block::BlockOffset, UserKey}; use test_log::test; - fn bh>(end_key: K, offset: BlockOffset) -> KeyedBlockHandle { + fn bh>(end_key: K, offset: BlockOffset) -> KeyedBlockHandle { KeyedBlockHandle { end_key: end_key.into(), offset, From f1e460effcbb7f5911755680556dbfa4b5578e1d Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Thu, 6 Mar 2025 21:20:07 +0100 Subject: [PATCH 02/11] comment --- src/segment/value_block_consumer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment/value_block_consumer.rs b/src/segment/value_block_consumer.rs index 0e399d28..eb167a06 100644 --- a/src/segment/value_block_consumer.rs +++ b/src/segment/value_block_consumer.rs @@ -18,6 +18,7 @@ impl ValueBlockConsumer { Self::with_bounds(inner, None, None) } + // TODO: PERF: benchmark replacing partition_point #[must_use] pub fn with_bounds( inner: Arc, From 0466df888cdc020a2a70dcc0a37c9f2f48b70178 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sat, 8 Mar 2025 00:55:16 +0100 Subject: [PATCH 03/11] benchmark custom partition_point --- Cargo.toml | 6 +++ benches/partition_point.rs | 21 +++++++++ src/binary_search.rs | 93 ++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 ++ 4 files changed, 123 insertions(+) create mode 100644 benches/partition_point.rs create mode 100644 src/binary_search.rs diff --git a/Cargo.toml b/Cargo.toml index 921cf71a..a9a64e6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -98,3 +98,9 @@ name = "fd_table" harness = false path = "benches/fd_table.rs" required-features = [] + +[[bench]] +name = "partition_point" +harness = false +path = "benches/partition_point.rs" +required-features = [] diff --git 
a/benches/partition_point.rs b/benches/partition_point.rs new file mode 100644 index 00000000..58d55bf4 --- /dev/null +++ b/benches/partition_point.rs @@ -0,0 +1,21 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use lsm_tree::binary_search::partition_point; + +fn bench_partition_point(c: &mut Criterion) { + let mut group = c.benchmark_group("partition_point"); + + for item_count in [10, 100, 1_000, 10_000, 100_000, 1_000_000] { + let items = (0..item_count).collect::>(); + + group.bench_function(format!("native {item_count}"), |b| { + b.iter(|| items.partition_point(|&x| x <= 5_000)) + }); + + group.bench_function(format!("rewrite {item_count}"), |b| { + b.iter(|| partition_point(&items, |&x| x <= 5_000)) + }); + } +} + +criterion_group!(benches, bench_partition_point); +criterion_main!(benches); diff --git a/src/binary_search.rs b/src/binary_search.rs new file mode 100644 index 00000000..05f67493 --- /dev/null +++ b/src/binary_search.rs @@ -0,0 +1,93 @@ +// Copyright (c) 2024-present, fjall-rs +// This source code is licensed under both the Apache 2.0 and MIT License +// (found in the LICENSE-* files in the repository) + +// NOTE: PERF: For some reason, hand-rolling a binary search is +// faster than using slice::partition_point + +/// Returns the index of the partition point according to the given predicate +/// (the index of the first element of the second partition). +/// +/// Faster alternative to [`slice::partition_point`] (according to benchmarks). +pub fn partition_point(slice: &[T], pred: F) -> usize +where + F: Fn(&T) -> bool, +{ + let mut left = 0; + let mut right = slice.len(); + + if right == 0 { + return 0; + } + + while left < right { + let mid = (left + right) / 2; + + // TODO: PERF: could use get_unchecked for perf... 
but unsafe + let item = slice.get(mid).expect("should exist"); + + if pred(item) { + left = mid + 1; + } else { + right = mid; + } + } + + left +} + +#[cfg(test)] +mod tests { + use super::partition_point; + use test_log::test; + + #[test] + fn binary_search_first() { + let items = [1, 2, 3, 4, 5]; + let idx = partition_point(&items, |&x| x < 1); + assert_eq!(0, idx); + + let pp_idx = items.partition_point(|&x| x < 1); + assert_eq!(pp_idx, idx); + } + + #[test] + fn binary_search_last() { + let items = [1, 2, 3, 4, 5]; + let idx = partition_point(&items, |&x| x < 5); + assert_eq!(4, idx); + + let pp_idx = items.partition_point(|&x| x < 5); + assert_eq!(pp_idx, idx); + } + + #[test] + fn binary_search_middle() { + let items = [1, 2, 3, 4, 5]; + let idx = partition_point(&items, |&x| x < 3); + assert_eq!(2, idx); + + let pp_idx = items.partition_point(|&x| x < 3); + assert_eq!(pp_idx, idx); + } + + #[test] + fn binary_search_none() { + let items = [1, 2, 3, 4, 5]; + let idx = partition_point(&items, |&x| x < 10); + assert_eq!(5, idx); + + let pp_idx = items.partition_point(|&x| x < 10); + assert_eq!(pp_idx, idx); + } + + #[test] + fn binary_search_empty() { + let items: [i32; 0] = []; + let idx = partition_point(&items, |&x| x < 10); + assert_eq!(0, idx); + + let pp_idx = items.partition_point(|&x| x < 10); + assert_eq!(pp_idx, idx); + } +} diff --git a/src/lib.rs b/src/lib.rs index 3a7920c5..6942c9bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,6 +124,9 @@ mod any_tree; mod r#abstract; +#[doc(hidden)] +pub mod binary_search; + #[doc(hidden)] pub mod blob_tree; From ffeeea569f347a85c3eac933c93efee6f0d62de7 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sat, 8 Mar 2025 00:55:26 +0100 Subject: [PATCH 04/11] perf: replace partition_point everywhere --- src/level_manifest/level.rs | 33 +++++++++++------------------ src/segment/block_index/mod.rs | 22 ++++--------------- src/segment/value_block.rs | 8 +++++-- src/segment/value_block_consumer.rs | 6 +++--- 4 files 
changed, 25 insertions(+), 44 deletions(-) diff --git a/src/level_manifest/level.rs b/src/level_manifest/level.rs index a769320f..267a3421 100644 --- a/src/level_manifest/level.rs +++ b/src/level_manifest/level.rs @@ -2,7 +2,10 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use crate::{key_range::KeyRange, segment::meta::SegmentId, HashSet, Segment, UserKey}; +use crate::{ + binary_search::partition_point, key_range::KeyRange, segment::meta::SegmentId, HashSet, + Segment, UserKey, +}; use std::ops::Bound; /// Level of an LSM-tree @@ -175,25 +178,13 @@ pub struct DisjointLevel<'a>(&'a Level); impl<'a> DisjointLevel<'a> { /// Returns the segment that possibly contains the key. pub fn get_segment_containing_key(&self, key: &[u8]) -> Option { - // NOTE: PERF: For some reason, hand-rolling a binary search is - // faster than using slice::partition_point - let mut left = 0; - let mut right = self.0.segments.len(); - - while left < right { - let mid = (left + right) / 2; - let segment = self.0.segments.get(mid).expect("should exist"); - - if segment.metadata.key_range.max() < &key { - left = mid + 1; - } else { - right = mid; - } - } + let idx = partition_point(&self.0.segments, |segment| { + segment.metadata.key_range.max() < &key + }); self.0 .segments - .get(left) + .get(idx) .filter(|x| x.metadata.key_range.min() <= &key) .cloned() } @@ -208,10 +199,10 @@ impl<'a> DisjointLevel<'a> { let lo = match &key_range.0 { Bound::Unbounded => 0, Bound::Included(start_key) => { - level.partition_point(|segment| segment.metadata.key_range.1 < start_key) + partition_point(level, |segment| segment.metadata.key_range.1 < start_key) } Bound::Excluded(start_key) => { - level.partition_point(|segment| segment.metadata.key_range.1 <= start_key) + partition_point(level, |segment| segment.metadata.key_range.1 <= start_key) } }; @@ -222,7 +213,7 @@ impl<'a> DisjointLevel<'a> { let hi = match &key_range.1 { 
Bound::Unbounded => level.len() - 1, Bound::Included(end_key) => { - let idx = level.partition_point(|segment| segment.metadata.key_range.0 <= end_key); + let idx = partition_point(level, |segment| segment.metadata.key_range.0 <= end_key); if idx == 0 { return None; @@ -231,7 +222,7 @@ impl<'a> DisjointLevel<'a> { idx.saturating_sub(1) // To avoid underflow } Bound::Excluded(end_key) => { - let idx = level.partition_point(|segment| segment.metadata.key_range.0 < end_key); + let idx = partition_point(level, |segment| segment.metadata.key_range.0 < end_key); if idx == 0 { return None; diff --git a/src/segment/block_index/mod.rs b/src/segment/block_index/mod.rs index a6247471..eb94972e 100644 --- a/src/segment/block_index/mod.rs +++ b/src/segment/block_index/mod.rs @@ -12,6 +12,7 @@ use super::{ block::Block, value_block::{BlockOffset, CachePolicy}, }; +use crate::binary_search::partition_point; use block_handle::KeyedBlockHandle; use full_index::FullBlockIndex; use two_level_index::TwoLevelBlockIndex; @@ -44,23 +45,8 @@ impl KeyedBlockIndex for [KeyedBlockHandle] { key: &[u8], _: CachePolicy, ) -> crate::Result> { - // NOTE: PERF: For some reason, hand-rolling a binary search is - // faster than using slice::partition_point - let mut left = 0; - let mut right = self.len(); - - while left < right { - let mid = (left + right) / 2; - let item = self.get(mid).expect("should exist"); - - if item.end_key < key { - left = mid + 1; - } else { - right = mid; - } - } - - Ok(self.get(left)) + let idx = partition_point(self, |item| item.end_key < key); + Ok(self.get(idx)) } fn get_last_block_containing_key( @@ -68,7 +54,7 @@ impl KeyedBlockIndex for [KeyedBlockHandle] { key: &[u8], _: CachePolicy, ) -> crate::Result> { - let idx = self.partition_point(|x| &*x.end_key <= key); + let idx = partition_point(self, |x| &*x.end_key <= key); if idx == 0 { return Ok(self.first()); diff --git a/src/segment/value_block.rs b/src/segment/value_block.rs index 40c923dc..6ab6163c 100644 --- 
a/src/segment/value_block.rs +++ b/src/segment/value_block.rs @@ -3,7 +3,10 @@ // (found in the LICENSE-* files in the repository) use super::{block::Block, id::GlobalSegmentId}; -use crate::{descriptor_table::FileDescriptorTable, value::InternalValue, BlockCache}; +use crate::{ + binary_search::partition_point, descriptor_table::FileDescriptorTable, value::InternalValue, + BlockCache, +}; use std::sync::Arc; #[derive(Copy, Clone, Default, Debug, std::hash::Hash, PartialEq, Eq, Ord, PartialOrd)] @@ -54,7 +57,8 @@ pub type ValueBlock = Block; impl ValueBlock { #[must_use] pub fn get_latest(&self, key: &[u8]) -> Option<&InternalValue> { - let idx = self.items.partition_point(|item| &*item.key.user_key < key); + // TODO: bench hand rolled binary search + let idx = partition_point(&self.items, |item| &*item.key.user_key < key); self.items .get(idx) diff --git a/src/segment/value_block_consumer.rs b/src/segment/value_block_consumer.rs index eb167a06..292a8d84 100644 --- a/src/segment/value_block_consumer.rs +++ b/src/segment/value_block_consumer.rs @@ -3,7 +3,7 @@ // (found in the LICENSE-* files in the repository) use super::value_block::ValueBlock; -use crate::value::InternalValue; +use crate::{binary_search::partition_point, value::InternalValue}; use std::sync::Arc; pub struct ValueBlockConsumer { @@ -26,13 +26,13 @@ impl ValueBlockConsumer { end_key: Option<&[u8]>, ) -> Self { let mut lo = start_key.as_ref().map_or(0, |key| { - inner.items.partition_point(|x| &*x.key.user_key < *key) + partition_point(&inner.items, |x| &*x.key.user_key < *key) }); let hi = end_key.as_ref().map_or_else( || inner.items.len() - 1, |key| { - let idx = inner.items.partition_point(|x| &*x.key.user_key <= *key); + let idx = partition_point(&inner.items, |x| &*x.key.user_key <= *key); if idx == 0 { let first = inner From 26eb2f36e6b00930f11a9c373c65351d46597ac3 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sat, 8 Mar 2025 01:01:34 +0100 Subject: [PATCH 05/11] refactor --- 
src/segment/value_block.rs | 1 - src/segment/value_block_consumer.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/segment/value_block.rs b/src/segment/value_block.rs index 6ab6163c..630ee0bc 100644 --- a/src/segment/value_block.rs +++ b/src/segment/value_block.rs @@ -57,7 +57,6 @@ pub type ValueBlock = Block; impl ValueBlock { #[must_use] pub fn get_latest(&self, key: &[u8]) -> Option<&InternalValue> { - // TODO: bench hand rolled binary search let idx = partition_point(&self.items, |item| &*item.key.user_key < key); self.items diff --git a/src/segment/value_block_consumer.rs b/src/segment/value_block_consumer.rs index 292a8d84..916253bf 100644 --- a/src/segment/value_block_consumer.rs +++ b/src/segment/value_block_consumer.rs @@ -18,7 +18,6 @@ impl ValueBlockConsumer { Self::with_bounds(inner, None, None) } - // TODO: PERF: benchmark replacing partition_point #[must_use] pub fn with_bounds( inner: Arc, From 2d8686e873369bd9c4ff2b562ed988c1cea38331 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Tue, 11 Mar 2025 18:04:56 +0100 Subject: [PATCH 06/11] perf: get_unchecked in partition_point --- src/binary_search.rs | 28 +++++++++++++--------------- src/lib.rs | 2 +- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/binary_search.rs b/src/binary_search.rs index 05f67493..0370da6d 100644 --- a/src/binary_search.rs +++ b/src/binary_search.rs @@ -2,9 +2,6 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -// NOTE: PERF: For some reason, hand-rolling a binary search is -// faster than using slice::partition_point - /// Returns the index of the partition point according to the given predicate /// (the index of the first element of the second partition). /// @@ -23,8 +20,9 @@ where while left < right { let mid = (left + right) / 2; - // TODO: PERF: could use get_unchecked for perf... 
but unsafe - let item = slice.get(mid).expect("should exist"); + // SAFETY: See https://github.com/rust-lang/rust/blob/ebf0cf75d368c035f4c7e7246d203bd469ee4a51/library/core/src/slice/mod.rs#L2834-L2836 + #[warn(unsafe_code)] + let item = unsafe { slice.get_unchecked(mid) }; if pred(item) { left = mid + 1; @@ -47,8 +45,8 @@ mod tests { let idx = partition_point(&items, |&x| x < 1); assert_eq!(0, idx); - let pp_idx = items.partition_point(|&x| x < 1); - assert_eq!(pp_idx, idx); + let std_pp_idx = items.partition_point(|&x| x < 1); + assert_eq!(std_pp_idx, idx); } #[test] @@ -57,8 +55,8 @@ mod tests { let idx = partition_point(&items, |&x| x < 5); assert_eq!(4, idx); - let pp_idx = items.partition_point(|&x| x < 5); - assert_eq!(pp_idx, idx); + let std_pp_idx = items.partition_point(|&x| x < 5); + assert_eq!(std_pp_idx, idx); } #[test] @@ -67,8 +65,8 @@ mod tests { let idx = partition_point(&items, |&x| x < 3); assert_eq!(2, idx); - let pp_idx = items.partition_point(|&x| x < 3); - assert_eq!(pp_idx, idx); + let std_pp_idx = items.partition_point(|&x| x < 3); + assert_eq!(std_pp_idx, idx); } #[test] @@ -77,8 +75,8 @@ mod tests { let idx = partition_point(&items, |&x| x < 10); assert_eq!(5, idx); - let pp_idx = items.partition_point(|&x| x < 10); - assert_eq!(pp_idx, idx); + let std_pp_idx = items.partition_point(|&x| x < 10); + assert_eq!(std_pp_idx, idx); } #[test] @@ -87,7 +85,7 @@ mod tests { let idx = partition_point(&items, |&x| x < 10); assert_eq!(0, idx); - let pp_idx = items.partition_point(|&x| x < 10); - assert_eq!(pp_idx, idx); + let std_pp_idx = items.partition_point(|&x| x < 10); + assert_eq!(std_pp_idx, idx); } } diff --git a/src/lib.rs b/src/lib.rs index 6942c9bd..a8c575ed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,7 +90,7 @@ #![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")] #![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")] -#![forbid(unsafe_code)] 
+#![deny(unsafe_code)] #![deny(clippy::all, missing_docs, clippy::cargo)] #![deny(clippy::unwrap_used)] #![deny(clippy::indexing_slicing)] From 4982254b5f889d8722dabf40f83eade6e2bd9dec Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Tue, 11 Mar 2025 18:12:28 +0100 Subject: [PATCH 07/11] test: added partition_point fuzz test --- .gitignore | 3 --- README.md | 2 +- UNSAFE.md | 5 +++++ fuzz/.gitignore | 2 ++ fuzz/Cargo.toml | 19 +++++++++++++++++++ fuzz/fuzz_targets/partition_point.rs | 19 +++++++++++++++++++ 6 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 UNSAFE.md create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/partition_point.rs diff --git a/.gitignore b/.gitignore index 721a867c..6fd45755 100644 --- a/.gitignore +++ b/.gitignore @@ -14,8 +14,5 @@ Cargo.lock *.pdb .lsm.data -.data -/old_* .test* -.block_index_test .bench diff --git a/README.md b/README.md index 150a32a2..b1368150 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs) in Rus This is the most feature-rich LSM-tree implementation in Rust! 
It features: - Thread-safe BTreeMap-like API -- 100% safe & stable Rust +- [99.9% safe](./UNSAFE.md) & stable Rust - Block-based tables with compression support - Range & prefix searching with forward and reverse iteration - Size-tiered, (concurrent) Leveled and FIFO compaction diff --git a/UNSAFE.md b/UNSAFE.md new file mode 100644 index 00000000..5b7f6033 --- /dev/null +++ b/UNSAFE.md @@ -0,0 +1,5 @@ +# Unsafe usage + +Currently, the project itself uses only **one** unsafe block (ignoring dependencies, which are tested separately): + +- https://github.com/fjall-rs/lsm-tree/blob/2d8686e873369bd9c4ff2b562ed988c1cea38331/src/binary_search.rs#L23-L25 diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 00000000..b400c278 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,2 @@ +corpus +artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 00000000..81c62b56 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "lsm-tree-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +lsm-tree = { path = ".." 
} + +[[bin]] +name = "partition_point" +path = "fuzz_targets/partition_point.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/partition_point.rs b/fuzz/fuzz_targets/partition_point.rs new file mode 100644 index 00000000..356e1c86 --- /dev/null +++ b/fuzz/fuzz_targets/partition_point.rs @@ -0,0 +1,19 @@ +#![no_main] +use libfuzzer_sys::{ + arbitrary::{Arbitrary, Unstructured}, + fuzz_target, +}; +use lsm_tree::binary_search::partition_point; + +fuzz_target!(|data: &[u8]| { + let mut unstructured = Unstructured::new(data); + + if let Ok(mut items) = as Arbitrary>::arbitrary(&mut unstructured) { + items.sort(); + items.dedup(); + + let idx = partition_point(&items, |&x| x < 128); + let std_pp_idx = items.partition_point(|&x| x < 128); + assert_eq!(std_pp_idx, idx); + } +}); From 2ab305dc702378449ec493ce64883f47d4cb7459 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Tue, 11 Mar 2025 19:38:44 +0100 Subject: [PATCH 08/11] doc --- src/binary_search.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/binary_search.rs b/src/binary_search.rs index 0370da6d..c33a8690 100644 --- a/src/binary_search.rs +++ b/src/binary_search.rs @@ -5,7 +5,7 @@ /// Returns the index of the partition point according to the given predicate /// (the index of the first element of the second partition). /// -/// Faster alternative to [`slice::partition_point`] (according to benchmarks). +/// Faster alternative to [`std::slice::partition_point`] (according to benchmarks). 
pub fn partition_point(slice: &[T], pred: F) -> usize where F: Fn(&T) -> bool, From 258d4858756c5f46fbbfdb19b3a65cef29578715 Mon Sep 17 00:00:00 2001 From: Marvin <33938500+marvin-j97@users.noreply.github.com> Date: Sun, 23 Mar 2025 02:14:29 +0100 Subject: [PATCH 09/11] Update binary_search.rs --- src/binary_search.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/binary_search.rs b/src/binary_search.rs index c33a8690..5cb7913f 100644 --- a/src/binary_search.rs +++ b/src/binary_search.rs @@ -5,7 +5,7 @@ /// Returns the index of the partition point according to the given predicate /// (the index of the first element of the second partition). /// -/// Faster alternative to [`std::slice::partition_point`] (according to benchmarks). +/// This is a workaround for a compiler regression in rustc: <https://github.com/rust-lang/rust/issues/138796> pub fn partition_point(slice: &[T], pred: F) -> usize where F: Fn(&T) -> bool, From 16e0e57b5964b9df982811e92ad77701b1a58383 Mon Sep 17 00:00:00 2001 From: Marvin <33938500+marvin-j97@users.noreply.github.com> Date: Sat, 29 Mar 2025 19:55:50 +0100 Subject: [PATCH 10/11] Update partition_point.rs --- benches/partition_point.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benches/partition_point.rs b/benches/partition_point.rs index 58d55bf4..c528022f 100644 --- a/benches/partition_point.rs +++ b/benches/partition_point.rs @@ -7,6 +7,8 @@ fn bench_partition_point(c: &mut Criterion) { for item_count in [10, 100, 1_000, 10_000, 100_000, 1_000_000] { let items = (0..item_count).collect::>(); + // TODO: replace search key with random integer + group.bench_function(format!("native {item_count}"), |b| { b.iter(|| items.partition_point(|&x| x <= 5_000)) }); From d23a0f1d6700ffe372da8cd4976112f9e2ce365e Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sat, 29 Mar 2025 20:04:45 +0100 Subject: [PATCH 11/11] fix --- src/segment/value_block.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/src/segment/value_block.rs b/src/segment/value_block.rs index 6a552ac6..7668a592 100644 --- a/src/segment/value_block.rs +++ b/src/segment/value_block.rs @@ -5,9 +5,8 @@ use super::{block::Block, id::GlobalSegmentId}; use crate::{ binary_search::partition_point, descriptor_table::FileDescriptorTable, - segment::block::offset::BlockOffset, value::InternalValue, BlockCache, + segment::block::offset::BlockOffset, value::InternalValue, Cache, }; -use crate::{cache::Cache, descriptor_table::FileDescriptorTable, value::InternalValue}; use std::sync::Arc; #[derive(Copy, Clone, Debug, PartialEq, Eq)]