Skip to content

feat(parquet-writer)!: enable multiple compaction output #5292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/mito2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,8 @@ required-features = ["test"]
name = "bench_filter_time_partition"
harness = false
required-features = ["test"]

[[bench]]
name = "run_bench"
harness = false
required-features = ["test"]
140 changes: 140 additions & 0 deletions src/mito2/benches/run_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use mito2::compaction::run::{
find_overlapping_items, find_sorted_runs, merge_seq_files, reduce_runs, Item, Ranged,
};

/// Mock file used as input for the compaction-run benchmarks in this file.
///
/// It only carries the values the benchmarks need: a range (`start`, `end`)
/// and a `size`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MockFile {
    /// First component of the range reported by this mock.
    start: i64,
    /// Second component of the range reported by this mock.
    end: i64,
    /// Size reported by this mock; the unit is not defined in this file.
    size: usize,
}

/// Exposes the mock's `start`/`end` fields as its range.
impl Ranged for MockFile {
    type BoundType = i64;

    /// Returns `(start, end)` exactly as stored in the struct.
    fn range(&self) -> (Self::BoundType, Self::BoundType) {
        let Self { start, end, .. } = self;
        (*start, *end)
    }
}

/// Lets the mock be used wherever the compaction helpers expect an `Item`.
impl Item for MockFile {
    /// Returns the stored `size` field unchanged.
    fn size(&self) -> usize {
        let Self { size, .. } = self;
        *size
    }
}

/// Builds `n` identical mock files, each with range `(0, 10)` and size `10`.
///
/// Every file spans exactly the same range, so all inputs fully overlap one
/// another.
fn generate_test_files(n: usize) -> Vec<MockFile> {
    let template = MockFile {
        start: 0,
        end: 10,
        size: 10,
    };
    vec![template; n]
}

/// Benchmarks `find_sorted_runs` on 10, 100 and 1000 mock files.
///
/// The input vector is built inside the benchmark closure and the same
/// vector is handed out mutably on every timed iteration.
fn bench_find_sorted_runs(c: &mut Criterion) {
    const SIZES: [usize; 3] = [10, 100, 1000];

    let mut group = c.benchmark_group("find_sorted_runs");
    for size in SIZES {
        let bench_id = format!("size_{}", size);
        group.bench_function(bench_id, |bencher| {
            let mut inputs = generate_test_files(size);
            bencher.iter(|| {
                find_sorted_runs(black_box(&mut inputs));
            });
        });
    }
    group.finish();
}

/// Benchmarks `reduce_runs` on the runs produced from 10, 100 and 1000 mock files.
///
/// The runs are computed once per invocation of the benchmark closure; each
/// timed iteration passes a fresh clone of them to `reduce_runs`, so the
/// clone is part of the measured work.
fn bench_reduce_runs(c: &mut Criterion) {
    const SIZES: [usize; 3] = [10, 100, 1000];

    let mut group = c.benchmark_group("reduce_runs");
    for size in SIZES {
        let bench_id = format!("size_{}", size);
        group.bench_function(bench_id, |bencher| {
            let mut inputs = generate_test_files(size);
            let sorted_runs = find_sorted_runs(&mut inputs);
            bencher.iter(|| {
                reduce_runs(black_box(sorted_runs.clone()));
            });
        });
    }
    group.finish();
}

fn bench_find_overlapping_items(c: &mut Criterion) {
let mut group = c.benchmark_group("find_overlapping_items");

for size in [10, 100, 1000].iter() {
group.bench_function(format!("size_{}", size), |b| {
// Create two sets of files with some overlapping ranges
let mut files1 = Vec::with_capacity(*size);
let mut files2 = Vec::with_capacity(*size);

for i in 0..*size {
files1.push(MockFile {
start: i as i64,
end: (i + 5) as i64,
size: 10,
});

files2.push(MockFile {
start: (i + 3) as i64,
end: (i + 8) as i64,
size: 10,
});
}

b.iter(|| {
find_overlapping_items(black_box(&mut files1), black_box(&mut files2));
});
});
}
group.finish();
}

fn bench_merge_seq_files(c: &mut Criterion) {
let mut group = c.benchmark_group("merge_seq_files");

for size in [10, 100, 1000].iter() {
group.bench_function(format!("size_{}", size), |b| {
// Create a set of files with varying sizes
let mut files = Vec::with_capacity(*size);

for i in 0..*size {
// Create files with different sizes to test the scoring algorithm
let file_size = if i % 3 == 0 {
5
} else if i % 3 == 1 {
10
} else {
15
};

files.push(MockFile {
start: i as i64,
end: (i + 1) as i64,
size: file_size,
});
}

b.iter(|| {
merge_seq_files(black_box(&files), black_box(Some(50)));
});
});
}
group.finish();
}

// Register every benchmark defined in this file under the `benches` group
// (with Criterion's default configuration) and generate the entry point.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets =
        bench_find_sorted_runs,
        bench_reduce_runs,
        bench_find_overlapping_items,
        bench_merge_seq_files
}
criterion_main!(benches);
2 changes: 1 addition & 1 deletion src/mito2/src/access_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ impl FilePathProvider for WriteCachePathProvider {
/// Path provider that builds paths in region storage path.
#[derive(Clone, Debug)]
pub(crate) struct RegionFilePathFactory {
region_dir: String,
pub(crate) region_dir: String,
}

impl RegionFilePathFactory {
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
mod buckets;
pub mod compactor;
pub mod picker;
mod run;
pub mod run;
mod task;
#[cfg(test)]
mod test_util;
Expand Down
1 change: 1 addition & 0 deletions src/mito2/src/compaction/compactor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ impl Compactor for DefaultCompactor {
compacted_inputs.extend(output.inputs.iter().map(|f| f.meta_ref().clone()));
let write_opts = WriteOptions {
write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
max_file_size: picker_output.max_file_size,
..Default::default()
};

Expand Down
11 changes: 7 additions & 4 deletions src/mito2/src/compaction/picker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ pub struct PickerOutput {
pub outputs: Vec<CompactionOutput>,
pub expired_ssts: Vec<FileHandle>,
pub time_window_size: i64,
/// Max single output file size in bytes.
pub max_file_size: Option<usize>,
}

/// SerializedPickerOutput is a serializable version of PickerOutput that replaces [CompactionOutput] and [FileHandle] with [SerializedCompactionOutput] and [FileMeta].
Expand All @@ -53,6 +55,7 @@ pub struct SerializedPickerOutput {
pub outputs: Vec<SerializedCompactionOutput>,
pub expired_ssts: Vec<FileMeta>,
pub time_window_size: i64,
pub max_file_size: Option<usize>,
}

impl From<&PickerOutput> for SerializedPickerOutput {
Expand All @@ -76,6 +79,7 @@ impl From<&PickerOutput> for SerializedPickerOutput {
outputs,
expired_ssts,
time_window_size: input.time_window_size,
max_file_size: input.max_file_size,
}
}
}
Expand Down Expand Up @@ -111,6 +115,7 @@ impl PickerOutput {
outputs,
expired_ssts,
time_window_size: input.time_window_size,
max_file_size: input.max_file_size,
}
}
}
Expand All @@ -131,10 +136,7 @@ pub fn new_picker(
} else {
match compaction_options {
CompactionOptions::Twcs(twcs_opts) => Arc::new(TwcsPicker {
max_active_window_runs: twcs_opts.max_active_window_runs,
max_active_window_files: twcs_opts.max_active_window_files,
max_inactive_window_runs: twcs_opts.max_inactive_window_runs,
max_inactive_window_files: twcs_opts.max_inactive_window_files,
trigger_file_num: twcs_opts.trigger_file_num,
time_window_seconds: twcs_opts.time_window_seconds(),
max_output_file_size: twcs_opts.max_output_file_size.map(|r| r.as_bytes()),
append_mode,
Expand Down Expand Up @@ -179,6 +181,7 @@ mod tests {
],
expired_ssts: expired_ssts_file_handle.clone(),
time_window_size: 1000,
max_file_size: None,
};

let picker_output_str =
Expand Down
Loading
Loading