Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion benchmarks/compress-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ async fn run_compress(

match display_format {
DisplayFormat::Table => {
render_table(&mut writer, measurements.timings, &targets)?;
render_table(&mut writer, measurements.timings, &targets, None)?;
render_table(
&mut writer,
measurements.ratios,
Expand All @@ -201,6 +201,7 @@ async fn run_compress(
} else {
vec![]
},
None,
)
}
DisplayFormat::GhJson => {
Expand Down
84 changes: 80 additions & 4 deletions benchmarks/duckdb-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,26 @@ use clap::value_parser;
use duckdb_bench::DuckClient;
use tokio::runtime::Runtime;
use vortex::metrics::tracing::set_global_labels;
use vortex::utils::aliases::hash_map::HashMap;
use vortex::utils::aliases::hash_set::HashSet;
use vortex_bench::BenchmarkArg;
use vortex_bench::CompactionStrategy;
use vortex_bench::Engine;
use vortex_bench::Format;
use vortex_bench::Opt;
use vortex_bench::Opts;
use vortex_bench::Target;
use vortex_bench::conversions::convert_parquet_directory_to_vortex;
use vortex_bench::create_benchmark;
use vortex_bench::create_output_writer;
use vortex_bench::display::DisplayFormat;
use vortex_bench::display::render_table;
use vortex_bench::runner::BenchmarkMode;
use vortex_bench::runner::SqlBenchmarkRunner;
use vortex_bench::runner::filter_queries;
use vortex_bench::setup_logging_and_tracing;
use vortex_bench::v3;
use vortex_bench::v3::V3Record;

/// Common arguments shared across benchmarks
#[derive(Parser)]
Expand Down Expand Up @@ -64,6 +69,11 @@ struct Args {
#[arg(long)]
gh_json_v3: Option<PathBuf>,

/// Compare with a JSONL file exported with --gh-json-v3.
/// The intersection of queries, formats, and datasets is displayed.
#[arg(short, long)]
baseline: Option<PathBuf>,

#[arg(long, default_value_t = false)]
track_memory: bool,

Expand All @@ -73,7 +83,7 @@ struct Args {
#[arg(long, default_value = "unknown")]
runner: String,

#[arg(long, value_delimiter = ',', value_parser = value_parser!(Format))]
#[arg(long, default_value = "vortex", value_delimiter = ',', value_parser = value_parser!(Format))]
formats: Vec<Format>,

#[arg(long = "opt", value_delimiter = ',', value_parser = value_parser!(Opt))]
Expand Down Expand Up @@ -196,14 +206,80 @@ fn main() -> anyhow::Result<()> {
)?;

if !args.explain {
let current_records = runner.v3_records();

if let Some(path) = args.gh_json_v3.as_ref() {
v3::write_jsonl_to_path(path, &runner.v3_records())?;
v3::write_jsonl_to_path(path, &current_records)?;
}

let benchmark_id = format!("duckdb-{}", benchmark.dataset_name());
let writer = create_output_writer(&args.display_format, args.output_path, &benchmark_id)?;
runner.export_to(&args.display_format, writer)?;
let mut writer =
create_output_writer(&args.display_format, args.output_path, &benchmark_id)?;

if let Some(baseline_path) = args.baseline
&& matches!(args.display_format, DisplayFormat::Table)
{
let baseline_records = v3::read_jsonl_from_path(&baseline_path)?;
let baseline_map = build_query_baseline_map(&baseline_records, &current_records);
let targets = args
.formats
.iter()
.map(|f| Target::new(Engine::DuckDB, *f))
.collect::<Vec<_>>();
let results = runner.into_results();
if !results.memory_measurements.is_empty() {
render_table(&mut writer, results.memory_measurements, &targets, None)?;
}
render_table(
&mut writer,
results.query_measurements,
&targets,
Some(&baseline_map),
)?;
} else {
runner.export_to(&args.display_format, writer)?;
}
}

Ok(())
}

/// Builds a lookup of baseline query timings, restricted to the datasets present in the
/// current run.
///
/// Only `V3Record::QueryMeasurement` records are considered; every other record kind in
/// either slice is ignored. A baseline measurement is kept when its
/// `(dataset, dataset_variant, scale_factor)` triple also occurs on at least one query
/// measurement in `current`.
///
/// # Returns
///
/// A map keyed by `(query_idx, format)` whose value is the baseline record's `value_ns`.
/// The key does not include the dataset triple, so if several retained baseline records
/// share the same `(query_idx, format)`, the one appearing last in `baseline` wins.
fn build_query_baseline_map(
    baseline: &[V3Record],
    current: &[V3Record],
) -> HashMap<(u32, String), u64> {
    // Borrow the dimension strings instead of cloning them: the set is only used for
    // membership tests while both input slices are still alive.
    let current_dims: HashSet<(&str, Option<&str>, Option<&str>)> = current
        .iter()
        .filter_map(|record| {
            let V3Record::QueryMeasurement(qm) = record else {
                return None;
            };
            Some((
                qm.dataset.as_str(),
                qm.dataset_variant.as_deref(),
                qm.scale_factor.as_deref(),
            ))
        })
        .collect();

    baseline
        .iter()
        .filter_map(|record| {
            let V3Record::QueryMeasurement(qm) = record else {
                return None;
            };
            let dims = (
                qm.dataset.as_str(),
                qm.dataset_variant.as_deref(),
                qm.scale_factor.as_deref(),
            );
            // Only the format name has to be owned, because it becomes part of the map key.
            current_dims
                .contains(&dims)
                .then(|| ((qm.query_idx, qm.format.clone()), qm.value_ns))
        })
        .collect()
}
2 changes: 1 addition & 1 deletion benchmarks/random-access-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ async fn run_random_access(

match display_format {
DisplayFormat::Table => {
render_table(&mut writer, measurements, &targets)?;
render_table(&mut writer, measurements, &targets, None)?;
}
DisplayFormat::GhJson => {
print_measurements_json(&mut writer, measurements)?;
Expand Down
94 changes: 70 additions & 24 deletions vortex-bench/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
use std::io::Write;
use std::iter;

use anyhow::Result;
use clap::ValueEnum;
use itertools::Itertools;
use tabled::builder::Builder;
Expand All @@ -29,7 +30,8 @@ pub fn render_table<W: Write, T: ToTable>(
writer: &mut W,
all_measurements: Vec<T>,
targets: &[Target],
) -> anyhow::Result<()> {
baseline: Option<&HashMap<(u32, String), u64>>,
) -> Result<()> {
let mut measurements: HashMap<Target, Vec<TableValue>> =
HashMap::with_capacity(all_measurements.len().div_ceil(targets.len()));

Expand All @@ -45,9 +47,8 @@ pub fn render_table<W: Write, T: ToTable>(

measurements.values_mut().sorted_unstable();

// The first format serves as the baseline
let baseline_target = &targets[0];
let baseline = measurements[baseline_target].clone();
let first_target = &targets[0];
let reference = measurements[first_target].clone();

let mut table_builder = Builder::default();
let mut colors = vec![];
Expand All @@ -57,37 +58,82 @@ pub fn render_table<W: Write, T: ToTable>(
if engines.len() > 1 {
table_builder.push_record(
iter::once("".to_owned())
.chain(targets.iter().map(move |t| format!("{}", t.engine)))
.chain(targets.iter().flat_map(|t| {
let label = format!("{}", t.engine);
if baseline.is_some() {
vec![label, String::new()]
} else {
vec![label]
}
}))
.collect::<Vec<String>>(),
);
}

table_builder.push_record(
iter::once("Benchmark".to_owned())
.chain(targets.iter().map(|t| format!("{}", t.format)))
.chain(targets.iter().flat_map(|t| {
if baseline.is_some() {
vec![
format!("{} (baseline)", t.format),
format!("{} (current)", t.format),
]
} else {
vec![format!("{}", t.format)]
}
}))
.collect::<Vec<String>>(),
);

for (idx, baseline_measure) in baseline.iter().enumerate() {
let query_baseline = baseline_measure.value;
let mut row = vec![baseline_measure.name.clone()];
for (col_idx, target) in targets.iter().enumerate() {
let measurement = &measurements[target][idx];
let value = measurement.value;

if target != baseline_target {
let color = color(query_baseline, value);

let mut row = Vec::with_capacity(1 + targets.len() * (1 + baseline.is_some() as usize));
for (row_idx, ref_m) in reference.iter().enumerate() {
row.clear();
row.push(ref_m.name.clone());

if let Some(baseline) = baseline {
let query_id = ref_m.id.map(|i| i as u32);
for (target_col, target) in targets.iter().enumerate() {
let measurement = &measurements[target][row_idx];
let value = measurement.value;
// baseline stores nanoseconds, TableValue uses microseconds.
let bv_us = query_id.and_then(|id| {
baseline
.get(&(id, target.format.name().to_string()))
.map(|&ns| ns / 1_000)
});
let Some(bv_us) = bv_us else {
// We have already filtered missing values in
// build_query_baseline_map
anyhow::bail!("Query id or baseline value missing");
};

assert!(bv_us > 0);
let bv = MeasurementValue::Int(bv_us as u128);
let ratio = value / bv;
row.push(format!("{bv:.2} {}", measurement.unit));
row.push(format!("{value:.2} {} ({ratio:.2})", measurement.unit));
colors.push(Colorization::exact(
vec![color],
(idx + header_count, col_idx + 1),
))
vec![color(bv, value)],
(row_idx + header_count, 2 + target_col * 2),
));
}
} else {
let query_baseline = ref_m.value;
for (col_idx, target) in targets.iter().enumerate() {
let measurement = &measurements[target][row_idx];
let value = measurement.value;
if target != first_target {
colors.push(Colorization::exact(
vec![color(query_baseline, value)],
(row_idx + header_count, col_idx + 1),
));
}
let ratio = value / query_baseline;
row.push(format!("{value:.2} {} ({ratio:.2})", measurement.unit));
}

let ratio = value / query_baseline;
row.push(format!("{value:.2} {} ({ratio:.2})", measurement.unit));
}
table_builder.push_record(row);

table_builder.push_record(&row);
}

let mut table = table_builder.build();
Expand All @@ -105,7 +151,7 @@ pub fn render_table<W: Write, T: ToTable>(
pub fn print_measurements_json<T: ToJson>(
writer: &mut dyn Write,
all_measurements: Vec<T>,
) -> anyhow::Result<()> {
) -> Result<()> {
for measurement in all_measurements {
writeln!(writer, "{}", measurement.to_json())?;
}
Expand Down
4 changes: 2 additions & 2 deletions vortex-bench/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,13 +465,13 @@ pub fn export_results<W: Write>(

if !memory.is_empty() {
match display_format {
DisplayFormat::Table => render_table(&mut output, memory, &targets)?,
DisplayFormat::Table => render_table(&mut output, memory, &targets, None)?,
DisplayFormat::GhJson => print_measurements_json(&mut output, memory)?,
};
}

match display_format {
DisplayFormat::Table => render_table(&mut output, queries, &targets)?,
DisplayFormat::Table => render_table(&mut output, queries, &targets, None)?,
DisplayFormat::GhJson => print_measurements_json(&mut output, queries)?,
};

Expand Down
Loading
Loading