Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ rustflags = ["--cfg", "tokio_unstable"]

[alias]
bump-versions = "run -p upgrade-version --"
llm = "run --package xtask-llm-benchmark --bin llm_benchmark --"

[target.x86_64-pc-windows-msvc]
# Use a different linker. Otherwise, the build fails with some obscure linker error that
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,19 @@ jobs:
echo "It looks like the CLI docs have changed:"
exit 1
fi

llm_ci_check:
name: Verify docs/rustdoc_json hashes
permissions:
contents: read
if: ${{ github.event_name == 'pull_request' }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2

- name: Run hash check (both langs)
working-directory: crates/xtask-llm-benchmark
run: cargo llm ci-check
21 changes: 21 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ members = [
"tools/license-check",
"crates/bindings-typescript/test-app/server",
"crates/bindings-typescript/test-react-router-app/server",
"crates/xtask-llm-benchmark",
]
default-members = ["crates/cli", "crates/standalone", "crates/update"]
# cargo feature graph resolver. v3 is default in edition2024 but workspace
Expand Down
33 changes: 33 additions & 0 deletions crates/xtask-llm-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[package]
name = "xtask-llm-benchmark"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
default-run = "llm_benchmark"

[lints]
workspace = true

[dependencies]
anyhow.workspace = true
serde.workspace = true
serde_json.workspace = true
blake3.workspace = true
chrono = { version = "0.4", features = ["clock", "serde"] }
async-trait = "0.1.89"
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
urlencoding = "2.1.3"
reqwest = { version = "0.12", features = ["json"] }
futures = "0.3.31"
tempfile = "3.23.0"
fs2 = "0.4.3"
heck = "0.5.0"
thiserror = "2.0.17"
stdb = { package = "spacetimedb", path = "../bindings" }

[lib]
path = "src/lib.rs"

[[bin]]
name = "llm_benchmark"
path = "src/bin/llm_benchmark.rs"
198 changes: 198 additions & 0 deletions crates/xtask-llm-benchmark/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#![allow(clippy::disallowed_macros)]

use std::{
env, fs, io,
path::{Component, Path, PathBuf},
process::Command,
};

fn main() {
// === Paths ===
let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
let benches_root = manifest_dir.join("src/benchmarks");

let gen_dir = manifest_dir.join("src/generated");
let registry_rs = gen_dir.join("registry.rs");

fs::create_dir_all(&gen_dir).unwrap();

// We'll gather generated module blocks + match arms
let mut mods_src = String::new();
let mut arms_src = String::new();

// Track whether we actually saw anything. If we saw nothing,
// that's almost always a wrong-path / didn't put build.rs in right crate problem.
let mut found_any = false;

// Walk: src/benchmarks/<category>/<task>/spec.rs
for cat_entry in read_dir_sorted(&benches_root) {
let cat_entry = cat_entry.unwrap();
let cat_path = cat_entry.path();
if !cat_path.is_dir() {
continue;
}
let category = file_name_string(&cat_path);

for task_entry in read_dir_sorted(&cat_path) {
let task_entry = task_entry.unwrap();
let task_path = task_entry.path();
if !task_path.is_dir() {
continue;
}
let task = file_name_string(&task_path);

let spec_path = task_path.join("spec.rs");
if !spec_path.is_file() {
continue;
}

found_any = true;

// ex: basics_t_005_update
let mod_ident = format_ident(&category, &task);

// registry.rs (we are generating) → ../../benchmarks/.../spec.rs (relative include path)
let rel_spec_path = relative_path(&registry_rs, &spec_path);

// inline submodule
mods_src.push_str(&format!(
"#[allow(dead_code)]\n#[allow(clippy::all)]\nmod {mod_ident} {{\n include!(\"{rel_spec_path}\");\n}}\n\n"
));

// map ("category","task") → that module's spec() fn
arms_src.push_str(&format!(" (\"{category}\", \"{task}\") => {mod_ident}::spec,\n"));
}
}

if !found_any {
// Fail fast instead of silently letting the stub compile.
panic!(
"build.rs: did not find any benchmark specs under {:?}.
This usually means one of two things:
1) The benchmarks actually live somewhere else (path mismatch).
2) build.rs is not in the same crate root as the code you're compiling, \
so Cargo is not running this script for that crate.",
benches_root
);
}

// Build final file string
let file_contents = format!(
"use crate::eval::BenchmarkSpec;
use anyhow::{{anyhow, Result}};
use std::path::Path;

{mods_src}pub fn resolve_by_path(task_root: &Path) -> Result<fn() -> BenchmarkSpec> {{
let task = task_root
.file_name()
.and_then(|s| s.to_str())
.ok_or_else(|| anyhow!(\"missing task name\"))?;
let category = task_root
.parent()
.and_then(|p| p.file_name().and_then(|s| s.to_str()))
.ok_or_else(|| anyhow!(\"missing category name\"))?;

let ctor = match (category, task) {{
{arms_src} _ => return Err(anyhow!(
\"no spec registered for {{}}/{{}} (need spec.rs)\",
category,
task
)),
}};

Ok(ctor)
}}
"
);

// Write unformatted first
fs::write(&registry_rs, file_contents).unwrap();

// Best-effort: format it so CI/rustfmt is happy
let _ = Command::new("rustup").args(["component", "add", "rustfmt"]).status();

let _ = Command::new("rustfmt")
.arg("--edition")
.arg("2021")
.arg(registry_rs.to_string_lossy().to_string())
.status();
}

/// Deterministic read_dir so output order is stable.
fn read_dir_sorted(dir: &Path) -> Vec<io::Result<fs::DirEntry>> {
let mut entries: Vec<_> = fs::read_dir(dir).unwrap().collect();
entries.sort_by_key(|res| {
res.as_ref()
.ok()
.and_then(|e| e.file_name().into_string().ok())
.unwrap_or_default()
});
entries
}

/// Get final path segment as String.
fn file_name_string(p: &Path) -> String {
p.file_name()
.and_then(|s| s.to_str())
.expect("utf8 dir name")
.to_string()
}

/// Turn ("basics","t_005_update") into "basics_t_005_update"
/// - lowercase
/// - non [a-z0-9_] → '_'
/// - if first char is digit, prefix '_'
fn format_ident(category: &str, task: &str) -> String {
fn sanitize(s: &str) -> String {
s.chars()
.map(|c| {
if c.is_ascii_alphanumeric() || c == '_' {
c.to_ascii_lowercase()
} else {
'_'
}
})
.collect()
}

let mut ident = format!("{}_{}", sanitize(category), sanitize(task));
if ident.chars().next().map(|c| c.is_ascii_digit()).unwrap_or(false) {
ident.insert(0, '_');
}
ident
}

/// Build a relative path string from `from` file to `to` file,
/// normalized to `/` for portability so `include!` is valid.
fn relative_path(from: &Path, to: &Path) -> String {
let base_dir = from.parent().expect("registry.rs must have a parent dir");
let rel = diff_paths(to, base_dir).unwrap_or_else(|| to.to_path_buf());
rel.to_string_lossy().replace('\\', "/")
}

/// Minimal diff_paths (no extra crate).
fn diff_paths(path: &Path, base: &Path) -> Option<PathBuf> {
let path_comps: Vec<Component<'_>> = path.components().collect();
let base_comps: Vec<Component<'_>> = base.components().collect();

// find shared prefix
let common_len = path_comps.iter().zip(&base_comps).take_while(|(a, b)| a == b).count();

// walk back from base
let mut out = PathBuf::new();
for _ in base_comps.iter().skip(common_len) {
out.push("..");
}

// then walk forward into path
for comp in path_comps.iter().skip(common_len) {
match comp {
Component::Normal(os) => out.push(os),
Component::CurDir => out.push("."),
Component::ParentDir => out.push(".."),
Component::RootDir | Component::Prefix(_) => return None,
}
}

Some(out)
}
12 changes: 12 additions & 0 deletions crates/xtask-llm-benchmark/src/bench/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
pub mod publishers;
pub(crate) mod results_merge;
pub mod runner;
pub mod spacetime_guard;
mod templates;
pub mod types;
pub(crate) mod utils;

pub use publishers::{DotnetPublisher, Publisher, SpacetimeRustPublisher};
pub use runner::TaskRunner;
pub use types::{RunOutcome, TaskPaths};
pub use utils::bench_route_concurrency;
Loading
Loading