Skip to content

Commit ae6332e

Browse files
authored
Blazing fast for file (#66)
* adding git stage support * finding file owner without walking the whole repo * rebase * adding for file benchmarks * bumping version
1 parent e788e3d commit ae6332e

File tree

15 files changed

+747
-11
lines changed

15 files changed

+747
-11
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "codeowners"
3-
version = "0.2.5"
3+
version = "0.2.6"
44
edition = "2024"
55

66
[profile.release]

dev/run_benchmarks_for_file.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
#
# Benchmarks the three `for-file` implementations against a single file using
# hyperfine and exports the results as markdown under tmp/.
#
# Usage (from the root of the application under test):
#   /usr/bin/env bash ../rubyatscale/codeowners-rs/dev/run_benchmarks_for_file.sh <path/to/file>

# Without a target file every benchmarked command would be invoked with an
# empty path, producing meaningless timings — fail fast instead.
if [ -z "$1" ]; then
  echo "usage: $0 <path/to/file>" >&2
  exit 1
fi

mkdir -p tmp

# Check if the file exists before removing it
if [ -f "tmp/codeowners_for_file_benchmarks.md" ]; then
  rm tmp/codeowners_for_file_benchmarks.md
fi

# NOTE(review): hyperfine's --export-markdown appears to overwrite its target
# file, so this header line may not survive the run — confirm.
echo "To run these benchmarks on your application, you can place this repo next to your rails application and run /usr/bin/env bash ../rubyatscale/codeowners-rs/dev/run_benchmarks_for_file.sh <path/to/file>" >> tmp/codeowners_for_file_benchmarks.md

hyperfine --warmup=2 --runs=3 --export-markdown tmp/codeowners_for_file_benchmarks.md \
  "../rubyatscale/codeowners-rs/target/release/codeowners for-file \"$1\"" \
  "bin/codeowners for_file \"$1\"" \
  "bin/codeownership for_file \"$1\""

dev/run_benchmarks.sh renamed to dev/run_benchmarks_for_gv.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22

33
# Remove any report left over from a previous run. The guard must test the
# same path that is removed: checking the pre-rename name
# (tmp/codeowners_benchmarks.md) would leave a stale *_gv.md report in place,
# and would make `rm` fail when only the old file exists.
if [ -f "tmp/codeowners_benchmarks_gv.md" ]; then
  rm tmp/codeowners_benchmarks_gv.md
fi
77

8-
echo "To run these benchmarks on your application, you can place this repo next to your rails application and run /usr/bin/env bash ../rubyatscale/codeowners-rs/dev/run_benchmarks.sh from the root of your application" >> tmp/codeowners_benchmarks.md
8+
echo "To run these benchmarks on your application, you can place this repo next to your rails application and run /usr/bin/env bash ../rubyatscale/codeowners-rs/dev/run_benchmarks_for_gv.sh from the root of your application" >> tmp/codeowners_benchmarks_gv.md
99

10-
hyperfine --warmup=2 --runs=3 --export-markdown tmp/codeowners_benchmarks.md \
10+
hyperfine --warmup=2 --runs=3 --export-markdown tmp/codeowners_benchmarks_gv.md \
1111
'../rubyatscale/codeowners-rs/target/release/codeowners gv' \
12-
'bin/codeowners-rs gv'
12+
'bin/codeowners validate' \
13+
'bin/codeownership validate'

src/bin/compare_for_file.rs

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
// This is a tool to compare the output of the original codeowners CLI with the optimized version.
2+
// It's useful for verifying that the optimized version is correct.
3+
//
4+
// It's not used in CI, but it's useful for debugging.
5+
//
6+
// To run it, use `cargo run --bin compare_for_file <absolute_project_root>`
7+
//
8+
// It will compare the output of the original codeowners CLI with the optimized version for all files in the project.
9+
10+
use std::{
11+
fs::File,
12+
io::{self, Write},
13+
path::{Path, PathBuf},
14+
process::Command,
15+
};
16+
17+
use codeowners::config::Config as OwnershipConfig;
18+
use codeowners::ownership::{FileOwner, for_file_fast};
19+
use codeowners::runner::{RunConfig, Runner};
20+
use ignore::WalkBuilder;
21+
22+
fn main() {
23+
let project_root = std::env::args().nth(1).expect("usage: compare_for_file <absolute_project_root>");
24+
let project_root = PathBuf::from(project_root);
25+
if !project_root.is_absolute() {
26+
eprintln!("Project root must be absolute");
27+
std::process::exit(2);
28+
}
29+
30+
let codeowners_file_path = project_root.join(".github/CODEOWNERS");
31+
let config_path = project_root.join("config/code_ownership.yml");
32+
33+
let run_config = RunConfig {
34+
project_root: project_root.clone(),
35+
codeowners_file_path,
36+
config_path: config_path.clone(),
37+
no_cache: false,
38+
};
39+
40+
// Build the original, accurate-but-slower runner once
41+
let runner = match Runner::new(&run_config) {
42+
Ok(r) => r,
43+
Err(e) => {
44+
eprintln!("Failed to initialize Runner: {}", e);
45+
std::process::exit(1);
46+
}
47+
};
48+
49+
// Load config once for the optimized path
50+
let config_file = match File::open(&config_path) {
51+
Ok(f) => f,
52+
Err(e) => {
53+
eprintln!("Can't open config file {}: {}", config_path.display(), e);
54+
std::process::exit(1);
55+
}
56+
};
57+
let optimized_config: OwnershipConfig = match serde_yaml::from_reader(config_file) {
58+
Ok(c) => c,
59+
Err(e) => {
60+
eprintln!("Can't parse config file {}: {}", config_path.display(), e);
61+
std::process::exit(1);
62+
}
63+
};
64+
65+
let mut total_files: usize = 0;
66+
let mut diff_count: usize = 0;
67+
68+
// Prefer tracked files from git; fall back to walking the FS if git is unavailable
69+
let tracked_files_output = Command::new("git").arg("-C").arg(&project_root).arg("ls-files").arg("-z").output();
70+
71+
match tracked_files_output {
72+
Ok(output) if output.status.success() => {
73+
let bytes = output.stdout;
74+
for rel in bytes.split(|b| *b == 0u8) {
75+
if rel.is_empty() {
76+
continue;
77+
}
78+
let rel_str = match std::str::from_utf8(rel) {
79+
Ok(s) => s,
80+
Err(_) => continue,
81+
};
82+
let abs_path = project_root.join(rel_str);
83+
// Only process regular files that currently exist
84+
if !abs_path.is_file() {
85+
continue;
86+
}
87+
88+
total_files += 1;
89+
let original = run_original(&runner, &abs_path);
90+
let optimized = run_optimized(&project_root, &optimized_config, &abs_path);
91+
92+
if original != optimized {
93+
diff_count += 1;
94+
println!("\n==== {} ====", abs_path.display());
95+
println!("ORIGINAL:\n{}", original);
96+
println!("OPTIMIZED:\n{}", optimized);
97+
let _ = io::stdout().flush();
98+
}
99+
100+
if total_files % 1000 == 0 {
101+
eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count);
102+
}
103+
}
104+
}
105+
_ => {
106+
eprintln!("git ls-files failed; falling back to filesystem walk (untracked files may be included)");
107+
let walker = WalkBuilder::new(&project_root)
108+
.hidden(false)
109+
.git_ignore(true)
110+
.git_exclude(true)
111+
.follow_links(false)
112+
.build();
113+
114+
for result in walker {
115+
let entry = match result {
116+
Ok(e) => e,
117+
Err(err) => {
118+
eprintln!("walk error: {}", err);
119+
continue;
120+
}
121+
};
122+
if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
123+
continue;
124+
}
125+
let path = entry.path();
126+
total_files += 1;
127+
128+
let original = run_original(&runner, path);
129+
let optimized = run_optimized(&project_root, &optimized_config, path);
130+
131+
if original != optimized {
132+
diff_count += 1;
133+
println!("\n==== {} ====", path.display());
134+
println!("ORIGINAL:\n{}", original);
135+
println!("OPTIMIZED:\n{}", optimized);
136+
let _ = io::stdout().flush();
137+
}
138+
139+
if total_files % 1000 == 0 {
140+
eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count);
141+
}
142+
}
143+
}
144+
}
145+
146+
println!("Checked {} files. Diffs: {}", total_files, diff_count);
147+
if diff_count > 0 {
148+
std::process::exit(3);
149+
}
150+
}
151+
152+
fn run_original(runner: &Runner, file_path: &Path) -> String {
153+
let result = runner.for_file(&file_path.to_string_lossy());
154+
if !result.validation_errors.is_empty() {
155+
return result.validation_errors.join("\n");
156+
}
157+
if !result.io_errors.is_empty() {
158+
return format!("IO_ERROR: {}", result.io_errors.join(" | "));
159+
}
160+
result.info_messages.join("\n")
161+
}
162+
163+
fn run_optimized(project_root: &Path, config: &OwnershipConfig, file_path: &Path) -> String {
164+
let owners: Vec<FileOwner> = match for_file_fast::find_file_owners(project_root, config, file_path) {
165+
Ok(v) => v,
166+
Err(e) => return format!("IO_ERROR: {}", e),
167+
};
168+
match owners.len() {
169+
0 => format!("{}", FileOwner::default()),
170+
1 => format!("{}", owners[0]),
171+
_ => {
172+
let mut lines = vec!["Error: file is owned by multiple teams!".to_string()];
173+
for owner in owners {
174+
lines.push(format!("\n{}", owner));
175+
}
176+
lines.join("\n")
177+
}
178+
}
179+
}

src/ownership.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use tracing::{info, instrument};
1111

1212
mod file_generator;
1313
mod file_owner_finder;
14+
pub mod for_file_fast;
1415
pub(crate) mod mapper;
1516
pub(crate) mod parser;
1617
mod validator;
@@ -32,7 +33,7 @@ use self::{
3233
pub struct Ownership {
3334
project: Arc<Project>,
3435
}
35-
36+
#[derive(Debug)]
3637
pub struct FileOwner {
3738
pub team: Team,
3839
pub team_config_file_path: String,

0 commit comments

Comments
 (0)