diff --git a/METHODOLOGY.md b/METHODOLOGY.md index c1bc483..62b879b 100644 --- a/METHODOLOGY.md +++ b/METHODOLOGY.md @@ -57,6 +57,7 @@ This document describes how `cmt` assembles git diff context and sends it to the | `context_lines` | 20 | Lines of context around changes in unified diff | | `max_lines_per_file` | 2000 | Maximum diff lines per file before truncation | | `max_line_width` | 500 | Maximum characters per line before truncation | +| `max_file_lines` | 5000 | Maximum total line changes per file before prompting to add to .cmtignore | ### Process @@ -75,6 +76,8 @@ pub struct DiffStats { pub insertions: usize, pub deletions: usize, pub file_changes: Vec<(String, usize, usize)>, // (filename, adds, dels) + pub skipped_files: Vec<(String, usize, usize)>, // Files exceeding max_file_lines threshold + pub ignored_files: Vec<(String, usize, usize)>, // Files matched by .cmtignore pub has_unstaged: bool, } ``` @@ -98,6 +101,44 @@ Files excluded from the diff sent to the LLM: ### Build Artifacts - Paths starting with: `dist/`, `build/` +### .cmtignore File + +**Source:** `src/cmtignore.rs` + +You can create a `.cmtignore` file in your repository root to permanently exclude files from commit message generation. This is useful for large generated files (migrations, schemas, etc.) that would overwhelm the LLM context. 
+ +**Format:** +``` +# Lines starting with # are comments +# Glob patterns, one per line + +migrations/target_schema.sql +*.generated.ts +dist/** +``` + +**Supported patterns:** +- Exact paths: `migrations/schema.sql` +- Single glob (`*`): `*.sql` matches files in the repository root only (a single `*` never crosses a `/`) +- Double glob (`**`): `dist/**` matches all files recursively, `**/*.tsx` matches .tsx files at any depth + +When a file exceeds the `max_file_lines` threshold (default: 5000 total line changes), `cmt` will prompt you to add it to `.cmtignore` for future runs: + +``` +The following files exceed 5000 lines changed: + - migrations/target_schema.sql (112K lines) + +Would you like to add them to .cmtignore? [Y/n] +``` + +**Important:** Files in `.cmtignore` are only skipped for LLM analysis - they are still committed normally. The diff statistics (file count, insertions, deletions) include all files. Skipped files are shown dimmed with a `~` marker: + +``` +Staged: 12 files +102850 -9883 + src/main.rs +45 -10 + migrations/target_schema.sql +102607 -9738 ~ +``` + ## 3. 
Semantic Analysis **Source:** `src/analysis.rs` - `analyze_diff()` function @@ -537,6 +578,7 @@ Custom templates stored in `~/.config/cmt/templates/*.hbs` | `context_lines` | 20 | `src/config/defaults.rs` | | `max_lines_per_file` | 2000 | `src/config/defaults.rs` | | `max_line_width` | 500 | `src/config/defaults.rs` | +| `max_file_lines` | 5000 | `src/config/defaults.rs` | | `temperature` | 0.3 | `src/ai/mod.rs` | | `thinking` | `low` | `src/config/cli.rs` | | `provider` | `gemini` | `src/config/defaults.rs` | @@ -552,6 +594,7 @@ Custom templates stored in `~/.config/cmt/templates/*.hbs` |-----------|--------| | >100 files OR >20k changes | Reduce context to 8-15 lines, cap 500 lines/file | | >150 files OR >50k changes | Skip recent commits context entirely | +| Single file >5000 line changes | Prompt to add to `.cmtignore` | ### Token Budget diff --git a/src/bin/main.rs b/src/bin/main.rs index e2e040d..27e16ad 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -4,8 +4,8 @@ use cmt::config_mod::{file as config_file, Config}; use cmt::pricing::{self, PricingCache}; use cmt::template_mod::TemplateManager; use cmt::{ - analyze_diff, create_commit, generate_commit_message, get_current_branch, get_readme_excerpt, - Args, CommitError, CommitOptions, Spinner, + analyze_diff, append_to_cmtignore, create_commit, generate_commit_message, get_current_branch, + get_readme_excerpt, load_cmtignore, Args, CommitError, CommitOptions, Spinner, }; use colored::*; use dotenv::dotenv; @@ -204,12 +204,20 @@ async fn main() { } }; + // Get repository root for .cmtignore + let repo_root = repo.workdir().unwrap_or_else(|| std::path::Path::new(".")); + + // Load .cmtignore patterns + let cmtignore_patterns = load_cmtignore(repo_root); + // Get staged changes (includes both diff text and stats in one pass) let staged = match cmt::get_staged_changes( &repo, args.context_lines, args.max_lines_per_file, args.max_line_width, + args.max_file_lines, + &cmtignore_patterns, ) { Ok(changes) => 
changes, Err(e) => { @@ -218,6 +226,78 @@ async fn main() { process::exit(1); } }; + + // Handle files that exceed the threshold (prompt to add to .cmtignore) + if !staged.stats.skipped_files.is_empty() && !args.yes && !args.message_only { + println!(); + println!( + "{}", + format!( + "The following files exceed {} lines changed:", + args.max_file_lines + ) + .yellow() + .bold() + ); + for (file, adds, dels) in &staged.stats.skipped_files { + let total = adds + dels; + let lines_display = if total >= 1000 { + format!("{}K lines", total / 1000) + } else { + format!("{} lines", total) + }; + println!(" - {} ({})", file, lines_display); + } + println!(); + + print!( + "{}", + "Would you like to add them to .cmtignore? [Y/n] ".cyan() + ); + io::stdout().flush().unwrap(); + + let mut input = String::new(); + let should_add = if io::stdin().read_line(&mut input).is_ok() { + let input = input.trim().to_lowercase(); + input.is_empty() || input == "y" || input == "yes" + } else { + false + }; + + if should_add { + let files_to_add: Vec<String> = staged + .stats + .skipped_files + .iter() + .map(|(f, _, _)| f.clone()) + .collect(); + + match append_to_cmtignore(repo_root, &files_to_add) { + Ok(()) => { + println!( + "{}", + "Added to .cmtignore. These files will be skipped for analysis in future runs." + .green() + ); + println!( + "{}", + "(They will still be committed normally, just not sent to the LLM.)" + .dimmed() + ); + println!(); + } + Err(e) => { + eprintln!( + "{}", + format!("Warning: Failed to update .cmtignore: {}", e) + .yellow() + .bold() + ); + } + } + } + } + let staged_changes = staged.diff_text.clone(); // Determine diff size for adaptive behaviors (very high thresholds - Gemini supports 1M tokens) diff --git a/src/cmtignore.rs b/src/cmtignore.rs new file mode 100644 index 0000000..133ea22 --- /dev/null +++ b/src/cmtignore.rs @@ -0,0 +1,219 @@ +//! 
.cmtignore file support for excluding files from commit message generation + +use std::fs; +use std::io::{self, Write}; +use std::path::Path; + +use crate::config::defaults::CMTIGNORE_FILENAME; + +/// Load patterns from .cmtignore file in the repository root +/// +/// Returns an empty vector if the file doesn't exist or can't be read. +/// Lines starting with # are treated as comments and ignored. +/// Empty lines are also ignored. +pub fn load_cmtignore(repo_root: &Path) -> Vec<String> { + let cmtignore_path = repo_root.join(CMTIGNORE_FILENAME); + + if !cmtignore_path.exists() { + return Vec::new(); + } + + match fs::read_to_string(&cmtignore_path) { + Ok(content) => content + .lines() + .map(|line| line.trim()) + .filter(|line| !line.is_empty() && !line.starts_with('#')) + .map(|line| line.to_string()) + .collect(), + Err(_) => Vec::new(), + } +} + +/// Append file patterns to .cmtignore +/// +/// Creates the file if it doesn't exist. Adds patterns on new lines. +/// Returns Ok(()) on success or an io::Error on failure. 
+pub fn append_to_cmtignore(repo_root: &Path, files: &[String]) -> io::Result<()> { + let cmtignore_path = repo_root.join(CMTIGNORE_FILENAME); + + // Check if file exists and if it ends with a newline + let needs_leading_newline = if cmtignore_path.exists() { + let content = fs::read_to_string(&cmtignore_path)?; + !content.is_empty() && !content.ends_with('\n') + } else { + false + }; + + let mut file = fs::OpenOptions::new() + .create(true) + .append(true) + .open(&cmtignore_path)?; + + // Add leading newline if needed + if needs_leading_newline { + writeln!(file)?; + } + + // Write each file pattern on its own line + for pattern in files { + writeln!(file, "{}", pattern)?; + } + + Ok(()) +} + +/// Check if a file path matches a .cmtignore pattern +/// +/// Supports simple glob patterns: +/// - `*` matches any sequence of characters (except `/`) +/// - `**` matches any sequence of characters (including `/`) +/// - Exact matches work as expected +pub fn matches_pattern(path: &str, pattern: &str) -> bool { + // Normalize path separators + let path = path.replace('\\', "/"); + let pattern = pattern.replace('\\', "/"); + + // Handle ** glob (matches any path depth) + if pattern.contains("**") { + let parts: Vec<&str> = pattern.split("**").collect(); + if parts.len() == 2 { + let prefix = parts[0]; + let suffix = parts[1].trim_start_matches('/'); + + // Check if path starts with prefix (if any) and ends with suffix (if any) + let matches_prefix = prefix.is_empty() || path.starts_with(prefix); + + // For suffix, we need to handle patterns like "*.tsx" - match against filename + let matches_suffix = if suffix.is_empty() { + true + } else if let Some(ext_pattern) = suffix.strip_prefix('*') { + // Suffix like "*.tsx" - check if any path component matches + // ext_pattern is e.g., ".tsx" + path.ends_with(ext_pattern) + } else { + path.ends_with(suffix) + }; + + return matches_prefix && matches_suffix; + } + } + + // Handle * glob (matches within single path component) + if 
pattern.contains('*') { + let parts: Vec<&str> = pattern.split('*').collect(); + if parts.len() == 2 { + let prefix = parts[0]; + let suffix = parts[1]; + + // For single *, don't match across directory separators + let matches_prefix = path.starts_with(prefix); + let matches_suffix = path.ends_with(suffix); + + if matches_prefix && matches_suffix { + // Check that the middle part doesn't contain / + let middle_start = prefix.len(); + let middle_end = path.len().saturating_sub(suffix.len()); + if middle_start <= middle_end { + let middle = &path[middle_start..middle_end]; + return !middle.contains('/'); + } + } + return false; + } + } + + // Exact match + path == pattern +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_load_cmtignore_nonexistent() { + let temp_dir = TempDir::new().unwrap(); + let patterns = load_cmtignore(temp_dir.path()); + assert!(patterns.is_empty()); + } + + #[test] + fn test_load_cmtignore_with_patterns() { + let temp_dir = TempDir::new().unwrap(); + let cmtignore_path = temp_dir.path().join(CMTIGNORE_FILENAME); + + fs::write( + &cmtignore_path, + "# Comment line\n\nmigrations/*.sql\n*.generated.ts\ndist/**\n", + ) + .unwrap(); + + let patterns = load_cmtignore(temp_dir.path()); + assert_eq!(patterns.len(), 3); + assert_eq!(patterns[0], "migrations/*.sql"); + assert_eq!(patterns[1], "*.generated.ts"); + assert_eq!(patterns[2], "dist/**"); + } + + #[test] + fn test_append_to_cmtignore_new_file() { + let temp_dir = TempDir::new().unwrap(); + + append_to_cmtignore( + temp_dir.path(), + &["file1.sql".to_string(), "file2.sql".to_string()], + ) + .unwrap(); + + let content = fs::read_to_string(temp_dir.path().join(CMTIGNORE_FILENAME)).unwrap(); + assert_eq!(content, "file1.sql\nfile2.sql\n"); + } + + #[test] + fn test_append_to_cmtignore_existing_file() { + let temp_dir = TempDir::new().unwrap(); + let cmtignore_path = temp_dir.path().join(CMTIGNORE_FILENAME); + + fs::write(&cmtignore_path, 
"existing.txt\n").unwrap(); + + append_to_cmtignore(temp_dir.path(), &["new.txt".to_string()]).unwrap(); + + let content = fs::read_to_string(&cmtignore_path).unwrap(); + assert_eq!(content, "existing.txt\nnew.txt\n"); + } + + #[test] + fn test_matches_pattern_exact() { + assert!(matches_pattern( + "migrations/schema.sql", + "migrations/schema.sql" + )); + assert!(!matches_pattern( + "migrations/schema.sql", + "migrations/other.sql" + )); + } + + #[test] + fn test_matches_pattern_single_star() { + assert!(matches_pattern("file.generated.ts", "*.generated.ts")); + assert!(matches_pattern("foo.generated.ts", "*.generated.ts")); + assert!(!matches_pattern("src/file.generated.ts", "*.generated.ts")); + + assert!(matches_pattern("migrations/schema.sql", "migrations/*.sql")); + assert!(matches_pattern("migrations/data.sql", "migrations/*.sql")); + assert!(!matches_pattern("other/schema.sql", "migrations/*.sql")); + } + + #[test] + fn test_matches_pattern_double_star() { + assert!(matches_pattern("dist/file.js", "dist/**")); + assert!(matches_pattern("dist/sub/file.js", "dist/**")); + assert!(matches_pattern("dist/a/b/c/file.js", "dist/**")); + assert!(!matches_pattern("src/file.js", "dist/**")); + + assert!(matches_pattern("src/components/Button.tsx", "**/*.tsx")); + assert!(matches_pattern("Button.tsx", "**/*.tsx")); + } +} diff --git a/src/config/cli.rs b/src/config/cli.rs index 93831b0..dca5750 100644 --- a/src/config/cli.rs +++ b/src/config/cli.rs @@ -44,6 +44,10 @@ pub struct Args { #[arg(long, default_value_t = 500)] pub max_line_width: usize, + /// Maximum total line changes per file before prompting to ignore + #[arg(long, default_value_t = 5000)] + pub max_file_lines: usize, + /// Use a specific template for the commit message #[arg(long)] pub template: Option, @@ -292,6 +296,20 @@ mod tests { assert_eq!(args.context_lines, 10); } + #[test] + fn test_max_file_lines_option() { + let args = Args::new_from( + ["cmt", "--max-file-lines", "10000"] + .iter() + 
.map(ToString::to_string), + ); + assert_eq!(args.max_file_lines, 10000); + + // Default should be 5000 + let args = Args::new_from(["cmt"].iter().map(ToString::to_string)); + assert_eq!(args.max_file_lines, 5000); + } + #[test] fn test_list_templates_flag() { let args = Args::new_from(["cmt", "--list-templates"].iter().map(ToString::to_string)); diff --git a/src/config/defaults.rs b/src/config/defaults.rs index 0b36cf8..44b365b 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -7,6 +7,10 @@ pub const SHOW_RAW_DIFF: bool = false; pub const CONTEXT_LINES: u32 = 20; // Full function context - Gemini Flash supports 1M tokens pub const MAX_LINES_PER_FILE: usize = 2000; // Allow large files - we have token budget pub const MAX_LINE_WIDTH: usize = 500; // Allow wider lines for better context +pub const MAX_FILE_LINES: usize = 5000; // Threshold for prompting to add to .cmtignore + +// Cmtignore defaults +pub const CMTIGNORE_FILENAME: &str = ".cmtignore"; // AI provider defaults pub const DEFAULT_PROVIDER: &str = "gemini"; @@ -48,6 +52,7 @@ show_raw_diff = {} context_lines = {} max_lines_per_file = {} max_line_width = {} +max_file_lines = {} # AI provider options provider = "{}" # Options: {} @@ -70,6 +75,7 @@ recent_commits_count = {} CONTEXT_LINES, MAX_LINES_PER_FILE, MAX_LINE_WIDTH, + MAX_FILE_LINES, DEFAULT_PROVIDER, AVAILABLE_PROVIDERS.join(", "), DEFAULT_CLAUDE_MODEL, diff --git a/src/config/mod.rs b/src/config/mod.rs index fc1b517..72fb23d 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -46,6 +46,7 @@ pub struct Config { pub context_lines: u32, pub max_lines_per_file: usize, pub max_line_width: usize, + pub max_file_lines: usize, // AI provider options pub provider: String, @@ -72,6 +73,7 @@ impl Default for Config { context_lines: defaults::CONTEXT_LINES, max_lines_per_file: defaults::MAX_LINES_PER_FILE, max_line_width: defaults::MAX_LINE_WIDTH, + max_file_lines: defaults::MAX_FILE_LINES, provider: 
defaults::DEFAULT_PROVIDER.to_string(), model: None, temperature: None, @@ -153,6 +155,9 @@ impl Config { if other.max_line_width != defaults::MAX_LINE_WIDTH { self.max_line_width = other.max_line_width; } + if other.max_file_lines != defaults::MAX_FILE_LINES { + self.max_file_lines = other.max_file_lines; + } if other.provider != defaults::DEFAULT_PROVIDER { self.provider = other.provider.clone(); } @@ -185,6 +190,7 @@ impl Config { context_lines: args.context_lines, max_lines_per_file: args.max_lines_per_file, max_line_width: args.max_line_width, + max_file_lines: args.max_file_lines, provider: args.provider.clone(), model: args.model.clone(), temperature: args.temperature, diff --git a/src/git.rs b/src/git.rs index f6c723b..b8fce47 100644 --- a/src/git.rs +++ b/src/git.rs @@ -1,8 +1,11 @@ use colored::*; use git2::{Error as GitError, Repository, Sort}; use std::cmp; +use std::collections::HashSet; use std::path::Path; +use crate::cmtignore::matches_pattern; + /// Stats about staged changes for display #[derive(Debug, Clone)] pub struct DiffStats { @@ -10,6 +13,8 @@ pub struct DiffStats { pub insertions: usize, pub deletions: usize, pub file_changes: Vec<(String, usize, usize)>, // (filename, adds, dels) + pub skipped_files: Vec<(String, usize, usize)>, // Files exceeding max_file_lines threshold + pub ignored_files: Vec<(String, usize, usize)>, // Files matched by .cmtignore pub has_unstaged: bool, } @@ -29,10 +34,48 @@ impl DiffStats { println!(); } - // Print compact header + // Collect all files for calculating column widths + let all_files: Vec<_> = self + .file_changes + .iter() + .chain(self.ignored_files.iter()) + .chain(self.skipped_files.iter()) + .collect(); + + // Calculate column widths + let max_name_len = all_files.iter().map(|(f, _, _)| f.len()).max().unwrap_or(0); + let max_adds = all_files + .iter() + .map(|(_, a, _)| *a) + .max() + .unwrap_or(0) + .max(self.insertions); + let max_dels = all_files + .iter() + .map(|(_, _, d)| *d) + .max() + 
.unwrap_or(0) + .max(self.deletions); + + // Calculate digit widths for alignment + let adds_width = format!("{}", max_adds).len(); + let dels_width = format!("{}", max_dels).len(); + + // Print header with +/- aligned to file columns + // Header prefix: "Staged: N file(s) " + let header_prefix = format!( + "Staged: {} file{} ", + self.files_changed, + if self.files_changed == 1 { "" } else { "s" } + ); + // File column width: " " (indent) + filename + 4 spaces padding + let file_col_width = 2 + max_name_len + 4; + // Padding needed to align header + with file + column + let header_pad = file_col_width.saturating_sub(header_prefix.len()); + + print!("{}", "Staged:".blue()); print!( - "{} {} ", - "Staged:".blue(), + " {} ", format!( "{} file{}", self.files_changed, @@ -40,34 +83,98 @@ impl DiffStats { ) .white() ); - if self.insertions > 0 { - print!("{} ", format!("+{}", self.insertions).green()); - } - if self.deletions > 0 { - print!("{}", format!("-{}", self.deletions).red()); + // Align + column: header_pad spaces + adds_width + 1 for the '+' sign + // Pad the string first, then colorize (ANSI codes break format width) + print!( + "{}", + format!( + "{:>width$}", + format!("+{}", self.insertions), + width = header_pad + adds_width + 1 + ) + .green() + ); + // 3 spaces between + and - columns (print separately to avoid ANSI interference) + print!(" "); + println!( + "{}", + format!( + "{:>width$}", + format!("-{}", self.deletions), + width = dels_width + 1 + ) + .red() + ); + + // Print regular file list + for (file, adds, dels) in &self.file_changes { + print!(" {: 0 { + print!( + "{}", + format!("{:>width$}", format!("+{}", adds), width = adds_width + 1).green() + ); + } else { + print!("{:>width$}", "", width = adds_width + 1); + } + if *dels > 0 { + // Print spacing separately to avoid ANSI code interference + print!(" "); + println!( + "{}", + format!("{:>width$}", format!("-{}", dels), width = dels_width + 1).red() + ); + } else { + println!(); + } } - 
println!(); - // Print file list (compact) - let max_len = self - .file_changes - .iter() - .map(|(f, _, _)| f.len()) - .max() - .unwrap_or(0); + // Print ignored files (auto-skipped + .cmtignore) - dimmed with ~ marker + for (file, adds, dels) in &self.ignored_files { + print!(" {: 0 { + print!( + "{}", + format!("{:>width$}", format!("+{}", adds), width = adds_width + 1).dimmed() + ); + } else { + print!("{:>width$}", "", width = adds_width + 1); + } + if *dels > 0 { + print!(" "); + print!( + "{}", + format!("{:>width$}", format!("-{}", dels), width = dels_width + 1).dimmed() + ); + } else { + print!("{:>width$}", "", width = dels_width + 4); + } + println!("{}", " ~".dimmed()); + } - for (file, adds, dels) in &self.file_changes { - print!(" {: 0 { - print!("{}", format!("+{:<3}", adds).green()); + print!( + "{}", + format!("{:>width$}", format!("+{}", adds), width = adds_width + 1).dimmed() + ); } else { - print!(" "); + print!("{:>width$}", "", width = adds_width + 1); } if *dels > 0 { - print!("{}", format!("-{}", dels).red()); + print!(" "); + print!( + "{}", + format!("{:>width$}", format!("-{}", dels), width = dels_width + 1).dimmed() + ); + } else { + print!("{:>width$}", "", width = dels_width + 4); } - println!(); + println!("{}", " ~".dimmed()); } + println!(); // Space before next section } } @@ -190,6 +297,8 @@ pub fn get_staged_changes( context_lines: u32, max_lines_per_file: usize, max_line_width: usize, + max_file_lines: usize, + cmtignore_patterns: &[String], ) -> Result { let mut opts = git2::DiffOptions::new(); opts.context_lines(context_lines); @@ -210,11 +319,15 @@ pub fn get_staged_changes( .diff_tree_to_index(Some(&tree), None, Some(&mut opts)) .map_err(|e| GitError::from_str(&format!("Failed to get repository diff: {}", e)))?; - // Get stats in the same pass - let git_stats = diff.stats()?; + // Get stats (for reference, though we calculate our own for accurate filtering) + let _git_stats = diff.stats()?; // Collect per-file stats using 
Patch API for accurate line counts + // Separate into regular files, ignored files, and skipped files let mut file_changes: Vec<(String, usize, usize)> = Vec::new(); + let mut ignored_files: Vec<(String, usize, usize)> = Vec::new(); + let mut skipped_files: Vec<(String, usize, usize)> = Vec::new(); + for delta_idx in 0..diff.deltas().len() { if let Ok(Some(patch)) = git2::Patch::from_diff(&diff, delta_idx) { let file_path = patch @@ -225,20 +338,57 @@ pub fn get_staged_changes( .map(|p| p.to_string_lossy().to_string()) .unwrap_or_default(); + if file_path.is_empty() { + continue; + } + // line_stats returns (context_lines, additions, deletions) let (_, additions, deletions) = patch.line_stats().unwrap_or((0, 0, 0)); - - if !file_path.is_empty() { + let total_lines = additions + deletions; + + // Check if file is auto-skipped (lock files, images, build artifacts) + let file_path_obj = Path::new(&file_path); + let is_auto_skipped = is_skippable(file_path_obj); + + // Check if file matches any .cmtignore pattern + let is_ignored = cmtignore_patterns + .iter() + .any(|pattern| matches_pattern(&file_path, pattern)); + + if is_auto_skipped || is_ignored { + // Auto-skipped and .cmtignore files go to ignored_files + ignored_files.push((file_path, additions, deletions)); + } else if max_file_lines > 0 && total_lines > max_file_lines { + // File exceeds threshold (only check if threshold > 0) + skipped_files.push((file_path, additions, deletions)); + } else { file_changes.push((file_path, additions, deletions)); } } } + // Calculate total insertions/deletions across ALL files (including ignored/skipped) + let total_insertions: usize = file_changes + .iter() + .chain(ignored_files.iter()) + .chain(skipped_files.iter()) + .map(|(_, a, _)| a) + .sum(); + let total_deletions: usize = file_changes + .iter() + .chain(ignored_files.iter()) + .chain(skipped_files.iter()) + .map(|(_, _, d)| d) + .sum(); + let total_files = file_changes.len() + ignored_files.len() + 
skipped_files.len(); + let stats = DiffStats { - files_changed: git_stats.files_changed(), - insertions: git_stats.insertions(), - deletions: git_stats.deletions(), + files_changed: total_files, + insertions: total_insertions, + deletions: total_deletions, file_changes, + skipped_files, + ignored_files, has_unstaged: has_unstaged_changes(repo).unwrap_or(false), }; @@ -266,6 +416,14 @@ pub fn get_staged_changes( diff }; + // Build set of files to exclude from diff text (ignored + skipped) + let excluded_files: HashSet<String> = stats + .ignored_files + .iter() + .chain(stats.skipped_files.iter()) + .map(|(f, _, _)| f.clone()) + .collect(); + // Build diff text let mut diff_str = String::new(); let mut line_count = 0; @@ -276,8 +434,16 @@ pub fn get_staged_changes( .new_file() .path() .unwrap_or_else(|| std::path::Path::new("")); + + // Skip .lock files and other auto-skippable files if is_skippable(file_path) { - return true; // Skip .lock files + return true; + } + + // Skip files that are ignored or exceed threshold + let file_path_str = file_path.to_string_lossy(); + if excluded_files.contains(file_path_str.as_ref()) { + return true; } if line_count < effective_max_lines_per_file { @@ -370,7 +536,7 @@ mod tests { #[test] fn test_get_staged_changes_empty_repo() { let (_temp_dir, repo) = setup_test_repo(); - let result = get_staged_changes(&repo, 0, 100, 300); + let result = get_staged_changes(&repo, 0, 100, 300, 0, &[]); assert!(result.is_err()); assert_eq!( result.unwrap_err().message(), @@ -385,7 +551,7 @@ mod tests { // Create and stage a new file create_and_stage_file(&repo, "test.txt", "Hello, World!"); - let staged = get_staged_changes(&repo, 0, 100, 300).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); assert!(staged.diff_text.contains("Hello, World!")); } @@ -400,7 +566,7 @@ mod tests { // Modify and stage the file create_and_stage_file(&repo, "test.txt", "Modified content"); - let staged = get_staged_changes(&repo, 0, 100, 
300).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); assert!(staged.diff_text.contains("Initial content")); assert!(staged.diff_text.contains("Modified content")); } @@ -442,7 +608,7 @@ mod tests { create_and_stage_file(&repo, "new-staged.txt", "New staged content"); // Should succeed and detect unstaged changes - let result = get_staged_changes(&repo, 3, 100, 300).unwrap(); + let result = get_staged_changes(&repo, 3, 100, 300, 0, &[]).unwrap(); assert!(result.stats.has_unstaged); } @@ -456,12 +622,21 @@ mod tests { // Create and stage a regular file create_and_stage_file(&repo, "test.txt", "This is a regular file."); - let staged = get_staged_changes(&repo, 0, 100, 300).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); - // Assert that the .lock file content is not in the diff - assert!(!staged.diff_text.contains("This is a lock file.")); + // Lock file should be in ignored_files (auto-skipped) + assert_eq!(staged.stats.ignored_files.len(), 1); + assert_eq!(staged.stats.ignored_files[0].0, "test.lock"); + + // Regular file should be in file_changes + assert_eq!(staged.stats.file_changes.len(), 1); + assert_eq!(staged.stats.file_changes[0].0, "test.txt"); + + // Total stats should include both files + assert_eq!(staged.stats.files_changed, 2); - // Assert that the regular file content is in the diff + // Diff text (sent to LLM) should only contain regular file + assert!(!staged.diff_text.contains("This is a lock file.")); assert!(staged.diff_text.contains("This is a regular file.")); } @@ -478,7 +653,7 @@ mod tests { // Set max_lines_per_file to 10 for testing let max_lines_per_file = 10; - let staged = get_staged_changes(&repo, 0, max_lines_per_file, 300).unwrap(); + let staged = get_staged_changes(&repo, 0, max_lines_per_file, 300, 0, &[]).unwrap(); // Assert that the diff output does not exceed the max_lines_per_file limit // Allow extra lines for headers and metadata @@ -506,7 +681,7 @@ mod tests 
{ // Set max_line_width to 100 for testing let max_line_width = 100; - let staged = get_staged_changes(&repo, 0, 100, max_line_width).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, max_line_width, 0, &[]).unwrap(); // Assert that the line is truncated to max_line_width assert!(staged.diff_text.contains(&long_line[..max_line_width])); @@ -521,7 +696,7 @@ mod tests { let content = "line1\nline2\nline3\nline4\nline5"; create_and_stage_file(&repo, "test.txt", content); - let staged = get_staged_changes(&repo, 0, 100, 300).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); // Check overall stats assert_eq!(staged.stats.files_changed, 1); @@ -547,7 +722,7 @@ mod tests { // Modify file: change line2, add line4 create_and_stage_file(&repo, "test.txt", "line1\nmodified\nline3\nline4"); - let staged = get_staged_changes(&repo, 0, 100, 300).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); // Check per-file stats - should have 2 insertions (modified, line4) and 1 deletion (line2) assert_eq!(staged.stats.file_changes.len(), 1); @@ -571,7 +746,7 @@ mod tests { create_and_stage_file(&repo, "file2.txt", "x"); // -1 create_and_stage_file(&repo, "file3.txt", "new1\nnew2\nnew3"); // +3 - let staged = get_staged_changes(&repo, 0, 100, 300).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); // Check overall stats assert_eq!(staged.stats.files_changed, 3); @@ -627,7 +802,7 @@ mod tests { create_and_stage_file(&repo, "b.txt", "a"); // -2 create_and_stage_file(&repo, "c.txt", "new\nfile"); // +2 - let staged = get_staged_changes(&repo, 0, 100, 300).unwrap(); + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); // Sum up per-file stats let total_adds: usize = staged.stats.file_changes.iter().map(|(_, a, _)| a).sum(); @@ -659,7 +834,89 @@ mod tests { ); // Verify we can still access staged changes from the discovered repo - let staged = 
get_staged_changes(&discovered, 0, 100, 300).unwrap(); + let staged = get_staged_changes(&discovered, 0, 100, 300, 0, &[]).unwrap(); assert!(staged.diff_text.contains("Hello from root")); } + + #[test] + fn test_cmtignore_pattern_matching() { + let (_temp_dir, repo) = setup_test_repo(); + + // Create and stage files + create_and_stage_file(&repo, "regular.txt", "regular content"); + create_and_stage_file(&repo, "ignored.sql", "ignored content"); + + // Use cmtignore pattern to ignore .sql files + let patterns = vec!["*.sql".to_string()]; + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &patterns).unwrap(); + + // Regular file should be in file_changes + assert_eq!(staged.stats.file_changes.len(), 1); + assert_eq!(staged.stats.file_changes[0].0, "regular.txt"); + + // SQL file should be in ignored_files + assert_eq!(staged.stats.ignored_files.len(), 1); + assert_eq!(staged.stats.ignored_files[0].0, "ignored.sql"); + + // Total stats should include BOTH files (ignored files are only skipped for analysis) + assert_eq!(staged.stats.files_changed, 2); + assert_eq!(staged.stats.insertions, 2); // 1 line each + + // Diff text (sent to LLM) should only contain regular file content + assert!(staged.diff_text.contains("regular content")); + assert!(!staged.diff_text.contains("ignored content")); + } + + #[test] + fn test_max_file_lines_threshold() { + let (_temp_dir, repo) = setup_test_repo(); + + // Create a small file (under threshold) + create_and_stage_file(&repo, "small.txt", "line1\nline2\nline3"); + + // Create a large file (over threshold of 5 lines) + let large_content = (0..10) + .map(|i| format!("line{}", i)) + .collect::>() + .join("\n"); + create_and_stage_file(&repo, "large.txt", &large_content); + + // Use max_file_lines of 5 + let staged = get_staged_changes(&repo, 0, 100, 300, 5, &[]).unwrap(); + + // Small file should be in file_changes + assert_eq!(staged.stats.file_changes.len(), 1); + assert_eq!(staged.stats.file_changes[0].0, "small.txt"); + + 
// Large file should be in skipped_files + assert_eq!(staged.stats.skipped_files.len(), 1); + assert_eq!(staged.stats.skipped_files[0].0, "large.txt"); + + // Total stats should include BOTH files (skipped files are only skipped for analysis) + assert_eq!(staged.stats.files_changed, 2); + assert_eq!(staged.stats.insertions, 13); // 3 + 10 lines + + // Diff text (sent to LLM) should only contain small file content + assert!(staged.diff_text.contains("line1")); + assert!(!staged.diff_text.contains("line9")); + } + + #[test] + fn test_max_file_lines_zero_disables_check() { + let (_temp_dir, repo) = setup_test_repo(); + + // Create a large file + let large_content = (0..100) + .map(|i| format!("line{}", i)) + .collect::>() + .join("\n"); + create_and_stage_file(&repo, "large.txt", &large_content); + + // Use max_file_lines of 0 (disabled) + let staged = get_staged_changes(&repo, 0, 100, 300, 0, &[]).unwrap(); + + // Large file should be in file_changes (not skipped) + assert_eq!(staged.stats.file_changes.len(), 1); + assert_eq!(staged.stats.skipped_files.len(), 0); + } } diff --git a/src/lib.rs b/src/lib.rs index 848034a..64e87e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ pub use crate::git::{ mod ai; mod analysis; +mod cmtignore; mod commit; mod config; mod git; @@ -14,6 +15,8 @@ mod progress; mod prompts; mod templates; +pub use cmtignore::{append_to_cmtignore, load_cmtignore}; + pub use commit::{create_commit, CommitError, CommitOptions, CommitResult}; pub use pricing::PricingCache;