diff --git a/src/cli.rs b/src/cli.rs index fde16cb..f1f9235 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -50,6 +50,33 @@ pub enum Commands { /// List cached templates List, + /// Create a template by finding commonality across 2+ projects + Distill { + /// Project directories to compare (minimum 2) + #[arg(required = true, num_args = 2..)] + projects: Vec, + + /// Variable values as they appear in the first project + #[arg(short, long = "var", value_name = "KEY=VALUE")] + vars: Vec, + + /// Output directory for the template + #[arg(short, long, default_value = "distilled-template")] + output: String, + + /// Max directory depth (0 = top-level only, 1 = one level of subdirs) + #[arg(long, default_value = "1")] + depth: usize, + + /// Show what would be generated without writing + #[arg(long)] + dry_run: bool, + + /// Overwrite output directory if it exists + #[arg(long)] + force: bool, + }, + /// Extract a template from an existing project Extract { /// Source project directory diff --git a/src/commands/distill.rs b/src/commands/distill.rs new file mode 100644 index 0000000..ab50af6 --- /dev/null +++ b/src/commands/distill.rs @@ -0,0 +1,98 @@ +use std::path::PathBuf; + +use diecut::distill::{self, DistillOptions, DistilledContent}; +use miette::Result; + +pub fn run( + projects: Vec, + vars: Vec, + output: String, + depth: usize, + dry_run: bool, + force: bool, +) -> Result<()> { + // Parse --var key=value pairs + let variables: Vec<(String, String)> = vars + .iter() + .map(|v| { + let parts: Vec<&str> = v.splitn(2, '=').collect(); + if parts.len() != 2 { + miette::bail!("Invalid --var format '{}': expected KEY=VALUE", v); + } + Ok((parts[0].to_string(), parts[1].to_string())) + }) + .collect::>>()?; + + // Warn on short values + for (name, value) in &variables { + if value.len() < 3 { + eprintln!( + " warning: variable '{}' has short value '{}' — may cause false matches", + name, value + ); + } + } + + // Warn on duplicate values across different variable names + 
let mut seen_values: std::collections::HashMap<&str, &str> = std::collections::HashMap::new(); + for (name, value) in &variables { + if let Some(other_name) = seen_values.get(value.as_str()) { + eprintln!( + " warning: variables '{}' and '{}' have the same value '{}'", + other_name, name, value + ); + } else { + seen_values.insert(value, name); + } + } + + let options = DistillOptions { + projects: projects.iter().map(PathBuf::from).collect(), + variables, + output_dir: PathBuf::from(&output), + max_depth: Some(depth), + dry_run, + force, + }; + + let plan = distill::plan_distill(options)?; + + // Print suppressed variable warnings + for (name, reason) in &plan.suppressed_variables { + eprintln!(" warning: variable '{}' suppressed — {}", name, reason); + } + + if dry_run { + println!("Distill plan (dry run):"); + println!( + " Variables: {}", + plan.active_variables + .iter() + .map(|v| v.name.as_str()) + .collect::>() + .join(", ") + ); + println!(" Files:"); + for file in &plan.files { + let suffix = match &file.content { + DistilledContent::Text { + replacement_count, .. 
+ } => format!(" ({} replacements)", replacement_count), + DistilledContent::Binary(_) => " (binary)".to_string(), + DistilledContent::Static(_) => " (static)".to_string(), + }; + println!(" {}{}", file.template_path.display(), suffix); + } + return Ok(()); + } + + distill::execute_distill(&plan)?; + println!("Template distilled to {}/", output); + println!( + " {} files, {} variables", + plan.files.len(), + plan.active_variables.len() + ); + + Ok(()) +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 8c884a4..6e8ae57 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,3 +1,4 @@ +pub mod distill; pub mod extract; pub mod list; pub mod new; diff --git a/src/distill/intersect.rs b/src/distill/intersect.rs new file mode 100644 index 0000000..ac5202e --- /dev/null +++ b/src/distill/intersect.rs @@ -0,0 +1,252 @@ +use std::collections::HashMap; +use std::path::PathBuf; + +use crate::extract::scan::{ScanResult, ScannedFile}; + +/// A file that appears in all scanned projects, with per-project content aligned by scan index. +pub struct AlignedFile { + /// Path relative to each project root. + pub relative_path: PathBuf, + /// Per-project text content. `None` means binary (or absent, but absent files are excluded). + pub contents: Vec>, + /// Per-project raw bytes for binary files. `None` for text files. + pub raw_bytes: Vec>>, + /// `true` if ANY project's copy of this file is binary. + pub any_binary: bool, +} + +/// Intersect multiple scan results, keeping only files present in ALL scans. +/// +/// Returns one `AlignedFile` per common relative path, with per-project content +/// stored in the same order as `scans`. Results are sorted by relative path. +pub fn intersect_scans(scans: &[ScanResult]) -> Vec { + if scans.is_empty() { + return Vec::new(); + } + + // Count how many scans contain each relative path. 
+ let mut path_count: HashMap<&PathBuf, usize> = HashMap::new(); + for scan in scans { + for file in &scan.files { + *path_count.entry(&file.relative_path).or_insert(0) += 1; + } + } + + let num_scans = scans.len(); + + // Collect paths present in every scan. + let mut common_paths: Vec<&PathBuf> = path_count + .into_iter() + .filter_map(|(path, count)| if count == num_scans { Some(path) } else { None }) + .collect(); + + common_paths.sort(); + + // Build an AlignedFile for each common path. + common_paths + .into_iter() + .map(|path| { + let mut contents: Vec> = Vec::with_capacity(num_scans); + let mut raw_bytes: Vec>> = Vec::with_capacity(num_scans); + let mut any_binary = false; + + for scan in scans { + let file: &ScannedFile = scan + .files + .iter() + .find(|f| &f.relative_path == path) + .expect("path was counted as present in every scan"); + + if file.is_binary { + any_binary = true; + contents.push(None); + let bytes = std::fs::read(&file.absolute_path).ok(); + raw_bytes.push(bytes); + } else { + contents.push(file.content.clone()); + raw_bytes.push(None); + } + } + + AlignedFile { + relative_path: path.clone(), + contents, + raw_bytes, + any_binary, + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::extract::scan::ScannedFile; + use rstest::rstest; + use std::path::PathBuf; + use tempfile::tempdir; + + fn make_text_file(relative: &str, absolute: &str, content: &str) -> ScannedFile { + ScannedFile { + relative_path: PathBuf::from(relative), + absolute_path: PathBuf::from(absolute), + is_binary: false, + content: Some(content.to_string()), + } + } + + fn make_binary_file(relative: &str, absolute: PathBuf) -> ScannedFile { + ScannedFile { + relative_path: PathBuf::from(relative), + absolute_path: absolute, + is_binary: true, + content: None, + } + } + + fn make_scan(files: Vec) -> ScanResult { + ScanResult { + files, + excluded_count: 0, + } + } + + #[test] + fn common_files_kept_unique_files_discarded() { + let scans = vec![ 
+ make_scan(vec![ + make_text_file("README.md", "/a/README.md", "# A"), + make_text_file("only_in_a.txt", "/a/only_in_a.txt", "unique"), + ]), + make_scan(vec![ + make_text_file("README.md", "/b/README.md", "# B"), + make_text_file("only_in_b.txt", "/b/only_in_b.txt", "unique"), + ]), + ]; + + let result = intersect_scans(&scans); + + assert_eq!(result.len(), 1); + assert_eq!(result[0].relative_path, PathBuf::from("README.md")); + assert_eq!(result[0].contents[0], Some("# A".to_string())); + assert_eq!(result[0].contents[1], Some("# B".to_string())); + } + + #[test] + fn empty_intersection_when_no_common_files() { + let scans = vec![ + make_scan(vec![make_text_file("only_a.txt", "/a/only_a.txt", "a")]), + make_scan(vec![make_text_file("only_b.txt", "/b/only_b.txt", "b")]), + ]; + + let result = intersect_scans(&scans); + + assert!(result.is_empty()); + } + + #[test] + fn binary_file_detection_any_binary_flag() { + // Write real binary bytes so std::fs::read works. + let dir_a = tempdir().unwrap(); + let dir_b = tempdir().unwrap(); + + let bin_a = dir_a.path().join("logo.png"); + let bin_b = dir_b.path().join("logo.png"); + std::fs::write(&bin_a, b"\x89PNG\r\n").unwrap(); + std::fs::write(&bin_b, b"\x89PNG\r\n").unwrap(); + + let scans = vec![ + make_scan(vec![ + make_binary_file("logo.png", bin_a), + make_text_file("main.rs", "/a/main.rs", "fn main() {}"), + ]), + make_scan(vec![ + make_binary_file("logo.png", bin_b), + make_text_file("main.rs", "/b/main.rs", "fn main() {}"), + ]), + ]; + + let result = intersect_scans(&scans); + assert_eq!(result.len(), 2); + + let logo = result + .iter() + .find(|f| f.relative_path == PathBuf::from("logo.png")) + .unwrap(); + assert!(logo.any_binary); + assert!(logo.raw_bytes[0].is_some()); + assert!(logo.raw_bytes[1].is_some()); + assert_eq!(logo.contents[0], None); + assert_eq!(logo.contents[1], None); + + let main_rs = result + .iter() + .find(|f| f.relative_path == PathBuf::from("main.rs")) + .unwrap(); + 
assert!(!main_rs.any_binary); + } + + #[rstest] + #[case("shared.txt", true)] + #[case("unique_c.txt", false)] + fn three_project_intersection(#[case] path: &str, #[case] expected_present: bool) { + let scans = vec![ + make_scan(vec![ + make_text_file("shared.txt", "/a/shared.txt", "content a"), + make_text_file("only_a.txt", "/a/only_a.txt", "only a"), + ]), + make_scan(vec![ + make_text_file("shared.txt", "/b/shared.txt", "content b"), + make_text_file("only_b.txt", "/b/only_b.txt", "only b"), + ]), + make_scan(vec![ + make_text_file("shared.txt", "/c/shared.txt", "content c"), + make_text_file("unique_c.txt", "/c/unique_c.txt", "unique c"), + ]), + ]; + + let result = intersect_scans(&scans); + + let found = result + .iter() + .any(|f| f.relative_path == PathBuf::from(path)); + assert_eq!(found, expected_present); + + if expected_present { + let file = result + .iter() + .find(|f| f.relative_path == PathBuf::from(path)) + .unwrap(); + assert_eq!(file.contents.len(), 3); + } + } + + #[test] + fn empty_scans_slice_returns_empty() { + let result = intersect_scans(&[]); + assert!(result.is_empty()); + } + + #[test] + fn results_sorted_by_relative_path() { + let scans = vec![ + make_scan(vec![ + make_text_file("z.txt", "/a/z.txt", "z"), + make_text_file("a.txt", "/a/a.txt", "a"), + make_text_file("m.txt", "/a/m.txt", "m"), + ]), + make_scan(vec![ + make_text_file("z.txt", "/b/z.txt", "z"), + make_text_file("a.txt", "/b/a.txt", "a"), + make_text_file("m.txt", "/b/m.txt", "m"), + ]), + ]; + + let result = intersect_scans(&scans); + + let paths: Vec<&PathBuf> = result.iter().map(|f| &f.relative_path).collect(); + let mut sorted = paths.clone(); + sorted.sort(); + assert_eq!(paths, sorted); + } +} diff --git a/src/distill/mod.rs b/src/distill/mod.rs new file mode 100644 index 0000000..7fc5ad3 --- /dev/null +++ b/src/distill/mod.rs @@ -0,0 +1,370 @@ +pub mod intersect; +pub mod validate; + +use std::path::PathBuf; + +use regex_lite::Regex; + +use 
crate::error::{DicecutError, Result}; +use crate::extract::config_gen::{ + generate_config_toml, ComputedVariable, ConfigGenOptions, PromptedVariable, +}; +use crate::extract::exclude::all_default_excludes; +use crate::extract::replace::{ + apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule, +}; +use crate::extract::scan::scan_project; +use crate::extract::variants::{ + computed_expression, detect_separator, generate_variants, is_canonical_variant, +}; + +use self::intersect::intersect_scans; +use self::validate::{is_variable_active, DistillVariable}; + +// ── Public API types ───────────────────────────────────────────────────────── + +pub struct DistillOptions { + pub projects: Vec, + pub variables: Vec<(String, String)>, + pub output_dir: PathBuf, + pub max_depth: Option, + pub dry_run: bool, + pub force: bool, +} + +pub enum DistilledContent { + Text { + content: String, + replacement_count: usize, + }, + Binary(Vec), + Static(String), +} + +pub struct DistilledFile { + pub template_path: PathBuf, + pub content: DistilledContent, +} + +pub struct DistillPlan { + pub output_dir: PathBuf, + pub files: Vec, + pub config_toml: String, + pub active_variables: Vec, + pub suppressed_variables: Vec<(String, String)>, // (name, reason) + pub dry_run: bool, +} + +// ── Main entry points ───────────────────────────────────────────────────────── + +/// Build a distill plan without writing any files. 
+pub fn plan_distill(options: DistillOptions) -> Result { + // Phase 0: validate inputs + validate_inputs(&options)?; + + // Phase 1: expand variables into replacement rules + let (distill_vars, mut rules) = expand_variables(&options.variables); + + // Phase 2: scan all projects + let excludes = all_default_excludes(); + let mut scans = Vec::with_capacity(options.projects.len()); + for project in &options.projects { + let scan = scan_project(project, &excludes, options.max_depth)?; + scans.push(scan); + } + + // Phase 3: intersect + let aligned = intersect_scans(&scans); + if aligned.is_empty() { + return Err(DicecutError::DistillNoCommonFiles(options.projects.len())); + } + + // Phase 4: cross-validate — determine which variables are active + let mut active_variables: Vec = Vec::new(); + let mut suppressed_variables: Vec<(String, String)> = Vec::new(); + + for var in distill_vars { + if is_variable_active(&var, &aligned) { + active_variables.push(var); + } else { + let reason = format!( + "'{}' does not vary across projects or does not appear in shared files", + var.value_in_p0 + ); + suppressed_variables.push((var.name, reason)); + } + } + + // Keep only rules for active variables + rules.retain(|r| active_variables.iter().any(|v| v.name == r.variable)); + + // Phase 5: process files + let files = process_files(&aligned, &rules); + + // Phase 6: generate diecut.toml + let config_toml = generate_config(&active_variables, &options.output_dir); + + Ok(DistillPlan { + output_dir: options.output_dir, + files, + config_toml, + active_variables, + suppressed_variables, + dry_run: options.dry_run, + }) +} + +/// Write the distill plan to disk. 
+pub fn execute_distill(plan: &DistillPlan) -> Result<()> { + if plan.dry_run { + return Ok(()); + } + + let template_dir = plan.output_dir.join("template"); + std::fs::create_dir_all(&template_dir).map_err(|e| DicecutError::Io { + context: format!("creating output directory {}", template_dir.display()), + source: e, + })?; + + // Write diecut.toml + let config_path = plan.output_dir.join("diecut.toml"); + std::fs::write(&config_path, &plan.config_toml).map_err(|e| DicecutError::Io { + context: format!("writing config to {}", config_path.display()), + source: e, + })?; + + // Write each distilled file + for distilled in &plan.files { + let dest = template_dir.join(&distilled.template_path); + + // Create parent directories + if let Some(parent) = dest.parent() { + std::fs::create_dir_all(parent).map_err(|e| DicecutError::Io { + context: format!("creating directory {}", parent.display()), + source: e, + })?; + } + + match &distilled.content { + DistilledContent::Text { content, .. } => { + std::fs::write(&dest, content).map_err(|e| DicecutError::Io { + context: format!("writing file {}", dest.display()), + source: e, + })?; + } + DistilledContent::Binary(bytes) => { + std::fs::write(&dest, bytes).map_err(|e| DicecutError::Io { + context: format!("writing binary file {}", dest.display()), + source: e, + })?; + } + DistilledContent::Static(content) => { + std::fs::write(&dest, content).map_err(|e| DicecutError::Io { + context: format!("writing static file {}", dest.display()), + source: e, + })?; + } + } + } + + Ok(()) +} + +// ── Phase implementations ───────────────────────────────────────────────────── + +fn validate_inputs(options: &DistillOptions) -> Result<()> { + // At least 2 projects required + if options.projects.len() < 2 { + return Err(DicecutError::DistillMinProjects); + } + + // All project paths must be directories + for project in &options.projects { + if !project.is_dir() { + return Err(DicecutError::TemplateDirectoryMissing { + path: project.clone(), 
+ }); + } + } + + // Validate variable names and values + let name_re = Regex::new(r"^[a-z][a-z0-9_]*$").unwrap(); + for (name, value) in &options.variables { + if !name_re.is_match(name) { + return Err(DicecutError::DistillInvalidVarName(name.clone())); + } + if value.is_empty() { + return Err(DicecutError::DistillEmptyValue(name.clone())); + } + if value.contains('/') { + return Err(DicecutError::DistillSlashInValue(value.clone())); + } + } + + // Output dir must not exist unless --force + if options.output_dir.exists() && !options.force { + return Err(DicecutError::DistillOutputExists( + options.output_dir.display().to_string(), + )); + } + + Ok(()) +} + +/// Expand variables into DistillVariable entries and ReplacementRule entries. +/// +/// Returns (distill_vars, sorted_rules). +fn expand_variables( + variables: &[(String, String)], +) -> (Vec, Vec) { + let mut distill_vars = Vec::new(); + let mut rules: Vec = Vec::new(); + + for (name, value) in variables { + let variants = generate_variants(name, value); + + for variant in &variants { + rules.push(ReplacementRule { + literal: variant.literal.clone(), + replacement: variant.tera_expr.clone(), + variable: name.clone(), + variant: variant.name.to_string(), + }); + } + + distill_vars.push(DistillVariable { + name: name.clone(), + value_in_p0: value.clone(), + variants, + }); + } + + build_replacement_rules(&mut rules); + (distill_vars, rules) +} + +/// Process all aligned files: apply replacements to project[0]'s content, +/// determine template paths, and classify as Text/Binary/Static. 
+fn process_files( + aligned: &[intersect::AlignedFile], + rules: &[ReplacementRule], +) -> Vec { + let mut result = Vec::new(); + + for file in aligned { + if file.any_binary { + // Binary: copy raw bytes from project[0] + let bytes = file + .raw_bytes + .first() + .and_then(|b| b.clone()) + .unwrap_or_default(); + let template_path = apply_path_replacements(&file.relative_path, rules); + result.push(DistilledFile { + template_path, + content: DistilledContent::Binary(bytes), + }); + } else { + // Text: apply replacements to project[0]'s content + let p0_content = file + .contents + .first() + .and_then(|c| c.as_deref()) + .unwrap_or(""); + + let (rendered, count) = apply_replacements(p0_content, rules); + + // Warn about existing template expressions + if count > 0 { + let brace_count = rendered.matches("{{").count(); + if brace_count > count { + eprintln!(" warning: {} contains existing '{{{{}}}}' syntax that may need {{% raw %}} blocks", + file.relative_path.display()); + } + } + + let template_path = if count > 0 { + // File has replacements: add .die suffix + let mut path = apply_path_replacements(&file.relative_path, rules); + let new_name = format!( + "{}.die", + path.file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_default() + ); + path.set_file_name(new_name); + path + } else { + apply_path_replacements(&file.relative_path, rules) + }; + + let content = if count > 0 { + DistilledContent::Text { + content: rendered, + replacement_count: count, + } + } else { + DistilledContent::Static(rendered) + }; + + result.push(DistilledFile { + template_path, + content, + }); + } + } + + result +} + +/// Generate diecut.toml content for the active variables. 
+fn generate_config(active_variables: &[DistillVariable], output_dir: &std::path::Path) -> String { + let template_name = output_dir + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "my-template".to_string()); + + let mut prompted = Vec::new(); + let mut computed = Vec::new(); + + for var in active_variables { + let canonical_sep = detect_separator(&var.value_in_p0); + + for variant in &var.variants { + if variant.name == "verbatim" || is_canonical_variant(variant.name, canonical_sep) { + // Canonical variant → prompted variable (no default value, user must supply it) + if !prompted + .iter() + .any(|p: &PromptedVariable| p.name == var.name) + { + prompted.push(PromptedVariable { + name: var.name.clone(), + default_value: String::new(), + prompt: format!( + "{} (e.g. {})", + var.name.replace('_', " "), + var.value_in_p0 + ), + }); + } + } else { + // Non-canonical variant → computed variable + let computed_name = format!("{}_{}", var.name, variant.name); + let expr = computed_expression(&var.name, variant.name, canonical_sep); + computed.push(ComputedVariable { + name: computed_name, + expression: expr, + }); + } + } + } + + generate_config_toml(&ConfigGenOptions { + template_name, + prompted_variables: prompted, + computed_variables: computed, + exclude_patterns: Vec::new(), + copy_without_render: Vec::new(), + conditional_entries: Vec::new(), + }) +} diff --git a/src/distill/validate.rs b/src/distill/validate.rs new file mode 100644 index 0000000..e706329 --- /dev/null +++ b/src/distill/validate.rs @@ -0,0 +1,179 @@ +use crate::extract::variants::CaseVariant; + +use super::intersect::AlignedFile; + +/// A distill variable candidate: its name, the value from project[0], and its case variants. +pub struct DistillVariable { + pub name: String, + pub value_in_p0: String, + pub variants: Vec, +} + +/// Check whether a variable actually varies across projects. 
+/// +/// Returns `true` if, for at least one non-binary aligned file: +/// - Any variant literal appears in project[0]'s content, AND +/// - At least one other project has different content for that file. +/// +/// Returns `false` if no file qualifies (variable is suppressed). +pub fn is_variable_active(var: &DistillVariable, aligned_files: &[AlignedFile]) -> bool { + for file in aligned_files { + // Skip binary files + if file.any_binary { + continue; + } + + // Get project[0]'s content; skip if absent + let Some(Some(p0_content)) = file.contents.first() else { + continue; + }; + + // Check if any variant literal appears in project[0]'s content + let literal_in_p0 = var.variants.iter().any(|v| p0_content.contains(&v.literal)); + + if !literal_in_p0 { + continue; + } + + // Check if at least one other project has different content for this file + let any_differs = file + .contents + .iter() + .skip(1) + .any(|c| c.as_deref() != Some(p0_content.as_str())); + + if any_differs { + return true; + } + } + + false +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use rstest::rstest; + + use super::*; + use crate::extract::variants::CaseVariant; + + fn make_variant(literal: &str) -> CaseVariant { + CaseVariant { + name: "verbatim", + literal: literal.to_string(), + tera_expr: format!("{{{{ var }}}}"), + } + } + + fn make_var(value: &str) -> DistillVariable { + DistillVariable { + name: "project_name".to_string(), + value_in_p0: value.to_string(), + variants: vec![make_variant(value)], + } + } + + fn make_file(contents: Vec>, any_binary: bool) -> AlignedFile { + AlignedFile { + relative_path: PathBuf::from("README.md"), + contents: contents + .into_iter() + .map(|c| c.map(|s| s.to_string())) + .collect(), + raw_bytes: vec![], + any_binary, + } + } + + #[test] + fn active_when_content_differs_across_projects() { + let var = make_var("my-app"); + let file = make_file( + vec![ + Some("# my-app\nA project."), + Some("# other-proj\nA project."), + ], + false, + ); 
+ assert!(is_variable_active(&var, &[file])); + } + + #[test] + fn suppressed_when_all_content_identical() { + let var = make_var("my-app"); + let file = make_file( + vec![Some("# my-app\nA project."), Some("# my-app\nA project.")], + false, + ); + assert!(!is_variable_active(&var, &[file])); + } + + #[test] + fn suppressed_when_literal_not_in_p0() { + let var = make_var("my-app"); + // p0 does not contain "my-app" + let file = make_file( + vec![ + Some("# something-else\nNo match here."), + Some("# other-proj\nDifferent content."), + ], + false, + ); + assert!(!is_variable_active(&var, &[file])); + } + + #[rstest] + #[case( + vec![Some("# my-app"), Some("# my-app"), Some("# other-proj")], + true, + "active with 3 projects where only one differs" + )] + #[case( + vec![Some("# my-app"), Some("# my-app"), Some("# my-app")], + false, + "suppressed when all 3 identical" + )] + fn three_project_cases( + #[case] contents: Vec>, + #[case] expected: bool, + #[case] _label: &str, + ) { + let var = make_var("my-app"); + let file = make_file(contents, false); + assert_eq!(is_variable_active(&var, &[file]), expected); + } + + #[test] + fn binary_files_are_skipped() { + let var = make_var("my-app"); + // Mark the only file as binary — should not count even if content differs + let file = make_file( + vec![ + Some("# my-app\nA project."), + Some("# other-proj\nA project."), + ], + true, + ); + assert!(!is_variable_active(&var, &[file])); + } + + #[test] + fn active_when_one_file_qualifies_among_many() { + let var = make_var("my-app"); + let binary_file = make_file( + vec![Some("# my-app content"), Some("# other-proj content")], + true, // binary, skipped + ); + let no_literal_file = make_file(vec![Some("no match here"), Some("also no match")], false); + let qualifying_file = make_file( + vec![Some("project: my-app"), Some("project: other-proj")], + false, + ); + assert!(is_variable_active( + &var, + &[binary_file, no_literal_file, qualifying_file] + )); + } +} diff --git 
a/src/error.rs b/src/error.rs index 3ad5597..57bbf47 100644 --- a/src/error.rs +++ b/src/error.rs @@ -141,6 +141,29 @@ pub enum DicecutError { #[error("Directory already contains a diecut.toml: {path}")] #[diagnostic(help("This directory is already a diecut template"))] ExtractAlreadyTemplate { path: PathBuf }, + + #[error("at least 2 project directories are required for distill")] + #[diagnostic(help("Usage: diecut distill [project3...]"))] + DistillMinProjects, + + #[error("no common files found across all {0} projects")] + #[diagnostic(help( + "Try increasing --depth or check that the projects share a similar structure" + ))] + DistillNoCommonFiles(usize), + + #[error("invalid variable name '{0}': must match [a-z][a-z0-9_]*")] + DistillInvalidVarName(String), + + #[error("variable value must not contain '/' (got '{0}')")] + DistillSlashInValue(String), + + #[error("variable value must not be empty for '{0}'")] + DistillEmptyValue(String), + + #[error("output directory '{0}' already exists")] + #[diagnostic(help("Use --force to overwrite"))] + DistillOutputExists(String), } pub type Result = std::result::Result; diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs index 193bbac..d37ccfd 100644 --- a/src/extract/auto_detect.rs +++ b/src/extract/auto_detect.rs @@ -1126,7 +1126,7 @@ mod tests { ) .unwrap(); - let scan = crate::extract::scan::scan_project(&project_dir, &[]).unwrap(); + let scan = crate::extract::scan::scan_project(&project_dir, &[], None).unwrap(); let result = auto_detect(&project_dir, &scan); assert!(!result.candidates.is_empty()); diff --git a/src/extract/mod.rs b/src/extract/mod.rs index f9d1318..cd1fffa 100644 --- a/src/extract/mod.rs +++ b/src/extract/mod.rs @@ -167,7 +167,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result { "\n{}", style(format!("Scanning {}...", source_dir.display())).bold() ); - let mut scan_result = scan_project(source_dir, &scan_excludes)?; + let mut scan_result = scan_project(source_dir, 
&scan_excludes, None)?; // Drop non-boilerplate files deeper than stub_depth before auto-detect sees them. // This prevents frequency analysis from detecting variables that only appear in diff --git a/src/extract/scan.rs b/src/extract/scan.rs index 544aa87..b90611a 100644 --- a/src/extract/scan.rs +++ b/src/extract/scan.rs @@ -28,7 +28,15 @@ pub struct ScanResult { /// Scan a project directory, applying exclude patterns. /// /// Returns all non-excluded files with their content loaded (for text files). -pub fn scan_project(project_dir: &Path, excludes: &[String]) -> crate::error::Result { +/// +/// `max_depth` limits directory traversal depth relative to `project_dir`. +/// `Some(0)` returns only top-level files; `Some(1)` includes one level of +/// subdirectories; `None` is unlimited. +pub fn scan_project( + project_dir: &Path, + excludes: &[String], + max_depth: Option, +) -> crate::error::Result { let project_dir = project_dir .canonicalize() .map_err(|e| crate::error::DicecutError::Io { @@ -39,7 +47,11 @@ pub fn scan_project(project_dir: &Path, excludes: &[String]) -> crate::error::Re let mut files = Vec::new(); let mut excluded_count = 0; - for entry in WalkDir::new(&project_dir).min_depth(1) { + let mut walker = WalkDir::new(&project_dir).min_depth(1); + if let Some(depth) = max_depth { + walker = walker.max_depth(depth + 1); + } + for entry in walker { let entry = entry.map_err(|e| crate::error::DicecutError::Io { context: format!("walking project directory: {}", e), source: e @@ -109,7 +121,7 @@ mod tests { std::fs::create_dir(dir.path().join("src")).unwrap(); std::fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap(); - let result = scan_project(dir.path(), &[]).unwrap(); + let result = scan_project(dir.path(), &[], None).unwrap(); assert_eq!(result.files.len(), 2); assert_eq!(result.excluded_count, 0); } @@ -122,7 +134,7 @@ mod tests { std::fs::write(dir.path().join(".git/config"), "").unwrap(); let excludes = vec![".git".to_string()]; - let 
result = scan_project(dir.path(), &excludes).unwrap(); + let result = scan_project(dir.path(), &excludes, None).unwrap(); assert_eq!(result.files.len(), 1); assert_eq!(result.excluded_count, 1); assert_eq!(result.files[0].relative_path, PathBuf::from("README.md")); @@ -140,7 +152,7 @@ mod tests { std::fs::write(subdir.join("nested.txt"), "nested").unwrap(); std::os::unix::fs::symlink(&subdir, dir.path().join("link-to-dir")).unwrap(); - let result = scan_project(dir.path(), &[]).unwrap(); + let result = scan_project(dir.path(), &[], None).unwrap(); // Should find real.txt and subdir/nested.txt, but NOT choke on link-to-dir let paths: Vec = result .files @@ -162,7 +174,7 @@ mod tests { ) .unwrap(); - let result = scan_project(dir.path(), &[]).unwrap(); + let result = scan_project(dir.path(), &[], None).unwrap(); let text_file = result .files .iter() @@ -179,4 +191,42 @@ mod tests { assert!(binary_file.is_binary); assert!(binary_file.content.is_none()); } + + #[test] + fn scan_project_respects_max_depth() { + let dir = Path::new("tests/fixtures/distill-project-a"); + let excludes = vec![]; + + // depth 0: only top-level files + let result = scan_project(dir, &excludes, Some(0)).unwrap(); + let paths: Vec = result + .files + .iter() + .map(|f| f.relative_path.to_string_lossy().to_string()) + .collect(); + assert!( + !paths.iter().any(|p| p.contains("src/")), + "depth 0 should not include src/ files" + ); + assert!( + !paths.iter().any(|p| p.contains("assets/")), + "depth 0 should not include assets/ files" + ); + + // depth 1: top-level + one level of subdirectories + let result = scan_project(dir, &excludes, Some(1)).unwrap(); + let paths: Vec = result + .files + .iter() + .map(|f| f.relative_path.to_string_lossy().to_string()) + .collect(); + assert!( + paths.iter().any(|p| p.contains("src/")), + "depth 1 should include src/ files" + ); + + // None: unlimited depth + let result = scan_project(dir, &excludes, None).unwrap(); + assert!(!result.files.is_empty()); + } } 
diff --git a/src/lib.rs b/src/lib.rs index 4091828..0254dde 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod adapter; pub mod answers; pub mod config; +pub mod distill; pub mod error; pub mod extract; pub mod hooks; diff --git a/src/main.rs b/src/main.rs index 11dec94..65dc4e1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,14 @@ fn main() -> miette::Result<()> { } => commands::new::run( template, output, data, defaults, overwrite, no_hooks, dry_run, verbose, ), + Commands::Distill { + projects, + vars, + output, + depth, + dry_run, + force, + } => commands::distill::run(projects, vars, output, depth, dry_run, force), Commands::List => commands::list::run(), Commands::Extract { source, diff --git a/tests/fixtures/distill-project-a/.gitignore b/tests/fixtures/distill-project-a/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/tests/fixtures/distill-project-a/.gitignore @@ -0,0 +1 @@ +/target diff --git a/tests/fixtures/distill-project-a/Cargo.toml b/tests/fixtures/distill-project-a/Cargo.toml new file mode 100644 index 0000000..205f2f9 --- /dev/null +++ b/tests/fixtures/distill-project-a/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "my-tool" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = "4" diff --git a/tests/fixtures/distill-project-a/README.md b/tests/fixtures/distill-project-a/README.md new file mode 100644 index 0000000..27236b1 --- /dev/null +++ b/tests/fixtures/distill-project-a/README.md @@ -0,0 +1,3 @@ +# my-tool + +A CLI tool by Alice. 
diff --git a/tests/fixtures/distill-project-a/assets/logo.png b/tests/fixtures/distill-project-a/assets/logo.png new file mode 100644 index 0000000..2371c64 --- /dev/null +++ b/tests/fixtures/distill-project-a/assets/logo.png @@ -0,0 +1 @@ +‰PNG \ No newline at end of file diff --git a/tests/fixtures/distill-project-a/src/main.rs b/tests/fixtures/distill-project-a/src/main.rs new file mode 100644 index 0000000..d84232f --- /dev/null +++ b/tests/fixtures/distill-project-a/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello from my-tool!"); +} diff --git a/tests/fixtures/distill-project-b/.gitignore b/tests/fixtures/distill-project-b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/tests/fixtures/distill-project-b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/tests/fixtures/distill-project-b/Cargo.toml b/tests/fixtures/distill-project-b/Cargo.toml new file mode 100644 index 0000000..f954183 --- /dev/null +++ b/tests/fixtures/distill-project-b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "other-tool" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = "4" diff --git a/tests/fixtures/distill-project-b/README.md b/tests/fixtures/distill-project-b/README.md new file mode 100644 index 0000000..8e64ccd --- /dev/null +++ b/tests/fixtures/distill-project-b/README.md @@ -0,0 +1,3 @@ +# other-tool + +A CLI tool by Bob. 
diff --git a/tests/fixtures/distill-project-b/assets/logo.png b/tests/fixtures/distill-project-b/assets/logo.png new file mode 100644 index 0000000..2371c64 --- /dev/null +++ b/tests/fixtures/distill-project-b/assets/logo.png @@ -0,0 +1 @@ +‰PNG \ No newline at end of file diff --git a/tests/fixtures/distill-project-b/src/main.rs b/tests/fixtures/distill-project-b/src/main.rs new file mode 100644 index 0000000..3195fa5 --- /dev/null +++ b/tests/fixtures/distill-project-b/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello from other-tool!"); +} diff --git a/tests/fixtures/distill-project-c/Cargo.toml b/tests/fixtures/distill-project-c/Cargo.toml new file mode 100644 index 0000000..0c5e66d --- /dev/null +++ b/tests/fixtures/distill-project-c/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "third-thing" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = "4" diff --git a/tests/fixtures/distill-project-c/LICENSE b/tests/fixtures/distill-project-c/LICENSE new file mode 100644 index 0000000..8ab70c0 --- /dev/null +++ b/tests/fixtures/distill-project-c/LICENSE @@ -0,0 +1 @@ +MIT \ No newline at end of file diff --git a/tests/fixtures/distill-project-c/README.md b/tests/fixtures/distill-project-c/README.md new file mode 100644 index 0000000..9ac2f66 --- /dev/null +++ b/tests/fixtures/distill-project-c/README.md @@ -0,0 +1,3 @@ +# third-thing + +A CLI tool by Carol. 
diff --git a/tests/fixtures/distill-project-c/src/main.rs b/tests/fixtures/distill-project-c/src/main.rs
new file mode 100644
index 0000000..6c2fb80
--- /dev/null
+++ b/tests/fixtures/distill-project-c/src/main.rs
@@ -0,0 +1,3 @@
+fn main() {
+    println!("Hello from third-thing!");
+}
diff --git a/tests/integration.rs b/tests/integration.rs
index bee61fc..7adaa0e 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -1105,3 +1105,268 @@ fn test_extract_min_confidence_filters() {
         "high min_confidence should filter out all candidates"
     );
 }
+
+// ── Distill command tests ─────────────────────────────────────────────────────
+
+use diecut::distill::{execute_distill, plan_distill, DistillOptions};
+
+/// Distilling project-a and project-b with `project_name = my-tool` yields a plan whose
+/// `project_name` variable is active and whose README carries the `.die` suffix; executing
+/// the plan writes `diecut.toml` and a `template/` directory.
+#[test]
+fn distill_two_projects_produces_template() {
+    let output = tempfile::tempdir().unwrap();
+    let output_dir = output.path().join("my-cli-template");
+
+    let options = DistillOptions {
+        projects: vec![
+            fixture_path("distill-project-a"),
+            fixture_path("distill-project-b"),
+        ],
+        variables: vec![("project_name".to_string(), "my-tool".to_string())],
+        output_dir: output_dir.clone(),
+        max_depth: None,
+        dry_run: false,
+        force: false,
+    };
+
+    let plan = plan_distill(options).unwrap();
+
+    // project_name should be active (my-tool vs other-tool differ)
+    assert!(
+        plan.active_variables
+            .iter()
+            .any(|v| v.name == "project_name"),
+        "project_name should be active"
+    );
+    assert!(
+        plan.suppressed_variables.is_empty(),
+        "no variables should be suppressed, got: {:?}",
+        plan.suppressed_variables
+            .iter()
+            .map(|(n, _)| n.as_str())
+            .collect::<Vec<_>>()
+    );
+
+    // Common files: README.md, Cargo.toml, src/main.rs, .gitignore, assets/logo.png
+    assert!(!plan.files.is_empty(), "plan should have files");
+
+    let file_paths: Vec<String> = plan
+        .files
+        .iter()
+        .map(|f| f.template_path.to_string_lossy().to_string())
+        .collect();
+
+    // README.md has "my-tool" -> gets .die suffix
+    assert!(
+        file_paths
+            .iter()
+            .any(|p| p.contains("README") && p.ends_with(".die")),
+        "README.md with replacements should have .die suffix, got: {file_paths:?}"
+    );
+
+    // config_toml should contain project_name variable
+    assert!(plan.config_toml.contains("[variables.project_name]"));
+
+    // Execute and verify output structure
+    execute_distill(&plan).unwrap();
+
+    assert!(
+        output_dir.join("diecut.toml").exists(),
+        "diecut.toml should be written"
+    );
+    assert!(
+        output_dir.join("template").exists(),
+        "template/ directory should be created"
+    );
+}
+
+/// A variable whose value never occurs in the shared files (`ghost_var`) is reported in
+/// `suppressed_variables`, while `project_name` stays active.
+#[test]
+fn distill_suppresses_static_variable() {
+    let output = tempfile::tempdir().unwrap();
+    let output_dir = output.path().join("out");
+
+    // "notpresent" (the value given for `ghost_var`) does not appear in any file shared
+    // by the two projects, so the literal is never found in the first project's content
+    // and the variable is expected to be suppressed.
+    let options = DistillOptions {
+        projects: vec![
+            fixture_path("distill-project-a"),
+            fixture_path("distill-project-b"),
+        ],
+        variables: vec![
+            ("project_name".to_string(), "my-tool".to_string()),
+            ("ghost_var".to_string(), "notpresent".to_string()),
+        ],
+        output_dir,
+        max_depth: None,
+        dry_run: true,
+        force: false,
+    };
+
+    let plan = plan_distill(options).unwrap();
+
+    assert!(
+        plan.active_variables
+            .iter()
+            .any(|v| v.name == "project_name"),
+        "project_name should be active"
+    );
+    assert!(
+        plan.suppressed_variables.iter().any(|(n, _)| n == "ghost_var"),
+        "ghost_var should be suppressed (literal 'notpresent' not in any shared file), suppressed: {:?}, active: {:?}",
+        plan.suppressed_variables,
+        plan.active_variables.iter().map(|v| &v.name).collect::<Vec<_>>()
+    );
+}
+
+/// With three projects, only files present in every project make it into the plan.
+#[test]
+fn distill_three_projects_intersection() {
+    let output = tempfile::tempdir().unwrap();
+    let output_dir = output.path().join("out");
+
+    // project-c has no .gitignore and no assets/logo.png, and has a LICENSE
+    // so the intersection should only include: README.md, Cargo.toml, src/main.rs
+    let options = DistillOptions {
+        projects: vec![
+            fixture_path("distill-project-a"),
+            fixture_path("distill-project-b"),
+            fixture_path("distill-project-c"),
+        ],
+        variables: vec![("project_name".to_string(), "my-tool".to_string())],
+        output_dir,
+        max_depth: None,
+        dry_run: true,
+        force: false,
+    };
+
+    let plan = plan_distill(options).unwrap();
+
+    let file_paths: Vec<String> = plan
+        .files
+        .iter()
+        .map(|f| f.template_path.to_string_lossy().to_string())
+        .collect();
+
+    // LICENSE is only in project-c, should NOT be in intersection
+    assert!(
+        !file_paths.iter().any(|p| p.contains("LICENSE")),
+        "LICENSE should not be in output (only in project-c), got: {file_paths:?}"
+    );
+
+    // .gitignore is only in project-a and project-b, NOT in project-c
+    assert!(
+        !file_paths.iter().any(|p| p.contains(".gitignore")),
+        ".gitignore should not be in output (not in project-c), got: {file_paths:?}"
+    );
+
+    // assets/logo.png is only in project-a and project-b, NOT in project-c
+    assert!(
+        !file_paths.iter().any(|p| p.contains("logo.png")),
+        "assets/logo.png should not be in output (not in project-c), got: {file_paths:?}"
+    );
+
+    // README.md, Cargo.toml, src/main.rs ARE in all three
+    assert!(
+        file_paths.iter().any(|p| p.contains("README")),
+        "README.md should be in output (present in all 3), got: {file_paths:?}"
+    );
+    assert!(
+        file_paths.iter().any(|p| p.contains("Cargo.toml")),
+        "Cargo.toml should be in output (present in all 3), got: {file_paths:?}"
+    );
+    assert!(
+        file_paths.iter().any(|p| p.contains("main.rs")),
+        "src/main.rs should be in output (present in all 3), got: {file_paths:?}"
+    );
+}
+
+/// Planning with a single project is rejected with an error.
+#[test]
+fn distill_errors_with_one_project() {
+    let output = tempfile::tempdir().unwrap();
+    let output_dir = output.path().join("out");
+
+    let options = DistillOptions {
+        projects: vec![fixture_path("distill-project-a")],
+        variables: vec![("project_name".to_string(), "my-tool".to_string())],
+        output_dir,
+        max_depth: None,
+        dry_run: true,
+        force: false,
+    };
+
+    let result = plan_distill(options);
+    assert!(result.is_err(), "should fail with only 1 project");
+
+    let err_str = result.err().unwrap().to_string();
+    assert!(
+        err_str.contains("2") || err_str.contains("least"),
+        "error should mention requiring at least 2 projects, got: {err_str}"
+    );
+}
+
+/// Round trip: a template distilled from two projects can be fed to `diecut::generate`,
+/// and the generated README carries the new project name instead of the old one.
+#[test]
+fn distill_then_new_produces_working_project() {
+    let project_a = fixture_path("distill-project-a");
+    let project_b = fixture_path("distill-project-b");
+    let template_dir = tempfile::tempdir().unwrap();
+    let output_dir = tempfile::tempdir().unwrap();
+
+    // Step 1: Distill a template from the two projects
+    let plan = plan_distill(DistillOptions {
+        projects: vec![project_a, project_b],
+        variables: vec![("project_name".to_string(), "my-tool".to_string())],
+        output_dir: template_dir.path().to_path_buf(),
+        max_depth: None,
+        dry_run: false,
+        // The tempdir already exists on disk, so `force` is set to permit writing into it.
+        force: true,
+    })
+    .unwrap();
+    execute_distill(&plan).unwrap();
+
+    // Verify diecut.toml exists and contains the variable
+    let config_content = std::fs::read_to_string(template_dir.path().join("diecut.toml")).unwrap();
+    assert!(
+        config_content.contains("project_name"),
+        "diecut.toml should contain project_name variable"
+    );
+
+    // Step 2: Generate a new project from the distilled template
+    let new_project_path = output_dir.path().join("new-project");
+    let gen_result = diecut::generate(diecut::GenerateOptions {
+        template: template_dir.path().to_string_lossy().to_string(),
+        output: Some(new_project_path.to_string_lossy().to_string()),
+        data: vec![("project_name".to_string(), "brand-new-tool".to_string())],
+        defaults: true,
+        overwrite: false,
+        no_hooks: true,
+    });
+    assert!(
+        gen_result.is_ok(),
+        "diecut new failed: {:?}",
+        gen_result.err()
+    );
+
+    // Step 3: Verify the generated project has correct substitutions in README.md
+    let readme_path = new_project_path.join("README.md");
+    assert!(
+        readme_path.exists(),
+        "README.md should exist in generated project"
+    );
+    let readme = std::fs::read_to_string(&readme_path).unwrap();
+    assert!(
+        readme.contains("brand-new-tool"),
+        "README should contain new project name, got: {readme}"
+    );
+    assert!(
+        !readme.contains("my-tool"),
+        "README should not contain old project name, got: {readme}"
+    );
+}