From 100695c02d1da1485887d85bcebc522432ed269f Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 00:31:48 -0500
Subject: [PATCH 01/29] feat: add `diecut extract` command to create templates
 from existing projects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Automates the biggest friction point in diecut: turning an existing project
into a reusable template. Point it at a project, tell it which values are
variables, and it produces a ready-to-use template with a diecut.toml,
.die-suffixed files, and computed case variants.

Key capabilities:
- Auto-detects case variants (kebab, snake, PascalCase, SCREAMING_SNAKE, etc.)
- Longest-match-first replacement prevents overlapping value corruption
- Templates path components (my-app/src/ → {{ project_name }}/src/)
- Detects conditional files (.github/, Dockerfile, etc.) for optional inclusion
- Interactive by default with --batch for CI/scripting
- --dry-run to preview without writing
- Generates commented diecut.toml with prompted + computed variables
---
 src/cli.rs                 |  26 ++
 src/commands/extract.rs    |  95 ++++++
 src/commands/mod.rs        |   1 +
 src/error.rs               |  20 ++
 src/extract/conditional.rs | 170 +++++++++
 src/extract/config_gen.rs  | 206 +++++++++++
 src/extract/exclude.rs     | 216 ++++++++++++
 src/extract/mod.rs         | 682 +++++++++++++++++++++++++++++++++++++
 src/extract/replace.rs     | 143 ++++++++
 src/extract/scan.rs        | 147 ++++++++
 src/extract/variants.rs    | 329 ++++++++++++++++++
 src/lib.rs                 |   1 +
 src/main.rs                |   8 +
 tests/integration.rs       | 293 ++++++++++++++++
 14 files changed, 2337 insertions(+)
 create mode 100644 src/commands/extract.rs
 create mode 100644 src/extract/conditional.rs
 create mode 100644 src/extract/config_gen.rs
 create mode 100644 src/extract/exclude.rs
 create mode 100644 src/extract/mod.rs
 create mode 100644 src/extract/replace.rs
 create mode 100644 src/extract/scan.rs
 create mode 100644 src/extract/variants.rs
diff --git a/src/cli.rs b/src/cli.rs
index ab84986..a051ff6 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -49,4 +49,30 @@ pub enum Commands {
 
     /// List cached templates
     List,
+
+    /// Extract a template from an existing project
+    Extract {
+        /// Source project directory
+        source: String,
+
+        /// Variable values to templatize (can be repeated: --var key=value)
+        #[arg(long = "var", value_name = "KEY=VALUE")]
+        vars: Vec<String>,
+
+        /// Output directory for the extracted template
+        #[arg(short, long)]
+        output: Option<String>,
+
+        /// Convert the source directory in-place
+        #[arg(long)]
+        in_place: bool,
+
+        /// Skip all interactive prompts
+        #[arg(long)]
+        batch: bool,
+
+        /// Show what would be extracted without writing files
+        #[arg(long)]
+        dry_run: bool,
+    },
 }
diff --git a/src/commands/extract.rs b/src/commands/extract.rs
new file mode 100644
index 0000000..86fee13
--- /dev/null
+++ b/src/commands/extract.rs
@@ -0,0 +1,95 @@
+use std::path::PathBuf;
+
+use console::style;
+
+use diecut::error::DicecutError;
+use diecut::extract::{execute_extraction, plan_extraction, ExtractOptions};
+use miette::Result;
+
+/// Entry point for `diecut extract`: parses the `--var` pairs, plans the
+/// extraction, then either prints the plan (`--dry-run`) or executes it.
+pub fn run(
+    source: String,
+    vars: Vec<String>,
+    output: Option<String>,
+    in_place: bool,
+    batch: bool,
+    dry_run: bool,
+) -> Result<()> {
+    let options = ExtractOptions {
+        source_dir: PathBuf::from(&source),
+        variables: parse_vars(&vars)?,
+        output_dir: output.map(PathBuf::from),
+        in_place,
+        batch,
+        dry_run,
+    };
+
+    let plan = plan_extraction(&options)?;
+
+    // A dry run only reports the plan and stops before execution.
+    if dry_run {
+        print_dry_run(&plan);
+    } else {
+        execute_extraction(&plan, in_place)?;
+    }
+
+    Ok(())
+}
+
+/// Parse `--var KEY=VALUE` arguments into trimmed `(key, value)` pairs.
+/// Fails with `ExtractNoVariables` when an argument has no `=` or an empty key.
+fn parse_vars(vars: &[String]) -> diecut::error::Result<Vec<(String, String)>> {
+    vars.iter()
+        .map(|var| {
+            var.split_once('=')
+                .map(|(key, value)| (key.trim().to_string(), value.trim().to_string()))
+                .filter(|(key, _)| !key.is_empty())
+                .ok_or(DicecutError::ExtractNoVariables)
+        })
+        .collect()
+}
+
+/// Print a preview of `plan` to stderr: output directory, files, variables, and config.
+fn print_dry_run(plan: &diecut::extract::ExtractionPlan) {
+    eprintln!(
+        "\n{} Dry run — no files will be written\n",
+        style("⚡").yellow().bold()
+    );
+    eprintln!(
+        "Output directory: {}",
+        style(plan.output_dir.display()).cyan()
+    );
+
+    // Split the planned files by whether they are flagged as having replacements.
+    let (templated, copied): (Vec<_>, Vec<_>) =
+        plan.files.iter().partition(|file| file.has_replacements);
+
+    eprintln!("\nTemplated files ({}):", templated.len());
+    for file in &templated {
+        eprintln!(
+            "  {} ({} replacements)",
+            file.template_path.display(),
+            file.replacement_count
+        );
+    }
+
+    eprintln!("\nCopied verbatim ({}):", copied.len());
+    for file in &copied {
+        eprintln!("  {}", file.template_path.display());
+    }
+
+    eprintln!("\nVariables:");
+    for var in &plan.variables {
+        eprintln!("  {} = {:?}", var.name, var.value);
+        for variant in var.variants.iter().filter(|v| v.name != "verbatim") {
+            eprintln!("    {} → {}", variant.name, variant.literal);
+        }
+    }
+
+    let rule = style("─".repeat(60)).dim();
+    eprintln!("\nGenerated diecut.toml:");
+    eprintln!("{rule}");
+    eprint!("{}", plan.config_toml);
+    eprintln!("{rule}");
+}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
index 33661b9..8c884a4 100644
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -1,2 +1,3 @@
+pub mod extract;
 pub mod list;
 pub mod new;
diff --git a/src/error.rs b/src/error.rs
index 834b7d2..d6a4ee4 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -117,6 +117,26 @@ pub enum DicecutError {
         #[source]
         source: toml::de::Error,
     },
+
+    #[error("Source directory not found: {path}")]
+    #[diagnostic(help("Provide the path to an existing project directory"))]
+    ExtractSourceNotFound { path: PathBuf },
+
+    #[error("No variables provided for extraction")]
+    #[diagnostic(help(
+        "Use --var key=value to specify which values should become template variables"
+    ))]
+    ExtractNoVariables,
+
+    #[error("Output directory already exists: {path}")]
+    #[diagnostic(help(
+        "Choose a different output path with -o, or remove the existing directory"
+    ))]
+    ExtractOutputExists { path: PathBuf },
+
+    #[error("Directory already contains a diecut.toml: {path}")]
+    #[diagnostic(help("This directory is already a diecut template"))]
+    ExtractAlreadyTemplate { path: PathBuf },
 }
 
 pub type Result<T> = std::result::Result<T, DicecutError>;
diff --git a/src/extract/conditional.rs b/src/extract/conditional.rs
new file mode 100644
index 0000000..67e7346
--- /dev/null
+++ b/src/extract/conditional.rs
@@ -0,0 +1,170 @@
+use std::path::Path;
+
+/// A known optional file pattern that can be made conditional in the template.
+#[derive(Debug, Clone)]
+pub struct ConditionalPattern {
+    /// Path relative to the project root; a trailing `/**` denotes a directory.
+    pub pattern: &'static str,
+    /// Name of the variable that controls inclusion of matching files.
+    pub variable: &'static str,
+    /// Human-readable description.
+    pub description: &'static str,
+}
+
+/// Known optional file patterns, probed in this order by `detect_conditional_files`.
+const KNOWN_PATTERNS: &[ConditionalPattern] = &[
+    ConditionalPattern {
+        pattern: ".github/**",
+        variable: "use_github_actions",
+        description: "GitHub Actions CI",
+    },
+    ConditionalPattern {
+        pattern: ".gitlab-ci.yml",
+        variable: "use_gitlab_ci",
+        description: "GitLab CI",
+    },
+    ConditionalPattern {
+        pattern: "Dockerfile",
+        variable: "use_docker",
+        description: "Docker support",
+    },
+    ConditionalPattern {
+        pattern: "docker-compose.yml",
+        variable: "use_docker",
+        description: "Docker support",
+    },
+    ConditionalPattern {
+        pattern: "docker-compose.yaml",
+        variable: "use_docker",
+        description: "Docker support",
+    },
+    ConditionalPattern {
+        pattern: ".pre-commit-config.yaml",
+        variable: "use_pre_commit",
+        description: "Pre-commit hooks",
+    },
+    ConditionalPattern {
+        pattern: "Makefile",
+        variable: "use_make",
+        description: "Make build system",
+    },
+    ConditionalPattern {
+        pattern: "Justfile",
+        variable: "use_just",
+        description: "Just command runner",
+    },
+    ConditionalPattern {
+        pattern: ".editorconfig",
+        variable: "use_editorconfig",
+        description: "EditorConfig",
+    },
+    ConditionalPattern {
+        pattern: "renovate.json",
+        variable: "use_renovate",
+        description: "Renovate dependency updates",
+    },
+    ConditionalPattern {
+        pattern: ".renovaterc",
+        variable: "use_renovate",
+        description: "Renovate dependency updates",
+    },
+    ConditionalPattern {
+        pattern: ".github/dependabot.yml",
+        variable: "use_dependabot",
+        description: "Dependabot",
+    },
+    ConditionalPattern {
+        pattern: ".husky/**",
+        variable: "use_husky",
+        description: "Git hooks (JS)",
+    },
+];
+
+/// A known optional pattern found on disk by `detect_conditional_files`.
+#[derive(Debug, Clone)]
+pub struct DetectedConditional {
+    /// The first known pattern for `variable` that exists in the project.
+    pub pattern: String,
+    /// The variable name to control this pattern.
+    pub variable: String,
+    /// Human-readable description.
+    pub description: String,
+}
+
+/// Report the known optional patterns whose file or directory exists under
+/// `project_dir`, in `KNOWN_PATTERNS` order.
+///
+/// Only the first existing pattern per variable is reported, so several Docker
+/// files still produce a single `use_docker` entry.
+pub fn detect_conditional_files(project_dir: &Path) -> Vec<DetectedConditional> {
+    let mut claimed = std::collections::HashSet::new();
+
+    KNOWN_PATTERNS
+        .iter()
+        .filter(|known| {
+            // For a `dir/**` pattern only the directory itself is probed.
+            let probe = match known.pattern.split_once("/**") {
+                Some((dir, _)) => dir,
+                None => known.pattern,
+            };
+
+            // `insert` returns false once a variable has already been reported.
+            project_dir.join(probe).exists() && claimed.insert(known.variable)
+        })
+        .map(|known| DetectedConditional {
+            pattern: known.pattern.to_string(),
+            variable: known.variable.to_string(),
+            description: known.description.to_string(),
+        })
+        .collect()
+}
+
+/// Collect every pattern in `KNOWN_PATTERNS` controlled by `variable`, in table order.
+pub fn patterns_for_variable(variable: &str) -> Vec<&'static str> {
+    let mut patterns = Vec::new();
+    for known in KNOWN_PATTERNS.iter().filter(|k| k.variable == variable) {
+        patterns.push(known.pattern);
+    }
+    patterns
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_detect_conditional_files_github() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::create_dir_all(dir.path().join(".github/workflows")).unwrap();
+
+        let detected = detect_conditional_files(dir.path());
+        assert_eq!(detected.len(), 1);
+        assert_eq!(detected[0].variable, "use_github_actions");
+    }
+
+    #[test]
+    fn test_detect_conditional_files_docker() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(dir.path().join("Dockerfile"), "FROM alpine").unwrap();
+        std::fs::write(dir.path().join("docker-compose.yml"), "version: '3'").unwrap();
+
+        let detected = detect_conditional_files(dir.path());
+        // Should deduplicate by variable name
+        assert_eq!(detected.len(), 1);
+        assert_eq!(detected[0].variable, "use_docker");
+    }
+
+    #[test]
+    fn test_detect_conditional_files_empty() {
+        let dir = tempfile::tempdir().unwrap();
+        let detected = detect_conditional_files(dir.path());
+        assert!(detected.is_empty());
+    }
+
+    #[test]
+    fn test_patterns_for_variable() {
+        let docker_patterns = patterns_for_variable("use_docker");
+        assert!(docker_patterns.contains(&"Dockerfile"));
+        assert!(docker_patterns.contains(&"docker-compose.yml"));
+    }
+}
diff --git a/src/extract/config_gen.rs b/src/extract/config_gen.rs
new file mode 100644
index 0000000..5fd8222
--- /dev/null
+++ b/src/extract/config_gen.rs
@@ -0,0 +1,206 @@
+/// A string variable written as `[variables.<name>]` with a `prompt` and a `default`.
+pub struct PromptedVariable {
+    pub name: String,
+    pub default_value: String,
+    pub prompt: String,
+}
+
+/// A string variable written as `[variables.<name>]` with a `computed` expression.
+pub struct ComputedVariable {
+    pub name: String,
+    pub expression: String,
+}
+
+/// Patterns gated by one boolean variable; each yields a `[[files.conditional]]` table.
+#[derive(Debug, Clone)]
+pub struct ConditionalEntry {
+    pub patterns: Vec<String>,
+    pub variable: String,
+    pub description: String,
+}
+
+/// Everything `generate_config_toml` needs to render a `diecut.toml`.
+pub struct ConfigGenOptions {
+    pub template_name: String,
+    pub prompted_variables: Vec<PromptedVariable>,
+    pub computed_variables: Vec<ComputedVariable>,
+    pub exclude_patterns: Vec<String>,
+    pub copy_without_render: Vec<String>,
+    pub conditional_entries: Vec<ConditionalEntry>,
+}
+
+/// Render the text of a `diecut.toml` for an extracted template.
+///
+/// The file is built by hand rather than serialized with the `toml` crate, which
+/// cannot emit comments; users are expected to read and edit the result.
+///
+/// Sections appear in a fixed order: `[template]`, prompted variables, one
+/// boolean variable per conditional entry, computed variables, `[files]`, one
+/// `[[files.conditional]]` table per pattern, and a commented-out `[hooks]`.
+///
+/// Variable names are written into `[variables.<name>]` headers unquoted, so
+/// they must already be valid bare TOML keys; string values are escaped with
+/// `escape_toml_string`.
+pub fn generate_config_toml(options: &ConfigGenOptions) -> String {
+    // Appends `key = [ ... ]` with one escaped item per line, or nothing when
+    // `items` is empty.
+    fn push_string_array(buf: &mut String, key: &str, items: &[String]) {
+        if items.is_empty() {
+            return;
+        }
+        *buf += &format!("{key} = [\n");
+        for item in items {
+            *buf += &format!("    {},\n", escape_toml_string(item));
+        }
+        *buf += "]\n";
+    }
+
+    let mut buf = String::new();
+
+    // Template metadata; the description is left commented out for the user.
+    buf += "[template]\n";
+    buf += &format!("name = {}\n", escape_toml_string(&options.template_name));
+    buf += "version = \"1.0.0\"\n";
+    buf += "# description = \"A project template\"\n";
+    buf += "\n";
+
+    // The explanatory banner is skipped when there are no string variables.
+    let has_string_vars =
+        !(options.prompted_variables.is_empty() && options.computed_variables.is_empty());
+    if has_string_vars {
+        buf += "# ── Variables ──────────────────────────────────────────\n";
+        buf += "# Prompted variables are asked during `diecut new`.\n";
+        buf += "# Computed variables are auto-derived and never prompted.\n";
+        buf += "\n";
+    }
+
+    // Prompted string variables, each with its prompt text and default.
+    for var in &options.prompted_variables {
+        buf += &format!("[variables.{}]\n", var.name);
+        buf += "type = \"string\"\n";
+        buf += &format!("prompt = {}\n", escape_toml_string(&var.prompt));
+        buf += &format!("default = {}\n", escape_toml_string(&var.default_value));
+        buf += "\n";
+    }
+
+    // One boolean variable, defaulting to true, per conditional entry.
+    for entry in &options.conditional_entries {
+        let question = format!("Include {}?", entry.description.to_lowercase());
+        buf += &format!("# {} ({})\n", entry.variable, entry.description);
+        buf += &format!("[variables.{}]\n", entry.variable);
+        buf += "type = \"bool\"\n";
+        buf += &format!("prompt = {}\n", escape_toml_string(&question));
+        buf += "default = true\n";
+        buf += "\n";
+    }
+
+    // Computed string variables carry an expression instead of a prompt.
+    for var in &options.computed_variables {
+        buf += &format!("[variables.{}]\n", var.name);
+        buf += "type = \"string\"\n";
+        buf += &format!("computed = {}\n", escape_toml_string(&var.expression));
+        buf += "\n";
+    }
+
+    // `[files]` is always emitted, even when both lists are empty.
+    buf += "# ── Files ─────────────────────────────────────────────\n";
+    buf += "[files]\n";
+
+    push_string_array(&mut buf, "exclude", &options.exclude_patterns);
+    push_string_array(&mut buf, "copy_without_render", &options.copy_without_render);
+
+    buf += "\n";
+
+    // Every pattern of an entry becomes its own `[[files.conditional]]` table.
+    for entry in &options.conditional_entries {
+        for pattern in &entry.patterns {
+            buf += &format!("# {}\n", entry.description);
+            buf += "[[files.conditional]]\n";
+            buf += &format!("pattern = {}\n", escape_toml_string(pattern));
+            buf += &format!("when = {}\n", escape_toml_string(&entry.variable));
+            buf += "\n";
+        }
+    }
+
+    // Hooks are only suggested, as commented-out lines.
+    buf += "# ── Hooks ─────────────────────────────────────────────\n";
+    buf += "# [hooks]\n";
+    buf += "# post_create = \"echo 'Project created!'\"\n";
+
+    buf
+}
+
+/// Render `s` as a quoted TOML string value, delegating escaping to the `toml` crate.
+fn escape_toml_string(s: &str) -> String {
+    toml::Value::from(s).to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_generate_config_basic() {
+        let options = ConfigGenOptions {
+            template_name: "my-template".to_string(),
+            prompted_variables: vec![PromptedVariable {
+                name: "project_name".to_string(),
+                default_value: "my-app".to_string(),
+                prompt: "Project name".to_string(),
+            }],
+            computed_variables: vec![ComputedVariable {
+                name: "project_name_snake".to_string(),
+                expression: "project_name | replace(from=\"-\", to=\"_\")".to_string(),
+            }],
+            exclude_patterns: vec![".git/".to_string()],
+            copy_without_render: vec!["*.png".to_string()],
+            conditional_entries: vec![],
+        };
+
+        let toml = generate_config_toml(&options);
+
+        assert!(toml.contains("[template]"));
+        assert!(toml.contains("name = \"my-template\""));
+        assert!(toml.contains("[variables.project_name]"));
+        assert!(toml.contains("type = \"string\""));
+        assert!(toml.contains("[variables.project_name_snake]"));
+        assert!(toml.contains("computed ="));
+        assert!(toml.contains("[files]"));
+        assert!(toml.contains("\".git/\""));
+        assert!(toml.contains("\"*.png\""));
+    }
+
+    #[test]
+    fn test_generate_config_with_conditionals() {
+        let options = ConfigGenOptions {
+            template_name: "test".to_string(),
+            prompted_variables: vec![],
+            computed_variables: vec![],
+            exclude_patterns: vec![],
+            copy_without_render: vec![],
+            conditional_entries: vec![ConditionalEntry {
+                patterns: vec![".github/**".to_string()],
+                variable: "use_github_actions".to_string(),
+                description: "GitHub Actions CI".to_string(),
+            }],
+        };
+
+        let toml = generate_config_toml(&options);
+
+        assert!(toml.contains("[variables.use_github_actions]"));
+        assert!(toml.contains("type = \"bool\""));
+        assert!(toml.contains("default = true"));
+        assert!(toml.contains("[[files.conditional]]"));
+        assert!(toml.contains("pattern = \".github/**\""));
+        assert!(toml.contains("when = \"use_github_actions\""));
+    }
+
+    #[test]
+    fn test_escape_toml_string() {
+        assert_eq!(escape_toml_string("hello"), "\"hello\"");
+        // toml crate uses multi-line strings for values containing quotes
+        let escaped = escape_toml_string("it's \"fine\"");
+        assert!(escaped.contains("it's"));
+        assert!(escaped.contains("fine"));
+    }
+}
diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
new file mode 100644
index 0000000..8c4c082
--- /dev/null
+++ b/src/extract/exclude.rs
@@ -0,0 +1,216 @@
+use std::path::Path;
+
+/// Default exclude candidates. A trailing `/` is ignored when probing and matching.
+const DEFAULT_EXCLUDES: &[&str] = &[
+    ".git",
+    ".git/",
+    ".hg",
+    ".svn",
+    "node_modules",
+    "node_modules/",
+    ".DS_Store",
+    "Thumbs.db",
+    "__pycache__",
+    "__pycache__/",
+    "*.pyc",
+    ".tox",
+    ".nox",
+    ".mypy_cache",
+    ".ruff_cache",
+    ".pytest_cache",
+    "target",
+    "target/",
+    ".venv",
+    ".env",
+    "dist",
+    "build",
+    ".next",
+    ".nuxt",
+    ".output",
+    ".turbo",
+    ".diecut-answers.toml",
+];
+
+/// Files copied without rendering: `*.ext` entries match by extension, others by exact name.
+const DEFAULT_COPY_WITHOUT_RENDER: &[&str] = &[
+    "*.png",
+    "*.jpg",
+    "*.jpeg",
+    "*.gif",
+    "*.ico",
+    "*.svg",
+    "*.webp",
+    "*.woff",
+    "*.woff2",
+    "*.ttf",
+    "*.eot",
+    "*.otf",
+    "*.zip",
+    "*.tar",
+    "*.gz",
+    "*.bz2",
+    "*.xz",
+    "*.pdf",
+    "*.lock",
+    "package-lock.json",
+    "yarn.lock",
+    "pnpm-lock.yaml",
+    "Cargo.lock",
+    "Gemfile.lock",
+    "poetry.lock",
+    "composer.lock",
+];
+
+/// Select the `DEFAULT_EXCLUDES` entries that apply to `project_dir`.
+///
+/// Entries containing `*` are always kept. Any other entry is kept only when
+/// the path it names (ignoring a trailing `/`) exists directly under
+/// `project_dir`. Kept entries are returned verbatim, in table order.
+pub fn detect_excludes(project_dir: &Path) -> Vec<String> {
+    // Glob entries skip the existence check; literal entries are probed on disk.
+    let applies = |pattern: &str| {
+        let bare = pattern.trim_end_matches('/');
+        bare.contains('*') || project_dir.join(bare).exists()
+    };
+
+    DEFAULT_EXCLUDES
+        .iter()
+        .filter(|&&pattern| applies(pattern))
+        .map(|pattern| pattern.to_string())
+        .collect()
+}
+
+/// Select the `DEFAULT_COPY_WITHOUT_RENDER` patterns matched by at least one of
+/// `files`: `*.ext` patterns compare extensions (ignoring ASCII case), all other
+/// patterns compare the exact file name. `_project_dir` is unused.
+pub fn detect_copy_without_render(
+    _project_dir: &Path,
+    files: &[std::path::PathBuf],
+) -> Vec<String> {
+    let any_with_extension = |ext: &str| {
+        files
+            .iter()
+            .filter_map(|file| file.extension())
+            .any(|found| found.to_string_lossy().eq_ignore_ascii_case(ext))
+    };
+    let any_named = |name: &str| {
+        files
+            .iter()
+            .filter_map(|file| file.file_name())
+            .any(|found| found.to_string_lossy() == name)
+    };
+
+    DEFAULT_COPY_WITHOUT_RENDER
+        .iter()
+        .filter(|&&pattern| {
+            if pattern.starts_with('*') {
+                any_with_extension(pattern.trim_start_matches("*."))
+            } else {
+                any_named(pattern)
+            }
+        })
+        .map(|pattern| pattern.to_string())
+        .collect()
+}
+
+/// Decide whether `relative_path` is covered by any of the `excludes` patterns.
+///
+/// A trailing `/` on a pattern is ignored. `*.ext` patterns match the path's
+/// extension, ignoring ASCII case; any other pattern containing `*` matches
+/// nothing. A literal pattern matches when it equals a single path component at
+/// any depth, the whole path, or a leading `pattern/` prefix of the path.
+///
+/// Returns `false` when `excludes` is empty.
+pub fn should_exclude(relative_path: &Path, excludes: &[String]) -> bool {
+    let path_str = relative_path.to_string_lossy();
+
+    excludes.iter().any(|pattern| {
+        let clean = pattern.trim_end_matches('/');
+
+        if clean.contains('*') {
+            // Only `*.ext` globs are understood here.
+            return match (clean.strip_prefix("*."), relative_path.extension()) {
+                (Some(ext), Some(found)) => found.to_string_lossy().eq_ignore_ascii_case(ext),
+                _ => false,
+            };
+        }
+
+        // Literal pattern: first try each component, wherever it sits in the path.
+        let is_component = relative_path.components().any(|component| match component {
+            std::path::Component::Normal(name) => name.to_string_lossy() == clean,
+            _ => false,
+        });
+
+        // Otherwise compare against the whole path or its leading directories.
+        is_component
+            || path_str == clean
+            || path_str
+                .strip_prefix(clean)
+                .map_or(false, |rest| rest.starts_with('/'))
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+
+    #[test]
+    fn test_should_exclude_git() {
+        let excludes = vec![".git/".to_string()];
+        assert!(should_exclude(Path::new(".git/config"), &excludes));
+        assert!(should_exclude(Path::new(".git/HEAD"), &excludes));
+    }
+
+    #[test]
+    fn test_should_exclude_node_modules() {
+        let excludes = vec!["node_modules".to_string()];
+        assert!(should_exclude(
+            Path::new("node_modules/express/index.js"),
+            &excludes
+        ));
+    }
+
+    #[test]
+    fn test_should_exclude_glob() {
+        let excludes = vec!["*.pyc".to_string()];
+        assert!(should_exclude(
+            Path::new("module/__pycache__/foo.pyc"),
+            &excludes
+        ));
+        assert!(!should_exclude(Path::new("module/foo.py"), &excludes));
+    }
+
+    #[test]
+    fn test_should_not_exclude_normal_file() {
+        let excludes = vec![".git/".to_string(), "node_modules".to_string()];
+        assert!(!should_exclude(Path::new("src/main.rs"), &excludes));
+        assert!(!should_exclude(Path::new("README.md"), &excludes));
+    }
+
+    #[test]
+    fn test_detect_excludes() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::create_dir(dir.path().join(".git")).unwrap();
+        std::fs::write(dir.path().join(".DS_Store"), "").unwrap();
+
+        let found = detect_excludes(dir.path());
+        assert!(found.iter().any(|e| e.contains(".git")));
+        assert!(found.iter().any(|e| e == ".DS_Store"));
+        // Glob patterns should always be included
+        assert!(found.iter().any(|e| e == "*.pyc"));
+    }
+
+    #[test]
+    fn test_detect_copy_without_render() {
+        let files = vec![
+            PathBuf::from("logo.png"),
+            PathBuf::from("font.woff2"),
+            PathBuf::from("README.md"),
+        ];
+        let found = detect_copy_without_render(Path::new("."), &files);
+        assert!(found.contains(&"*.png".to_string()));
+        assert!(found.contains(&"*.woff2".to_string()));
+        assert!(!found.contains(&"*.jpg".to_string()));
+    }
+}
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
new file mode 100644
index 0000000..2364891
--- /dev/null
+++ b/src/extract/mod.rs
@@ -0,0 +1,682 @@
+pub mod conditional;
+pub mod config_gen;
+pub mod exclude;
+pub mod replace;
+pub mod scan;
+pub mod variants;
+
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+
+use console::style;
+use inquire::{Confirm, Text};
+
+use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
+use crate::error::{DicecutError, Result};
+
+use self::conditional::{detect_conditional_files, patterns_for_variable, DetectedConditional};
+use self::config_gen::{
+    generate_config_toml, ComputedVariable, ConditionalEntry, ConfigGenOptions, PromptedVariable,
+};
+use self::exclude::{detect_copy_without_render, detect_excludes};
+use self::replace::{
+    apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
+};
+use self::scan::{scan_project, ScannedFile};
+use self::variants::{computed_expression, detect_separator, generate_variants, CaseVariant};
+
+/// A user-supplied variable, its literal value, and the case variants kept for replacement.
+#[derive(Debug, Clone)]
+pub struct ExtractVariable {
+    pub name: String, // variable name; becomes a prompted variable in the generated config
+    pub value: String, // literal value as given by the user; used as the prompt default
+    pub variants: Vec<CaseVariant>, // starts as every generated variant, pruned on confirmation
+    /// Per-variant occurrence counts: (variant_name, file_count, total_hits), in generation order.
+    pub occurrence_counts: Vec<(String, usize, usize)>,
+}
+
+/// One file of the extracted template: either processed text or raw bytes.
+#[derive(Debug, Clone)]
+pub struct PlannedExtractFile {
+    /// Path relative to the template root; may contain template expressions and a templates suffix.
+    pub template_path: PathBuf,
+    /// Text content with replacements applied; `None` for binary files.
+    pub content: Option<String>,
+    /// Raw bytes read from disk for binary files; `None` for text files.
+    pub binary_content: Option<Vec<u8>>,
+    /// Whether any replacement was made in the content (path-only replacements do not count).
+    pub has_replacements: bool,
+    /// Number of content replacements made; 0 for binary files.
+    pub replacement_count: usize,
+    /// Whether this is a binary file.
+    pub is_binary: bool,
+}
+
+/// The full extraction plan, ready to be executed or reviewed; building it writes nothing.
+#[derive(Debug)]
+pub struct ExtractionPlan {
+    pub output_dir: PathBuf, // receives `diecut.toml` and the `template/` subdirectory
+    pub files: Vec<PlannedExtractFile>, // files to write beneath `output_dir/template`
+    pub config_toml: String, // full text of the generated `diecut.toml`
+    pub variables: Vec<ExtractVariable>, // confirmed variables with the variants that were kept
+    pub conditional_entries: Vec<ConditionalEntry>, // conditional patterns the user accepted
+    pub exclude_patterns: Vec<String>, // auto-detected excludes plus any the user added
+    pub copy_without_render: Vec<String>, // patterns from `detect_copy_without_render`
+}
+
+/// Options for the extraction process, as consumed by `plan_extraction`.
+pub struct ExtractOptions {
+    pub source_dir: PathBuf, // project to extract from; must exist and not contain `diecut.toml`
+    pub variables: Vec<(String, String)>, // (name, value) pairs; at least one is required
+    pub output_dir: Option<PathBuf>, // explicit destination; default is a sibling `<name>-template`
+    pub in_place: bool, // use `source_dir` itself as the output directory
+    pub batch: bool, // skip every interactive prompt
+    pub dry_run: bool, // not read by `plan_extraction`; presumably handled by the caller (TODO confirm)
+}
+
+/// Plan an extraction: scan the project, detect variants, build replacement rules.
+///
+/// Fails if the source is missing or already a template, if no variables were given, or if
+/// the (non in-place) output directory exists. Prompts interactively unless `batch` is set.
+pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
+    let source_dir = &options.source_dir;
+
+    if !source_dir.exists() {
+        return Err(DicecutError::ExtractSourceNotFound {
+            path: source_dir.clone(),
+        });
+    }
+
+    if options.variables.is_empty() {
+        return Err(DicecutError::ExtractNoVariables);
+    }
+
+    // Check if this is already a template
+    if source_dir.join("diecut.toml").exists() {
+        return Err(DicecutError::ExtractAlreadyTemplate {
+            path: source_dir.clone(),
+        });
+    }
+
+    let output_dir = if options.in_place {
+        source_dir.clone()
+    } else if let Some(ref out) = options.output_dir {
+        out.clone()
+    } else {
+        // Default: source dir name + "-template"
+        let dir_name = source_dir
+            .file_name()
+            .map(|n| n.to_string_lossy().to_string())
+            .unwrap_or_else(|| "template".to_string());
+        source_dir
+            .parent()
+            .unwrap_or(Path::new("."))
+            .join(format!("{dir_name}-template"))
+    };
+
+    if !options.in_place && output_dir.exists() {
+        return Err(DicecutError::ExtractOutputExists {
+            path: output_dir.clone(),
+        });
+    }
+
+    // Phase 1: Detect excludes
+    let mut excludes = detect_excludes(source_dir);
+
+    // Phase 2: Confirm excludes before scanning, so patterns the user adds also filter the scan
+    if !options.batch {
+        excludes = confirm_excludes_interactive(excludes)?;
+    }
+
+    // Phase 3: Scan project
+    eprintln!(
+        "\n{}",
+        style(format!("Scanning {}...", source_dir.display())).bold()
+    );
+    let scan_result = scan_project(source_dir, &excludes)?;
+    eprintln!(
+        "  {} files found, {} excluded",
+        scan_result.files.len(),
+        scan_result.excluded_count
+    );
+
+    // Phase 4: Generate variants and count occurrences
+    let mut extract_variables = Vec::new();
+
+    for (var_name, var_value) in &options.variables {
+        let all_variants = generate_variants(var_name, var_value);
+
+        let mut occurrence_counts = Vec::new();
+        for variant in &all_variants {
+            let (file_count, total_hits) =
+                count_variant_occurrences(&variant.literal, &scan_result.files);
+            occurrence_counts.push((variant.name.to_string(), file_count, total_hits));
+        }
+
+        extract_variables.push(ExtractVariable {
+            name: var_name.clone(),
+            value: var_value.clone(),
+            variants: all_variants,
+            occurrence_counts,
+        });
+    }
+
+    // Phase 5: Interactive variant confirmation
+    let confirmed_variables = if options.batch {
+        // Batch mode: auto-accept all found variants
+        extract_variables
+            .into_iter()
+            .map(|mut var| {
+                var.variants.retain(|v| {
+                    var.occurrence_counts
+                        .iter()
+                        .any(|(name, _, hits)| name == v.name && *hits > 0)
+                        || v.name == "verbatim"
+                });
+                // Always keep at least the verbatim/canonical variant
+                if var.variants.is_empty() {
+                    let all = generate_variants(&var.name, &var.value);
+                    if let Some(first) = all.into_iter().next() {
+                        var.variants.push(first);
+                    }
+                }
+                var
+            })
+            .collect()
+    } else {
+        confirm_variants_interactive(extract_variables)?
+    };
+
+    // Phase 6: Detect conditional files
+    let detected_conditionals = if options.batch {
+        vec![] // Batch mode: no conditional files
+    } else {
+        let detected = detect_conditional_files(source_dir);
+        if detected.is_empty() {
+            vec![]
+        } else {
+            confirm_conditionals_interactive(detected)?
+        }
+    };
+
+    // Phase 7: Build replacement rules
+    let mut rules = Vec::new();
+    for var in &confirmed_variables {
+        for variant in &var.variants {
+            rules.push(ReplacementRule {
+                literal: variant.literal.clone(),
+                replacement: variant.tera_expr.clone(),
+                variable: var.name.clone(),
+                variant: variant.name.to_string(),
+            });
+        }
+    }
+    build_replacement_rules(&mut rules);
+
+    // Phase 8: Detect copy_without_render patterns
+    let file_paths: Vec<PathBuf> = scan_result
+        .files
+        .iter()
+        .map(|f| f.relative_path.clone())
+        .collect();
+    let copy_without_render = detect_copy_without_render(source_dir, &file_paths);
+
+    // Phase 9: Apply replacements to files
+    let mut planned_files = Vec::new();
+
+    for file in &scan_result.files {
+        let template_path = apply_path_replacements(&file.relative_path, &rules);
+
+        if file.is_binary {
+            let binary_content =
+                std::fs::read(&file.absolute_path).map_err(|e| DicecutError::Io {
+                    context: format!("reading binary file {}", file.absolute_path.display()),
+                    source: e,
+                })?;
+            planned_files.push(PlannedExtractFile {
+                template_path,
+                content: None,
+                binary_content: Some(binary_content),
+                has_replacements: false,
+                replacement_count: 0,
+                is_binary: true,
+            });
+        } else if let Some(ref content) = file.content {
+            let (replaced, count) = apply_replacements(content, &rules);
+            let has_replacements = count > 0;
+            // Add .die suffix if file has template replacements
+            let final_path = if has_replacements {
+                let mut p = template_path.as_os_str().to_string_lossy().to_string();
+                p.push_str(DEFAULT_TEMPLATES_SUFFIX);
+                PathBuf::from(p)
+            } else {
+                template_path
+            };
+
+            planned_files.push(PlannedExtractFile {
+                template_path: final_path,
+                content: Some(replaced),
+                binary_content: None,
+                has_replacements,
+                replacement_count: count,
+                is_binary: false,
+            });
+        }
+    }
+
+    // Phase 10: Interactive file confirmation
+    if !options.batch {
+        confirm_files_interactive(&planned_files)?;
+    }
+
+    // Phase 11: Build conditional entries
+    let conditional_entries: Vec<ConditionalEntry> = detected_conditionals
+        .iter()
+        .map(|d| {
+            let patterns = patterns_for_variable(&d.variable)
+                .into_iter()
+                .map(|p| p.to_string())
+                .collect();
+            ConditionalEntry {
+                patterns,
+                variable: d.variable.clone(),
+                description: d.description.clone(),
+            }
+        })
+        .collect();
+
+    // Phase 12: Generate config
+    let canonical_seps: HashMap<String, &str> = confirmed_variables
+        .iter()
+        .map(|v| (v.name.clone(), detect_separator(&v.value)))
+        .collect();
+
+    let prompted_vars: Vec<PromptedVariable> = confirmed_variables
+        .iter()
+        .map(|v| PromptedVariable {
+            name: v.name.clone(),
+            default_value: v.value.clone(),
+            prompt: v.name.replace(['_', '-'], " "),
+        })
+        .collect();
+
+    let mut computed_vars = Vec::new();
+    for var in &confirmed_variables {
+        let canonical_sep = canonical_seps.get(&var.name).copied().unwrap_or("-");
+        for variant in &var.variants {
+            // Skip variants already covered by the prompted variable itself: the verbatim
+            // value and the case form that matches the value's own separator.
+            let is_canonical = variant.name == "verbatim"
+                || matches!(
+                    (variant.name, canonical_sep),
+                    ("kebab", "-") | ("snake", "_") | ("dot", ".")
+                );
+            if is_canonical {
+                continue;
+            }
+
+            let computed_name = format!("{}_{}", var.name, variant.name);
+            let expression = computed_expression(&var.name, variant.name, canonical_sep);
+            // Don't add if expression is just the variable name
+            if expression != var.name {
+                computed_vars.push(ComputedVariable {
+                    name: computed_name,
+                    expression,
+                });
+            }
+        }
+    }
+
+    let config_toml = generate_config_toml(&ConfigGenOptions {
+        template_name: source_dir
+            .file_name()
+            .map(|n| n.to_string_lossy().to_string())
+            .unwrap_or_else(|| "template".to_string()),
+        prompted_variables: prompted_vars,
+        computed_variables: computed_vars,
+        exclude_patterns: excludes.clone(),
+        copy_without_render: copy_without_render.clone(),
+        conditional_entries: conditional_entries.clone(),
+    });
+
+    Ok(ExtractionPlan {
+        output_dir,
+        files: planned_files,
+        config_toml,
+        variables: confirmed_variables,
+        conditional_entries,
+        exclude_patterns: excludes,
+        copy_without_render,
+    })
+}
+
+/// Execute an extraction plan: write files and config to the output directory.
+/// Files go under `<output_dir>/template/` and `diecut.toml` beside it; `_in_place` is unused.
+/// Stops at the first I/O error; on success a summary is printed to stderr.
+pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()> {
+    let output_dir = &plan.output_dir;
+    let template_dir = output_dir.join("template");
+
+    // Create output structure
+    std::fs::create_dir_all(&template_dir).map_err(|e| DicecutError::Io {
+        context: format!("creating template directory {}", template_dir.display()),
+        source: e,
+    })?;
+
+    // Write template files
+    let mut rendered_count = 0;
+    let mut copied_count = 0;
+
+    for file in &plan.files {
+        let dest = template_dir.join(&file.template_path);
+
+        // Ensure parent directory exists
+        if let Some(parent) = dest.parent() {
+            std::fs::create_dir_all(parent).map_err(|e| DicecutError::Io {
+                context: format!("creating directory {}", parent.display()),
+                source: e,
+            })?;
+        }
+
+        if let Some(ref content) = file.content {
+            std::fs::write(&dest, content).map_err(|e| DicecutError::Io {
+                context: format!("writing file {}", dest.display()),
+                source: e,
+            })?;
+            if file.has_replacements {
+                rendered_count += 1;
+            } else {
+                copied_count += 1;
+            }
+        } else if let Some(ref bytes) = file.binary_content {
+            std::fs::write(&dest, bytes).map_err(|e| DicecutError::Io {
+                context: format!("writing binary file {}", dest.display()),
+                source: e,
+            })?;
+            copied_count += 1;
+        }
+    }
+
+    // Write diecut.toml
+    let config_path = output_dir.join("diecut.toml");
+    std::fs::write(&config_path, &plan.config_toml).map_err(|e| DicecutError::Io {
+        context: format!("writing {}", config_path.display()),
+        source: e,
+    })?;
+
+    // Summary
+    let prompted_count = plan.variables.len();
+    let computed_count: usize = plan
+        .variables
+        .iter()
+        .map(|var| {
+            // Each variable is judged against its own separator: the verbatim variant and
+            // the variant matching that separator are not separate computed variables.
+            let canonical_sep = detect_separator(&var.value);
+            var.variants
+                .iter()
+                .filter(|variant| {
+                    variant.name != "verbatim"
+                        && !matches!(
+                            (variant.name, canonical_sep),
+                            ("kebab", "-") | ("snake", "_") | ("dot", ".")
+                        )
+                })
+                .count()
+        })
+        .sum();
+
+    eprintln!(
+        "\n{} Template extracted to {}",
+        style("✓").green().bold(),
+        style(output_dir.display()).cyan()
+    );
+    eprintln!(
+        "  {} variables ({} prompted, {} computed)",
+        prompted_count + computed_count,
+        prompted_count,
+        computed_count
+    );
+    eprintln!(
+        "  {} files templated, {} files copied",
+        rendered_count, copied_count
+    );
+    if !plan.conditional_entries.is_empty() {
+        eprintln!(
+            "  {} conditional patterns added",
+            plan.conditional_entries.len()
+        );
+    }
+    eprintln!("  Review diecut.toml to fine-tune");
+
+    Ok(())
+}
+
+// ── Interactive helpers ──────────────────────────────────────────────────
+
+/// Count how often `literal` occurs across the scanned files.
+///
+/// Returns `(file_count, total_hits)`: the number of files whose text content or
+/// relative path contains `literal`, and the total number of matches in both.
+/// An empty `literal` yields `(0, 0)` rather than matching at every position.
+fn count_variant_occurrences(literal: &str, files: &[ScannedFile]) -> (usize, usize) {
+    if literal.is_empty() {
+        return (0, 0);
+    }
+    let mut file_count = 0;
+    let mut total_hits = 0;
+    for file in files {
+        let content_hits = file
+            .content
+            .as_deref()
+            .map_or(0, |text| text.matches(literal).count());
+        let path_hits = file.relative_path.to_string_lossy().matches(literal).count();
+        // A file counts once whether the match is in its content, its path, or both.
+        if content_hits + path_hits > 0 {
+            file_count += 1;
+            total_hits += content_hits + path_hits;
+        }
+    }
+
+    (file_count, total_hits)
+}
+
+/// Show the case variants detected for each variable and ask which to keep.
+///
+/// For every variable the per-variant hit counts are printed to stderr. Then:
+/// - a variable whose only variant is `verbatim` is accepted without a prompt;
+/// - a variable without any hit keeps just its first variant, also unprompted;
+/// - otherwise the user either keeps all variants that occur in the project or
+///   falls back to the first variant alone.
+///
+/// A cancelled prompt aborts with `DicecutError::PromptCancelled`.
+fn confirm_variants_interactive(variables: Vec<ExtractVariable>) -> Result<Vec<ExtractVariable>> {
+    let mut confirmed = Vec::new();
+
+    for mut var in variables {
+        eprintln!(
+            "\n{} {} = {:?} {}",
+            style("──").dim(),
+            style(&var.name).bold(),
+            var.value,
+            style("──────────────────────────────────────").dim()
+        );
+
+        // A lone verbatim variant needs no confirmation: report its count and move on.
+        if var.variants.len() == 1 && var.variants[0].name == "verbatim" {
+            match var.occurrence_counts.first() {
+                Some((_, files, hits)) if *hits > 0 => {
+                    eprintln!(
+                        "  Found in {} files ({} occurrences)",
+                        files, hits
+                    );
+                }
+                _ => eprintln!(
+                    "  {} Value not found in any file (will still be added to config)",
+                    style("⚠").yellow()
+                ),
+            }
+            confirmed.push(var);
+            continue;
+        }
+
+        // Print one row per variant: mark, literal, variant name, hit summary.
+        eprintln!("  Detected case variants:");
+        let mut found_any = false;
+        let rows = var.variants.iter().zip(&var.occurrence_counts);
+        for (variant, (_, file_count, total_hits)) in rows {
+            let present = *total_hits > 0;
+            found_any |= present;
+            let (mark, hits_str) = if present {
+                let hit_word = if *total_hits == 1 { "hit" } else { "hits" };
+                let file_word = if *file_count == 1 { "file" } else { "files" };
+                (
+                    style("✓").green().to_string(),
+                    format!("{} {} across {} {}", total_hits, hit_word, file_count, file_word),
+                )
+            } else {
+                (style("✗").dim().to_string(), "not found".to_string())
+            };
+            eprintln!(
+                "    {} {:<16} {:<20} {}",
+                mark,
+                variant.literal,
+                variant.name,
+                style(&hits_str).dim()
+            );
+        }
+
+        if !found_any {
+            eprintln!(
+                "  {} No occurrences found for any variant (will still be added to config)",
+                style("⚠").yellow()
+            );
+            // Nothing to prune by hit count; fall back to the first variant only.
+            var.variants.truncate(1);
+            confirmed.push(var);
+            continue;
+        }
+
+        let keep = Confirm::new("Keep detected variants?")
+            .with_default(true)
+            .prompt()
+            .map_err(|_| DicecutError::PromptCancelled)?;
+
+        if !keep {
+            // Declined: keep only the canonical (first) variant.
+            var.variants.truncate(1);
+        } else {
+            // Accepted: prune variants that never occur in the project.
+            var.variants.retain(|v| {
+                var.occurrence_counts
+                    .iter()
+                    .any(|(name, _, hits)| name == v.name && *hits > 0)
+            });
+            // Defensive: never leave a variable without any variant.
+            if var.variants.is_empty() {
+                let regenerated = generate_variants(&var.name, &var.value);
+                if let Some(first) = regenerated.into_iter().next() {
+                    var.variants.push(first);
+                }
+            }
+        }
+
+        confirmed.push(var);
+    }
+
+    Ok(confirmed)
+}
+
+/// List the auto-detected excludes and let the user append more.
+///
+/// The answer is split on commas; blank entries are ignored. Cancelling the
+/// prompt yields `DicecutError::PromptCancelled`.
+fn confirm_excludes_interactive(mut excludes: Vec<String>) -> Result<Vec<String>> {
+    eprintln!(
+        "\n{} Excludes {}",
+        style("──").dim(),
+        style("─────────────────────────────────────────────").dim()
+    );
+    eprintln!("  Auto-detected:");
+    excludes.iter().for_each(|pattern| eprintln!("    {}", pattern));
+
+    let answer = Text::new("Add any others? (comma-separated, enter to accept)")
+        .with_default("")
+        .prompt()
+        .map_err(|_| DicecutError::PromptCancelled)?;
+
+    // Splitting an empty answer yields one empty piece, which the filter discards.
+    let additions = answer
+        .split(',')
+        .map(str::trim)
+        .filter(|piece| !piece.is_empty())
+        .map(String::from);
+    excludes.extend(additions);
+    Ok(excludes)
+}
+
+/// Ask, one detected pattern at a time, whether it should become conditional.
+/// Defaults to "no"; the first cancelled prompt aborts with `PromptCancelled`.
+fn confirm_conditionals_interactive(
+    detected: Vec<DetectedConditional>,
+) -> Result<Vec<DetectedConditional>> {
+    eprintln!(
+        "\n{} Conditional files {}",
+        style("──").dim(),
+        style("────────────────────────────────────").dim()
+    );
+    eprintln!("  These look optional. Make them conditional?");
+
+    // `collect` stops at the first error, so no prompt follows a cancellation.
+    detected
+        .into_iter()
+        .filter_map(|cond| {
+            let question = format!("  {} → {}", cond.pattern, cond.variable);
+            match Confirm::new(&question).with_default(false).prompt() {
+                Ok(true) => Some(Ok(cond)),
+                Ok(false) => None,
+                Err(_) => Some(Err(DicecutError::PromptCancelled)),
+            }
+        })
+        .collect()
+}
+
+/// Summarise the planned files on stderr and ask whether to proceed.
+///
+/// Files with replacements are listed individually; the rest are only counted.
+/// Declining, like cancelling the prompt, returns `DicecutError::PromptCancelled`.
+fn confirm_files_interactive(files: &[PlannedExtractFile]) -> Result<()> {
+    // `plan_extraction` never marks binary files as templated, so they fall under `copied`.
+    let (templated, copied): (Vec<_>, Vec<_>) = files
+        .iter()
+        .partition(|file| file.has_replacements);
+    let binary_count = files.iter().filter(|file| file.is_binary).count();
+
+    eprintln!(
+        "\n{} Files to template {}",
+        style("──").dim(),
+        style("────────────────────────────────────").dim()
+    );
+    eprintln!(
+        "  Will get {} suffix (template replacements made):",
+        DEFAULT_TEMPLATES_SUFFIX
+    );
+    for file in &templated {
+        eprintln!(
+            "    {:<40} {} replacements",
+            file.template_path.display(),
+            file.replacement_count
+        );
+    }
+
+    eprintln!(
+        "\n  Copied verbatim: {} files (including {} binary)",
+        copied.len(),
+        binary_count
+    );
+
+    match Confirm::new("Proceed?").with_default(true).prompt() {
+        Ok(true) => Ok(()),
+        Ok(false) | Err(_) => Err(DicecutError::PromptCancelled),
+    }
+}
diff --git a/src/extract/replace.rs b/src/extract/replace.rs
new file mode 100644
index 0000000..37af444
--- /dev/null
+++ b/src/extract/replace.rs
@@ -0,0 +1,143 @@
+use std::path::{Path, PathBuf};
+
+/// A single replacement rule: find `literal` and replace with `replacement`.
+#[derive(Debug, Clone)]
+pub struct ReplacementRule {
+    pub literal: String, // text to search for; rules with an empty literal are skipped
+    pub replacement: String, // text inserted in place of every match of `literal`
+    /// Name of the variable this rule was derived from (for reporting).
+    pub variable: String,
+    /// Name of the case variant this rule was derived from (for reporting).
+    pub variant: String,
+}
+
+/// Order replacement rules so that the longest literals are applied first.
+///
+/// Sorting is by descending literal length and stable, so rules of equal length keep
+/// their relative order. Longer matches thereby win over shorter overlapping ones.
+pub fn build_replacement_rules(rules: &mut [ReplacementRule]) {
+    rules.sort_by_key(|rule| std::cmp::Reverse(rule.literal.len()));
+}
+
+/// Apply replacement rules to a string, longest-match-first.
+/// Text inserted by one rule is never searched by later rules, so a short literal cannot
+/// corrupt an already inserted template expression. Rules with an empty literal are skipped.
+/// Returns the modified string and the number of replacements made.
+pub fn apply_replacements(content: &str, rules: &[ReplacementRule]) -> (String, usize) {
+    // Segments are (text, searchable); inserted replacements are marked unsearchable.
+    let mut segments: Vec<(&str, bool)> = vec![(content, true)];
+    for rule in rules.iter().filter(|rule| !rule.literal.is_empty()) {
+        let mut next = Vec::with_capacity(segments.len());
+        for (text, searchable) in segments {
+            if !searchable {
+                next.push((text, false));
+                continue;
+            }
+            for (index, piece) in text.split(rule.literal.as_str()).enumerate() {
+                next.extend((index > 0).then_some((rule.replacement.as_str(), false)));
+                next.push((piece, true));
+            }
+        }
+        segments = next;
+    }
+    let total_count = segments.iter().filter(|(_, searchable)| !searchable).count();
+    (segments.into_iter().map(|(text, _)| text).collect(), total_count)
+}
+
+/// Apply replacement rules to each normal component of a path.
+///
+/// Directory and file names are rewritten one at a time with `apply_replacements`;
+/// other components (root, prefix, `.`, `..`) are carried over as they are.
+/// Names that are not valid UTF-8 are converted lossily first.
+///
+/// With a rule mapping `my-app` to `{{ project_name }}`, the path
+/// `my-app/src/main.rs` becomes `{{ project_name }}/src/main.rs`.
+pub fn apply_path_replacements(path: &Path, rules: &[ReplacementRule]) -> PathBuf {
+    use std::path::Component;
+
+    path.components()
+        .map(|component| match component {
+            Component::Normal(name) => {
+                let (templated, _count) = apply_replacements(&name.to_string_lossy(), rules);
+                templated
+            }
+            other => other.as_os_str().to_string_lossy().into_owned(),
+        })
+        .collect()
+}
+
+/// Count non-overlapping occurrences of `literal` in `content`; an empty literal counts as zero.
+pub fn count_occurrences(content: &str, literal: &str) -> usize {
+    match literal {
+        "" => 0,
+        needle => content.matches(needle).count(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Shorthand for a rule whose variable/variant metadata is irrelevant to the test.
+    fn make_rule(literal: &str, replacement: &str) -> ReplacementRule {
+        ReplacementRule {
+            literal: literal.into(),
+            replacement: replacement.into(),
+            variable: "test".into(),
+            variant: "test".into(),
+        }
+    }
+
+    #[test]
+    fn test_apply_replacements_basic() {
+        let rules = [make_rule("my-app", "{{ project_name }}")];
+        let (text, replaced) = apply_replacements("Welcome to my-app!", &rules);
+        assert_eq!((text.as_str(), replaced), ("Welcome to {{ project_name }}!", 1));
+    }
+
+    #[test]
+    fn test_apply_replacements_multiple() {
+        let rules = [make_rule("my-app", "{{ project_name }}")];
+        let (text, replaced) = apply_replacements("my-app is great, use my-app", &rules);
+        assert_eq!(text, "{{ project_name }} is great, use {{ project_name }}");
+        assert_eq!(replaced, 2);
+    }
+
+    #[test]
+    fn test_longest_match_first() {
+        // Deliberately listed shortest-first; sorting must put "my-app" ahead of "my".
+        let mut rules = [
+            make_rule("my", "{{ org }}"),
+            make_rule("my-app", "{{ project_name }}"),
+        ];
+        build_replacement_rules(&mut rules);
+
+        let order: Vec<&str> = rules.iter().map(|rule| rule.literal.as_str()).collect();
+        assert_eq!(order, ["my-app", "my"]);
+    }
+
+    #[test]
+    fn test_apply_replacements_empty_rules() {
+        let untouched = apply_replacements("hello world", &[]);
+        assert_eq!(untouched, ("hello world".to_string(), 0));
+    }
+
+    #[test]
+    fn test_apply_path_replacements() {
+        let rules = [make_rule("my-app", "{{ project_name }}")];
+        let templated = apply_path_replacements(Path::new("my-app/src/main.rs"), &rules);
+        assert_eq!(templated, PathBuf::from("{{ project_name }}/src/main.rs"));
+    }
+
+    #[test]
+    fn test_count_occurrences() {
+        let cases = [
+            ("my-app and my-app", "my-app", 2),
+            ("hello world", "missing", 0),
+            ("anything", "", 0),
+        ];
+        for (content, literal, expected) in cases {
+            assert_eq!(count_occurrences(content, literal), expected);
+        }
+    }
+}
diff --git a/src/extract/scan.rs b/src/extract/scan.rs
new file mode 100644
index 0000000..278fd75
--- /dev/null
+++ b/src/extract/scan.rs
@@ -0,0 +1,147 @@
+use std::path::{Path, PathBuf};
+
+use walkdir::WalkDir;
+
+use super::exclude::should_exclude;
+use crate::render::file::is_binary_file;
+
+/// A non-excluded, non-directory entry found while scanning the project directory.
+#[derive(Debug, Clone)]
+pub struct ScannedFile {
+    /// Path relative to the (canonicalized) project root.
+    pub relative_path: PathBuf,
+    /// Path of the entry as yielded by the directory walk over the canonicalized root.
+    pub absolute_path: PathBuf,
+    /// Whether the file is treated as binary (its content is then not loaded).
+    pub is_binary: bool,
+    /// Text content; `None` for binary files and for files that could not be read as UTF-8.
+    pub content: Option<String>,
+}
+
+/// Result of scanning a project directory.
+#[derive(Debug)]
+pub struct ScanResult {
+    pub files: Vec<ScannedFile>, // every non-excluded, non-directory entry, in walk order
+    pub excluded_count: usize, // number of non-directory entries skipped by an exclude pattern
+}
+
+/// Scan a project directory, applying exclude patterns.
+///
+/// Returns all non-excluded files; text files carry their content, all others are marked binary.
+pub fn scan_project(project_dir: &Path, excludes: &[String]) -> crate::error::Result<ScanResult> {
+    let project_dir = project_dir
+        .canonicalize()
+        .map_err(|e| crate::error::DicecutError::Io {
+            context: format!("canonicalizing project directory {}", project_dir.display()),
+            source: e,
+        })?;
+
+    let mut files = Vec::new();
+    let mut excluded_count = 0;
+
+    for entry in WalkDir::new(&project_dir).min_depth(1) {
+        let entry = entry.map_err(|e| crate::error::DicecutError::Io {
+            context: format!("walking project directory: {}", e),
+            source: e
+                .into_io_error()
+                .unwrap_or_else(|| std::io::Error::other("walkdir error")),
+        })?;
+
+        // Skip directories themselves (we only care about files)
+        if entry.file_type().is_dir() {
+            continue;
+        }
+
+        let relative_path = entry
+            .path()
+            .strip_prefix(&project_dir)
+            .unwrap_or(entry.path())
+            .to_path_buf();
+
+        if should_exclude(&relative_path, excludes) {
+            excluded_count += 1;
+            continue;
+        }
+
+        let absolute_path = entry.path().to_path_buf();
+        // A file that cannot be read as UTF-8 text is classified as binary too, so it is
+        // copied verbatim later instead of carrying neither content nor the binary flag.
+        let content = if is_binary_file(&absolute_path) {
+            None
+        } else {
+            std::fs::read_to_string(&absolute_path).ok()
+        };
+        let is_binary = content.is_none();
+
+        files.push(ScannedFile {
+            relative_path,
+            absolute_path,
+            is_binary,
+            content,
+        });
+    }
+
+    Ok(ScanResult {
+        files,
+        excluded_count,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Directories themselves are not reported, only the files inside them.
+    #[test]
+    fn test_scan_project_basic() {
+        let project = tempfile::tempdir().unwrap();
+        let root = project.path();
+        std::fs::create_dir(root.join("src")).unwrap();
+        std::fs::write(root.join("README.md"), "# Hello").unwrap();
+        std::fs::write(root.join("src/main.rs"), "fn main() {}").unwrap();
+
+        let scan = scan_project(root, &[]).unwrap();
+        assert_eq!(scan.files.len(), 2);
+        assert_eq!(scan.excluded_count, 0);
+    }
+
+    /// Excluded files are counted but left out of the file list.
+    #[test]
+    fn test_scan_project_with_excludes() {
+        let project = tempfile::tempdir().unwrap();
+        let root = project.path();
+        std::fs::create_dir(root.join(".git")).unwrap();
+        std::fs::write(root.join(".git/config"), "").unwrap();
+        std::fs::write(root.join("README.md"), "# Hello").unwrap();
+
+        let scan = scan_project(root, &[".git".to_string()]).unwrap();
+        let kept: Vec<&Path> = scan.files.iter().map(|f| f.relative_path.as_path()).collect();
+        assert_eq!(kept, [Path::new("README.md")]);
+        assert_eq!(scan.excluded_count, 1);
+    }
+
+    /// Text files carry content; a file holding every byte value is flagged binary.
+    #[test]
+    fn test_scan_project_binary_detection() {
+        let project = tempfile::tempdir().unwrap();
+        let root = project.path();
+        let every_byte: Vec<u8> = (0..=u8::MAX).collect();
+        std::fs::write(root.join("text.txt"), "hello").unwrap();
+        std::fs::write(root.join("binary.bin"), &every_byte).unwrap();
+
+        let scan = scan_project(root, &[]).unwrap();
+        let by_name = |name: &str| {
+            scan.files
+                .iter()
+                .find(|f| f.relative_path.to_string_lossy() == name)
+                .unwrap()
+        };
+        let text_file = by_name("text.txt");
+        let binary_file = by_name("binary.bin");
+
+        assert!(!text_file.is_binary);
+        assert!(text_file.content.is_some());
+        assert!(binary_file.is_binary);
+        assert!(binary_file.content.is_none());
+    }
+}
diff --git a/src/extract/variants.rs b/src/extract/variants.rs
new file mode 100644
index 0000000..e4259bd
--- /dev/null
+++ b/src/extract/variants.rs
@@ -0,0 +1,329 @@
+use regex_lite::Regex;
+
+/// A case variant of a variable value, with its literal text and Tera expression.
+#[derive(Debug, Clone, PartialEq)]
+pub struct CaseVariant {
+    pub name: &'static str, // variant label, e.g. "kebab", "snake" or "verbatim"
+    pub literal: String, // the value written in that case style
+    pub tera_expr: String, // `{{ ... }}` expression built for this variant
+}
+
+/// Break `value` into lowercase words, the raw material for every case variant.
+///
+/// The first delimiter found, tried in the order `-`, `_`, `.`, space, decides how the
+/// value is split; a value with none of them is split on camelCase / PascalCase humps.
+pub fn split_into_words(value: &str) -> Vec<String> {
+    let lowered = |piece: &str| piece.to_lowercase();
+
+    for delimiter in ['-', '_', '.'] {
+        if value.contains(delimiter) {
+            return value.split(delimiter).map(lowered).collect();
+        }
+    }
+    // A space only selects this branch; the split itself is on any whitespace run.
+    if value.contains(' ') {
+        return value.split_whitespace().map(lowered).collect();
+    }
+
+    // An uppercase letter plus trailing lowercase, a lowercase run, or a digit run.
+    let hump = Regex::new(r"[A-Z][a-z]*|[a-z]+|[0-9]+").unwrap();
+    let humps: Vec<String> = hump
+        .find_iter(value)
+        .map(|m| m.as_str().to_lowercase())
+        .collect();
+
+    // Nothing matched (e.g. only punctuation): keep the lowercased value as one word.
+    if humps.is_empty() {
+        return vec![value.to_lowercase()];
+    }
+    humps
+}
+
+/// Decide whether `value` is worth expanding into case variants.
+///
+/// That is the case only when the value contains no space (phrases such as
+/// author names are matched verbatim) and splits into at least two words.
+fn supports_case_variants(value: &str) -> bool {
+    // Cheap test first: a space rules the value out before any splitting is done.
+    if value.contains(' ') {
+        return false;
+    }
+
+    // A lone word has no separator or hump that could change between styles.
+    let word_count = split_into_words(value).len();
+    word_count >= 2
+}
+
+/// Join words with hyphens: `my-app`.
+fn to_kebab(words: &[String]) -> String {
+    words.join("-")
+}
+
+/// Join words with underscores: `my_app`.
+fn to_snake(words: &[String]) -> String {
+    words.join("_")
+}
+
+/// Upper-case every word and join with `sep`; shared by both screaming styles.
+fn join_upper(words: &[String], sep: &str) -> String {
+    let shouted: Vec<String> = words.iter().map(|w| w.to_uppercase()).collect();
+    shouted.join(sep)
+}
+
+/// Upper-case words joined with underscores: `MY_APP`.
+fn to_screaming_snake(words: &[String]) -> String {
+    join_upper(words, "_")
+}
+
+/// Upper-case words joined with hyphens: `MY-APP`.
+fn to_screaming_kebab(words: &[String]) -> String {
+    join_upper(words, "-")
+}
+
+/// Capitalise the first character of each word and concatenate: `MyApp`.
+///
+/// Only the first character is touched; the rest of each word is copied as-is.
+fn to_pascal(words: &[String]) -> String {
+    let mut joined = String::new();
+    for word in words {
+        let mut rest = word.chars();
+        if let Some(first) = rest.next() {
+            joined.extend(first.to_uppercase());
+            joined.push_str(rest.as_str());
+        }
+    }
+    joined
+}
+
+/// PascalCase with the very first character lower-cased: `myApp`.
+fn to_camel(words: &[String]) -> String {
+    let pascal = to_pascal(words);
+    let mut rest = pascal.chars();
+    match rest.next() {
+        Some(first) => first.to_lowercase().chain(rest).collect(),
+        None => String::new(),
+    }
+}
+
+/// Join words with dots: `my.app`.
+fn to_dot(words: &[String]) -> String {
+    words.join(".")
+}
+
+/// Report which separator the original value is written with.
+///
+/// `-`, `_` and `.` are tried in that order and the first one found anywhere in
+/// `value` is returned. A value containing none of them (PascalCase or camelCase,
+/// for instance) is reported as `-`, i.e. kebab-case is treated as its canonical
+/// form.
+pub fn detect_separator(value: &str) -> &'static str {
+    const CANDIDATES: [&str; 3] = ["-", "_", "."];
+
+    // `copied` turns the `&&str` items into the `&'static str` values we return.
+    let found = CANDIDATES.iter().copied().find(|sep| value.contains(*sep));
+    found.unwrap_or("-")
+}
+
+/// Build the `{{ ... }}` Tera expression that renders one case variant of a variable.
+///
+/// * `var_name` - the template variable holding the canonical value.
+/// * `variant_name` - one of `kebab`, `snake`, `screaming_snake`, `screaming_kebab`,
+///   `pascal`, `camel` or `dot`; any other name renders the variable unchanged.
+/// * `canonical_sep` - the separator the canonical value is written with (see
+///   [`detect_separator`]).
+///
+/// A variant that already uses the canonical separator gets no `replace` filter.
+/// `pascal` always goes through `replace` and `title`. `camel` has no built-in
+/// Tera filter here, so it refers to a separate `<var_name>_camel` variable.
+fn tera_expr_for_variant(var_name: &str, variant_name: &str, canonical_sep: &str) -> String {
+    // The variable with its separator rewritten to `target`, or a plain reference
+    // when the canonical separator already is `target`.
+    let with_sep = |target: &str| {
+        if canonical_sep == target {
+            var_name.to_string()
+        } else {
+            format!("{var_name} | replace(from=\"{canonical_sep}\", to=\"{target}\")")
+        }
+    };
+
+    let inner = match variant_name {
+        "kebab" => with_sep("-"),
+        "snake" => with_sep("_"),
+        "dot" => with_sep("."),
+        "screaming_snake" => format!("{} | upper", with_sep("_")),
+        "screaming_kebab" => format!("{} | upper", with_sep("-")),
+        // Separator -> space, title-case each word, then remove the spaces again.
+        "pascal" => format!(
+            "{var_name} | replace(from=\"{canonical_sep}\", to=\" \") | title | replace(from=\" \", to=\"\")"
+        ),
+        // Rendered from a separate `<var_name>_camel` variable, not a filter chain.
+        "camel" => format!("{var_name}_camel"),
+        // Unknown variant names fall back to the variable itself.
+        _ => var_name.to_string(),
+    };
+
+    // The doubled braces are `format!` escapes for a literal `{{` and `}}`.
+    format!("{{{{ {inner} }}}}")
+}
+
+/// Generate the case variants of `value` with the Tera expression for each.
+///
+/// Values without case-variant support (single words, space-separated phrases)
+/// yield a single `verbatim` entry. Otherwise candidates are built in a fixed
+/// order (kebab, snake, screaming_snake, screaming_kebab, pascal, camel, dot),
+/// so the first entry is always `kebab`, whichever style `value` is written in.
+/// A candidate whose literal repeats an earlier candidate's is dropped.
+pub fn generate_variants(var_name: &str, value: &str) -> Vec<CaseVariant> {
+    if !supports_case_variants(value) {
+        return vec![CaseVariant {
+            name: "verbatim",
+            literal: value.to_string(),
+            tera_expr: format!("{{{{ {var_name} }}}}"),
+        }];
+    }
+
+    let words = split_into_words(value);
+    let canonical_sep = detect_separator(value);
+
+    let candidates = [
+        ("kebab", to_kebab(&words)),
+        ("snake", to_snake(&words)),
+        ("screaming_snake", to_screaming_snake(&words)),
+        ("screaming_kebab", to_screaming_kebab(&words)),
+        ("pascal", to_pascal(&words)),
+        ("camel", to_camel(&words)),
+        ("dot", to_dot(&words)),
+    ];
+
+    let mut variants: Vec<CaseVariant> = Vec::with_capacity(candidates.len());
+    for (name, literal) in candidates {
+        // Seven candidates at most: a linear scan avoids the hash set and clone.
+        if variants.iter().any(|kept| kept.literal == literal) {
+            continue;
+        }
+        let tera_expr = tera_expr_for_variant(var_name, name, canonical_sep);
+        variants.push(CaseVariant {
+            name,
+            literal,
+            tera_expr,
+        });
+    }
+    variants
+}
+
+/// Build the bare Tera expression (no `{{ }}` delimiters) that derives a named variant from `var_name`.
+///
+/// This is used in diecut.toml for computed variables like `project_name_snake`; when no arm applies, `var_name` itself is returned.
+pub fn computed_expression(var_name: &str, variant_name: &str, canonical_sep: &str) -> String {
+    match (variant_name, canonical_sep) {
+        ("snake", sep) if sep != "_" => {
+            format!("{var_name} | replace(from=\"{sep}\", to=\"_\")")
+        }
+        ("screaming_snake", sep) => {
+            if sep == "_" {
+                format!("{var_name} | upper")
+            } else {
+                format!("{var_name} | replace(from=\"{sep}\", to=\"_\") | upper")
+            }
+        }
+        ("screaming_kebab", sep) => {
+            if sep == "-" {
+                format!("{var_name} | upper")
+            } else {
+                format!("{var_name} | replace(from=\"{sep}\", to=\"-\") | upper")
+            }
+        }
+        ("pascal", sep) => {
+            format!("{var_name} | replace(from=\"{sep}\", to=\" \") | title | replace(from=\" \", to=\"\")")
+        }
+        ("camel", _sep) => {
+            // Tera doesn't have a built-in camelCase, but we can chain:
+            // title-case then lowercase-first-char isn't directly expressible.
+            // Use a workaround: same as pascal — users may need to adjust.
+            format!(
+                "{var_name} | replace(from=\"-\", to=\" \") | title | replace(from=\" \", to=\"\")"
+            )
+        }
+        ("kebab", sep) if sep != "-" => {
+            format!("{var_name} | replace(from=\"{sep}\", to=\"-\")")
+        }
+        ("dot", sep) if sep != "." => {
+            format!("{var_name} | replace(from=\"{sep}\", to=\".\")")
+        }
+        _ => var_name.to_string(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rstest::rstest;
+
+    /// Look up a variant by name, panicking when it is absent.
+    fn variant<'a>(variants: &'a [CaseVariant], name: &str) -> &'a CaseVariant {
+        variants.iter().find(|v| v.name == name).unwrap()
+    }
+
+    // Each input style splits into lowercase words; a plain word stays whole.
+    #[rstest]
+    #[case("my-app", vec!["my", "app"])]
+    #[case("my_app", vec!["my", "app"])]
+    #[case("MyApp", vec!["my", "app"])]
+    #[case("myApp", vec!["my", "app"])]
+    #[case("my.app", vec!["my", "app"])]
+    #[case("my app", vec!["my", "app"])]
+    #[case("single", vec!["single"])]
+    fn test_split_into_words(#[case] input: &str, #[case] expected: Vec<&str>) {
+        assert_eq!(split_into_words(input), expected);
+    }
+
+    #[test]
+    fn test_generate_variants_kebab() {
+        let variants = generate_variants("project_name", "my-app");
+        // Each expected variant must exist (the lookup panics otherwise) and match.
+        let expected = [
+            ("kebab", "my-app"),
+            ("snake", "my_app"),
+            ("pascal", "MyApp"),
+        ];
+        for (name, literal) in expected {
+            assert_eq!(variant(&variants, name).literal, literal);
+        }
+    }
+
+    #[test]
+    fn test_generate_variants_single_word() {
+        // A single word has nothing to vary, so only `verbatim` comes back.
+        let variants = generate_variants("name", "hello");
+        assert_eq!(variants.len(), 1);
+        let only = &variants[0];
+        assert_eq!((only.name, only.literal.as_str()), ("verbatim", "hello"));
+    }
+
+    #[test]
+    fn test_generate_variants_space_separated() {
+        // Space-separated phrases are likewise reported verbatim only.
+        let variants = generate_variants("author", "Jane Doe");
+        assert_eq!(variants.len(), 1);
+        let only = &variants[0];
+        assert_eq!((only.name, only.literal.as_str()), ("verbatim", "Jane Doe"));
+    }
+
+    #[test]
+    fn test_generate_variants_screaming_snake() {
+        let variants = generate_variants("project_name", "my-app");
+        assert_eq!(variant(&variants, "screaming_snake").literal, "MY_APP");
+    }
+
+    #[test]
+    fn test_tera_expr_kebab_canonical() {
+        let got = tera_expr_for_variant("project_name", "kebab", "-");
+        assert_eq!(got, "{{ project_name }}");
+    }
+
+    #[test]
+    fn test_tera_expr_snake_from_kebab() {
+        let got = tera_expr_for_variant("project_name", "snake", "-");
+        assert_eq!(got, "{{ project_name | replace(from=\"-\", to=\"_\") }}");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index a57e60c..4091828 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@ pub mod adapter;
 pub mod answers;
 pub mod config;
 pub mod error;
+pub mod extract;
 pub mod hooks;
 pub mod prompt;
 pub mod render;
diff --git a/src/main.rs b/src/main.rs
index 20cf462..f540fe9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -19,5 +19,13 @@ fn main() -> miette::Result<()> {
             template, output, data, defaults, overwrite, no_hooks, dry_run, verbose,
         ),
         Commands::List => commands::list::run(),
+        Commands::Extract {
+            source,
+            vars,
+            output,
+            in_place,
+            batch,
+            dry_run,
+        } => commands::extract::run(source, vars, output, in_place, batch, dry_run),
     }
 }
diff --git a/tests/integration.rs b/tests/integration.rs
index 4005080..1ad3a59 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -3,6 +3,7 @@ use std::path::PathBuf;
 
 use diecut::adapter;
 use diecut::config::load_config;
+use diecut::extract::{execute_extraction, plan_extraction, ExtractOptions};
 use diecut::prompt::PromptOptions;
 use diecut::render::{build_context, execute_plan, plan_render, walk_and_render};
 use diecut::template::source::{resolve_source, resolve_source_full};
@@ -622,3 +623,295 @@ fn test_plan_generation_verbose_has_content() {
         "at least one rendered file should contain the resolved project name"
     );
 }
+
+// ── Extract command tests ────────────────────────────────────────────────
+
+#[test]
+fn test_extract_batch_basic() {
+    // Source project: three small files that mention the project name or author.
+    let source = tempfile::tempdir().unwrap();
+    let root = source.path();
+    // `src/` has to exist before `src/main.rs` can be written into it.
+    std::fs::create_dir(root.join("src")).unwrap();
+    let fixtures = [
+        ("README.md", "# my-app\nBy Jane Doe\n"),
+        ("src/main.rs", "fn main() {\n    println!(\"Welcome to my-app!\");\n}\n"),
+        ("Cargo.toml", "[package]\nname = \"my-app\"\nversion = \"0.1.0\"\n"),
+    ];
+    for (relative, body) in fixtures {
+        std::fs::write(root.join(relative), body).unwrap();
+    }
+
+    // Extract into a sub-directory of a second temp dir.
+    let scratch = tempfile::tempdir().unwrap();
+    let out_dir = scratch.path().join("extracted");
+
+    let options = ExtractOptions {
+        source_dir: root.to_path_buf(),
+        variables: vec![
+            ("project_name".to_string(), "my-app".to_string()),
+            ("author".to_string(), "Jane Doe".to_string()),
+        ],
+        output_dir: Some(out_dir.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: false,
+    };
+
+    // Plan first, then execute the plan.
+    let plan = plan_extraction(&options).unwrap();
+    execute_extraction(&plan, false).unwrap();
+
+    // The generated config exists and declares the template plus both variables.
+    let config_path = out_dir.join("diecut.toml");
+    assert!(config_path.exists());
+    let config = std::fs::read_to_string(&config_path).unwrap();
+    for section in ["[template]", "[variables.project_name]", "[variables.author]"] {
+        assert!(config.contains(section));
+    }
+
+    // The template tree exists and holds at least one path ending in `.die`.
+    let template_dir = out_dir.join("template");
+    assert!(template_dir.exists());
+    let has_die_files = walkdir::WalkDir::new(&template_dir)
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .any(|e| e.path().to_string_lossy().ends_with(".die"));
+    assert!(has_die_files, "should have files with .die suffix");
+}
+
+#[test]
+fn test_extract_detects_case_variants() {
+    // One file spelling the project name in kebab, snake, Pascal and SCREAMING_SNAKE.
+    let source = tempfile::tempdir().unwrap();
+    std::fs::write(
+        source.path().join("config.toml"),
+        "[package]\nname = \"my-app\"\nmodule = \"my_app\"\nclass = \"MyApp\"\nenv = \"MY_APP_PORT\"\n",
+    )
+    .unwrap();
+
+    let scratch = tempfile::tempdir().unwrap();
+    let out_dir = scratch.path().join("extracted");
+
+    let options = ExtractOptions {
+        source_dir: source.path().to_path_buf(),
+        variables: vec![("project_name".to_string(), "my-app".to_string())],
+        output_dir: Some(out_dir.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: false,
+    };
+    let plan = plan_extraction(&options).unwrap();
+
+    // The planned variable must list every case style that occurs in the file.
+    let planned = plan
+        .variables
+        .iter()
+        .find(|v| v.name == "project_name")
+        .unwrap();
+    let found_styles: Vec<&str> = planned.variants.iter().map(|v| v.name).collect();
+    assert!(
+        found_styles.contains(&"kebab"),
+        "should detect kebab variant"
+    );
+    assert!(
+        found_styles.contains(&"snake"),
+        "should detect snake variant"
+    );
+    assert!(
+        found_styles.contains(&"pascal"),
+        "should detect pascal variant"
+    );
+    assert!(
+        found_styles.contains(&"screaming_snake"),
+        "should detect screaming_snake variant"
+    );
+
+    execute_extraction(&plan, false).unwrap();
+
+    // The written config must define a computed variable for the snake variant.
+    let config = std::fs::read_to_string(out_dir.join("diecut.toml")).unwrap();
+    assert!(
+        config.contains("project_name_snake"),
+        "should have snake computed var"
+    );
+}
+
+#[test]
+fn test_extract_dry_run_writes_nothing() {
+    let source = tempfile::tempdir().unwrap();
+    std::fs::write(source.path().join("hello.txt"), "hello my-app").unwrap();
+
+    let scratch = tempfile::tempdir().unwrap();
+    let out_dir = scratch.path().join("dry-run-output");
+
+    let options = ExtractOptions {
+        source_dir: source.path().to_path_buf(),
+        variables: vec![("project_name".to_string(), "my-app".to_string())],
+        output_dir: Some(out_dir.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: true,
+    };
+
+    let plan = plan_extraction(&options).unwrap();
+    // Nothing is executed: planning must fill the plan and leave the output path absent.
+    assert!(!plan.files.is_empty());
+    assert!(!plan.config_toml.is_empty());
+    assert!(
+        !out_dir.exists(),
+        "dry run should not create output directory"
+    );
+}
+
+#[test]
+fn test_extract_rejects_already_template() {
+    // The source directory already contains a diecut.toml, so planning must fail.
+    let source = tempfile::tempdir().unwrap();
+    std::fs::write(
+        source.path().join("diecut.toml"),
+        "[template]\nname = \"existing\"",
+    )
+    .unwrap();
+
+    let options = ExtractOptions {
+        source_dir: source.path().to_path_buf(),
+        variables: vec![("name".to_string(), "val".to_string())],
+        output_dir: None,
+        in_place: false,
+        batch: true,
+        dry_run: false,
+    };
+
+    assert!(plan_extraction(&options).is_err());
+}
+
+#[test]
+fn test_extract_rejects_no_variables() {
+    let source = tempfile::tempdir().unwrap();
+    std::fs::write(source.path().join("hello.txt"), "hello").unwrap();
+
+    // The variable list is empty, so planning must fail.
+    let options = ExtractOptions {
+        source_dir: source.path().to_path_buf(),
+        variables: vec![],
+        output_dir: None,
+        in_place: false,
+        batch: true,
+        dry_run: false,
+    };
+
+    assert!(plan_extraction(&options).is_err());
+}
+
+#[test]
+fn test_extract_templates_path_components() {
+    let source = tempfile::tempdir().unwrap();
+    std::fs::create_dir(source.path().join("my-app")).unwrap();
+    std::fs::write(source.path().join("my-app/main.rs"), "fn main() {}\n").unwrap();
+
+    let scratch = tempfile::tempdir().unwrap();
+    let out_dir = scratch.path().join("extracted");
+
+    let options = ExtractOptions {
+        source_dir: source.path().to_path_buf(),
+        variables: vec![("project_name".to_string(), "my-app".to_string())],
+        output_dir: Some(out_dir.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: false,
+    };
+
+    let plan = plan_extraction(&options).unwrap();
+
+    // The `my-app` directory name should be rewritten to the variable expression.
+    let path_was_templated = plan.files.iter().any(|pf| {
+        pf.template_path
+            .to_string_lossy()
+            .contains("{{ project_name }}")
+    });
+    assert!(
+        path_was_templated,
+        "should template path components containing the variable value"
+    );
+
+    execute_extraction(&plan, false).unwrap();
+}
+
+#[test]
+fn test_extract_round_trip() {
+    // Step 1: render the bundled fixture template into a concrete project.
+    let fixture = fixture_path("basic-template");
+    let resolved = adapter::resolve_template(&fixture).unwrap();
+
+    // Answers for the fixture's variables: four strings and one boolean.
+    let mut answers = BTreeMap::new();
+    let string_answers = [
+        ("project_name", "my-app"),
+        ("author", "Jane Doe"),
+        ("license", "MIT"),
+        ("project_slug", "my-app"),
+    ];
+    for (key, answer) in string_answers {
+        let value = tera::Value::String(answer.to_string());
+        answers.insert(key.to_string(), value);
+    }
+
+    // The one non-string answer.
+    answers.insert("use_docker".to_string(), tera::Value::Bool(false));
+
+    // Render with those answers into a fresh temp dir.
+    let context = build_context(&answers);
+    let generated = tempfile::tempdir().unwrap();
+    walk_and_render(&resolved, generated.path(), &answers, &context).unwrap();
+
+    // Rendering places the project under `<generated>/my-app/`.
+    let project_dir = generated.path().join("my-app");
+    assert!(project_dir.exists(), "generated project should exist");
+
+    // Step 2: extract that project back into a template.
+    let scratch = tempfile::tempdir().unwrap();
+    let out_dir = scratch.path().join("extracted-template");
+
+    let options = ExtractOptions {
+        source_dir: project_dir.clone(),
+        variables: vec![("project_name".to_string(), "my-app".to_string())],
+        output_dir: Some(out_dir.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: false,
+    };
+
+    let plan = plan_extraction(&options).unwrap();
+    execute_extraction(&plan, false).unwrap();
+
+    // The extracted template must have a config and a `template/` directory.
+    assert!(out_dir.join("diecut.toml").exists());
+    assert!(out_dir.join("template").exists());
+
+    let config = std::fs::read_to_string(out_dir.join("diecut.toml")).unwrap();
+    assert!(config.contains("project_name"));
+
+    // Collect every regular file underneath `template/`.
+    let template_files: Vec<_> = walkdir::WalkDir::new(out_dir.join("template"))
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .filter(|e| e.file_type().is_file())
+        .collect();
+    assert!(!template_files.is_empty(), "should have template files");
+
+    // Any file carrying the `.die` suffix must contain Tera syntax.
+    for entry in &template_files {
+        let path = entry.path();
+        if !path.to_string_lossy().ends_with(".die") {
+            continue;
+        }
+        let content = std::fs::read_to_string(path).unwrap();
+        assert!(
+            content.contains("{{") || content.contains("{%"),
+            "file {} should contain template syntax, got: {}",
+            path.display(),
+            content
+        );
+    }
+}

From 1ddb7d3995d00e308fe1aaf54ec45374b967e5b1 Mon Sep 17 00:00:00 2001
From: Robert Roskam <robert.roskam@pantheon.io>
Date: Fri, 27 Feb 2026 16:32:02 -0500
Subject: [PATCH 02/29] fix(extract): correct camelCase handling and computed
 variable expressions

Add a custom `camelcase` Tera filter that properly lowercases the first
word and title-cases the rest (e.g., "my-app" -> "myApp"). Register it
via tera_with_filters() in the prompt engine and render walker.

Fix computed variable expressions in generated diecut.toml to include
{{ }} delimiters so they evaluate as Tera templates rather than being
treated as literal text.
---
 src/extract/config_gen.rs |  2 +-
 src/extract/variants.rs   |  9 +----
 src/prompt/engine.rs      |  2 +-
 src/render/context.rs     | 81 ++++++++++++++++++++++++++++++++++++++-
 src/render/mod.rs         |  2 +-
 src/render/walker.rs      |  4 +-
 6 files changed, 86 insertions(+), 14 deletions(-)

diff --git a/src/extract/config_gen.rs b/src/extract/config_gen.rs
index 5fd8222..91dea6c 100644
--- a/src/extract/config_gen.rs
+++ b/src/extract/config_gen.rs
@@ -84,7 +84,7 @@ pub fn generate_config_toml(options: &ConfigGenOptions) -> String {
         out.push_str("type = \"string\"\n");
         out.push_str(&format!(
             "computed = {}\n",
-            escape_toml_string(&var.expression)
+            escape_toml_string(&format!("{{{{ {} }}}}", var.expression))
         ));
         out.push('\n');
     }
diff --git a/src/extract/variants.rs b/src/extract/variants.rs
index e4259bd..222a220 100644
--- a/src/extract/variants.rs
+++ b/src/extract/variants.rs
@@ -236,13 +236,8 @@ pub fn computed_expression(var_name: &str, variant_name: &str, canonical_sep: &s
         ("pascal", sep) => {
             format!("{var_name} | replace(from=\"{sep}\", to=\" \") | title | replace(from=\" \", to=\"\")")
         }
-        ("camel", _sep) => {
-            // Tera doesn't have a built-in camelCase, but we can chain:
-            // title-case then lowercase-first-char isn't directly expressible.
-            // Use a workaround: same as pascal — users may need to adjust.
-            format!(
-                "{var_name} | replace(from=\"-\", to=\" \") | title | replace(from=\" \", to=\"\")"
-            )
+        ("camel", sep) => {
+            format!("{var_name} | camelcase(sep=\"{sep}\")")
         }
         ("kebab", sep) if sep != "-" => {
             format!("{var_name} | replace(from=\"{sep}\", to=\"-\")")
diff --git a/src/prompt/engine.rs b/src/prompt/engine.rs
index 4de7253..47fc847 100644
--- a/src/prompt/engine.rs
+++ b/src/prompt/engine.rs
@@ -96,7 +96,7 @@ fn evaluate_computed(
     computed_expr: &str,
     values: &BTreeMap<String, Value>,
 ) -> Result<Value> {
-    let mut tera = tera::Tera::default();
+    let mut tera = crate::render::tera_with_filters();
     tera.add_raw_template("__computed__", computed_expr)
         .map_err(|e| DicecutError::ComputedEvaluation {
             name: name.to_string(),
diff --git a/src/render/context.rs b/src/render/context.rs
index f29f678..4680c64 100644
--- a/src/render/context.rs
+++ b/src/render/context.rs
@@ -1,4 +1,4 @@
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap};
 
 use tera::{Context, Tera, Value};
 
@@ -10,14 +10,91 @@ pub fn build_context(variables: &BTreeMap<String, Value>) -> Context {
     context
 }
 
+/// Build a Tera engine with the project's custom filters registered.
+///
+/// Prefer this over `Tera::default()` wherever templates or computed
+/// expressions are evaluated: it starts from the default engine and registers
+/// the `camelcase` filter on it, so that filter is available to them.
+pub fn tera_with_filters() -> Tera {
+    let mut engine = Tera::default();
+    engine.register_filter("camelcase", camelcase_filter);
+    engine
+}
+
+/// Custom Tera filter: convert a separated string to camelCase.
+///
+/// Usage: `{{ value | camelcase }}` or `{{ value | camelcase(sep="_") }}`.
+///
+/// The value is split on `sep` (default `-`; a non-string `sep` also falls back
+/// to `-`). The first word is lowercased; each later word gets an uppercase
+/// first character and a lowercase remainder. Empty words are skipped.
+///
+/// # Errors
+/// Fails when the value is not a string or when `sep` is an empty string.
+fn camelcase_filter(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> {
+    let s = value
+        .as_str()
+        .ok_or_else(|| tera::Error::msg("camelcase filter requires a string value"))?;
+
+    let sep = args.get("sep").and_then(|v| v.as_str()).unwrap_or("-");
+    // `str::split("")` would break the value into single characters and every
+    // one of them would then be capitalised, so reject that outright.
+    if sep.is_empty() {
+        return Err(tera::Error::msg("camelcase filter requires a non-empty `sep`"));
+    }
+
+    // `split` always yields at least one item, so a first word is always present.
+    let mut words = s.split(sep);
+    let mut result = words.next().unwrap_or_default().to_lowercase();
+    for word in words {
+        let mut chars = word.chars();
+        if let Some(first) = chars.next() {
+            result.extend(first.to_uppercase());
+            result.push_str(&chars.as_str().to_lowercase());
+        }
+    }
+
+    Ok(Value::String(result))
+}
+
 /// Evaluate a Tera boolean expression against a variable context.
 ///
 /// Returns `Ok(true)` if the expression evaluates to true, `Ok(false)` otherwise.
 /// Returns `Err` if the expression fails to parse or render.
 pub fn eval_bool_expr(expr: &str, context: &Context) -> std::result::Result<bool, tera::Error> {
-    let mut tera = Tera::default();
+    let mut tera = tera_with_filters();
     let template_str = format!("{{% if {expr} %}}true{{% else %}}false{{% endif %}}");
     tera.add_raw_template("__when__", &template_str)?;
     let result = tera.render("__when__", context)?;
     Ok(result.trim() == "true")
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Run the filter on `input`, passing `sep` as an argument only when given.
+    fn camel(input: &str, sep: Option<&str>) -> Value {
+        let mut args = HashMap::new();
+        if let Some(sep) = sep {
+            args.insert("sep".to_string(), Value::String(sep.to_string()));
+        }
+        camelcase_filter(&Value::String(input.to_string()), &args).unwrap()
+    }
+
+    #[test]
+    fn test_camelcase_filter_kebab() {
+        // No `sep` argument: the filter's default separator applies.
+        assert_eq!(camel("my-cool-app", None), "myCoolApp");
+    }
+
+    #[test]
+    fn test_camelcase_filter_custom_sep() {
+        assert_eq!(camel("my_cool_app", Some("_")), "myCoolApp");
+    }
+
+    #[test]
+    fn test_camelcase_filter_single_word() {
+        assert_eq!(camel("hello", None), "hello");
+    }
+}
diff --git a/src/render/mod.rs b/src/render/mod.rs
index 5674674..8a87f30 100644
--- a/src/render/mod.rs
+++ b/src/render/mod.rs
@@ -2,7 +2,7 @@ pub mod context;
 pub mod file;
 pub mod walker;
 
-pub use context::{build_context, eval_bool_expr};
+pub use context::{build_context, eval_bool_expr, tera_with_filters};
 pub use walker::{
     execute_plan, plan_render, walk_and_render, GeneratedProject, GenerationPlan, PlannedFile,
 };
diff --git a/src/render/walker.rs b/src/render/walker.rs
index caf9e26..97b1e96 100644
--- a/src/render/walker.rs
+++ b/src/render/walker.rs
@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
 use std::path::{Path, PathBuf};
 
 use globset::{Glob, GlobSet, GlobSetBuilder};
-use tera::{Context, Tera, Value};
+use tera::{Context, Value};
 use walkdir::WalkDir;
 
 use crate::adapter::ResolvedTemplate;
@@ -104,7 +104,7 @@ pub fn plan_render(
                 source: e,
             })?;
 
-            let mut tera = Tera::default();
+            let mut tera = crate::render::tera_with_filters();
             let template_name = rel_str.to_string();
             let parse_result = tera.add_raw_template(&template_name, &content);
             let render_result = parse_result.and_then(|_| tera.render(&template_name, context));

From 69099b85f4abb0b1030b782598d550cf3ba9e7c5 Mon Sep 17 00:00:00 2001
From: Robert Roskam <robert.roskam@pantheon.io>
Date: Fri, 27 Feb 2026 16:28:07 -0500
Subject: [PATCH 03/29] fix(extract): prevent substring collision in
 replacements

Use word-boundary-aware matching so short variable values like "app"
don't get replaced inside longer words like "application". A match is
only accepted when the characters immediately before and after it are
not word-like (alphanumeric, underscore, or hyphen).
---
 src/extract/replace.rs | 113 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 2 deletions(-)

diff --git a/src/extract/replace.rs b/src/extract/replace.rs
index 37af444..42914ec 100644
--- a/src/extract/replace.rs
+++ b/src/extract/replace.rs
@@ -19,8 +19,76 @@ pub fn build_replacement_rules(rules: &mut [ReplacementRule]) {
     rules.sort_by(|a, b| b.literal.len().cmp(&a.literal.len()));
 }
 
+/// Reports whether `c` counts as part of a word when checking match boundaries.
+///
+/// Besides alphanumeric characters, `_` and `-` qualify, since both join the
+/// words of a single identifier (snake_case, kebab-case).
+fn is_word_char(c: char) -> bool {
+    matches!(c, '_' | '-') || c.is_alphanumeric()
+}
+
+/// Replace every whole-word occurrence of `literal` in `text` with `replacement`.
+///
+/// An occurrence is a whole word when the character immediately before it and
+/// the character immediately after it are both non-word characters (see
+/// `is_word_char`), or when the occurrence touches the start/end of `text`.
+/// The same rule applies to compound literals such as `my-app`.
+///
+/// Returns the rewritten text and the number of replacements made. An empty
+/// `literal`, or one longer than `text`, yields `text` unchanged and a count of 0.
+fn replace_whole_word(text: &str, literal: &str, replacement: &str) -> (String, usize) {
+    let literal_len = literal.len();
+    let text_len = text.len();
+
+    if literal_len == 0 || text_len < literal_len {
+        return (text.to_string(), 0);
+    }
+
+    let mut result = String::with_capacity(text.len());
+    let mut count = 0;
+    let mut start = 0;
+
+    while start <= text_len - literal_len {
+        match text[start..].find(literal) {
+            Some(pos) => {
+                let match_start = start + pos;
+                let match_end = match_start + literal_len;
+
+                let ok_before = match_start == 0
+                    || !is_word_char(text[..match_start].chars().next_back().unwrap());
+                let ok_after = match_end == text_len
+                    || !is_word_char(text[match_end..].chars().next().unwrap());
+
+                if ok_before && ok_after {
+                    result.push_str(&text[start..match_start]);
+                    result.push_str(replacement);
+                    count += 1;
+                    start = match_end;
+                } else {
+                    // Not a word boundary: step over only the first character of
+                    // this match. Advance by that character's UTF-8 width so that
+                    // `next` always lands on a char boundary; a fixed 1-byte step
+                    // would split a multi-byte character and panic when slicing.
+                    let first_char = text[match_start..].chars().next();
+                    let next = match_start + first_char.map_or(1, char::len_utf8);
+                    // Copy the skipped text through unchanged.
+                    result.push_str(&text[start..next]);
+                    start = next;
+                }
+            }
+            None => break,
+        }
+    }
+
+    result.push_str(&text[start..]);
+    (result, count)
+}
+
 /// Apply replacement rules to a string, longest-match-first.
 ///
+/// Uses word-boundary-aware matching to prevent replacing substrings
+/// inside longer words (e.g., "app" inside "application").
+///
 /// Returns the modified string and the number of replacements made.
 pub fn apply_replacements(content: &str, rules: &[ReplacementRule]) -> (String, usize) {
     if rules.is_empty() {
@@ -34,9 +102,9 @@ pub fn apply_replacements(content: &str, rules: &[ReplacementRule]) -> (String,
         if rule.literal.is_empty() {
             continue;
         }
-        let count = result.matches(&rule.literal).count();
+        let (replaced, count) = replace_whole_word(&result, &rule.literal, &rule.replacement);
         if count > 0 {
-            result = result.replace(&rule.literal, &rule.replacement);
+            result = replaced;
             total_count += count;
         }
     }
@@ -140,4 +208,45 @@ mod tests {
         assert_eq!(count_occurrences("hello world", "missing"), 0);
         assert_eq!(count_occurrences("anything", ""), 0);
     }
+
+    #[test]
+    fn test_no_substring_collision_suffix() {
+        let app_rule = [make_rule("app", "{{ name }}")];
+        let (output, replaced) = apply_replacements("application startup", &app_rule);
+        assert_eq!(output, "application startup"); // "app" is only a prefix here
+        assert_eq!(replaced, 0);
+    }
+
+    #[test]
+    fn test_no_substring_collision_prefix() {
+        let app_rule = [make_rule("app", "{{ name }}")];
+        let (output, replaced) = apply_replacements("webapp is cool", &app_rule);
+        assert_eq!(output, "webapp is cool"); // "app" is only a suffix of "webapp"
+        assert_eq!(replaced, 0);
+    }
+
+    #[test]
+    fn test_standalone_match_with_punctuation() {
+        let app_rule = [make_rule("app", "{{ name }}")];
+        let (output, replaced) = apply_replacements("run app. start app!", &app_rule);
+        assert_eq!(output, "run {{ name }}. start {{ name }}!"); // `.` and `!` end a word
+        assert_eq!(replaced, 2);
+    }
+
+    #[test]
+    fn test_match_at_string_boundaries() {
+        let app_rule = [make_rule("app", "{{ name }}")];
+        let (output, replaced) = apply_replacements("app", &app_rule);
+        assert_eq!(output, "{{ name }}"); // the literal is the entire input
+        assert_eq!(replaced, 1);
+    }
+
+    #[test]
+    fn test_compound_literal_still_matches() {
+        // A hyphenated literal such as "my-app" must still be replaced between quotes
+        let compound_rule = [make_rule("my-app", "{{ name }}")];
+        let (output, replaced) = apply_replacements("name = \"my-app\"", &compound_rule);
+        assert_eq!(output, "name = \"{{ name }}\"");
+        assert_eq!(replaced, 1);
+    }
 }

From f5ad9edc86bc074c1541d12b5f7f3caf12cf92cc Mon Sep 17 00:00:00 2001
From: Robert Roskam <robert.roskam@pantheon.io>
Date: Fri, 27 Feb 2026 16:26:38 -0500
Subject: [PATCH 04/29] refactor(extract): use computed variable names in
 template files

Instead of repeating verbose inline filter chains like
{{ project_name | replace(from="-", to=" ") | title | replace(from=" ", to="") }}
in every template file, reference the computed variable names already
defined in diecut.toml (e.g., {{ project_name_pascal }}).

Extract is_canonical_variant() as a public helper to deduplicate the
canonical-variant check between replacement rule building and computed
variable generation.
---
 src/extract/mod.rs      | 10 +++-----
 src/extract/variants.rs | 57 ++++++++++++++---------------------------
 2 files changed, 23 insertions(+), 44 deletions(-)

diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 2364891..ba568bd 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -23,7 +23,9 @@ use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
 use self::scan::{scan_project, ScannedFile};
-use self::variants::{computed_expression, detect_separator, generate_variants, CaseVariant};
+use self::variants::{
+    computed_expression, detect_separator, generate_variants, is_canonical_variant, CaseVariant,
+};
 
 /// A variable with its value and confirmed case variants.
 #[derive(Debug, Clone)]
@@ -307,11 +309,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
                 continue;
             }
             // Skip the variant that matches the canonical separator
-            let is_canonical = matches!(
-                (variant.name, canonical_sep),
-                ("kebab", "-") | ("snake", "_") | ("dot", ".")
-            );
-            if is_canonical {
+            if is_canonical_variant(variant.name, canonical_sep) {
                 continue;
             }
 
diff --git a/src/extract/variants.rs b/src/extract/variants.rs
index 222a220..8458b29 100644
--- a/src/extract/variants.rs
+++ b/src/extract/variants.rs
@@ -124,46 +124,27 @@ pub fn detect_separator(value: &str) -> &'static str {
     }
 }
 
+/// Returns whether `variant_name` is the case style denoted by `canonical_sep`.
+/// Canonical variants render as bare `{{ var_name }}` and get no computed variable.
+pub fn is_canonical_variant(variant_name: &str, canonical_sep: &str) -> bool {
+    match canonical_sep {
+        "-" => variant_name == "kebab",
+        "_" => variant_name == "snake",
+        "." => variant_name == "dot",
+        _ => false,
+    }
+}
+
 /// Build a Tera expression for a variant, given the variable name and canonical separator.
+///
+/// The `verbatim` variant and canonical variants use `{{ var_name }}` directly. Every other
+/// variant references its computed variable (e.g., `{{ var_name_snake }}`) from diecut.toml.
 fn tera_expr_for_variant(var_name: &str, variant_name: &str, canonical_sep: &str) -> String {
-    match (variant_name, canonical_sep) {
-        ("kebab", "-") => format!("{{{{ {var_name} }}}}"),
-        ("kebab", sep) => {
-            format!("{{{{ {var_name} | replace(from=\"{sep}\", to=\"-\") }}}}")
-        }
-        ("snake", "_") => format!("{{{{ {var_name} }}}}"),
-        ("snake", sep) => {
-            format!("{{{{ {var_name} | replace(from=\"{sep}\", to=\"_\") }}}}")
-        }
-        ("screaming_snake", sep) => {
-            if sep == "_" {
-                format!("{{{{ {var_name} | upper }}}}")
-            } else {
-                format!("{{{{ {var_name} | replace(from=\"{sep}\", to=\"_\") | upper }}}}")
-            }
-        }
-        ("screaming_kebab", sep) => {
-            if sep == "-" {
-                format!("{{{{ {var_name} | upper }}}}")
-            } else {
-                format!("{{{{ {var_name} | replace(from=\"{sep}\", to=\"-\") | upper }}}}")
-            }
-        }
-        ("pascal", sep) => {
-            format!(
-                "{{{{ {var_name} | replace(from=\"{sep}\", to=\" \") | title | replace(from=\" \", to=\"\") }}}}"
-            )
-        }
-        ("camel", _sep) => {
-            // No built-in camelCase filter in Tera, so we use a computed variable name
-            format!("{{{{ {var_name}_camel }}}}")
-        }
-        ("dot", ".") => format!("{{{{ {var_name} }}}}"),
-        ("dot", sep) => {
-            format!("{{{{ {var_name} | replace(from=\"{sep}\", to=\".\") }}}}")
-        }
-        _ => format!("{{{{ {var_name} }}}}"),
+    if variant_name == "verbatim" || is_canonical_variant(variant_name, canonical_sep) {
+        return format!("{{{{ {var_name} }}}}");
     }
+    // Any other variant refers to its computed variable, named `<var_name>_<variant_name>`
+    format!("{{{{ {var_name}_{variant_name} }}}}")
 }
 
 /// Generate all case variants for a given variable value.
@@ -319,6 +300,6 @@ mod tests {
     #[test]
     fn test_tera_expr_snake_from_kebab() {
         let expr = tera_expr_for_variant("project_name", "snake", "-");
-        assert_eq!(expr, "{{ project_name | replace(from=\"-\", to=\"_\") }}");
+        assert_eq!(expr, "{{ project_name_snake }}");
     }
 }

From 5ccdaa181f8ed10e567351ded2441b0a903a8939 Mon Sep 17 00:00:00 2001
From: Robert Roskam <robert.roskam@pantheon.io>
Date: Fri, 27 Feb 2026 19:04:32 -0500
Subject: [PATCH 05/29] feat(extract): add auto-detection of template variables

Add 4-tier automatic variable detection for `diecut extract`:
- Tier 1: Directory name (0.95 confidence)
- Tier 2: Ecosystem configs - Cargo.toml, package.json, pyproject.toml,
  go.mod (0.85-0.90 confidence)
- Tier 3: Git metadata - remote org, user.name (0.65-0.70 confidence)
- Tier 4: Frequency analysis with Levenshtein merging (scored 0.30-1.0)

Auto-detection runs when no --var flags are provided. Includes noise
filtering for language keywords, common libraries, file format words,
and stopwords. Scoring emphasizes variant diversity to prefer
identifiers that appear in multiple case forms.
---
 Cargo.lock                 |    1 +
 Cargo.toml                 |    1 +
 src/cli.rs                 |    4 +
 src/commands/extract.rs    |    5 +
 src/error.rs               |    2 +-
 src/extract/auto_detect.rs | 1315 ++++++++++++++++++++++++++++++++++++
 src/extract/mod.rs         |  124 +++-
 src/main.rs                |    3 +-
 tests/integration.rs       |  145 ++++
 9 files changed, 1593 insertions(+), 7 deletions(-)
 create mode 100644 src/extract/auto_detect.rs

diff --git a/Cargo.lock b/Cargo.lock
index ce434bc..113ae0e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -462,6 +462,7 @@ dependencies = [
  "serde",
  "serde_json",
  "sha2",
+ "strsim",
  "tempfile",
  "tera",
  "thiserror",
diff --git a/Cargo.toml b/Cargo.toml
index 3a5902b..64ef434 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,6 +35,7 @@ sha2 = "0.10"
 fs4 = "0.12"
 content_inspector = "0.2"
 indexmap = { version = "2.11.4", features = ["serde"] }
+strsim = "0.11"
 
 [dev-dependencies]
 rstest = "0.23"
diff --git a/src/cli.rs b/src/cli.rs
index a051ff6..0301617 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -74,5 +74,9 @@ pub enum Commands {
         /// Show what would be extracted without writing files
         #[arg(long)]
         dry_run: bool,
+
+        /// Auto-detect template variables from project metadata and content
+        #[arg(long)]
+        auto: bool,
     },
 }
diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index 86fee13..6576387 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -13,9 +13,13 @@ pub fn run(
     in_place: bool,
     batch: bool,
     dry_run: bool,
+    auto: bool,
 ) -> Result<()> {
     let variables = parse_vars(&vars)?;
 
+    // Default auto to true when no vars are provided
+    let auto = auto || variables.is_empty();
+
     let options = ExtractOptions {
         source_dir: PathBuf::from(&source),
         variables,
@@ -23,6 +27,7 @@ pub fn run(
         in_place,
         batch,
         dry_run,
+        auto,
     };
 
     let plan = plan_extraction(&options)?;
diff --git a/src/error.rs b/src/error.rs
index d6a4ee4..ccac949 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -124,7 +124,7 @@ pub enum DicecutError {
 
     #[error("No variables provided for extraction")]
     #[diagnostic(help(
-        "Use --var key=value to specify which values should become template variables"
+        "Use --var key=value to specify variables, or --auto to detect them automatically"
     ))]
     ExtractNoVariables,
 
diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
new file mode 100644
index 0000000..5cc76da
--- /dev/null
+++ b/src/extract/auto_detect.rs
@@ -0,0 +1,1315 @@
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+use std::process::Command;
+
+use regex_lite::Regex;
+
+use super::scan::ScanResult;
+use super::variants::split_into_words;
+
+/// Detection tier that produced a candidate variable (one variant per tier).
+#[derive(Debug, Clone, PartialEq)]
+pub enum ConfidenceTier {
+    DirectoryName, // tier 1: the project directory's own name
+    ConfigFile, // tier 2: Cargo.toml, package.json, pyproject.toml, go.mod
+    GitMetadata, // tier 3: `git config` remote.origin.url and user.name
+    FrequencyAnalysis, // tier 4: tokens recurring in the scanned file contents
+}
+
+impl std::fmt::Display for ConfidenceTier {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self { // lowercase, human-readable tier label
+            ConfidenceTier::DirectoryName => "directory name",
+            ConfidenceTier::ConfigFile => "config file",
+            ConfidenceTier::GitMetadata => "git metadata",
+            ConfidenceTier::FrequencyAnalysis => "frequency analysis",
+        })
+    }
+}
+
+/// One proposed template variable, as produced by any auto-detection tier.
+#[derive(Debug, Clone)]
+pub struct DetectedCandidate {
+    pub suggested_name: String, // proposed variable name, e.g. "project_name" or "author"
+    pub value: String, // literal value found in the project for this variable
+    pub tier: ConfidenceTier, // detection tier that produced this candidate
+    pub confidence: f64, // ranking score; `auto_detect` sorts highest first
+    pub reason: String, // human-readable note on where the value came from
+    pub file_count: usize, // number of scanned files the value was counted in
+    pub total_occurrences: usize, // number of occurrences counted for the value
+}
+
+/// Outcome of `auto_detect`: every candidate variable that survived detection.
+#[derive(Debug)]
+pub struct AutoDetectResult {
+    pub candidates: Vec<DetectedCandidate>, // deduplicated, sorted by confidence descending
+}
+
+struct TokenCluster { // tier-4 group of tokens sharing one normalized word list
+    normalized: Vec<String>, // word list from `split_into_words` for the first token added
+    literals: Vec<String>, // distinct raw tokens collected into this cluster
+    total_occurrences: usize, // sum of the occurrence counts of the collected tokens
+    file_count: usize, // largest per-token file count seen (not a union of file sets)
+    matches_dir_name: bool, // some token's word list equals the directory name's
+    in_config_value: bool, // some token, lowercased, equals a tier-2 config value
+}
+
+// ── Entry point ──────────────────────────────────────────────────────────
+
+/// Run all 4 auto-detection tiers against a scanned project.
+///
+/// Candidates come back deduplicated and sorted by confidence, highest first.
+pub fn auto_detect(project_dir: &Path, scan_result: &ScanResult) -> AutoDetectResult {
+    let mut candidates = Vec::new();
+
+    // Tier 1: Directory name
+    candidates.extend(detect_directory_name(project_dir, scan_result));
+    // Tier 2: Ecosystem config files
+    candidates.extend(detect_config_files(project_dir, scan_result));
+    // Tier 3: Git metadata
+    candidates.extend(detect_git_metadata(project_dir, scan_result));
+
+    // Collect values already covered by tiers 1-3
+    let covered_values: HashSet<String> = candidates
+        .iter()
+        .map(|c| c.value.to_lowercase())
+        .collect();
+
+    // Collect config values for frequency analysis boosting
+    let config_values: HashSet<String> = candidates
+        .iter()
+        .filter(|c| c.tier == ConfidenceTier::ConfigFile)
+        .map(|c| c.value.to_lowercase())
+        .collect();
+
+    let dir_name = project_dir
+        .file_name()
+        .map(|n| n.to_string_lossy().to_lowercase())
+        .unwrap_or_default();
+
+    // Tier 4: Frequency analysis
+    candidates.extend(detect_frequency(
+        scan_result,
+        &covered_values,
+        &config_values,
+        &dir_name,
+    ));
+
+    // Deduplicate by normalized word list, keeping highest confidence
+    deduplicate_candidates(&mut candidates);
+
+    // Sort by confidence descending; `total_cmp` cannot panic, even on a NaN score
+    candidates.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
+
+    AutoDetectResult { candidates }
+}
+
+// ── Tier 1: Directory name ───────────────────────────────────────────────
+
+const GENERIC_DIR_NAMES: &[&str] = &[ // lowercase directory names that tier 1 never proposes
+    "src", "app", "project", "tmp", "temp", "build", "dist", "out", "output", "lib", "bin",
+    "test", "tests", "example", "examples", "docs", "doc", "assets", "public", "static",
+    "vendor", "node_modules", "target", "pkg", "cmd", "internal", "api", "web", "server",
+    "client", "frontend", "backend", "service", "services", "workspace", "repo", "code",
+];
+
+/// Tier 1: propose the project directory's name as `project_name`.
+///
+/// Yields nothing for generic names (`GENERIC_DIR_NAMES`) or names under two characters.
+fn detect_directory_name(project_dir: &Path, scan_result: &ScanResult) -> Vec<DetectedCandidate> {
+    let dir_name = match project_dir.file_name() {
+        Some(name) => name.to_string_lossy().to_string(),
+        None => return vec![],
+    };
+
+    // Count characters, not bytes: a single multi-byte character is still too short.
+    let too_short = dir_name.chars().count() < 2;
+    if too_short || GENERIC_DIR_NAMES.contains(&dir_name.to_lowercase().as_str()) {
+        return vec![];
+    }
+
+    let (file_count, total_occurrences) = count_occurrences(&dir_name, scan_result);
+
+    vec![DetectedCandidate {
+        suggested_name: "project_name".to_string(),
+        value: dir_name.clone(),
+        tier: ConfidenceTier::DirectoryName,
+        confidence: 0.95,
+        reason: format!("directory name \"{}\"", dir_name),
+        file_count,
+        total_occurrences,
+    }]
+}
+
+// ── Tier 2: Ecosystem config files ───────────────────────────────────────
+
+/// Tier 2: gather candidates from every ecosystem manifest the project has.
+///
+/// Parsers run in a fixed order (Cargo.toml, package.json, pyproject.toml,
+/// go.mod); one that returns `None` contributes nothing.
+fn detect_config_files(
+    project_dir: &Path,
+    scan_result: &ScanResult,
+) -> Vec<DetectedCandidate> {
+    let parsers: [fn(&Path, &ScanResult) -> Option<Vec<DetectedCandidate>>; 4] = [
+        parse_cargo_toml,
+        parse_package_json,
+        parse_pyproject_toml,
+        parse_go_mod,
+    ];
+
+    parsers
+        .iter()
+        .filter_map(|parse| parse(project_dir, scan_result))
+        .flatten()
+        .collect()
+}
+
+/// Tier 2: read `[package].name` and the first `[package].authors` entry of Cargo.toml.
+///
+/// Returns `None` when Cargo.toml cannot be read or does not parse as TOML.
+fn parse_cargo_toml(
+    project_dir: &Path,
+    scan_result: &ScanResult,
+) -> Option<Vec<DetectedCandidate>> {
+    let raw = std::fs::read_to_string(project_dir.join("Cargo.toml")).ok()?;
+    let manifest: toml::Value = raw.parse().ok()?;
+    let package = manifest.get("package");
+
+    let mut found = Vec::new();
+
+    let package_name = package.and_then(|p| p.get("name")).and_then(|n| n.as_str());
+    if let Some(name) = package_name {
+        let (file_count, total_occurrences) = count_occurrences(name, scan_result);
+        found.push(DetectedCandidate {
+            suggested_name: "project_name".to_string(),
+            value: name.to_string(),
+            tier: ConfidenceTier::ConfigFile,
+            confidence: 0.90,
+            reason: "Cargo.toml [package].name".to_string(),
+            file_count,
+            total_occurrences,
+        });
+    }
+
+    // Only the first listed author is used, after `strip_email`; an empty result is dropped.
+    let first_author = package
+        .and_then(|p| p.get("authors"))
+        .and_then(|a| a.as_array())
+        .and_then(|list| list.first())
+        .and_then(|entry| entry.as_str())
+        .map(|entry| strip_email(entry))
+        .filter(|author| !author.is_empty());
+
+    if let Some(author) = first_author {
+        let (file_count, total_occurrences) = count_occurrences(&author, scan_result);
+        found.push(DetectedCandidate {
+            suggested_name: "author".to_string(),
+            value: author,
+            tier: ConfidenceTier::ConfigFile,
+            confidence: 0.85,
+            reason: "Cargo.toml [package].authors[0]".to_string(),
+            file_count,
+            total_occurrences,
+        });
+    }
+
+    Some(found)
+}
+
+/// Tier 2: read the `name` and `author` fields of package.json.
+///
+/// Returns `None` when package.json cannot be read or does not parse as JSON;
+/// otherwise `Some` with zero, one, or two candidates.
+fn parse_package_json(
+    project_dir: &Path,
+    scan_result: &ScanResult,
+) -> Option<Vec<DetectedCandidate>> {
+    let raw = std::fs::read_to_string(project_dir.join("package.json")).ok()?;
+    let manifest: serde_json::Value = serde_json::from_str(&raw).ok()?;
+
+    let mut found = Vec::new();
+
+    if let Some(scoped_name) = manifest.get("name").and_then(|n| n.as_str()) {
+        // Candidates carry the package name as returned by `strip_npm_scope`.
+        let name = strip_npm_scope(scoped_name);
+        let (file_count, total_occurrences) = count_occurrences(name, scan_result);
+        found.push(DetectedCandidate {
+            suggested_name: "project_name".to_string(),
+            value: name.to_string(),
+            tier: ConfidenceTier::ConfigFile,
+            confidence: 0.90,
+            reason: "package.json \"name\"".to_string(),
+            file_count,
+            total_occurrences,
+        });
+    }
+
+    // `author` is read either as a plain string or as an object with a `name` key.
+    let author = manifest.get("author").and_then(|author| match author {
+        serde_json::Value::String(text) => Some(strip_email(text)),
+        serde_json::Value::Object(fields) => {
+            fields.get("name").and_then(|n| n.as_str()).map(String::from)
+        }
+        _ => None,
+    });
+
+    if let Some(author_name) = author.filter(|name| !name.is_empty()) {
+        let (file_count, total_occurrences) = count_occurrences(&author_name, scan_result);
+        found.push(DetectedCandidate {
+            suggested_name: "author".to_string(),
+            value: author_name,
+            tier: ConfidenceTier::ConfigFile,
+            confidence: 0.85,
+            reason: "package.json \"author\"".to_string(),
+            file_count,
+            total_occurrences,
+        });
+    }
+
+    Some(found)
+}
+
+/// Tier 2: read `[project].name` and the first `[project].authors` entry of pyproject.toml.
+///
+/// An author entry may be a table with a `name` key or a bare string. Returns `None`
+/// when pyproject.toml cannot be read or does not parse as TOML.
+fn parse_pyproject_toml(
+    project_dir: &Path,
+    scan_result: &ScanResult,
+) -> Option<Vec<DetectedCandidate>> {
+    let raw = std::fs::read_to_string(project_dir.join("pyproject.toml")).ok()?;
+    let manifest: toml::Value = raw.parse().ok()?;
+    let project = manifest.get("project");
+
+    let mut found = Vec::new();
+
+    let project_name = project.and_then(|p| p.get("name")).and_then(|n| n.as_str());
+    if let Some(name) = project_name {
+        let (file_count, total_occurrences) = count_occurrences(name, scan_result);
+        found.push(DetectedCandidate {
+            suggested_name: "project_name".to_string(),
+            value: name.to_string(),
+            tier: ConfidenceTier::ConfigFile,
+            confidence: 0.90,
+            reason: "pyproject.toml [project].name".to_string(),
+            file_count,
+            total_occurrences,
+        });
+    }
+
+    let first_author = project
+        .and_then(|p| p.get("authors"))
+        .and_then(|a| a.as_array())
+        .and_then(|list| list.first());
+
+    if let Some(entry) = first_author {
+        // Prefer the table form's `name`; fall back to treating the entry as a string.
+        let raw_author = match entry.get("name").and_then(|n| n.as_str()) {
+            Some(table_name) => Some(table_name),
+            None => entry.as_str(),
+        };
+        let author = raw_author.map(|text| strip_email(text)).filter(|a| !a.is_empty());
+
+        if let Some(name) = author {
+            let (file_count, total_occurrences) = count_occurrences(&name, scan_result);
+            found.push(DetectedCandidate {
+                suggested_name: "author".to_string(),
+                value: name,
+                tier: ConfidenceTier::ConfigFile,
+                confidence: 0.85,
+                reason: "pyproject.toml [project].authors[0].name".to_string(),
+                file_count,
+                total_occurrences,
+            });
+        }
+    }
+
+    Some(found)
+}
+
+/// Tier 2: derive `project_name` and, when possible, `org_name` from go.mod.
+///
+/// The `module` directive may follow comment lines, and a trailing major-version
+/// segment such as `/v2` is not treated as the project name.
+fn parse_go_mod(
+    project_dir: &Path,
+    scan_result: &ScanResult,
+) -> Option<Vec<DetectedCandidate>> {
+    let content = std::fs::read_to_string(project_dir.join("go.mod")).ok()?;
+    // `(?m)` lets `^` match at the start of any line, not just the start of the file.
+    let re = Regex::new(r"(?m)^module\s+(\S+)").unwrap();
+    let module_path = re.captures(&content)?.get(1)?.as_str();
+    let mut segments: Vec<&str> = module_path.split('/').collect();
+    // For `example.com/org/repo/v2` the project is `repo`; drop the version segment.
+    let versioned = segments.last().and_then(|s| s.strip_prefix('v')).map_or(false, |d| {
+        !d.is_empty() && d.bytes().all(|b| b.is_ascii_digit())
+    });
+    if versioned && segments.len() > 1 {
+        segments.pop();
+    }
+
+    let name = segments.last().copied().filter(|n| !n.is_empty())?;
+    let mut candidates = Vec::new();
+    let (file_count, total_occurrences) = count_occurrences(name, scan_result);
+    candidates.push(DetectedCandidate {
+        suggested_name: "project_name".to_string(),
+        value: name.to_string(),
+        tier: ConfidenceTier::ConfigFile,
+        confidence: 0.90,
+        reason: format!("go.mod module \"{}\"", module_path),
+        file_count,
+        total_occurrences,
+    });
+
+    // Extract org name (second-to-last segment for github.com/org/repo patterns)
+    if segments.len() >= 3 {
+        let org = segments[segments.len() - 2];
+        if !org.is_empty() && org != name {
+            let (org_file_count, org_total_occurrences) = count_occurrences(org, scan_result);
+            if org_total_occurrences > 0 {
+                candidates.push(DetectedCandidate {
+                    suggested_name: "org_name".to_string(),
+                    value: org.to_string(),
+                    tier: ConfidenceTier::ConfigFile,
+                    confidence: 0.85,
+                    reason: format!("go.mod module org \"{}\"", org),
+                    file_count: org_file_count,
+                    total_occurrences: org_total_occurrences,
+                });
+            }
+        }
+    }
+
+    Some(candidates)
+}
+
+// ── Tier 3: Git metadata ─────────────────────────────────────────────────
+
+/// Tier 3: propose `org_name` from the origin remote URL and `author` from git's user.name.
+///
+/// The org is kept only if it occurs in the scanned files; the user name is kept regardless.
+fn detect_git_metadata(
+    project_dir: &Path,
+    scan_result: &ScanResult,
+) -> Vec<DetectedCandidate> {
+    let mut found = Vec::new();
+
+    let remote_org = git_config_get(project_dir, "remote.origin.url")
+        .and_then(|url| parse_org_from_url(&url));
+    if let Some(org) = remote_org {
+        let (file_count, total_occurrences) = count_occurrences(&org, scan_result);
+        // Keep the org only when it actually occurs in the scanned files.
+        if total_occurrences > 0 {
+            found.push(DetectedCandidate {
+                suggested_name: "org_name".to_string(),
+                value: org.clone(),
+                tier: ConfidenceTier::GitMetadata,
+                confidence: 0.70,
+                reason: format!("git remote org \"{}\"", org),
+                file_count,
+                total_occurrences,
+            });
+        }
+    }
+
+    let user_name = git_config_get(project_dir, "user.name").filter(|name| !name.is_empty());
+    if let Some(user_name) = user_name {
+        let (file_count, total_occurrences) = count_occurrences(&user_name, scan_result);
+        found.push(DetectedCandidate {
+            suggested_name: "author".to_string(),
+            value: user_name.clone(),
+            tier: ConfidenceTier::GitMetadata,
+            confidence: 0.65,
+            reason: format!("git config user.name \"{}\"", user_name),
+            file_count,
+            total_occurrences,
+        });
+    }
+
+    found
+}
+
+/// Run `git config --get <key>` inside `project_dir` and return its trimmed stdout.
+///
+/// Yields `None` if git cannot be spawned, exits unsuccessfully, prints non-UTF-8
+/// output, or prints nothing but whitespace.
+fn git_config_get(project_dir: &Path, key: &str) -> Option<String> {
+    let output = Command::new("git")
+        .args(["config", "--get", key])
+        .current_dir(project_dir)
+        .output()
+        .ok()?;
+
+    if !output.status.success() {
+        return None;
+    }
+
+    let stdout = String::from_utf8(output.stdout).ok()?;
+    // Trim surrounding whitespace; an all-whitespace value counts as missing.
+    let value = stdout.trim();
+    (!value.is_empty()).then(|| value.to_string())
+}
+
+/// Extract the org/owner segment from a git remote URL.
+///
+/// Handles `git@host:org/repo.git` and `http(s)://host/org/repo.git`; any other
+/// shape, or an empty org segment, yields `None`.
+fn parse_org_from_url(url: &str) -> Option<String> {
+    // SSH form: the org is the text after the first `:`, up to the next `:` or `/`.
+    if let Some(rest) = url.strip_prefix("git@") {
+        let org = rest.split(':').nth(1)?.split('/').next()?;
+        // A `git@` URL can never match the HTTP(S) branch below, so stop here.
+        return (!org.is_empty()).then(|| org.to_string());
+    }
+
+    // HTTP(S) form: splitting on `/` gives ["https:", "", host, org, ...].
+    if !(url.starts_with("https://") || url.starts_with("http://")) {
+        return None;
+    }
+    url.split('/')
+        .nth(3)
+        .filter(|org| !org.is_empty())
+        .map(str::to_string)
+}
+
+// ── Tier 4: Frequency analysis ───────────────────────────────────────────
+
+fn detect_frequency(
+    scan_result: &ScanResult,
+    covered_values: &HashSet<String>,
+    config_values: &HashSet<String>,
+    dir_name: &str,
+) -> Vec<DetectedCandidate> {
+    // Tokenize all text file content
+    let token_re = Regex::new(
+        r"[a-zA-Z][a-zA-Z0-9]*(?:[-_.][a-zA-Z0-9]+)+|[A-Z][a-z]+(?:[A-Z][a-z]+)+|[a-z]+(?:[A-Z][a-z]+)+|[A-Z]{2,}(?:_[A-Z]{2,})+"
+    ).unwrap();
+
+    let mut token_file_map: HashMap<String, HashSet<usize>> = HashMap::new();
+    let mut token_counts: HashMap<String, usize> = HashMap::new();
+
+    for (file_idx, file) in scan_result.files.iter().enumerate() {
+        if let Some(ref content) = file.content {
+            for mat in token_re.find_iter(content) {
+                let token = mat.as_str().to_string();
+                token_file_map
+                    .entry(token.clone())
+                    .or_default()
+                    .insert(file_idx);
+                *token_counts.entry(token).or_insert(0) += 1;
+            }
+        }
+    }
+
+    // Build clusters by normalized word list
+    let mut clusters: HashMap<String, TokenCluster> = HashMap::new();
+
+    for (token, count) in &token_counts {
+        let words = split_into_words(token);
+
+        // Filter noise
+        if words.iter().all(|w| w.len() < 3) {
+            continue;
+        }
+        if is_noise_token(token, &words) {
+            continue;
+        }
+
+        let normalized_key = words.join(" ");
+
+        let file_count = token_file_map
+            .get(token)
+            .map(|s| s.len())
+            .unwrap_or(0);
+
+        // Skip single-occurrence-single-file tokens
+        if *count == 1 && file_count <= 1 {
+            continue;
+        }
+
+        let matches_dir = normalized_key == split_into_words(dir_name).join(" ")
+            && !dir_name.is_empty();
+        let in_config = config_values.contains(&token.to_lowercase());
+
+        let cluster = clusters.entry(normalized_key.clone()).or_insert_with(|| {
+            TokenCluster {
+                normalized: words.clone(),
+                literals: Vec::new(),
+                total_occurrences: 0,
+                file_count: 0,
+                matches_dir_name: false,
+                in_config_value: false,
+            }
+        });
+
+        if !cluster.literals.contains(token) {
+            cluster.literals.push(token.clone());
+        }
+        cluster.total_occurrences += count;
+        // Merge file sets for accurate file_count
+        let files_for_token = token_file_map.get(token).map(|s| s.len()).unwrap_or(0);
+        if files_for_token > cluster.file_count {
+            cluster.file_count = files_for_token;
+        }
+        cluster.matches_dir_name = cluster.matches_dir_name || matches_dir;
+        cluster.in_config_value = cluster.in_config_value || in_config;
+    }
+
+    // Merge near-misses using Levenshtein distance
+    merge_similar_clusters(&mut clusters);
+
+    // Score and convert to candidates
+    let mut freq_candidates: Vec<DetectedCandidate> = Vec::new();
+
+    for (key, cluster) in &clusters {
+        // Skip if already covered by higher tiers
+        if cluster.literals.iter().any(|l| covered_values.contains(&l.to_lowercase())) {
+            continue;
+        }
+
+        let score = score_cluster(cluster);
+
+        // Filter low-scoring candidates
+        if score < 0.30 {
+            continue;
+        }
+
+        let best_literal = &cluster.literals[0];
+        let suggested_name = suggest_variable_name(&cluster.normalized, key);
+
+        freq_candidates.push(DetectedCandidate {
+            suggested_name,
+            value: best_literal.clone(),
+            tier: ConfidenceTier::FrequencyAnalysis,
+            confidence: score,
+            reason: format!(
+                "{} occurrences across {} files, {} variant(s)",
+                cluster.total_occurrences,
+                cluster.file_count,
+                cluster.literals.len()
+            ),
+            file_count: cluster.file_count,
+            total_occurrences: cluster.total_occurrences,
+        });
+    }
+
+    // Sort by confidence, take top 5
+    freq_candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
+    freq_candidates.truncate(5);
+
+    freq_candidates
+}
+
+/// Scores a token cluster as a weighted sum of five signals, each in `0.0..=1.0`:
+/// occurrence count (0.15), file spread (0.20), variant diversity (0.35),
+/// directory-name match (0.20) and config-value match (0.10).
+fn score_cluster(cluster: &TokenCluster) -> f64 {
+    // Counts are log-scaled and capped, so a count of ten or more scores 1.0.
+    let log_scaled = |n: usize| ((n as f64).ln_1p() / 10.0_f64.ln_1p()).min(1.0);
+    let occ_score = log_scaled(cluster.total_occurrences);
+    let file_score = log_scaled(cluster.file_count);
+
+    // Variant diversity: 0 or 1 literal → 0.0, 2 → 0.5, 3 → 0.75, more → 1.0.
+    let variant_score = [0.0_f64, 0.0, 0.5, 0.75]
+        .get(cluster.literals.len())
+        .copied()
+        .unwrap_or(1.0);
+
+    // Binary signals contribute either their full weight or nothing.
+    let as_score = |flag: bool| -> f64 { if flag { 1.0 } else { 0.0 } };
+    let dir_score = as_score(cluster.matches_dir_name);
+    let config_score = as_score(cluster.in_config_value);
+    0.15 * occ_score
+        + 0.20 * file_score
+        + 0.35 * variant_score
+        + 0.20 * dir_score
+        + 0.10 * config_score
+}
+
+/// Merges clusters whose normalized keys are within Levenshtein distance 1,
+/// folding the cluster with fewer occurrences into the larger one. Keys are
+/// visited in sorted order so the outcome does not depend on `HashMap` iteration
+/// order, and merge chains (a → b → c) are followed to the surviving cluster so
+/// no cluster is dropped because its merge target was itself merged away.
+fn merge_similar_clusters(clusters: &mut HashMap<String, TokenCluster>) {
+    let mut keys: Vec<String> = clusters.keys().cloned().collect();
+    keys.sort_unstable();
+    let size_of = |key: &String| clusters.get(key).map_or(0, |c| c.total_occurrences);
+    let mut merge_map: HashMap<String, String> = HashMap::new();
+    for i in 0..keys.len() {
+        for j in (i + 1)..keys.len() {
+            let taken = merge_map.contains_key(&keys[i]) || merge_map.contains_key(&keys[j]);
+            if taken || strsim::levenshtein(&keys[i], &keys[j]) > 1 {
+                continue;
+            }
+            let i_wins = size_of(&keys[i]) >= size_of(&keys[j]);
+            let (from, to) = if i_wins { (j, i) } else { (i, j) };
+            merge_map.insert(keys[from].clone(), keys[to].clone());
+        }
+    }
+    for from in keys.iter().filter(|key| merge_map.contains_key(*key)) {
+        // Chains are acyclic: a key is only ever mapped to a then-unmerged key.
+        let mut to = from;
+        while let Some(next) = merge_map.get(to) {
+            to = next;
+        }
+        if let (Some(removed), Some(target)) = (clusters.remove(from), clusters.get_mut(to)) {
+            for lit in removed.literals {
+                if !target.literals.contains(&lit) {
+                    target.literals.push(lit);
+                }
+            }
+            target.total_occurrences += removed.total_occurrences;
+            target.file_count = target.file_count.max(removed.file_count);
+            target.matches_dir_name |= removed.matches_dir_name;
+            target.in_config_value |= removed.in_config_value;
+        }
+    }
+}
+
+/// Suggests a variable name for a cluster from its normalized words.
+///
+/// At most the first three words are used, joined with underscores; the
+/// cluster key is accepted but not used.
+fn suggest_variable_name(words: &[String], _key: &str) -> String {
+    let kept = words.len().min(3);
+    words[..kept].join("_")
+}
+
+// ── Noise filtering ──────────────────────────────────────────────────────
+
+/// Reports whether `token` should be ignored by frequency analysis.
+///
+/// `token` is the literal as it appears in a file; `words` is the word list it
+/// was split into. The token is noise when any of the following holds:
+/// - its lowercase form is shorter than three bytes;
+/// - its lowercase form is listed in `LANGUAGE_KEYWORDS` or `COMMON_LIBRARIES`;
+/// - it has exactly one word and its lowercase form is listed in `STOPWORDS`;
+/// - every word is shorter than three bytes, a stopword, or a file-format word
+///   (which is trivially true for an empty word list).
+fn is_noise_token(token: &str, words: &[String]) -> bool {
+    let lower = token.to_lowercase();
+    let lower = lower.as_str();
+    // Too short to be a meaningful identifier.
+    if lower.len() < 3 {
+        return true;
+    }
+
+    // A word that carries no project-specific meaning on its own.
+    let is_filler = |word: &String| {
+        let word = word.as_str();
+        word.len() < 3 || STOPWORDS.contains(&word) || FILE_FORMAT_WORDS.contains(&word)
+    };
+
+    // Whole-token matches against the fixed vocabularies.
+    let is_known_name = LANGUAGE_KEYWORDS.contains(&lower) || COMMON_LIBRARIES.contains(&lower);
+    let is_lone_stopword = words.len() == 1 && STOPWORDS.contains(&lower);
+
+    if is_known_name || is_lone_stopword {
+        return true;
+    }
+
+    // Otherwise the token is noise only if none of its words is distinctive.
+    words.iter().all(is_filler)
+}
+
+const FILE_FORMAT_WORDS: &[&str] = &[ // File-format words; `is_noise_token` counts them as filler.
+    "toml", "json", "yaml", "yml", "xml", "csv", "html", "css", "md", "txt",
+    "log", "cfg", "ini", "env", "lock", "mod", "rs", "js", "ts", "py", "go",
+    "rb", "java", "kt", "swift", "cpp", "hpp", "vue", "jsx", "tsx",
+];
+
+const LANGUAGE_KEYWORDS: &[&str] = &[ // A token whose lowercase form is listed here is noise.
+    // Rust
+    "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
+    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut",
+    "pub", "ref", "return", "self", "static", "struct", "super", "trait", "true", "type",
+    "unsafe", "use", "where", "while", "yield",
+    // JS/TS
+    "abstract", "arguments", "boolean", "byte", "case", "catch", "char", "class", "debugger",
+    "default", "delete", "do", "double", "eval", "export", "extends", "final", "finally",
+    "float", "function", "goto", "implements", "import", "instanceof", "int", "interface",
+    "long", "native", "new", "null", "package", "private", "protected", "public", "short",
+    "switch", "synchronized", "this", "throw", "throws", "transient", "try", "typeof",
+    "undefined", "var", "void", "volatile", "with",
+    // Python
+    "and", "as", "assert", "class", "def", "del", "elif", "except", "exec", "from",
+    "global", "is", "lambda", "nonlocal", "not", "or", "pass", "print", "raise",
+    "with", "yield",
+    // Go
+    "chan", "defer", "fallthrough", "go", "goroutine", "interface", "map", "range",
+    "select", "func",
+];
+
+const COMMON_LIBRARIES: &[&str] = &[ // A token whose lowercase form is listed here is noise.
+    "react", "redux", "webpack", "babel", "eslint", "prettier", "jest", "mocha", "chai",
+    "express", "fastify", "next", "nuxt", "vue", "angular", "svelte",
+    "serde", "tokio", "actix", "axum", "clap", "anyhow", "thiserror", "tracing",
+    "reqwest", "hyper", "warp", "rocket", "diesel", "sqlx",
+    "django", "flask", "fastapi", "pytest", "numpy", "pandas", "scipy",
+    "spring", "hibernate", "junit", "maven", "gradle",
+    "gin", "echo", "fiber", "gorm",
+    "lodash", "axios", "moment", "dayjs", "ramda", "underscore",
+    "tailwind", "bootstrap", "material",
+    "typescript", "javascript", "python", "golang", "rustlang",
+];
+
+const STOPWORDS: &[&str] = &[ // Generic words: noise as a one-word token, filler inside longer ones.
+    // English stopwords
+    "the", "and", "for", "are", "but", "not", "you", "all", "can", "had", "her", "was",
+    "one", "our", "out", "get", "set", "has", "his", "how", "its", "let", "may", "new",
+    "now", "old", "see", "way", "who", "did", "got", "has", "him", "into", "just",
+    "like", "make", "many", "some", "than", "them", "then", "very", "when", "with",
+    "have", "from", "been", "also", "each", "that", "this", "will", "your", "what",
+    "which", "their", "about", "would", "there", "could", "other", "after", "first",
+    "these", "those", "being", "where", "should", "because",
+    // Short generic words common in code identifiers
+    "my", "no", "is", "on", "in", "to", "by", "do", "up", "so", "or",
+    "app", "run", "dry", "log", "cmd", "arg", "env", "dir", "key", "map",
+    "max", "min", "raw", "ref", "src", "str", "tmp", "url", "var", "buf",
+    "msg", "req", "res", "err", "pkg", "lib", "bin", "fmt", "ctx", "cfg",
+    "opt", "val", "idx", "len", "ptr", "num", "std", "gen", "pre", "sub",
+    // Programming type/concept words
+    "string", "number", "bool", "boolean", "array", "object", "value", "result",
+    "error", "option", "none", "some", "true", "false", "null", "undefined",
+    "file", "path", "name", "type", "data", "info", "list", "item", "node",
+    "index", "count", "size", "length", "config", "settings", "options",
+    "input", "output", "source", "target", "test", "main", "init", "setup",
+    "todo", "fixme", "hack", "note", "warning", "debug", "trace", "level",
+    "mode", "flag", "status", "state", "cache", "hook", "hooks",
+];
+
+// ── Helpers ──────────────────────────────────────────────────────────────
+
+/// Counts literal occurrences of `value`, returning `(file_count, total)`.
+/// `file_count` covers content matches only; `total` also includes matches in
+/// relative paths. An empty `value` yields `(0, 0)`, since `str::matches("")`
+/// would otherwise report a hit at every character boundary.
+fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize) {
+    if value.is_empty() {
+        return (0, 0);
+    }
+    let (mut file_count, mut total) = (0, 0);
+    for file in &scan_result.files {
+        let hits = file.content.as_deref().map_or(0, |c| c.matches(value).count());
+        if hits > 0 {
+            file_count += 1;
+        }
+        // Path hits add to the total but do not mark the file as containing `value`.
+        let path_hits = file.relative_path.to_string_lossy().matches(value).count();
+        total += hits + path_hits;
+    }
+    (file_count, total)
+}
+
+/// Returns the display name of an author string without its e-mail address.
+/// `"Jane Doe <j@x.io>"` → `"Jane Doe"`; an address with no name (bare or in
+/// angle brackets) yields its local part; anything else is returned trimmed.
+pub fn strip_email(s: &str) -> String {
+    let (name, addr) = s.split_once('<').unwrap_or(("", s));
+    match (name.trim(), addr.split_once('@')) {
+        ("", Some((local, _))) => local.trim().to_string(),
+        ("", None) if !s.contains('<') => s.trim().to_string(),
+        (name, _) => name.to_string(),
+    }
+}
+
+/// Removes a leading `@scope/` from an npm package name.
+/// Names without an `@` prefix, or without a `/` after it, are returned as-is.
+fn strip_npm_scope(name: &str) -> &str {
+    name.strip_prefix('@')
+        .and_then(|scoped| scoped.split('/').nth(1))
+        .unwrap_or(name)
+}
+
+/// Removes duplicate candidates in place, keeping the most confident ones.
+///
+/// Two candidates conflict when they share a value (compared case-insensitively)
+/// or a `suggested_name`. Candidates are considered from highest to lowest
+/// confidence, and one is kept only if neither its value nor its name has been
+/// claimed by an already-kept candidate; on equal confidence the earlier
+/// candidate wins. Survivors keep their original relative order.
+///
+/// Deciding in confidence order matters: a candidate that is itself discarded
+/// (for example because its name is taken) never displaces another candidate,
+/// so a value is not lost just because its strongest holder lost a name clash.
+///
+/// Confidences are ordered with `f64::total_cmp`, which is a total order, so
+/// the sort is well defined for every float value.
+fn deduplicate_candidates(candidates: &mut Vec<DetectedCandidate>) {
+    // Indices in descending confidence order. The sort is stable, so candidates
+    // with equal confidence stay in their original order.
+    let mut order: Vec<usize> = (0..candidates.len()).collect();
+    order.sort_by(|&a, &b| candidates[b].confidence.total_cmp(&candidates[a].confidence));
+
+    let mut seen_value: HashSet<String> = HashSet::new();
+    let mut seen_name: HashSet<String> = HashSet::new();
+    let mut keep = vec![false; candidates.len()];
+
+    for idx in order {
+        let candidate = &candidates[idx];
+        let value_key = candidate.value.to_lowercase();
+
+        // Skip anything that clashes with a stronger, already-kept candidate.
+        if seen_value.contains(&value_key) || seen_name.contains(&candidate.suggested_name) {
+            continue;
+        }
+
+        // Claim both keys so weaker candidates cannot reuse them.
+        seen_value.insert(value_key);
+        seen_name.insert(candidate.suggested_name.clone());
+        keep[idx] = true;
+    }
+
+    // `retain` visits the elements in order, so the flags line up with indices.
+    let mut flags = keep.into_iter();
+    candidates.retain(|_| flags.next().unwrap_or(false));
+}
+
+// ── Tests ────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::extract::scan::ScannedFile;
+    use std::path::PathBuf;
+
+    fn make_scan_result(files: Vec<(&str, &str)>) -> ScanResult {
+        ScanResult {
+            files: files
+                .into_iter()
+                .map(|(path, content)| ScannedFile {
+                    relative_path: PathBuf::from(path),
+                    absolute_path: PathBuf::from(path),
+                    is_binary: false,
+                    content: Some(content.to_string()),
+                })
+                .collect(),
+            excluded_count: 0,
+        }
+    }
+
+    // ── Tier 1 tests ─────────────────────────────────────────────────
+
+    #[test]
+    fn test_tier1_basic_dir_name() {
+        let scan = make_scan_result(vec![
+            ("README.md", "# my-widget\nA widget project"),
+            ("src/lib.rs", "// my-widget core"),
+        ]);
+        let dir = PathBuf::from("/projects/my-widget");
+        let candidates = detect_directory_name(&dir, &scan);
+
+        assert_eq!(candidates.len(), 1);
+        assert_eq!(candidates[0].value, "my-widget");
+        assert_eq!(candidates[0].suggested_name, "project_name");
+        assert_eq!(candidates[0].confidence, 0.95);
+        assert!(candidates[0].total_occurrences >= 2);
+    }
+
+    #[test]
+    fn test_tier1_generic_name_skipped() {
+        let scan = make_scan_result(vec![("main.rs", "fn main() {}")]);
+        let dir = PathBuf::from("/projects/src");
+        let candidates = detect_directory_name(&dir, &scan);
+        assert!(candidates.is_empty());
+    }
+
+    #[test]
+    fn test_tier1_occurrence_counting() {
+        let scan = make_scan_result(vec![
+            ("a.txt", "hello hello hello"),
+            ("b.txt", "hello world"),
+        ]);
+        let dir = PathBuf::from("/projects/hello");
+        let candidates = detect_directory_name(&dir, &scan);
+        assert_eq!(candidates.len(), 1);
+        assert_eq!(candidates[0].file_count, 2);
+        assert!(candidates[0].total_occurrences >= 4);
+    }
+
+    // ── Tier 2 tests ─────────────────────────────────────────────────
+
+    #[test]
+    fn test_tier2_cargo_toml() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(
+            dir.path().join("Cargo.toml"),
+            "[package]\nname = \"data-pipeline\"\nauthors = [\"Alice <alice@example.com>\"]\n",
+        )
+        .unwrap();
+
+        let scan = make_scan_result(vec![("src/main.rs", "data-pipeline runs here")]);
+        let candidates = parse_cargo_toml(dir.path(), &scan).unwrap();
+
+        assert!(candidates.iter().any(|c| c.value == "data-pipeline"));
+        assert!(candidates.iter().any(|c| c.value == "Alice"));
+    }
+
+    #[test]
+    fn test_tier2_package_json_with_scope() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(
+            dir.path().join("package.json"),
+            r#"{"name": "@myorg/cool-widget", "author": "Bob Smith <bob@example.com>"}"#,
+        )
+        .unwrap();
+
+        let scan = make_scan_result(vec![("index.js", "cool-widget stuff")]);
+        let candidates = parse_package_json(dir.path(), &scan).unwrap();
+
+        let name_candidate = candidates.iter().find(|c| c.suggested_name == "project_name").unwrap();
+        assert_eq!(name_candidate.value, "cool-widget");
+
+        let author_candidate = candidates.iter().find(|c| c.suggested_name == "author").unwrap();
+        assert_eq!(author_candidate.value, "Bob Smith");
+    }
+
+    #[test]
+    fn test_tier2_pyproject_toml() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(
+            dir.path().join("pyproject.toml"),
+            "[project]\nname = \"my-tool\"\n\n[[project.authors]]\nname = \"Charlie\"\n",
+        )
+        .unwrap();
+
+        let scan = make_scan_result(vec![("setup.py", "my-tool setup")]);
+        let candidates = parse_pyproject_toml(dir.path(), &scan).unwrap();
+
+        assert!(candidates.iter().any(|c| c.value == "my-tool"));
+        assert!(candidates.iter().any(|c| c.value == "Charlie"));
+    }
+
+    #[test]
+    fn test_tier2_go_mod() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(
+            dir.path().join("go.mod"),
+            "module github.com/acme/my-service\n\ngo 1.21\n",
+        )
+        .unwrap();
+
+        let scan = make_scan_result(vec![("main.go", "package main // my-service by acme")]);
+        let candidates = parse_go_mod(dir.path(), &scan).unwrap();
+
+        let project = candidates.iter().find(|c| c.suggested_name == "project_name");
+        assert!(project.is_some());
+        assert_eq!(project.unwrap().value, "my-service");
+
+        let org = candidates.iter().find(|c| c.suggested_name == "org_name");
+        assert!(org.is_some(), "should extract org from go.mod module path");
+        assert_eq!(org.unwrap().value, "acme");
+    }
+
+    #[test]
+    fn test_tier2_missing_file() {
+        let dir = tempfile::tempdir().unwrap();
+        let scan = make_scan_result(vec![]);
+
+        assert!(parse_cargo_toml(dir.path(), &scan).is_none());
+        assert!(parse_package_json(dir.path(), &scan).is_none());
+        assert!(parse_pyproject_toml(dir.path(), &scan).is_none());
+        assert!(parse_go_mod(dir.path(), &scan).is_none());
+    }
+
+    #[test]
+    fn test_tier2_malformed_cargo_toml() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(dir.path().join("Cargo.toml"), "this is not valid toml {{{}}}").unwrap();
+        let scan = make_scan_result(vec![]);
+        assert!(parse_cargo_toml(dir.path(), &scan).is_none());
+    }
+
+    // ── Tier 3 tests ─────────────────────────────────────────────────
+
+    #[test]
+    fn test_parse_org_from_url_ssh() {
+        assert_eq!(
+            parse_org_from_url("git@github.com:acme-corp/my-repo.git"),
+            Some("acme-corp".to_string())
+        );
+    }
+
+    #[test]
+    fn test_parse_org_from_url_https() {
+        assert_eq!(
+            parse_org_from_url("https://github.com/acme-corp/my-repo.git"),
+            Some("acme-corp".to_string())
+        );
+    }
+
+    #[test]
+    fn test_strip_email_with_angle_brackets() {
+        assert_eq!(strip_email("Jane Doe <jane@example.com>"), "Jane Doe");
+    }
+
+    #[test]
+    fn test_strip_email_bare_email() {
+        assert_eq!(strip_email("jane@example.com"), "jane");
+    }
+
+    #[test]
+    fn test_strip_email_no_email() {
+        assert_eq!(strip_email("Jane Doe"), "Jane Doe");
+    }
+
+    // ── Tier 4 tests ─────────────────────────────────────────────────
+
+    #[test]
+    fn test_frequency_finds_repeated_identifier() {
+        let scan = make_scan_result(vec![
+            ("a.txt", "data-pipeline is great\ndata-pipeline rocks"),
+            ("b.txt", "use data_pipeline here\ndata_pipeline again"),
+            ("c.txt", "DataPipeline class\nDataPipeline impl"),
+            ("d.txt", "DATA_PIPELINE env var\nDATA_PIPELINE config"),
+        ]);
+
+        let covered = HashSet::new();
+        let config_vals = HashSet::new();
+        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
+
+        assert!(!candidates.is_empty());
+        // Should find "data-pipeline" cluster
+        let found = candidates.iter().any(|c| {
+            let words = split_into_words(&c.value);
+            words == vec!["data", "pipeline"]
+        });
+        assert!(found, "should find data-pipeline cluster, got: {:?}", candidates);
+    }
+
+    #[test]
+    fn test_frequency_filters_keywords() {
+        let scan = make_scan_result(vec![
+            ("a.rs", "fn async_handler() {}"),
+            ("b.rs", "fn async_handler() {}"),
+            ("c.rs", "fn async_handler() {}"),
+        ]);
+
+        let covered = HashSet::new();
+        let config_vals = HashSet::new();
+        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
+
+        // "async" alone should be filtered
+        for c in &candidates {
+            let lower = c.value.to_lowercase();
+            assert!(!LANGUAGE_KEYWORDS.contains(&lower.as_str()) || c.value.contains('-') || c.value.contains('_'));
+        }
+    }
+
+    #[test]
+    fn test_frequency_filters_short_tokens() {
+        let scan = make_scan_result(vec![
+            ("a.txt", "ab cd ef gh"),
+            ("b.txt", "ab cd ef gh"),
+        ]);
+
+        let covered = HashSet::new();
+        let config_vals = HashSet::new();
+        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
+
+        assert!(candidates.is_empty(), "short tokens should be filtered");
+    }
+
+    #[test]
+    fn test_frequency_skips_covered_values() {
+        let scan = make_scan_result(vec![
+            ("a.txt", "my-widget rocks"),
+            ("b.txt", "my-widget is great"),
+            ("c.txt", "my_widget too"),
+        ]);
+
+        let mut covered = HashSet::new();
+        covered.insert("my-widget".to_string());
+        let config_vals = HashSet::new();
+        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
+
+        let has_widget = candidates.iter().any(|c| c.value.to_lowercase().contains("widget"));
+        assert!(!has_widget, "covered values should be skipped");
+    }
+
+    #[test]
+    fn test_score_cluster_multi_variant_boost() {
+        let single_variant = TokenCluster {
+            normalized: vec!["my".into(), "app".into()],
+            literals: vec!["my-app".into()],
+            total_occurrences: 10,
+            file_count: 5,
+            matches_dir_name: false,
+            in_config_value: false,
+        };
+
+        let multi_variant = TokenCluster {
+            normalized: vec!["my".into(), "app".into()],
+            literals: vec!["my-app".into(), "my_app".into(), "MyApp".into()],
+            total_occurrences: 10,
+            file_count: 5,
+            matches_dir_name: false,
+            in_config_value: false,
+        };
+
+        assert!(score_cluster(&multi_variant) > score_cluster(&single_variant));
+    }
+
+    #[test]
+    fn test_score_cluster_dir_name_boost() {
+        let no_dir = TokenCluster {
+            normalized: vec!["my".into(), "app".into()],
+            literals: vec!["my-app".into()],
+            total_occurrences: 5,
+            file_count: 3,
+            matches_dir_name: false,
+            in_config_value: false,
+        };
+
+        let with_dir = TokenCluster {
+            normalized: vec!["my".into(), "app".into()],
+            literals: vec!["my-app".into()],
+            total_occurrences: 5,
+            file_count: 3,
+            matches_dir_name: true,
+            in_config_value: false,
+        };
+
+        assert!(score_cluster(&with_dir) > score_cluster(&no_dir));
+    }
+
+    #[test]
+    fn test_levenshtein_merging() {
+        let mut clusters = HashMap::new();
+        clusters.insert(
+            "data pipeline".to_string(),
+            TokenCluster {
+                normalized: vec!["data".into(), "pipeline".into()],
+                literals: vec!["data-pipeline".into()],
+                total_occurrences: 10,
+                file_count: 5,
+                matches_dir_name: false,
+                in_config_value: false,
+            },
+        );
+        clusters.insert(
+            "data pipelin".to_string(), // typo / near miss
+            TokenCluster {
+                normalized: vec!["data".into(), "pipelin".into()],
+                literals: vec!["data-pipelin".into()],
+                total_occurrences: 2,
+                file_count: 1,
+                matches_dir_name: false,
+                in_config_value: false,
+            },
+        );
+
+        merge_similar_clusters(&mut clusters);
+
+        // Should merge into one cluster
+        assert_eq!(clusters.len(), 1);
+        let remaining = clusters.values().next().unwrap();
+        assert_eq!(remaining.total_occurrences, 12);
+    }
+
+    // ── Helper tests ─────────────────────────────────────────────────
+
+    #[test]
+    fn test_deduplication_keeps_highest_confidence() {
+        let mut candidates = vec![
+            DetectedCandidate {
+                suggested_name: "project_name".to_string(),
+                value: "my-app".to_string(),
+                tier: ConfidenceTier::ConfigFile,
+                confidence: 0.90,
+                reason: "Cargo.toml".to_string(),
+                file_count: 3,
+                total_occurrences: 10,
+            },
+            DetectedCandidate {
+                suggested_name: "project_name".to_string(),
+                value: "my-app".to_string(),
+                tier: ConfidenceTier::DirectoryName,
+                confidence: 0.95,
+                reason: "directory name".to_string(),
+                file_count: 3,
+                total_occurrences: 10,
+            },
+        ];
+
+        deduplicate_candidates(&mut candidates);
+        assert_eq!(candidates.len(), 1);
+        assert_eq!(candidates[0].confidence, 0.95);
+    }
+
+    #[test]
+    fn test_deduplication_by_suggested_name() {
+        let mut candidates = vec![
+            DetectedCandidate {
+                suggested_name: "author".to_string(),
+                value: "Alice Johnson".to_string(),
+                tier: ConfidenceTier::ConfigFile,
+                confidence: 0.85,
+                reason: "package.json".to_string(),
+                file_count: 3,
+                total_occurrences: 5,
+            },
+            DetectedCandidate {
+                suggested_name: "author".to_string(),
+                value: "Robert Roskam".to_string(),
+                tier: ConfidenceTier::GitMetadata,
+                confidence: 0.65,
+                reason: "git config".to_string(),
+                file_count: 0,
+                total_occurrences: 0,
+            },
+        ];
+
+        deduplicate_candidates(&mut candidates);
+        assert_eq!(candidates.len(), 1, "should deduplicate by suggested_name");
+        assert_eq!(candidates[0].value, "Alice Johnson", "should keep highest confidence");
+    }
+
+    #[test]
+    fn test_suggest_variable_name() {
+        assert_eq!(
+            suggest_variable_name(&["my".into(), "app".into()], "my app"),
+            "my_app"
+        );
+        assert_eq!(
+            suggest_variable_name(
+                &["very".into(), "long".into(), "name".into(), "here".into()],
+                "very long name here"
+            ),
+            "very_long_name"
+        );
+    }
+
+    #[test]
+    fn test_strip_npm_scope() {
+        assert_eq!(strip_npm_scope("@myorg/cool-widget"), "cool-widget");
+        assert_eq!(strip_npm_scope("plain-package"), "plain-package");
+    }
+
+    #[test]
+    fn test_auto_detect_integration() {
+        let dir = tempfile::tempdir().unwrap();
+        let project_dir = dir.path().join("my-widget");
+        std::fs::create_dir(&project_dir).unwrap();
+        std::fs::write(
+            project_dir.join("README.md"),
+            "# my-widget\nWelcome to my-widget",
+        )
+        .unwrap();
+        std::fs::write(
+            project_dir.join("lib.rs"),
+            "pub mod my_widget;\nstruct MyWidget;",
+        )
+        .unwrap();
+
+        let scan = crate::extract::scan::scan_project(&project_dir, &[]).unwrap();
+        let result = auto_detect(&project_dir, &scan);
+
+        assert!(!result.candidates.is_empty());
+        let project_name = result.candidates.iter().find(|c| c.suggested_name == "project_name");
+        assert!(project_name.is_some(), "should detect project_name");
+        assert_eq!(project_name.unwrap().value, "my-widget");
+    }
+}
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index ba568bd..d06315b 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -1,3 +1,4 @@
+pub mod auto_detect;
 pub mod conditional;
 pub mod config_gen;
 pub mod exclude;
@@ -22,6 +23,7 @@ use self::exclude::{detect_copy_without_render, detect_excludes};
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
+use self::auto_detect::{auto_detect, DetectedCandidate};
 use self::scan::{scan_project, ScannedFile};
 use self::variants::{
     computed_expression, detect_separator, generate_variants, is_canonical_variant, CaseVariant,
@@ -74,6 +76,7 @@ pub struct ExtractOptions {
     pub in_place: bool,
     pub batch: bool,
     pub dry_run: bool,
+    pub auto: bool,
 }
 
 /// Plan an extraction: scan the project, detect variants, build replacement rules.
@@ -86,10 +89,6 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         });
     }
 
-    if options.variables.is_empty() {
-        return Err(DicecutError::ExtractNoVariables);
-    }
-
     // Check if this is already a template
     if source_dir.join("diecut.toml").exists() {
         return Err(DicecutError::ExtractAlreadyTemplate {
@@ -134,10 +133,47 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         scan_result.excluded_count
     );
 
+    // Phase 2.5: Auto-detect variables if none provided and --auto is enabled
+    let variables = if options.variables.is_empty() && options.auto {
+        let detect_result = auto_detect(source_dir, &scan_result);
+
+        if detect_result.candidates.is_empty() {
+            return Err(DicecutError::ExtractNoVariables);
+        }
+
+        let accepted = if options.batch {
+            let accepted: Vec<_> = detect_result
+                .candidates
+                .into_iter()
+                .filter(|c| c.confidence >= 0.50)
+                .collect();
+            if accepted.is_empty() {
+                return Err(DicecutError::ExtractNoVariables);
+            }
+            print_auto_detected_batch(&accepted);
+            accepted
+        } else {
+            let accepted = confirm_auto_detected_interactive(detect_result.candidates)?;
+            if accepted.is_empty() {
+                return Err(DicecutError::ExtractNoVariables);
+            }
+            accepted
+        };
+
+        accepted
+            .into_iter()
+            .map(|c| (c.suggested_name, c.value))
+            .collect()
+    } else if options.variables.is_empty() {
+        return Err(DicecutError::ExtractNoVariables);
+    } else {
+        options.variables.clone()
+    };
+
     // Phase 3: Generate variants and count occurrences
     let mut extract_variables = Vec::new();
 
-    for (var_name, var_value) in &options.variables {
+    for (var_name, var_value) in &variables {
         let all_variants = generate_variants(var_name, var_value);
 
         let mut occurrence_counts = Vec::new();
@@ -639,6 +675,84 @@ fn confirm_conditionals_interactive(
     Ok(confirmed)
 }
 
+fn print_auto_detected_batch(candidates: &[DetectedCandidate]) {
+    eprintln!(
+        "\n{} Auto-detected variables {}",
+        style("──").dim(),
+        style("──────────────────────────────────").dim()
+    );
+    for c in candidates {
+        eprintln!(
+            "  {} {} = {:?} ({:.0}% confidence, {})",
+            style("✓").green(),
+            style(&c.suggested_name).bold(),
+            c.value,
+            c.confidence * 100.0,
+            c.tier
+        );
+        eprintln!(
+            "    {}",
+            style(&c.reason).dim()
+        );
+    }
+}
+
+fn confirm_auto_detected_interactive(
+    candidates: Vec<DetectedCandidate>,
+) -> Result<Vec<DetectedCandidate>> {
+    eprintln!(
+        "\n{} Auto-detected variables {}",
+        style("──").dim(),
+        style("──────────────────────────────────").dim()
+    );
+
+    let mut accepted = Vec::new();
+
+    for candidate in candidates {
+        let default_accept = candidate.confidence >= 0.70;
+        eprintln!(
+            "\n  {} = {:?} ({:.0}% confidence, {})",
+            style(&candidate.suggested_name).bold(),
+            candidate.value,
+            candidate.confidence * 100.0,
+            candidate.tier
+        );
+        eprintln!("    {}", style(&candidate.reason).dim());
+        if candidate.total_occurrences > 0 {
+            eprintln!(
+                "    {} occurrences across {} files",
+                candidate.total_occurrences,
+                candidate.file_count
+            );
+        }
+
+        let accept = Confirm::new(&format!("Accept \"{}\"?", candidate.suggested_name))
+            .with_default(default_accept)
+            .prompt()
+            .map_err(|_| DicecutError::PromptCancelled)?;
+
+        if accept {
+            let name = Text::new("Variable name:")
+                .with_default(&candidate.suggested_name)
+                .prompt()
+                .map_err(|_| DicecutError::PromptCancelled)?;
+
+            let value = Text::new("Value:")
+                .with_default(&candidate.value)
+                .prompt()
+                .map_err(|_| DicecutError::PromptCancelled)?;
+
+            accepted.push(DetectedCandidate {
+                suggested_name: name,
+                value,
+                ..candidate
+            });
+        }
+    }
+
+    Ok(accepted)
+}
+
 fn confirm_files_interactive(files: &[PlannedExtractFile]) -> Result<()> {
     let templated: Vec<_> = files.iter().filter(|f| f.has_replacements).collect();
     let copied: Vec<_> = files.iter().filter(|f| !f.has_replacements).collect();
diff --git a/src/main.rs b/src/main.rs
index f540fe9..bb0d2ff 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -26,6 +26,7 @@ fn main() -> miette::Result<()> {
             in_place,
             batch,
             dry_run,
-        } => commands::extract::run(source, vars, output, in_place, batch, dry_run),
+            auto,
+        } => commands::extract::run(source, vars, output, in_place, batch, dry_run, auto),
     }
 }
diff --git a/tests/integration.rs b/tests/integration.rs
index 1ad3a59..55e32e9 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -656,6 +656,7 @@ fn test_extract_batch_basic() {
         in_place: false,
         batch: true,
         dry_run: false,
+        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -699,6 +700,7 @@ fn test_extract_detects_case_variants() {
         in_place: false,
         batch: true,
         dry_run: false,
+        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -752,6 +754,7 @@ fn test_extract_dry_run_writes_nothing() {
         in_place: false,
         batch: true,
         dry_run: true,
+        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -780,6 +783,7 @@ fn test_extract_rejects_already_template() {
         in_place: false,
         batch: true,
         dry_run: false,
+        auto: false,
     };
 
     let result = plan_extraction(&options);
@@ -798,6 +802,7 @@ fn test_extract_rejects_no_variables() {
         in_place: false,
         batch: true,
         dry_run: false,
+        auto: false,
     };
 
     let result = plan_extraction(&options);
@@ -820,6 +825,7 @@ fn test_extract_templates_path_components() {
         in_place: false,
         batch: true,
         dry_run: false,
+        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -882,6 +888,7 @@ fn test_extract_round_trip() {
         in_place: false,
         batch: true,
         dry_run: false,
+        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -915,3 +922,141 @@ fn test_extract_round_trip() {
         }
     }
 }
+
+// ── Auto-detect tests ────────────────────────────────────────────────────
+
+#[test]
+fn test_extract_auto_batch() {
+    let project = tempfile::tempdir().unwrap();
+    let project_dir = project.path().join("data-pipeline");
+    std::fs::create_dir(&project_dir).unwrap();
+    std::fs::write(
+        project_dir.join("Cargo.toml"),
+        "[package]\nname = \"data-pipeline\"\nversion = \"0.1.0\"\n",
+    )
+    .unwrap();
+    std::fs::write(
+        project_dir.join("README.md"),
+        "# data-pipeline\nWelcome to data-pipeline\n",
+    )
+    .unwrap();
+    std::fs::create_dir(project_dir.join("src")).unwrap();
+    std::fs::write(
+        project_dir.join("src/main.rs"),
+        "fn main() {\n    println!(\"data-pipeline starting\");\n}\n",
+    )
+    .unwrap();
+
+    let output = tempfile::tempdir().unwrap();
+    let output_path = output.path().join("auto-extracted");
+
+    let options = ExtractOptions {
+        source_dir: project_dir.clone(),
+        variables: vec![],
+        output_dir: Some(output_path.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: false,
+        auto: true,
+    };
+
+    let plan = plan_extraction(&options).unwrap();
+    execute_extraction(&plan, false).unwrap();
+
+    let project_var = plan
+        .variables
+        .iter()
+        .find(|v| v.name == "project_name");
+    assert!(
+        project_var.is_some(),
+        "should auto-detect project_name, got vars: {:?}",
+        plan.variables.iter().map(|v| &v.name).collect::<Vec<_>>()
+    );
+    assert_eq!(project_var.unwrap().value, "data-pipeline");
+
+    assert!(output_path.join("diecut.toml").exists());
+    let config = std::fs::read_to_string(output_path.join("diecut.toml")).unwrap();
+    assert!(config.contains("project_name"));
+}
+
+#[test]
+fn test_extract_auto_explicit_vars_priority() {
+    let project = tempfile::tempdir().unwrap();
+    let project_dir = project.path().join("my-service");
+    std::fs::create_dir(&project_dir).unwrap();
+    std::fs::write(
+        project_dir.join("Cargo.toml"),
+        "[package]\nname = \"my-service\"\n",
+    )
+    .unwrap();
+    std::fs::write(project_dir.join("README.md"), "# my-service\n").unwrap();
+
+    let output = tempfile::tempdir().unwrap();
+    let output_path = output.path().join("explicit-extracted");
+
+    let options = ExtractOptions {
+        source_dir: project_dir.clone(),
+        variables: vec![("app_name".to_string(), "my-service".to_string())],
+        output_dir: Some(output_path.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: false,
+        auto: true,
+    };
+
+    let plan = plan_extraction(&options).unwrap();
+
+    let has_app_name = plan.variables.iter().any(|v| v.name == "app_name");
+    let has_project_name = plan.variables.iter().any(|v| v.name == "project_name");
+    assert!(has_app_name, "should use explicit var app_name");
+    assert!(!has_project_name, "should not auto-detect project_name when explicit vars given");
+}
+
+#[test]
+fn test_extract_auto_frequency_fallback() {
+    let project = tempfile::tempdir().unwrap();
+    let project_dir = project.path().join("cool-widget");
+    std::fs::create_dir(&project_dir).unwrap();
+    std::fs::write(
+        project_dir.join("main.txt"),
+        "cool-widget is great\ncool_widget module\nCoolWidget class\n",
+    )
+    .unwrap();
+    std::fs::write(
+        project_dir.join("config.txt"),
+        "name = cool-widget\nmodule = cool_widget\n",
+    )
+    .unwrap();
+    std::fs::write(
+        project_dir.join("test.txt"),
+        "testing cool-widget\nCOOL_WIDGET env\n",
+    )
+    .unwrap();
+
+    let output = tempfile::tempdir().unwrap();
+    let output_path = output.path().join("freq-extracted");
+
+    let options = ExtractOptions {
+        source_dir: project_dir.clone(),
+        variables: vec![],
+        output_dir: Some(output_path.clone()),
+        in_place: false,
+        batch: true,
+        dry_run: false,
+        auto: true,
+    };
+
+    let plan = plan_extraction(&options).unwrap();
+
+    let has_relevant_var = plan.variables.iter().any(|v| {
+        v.value.contains("cool") || v.name.contains("cool")
+    });
+    assert!(
+        has_relevant_var,
+        "should detect cool-widget related variable, got: {:?}",
+        plan.variables
+            .iter()
+            .map(|v| format!("{}={}", v.name, v.value))
+            .collect::<Vec<_>>()
+    );
+}

From 3ec68001ec31057c8333812a6e9551e5c9895493 Mon Sep 17 00:00:00 2001
From: Robert Roskam <robert.roskam@pantheon.io>
Date: Fri, 27 Feb 2026 19:53:59 -0500
Subject: [PATCH 06/29] refactor(extract): make auto-detect default, rename
 --batch to -y/--yes

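Auto-detect now always runs instead of requiring --auto; detected
candidates are merged with any explicit --var entries, and an explicit
name takes precedence over an auto-detected one. Renamed --batch to
-y/--yes to align with CLI conventions. Added a --min-confidence
threshold flag (default 0.5). Name collisions from multiple detection
sources are now preserved instead of being silently deduplicated:
interactive mode prompts for a choice, while --yes picks the
highest-confidence candidate.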
---
 src/cli.rs                 |  14 +--
 src/commands/extract.rs    |  11 +-
 src/error.rs               |   2 +-
 src/extract/auto_detect.rs |  31 ++---
 src/extract/mod.rs         | 231 +++++++++++++++++++++++++------------
 src/main.rs                |   6 +-
 tests/integration.rs       |  84 ++++++++++----
 7 files changed, 242 insertions(+), 137 deletions(-)

diff --git a/src/cli.rs b/src/cli.rs
index 0301617..6c687a0 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -67,16 +67,16 @@ pub enum Commands {
         #[arg(long)]
         in_place: bool,
 
-        /// Skip all interactive prompts
-        #[arg(long)]
-        batch: bool,
+        /// Accept all defaults without prompting
+        #[arg(short = 'y', long)]
+        yes: bool,
+
+        /// Minimum confidence threshold for auto-detected variables (0.0-1.0)
+        #[arg(long, default_value = "0.5")]
+        min_confidence: f64,
 
         /// Show what would be extracted without writing files
         #[arg(long)]
         dry_run: bool,
-
-        /// Auto-detect template variables from project metadata and content
-        #[arg(long)]
-        auto: bool,
     },
 }
diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index 6576387..faf0018 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -11,23 +11,20 @@ pub fn run(
     vars: Vec<String>,
     output: Option<String>,
     in_place: bool,
-    batch: bool,
+    yes: bool,
+    min_confidence: f64,
     dry_run: bool,
-    auto: bool,
 ) -> Result<()> {
     let variables = parse_vars(&vars)?;
 
-    // Default auto to true when no vars are provided
-    let auto = auto || variables.is_empty();
-
     let options = ExtractOptions {
         source_dir: PathBuf::from(&source),
         variables,
         output_dir: output.map(PathBuf::from),
         in_place,
-        batch,
+        yes,
+        min_confidence,
         dry_run,
-        auto,
     };
 
     let plan = plan_extraction(&options)?;
diff --git a/src/error.rs b/src/error.rs
index ccac949..a612908 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -124,7 +124,7 @@ pub enum DicecutError {
 
     #[error("No variables provided for extraction")]
     #[diagnostic(help(
-        "Use --var key=value to specify variables, or --auto to detect them automatically"
+        "Use --var key=value to specify variables, or ensure the project has identifiable names in config files or directory name"
     ))]
     ExtractNoVariables,
 
diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index 5cc76da..881991e 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -824,12 +824,13 @@ fn strip_npm_scope(name: &str) -> &str {
 }
 
 fn deduplicate_candidates(candidates: &mut Vec<DetectedCandidate>) {
+    // Only deduplicate by value (same literal from multiple tiers → keep highest confidence).
+    // Name collisions (e.g., two different "author" candidates) are preserved
+    // for the interactive/yes layer to resolve.
     let mut seen_value: HashMap<String, usize> = HashMap::new();
-    let mut seen_name: HashMap<String, usize> = HashMap::new();
     let mut to_remove = Vec::new();
 
     for (i, candidate) in candidates.iter().enumerate() {
-        // Deduplicate by value (same literal, different tiers)
         let value_key = candidate.value.to_lowercase();
         if let Some(&prev_idx) = seen_value.get(&value_key) {
             if candidate.confidence > candidates[prev_idx].confidence {
@@ -837,27 +838,10 @@ fn deduplicate_candidates(candidates: &mut Vec<DetectedCandidate>) {
                 seen_value.insert(value_key, i);
             } else {
                 to_remove.push(i);
-                continue;
             }
         } else {
             seen_value.insert(value_key, i);
         }
-
-        // Deduplicate by suggested_name (e.g., two different "author" candidates)
-        let name_key = candidate.suggested_name.clone();
-        if let Some(&prev_idx) = seen_name.get(&name_key) {
-            if to_remove.contains(&prev_idx) {
-                // Previous holder was already removed, replace it
-                seen_name.insert(name_key, i);
-            } else if candidate.confidence > candidates[prev_idx].confidence {
-                to_remove.push(prev_idx);
-                seen_name.insert(name_key, i);
-            } else {
-                to_remove.push(i);
-            }
-        } else {
-            seen_name.insert(name_key, i);
-        }
     }
 
     to_remove.sort_unstable();
@@ -1240,7 +1224,7 @@ mod tests {
     }
 
     #[test]
-    fn test_deduplication_by_suggested_name() {
+    fn test_name_collisions_preserved() {
         let mut candidates = vec![
             DetectedCandidate {
                 suggested_name: "author".to_string(),
@@ -1263,8 +1247,11 @@ mod tests {
         ];
 
         deduplicate_candidates(&mut candidates);
-        assert_eq!(candidates.len(), 1, "should deduplicate by suggested_name");
-        assert_eq!(candidates[0].value, "Alice Johnson", "should keep highest confidence");
+        assert_eq!(
+            candidates.len(),
+            2,
+            "name collisions should be preserved for interactive resolution"
+        );
     }
 
     #[test]
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index d06315b..96e7ecb 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -6,11 +6,11 @@ pub mod replace;
 pub mod scan;
 pub mod variants;
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::path::{Path, PathBuf};
 
 use console::style;
-use inquire::{Confirm, Text};
+use inquire::{Confirm, Select, Text};
 
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
@@ -74,9 +74,9 @@ pub struct ExtractOptions {
     pub variables: Vec<(String, String)>,
     pub output_dir: Option<PathBuf>,
     pub in_place: bool,
-    pub batch: bool,
+    pub yes: bool,
+    pub min_confidence: f64,
     pub dry_run: bool,
-    pub auto: bool,
 }
 
 /// Plan an extraction: scan the project, detect variants, build replacement rules.
@@ -133,41 +133,44 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         scan_result.excluded_count
     );
 
-    // Phase 2.5: Auto-detect variables if none provided and --auto is enabled
-    let variables = if options.variables.is_empty() && options.auto {
+    // Phase 2.5: Auto-detect variables (always runs), merge with explicit --var entries
+    let variables = {
+        let explicit_vars = options.variables.clone();
         let detect_result = auto_detect(source_dir, &scan_result);
 
-        if detect_result.candidates.is_empty() {
+        // Filter candidates below min_confidence threshold
+        let candidates: Vec<_> = detect_result
+            .candidates
+            .into_iter()
+            .filter(|c| c.confidence >= options.min_confidence)
+            .collect();
+
+        if candidates.is_empty() && explicit_vars.is_empty() {
             return Err(DicecutError::ExtractNoVariables);
         }
 
-        let accepted = if options.batch {
-            let accepted: Vec<_> = detect_result
-                .candidates
-                .into_iter()
-                .filter(|c| c.confidence >= 0.50)
-                .collect();
-            if accepted.is_empty() {
-                return Err(DicecutError::ExtractNoVariables);
-            }
-            print_auto_detected_batch(&accepted);
-            accepted
+        // Resolve auto-detected candidates (merge with explicit vars)
+        let auto_vars = if candidates.is_empty() {
+            vec![]
+        } else if options.yes {
+            resolve_candidates_yes(&candidates, &explicit_vars)
         } else {
-            let accepted = confirm_auto_detected_interactive(detect_result.candidates)?;
-            if accepted.is_empty() {
-                return Err(DicecutError::ExtractNoVariables);
-            }
-            accepted
+            confirm_auto_detected_interactive(candidates, &explicit_vars)?
         };
 
-        accepted
-            .into_iter()
-            .map(|c| (c.suggested_name, c.value))
-            .collect()
-    } else if options.variables.is_empty() {
-        return Err(DicecutError::ExtractNoVariables);
-    } else {
-        options.variables.clone()
+        // Merge: explicit vars first (pre-accepted), then auto-detected additions
+        let mut merged = explicit_vars;
+        for (name, value) in auto_vars {
+            if !merged.iter().any(|(n, _)| n == &name) {
+                merged.push((name, value));
+            }
+        }
+
+        if merged.is_empty() {
+            return Err(DicecutError::ExtractNoVariables);
+        }
+
+        merged
     };
 
     // Phase 3: Generate variants and count occurrences
@@ -192,7 +195,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
     }
 
     // Phase 4: Interactive variant confirmation
-    let confirmed_variables = if options.batch {
+    let confirmed_variables = if options.yes {
         // Batch mode: auto-accept all found variants
         extract_variables
             .into_iter()
@@ -218,12 +221,12 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
     };
 
     // Phase 5: Interactive exclude confirmation
-    if !options.batch {
+    if !options.yes {
         excludes = confirm_excludes_interactive(excludes)?;
     }
 
     // Phase 6: Detect conditional files
-    let detected_conditionals = if options.batch {
+    let detected_conditionals = if options.yes {
         vec![] // Batch mode: no conditional files
     } else {
         let detected = detect_conditional_files(source_dir);
@@ -301,7 +304,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
     }
 
     // Phase 10: Interactive file confirmation
-    if !options.batch {
+    if !options.yes {
         confirm_files_interactive(&planned_files)?;
     }
 
@@ -675,78 +678,160 @@ fn confirm_conditionals_interactive(
     Ok(confirmed)
 }
 
-fn print_auto_detected_batch(candidates: &[DetectedCandidate]) {
+fn resolve_candidates_yes(
+    candidates: &[DetectedCandidate],
+    explicit_vars: &[(String, String)],
+) -> Vec<(String, String)> {
     eprintln!(
         "\n{} Auto-detected variables {}",
         style("──").dim(),
         style("──────────────────────────────────").dim()
     );
+
+    // Group candidates by suggested_name
+    let mut groups: BTreeMap<String, Vec<&DetectedCandidate>> = BTreeMap::new();
     for c in candidates {
+        groups.entry(c.suggested_name.clone()).or_default().push(c);
+    }
+
+    let mut result = Vec::new();
+
+    for (name, mut group) in groups {
+        // Skip names already covered by explicit --var
+        if explicit_vars.iter().any(|(n, _)| n == &name) {
+            eprintln!(
+                "  {} {} (explicit --var, skipping auto-detect)",
+                style("·").dim(),
+                style(&name).dim()
+            );
+            continue;
+        }
+
+        // For name collisions, pick highest confidence
+        group.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
+        let winner = group[0];
+
         eprintln!(
             "  {} {} = {:?} ({:.0}% confidence, {})",
             style("✓").green(),
-            style(&c.suggested_name).bold(),
-            c.value,
-            c.confidence * 100.0,
-            c.tier
-        );
-        eprintln!(
-            "    {}",
-            style(&c.reason).dim()
+            style(&winner.suggested_name).bold(),
+            winner.value,
+            winner.confidence * 100.0,
+            winner.tier
         );
+        eprintln!("    {}", style(&winner.reason).dim());
+
+        if group.len() > 1 {
+            eprintln!(
+                "    {} {} other candidates for this name (picked highest confidence)",
+                style("⚠").yellow(),
+                group.len() - 1
+            );
+        }
+
+        result.push((winner.suggested_name.clone(), winner.value.clone()));
     }
+
+    result
 }
 
 fn confirm_auto_detected_interactive(
     candidates: Vec<DetectedCandidate>,
-) -> Result<Vec<DetectedCandidate>> {
+    explicit_vars: &[(String, String)],
+) -> Result<Vec<(String, String)>> {
     eprintln!(
         "\n{} Auto-detected variables {}",
         style("──").dim(),
         style("──────────────────────────────────").dim()
     );
 
+    // Group candidates by suggested_name
+    let mut groups: BTreeMap<String, Vec<DetectedCandidate>> = BTreeMap::new();
+    for c in candidates {
+        groups.entry(c.suggested_name.clone()).or_default().push(c);
+    }
+
     let mut accepted = Vec::new();
 
-    for candidate in candidates {
-        let default_accept = candidate.confidence >= 0.70;
-        eprintln!(
-            "\n  {} = {:?} ({:.0}% confidence, {})",
-            style(&candidate.suggested_name).bold(),
-            candidate.value,
-            candidate.confidence * 100.0,
-            candidate.tier
-        );
-        eprintln!("    {}", style(&candidate.reason).dim());
-        if candidate.total_occurrences > 0 {
+    for (name, mut group) in groups {
+        // Skip names already covered by explicit --var
+        if explicit_vars.iter().any(|(n, _)| n == &name) {
             eprintln!(
-                "    {} occurrences across {} files",
-                candidate.total_occurrences,
-                candidate.file_count
+                "\n  {} {} (provided via --var, skipping)",
+                style("·").dim(),
+                style(&name).dim()
             );
+            continue;
         }
 
-        let accept = Confirm::new(&format!("Accept \"{}\"?", candidate.suggested_name))
-            .with_default(default_accept)
-            .prompt()
-            .map_err(|_| DicecutError::PromptCancelled)?;
+        // Sort by confidence descending
+        group.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
 
-        if accept {
-            let name = Text::new("Variable name:")
-                .with_default(&candidate.suggested_name)
+        if group.len() == 1 {
+            // Single candidate — simple confirm
+            let candidate = &group[0];
+            eprintln!(
+                "\n  {} = {:?} ({:.0}% confidence, {})",
+                style(&candidate.suggested_name).bold(),
+                candidate.value,
+                candidate.confidence * 100.0,
+                candidate.tier
+            );
+            eprintln!("    {}", style(&candidate.reason).dim());
+            if candidate.total_occurrences > 0 {
+                eprintln!(
+                    "    {} occurrences across {} files",
+                    candidate.total_occurrences,
+                    candidate.file_count
+                );
+            }
+
+            let accept = Confirm::new(&format!("Accept \"{}\"?", candidate.suggested_name))
+                .with_default(true)
                 .prompt()
                 .map_err(|_| DicecutError::PromptCancelled)?;
 
-            let value = Text::new("Value:")
-                .with_default(&candidate.value)
+            if accept {
+                accepted.push((candidate.suggested_name.clone(), candidate.value.clone()));
+            }
+        } else {
+            // Name collision — show selection prompt
+            eprintln!(
+                "\n  {} Multiple candidates for {}:",
+                style("⚠").yellow(),
+                style(&name).bold()
+            );
+
+            let mut options: Vec<String> = group
+                .iter()
+                .map(|c| {
+                    format!(
+                        "{:?} ({:.0}% confidence, {})",
+                        c.value,
+                        c.confidence * 100.0,
+                        c.tier
+                    )
+                })
+                .collect();
+            options.push("Skip".to_string());
+
+            let selection = Select::new(&format!("Which value for \"{}\"?", name), options)
                 .prompt()
                 .map_err(|_| DicecutError::PromptCancelled)?;
 
-            accepted.push(DetectedCandidate {
-                suggested_name: name,
-                value,
-                ..candidate
-            });
+            if selection != "Skip" {
+                // Find the matching candidate
+                if let Some(chosen) = group.iter().find(|c| {
+                    format!(
+                        "{:?} ({:.0}% confidence, {})",
+                        c.value,
+                        c.confidence * 100.0,
+                        c.tier
+                    ) == selection
+                }) {
+                    accepted.push((chosen.suggested_name.clone(), chosen.value.clone()));
+                }
+            }
         }
     }
 
diff --git a/src/main.rs b/src/main.rs
index bb0d2ff..4999bb2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -24,9 +24,9 @@ fn main() -> miette::Result<()> {
             vars,
             output,
             in_place,
-            batch,
+            yes,
+            min_confidence,
             dry_run,
-            auto,
-        } => commands::extract::run(source, vars, output, in_place, batch, dry_run, auto),
+        } => commands::extract::run(source, vars, output, in_place, yes, min_confidence, dry_run),
     }
 }
diff --git a/tests/integration.rs b/tests/integration.rs
index 55e32e9..0310b92 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -654,9 +654,9 @@ fn test_extract_batch_basic() {
         ],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -698,9 +698,9 @@ fn test_extract_detects_case_variants() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -752,9 +752,9 @@ fn test_extract_dry_run_writes_nothing() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: true,
-        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -781,9 +781,9 @@ fn test_extract_rejects_already_template() {
         variables: vec![("name".to_string(), "val".to_string())],
         output_dir: None,
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: false,
     };
 
     let result = plan_extraction(&options);
@@ -795,14 +795,16 @@ fn test_extract_rejects_no_variables() {
     let project = tempfile::tempdir().unwrap();
     std::fs::write(project.path().join("hello.txt"), "hello").unwrap();
 
+    // With min_confidence=1.0, no auto-detected candidates can pass, and no explicit
+    // vars are given, so extraction should fail with ExtractNoVariables
     let options = ExtractOptions {
         source_dir: project.path().to_path_buf(),
         variables: vec![],
         output_dir: None,
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 1.0,
         dry_run: false,
-        auto: false,
     };
 
     let result = plan_extraction(&options);
@@ -823,9 +825,9 @@ fn test_extract_templates_path_components() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -886,9 +888,9 @@ fn test_extract_round_trip() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(extracted_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -926,7 +928,7 @@ fn test_extract_round_trip() {
 // ── Auto-detect tests ────────────────────────────────────────────────────
 
 #[test]
-fn test_extract_auto_batch() {
+fn test_extract_auto_yes() {
     let project = tempfile::tempdir().unwrap();
     let project_dir = project.path().join("data-pipeline");
     std::fs::create_dir(&project_dir).unwrap();
@@ -955,9 +957,9 @@ fn test_extract_auto_batch() {
         variables: vec![],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: true,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -980,7 +982,7 @@ fn test_extract_auto_batch() {
 }
 
 #[test]
-fn test_extract_auto_explicit_vars_priority() {
+fn test_extract_auto_explicit_vars_merged() {
     let project = tempfile::tempdir().unwrap();
     let project_dir = project.path().join("my-service");
     std::fs::create_dir(&project_dir).unwrap();
@@ -999,17 +1001,17 @@ fn test_extract_auto_explicit_vars_priority() {
         variables: vec![("app_name".to_string(), "my-service".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: true,
     };
 
     let plan = plan_extraction(&options).unwrap();
 
     let has_app_name = plan.variables.iter().any(|v| v.name == "app_name");
-    let has_project_name = plan.variables.iter().any(|v| v.name == "project_name");
     assert!(has_app_name, "should use explicit var app_name");
-    assert!(!has_project_name, "should not auto-detect project_name when explicit vars given");
+    // Auto-detect still runs and merges additional candidates
+    // (project_name may or may not appear depending on dedup with app_name's value)
 }
 
 #[test]
@@ -1041,9 +1043,9 @@ fn test_extract_auto_frequency_fallback() {
         variables: vec![],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        batch: true,
+        yes: true,
+        min_confidence: 0.5,
         dry_run: false,
-        auto: true,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -1060,3 +1062,37 @@ fn test_extract_auto_frequency_fallback() {
             .collect::<Vec<_>>()
     );
 }
+
+#[test]
+fn test_extract_min_confidence_filters() {
+    let project = tempfile::tempdir().unwrap();
+    let project_dir = project.path().join("tiny-app");
+    std::fs::create_dir(&project_dir).unwrap();
+    std::fs::write(
+        project_dir.join("Cargo.toml"),
+        "[package]\nname = \"tiny-app\"\nversion = \"0.1.0\"\n",
+    )
+    .unwrap();
+    std::fs::write(
+        project_dir.join("README.md"),
+        "# tiny-app\nWelcome to tiny-app\n",
+    )
+    .unwrap();
+
+    // No explicit variables and a 0.99 threshold: every auto-detected candidate should be dropped.
+    let options = ExtractOptions {
+        source_dir: project_dir.clone(),
+        variables: vec![],
+        output_dir: None,
+        in_place: false,
+        yes: true,
+        min_confidence: 0.99,
+        dry_run: true,
+    };
+    // With nothing left to extract, planning is expected to return an error rather than a plan.
+    let result = plan_extraction(&options);
+    assert!(
+        result.is_err(),
+        "high min_confidence should filter out all candidates"
+    );
+}

From 375616a3823ddc06b6766e7af90ef19663ee0b7e Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:20:02 -0500
Subject: [PATCH 07/29] fix: resolve cargo fmt and clippy warnings

Run cargo fmt to fix formatting issues and fix two clippy lints:
- Remove redundant closure in strip_email call
- Remove identity map on first.as_str()
---
 src/extract/auto_detect.rs | 565 ++++++++++++++++++++++++++++++-------
 src/extract/mod.rs         |   5 +-
 src/render/context.rs      |  10 +-
 tests/integration.rs       |  12 +-
 4 files changed, 470 insertions(+), 122 deletions(-)

diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index 881991e..b0306d9 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -70,10 +70,8 @@ pub fn auto_detect(project_dir: &Path, scan_result: &ScanResult) -> AutoDetectRe
     candidates.extend(detect_git_metadata(project_dir, scan_result));
 
     // Collect values already covered by tiers 1-3
-    let covered_values: HashSet<String> = candidates
-        .iter()
-        .map(|c| c.value.to_lowercase())
-        .collect();
+    let covered_values: HashSet<String> =
+        candidates.iter().map(|c| c.value.to_lowercase()).collect();
 
     // Collect config values for frequency analysis boosting
     let config_values: HashSet<String> = candidates
@@ -107,10 +105,43 @@ pub fn auto_detect(project_dir: &Path, scan_result: &ScanResult) -> AutoDetectRe
 // ── Tier 1: Directory name ───────────────────────────────────────────────
 
 const GENERIC_DIR_NAMES: &[&str] = &[
-    "src", "app", "project", "tmp", "temp", "build", "dist", "out", "output", "lib", "bin",
-    "test", "tests", "example", "examples", "docs", "doc", "assets", "public", "static",
-    "vendor", "node_modules", "target", "pkg", "cmd", "internal", "api", "web", "server",
-    "client", "frontend", "backend", "service", "services", "workspace", "repo", "code",
+    "src",
+    "app",
+    "project",
+    "tmp",
+    "temp",
+    "build",
+    "dist",
+    "out",
+    "output",
+    "lib",
+    "bin",
+    "test",
+    "tests",
+    "example",
+    "examples",
+    "docs",
+    "doc",
+    "assets",
+    "public",
+    "static",
+    "vendor",
+    "node_modules",
+    "target",
+    "pkg",
+    "cmd",
+    "internal",
+    "api",
+    "web",
+    "server",
+    "client",
+    "frontend",
+    "backend",
+    "service",
+    "services",
+    "workspace",
+    "repo",
+    "code",
 ];
 
 fn detect_directory_name(project_dir: &Path, scan_result: &ScanResult) -> Vec<DetectedCandidate> {
@@ -143,10 +174,7 @@ fn detect_directory_name(project_dir: &Path, scan_result: &ScanResult) -> Vec<De
 
 // ── Tier 2: Ecosystem config files ───────────────────────────────────────
 
-fn detect_config_files(
-    project_dir: &Path,
-    scan_result: &ScanResult,
-) -> Vec<DetectedCandidate> {
+fn detect_config_files(project_dir: &Path, scan_result: &ScanResult) -> Vec<DetectedCandidate> {
     let mut candidates = Vec::new();
 
     if let Some(mut c) = parse_cargo_toml(project_dir, scan_result) {
@@ -252,8 +280,7 @@ fn parse_package_json(
         };
         if let Some(author_name) = author_str {
             if !author_name.is_empty() {
-                let (file_count, total_occurrences) =
-                    count_occurrences(&author_name, scan_result);
+                let (file_count, total_occurrences) = count_occurrences(&author_name, scan_result);
                 candidates.push(DetectedCandidate {
                     suggested_name: "author".to_string(),
                     value: author_name,
@@ -306,8 +333,8 @@ fn parse_pyproject_toml(
             let author_name = first
                 .get("name")
                 .and_then(|n| n.as_str())
-                .or_else(|| first.as_str().map(|s| s))
-                .map(|s| strip_email(s));
+                .or_else(|| first.as_str())
+                .map(strip_email);
             if let Some(name) = author_name {
                 if !name.is_empty() {
                     let (file_count, total_occurrences) = count_occurrences(&name, scan_result);
@@ -328,18 +355,12 @@ fn parse_pyproject_toml(
     Some(candidates)
 }
 
-fn parse_go_mod(
-    project_dir: &Path,
-    scan_result: &ScanResult,
-) -> Option<Vec<DetectedCandidate>> {
+fn parse_go_mod(project_dir: &Path, scan_result: &ScanResult) -> Option<Vec<DetectedCandidate>> {
     let path = project_dir.join("go.mod");
     let content = std::fs::read_to_string(&path).ok()?;
 
     let re = Regex::new(r"^module\s+(\S+)").unwrap();
-    let module_path = re
-        .captures(&content)?
-        .get(1)?
-        .as_str();
+    let module_path = re.captures(&content)?.get(1)?.as_str();
 
     let segments: Vec<&str> = module_path.split('/').collect();
 
@@ -386,10 +407,7 @@ fn parse_go_mod(
 
 // ── Tier 3: Git metadata ─────────────────────────────────────────────────
 
-fn detect_git_metadata(
-    project_dir: &Path,
-    scan_result: &ScanResult,
-) -> Vec<DetectedCandidate> {
+fn detect_git_metadata(project_dir: &Path, scan_result: &ScanResult) -> Vec<DetectedCandidate> {
     let mut candidates = Vec::new();
 
     // Try to get remote origin URL
@@ -518,30 +536,27 @@ fn detect_frequency(
 
         let normalized_key = words.join(" ");
 
-        let file_count = token_file_map
-            .get(token)
-            .map(|s| s.len())
-            .unwrap_or(0);
+        let file_count = token_file_map.get(token).map(|s| s.len()).unwrap_or(0);
 
         // Skip single-occurrence-single-file tokens
         if *count == 1 && file_count <= 1 {
             continue;
         }
 
-        let matches_dir = normalized_key == split_into_words(dir_name).join(" ")
-            && !dir_name.is_empty();
+        let matches_dir =
+            normalized_key == split_into_words(dir_name).join(" ") && !dir_name.is_empty();
         let in_config = config_values.contains(&token.to_lowercase());
 
-        let cluster = clusters.entry(normalized_key.clone()).or_insert_with(|| {
-            TokenCluster {
+        let cluster = clusters
+            .entry(normalized_key.clone())
+            .or_insert_with(|| TokenCluster {
                 normalized: words.clone(),
                 literals: Vec::new(),
                 total_occurrences: 0,
                 file_count: 0,
                 matches_dir_name: false,
                 in_config_value: false,
-            }
-        });
+            });
 
         if !cluster.literals.contains(token) {
             cluster.literals.push(token.clone());
@@ -564,7 +579,11 @@ fn detect_frequency(
 
     for (key, cluster) in &clusters {
         // Skip if already covered by higher tiers
-        if cluster.literals.iter().any(|l| covered_values.contains(&l.to_lowercase())) {
+        if cluster
+            .literals
+            .iter()
+            .any(|l| covered_values.contains(&l.to_lowercase()))
+        {
             continue;
         }
 
@@ -624,7 +643,11 @@ fn score_cluster(cluster: &TokenCluster) -> f64 {
     // Config value match (binary)
     let config_score = if cluster.in_config_value { 1.0 } else { 0.0 };
 
-    0.15 * occ_score + 0.20 * file_score + 0.35 * variant_score + 0.20 * dir_score + 0.10 * config_score
+    0.15 * occ_score
+        + 0.20 * file_score
+        + 0.35 * variant_score
+        + 0.20 * dir_score
+        + 0.10 * config_score
 }
 
 fn merge_similar_clusters(clusters: &mut HashMap<String, TokenCluster>) {
@@ -638,8 +661,14 @@ fn merge_similar_clusters(clusters: &mut HashMap<String, TokenCluster>) {
             }
             let dist = strsim::levenshtein(&keys[i], &keys[j]);
             if dist <= 1 {
-                let size_i = clusters.get(&keys[i]).map(|c| c.total_occurrences).unwrap_or(0);
-                let size_j = clusters.get(&keys[j]).map(|c| c.total_occurrences).unwrap_or(0);
+                let size_i = clusters
+                    .get(&keys[i])
+                    .map(|c| c.total_occurrences)
+                    .unwrap_or(0);
+                let size_j = clusters
+                    .get(&keys[j])
+                    .map(|c| c.total_occurrences)
+                    .unwrap_or(0);
                 if size_i >= size_j {
                     merge_map.insert(keys[j].clone(), keys[i].clone());
                 } else {
@@ -704,9 +733,7 @@ fn is_noise_token(token: &str, words: &[String]) -> bool {
 
     // All words are stopwords, file-format words, or very short
     if words.iter().all(|w| {
-        w.len() < 3
-            || STOPWORDS.contains(&w.as_str())
-            || FILE_FORMAT_WORDS.contains(&w.as_str())
+        w.len() < 3 || STOPWORDS.contains(&w.as_str()) || FILE_FORMAT_WORDS.contains(&w.as_str())
     }) {
         return true;
     }
@@ -715,69 +742,377 @@ fn is_noise_token(token: &str, words: &[String]) -> bool {
 }
 
 const FILE_FORMAT_WORDS: &[&str] = &[
-    "toml", "json", "yaml", "yml", "xml", "csv", "html", "css", "md", "txt",
-    "log", "cfg", "ini", "env", "lock", "mod", "rs", "js", "ts", "py", "go",
-    "rb", "java", "kt", "swift", "cpp", "hpp", "vue", "jsx", "tsx",
+    "toml", "json", "yaml", "yml", "xml", "csv", "html", "css", "md", "txt", "log", "cfg", "ini",
+    "env", "lock", "mod", "rs", "js", "ts", "py", "go", "rb", "java", "kt", "swift", "cpp", "hpp",
+    "vue", "jsx", "tsx",
 ];
 
 const LANGUAGE_KEYWORDS: &[&str] = &[
     // Rust
-    "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
-    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut",
-    "pub", "ref", "return", "self", "static", "struct", "super", "trait", "true", "type",
-    "unsafe", "use", "where", "while", "yield",
+    "async",
+    "await",
+    "break",
+    "const",
+    "continue",
+    "crate",
+    "dyn",
+    "else",
+    "enum",
+    "extern",
+    "false",
+    "fn",
+    "for",
+    "if",
+    "impl",
+    "in",
+    "let",
+    "loop",
+    "match",
+    "mod",
+    "move",
+    "mut",
+    "pub",
+    "ref",
+    "return",
+    "self",
+    "static",
+    "struct",
+    "super",
+    "trait",
+    "true",
+    "type",
+    "unsafe",
+    "use",
+    "where",
+    "while",
+    "yield",
     // JS/TS
-    "abstract", "arguments", "boolean", "byte", "case", "catch", "char", "class", "debugger",
-    "default", "delete", "do", "double", "eval", "export", "extends", "final", "finally",
-    "float", "function", "goto", "implements", "import", "instanceof", "int", "interface",
-    "long", "native", "new", "null", "package", "private", "protected", "public", "short",
-    "switch", "synchronized", "this", "throw", "throws", "transient", "try", "typeof",
-    "undefined", "var", "void", "volatile", "with",
+    "abstract",
+    "arguments",
+    "boolean",
+    "byte",
+    "case",
+    "catch",
+    "char",
+    "class",
+    "debugger",
+    "default",
+    "delete",
+    "do",
+    "double",
+    "eval",
+    "export",
+    "extends",
+    "final",
+    "finally",
+    "float",
+    "function",
+    "goto",
+    "implements",
+    "import",
+    "instanceof",
+    "int",
+    "interface",
+    "long",
+    "native",
+    "new",
+    "null",
+    "package",
+    "private",
+    "protected",
+    "public",
+    "short",
+    "switch",
+    "synchronized",
+    "this",
+    "throw",
+    "throws",
+    "transient",
+    "try",
+    "typeof",
+    "undefined",
+    "var",
+    "void",
+    "volatile",
+    "with",
     // Python
-    "and", "as", "assert", "class", "def", "del", "elif", "except", "exec", "from",
-    "global", "is", "lambda", "nonlocal", "not", "or", "pass", "print", "raise",
-    "with", "yield",
+    "and",
+    "as",
+    "assert",
+    "class",
+    "def",
+    "del",
+    "elif",
+    "except",
+    "exec",
+    "from",
+    "global",
+    "is",
+    "lambda",
+    "nonlocal",
+    "not",
+    "or",
+    "pass",
+    "print",
+    "raise",
+    "with",
+    "yield",
     // Go
-    "chan", "defer", "fallthrough", "go", "goroutine", "interface", "map", "range",
-    "select", "func",
+    "chan",
+    "defer",
+    "fallthrough",
+    "go",
+    "goroutine",
+    "interface",
+    "map",
+    "range",
+    "select",
+    "func",
 ];
 
 const COMMON_LIBRARIES: &[&str] = &[
-    "react", "redux", "webpack", "babel", "eslint", "prettier", "jest", "mocha", "chai",
-    "express", "fastify", "next", "nuxt", "vue", "angular", "svelte",
-    "serde", "tokio", "actix", "axum", "clap", "anyhow", "thiserror", "tracing",
-    "reqwest", "hyper", "warp", "rocket", "diesel", "sqlx",
-    "django", "flask", "fastapi", "pytest", "numpy", "pandas", "scipy",
-    "spring", "hibernate", "junit", "maven", "gradle",
-    "gin", "echo", "fiber", "gorm",
-    "lodash", "axios", "moment", "dayjs", "ramda", "underscore",
-    "tailwind", "bootstrap", "material",
-    "typescript", "javascript", "python", "golang", "rustlang",
+    "react",
+    "redux",
+    "webpack",
+    "babel",
+    "eslint",
+    "prettier",
+    "jest",
+    "mocha",
+    "chai",
+    "express",
+    "fastify",
+    "next",
+    "nuxt",
+    "vue",
+    "angular",
+    "svelte",
+    "serde",
+    "tokio",
+    "actix",
+    "axum",
+    "clap",
+    "anyhow",
+    "thiserror",
+    "tracing",
+    "reqwest",
+    "hyper",
+    "warp",
+    "rocket",
+    "diesel",
+    "sqlx",
+    "django",
+    "flask",
+    "fastapi",
+    "pytest",
+    "numpy",
+    "pandas",
+    "scipy",
+    "spring",
+    "hibernate",
+    "junit",
+    "maven",
+    "gradle",
+    "gin",
+    "echo",
+    "fiber",
+    "gorm",
+    "lodash",
+    "axios",
+    "moment",
+    "dayjs",
+    "ramda",
+    "underscore",
+    "tailwind",
+    "bootstrap",
+    "material",
+    "typescript",
+    "javascript",
+    "python",
+    "golang",
+    "rustlang",
 ];
 
 const STOPWORDS: &[&str] = &[
     // English stopwords
-    "the", "and", "for", "are", "but", "not", "you", "all", "can", "had", "her", "was",
-    "one", "our", "out", "get", "set", "has", "his", "how", "its", "let", "may", "new",
-    "now", "old", "see", "way", "who", "did", "got", "has", "him", "into", "just",
-    "like", "make", "many", "some", "than", "them", "then", "very", "when", "with",
-    "have", "from", "been", "also", "each", "that", "this", "will", "your", "what",
-    "which", "their", "about", "would", "there", "could", "other", "after", "first",
-    "these", "those", "being", "where", "should", "because",
+    "the",
+    "and",
+    "for",
+    "are",
+    "but",
+    "not",
+    "you",
+    "all",
+    "can",
+    "had",
+    "her",
+    "was",
+    "one",
+    "our",
+    "out",
+    "get",
+    "set",
+    "has",
+    "his",
+    "how",
+    "its",
+    "let",
+    "may",
+    "new",
+    "now",
+    "old",
+    "see",
+    "way",
+    "who",
+    "did",
+    "got",
+    "has",
+    "him",
+    "into",
+    "just",
+    "like",
+    "make",
+    "many",
+    "some",
+    "than",
+    "them",
+    "then",
+    "very",
+    "when",
+    "with",
+    "have",
+    "from",
+    "been",
+    "also",
+    "each",
+    "that",
+    "this",
+    "will",
+    "your",
+    "what",
+    "which",
+    "their",
+    "about",
+    "would",
+    "there",
+    "could",
+    "other",
+    "after",
+    "first",
+    "these",
+    "those",
+    "being",
+    "where",
+    "should",
+    "because",
     // Short generic words common in code identifiers
-    "my", "no", "is", "on", "in", "to", "by", "do", "up", "so", "or",
-    "app", "run", "dry", "log", "cmd", "arg", "env", "dir", "key", "map",
-    "max", "min", "raw", "ref", "src", "str", "tmp", "url", "var", "buf",
-    "msg", "req", "res", "err", "pkg", "lib", "bin", "fmt", "ctx", "cfg",
-    "opt", "val", "idx", "len", "ptr", "num", "std", "gen", "pre", "sub",
+    "my",
+    "no",
+    "is",
+    "on",
+    "in",
+    "to",
+    "by",
+    "do",
+    "up",
+    "so",
+    "or",
+    "app",
+    "run",
+    "dry",
+    "log",
+    "cmd",
+    "arg",
+    "env",
+    "dir",
+    "key",
+    "map",
+    "max",
+    "min",
+    "raw",
+    "ref",
+    "src",
+    "str",
+    "tmp",
+    "url",
+    "var",
+    "buf",
+    "msg",
+    "req",
+    "res",
+    "err",
+    "pkg",
+    "lib",
+    "bin",
+    "fmt",
+    "ctx",
+    "cfg",
+    "opt",
+    "val",
+    "idx",
+    "len",
+    "ptr",
+    "num",
+    "std",
+    "gen",
+    "pre",
+    "sub",
     // Programming type/concept words
-    "string", "number", "bool", "boolean", "array", "object", "value", "result",
-    "error", "option", "none", "some", "true", "false", "null", "undefined",
-    "file", "path", "name", "type", "data", "info", "list", "item", "node",
-    "index", "count", "size", "length", "config", "settings", "options",
-    "input", "output", "source", "target", "test", "main", "init", "setup",
-    "todo", "fixme", "hack", "note", "warning", "debug", "trace", "level",
-    "mode", "flag", "status", "state", "cache", "hook", "hooks",
+    "string",
+    "number",
+    "bool",
+    "boolean",
+    "array",
+    "object",
+    "value",
+    "result",
+    "error",
+    "option",
+    "none",
+    "some",
+    "true",
+    "false",
+    "null",
+    "undefined",
+    "file",
+    "path",
+    "name",
+    "type",
+    "data",
+    "info",
+    "list",
+    "item",
+    "node",
+    "index",
+    "count",
+    "size",
+    "length",
+    "config",
+    "settings",
+    "options",
+    "input",
+    "output",
+    "source",
+    "target",
+    "test",
+    "main",
+    "init",
+    "setup",
+    "todo",
+    "fixme",
+    "hack",
+    "note",
+    "warning",
+    "debug",
+    "trace",
+    "level",
+    "mode",
+    "flag",
+    "status",
+    "state",
+    "cache",
+    "hook",
+    "hooks",
 ];
 
 // ── Helpers ──────────────────────────────────────────────────────────────
@@ -943,10 +1278,16 @@ mod tests {
         let scan = make_scan_result(vec![("index.js", "cool-widget stuff")]);
         let candidates = parse_package_json(dir.path(), &scan).unwrap();
 
-        let name_candidate = candidates.iter().find(|c| c.suggested_name == "project_name").unwrap();
+        let name_candidate = candidates
+            .iter()
+            .find(|c| c.suggested_name == "project_name")
+            .unwrap();
         assert_eq!(name_candidate.value, "cool-widget");
 
-        let author_candidate = candidates.iter().find(|c| c.suggested_name == "author").unwrap();
+        let author_candidate = candidates
+            .iter()
+            .find(|c| c.suggested_name == "author")
+            .unwrap();
         assert_eq!(author_candidate.value, "Bob Smith");
     }
 
@@ -978,7 +1319,9 @@ mod tests {
         let scan = make_scan_result(vec![("main.go", "package main // my-service by acme")]);
         let candidates = parse_go_mod(dir.path(), &scan).unwrap();
 
-        let project = candidates.iter().find(|c| c.suggested_name == "project_name");
+        let project = candidates
+            .iter()
+            .find(|c| c.suggested_name == "project_name");
         assert!(project.is_some());
         assert_eq!(project.unwrap().value, "my-service");
 
@@ -1001,7 +1344,11 @@ mod tests {
     #[test]
     fn test_tier2_malformed_cargo_toml() {
         let dir = tempfile::tempdir().unwrap();
-        std::fs::write(dir.path().join("Cargo.toml"), "this is not valid toml {{{}}}").unwrap();
+        std::fs::write(
+            dir.path().join("Cargo.toml"),
+            "this is not valid toml {{{}}}",
+        )
+        .unwrap();
         let scan = make_scan_result(vec![]);
         assert!(parse_cargo_toml(dir.path(), &scan).is_none());
     }
@@ -1060,7 +1407,11 @@ mod tests {
             let words = split_into_words(&c.value);
             words == vec!["data", "pipeline"]
         });
-        assert!(found, "should find data-pipeline cluster, got: {:?}", candidates);
+        assert!(
+            found,
+            "should find data-pipeline cluster, got: {:?}",
+            candidates
+        );
     }
 
     #[test]
@@ -1078,16 +1429,17 @@ mod tests {
         // "async" alone should be filtered
         for c in &candidates {
             let lower = c.value.to_lowercase();
-            assert!(!LANGUAGE_KEYWORDS.contains(&lower.as_str()) || c.value.contains('-') || c.value.contains('_'));
+            assert!(
+                !LANGUAGE_KEYWORDS.contains(&lower.as_str())
+                    || c.value.contains('-')
+                    || c.value.contains('_')
+            );
         }
     }
 
     #[test]
     fn test_frequency_filters_short_tokens() {
-        let scan = make_scan_result(vec![
-            ("a.txt", "ab cd ef gh"),
-            ("b.txt", "ab cd ef gh"),
-        ]);
+        let scan = make_scan_result(vec![("a.txt", "ab cd ef gh"), ("b.txt", "ab cd ef gh")]);
 
         let covered = HashSet::new();
         let config_vals = HashSet::new();
@@ -1109,7 +1461,9 @@ mod tests {
         let config_vals = HashSet::new();
         let candidates = detect_frequency(&scan, &covered, &config_vals, "");
 
-        let has_widget = candidates.iter().any(|c| c.value.to_lowercase().contains("widget"));
+        let has_widget = candidates
+            .iter()
+            .any(|c| c.value.to_lowercase().contains("widget"));
         assert!(!has_widget, "covered values should be skipped");
     }
 
@@ -1295,7 +1649,10 @@ mod tests {
         let result = auto_detect(&project_dir, &scan);
 
         assert!(!result.candidates.is_empty());
-        let project_name = result.candidates.iter().find(|c| c.suggested_name == "project_name");
+        let project_name = result
+            .candidates
+            .iter()
+            .find(|c| c.suggested_name == "project_name");
         assert!(project_name.is_some(), "should detect project_name");
         assert_eq!(project_name.unwrap().value, "my-widget");
     }
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 96e7ecb..8fb9790 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -15,6 +15,7 @@ use inquire::{Confirm, Select, Text};
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
 
+use self::auto_detect::{auto_detect, DetectedCandidate};
 use self::conditional::{detect_conditional_files, patterns_for_variable, DetectedConditional};
 use self::config_gen::{
     generate_config_toml, ComputedVariable, ConditionalEntry, ConfigGenOptions, PromptedVariable,
@@ -23,7 +24,6 @@ use self::exclude::{detect_copy_without_render, detect_excludes};
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
-use self::auto_detect::{auto_detect, DetectedCandidate};
 use self::scan::{scan_project, ScannedFile};
 use self::variants::{
     computed_expression, detect_separator, generate_variants, is_canonical_variant, CaseVariant,
@@ -781,8 +781,7 @@ fn confirm_auto_detected_interactive(
             if candidate.total_occurrences > 0 {
                 eprintln!(
                     "    {} occurrences across {} files",
-                    candidate.total_occurrences,
-                    candidate.file_count
+                    candidate.total_occurrences, candidate.file_count
                 );
             }
 
diff --git a/src/render/context.rs b/src/render/context.rs
index 4680c64..f530022 100644
--- a/src/render/context.rs
+++ b/src/render/context.rs
@@ -27,18 +27,12 @@ pub fn tera_with_filters() -> Tera {
 ///
 /// Splits on the separator (default `-`), lowercases the first word,
 /// title-cases the rest, and joins them.
-fn camelcase_filter(
-    value: &Value,
-    args: &HashMap<String, Value>,
-) -> Result<Value, tera::Error> {
+fn camelcase_filter(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> {
     let s = value
         .as_str()
         .ok_or_else(|| tera::Error::msg("camelcase filter requires a string value"))?;
 
-    let sep = args
-        .get("sep")
-        .and_then(|v| v.as_str())
-        .unwrap_or("-");
+    let sep = args.get("sep").and_then(|v| v.as_str()).unwrap_or("-");
 
     let words: Vec<&str> = s.split(sep).collect();
     if words.is_empty() {
diff --git a/tests/integration.rs b/tests/integration.rs
index 0310b92..243935c 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -965,10 +965,7 @@ fn test_extract_auto_yes() {
     let plan = plan_extraction(&options).unwrap();
     execute_extraction(&plan, false).unwrap();
 
-    let project_var = plan
-        .variables
-        .iter()
-        .find(|v| v.name == "project_name");
+    let project_var = plan.variables.iter().find(|v| v.name == "project_name");
     assert!(
         project_var.is_some(),
         "should auto-detect project_name, got vars: {:?}",
@@ -1050,9 +1047,10 @@ fn test_extract_auto_frequency_fallback() {
 
     let plan = plan_extraction(&options).unwrap();
 
-    let has_relevant_var = plan.variables.iter().any(|v| {
-        v.value.contains("cool") || v.name.contains("cool")
-    });
+    let has_relevant_var = plan
+        .variables
+        .iter()
+        .any(|v| v.value.contains("cool") || v.name.contains("cool"));
     assert!(
         has_relevant_var,
         "should detect cool-widget related variable, got: {:?}",

From cc93d6164c177540b1624de47ccfa222258f2044 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:27:06 -0500
Subject: [PATCH 08/29] fix(extract): resolve merge chains in cluster
 deduplication

---
 src/extract/auto_detect.rs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index b0306d9..115e8c8 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -678,7 +678,19 @@ fn merge_similar_clusters(clusters: &mut HashMap<String, TokenCluster>) {
         }
     }
 
-    for (from, to) in &merge_map {
+    // Resolve merge chains: if A→B and B→C, then A→C
+    let resolved: HashMap<String, String> = merge_map
+        .keys()
+        .map(|k| {
+            let mut target = merge_map[k].clone();
+            while let Some(next) = merge_map.get(&target) {
+                target = next.clone();
+            }
+            (k.clone(), target)
+        })
+        .collect();
+
+    for (from, to) in &resolved {
         if let Some(removed) = clusters.remove(from) {
             if let Some(target) = clusters.get_mut(to) {
                 for lit in removed.literals {

From bdd420bf24d2c215e80512f076d8fc7ac3ba97df Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:31:37 -0500
Subject: [PATCH 09/29] refactor(extract): use enum for PlannedExtractFile
 content

---
 src/commands/extract.rs |  10 ++--
 src/extract/mod.rs      | 102 +++++++++++++++++++++++++---------------
 2 files changed, 71 insertions(+), 41 deletions(-)

diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index faf0018..93738fe 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -63,15 +63,19 @@ fn print_dry_run(plan: &diecut::extract::ExtractionPlan) {
         style(plan.output_dir.display()).cyan()
     );
 
-    let templated: Vec<_> = plan.files.iter().filter(|f| f.has_replacements).collect();
-    let copied: Vec<_> = plan.files.iter().filter(|f| !f.has_replacements).collect();
+    let templated: Vec<_> = plan.files.iter().filter(|f| f.has_replacements()).collect();
+    let copied: Vec<_> = plan
+        .files
+        .iter()
+        .filter(|f| !f.has_replacements())
+        .collect();
 
     eprintln!("\nTemplated files ({}):", templated.len());
     for file in &templated {
         eprintln!(
             "  {} ({} replacements)",
             file.template_path.display(),
-            file.replacement_count
+            file.replacement_count()
         );
     }
 
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 8fb9790..66bc88b 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -39,21 +39,47 @@ pub struct ExtractVariable {
     pub occurrence_counts: Vec<(String, usize, usize)>,
 }
 
+/// The payload of a planned template file: either processed text or raw bytes.
+#[derive(Debug, Clone)]
+pub enum ExtractedContent {
+    /// Text after replacement; `replacement_count` is how many replacements were made (may be 0).
+    Text {
+        content: String,
+        replacement_count: usize,
+    },
+    /// Raw bytes of a binary file, written to the template unchanged.
+    Binary(Vec<u8>),
+}
+
 /// A file that will be part of the extracted template.
 #[derive(Debug, Clone)]
 pub struct PlannedExtractFile {
     /// Relative path in the output template (may contain template expressions).
     pub template_path: PathBuf,
-    /// Content (with replacements applied), or None for binary files.
-    pub content: Option<String>,
-    /// Original bytes for binary files.
-    pub binary_content: Option<Vec<u8>>,
+    /// The file content (text with replacements, or binary bytes).
+    pub content: ExtractedContent,
+}
+
+impl PlannedExtractFile {
     /// Whether this file had template replacements applied.
-    pub has_replacements: bool,
-    /// Number of replacements made.
-    pub replacement_count: usize,
+    pub fn has_replacements(&self) -> bool {
+        self.replacement_count() > 0
+    }
+
     /// Whether this is a binary file.
-    pub is_binary: bool,
+    pub fn is_binary(&self) -> bool {
+        matches!(&self.content, ExtractedContent::Binary(_))
+    }
+
+    /// Number of replacements made (0 for binary files).
+    pub fn replacement_count(&self) -> usize {
+        match &self.content {
+            ExtractedContent::Text {
+                replacement_count, ..
+            } => *replacement_count,
+            ExtractedContent::Binary(_) => 0,
+        }
+    }
 }
 
 /// The full extraction plan, ready to be executed or reviewed.
@@ -273,18 +299,13 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
                 })?;
             planned_files.push(PlannedExtractFile {
                 template_path,
-                content: None,
-                binary_content: Some(binary_content),
-                has_replacements: false,
-                replacement_count: 0,
-                is_binary: true,
+                content: ExtractedContent::Binary(binary_content),
             });
         } else if let Some(ref content) = file.content {
             let (replaced, count) = apply_replacements(content, &rules);
-            let has_replacements = count > 0;
 
             // Add .die suffix if file has template replacements
-            let final_path = if has_replacements {
+            let final_path = if count > 0 {
                 let mut p = template_path.as_os_str().to_string_lossy().to_string();
                 p.push_str(DEFAULT_TEMPLATES_SUFFIX);
                 PathBuf::from(p)
@@ -294,11 +315,10 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
 
             planned_files.push(PlannedExtractFile {
                 template_path: final_path,
-                content: Some(replaced),
-                binary_content: None,
-                has_replacements,
-                replacement_count: count,
-                is_binary: false,
+                content: ExtractedContent::Text {
+                    content: replaced,
+                    replacement_count: count,
+                },
             });
         }
     }
@@ -413,22 +433,28 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
             })?;
         }
 
-        if let Some(ref content) = file.content {
-            std::fs::write(&dest, content).map_err(|e| DicecutError::Io {
-                context: format!("writing file {}", dest.display()),
-                source: e,
-            })?;
-            if file.has_replacements {
-                rendered_count += 1;
-            } else {
+        match &file.content {
+            ExtractedContent::Text {
+                content,
+                replacement_count,
+            } => {
+                std::fs::write(&dest, content).map_err(|e| DicecutError::Io {
+                    context: format!("writing file {}", dest.display()),
+                    source: e,
+                })?;
+                if *replacement_count > 0 {
+                    rendered_count += 1;
+                } else {
+                    copied_count += 1;
+                }
+            }
+            ExtractedContent::Binary(bytes) => {
+                std::fs::write(&dest, bytes).map_err(|e| DicecutError::Io {
+                    context: format!("writing binary file {}", dest.display()),
+                    source: e,
+                })?;
                 copied_count += 1;
             }
-        } else if let Some(ref bytes) = file.binary_content {
-            std::fs::write(&dest, bytes).map_err(|e| DicecutError::Io {
-                context: format!("writing binary file {}", dest.display()),
-                source: e,
-            })?;
-            copied_count += 1;
         }
     }
 
@@ -838,9 +864,9 @@ fn confirm_auto_detected_interactive(
 }
 
 fn confirm_files_interactive(files: &[PlannedExtractFile]) -> Result<()> {
-    let templated: Vec<_> = files.iter().filter(|f| f.has_replacements).collect();
-    let copied: Vec<_> = files.iter().filter(|f| !f.has_replacements).collect();
-    let binary_count = files.iter().filter(|f| f.is_binary).count();
+    let templated: Vec<_> = files.iter().filter(|f| f.has_replacements()).collect();
+    let copied: Vec<_> = files.iter().filter(|f| !f.has_replacements()).collect();
+    let binary_count = files.iter().filter(|f| f.is_binary()).count();
 
     eprintln!(
         "\n{} Files to template {}",
@@ -855,7 +881,7 @@ fn confirm_files_interactive(files: &[PlannedExtractFile]) -> Result<()> {
         eprintln!(
             "    {:<40} {} replacements",
             file.template_path.display(),
-            file.replacement_count
+            file.replacement_count()
         );
     }
 

From 917ea7706c1cd4dacb89f69ee2779458667d2910 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:32:23 -0500
Subject: [PATCH 10/29] fix(extract): replace partial_cmp().unwrap() with
 total_cmp() for NaN safety

---
 src/extract/auto_detect.rs | 4 ++--
 src/extract/mod.rs         | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index 115e8c8..3b38c34 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -97,7 +97,7 @@ pub fn auto_detect(project_dir: &Path, scan_result: &ScanResult) -> AutoDetectRe
     deduplicate_candidates(&mut candidates);
 
     // Sort by confidence descending
-    candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
+    candidates.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
 
     AutoDetectResult { candidates }
 }
@@ -614,7 +614,7 @@ fn detect_frequency(
     }
 
     // Sort by confidence, take top 5
-    freq_candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
+    freq_candidates.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
     freq_candidates.truncate(5);
 
     freq_candidates
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 66bc88b..150f08c 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -734,7 +734,7 @@ fn resolve_candidates_yes(
         }
 
         // For name collisions, pick highest confidence
-        group.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
+        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
         let winner = group[0];
 
         eprintln!(
@@ -791,7 +791,7 @@ fn confirm_auto_detected_interactive(
         }
 
         // Sort by confidence descending
-        group.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
+        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
 
         if group.len() == 1 {
             // Single candidate — simple confirm

From a35eb2fd58ce5fb7142a729f14b513c6e53c3bc9 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:35:49 -0500
Subject: [PATCH 11/29] fix(extract): use dedicated error variant for malformed
 --var arguments

---
 src/commands/extract.rs | 2 +-
 src/error.rs            | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index 93738fe..04c23b3 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -45,7 +45,7 @@ fn parse_vars(vars: &[String]) -> diecut::error::Result<Vec<(String, String)>> {
     for var in vars {
         let (key, value) = var
             .split_once('=')
-            .ok_or_else(|| DicecutError::ExtractNoVariables)?;
+            .ok_or_else(|| DicecutError::ExtractInvalidVar { input: var.clone() })?;
         parsed.push((key.trim().to_string(), value.trim().to_string()));
     }
 
diff --git a/src/error.rs b/src/error.rs
index a612908..3ad5597 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -128,6 +128,10 @@ pub enum DicecutError {
     ))]
     ExtractNoVariables,
 
+    #[error("Invalid --var argument: {input} (expected key=value)")]
+    #[diagnostic(help("Use --var key=value format, e.g., --var project_name=my-app"))]
+    ExtractInvalidVar { input: String },
+
     #[error("Output directory already exists: {path}")]
     #[diagnostic(help(
         "Choose a different output path with -o, or remove the existing directory"

From 9bb5be9425b01bb8b0d3949dc67470bf1aaee9d9 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:35:46 -0500
Subject: [PATCH 12/29] fix(extract): disable git terminal prompts during
 auto-detection

---
 src/extract/auto_detect.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index 3b38c34..3224e46 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -454,6 +454,7 @@ fn git_config_get(project_dir: &Path, key: &str) -> Option<String> {
         .arg("--get")
         .arg(key)
         .current_dir(project_dir)
+        .env("GIT_TERMINAL_PROMPT", "0")
         .output()
         .ok()?;
 

From b490f8c5dd79b6bdec43ef1bdbb1b42d7763840f Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:35:48 -0500
Subject: [PATCH 13/29] refactor(extract): consolidate duplicate
 count_occurrences functions

---
 src/extract/auto_detect.rs |  2 +-
 src/extract/mod.rs         | 34 +++-------------------------------
 2 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index 3224e46..dfec675 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -1130,7 +1130,7 @@ const STOPWORDS: &[&str] = &[
 
 // ── Helpers ──────────────────────────────────────────────────────────────
 
-fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize) {
+pub fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize) {
     let mut file_count = 0;
     let mut total = 0;
 
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 150f08c..4388207 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -15,7 +15,7 @@ use inquire::{Confirm, Select, Text};
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
 
-use self::auto_detect::{auto_detect, DetectedCandidate};
+use self::auto_detect::{auto_detect, count_occurrences, DetectedCandidate};
 use self::conditional::{detect_conditional_files, patterns_for_variable, DetectedConditional};
 use self::config_gen::{
     generate_config_toml, ComputedVariable, ConditionalEntry, ConfigGenOptions, PromptedVariable,
@@ -24,7 +24,7 @@ use self::exclude::{detect_copy_without_render, detect_excludes};
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
-use self::scan::{scan_project, ScannedFile};
+use self::scan::scan_project;
 use self::variants::{
     computed_expression, detect_separator, generate_variants, is_canonical_variant, CaseVariant,
 };
@@ -207,8 +207,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
 
         let mut occurrence_counts = Vec::new();
         for variant in &all_variants {
-            let (file_count, total_hits) =
-                count_variant_occurrences(&variant.literal, &scan_result.files);
+            let (file_count, total_hits) = count_occurrences(&variant.literal, &scan_result);
             occurrence_counts.push((variant.name.to_string(), file_count, total_hits));
         }
 
@@ -517,33 +516,6 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
 
 // ── Interactive helpers ──────────────────────────────────────────────────
 
-fn count_variant_occurrences(literal: &str, files: &[ScannedFile]) -> (usize, usize) {
-    let mut file_count = 0;
-    let mut total_hits = 0;
-
-    for file in files {
-        if let Some(ref content) = file.content {
-            let hits = content.matches(literal).count();
-            if hits > 0 {
-                file_count += 1;
-                total_hits += hits;
-            }
-        }
-    }
-
-    // Also check path components
-    for file in files {
-        let path_str = file.relative_path.to_string_lossy();
-        let hits = path_str.matches(literal).count();
-        if hits > 0 {
-            // Don't double-count file_count if already counted from content
-            total_hits += hits;
-        }
-    }
-
-    (file_count, total_hits)
-}
-
 fn confirm_variants_interactive(variables: Vec<ExtractVariable>) -> Result<Vec<ExtractVariable>> {
     let mut confirmed = Vec::new();
 

From c163f04a5f1f1e9437832d82c9cc09785ede1610 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 21:35:58 -0500
Subject: [PATCH 14/29] perf(extract): use LazyLock for Regex compilation

Replace Regex::new() calls inside function bodies with
std::sync::LazyLock statics so regexes are compiled once
instead of on every invocation. Bumps MSRV to 1.80.
---
 Cargo.toml                 |  2 +-
 src/extract/auto_detect.rs | 19 ++++++++++++-------
 src/extract/variants.rs    |  8 ++++++--
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 64ef434..ecca649 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,7 @@ name = "diecut"
 version = "0.3.4"
 edition = "2021"
 license = "MIT"
-rust-version = "1.75"
+rust-version = "1.80"
 description = "A single binary project template generator"
 
 [lib]
diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index dfec675..a9a2612 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -1,9 +1,19 @@
 use std::collections::{HashMap, HashSet};
 use std::path::Path;
 use std::process::Command;
+use std::sync::LazyLock;
 
 use regex_lite::Regex;
 
+static GO_MOD_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^module\s+(\S+)").unwrap());
+
+static TOKEN_RE: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(
+        r"[a-zA-Z][a-zA-Z0-9]*(?:[-_.][a-zA-Z0-9]+)+|[A-Z][a-z]+(?:[A-Z][a-z]+)+|[a-z]+(?:[A-Z][a-z]+)+|[A-Z]{2,}(?:_[A-Z]{2,})+",
+    )
+    .unwrap()
+});
+
 use super::scan::ScanResult;
 use super::variants::split_into_words;
 
@@ -359,8 +369,7 @@ fn parse_go_mod(project_dir: &Path, scan_result: &ScanResult) -> Option<Vec<Dete
     let path = project_dir.join("go.mod");
     let content = std::fs::read_to_string(&path).ok()?;
 
-    let re = Regex::new(r"^module\s+(\S+)").unwrap();
-    let module_path = re.captures(&content)?.get(1)?.as_str();
+    let module_path = GO_MOD_RE.captures(&content)?.get(1)?.as_str();
 
     let segments: Vec<&str> = module_path.split('/').collect();
 
@@ -501,16 +510,12 @@ fn detect_frequency(
     dir_name: &str,
 ) -> Vec<DetectedCandidate> {
     // Tokenize all text file content
-    let token_re = Regex::new(
-        r"[a-zA-Z][a-zA-Z0-9]*(?:[-_.][a-zA-Z0-9]+)+|[A-Z][a-z]+(?:[A-Z][a-z]+)+|[a-z]+(?:[A-Z][a-z]+)+|[A-Z]{2,}(?:_[A-Z]{2,})+"
-    ).unwrap();
-
     let mut token_file_map: HashMap<String, HashSet<usize>> = HashMap::new();
     let mut token_counts: HashMap<String, usize> = HashMap::new();
 
     for (file_idx, file) in scan_result.files.iter().enumerate() {
         if let Some(ref content) = file.content {
-            for mat in token_re.find_iter(content) {
+            for mat in TOKEN_RE.find_iter(content) {
                 let token = mat.as_str().to_string();
                 token_file_map
                     .entry(token.clone())
diff --git a/src/extract/variants.rs b/src/extract/variants.rs
index 8458b29..525b475 100644
--- a/src/extract/variants.rs
+++ b/src/extract/variants.rs
@@ -1,5 +1,10 @@
+use std::sync::LazyLock;
+
 use regex_lite::Regex;
 
+static CAMEL_SPLIT_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"[A-Z][a-z]*|[a-z]+|[0-9]+").unwrap());
+
 /// A case variant of a variable value, with its literal text and Tera expression.
 #[derive(Debug, Clone, PartialEq)]
 pub struct CaseVariant {
@@ -26,8 +31,7 @@ pub fn split_into_words(value: &str) -> Vec<String> {
     }
 
     // camelCase / PascalCase splitting
-    let re = Regex::new(r"[A-Z][a-z]*|[a-z]+|[0-9]+").unwrap();
-    let words: Vec<String> = re
+    let words: Vec<String> = CAMEL_SPLIT_RE
         .find_iter(value)
         .map(|m| m.as_str().to_lowercase())
         .collect();

From 761fcd582d1cfc0be347970e4bf00e2338687e2b Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Fri, 27 Feb 2026 22:49:31 -0500
Subject: [PATCH 15/29] fix(extract): address code audit findings

- Guard against infinite loop in merge-chain resolution by tracking
  visited nodes when walking the merge map
- Count path-only occurrences in file_count so confidence scoring
  doesn't miss values that appear only in file paths
- Rewrite apply_replacements as a single-pass algorithm that collects
  all match positions first, preventing later rules from corrupting
  Tera expressions inserted by earlier rules
- Propagate IO errors (e.g. permission denied) from scan instead of
  silently dropping unreadable files; only downgrade to binary on
  InvalidData (UTF-8 decode failure)
---
 src/extract/auto_detect.rs |  18 +++++-
 src/extract/replace.rs     | 121 ++++++++++++++++++-------------------
 src/extract/scan.rs        |  17 ++++--
 3 files changed, 88 insertions(+), 68 deletions(-)

diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index a9a2612..0777fb9 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -685,11 +685,17 @@ fn merge_similar_clusters(clusters: &mut HashMap<String, TokenCluster>) {
     }
 
     // Resolve merge chains: if A→B and B→C, then A→C
+    // Use a visited set to guard against cycles.
     let resolved: HashMap<String, String> = merge_map
         .keys()
         .map(|k| {
             let mut target = merge_map[k].clone();
+            let mut seen = HashSet::new();
+            seen.insert(k.clone());
             while let Some(next) = merge_map.get(&target) {
+                if !seen.insert(next.clone()) {
+                    break;
+                }
                 target = next.clone();
             }
             (k.clone(), target)
@@ -1140,17 +1146,25 @@ pub fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize
     let mut total = 0;
 
     for file in &scan_result.files {
+        let mut counted_file = false;
+
         if let Some(ref content) = file.content {
             let hits = content.matches(value).count();
             if hits > 0 {
                 file_count += 1;
+                counted_file = true;
                 total += hits;
             }
         }
-        // Also check path
+
         let path_str = file.relative_path.to_string_lossy();
         let path_hits = path_str.matches(value).count();
-        total += path_hits;
+        if path_hits > 0 {
+            total += path_hits;
+            if !counted_file {
+                file_count += 1;
+            }
+        }
     }
 
     (file_count, total)
diff --git a/src/extract/replace.rs b/src/extract/replace.rs
index 42914ec..95e36ad 100644
--- a/src/extract/replace.rs
+++ b/src/extract/replace.rs
@@ -27,87 +27,86 @@ fn is_word_char(c: char) -> bool {
     c.is_alphanumeric() || c == '_' || c == '-'
 }
 
-/// Replace `literal` in `text` only at word boundaries.
+/// Apply replacement rules to a string, longest-match-first, in a single pass.
 ///
-/// A match is at a word boundary when the characters immediately before and
-/// after the match are not word-like (alphanumeric, `_`, or `-`), or the
-/// match is at the start/end of the string.
+/// All match positions are identified first against the original text, then
+/// applied in one pass so that replacement output is never re-scanned by later
+/// rules. Uses word-boundary-aware matching to prevent replacing substrings
+/// inside longer words (e.g., "app" inside "application").
 ///
-/// Multi-word literals (containing a separator like `-`, `_`, or `.`) always
-/// use boundary-aware replacement since false positives are unlikely but still
-/// possible in paths and compound tokens.
-fn replace_whole_word(text: &str, literal: &str, replacement: &str) -> (String, usize) {
-    let literal_len = literal.len();
-    let text_len = text.len();
-
-    if literal_len == 0 || text_len < literal_len {
-        return (text.to_string(), 0);
-    }
-
-    let mut result = String::with_capacity(text.len());
-    let mut count = 0;
-    let mut start = 0;
-
-    while start <= text_len - literal_len {
-        match text[start..].find(literal) {
-            Some(pos) => {
-                let match_start = start + pos;
-                let match_end = match_start + literal_len;
-
-                let ok_before = match_start == 0
-                    || !is_word_char(text[..match_start].chars().next_back().unwrap());
-                let ok_after = match_end == text_len
-                    || !is_word_char(text[match_end..].chars().next().unwrap());
-
-                if ok_before && ok_after {
-                    result.push_str(&text[start..match_start]);
-                    result.push_str(replacement);
-                    count += 1;
-                    start = match_end;
-                } else {
-                    // Not a word boundary — advance past the start of this match
+/// Returns the modified string and the number of replacements made.
+pub fn apply_replacements(content: &str, rules: &[ReplacementRule]) -> (String, usize) {
+    if rules.is_empty() {
+        return (content.to_string(), 0);
+    }
+
+    // Collect all (start, end, replacement_index) matches across all rules.
+    let mut matches: Vec<(usize, usize, usize)> = Vec::new();
+
+    for (rule_idx, rule) in rules.iter().enumerate() {
+        if rule.literal.is_empty() {
+            continue;
+        }
+        let literal = &rule.literal;
+        let literal_len = literal.len();
+        let text_len = content.len();
+
+        if text_len < literal_len {
+            continue;
+        }
+
+        let mut start = 0;
+        while start <= text_len - literal_len {
+            match content[start..].find(literal) {
+                Some(pos) => {
+                    let match_start = start + pos;
+                    let match_end = match_start + literal_len;
+
+                    let ok_before = match_start == 0
+                        || !is_word_char(content[..match_start].chars().next_back().unwrap());
+                    let ok_after = match_end == text_len
+                        || !is_word_char(content[match_end..].chars().next().unwrap());
+
+                    if ok_before && ok_after {
+                        matches.push((match_start, match_end, rule_idx));
+                    }
+
                     let next = match_start
-                        + text[match_start..]
+                        + content[match_start..]
                             .char_indices()
                             .nth(1)
                             .map(|(i, _)| i)
                             .unwrap_or(1);
-                    result.push_str(&text[start..next]);
                     start = next;
                 }
+                None => break,
             }
-            None => break,
         }
     }
 
-    result.push_str(&text[start..]);
-    (result, count)
-}
-
-/// Apply replacement rules to a string, longest-match-first.
-///
-/// Uses word-boundary-aware matching to prevent replacing substrings
-/// inside longer words (e.g., "app" inside "application").
-///
-/// Returns the modified string and the number of replacements made.
-pub fn apply_replacements(content: &str, rules: &[ReplacementRule]) -> (String, usize) {
-    if rules.is_empty() {
+    if matches.is_empty() {
         return (content.to_string(), 0);
     }
 
-    let mut result = content.to_string();
+    // Sort by start position; on tie, prefer the longer match (lower rule index
+    // already means longer literal due to build_replacement_rules sorting).
+    matches.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));
+
+    // Greedily select non-overlapping matches.
+    let mut result = String::with_capacity(content.len());
     let mut total_count = 0;
+    let mut cursor = 0;
 
-    for rule in rules {
-        if rule.literal.is_empty() {
-            continue;
-        }
-        let (replaced, count) = replace_whole_word(&result, &rule.literal, &rule.replacement);
-        if count > 0 {
-            result = replaced;
-            total_count += count;
+    for (m_start, m_end, rule_idx) in &matches {
+        if *m_start < cursor {
+            continue; // overlaps with a previously accepted match
         }
+        result.push_str(&content[cursor..*m_start]);
+        result.push_str(&rules[*rule_idx].replacement);
+        total_count += 1;
+        cursor = *m_end;
     }
+    result.push_str(&content[cursor..]);
 
     (result, total_count)
 }
diff --git a/src/extract/scan.rs b/src/extract/scan.rs
index 278fd75..bd5bb6f 100644
--- a/src/extract/scan.rs
+++ b/src/extract/scan.rs
@@ -64,13 +64,20 @@ pub fn scan_project(project_dir: &Path, excludes: &[String]) -> crate::error::Re
         }
 
         let absolute_path = entry.path().to_path_buf();
-        let is_binary = is_binary_file(&absolute_path);
 
-        let content = if is_binary {
-            None
+        let (is_binary, content) = if is_binary_file(&absolute_path) {
+            (true, None)
         } else {
-            // If we can't read as UTF-8, treat as binary
-            std::fs::read_to_string(&absolute_path).ok()
+            match std::fs::read_to_string(&absolute_path) {
+                Ok(s) => (false, Some(s)),
+                Err(e) if e.kind() == std::io::ErrorKind::InvalidData => (true, None),
+                Err(e) => {
+                    return Err(crate::error::DicecutError::Io {
+                        context: format!("reading file {}", absolute_path.display()),
+                        source: e,
+                    });
+                }
+            }
         };
 
         files.push(ScannedFile {

From fb22906355eda9bb9b680438fbb6d24682630409 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 11:07:57 -0500
Subject: [PATCH 16/29] fix(extract): handle nested excludes and symlinks to
 directories

detect_excludes only checked if exclude patterns existed at the project
root, missing patterns like node_modules at deeper levels (e.g.
docs/node_modules/). Always include all DEFAULT_EXCLUDES since
should_exclude already handles nested matching via path components.

Also skip symlinks that resolve to directories during scan. pnpm's
node_modules/.pnpm uses symlinks to directories, and walkdir reports
these as non-directory entries, causing read_to_string to fail with
"Is a directory".
---
 src/extract/exclude.rs | 26 +++++++-------------------
 src/extract/scan.rs    | 30 +++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
index 8c4c082..8f3254b 100644
--- a/src/extract/exclude.rs
+++ b/src/extract/exclude.rs
@@ -62,22 +62,11 @@ const DEFAULT_COPY_WITHOUT_RENDER: &[&str] = &[
 ];
 
 /// Detect which default exclude patterns actually exist in the project.
-pub fn detect_excludes(project_dir: &Path) -> Vec<String> {
-    let mut found = Vec::new();
-
-    for pattern in DEFAULT_EXCLUDES {
-        let clean = pattern.trim_end_matches('/');
-        // Skip glob patterns — they're always included
-        if clean.contains('*') {
-            found.push(pattern.to_string());
-            continue;
-        }
-        if project_dir.join(clean).exists() {
-            found.push(pattern.to_string());
-        }
-    }
-
-    found
+///
+/// All DEFAULT_EXCLUDES are always included because patterns like `node_modules`
+/// can appear at any depth (e.g. `docs/node_modules/`), not just the project root.
+pub fn detect_excludes(_project_dir: &Path) -> Vec<String> {
+    DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect()
 }
 
 /// Detect which copy-without-render patterns are relevant based on files present.
@@ -191,14 +180,13 @@ mod tests {
     #[test]
     fn test_detect_excludes() {
         let dir = tempfile::tempdir().unwrap();
-        std::fs::create_dir(dir.path().join(".git")).unwrap();
-        std::fs::write(dir.path().join(".DS_Store"), "").unwrap();
 
         let found = detect_excludes(dir.path());
+        // All DEFAULT_EXCLUDES are always included regardless of what exists on disk
         assert!(found.iter().any(|e| e.contains(".git")));
         assert!(found.iter().any(|e| e == ".DS_Store"));
-        // Glob patterns should always be included
         assert!(found.iter().any(|e| e == "*.pyc"));
+        assert!(found.iter().any(|e| e.contains("node_modules")));
     }
 
     #[test]
diff --git a/src/extract/scan.rs b/src/extract/scan.rs
index bd5bb6f..544aa87 100644
--- a/src/extract/scan.rs
+++ b/src/extract/scan.rs
@@ -47,10 +47,14 @@ pub fn scan_project(project_dir: &Path, excludes: &[String]) -> crate::error::Re
                 .unwrap_or_else(|| std::io::Error::other("walkdir error")),
         })?;
 
-        // Skip directories themselves (we only care about files)
+        // Skip directories (including symlinks to directories, e.g. pnpm's
+        // node_modules/.pnpm uses symlinks that point to directories).
         if entry.file_type().is_dir() {
             continue;
         }
+        if entry.path_is_symlink() && entry.path().is_dir() {
+            continue;
+        }
 
         let relative_path = entry
             .path()
@@ -124,6 +128,30 @@ mod tests {
         assert_eq!(result.files[0].relative_path, PathBuf::from("README.md"));
     }
 
+    #[cfg(unix)]
+    #[test]
+    fn test_scan_project_skips_symlinks_to_directories() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(dir.path().join("real.txt"), "hello").unwrap();
+
+        // Create a subdirectory and a symlink pointing to it
+        let subdir = dir.path().join("subdir");
+        std::fs::create_dir(&subdir).unwrap();
+        std::fs::write(subdir.join("nested.txt"), "nested").unwrap();
+        std::os::unix::fs::symlink(&subdir, dir.path().join("link-to-dir")).unwrap();
+
+        let result = scan_project(dir.path(), &[]).unwrap();
+        // Should find real.txt and subdir/nested.txt, but NOT choke on link-to-dir
+        let paths: Vec<String> = result
+            .files
+            .iter()
+            .map(|f| f.relative_path.to_string_lossy().to_string())
+            .collect();
+        assert!(paths.contains(&"real.txt".to_string()));
+        assert!(paths.contains(&"subdir/nested.txt".to_string()));
+        assert!(!paths.iter().any(|p| p.contains("link-to-dir")));
+    }
+
     #[test]
     fn test_scan_project_binary_detection() {
         let dir = tempfile::tempdir().unwrap();

From e28b004dbd49a2115972f4304e7219dd9fc7b619 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 11:25:15 -0500
Subject: [PATCH 17/29] fix(extract): exclude .worktrees/ from template
 extraction

Git worktrees are working copies, not part of the project source.
Without this, extract would template duplicate files from any
active worktrees in the project.
---
 src/extract/exclude.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
index 8f3254b..b86752f 100644
--- a/src/extract/exclude.rs
+++ b/src/extract/exclude.rs
@@ -28,6 +28,7 @@ const DEFAULT_EXCLUDES: &[&str] = &[
     ".nuxt",
     ".output",
     ".turbo",
+    ".worktrees",
     ".diecut-answers.toml",
 ];
 

From 5cc96722a376903f7f0ebce80267da79b75b7909 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 12:56:35 -0500
Subject: [PATCH 18/29] feat(extract): stub content files instead of copying
 verbatim

Classify text files with 0 template replacements as boilerplate
(config, dotfiles, CI) or content (prose, source). Boilerplate is
copied in full; content files are stubbed to minimal placeholders
so templates preserve structure without project-specific prose.

Interactive confirmation now shows three categories: Templated,
Boilerplate, and Stubbed.
---
 src/extract/mod.rs  | 146 +++++++++++++++++++++++---------
 src/extract/stub.rs | 199 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 307 insertions(+), 38 deletions(-)
 create mode 100644 src/extract/stub.rs

diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 4388207..9f0c8e4 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -4,6 +4,7 @@ pub mod config_gen;
 pub mod exclude;
 pub mod replace;
 pub mod scan;
+pub mod stub;
 pub mod variants;
 
 use std::collections::{BTreeMap, HashMap};
@@ -20,11 +21,12 @@ use self::conditional::{detect_conditional_files, patterns_for_variable, Detecte
 use self::config_gen::{
     generate_config_toml, ComputedVariable, ConditionalEntry, ConfigGenOptions, PromptedVariable,
 };
-use self::exclude::{detect_copy_without_render, detect_excludes};
+use self::exclude::{all_default_excludes, detect_copy_without_render, relevant_config_excludes};
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
 use self::scan::scan_project;
+use self::stub::{classify_file, generate_stub, FileRole};
 use self::variants::{
     computed_expression, detect_separator, generate_variants, is_canonical_variant, CaseVariant,
 };
@@ -58,6 +60,8 @@ pub struct PlannedExtractFile {
     pub template_path: PathBuf,
     /// The file content (text with replacements, or binary bytes).
     pub content: ExtractedContent,
+    /// Whether this file was stubbed (content replaced with a minimal placeholder).
+    pub stubbed: bool,
 }
 
 impl PlannedExtractFile {
@@ -144,15 +148,15 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         });
     }
 
-    // Phase 1: Detect excludes
-    let mut excludes = detect_excludes(source_dir);
+    // Phase 1: All default excludes for scanning (safety — never walks into .git/, node_modules/, etc.)
+    let scan_excludes = all_default_excludes();
 
     // Phase 2: Scan project
     eprintln!(
         "\n{}",
         style(format!("Scanning {}...", source_dir.display())).bold()
     );
-    let scan_result = scan_project(source_dir, &excludes)?;
+    let scan_result = scan_project(source_dir, &scan_excludes)?;
     eprintln!(
         "  {} files found, {} excluded",
         scan_result.files.len(),
@@ -245,11 +249,6 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         confirm_variants_interactive(extract_variables)?
     };
 
-    // Phase 5: Interactive exclude confirmation
-    if !options.yes {
-        excludes = confirm_excludes_interactive(excludes)?;
-    }
-
     // Phase 6: Detect conditional files
     let detected_conditionals = if options.yes {
         vec![] // Batch mode: no conditional files
@@ -299,29 +298,64 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
             planned_files.push(PlannedExtractFile {
                 template_path,
                 content: ExtractedContent::Binary(binary_content),
+                stubbed: false,
             });
         } else if let Some(ref content) = file.content {
             let (replaced, count) = apply_replacements(content, &rules);
 
-            // Add .die suffix if file has template replacements
-            let final_path = if count > 0 {
+            if count > 0 {
+                // Has template replacements — keep content, add .die suffix
                 let mut p = template_path.as_os_str().to_string_lossy().to_string();
                 p.push_str(DEFAULT_TEMPLATES_SUFFIX);
-                PathBuf::from(p)
+                planned_files.push(PlannedExtractFile {
+                    template_path: PathBuf::from(p),
+                    content: ExtractedContent::Text {
+                        content: replaced,
+                        replacement_count: count,
+                    },
+                    stubbed: false,
+                });
             } else {
-                template_path
-            };
-
-            planned_files.push(PlannedExtractFile {
-                template_path: final_path,
-                content: ExtractedContent::Text {
-                    content: replaced,
-                    replacement_count: count,
-                },
-            });
+                // No replacements — classify as boilerplate or content
+                match classify_file(&file.relative_path) {
+                    FileRole::Boilerplate => {
+                        planned_files.push(PlannedExtractFile {
+                            template_path,
+                            content: ExtractedContent::Text {
+                                content: replaced,
+                                replacement_count: 0,
+                            },
+                            stubbed: false,
+                        });
+                    }
+                    FileRole::Content => {
+                        let stub = generate_stub(&file.relative_path);
+                        planned_files.push(PlannedExtractFile {
+                            template_path,
+                            content: ExtractedContent::Text {
+                                content: stub,
+                                replacement_count: 0,
+                            },
+                            stubbed: true,
+                        });
+                    }
+                }
+            }
         }
     }
 
+    // Phase 9.5: Compute config-appropriate excludes from planned template files
+    // Only patterns that match files actually in the template are worth writing to diecut.toml
+    let template_paths: Vec<PathBuf> = planned_files
+        .iter()
+        .map(|f| f.template_path.clone())
+        .collect();
+    let mut config_excludes = relevant_config_excludes(&template_paths);
+
+    if !options.yes {
+        config_excludes = confirm_excludes_interactive(config_excludes)?;
+    }
+
     // Phase 10: Interactive file confirmation
     if !options.yes {
         confirm_files_interactive(&planned_files)?;
@@ -390,7 +424,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
             .unwrap_or_else(|| "template".to_string()),
         prompted_variables: prompted_vars,
         computed_variables: computed_vars,
-        exclude_patterns: excludes.clone(),
+        exclude_patterns: config_excludes.clone(),
         copy_without_render: copy_without_render.clone(),
         conditional_entries: conditional_entries.clone(),
     });
@@ -401,7 +435,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         config_toml,
         variables: confirmed_variables,
         conditional_entries,
-        exclude_patterns: excludes,
+        exclude_patterns: config_excludes,
         copy_without_render,
     })
 }
@@ -420,6 +454,7 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
     // Write template files
     let mut rendered_count = 0;
     let mut copied_count = 0;
+    let mut stubbed_count = 0;
 
     for file in &plan.files {
         let dest = template_dir.join(&file.template_path);
@@ -443,6 +478,8 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
                 })?;
                 if *replacement_count > 0 {
                     rendered_count += 1;
+                } else if file.stubbed {
+                    stubbed_count += 1;
                 } else {
                     copied_count += 1;
                 }
@@ -500,8 +537,8 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
         computed_count
     );
     eprintln!(
-        "  {} files templated, {} files copied",
-        rendered_count, copied_count
+        "  {} files templated, {} files copied, {} files stubbed",
+        rendered_count, copied_count, stubbed_count
     );
     if !plan.conditional_entries.is_empty() {
         eprintln!(
@@ -628,12 +665,16 @@ fn confirm_excludes_interactive(mut excludes: Vec<String>) -> Result<Vec<String>
         style("──").dim(),
         style("─────────────────────────────────────────────").dim()
     );
-    eprintln!("  Auto-detected:");
-    for e in &excludes {
-        eprintln!("    {}", e);
+    if excludes.is_empty() {
+        eprintln!("  No exclude patterns needed for this template.");
+    } else {
+        eprintln!("  Patterns matching template files:");
+        for e in &excludes {
+            eprintln!("    {}", e);
+        }
     }
 
-    let extra = Text::new("Add any others? (comma-separated, enter to accept)")
+    let extra = Text::new("Add extra exclude patterns? (comma-separated, enter to skip)")
         .with_default("")
         .prompt()
         .map_err(|_| DicecutError::PromptCancelled)?;
@@ -837,31 +878,60 @@ fn confirm_auto_detected_interactive(
 
 fn confirm_files_interactive(files: &[PlannedExtractFile]) -> Result<()> {
     let templated: Vec<_> = files.iter().filter(|f| f.has_replacements()).collect();
-    let copied: Vec<_> = files.iter().filter(|f| !f.has_replacements()).collect();
+    let boilerplate: Vec<_> = files
+        .iter()
+        .filter(|f| !f.has_replacements() && !f.stubbed && !f.is_binary())
+        .collect();
+    let stubbed: Vec<_> = files.iter().filter(|f| f.stubbed).collect();
     let binary_count = files.iter().filter(|f| f.is_binary()).count();
 
     eprintln!(
-        "\n{} Files to template {}",
+        "\n{} File plan {}",
         style("──").dim(),
-        style("────────────────────────────────────").dim()
+        style("──────────────────────────────────────────").dim()
     );
+
+    // Templated files
     eprintln!(
-        "  Will get {} suffix (template replacements made):",
+        "\n  {} ({} files, {} suffix):",
+        style("Templated").bold(),
+        templated.len(),
         DEFAULT_TEMPLATES_SUFFIX
     );
     for file in &templated {
         eprintln!(
-            "    {:<40} {} replacements",
+            "    {:<50} {} replacements",
             file.template_path.display(),
             file.replacement_count()
         );
     }
 
+    // Boilerplate files
     eprintln!(
-        "\n  Copied verbatim: {} files (including {} binary)",
-        copied.len(),
-        binary_count
+        "\n  {} (copied in full, {} files{}):",
+        style("Boilerplate").bold(),
+        boilerplate.len() + binary_count,
+        if binary_count > 0 {
+            format!(", {} binary", binary_count)
+        } else {
+            String::new()
+        }
     );
+    for file in &boilerplate {
+        eprintln!("    {}", file.template_path.display());
+    }
+
+    // Stubbed files
+    if !stubbed.is_empty() {
+        eprintln!(
+            "\n  {} (structure only, {} files):",
+            style("Stubbed").bold(),
+            stubbed.len()
+        );
+        for file in &stubbed {
+            eprintln!("    {}", file.template_path.display());
+        }
+    }
 
     let proceed = Confirm::new("Proceed?")
         .with_default(true)
diff --git a/src/extract/stub.rs b/src/extract/stub.rs
new file mode 100644
index 0000000..cf19e19
--- /dev/null
+++ b/src/extract/stub.rs
@@ -0,0 +1,199 @@
+use std::path::Path;
+
+/// Whether a file is boilerplate (copy in full) or content (stub to placeholder).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum FileRole {
+    /// Config, dotfiles, CI — copy verbatim into the template.
+    Boilerplate,
+    /// Prose, docs, source — stub to minimal placeholder.
+    Content,
+}
+
+/// Filenames (case-insensitive) that are always boilerplate.
+const BOILERPLATE_FILENAMES: &[&str] = &[
+    ".gitignore",
+    ".gitattributes",
+    ".editorconfig",
+    ".prettierrc",
+    ".npmrc",
+    ".nvmrc",
+    ".gitkeep",
+    "makefile",
+    "dockerfile",
+    "justfile",
+    "license",
+    "licence",
+    "procfile",
+];
+
+/// Extensions (case-insensitive, without dot) that are always boilerplate.
+const BOILERPLATE_EXTENSIONS: &[&str] = &[
+    "toml", "yaml", "yml", "json", "jsonc", "json5", "xml", "sh", "bash", "zsh", "bat", "cmd",
+    "ps1", "cfg", "ini", "conf",
+];
+
+/// Directory prefixes — files under these dirs are boilerplate.
+const BOILERPLATE_DIR_PREFIXES: &[&str] = &[".github/", ".gitlab/", ".circleci/", ".vscode/"];
+
+/// Classify a file as boilerplate or content based on its relative path.
+///
+/// Only called for text files with 0 template replacements.
+pub fn classify_file(path: &Path) -> FileRole {
+    let path_str = path.to_string_lossy();
+
+    // Check directory prefix
+    for prefix in BOILERPLATE_DIR_PREFIXES {
+        if path_str.starts_with(prefix) {
+            return FileRole::Boilerplate;
+        }
+    }
+
+    // Check filename (case-insensitive)
+    if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
+        let lower = filename.to_lowercase();
+        if BOILERPLATE_FILENAMES.contains(&lower.as_str()) {
+            return FileRole::Boilerplate;
+        }
+    }
+
+    // Check extension (case-insensitive)
+    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
+        let lower = ext.to_lowercase();
+        if BOILERPLATE_EXTENSIONS.contains(&lower.as_str()) {
+            return FileRole::Boilerplate;
+        }
+    }
+
+    FileRole::Content
+}
+
+/// Generate a minimal stub for a content file.
+///
+/// - `.md` files get `# {Title}\n` where Title is derived from the filename.
+/// - Everything else gets an empty string.
+pub fn generate_stub(path: &Path) -> String {
+    let is_md = path
+        .extension()
+        .and_then(|e| e.to_str())
+        .is_some_and(|e| e.eq_ignore_ascii_case("md"));
+
+    if is_md {
+        let title = path
+            .file_stem()
+            .and_then(|s| s.to_str())
+            .unwrap_or("Untitled");
+        // Title-case: split on `-`/`_`, capitalize each word, lowercase the rest
+        let title = title_case(title);
+        format!("# {title}\n")
+    } else {
+        String::new()
+    }
+}
+
+/// Convert a filename stem like "craft" or "SKILL" into title case.
+///
+/// Splits on `-` and `_`, capitalizes each word's first letter, lowercases the rest, joins with spaces.
+fn title_case(s: &str) -> String {
+    s.split(['-', '_'])
+        .filter(|w| !w.is_empty())
+        .map(|word| {
+            let mut chars = word.chars();
+            match chars.next() {
+                Some(first) => {
+                    let rest: String = chars.collect::<String>().to_lowercase();
+                    format!("{}{rest}", first.to_uppercase())
+                }
+                None => String::new(),
+            }
+        })
+        .collect::<Vec<_>>()
+        .join(" ")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rstest::rstest;
+
+    // ── classify_file ────────────────────────────────────────────────
+
+    #[rstest]
+    #[case(".gitignore", FileRole::Boilerplate)]
+    #[case(".editorconfig", FileRole::Boilerplate)]
+    #[case("Makefile", FileRole::Boilerplate)]
+    #[case("Dockerfile", FileRole::Boilerplate)]
+    #[case("LICENSE", FileRole::Boilerplate)]
+    #[case("Procfile", FileRole::Boilerplate)]
+    fn classify_boilerplate_filenames(#[case] filename: &str, #[case] expected: FileRole) {
+        assert_eq!(classify_file(Path::new(filename)), expected);
+    }
+
+    #[rstest]
+    #[case("Cargo.toml", FileRole::Boilerplate)]
+    #[case("config.yaml", FileRole::Boilerplate)]
+    #[case("settings.yml", FileRole::Boilerplate)]
+    #[case("package.json", FileRole::Boilerplate)]
+    #[case("tsconfig.json", FileRole::Boilerplate)]
+    #[case("setup.cfg", FileRole::Boilerplate)]
+    #[case("build.sh", FileRole::Boilerplate)]
+    #[case("deploy.ps1", FileRole::Boilerplate)]
+    #[case("app.conf", FileRole::Boilerplate)]
+    fn classify_boilerplate_extensions(#[case] filename: &str, #[case] expected: FileRole) {
+        assert_eq!(classify_file(Path::new(filename)), expected);
+    }
+
+    #[rstest]
+    #[case(".github/workflows/ci.yml", FileRole::Boilerplate)]
+    #[case(".github/CODEOWNERS", FileRole::Boilerplate)]
+    #[case(".gitlab/ci/deploy.yml", FileRole::Boilerplate)]
+    #[case(".circleci/config.yml", FileRole::Boilerplate)]
+    #[case(".vscode/settings.json", FileRole::Boilerplate)]
+    fn classify_boilerplate_directories(#[case] path: &str, #[case] expected: FileRole) {
+        assert_eq!(classify_file(Path::new(path)), expected);
+    }
+
+    #[rstest]
+    #[case("README.md")]
+    #[case("docs/guide.md")]
+    #[case("src/main.rs")]
+    #[case("src/lib.py")]
+    #[case("index.html")]
+    #[case("app.css")]
+    #[case("skills/convention-mining/SKILL.md")]
+    fn classify_content(#[case] path: &str) {
+        assert_eq!(classify_file(Path::new(path)), FileRole::Content);
+    }
+
+    // ── generate_stub ────────────────────────────────────────────────
+
+    #[rstest]
+    #[case("README.md", "# Readme\n")]
+    #[case("craft.md", "# Craft\n")]
+    #[case("SKILL.md", "# Skill\n")]
+    #[case("getting-started.md", "# Getting Started\n")]
+    #[case("my_notes.md", "# My Notes\n")]
+    fn stub_md_files(#[case] filename: &str, #[case] expected: &str) {
+        assert_eq!(generate_stub(Path::new(filename)), expected);
+    }
+
+    #[rstest]
+    #[case("src/main.rs")]
+    #[case("index.html")]
+    #[case("app.css")]
+    #[case("data.txt")]
+    fn stub_non_md_files(#[case] filename: &str) {
+        assert_eq!(generate_stub(Path::new(filename)), "");
+    }
+
+    // ── title_case ───────────────────────────────────────────────────
+
+    #[rstest]
+    #[case("craft", "Craft")]
+    #[case("SKILL", "Skill")]
+    #[case("getting-started", "Getting Started")]
+    #[case("my_notes", "My Notes")]
+    #[case("README", "Readme")]
+    fn test_title_case(#[case] input: &str, #[case] expected: &str) {
+        assert_eq!(title_case(input), expected);
+    }
+}

From fc75d152439210180a2fbb89915d2d8a4eb743d6 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 12:57:38 -0500
Subject: [PATCH 19/29] fix(extract): commit missing exclude.rs refactor

The rename of detect_excludes to all_default_excludes and the new
relevant_config_excludes function were already referenced by mod.rs
but the file itself was not staged in the previous commit.
---
 src/extract/exclude.rs | 60 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
index b86752f..f228830 100644
--- a/src/extract/exclude.rs
+++ b/src/extract/exclude.rs
@@ -62,14 +62,30 @@ const DEFAULT_COPY_WITHOUT_RENDER: &[&str] = &[
     "composer.lock",
 ];
 
-/// Detect which default exclude patterns actually exist in the project.
+/// Return all default exclude patterns for use during scanning.
 ///
-/// All DEFAULT_EXCLUDES are always included because patterns like `node_modules`
-/// can appear at any depth (e.g. `docs/node_modules/`), not just the project root.
-pub fn detect_excludes(_project_dir: &Path) -> Vec<String> {
+/// All DEFAULT_EXCLUDES are always used during the scan phase because patterns
+/// like `node_modules` can appear at any depth (e.g. `docs/node_modules/`).
+pub fn all_default_excludes() -> Vec<String> {
     DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect()
 }
 
+/// Return only the DEFAULT_EXCLUDES patterns that match at least one file in the
+/// template output. These are the patterns worth writing to `diecut.toml`'s
+/// `[files] exclude` — directory patterns like `.git/` or `node_modules/` that
+/// were filtered during scan are omitted since those files never appear in the
+/// template.
+pub fn relevant_config_excludes(template_files: &[std::path::PathBuf]) -> Vec<String> {
+    let all = all_default_excludes();
+    all.into_iter()
+        .filter(|pattern| {
+            template_files
+                .iter()
+                .any(|f| should_exclude(f, std::slice::from_ref(pattern)))
+        })
+        .collect()
+}
+
 /// Detect which copy-without-render patterns are relevant based on files present.
 pub fn detect_copy_without_render(
     _project_dir: &Path,
@@ -179,17 +195,43 @@ mod tests {
     }
 
     #[test]
-    fn test_detect_excludes() {
-        let dir = tempfile::tempdir().unwrap();
-
-        let found = detect_excludes(dir.path());
-        // All DEFAULT_EXCLUDES are always included regardless of what exists on disk
+    fn test_all_default_excludes() {
+        let found = all_default_excludes();
+        // All DEFAULT_EXCLUDES are always included
         assert!(found.iter().any(|e| e.contains(".git")));
         assert!(found.iter().any(|e| e == ".DS_Store"));
         assert!(found.iter().any(|e| e == "*.pyc"));
         assert!(found.iter().any(|e| e.contains("node_modules")));
     }
 
+    #[test]
+    fn test_relevant_config_excludes_empty_when_no_matches() {
+        // Typical template files won't match any DEFAULT_EXCLUDES
+        let files = vec![
+            PathBuf::from("src/main.rs"),
+            PathBuf::from("README.md"),
+            PathBuf::from("Cargo.toml"),
+        ];
+        let relevant = relevant_config_excludes(&files);
+        assert!(relevant.is_empty());
+    }
+
+    #[test]
+    fn test_relevant_config_excludes_finds_matching_patterns() {
+        let files = vec![
+            PathBuf::from("src/main.py"),
+            PathBuf::from("src/__pycache__/main.pyc"),
+            PathBuf::from(".DS_Store"),
+        ];
+        let relevant = relevant_config_excludes(&files);
+        assert!(relevant.contains(&"*.pyc".to_string()));
+        assert!(relevant.contains(&".DS_Store".to_string()));
+        assert!(relevant.contains(&"__pycache__".to_string()));
+        // Directory excludes that don't match should not appear
+        assert!(!relevant.contains(&".git".to_string()));
+        assert!(!relevant.contains(&"node_modules".to_string()));
+    }
+
     #[test]
     fn test_detect_copy_without_render() {
         let files = vec![

From 257fea509cdf6bc88f282ddcc827daeedbe589d9 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 13:42:10 -0500
Subject: [PATCH 20/29] feat(extract): drop deep content files, add
 --stub-depth flag

Content files deeper than N path components (default 2) are now dropped
entirely instead of being stubbed. Shallow content files like README.md
or docs/guide.md are still stubbed as before. The threshold is
configurable via --stub-depth.
---
 src/cli.rs              |  4 +++
 src/commands/extract.rs | 26 ++++++++++++++++---
 src/extract/mod.rs      | 28 ++++++++++++++++-----
 src/extract/stub.rs     | 55 +++++++++++++++++++++++++++++------------
 src/main.rs             | 12 ++++++++-
 tests/integration.rs    | 11 +++++++++
 6 files changed, 109 insertions(+), 27 deletions(-)

diff --git a/src/cli.rs b/src/cli.rs
index 6c687a0..fde16cb 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -75,6 +75,10 @@ pub enum Commands {
         #[arg(long, default_value = "0.5")]
         min_confidence: f64,
 
+        /// Max path depth for stubbing content files (deeper files are dropped)
+        #[arg(long, default_value = "2")]
+        stub_depth: usize,
+
         /// Show what would be extracted without writing files
         #[arg(long)]
         dry_run: bool,
diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index 04c23b3..6251044 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -6,6 +6,7 @@ use diecut::error::DicecutError;
 use diecut::extract::{execute_extraction, plan_extraction, ExtractOptions};
 use miette::Result;
 
+#[allow(clippy::too_many_arguments)]
 pub fn run(
     source: String,
     vars: Vec<String>,
@@ -13,6 +14,7 @@ pub fn run(
     in_place: bool,
     yes: bool,
     min_confidence: f64,
+    stub_depth: usize,
     dry_run: bool,
 ) -> Result<()> {
     let variables = parse_vars(&vars)?;
@@ -24,6 +26,7 @@ pub fn run(
         in_place,
         yes,
         min_confidence,
+        stub_depth,
         dry_run,
     };
 
@@ -64,11 +67,12 @@ fn print_dry_run(plan: &diecut::extract::ExtractionPlan) {
     );
 
     let templated: Vec<_> = plan.files.iter().filter(|f| f.has_replacements()).collect();
-    let copied: Vec<_> = plan
+    let boilerplate: Vec<_> = plan
         .files
         .iter()
-        .filter(|f| !f.has_replacements())
+        .filter(|f| !f.has_replacements() && !f.stubbed)
         .collect();
+    let stubbed: Vec<_> = plan.files.iter().filter(|f| f.stubbed).collect();
 
     eprintln!("\nTemplated files ({}):", templated.len());
     for file in &templated {
@@ -79,11 +83,25 @@ fn print_dry_run(plan: &diecut::extract::ExtractionPlan) {
         );
     }
 
-    eprintln!("\nCopied verbatim ({}):", copied.len());
-    for file in &copied {
+    eprintln!("\nBoilerplate ({}):", boilerplate.len());
+    for file in &boilerplate {
         eprintln!("  {}", file.template_path.display());
     }
 
+    if !stubbed.is_empty() {
+        eprintln!("\nStubbed ({}):", stubbed.len());
+        for file in &stubbed {
+            eprintln!("  {}", file.template_path.display());
+        }
+    }
+
+    if plan.dropped_count > 0 {
+        eprintln!("\nDropped ({}):", plan.dropped_count);
+        for path in &plan.dropped_paths {
+            eprintln!("  {}", path.display());
+        }
+    }
+
     eprintln!("\nVariables:");
     for var in &plan.variables {
         eprintln!("  {} = {:?}", var.name, var.value);
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 9f0c8e4..c9a6e34 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -96,6 +96,8 @@ pub struct ExtractionPlan {
     pub conditional_entries: Vec<ConditionalEntry>,
     pub exclude_patterns: Vec<String>,
     pub copy_without_render: Vec<String>,
+    pub dropped_count: usize,
+    pub dropped_paths: Vec<PathBuf>,
 }
 
 /// Options for the extraction process.
@@ -106,6 +108,7 @@ pub struct ExtractOptions {
     pub in_place: bool,
     pub yes: bool,
     pub min_confidence: f64,
+    pub stub_depth: usize,
     pub dry_run: bool,
 }
 
@@ -285,6 +288,8 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
 
     // Phase 9: Apply replacements to files
     let mut planned_files = Vec::new();
+    let mut dropped_count = 0;
+    let mut dropped_paths = Vec::new();
 
     for file in &scan_result.files {
         let template_path = apply_path_replacements(&file.relative_path, &rules);
@@ -316,8 +321,8 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
                     stubbed: false,
                 });
             } else {
-                // No replacements — classify as boilerplate or content
-                match classify_file(&file.relative_path) {
+                // No replacements — classify as boilerplate, content, or dropped
+                match classify_file(&file.relative_path, options.stub_depth) {
                     FileRole::Boilerplate => {
                         planned_files.push(PlannedExtractFile {
                             template_path,
@@ -339,6 +344,10 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
                             stubbed: true,
                         });
                     }
+                    FileRole::Dropped => {
+                        dropped_count += 1;
+                        dropped_paths.push(file.relative_path.clone());
+                    }
                 }
             }
         }
@@ -358,7 +367,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
 
     // Phase 10: Interactive file confirmation
     if !options.yes {
-        confirm_files_interactive(&planned_files)?;
+        confirm_files_interactive(&planned_files, dropped_count)?;
     }
 
     // Phase 11: Build conditional entries
@@ -437,6 +446,8 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         conditional_entries,
         exclude_patterns: config_excludes,
         copy_without_render,
+        dropped_count,
+        dropped_paths,
     })
 }
 
@@ -537,8 +548,8 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
         computed_count
     );
     eprintln!(
-        "  {} files templated, {} files copied, {} files stubbed",
-        rendered_count, copied_count, stubbed_count
+        "  {} files templated, {} files copied, {} files stubbed, {} files dropped",
+        rendered_count, copied_count, stubbed_count, plan.dropped_count
     );
     if !plan.conditional_entries.is_empty() {
         eprintln!(
@@ -876,7 +887,7 @@ fn confirm_auto_detected_interactive(
     Ok(accepted)
 }
 
-fn confirm_files_interactive(files: &[PlannedExtractFile]) -> Result<()> {
+fn confirm_files_interactive(files: &[PlannedExtractFile], dropped_count: usize) -> Result<()> {
     let templated: Vec<_> = files.iter().filter(|f| f.has_replacements()).collect();
     let boilerplate: Vec<_> = files
         .iter()
@@ -933,6 +944,11 @@ fn confirm_files_interactive(files: &[PlannedExtractFile]) -> Result<()> {
         }
     }
 
+    // Dropped files
+    if dropped_count > 0 {
+        eprintln!("\n  {} ({} files):", style("Dropped").bold(), dropped_count);
+    }
+
     let proceed = Confirm::new("Proceed?")
         .with_default(true)
         .prompt()
diff --git a/src/extract/stub.rs b/src/extract/stub.rs
index cf19e19..8c6ce47 100644
--- a/src/extract/stub.rs
+++ b/src/extract/stub.rs
@@ -1,12 +1,14 @@
 use std::path::Path;
 
-/// Whether a file is boilerplate (copy in full) or content (stub to placeholder).
+/// Whether a file is boilerplate (copy in full), content (stub), or too deep (drop).
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum FileRole {
     /// Config, dotfiles, CI — copy verbatim into the template.
     Boilerplate,
     /// Prose, docs, source — stub to minimal placeholder.
     Content,
+    /// Content deeper than stub_depth — drop entirely.
+    Dropped,
 }
 
 /// Filenames (case-insensitive) that are always boilerplate.
@@ -35,10 +37,11 @@ const BOILERPLATE_EXTENSIONS: &[&str] = &[
 /// Directory prefixes — files under these dirs are boilerplate.
 const BOILERPLATE_DIR_PREFIXES: &[&str] = &[".github/", ".gitlab/", ".circleci/", ".vscode/"];
 
-/// Classify a file as boilerplate or content based on its relative path.
+/// Classify a file as boilerplate, content, or dropped based on its relative path.
 ///
 /// Only called for text files with 0 template replacements.
-pub fn classify_file(path: &Path) -> FileRole {
+/// Content files deeper than `stub_depth` path components are dropped; boilerplate is kept at any depth.
+pub fn classify_file(path: &Path, stub_depth: usize) -> FileRole {
     let path_str = path.to_string_lossy();
 
     // Check directory prefix
@@ -64,7 +67,12 @@ pub fn classify_file(path: &Path) -> FileRole {
         }
     }
 
-    FileRole::Content
+    let depth = path.components().count();
+    if depth > stub_depth {
+        FileRole::Dropped
+    } else {
+        FileRole::Content
+    }
 }
 
 /// Generate a minimal stub for a content file.
@@ -125,7 +133,7 @@ mod tests {
     #[case("LICENSE", FileRole::Boilerplate)]
     #[case("Procfile", FileRole::Boilerplate)]
     fn classify_boilerplate_filenames(#[case] filename: &str, #[case] expected: FileRole) {
-        assert_eq!(classify_file(Path::new(filename)), expected);
+        assert_eq!(classify_file(Path::new(filename), 2), expected);
     }
 
     #[rstest]
@@ -139,7 +147,7 @@ mod tests {
     #[case("deploy.ps1", FileRole::Boilerplate)]
     #[case("app.conf", FileRole::Boilerplate)]
     fn classify_boilerplate_extensions(#[case] filename: &str, #[case] expected: FileRole) {
-        assert_eq!(classify_file(Path::new(filename)), expected);
+        assert_eq!(classify_file(Path::new(filename), 2), expected);
     }
 
     #[rstest]
@@ -149,19 +157,34 @@ mod tests {
     #[case(".circleci/config.yml", FileRole::Boilerplate)]
     #[case(".vscode/settings.json", FileRole::Boilerplate)]
     fn classify_boilerplate_directories(#[case] path: &str, #[case] expected: FileRole) {
-        assert_eq!(classify_file(Path::new(path)), expected);
+        assert_eq!(classify_file(Path::new(path), 2), expected);
     }
 
     #[rstest]
-    #[case("README.md")]
-    #[case("docs/guide.md")]
-    #[case("src/main.rs")]
-    #[case("src/lib.py")]
-    #[case("index.html")]
-    #[case("app.css")]
-    #[case("skills/convention-mining/SKILL.md")]
-    fn classify_content(#[case] path: &str) {
-        assert_eq!(classify_file(Path::new(path)), FileRole::Content);
+    #[case("README.md", 2)]
+    #[case("docs/guide.md", 2)]
+    #[case("src/main.rs", 2)]
+    #[case("src/lib.py", 2)]
+    #[case("index.html", 2)]
+    #[case("app.css", 2)]
+    #[case("skills/convention-mining/SKILL.md", 3)] // depth 3, stub_depth 3 → Content
+    fn classify_content(#[case] path: &str, #[case] stub_depth: usize) {
+        assert_eq!(
+            classify_file(Path::new(path), stub_depth),
+            FileRole::Content
+        );
+    }
+
+    #[rstest]
+    #[case("skills/convention-mining/SKILL.md", 2)] // depth 3 > stub_depth 2
+    #[case("skills/writing-skills/craft.md", 2)] // depth 3 > stub_depth 2
+    #[case("a/b/c/deep.md", 2)] // depth 4 > stub_depth 2
+    #[case("docs/guide.md", 1)] // depth 2 > stub_depth 1
+    fn classify_dropped(#[case] path: &str, #[case] stub_depth: usize) {
+        assert_eq!(
+            classify_file(Path::new(path), stub_depth),
+            FileRole::Dropped
+        );
     }
 
     // ── generate_stub ────────────────────────────────────────────────
diff --git a/src/main.rs b/src/main.rs
index 4999bb2..11dec94 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -26,7 +26,17 @@ fn main() -> miette::Result<()> {
             in_place,
             yes,
             min_confidence,
+            stub_depth,
             dry_run,
-        } => commands::extract::run(source, vars, output, in_place, yes, min_confidence, dry_run),
+        } => commands::extract::run(
+            source,
+            vars,
+            output,
+            in_place,
+            yes,
+            min_confidence,
+            stub_depth,
+            dry_run,
+        ),
     }
 }
diff --git a/tests/integration.rs b/tests/integration.rs
index 243935c..bee61fc 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -656,6 +656,7 @@ fn test_extract_batch_basic() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -700,6 +701,7 @@ fn test_extract_detects_case_variants() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -754,6 +756,7 @@ fn test_extract_dry_run_writes_nothing() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: true,
     };
 
@@ -783,6 +786,7 @@ fn test_extract_rejects_already_template() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -804,6 +808,7 @@ fn test_extract_rejects_no_variables() {
         in_place: false,
         yes: true,
         min_confidence: 1.0,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -827,6 +832,7 @@ fn test_extract_templates_path_components() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -890,6 +896,7 @@ fn test_extract_round_trip() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -959,6 +966,7 @@ fn test_extract_auto_yes() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -1000,6 +1008,7 @@ fn test_extract_auto_explicit_vars_merged() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -1042,6 +1051,7 @@ fn test_extract_auto_frequency_fallback() {
         in_place: false,
         yes: true,
         min_confidence: 0.5,
+        stub_depth: 2,
         dry_run: false,
     };
 
@@ -1085,6 +1095,7 @@ fn test_extract_min_confidence_filters() {
         in_place: false,
         yes: true,
         min_confidence: 0.99,
+        stub_depth: 2,
         dry_run: true,
     };
 

From b0d69da6d716ea37495b8c2b6b9743124daed4a1 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 14:15:25 -0500
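Subject: [PATCH 21/29] feat(extract): auto-detect version from config files

Add a `version` candidate (ConfigFile tier, confidence 0.85) to the
Cargo.toml, package.json, and pyproject.toml parsers. Missing or empty
version fields produce no candidate. Extend the Tier 2 parser tests and
add test_tier2_version_missing to cover the absent-version case.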
---
 src/extract/auto_detect.rs | 103 +++++++++++++++++++++++++++++++++++--
 1 file changed, 100 insertions(+), 3 deletions(-)

diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index 0777fb9..ae0fc7b 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -230,6 +230,25 @@ fn parse_cargo_toml(
         });
     }
 
+    if let Some(version) = parsed
+        .get("package")
+        .and_then(|p| p.get("version"))
+        .and_then(|v| v.as_str())
+    {
+        if !version.is_empty() {
+            let (file_count, total_occurrences) = count_occurrences(version, scan_result);
+            candidates.push(DetectedCandidate {
+                suggested_name: "version".to_string(),
+                value: version.to_string(),
+                tier: ConfidenceTier::ConfigFile,
+                confidence: 0.85,
+                reason: "Cargo.toml [package].version".to_string(),
+                file_count,
+                total_occurrences,
+            });
+        }
+    }
+
     if let Some(authors) = parsed
         .get("package")
         .and_then(|p| p.get("authors"))
@@ -280,6 +299,21 @@ fn parse_package_json(
         });
     }
 
+    if let Some(version) = parsed.get("version").and_then(|v| v.as_str()) {
+        if !version.is_empty() {
+            let (file_count, total_occurrences) = count_occurrences(version, scan_result);
+            candidates.push(DetectedCandidate {
+                suggested_name: "version".to_string(),
+                value: version.to_string(),
+                tier: ConfidenceTier::ConfigFile,
+                confidence: 0.85,
+                reason: "package.json \"version\"".to_string(),
+                file_count,
+                total_occurrences,
+            });
+        }
+    }
+
     if let Some(author) = parsed.get("author") {
         let author_str = match author {
             serde_json::Value::String(s) => Some(strip_email(s)),
@@ -334,6 +368,25 @@ fn parse_pyproject_toml(
         });
     }
 
+    if let Some(version) = parsed
+        .get("project")
+        .and_then(|p| p.get("version"))
+        .and_then(|v| v.as_str())
+    {
+        if !version.is_empty() {
+            let (file_count, total_occurrences) = count_occurrences(version, scan_result);
+            candidates.push(DetectedCandidate {
+                suggested_name: "version".to_string(),
+                value: version.to_string(),
+                tier: ConfidenceTier::ConfigFile,
+                confidence: 0.85,
+                reason: "pyproject.toml [project].version".to_string(),
+                file_count,
+                total_occurrences,
+            });
+        }
+    }
+
     if let Some(authors) = parsed
         .get("project")
         .and_then(|p| p.get("authors"))
@@ -1287,7 +1340,7 @@ mod tests {
         let dir = tempfile::tempdir().unwrap();
         std::fs::write(
             dir.path().join("Cargo.toml"),
-            "[package]\nname = \"data-pipeline\"\nauthors = [\"Alice <alice@example.com>\"]\n",
+            "[package]\nname = \"data-pipeline\"\nversion = \"0.3.1\"\nauthors = [\"Alice <alice@example.com>\"]\n",
         )
         .unwrap();
 
@@ -1295,6 +1348,9 @@ mod tests {
         let candidates = parse_cargo_toml(dir.path(), &scan).unwrap();
 
         assert!(candidates.iter().any(|c| c.value == "data-pipeline"));
+        assert!(candidates
+            .iter()
+            .any(|c| c.value == "0.3.1" && c.suggested_name == "version" && c.confidence == 0.85));
         assert!(candidates.iter().any(|c| c.value == "Alice"));
     }
 
@@ -1303,7 +1359,7 @@ mod tests {
         let dir = tempfile::tempdir().unwrap();
         std::fs::write(
             dir.path().join("package.json"),
-            r#"{"name": "@myorg/cool-widget", "author": "Bob Smith <bob@example.com>"}"#,
+            r#"{"name": "@myorg/cool-widget", "version": "2.1.0", "author": "Bob Smith <bob@example.com>"}"#,
         )
         .unwrap();
 
@@ -1316,6 +1372,13 @@ mod tests {
             .unwrap();
         assert_eq!(name_candidate.value, "cool-widget");
 
+        let version_candidate = candidates
+            .iter()
+            .find(|c| c.suggested_name == "version")
+            .unwrap();
+        assert_eq!(version_candidate.value, "2.1.0");
+        assert_eq!(version_candidate.confidence, 0.85);
+
         let author_candidate = candidates
             .iter()
             .find(|c| c.suggested_name == "author")
@@ -1328,7 +1391,7 @@ mod tests {
         let dir = tempfile::tempdir().unwrap();
         std::fs::write(
             dir.path().join("pyproject.toml"),
-            "[project]\nname = \"my-tool\"\n\n[[project.authors]]\nname = \"Charlie\"\n",
+            "[project]\nname = \"my-tool\"\nversion = \"1.0.0\"\n\n[[project.authors]]\nname = \"Charlie\"\n",
         )
         .unwrap();
 
@@ -1336,6 +1399,9 @@ mod tests {
         let candidates = parse_pyproject_toml(dir.path(), &scan).unwrap();
 
         assert!(candidates.iter().any(|c| c.value == "my-tool"));
+        assert!(candidates
+            .iter()
+            .any(|c| c.value == "1.0.0" && c.suggested_name == "version" && c.confidence == 0.85));
         assert!(candidates.iter().any(|c| c.value == "Charlie"));
     }
 
@@ -1385,6 +1451,37 @@ mod tests {
         assert!(parse_cargo_toml(dir.path(), &scan).is_none());
     }
 
+    #[test]
+    fn test_tier2_version_missing() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(
+            dir.path().join("Cargo.toml"),
+            "[package]\nname = \"no-version-crate\"\n",
+        )
+        .unwrap();
+        std::fs::write(
+            dir.path().join("package.json"),
+            r#"{"name": "no-version-pkg"}"#,
+        )
+        .unwrap();
+        std::fs::write(
+            dir.path().join("pyproject.toml"),
+            "[project]\nname = \"no-version-py\"\n",
+        )
+        .unwrap();
+
+        let scan = make_scan_result(vec![]);
+
+        let cargo = parse_cargo_toml(dir.path(), &scan).unwrap();
+        assert!(!cargo.iter().any(|c| c.suggested_name == "version"));
+
+        let pkg = parse_package_json(dir.path(), &scan).unwrap();
+        assert!(!pkg.iter().any(|c| c.suggested_name == "version"));
+
+        let pyproj = parse_pyproject_toml(dir.path(), &scan).unwrap();
+        assert!(!pyproj.iter().any(|c| c.suggested_name == "version"));
+    }
+
     // ── Tier 3 tests ─────────────────────────────────────────────────
 
     #[test]

From 5657b71f4e001cefbe05a1db1d645f9c4d40e8eb Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 14:40:48 -0500
Subject: [PATCH 22/29] refactor(extract): simplify auto-detect and extract
 interactive UI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Extract 6 interactive UI functions from mod.rs into interactive.rs
- Deduplicate config parsers with push_config_candidate helper
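- Replace Tier 4 frequency analysis (~770 lines of noise-filter lists)
  with a ~60-line multi-variant heuristic: a token cluster qualifies only
  with ≥2 case forms, ≥3 total occurrences, and ≥2 files, and is reported
  at a fixed confidence of 0.60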
- Remove strsim dependency (no longer needed)
---
 Cargo.lock                 |    1 -
 Cargo.toml                 |    1 -
 src/extract/auto_detect.rs | 1002 +++++++-----------------------------
 src/extract/interactive.rs |  411 +++++++++++++++
 src/extract/mod.rs         |  412 +--------------
 5 files changed, 597 insertions(+), 1230 deletions(-)
 create mode 100644 src/extract/interactive.rs

diff --git a/Cargo.lock b/Cargo.lock
index 113ae0e..ce434bc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -462,7 +462,6 @@ dependencies = [
  "serde",
  "serde_json",
  "sha2",
- "strsim",
  "tempfile",
  "tera",
  "thiserror",
diff --git a/Cargo.toml b/Cargo.toml
index ecca649..09bd987 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,7 +35,6 @@ sha2 = "0.10"
 fs4 = "0.12"
 content_inspector = "0.2"
 indexmap = { version = "2.11.4", features = ["serde"] }
-strsim = "0.11"
 
 [dev-dependencies]
 rstest = "0.23"
diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
index ae0fc7b..193bbac 100644
--- a/src/extract/auto_detect.rs
+++ b/src/extract/auto_detect.rs
@@ -55,15 +55,6 @@ pub struct AutoDetectResult {
     pub candidates: Vec<DetectedCandidate>,
 }
 
-struct TokenCluster {
-    normalized: Vec<String>,
-    literals: Vec<String>,
-    total_occurrences: usize,
-    file_count: usize,
-    matches_dir_name: bool,
-    in_config_value: bool,
-}
-
 // ── Entry point ──────────────────────────────────────────────────────────
 
 /// Run all 4 auto-detection tiers against a scanned project.
@@ -83,25 +74,8 @@ pub fn auto_detect(project_dir: &Path, scan_result: &ScanResult) -> AutoDetectRe
     let covered_values: HashSet<String> =
         candidates.iter().map(|c| c.value.to_lowercase()).collect();
 
-    // Collect config values for frequency analysis boosting
-    let config_values: HashSet<String> = candidates
-        .iter()
-        .filter(|c| c.tier == ConfidenceTier::ConfigFile)
-        .map(|c| c.value.to_lowercase())
-        .collect();
-
-    let dir_name = project_dir
-        .file_name()
-        .map(|n| n.to_string_lossy().to_lowercase())
-        .unwrap_or_default();
-
     // Tier 4: Frequency analysis
-    candidates.extend(detect_frequency(
-        scan_result,
-        &covered_values,
-        &config_values,
-        &dir_name,
-    ));
+    candidates.extend(detect_frequency(scan_result, &covered_values));
 
     // Deduplicate by normalized word list, keeping highest confidence
     deduplicate_candidates(&mut candidates);
@@ -203,6 +177,26 @@ fn detect_config_files(project_dir: &Path, scan_result: &ScanResult) -> Vec<Dete
     candidates
 }
 
+fn push_config_candidate(
+    candidates: &mut Vec<DetectedCandidate>,
+    value: &str,
+    suggested_name: &str,
+    confidence: f64,
+    reason: &str,
+    scan_result: &ScanResult,
+) {
+    let (file_count, total_occurrences) = count_occurrences(value, scan_result);
+    candidates.push(DetectedCandidate {
+        suggested_name: suggested_name.to_string(),
+        value: value.to_string(),
+        tier: ConfidenceTier::ConfigFile,
+        confidence,
+        reason: reason.to_string(),
+        file_count,
+        total_occurrences,
+    });
+}
+
 fn parse_cargo_toml(
     project_dir: &Path,
     scan_result: &ScanResult,
@@ -218,16 +212,14 @@ fn parse_cargo_toml(
         .and_then(|p| p.get("name"))
         .and_then(|n| n.as_str())
     {
-        let (file_count, total_occurrences) = count_occurrences(name, scan_result);
-        candidates.push(DetectedCandidate {
-            suggested_name: "project_name".to_string(),
-            value: name.to_string(),
-            tier: ConfidenceTier::ConfigFile,
-            confidence: 0.90,
-            reason: "Cargo.toml [package].name".to_string(),
-            file_count,
-            total_occurrences,
-        });
+        push_config_candidate(
+            &mut candidates,
+            name,
+            "project_name",
+            0.90,
+            "Cargo.toml [package].name",
+            scan_result,
+        );
     }
 
     if let Some(version) = parsed
@@ -236,16 +228,14 @@ fn parse_cargo_toml(
         .and_then(|v| v.as_str())
     {
         if !version.is_empty() {
-            let (file_count, total_occurrences) = count_occurrences(version, scan_result);
-            candidates.push(DetectedCandidate {
-                suggested_name: "version".to_string(),
-                value: version.to_string(),
-                tier: ConfidenceTier::ConfigFile,
-                confidence: 0.85,
-                reason: "Cargo.toml [package].version".to_string(),
-                file_count,
-                total_occurrences,
-            });
+            push_config_candidate(
+                &mut candidates,
+                version,
+                "version",
+                0.85,
+                "Cargo.toml [package].version",
+                scan_result,
+            );
         }
     }
 
@@ -257,16 +247,14 @@ fn parse_cargo_toml(
         if let Some(first) = authors.first().and_then(|a| a.as_str()) {
             let author = strip_email(first);
             if !author.is_empty() {
-                let (file_count, total_occurrences) = count_occurrences(&author, scan_result);
-                candidates.push(DetectedCandidate {
-                    suggested_name: "author".to_string(),
-                    value: author.clone(),
-                    tier: ConfidenceTier::ConfigFile,
-                    confidence: 0.85,
-                    reason: "Cargo.toml [package].authors[0]".to_string(),
-                    file_count,
-                    total_occurrences,
-                });
+                push_config_candidate(
+                    &mut candidates,
+                    &author,
+                    "author",
+                    0.85,
+                    "Cargo.toml [package].authors[0]",
+                    scan_result,
+                );
             }
         }
     }
@@ -285,32 +273,27 @@ fn parse_package_json(
     let mut candidates = Vec::new();
 
     if let Some(name) = parsed.get("name").and_then(|n| n.as_str()) {
-        // Strip npm scope @org/
         let clean_name = strip_npm_scope(name);
-        let (file_count, total_occurrences) = count_occurrences(clean_name, scan_result);
-        candidates.push(DetectedCandidate {
-            suggested_name: "project_name".to_string(),
-            value: clean_name.to_string(),
-            tier: ConfidenceTier::ConfigFile,
-            confidence: 0.90,
-            reason: "package.json \"name\"".to_string(),
-            file_count,
-            total_occurrences,
-        });
+        push_config_candidate(
+            &mut candidates,
+            clean_name,
+            "project_name",
+            0.90,
+            "package.json \"name\"",
+            scan_result,
+        );
     }
 
     if let Some(version) = parsed.get("version").and_then(|v| v.as_str()) {
         if !version.is_empty() {
-            let (file_count, total_occurrences) = count_occurrences(version, scan_result);
-            candidates.push(DetectedCandidate {
-                suggested_name: "version".to_string(),
-                value: version.to_string(),
-                tier: ConfidenceTier::ConfigFile,
-                confidence: 0.85,
-                reason: "package.json \"version\"".to_string(),
-                file_count,
-                total_occurrences,
-            });
+            push_config_candidate(
+                &mut candidates,
+                version,
+                "version",
+                0.85,
+                "package.json \"version\"",
+                scan_result,
+            );
         }
     }
 
@@ -324,16 +307,14 @@ fn parse_package_json(
         };
         if let Some(author_name) = author_str {
             if !author_name.is_empty() {
-                let (file_count, total_occurrences) = count_occurrences(&author_name, scan_result);
-                candidates.push(DetectedCandidate {
-                    suggested_name: "author".to_string(),
-                    value: author_name,
-                    tier: ConfidenceTier::ConfigFile,
-                    confidence: 0.85,
-                    reason: "package.json \"author\"".to_string(),
-                    file_count,
-                    total_occurrences,
-                });
+                push_config_candidate(
+                    &mut candidates,
+                    &author_name,
+                    "author",
+                    0.85,
+                    "package.json \"author\"",
+                    scan_result,
+                );
             }
         }
     }
@@ -356,16 +337,14 @@ fn parse_pyproject_toml(
         .and_then(|p| p.get("name"))
         .and_then(|n| n.as_str())
     {
-        let (file_count, total_occurrences) = count_occurrences(name, scan_result);
-        candidates.push(DetectedCandidate {
-            suggested_name: "project_name".to_string(),
-            value: name.to_string(),
-            tier: ConfidenceTier::ConfigFile,
-            confidence: 0.90,
-            reason: "pyproject.toml [project].name".to_string(),
-            file_count,
-            total_occurrences,
-        });
+        push_config_candidate(
+            &mut candidates,
+            name,
+            "project_name",
+            0.90,
+            "pyproject.toml [project].name",
+            scan_result,
+        );
     }
 
     if let Some(version) = parsed
@@ -374,16 +353,14 @@ fn parse_pyproject_toml(
         .and_then(|v| v.as_str())
     {
         if !version.is_empty() {
-            let (file_count, total_occurrences) = count_occurrences(version, scan_result);
-            candidates.push(DetectedCandidate {
-                suggested_name: "version".to_string(),
-                value: version.to_string(),
-                tier: ConfidenceTier::ConfigFile,
-                confidence: 0.85,
-                reason: "pyproject.toml [project].version".to_string(),
-                file_count,
-                total_occurrences,
-            });
+            push_config_candidate(
+                &mut candidates,
+                version,
+                "version",
+                0.85,
+                "pyproject.toml [project].version",
+                scan_result,
+            );
         }
     }
 
@@ -400,16 +377,14 @@ fn parse_pyproject_toml(
                 .map(strip_email);
             if let Some(name) = author_name {
                 if !name.is_empty() {
-                    let (file_count, total_occurrences) = count_occurrences(&name, scan_result);
-                    candidates.push(DetectedCandidate {
-                        suggested_name: "author".to_string(),
-                        value: name,
-                        tier: ConfidenceTier::ConfigFile,
-                        confidence: 0.85,
-                        reason: "pyproject.toml [project].authors[0].name".to_string(),
-                        file_count,
-                        total_occurrences,
-                    });
+                    push_config_candidate(
+                        &mut candidates,
+                        &name,
+                        "author",
+                        0.85,
+                        "pyproject.toml [project].authors[0].name",
+                        scan_result,
+                    );
                 }
             }
         }
@@ -434,32 +409,29 @@ fn parse_go_mod(project_dir: &Path, scan_result: &ScanResult) -> Option<Vec<Dete
 
     let mut candidates = Vec::new();
 
-    let (file_count, total_occurrences) = count_occurrences(name, scan_result);
-    candidates.push(DetectedCandidate {
-        suggested_name: "project_name".to_string(),
-        value: name.to_string(),
-        tier: ConfidenceTier::ConfigFile,
-        confidence: 0.90,
-        reason: format!("go.mod module \"{}\"", module_path),
-        file_count,
-        total_occurrences,
-    });
+    push_config_candidate(
+        &mut candidates,
+        name,
+        "project_name",
+        0.90,
+        &format!("go.mod module \"{}\"", module_path),
+        scan_result,
+    );
 
     // Extract org name (second-to-last segment for github.com/org/repo patterns)
     if segments.len() >= 3 {
         let org = segments[segments.len() - 2];
         if !org.is_empty() && org != name {
-            let (org_file_count, org_total_occurrences) = count_occurrences(org, scan_result);
+            let (_, org_total_occurrences) = count_occurrences(org, scan_result);
             if org_total_occurrences > 0 {
-                candidates.push(DetectedCandidate {
-                    suggested_name: "org_name".to_string(),
-                    value: org.to_string(),
-                    tier: ConfidenceTier::ConfigFile,
-                    confidence: 0.85,
-                    reason: format!("go.mod module org \"{}\"", org),
-                    file_count: org_file_count,
-                    total_occurrences: org_total_occurrences,
-                });
+                push_config_candidate(
+                    &mut candidates,
+                    org,
+                    "org_name",
+                    0.85,
+                    &format!("go.mod module org \"{}\"", org),
+                    scan_result,
+                );
             }
         }
     }
@@ -559,8 +531,6 @@ fn parse_org_from_url(url: &str) -> Option<String> {
 fn detect_frequency(
     scan_result: &ScanResult,
     covered_values: &HashSet<String>,
-    config_values: &HashSet<String>,
-    dir_name: &str,
 ) -> Vec<DetectedCandidate> {
     // Tokenize all text file content
     let mut token_file_map: HashMap<String, HashSet<usize>> = HashMap::new();
@@ -579,64 +549,58 @@ fn detect_frequency(
         }
     }
 
-    // Build clusters by normalized word list
-    let mut clusters: HashMap<String, TokenCluster> = HashMap::new();
+    // Group tokens by normalized word list to find multi-variant clusters
+    struct Cluster {
+        literals: Vec<String>,
+        total_occurrences: usize,
+        files: HashSet<usize>,
+    }
+
+    let mut clusters: HashMap<String, Cluster> = HashMap::new();
 
     for (token, count) in &token_counts {
         let words = split_into_words(token);
-
-        // Filter noise
-        if words.iter().all(|w| w.len() < 3) {
-            continue;
-        }
-        if is_noise_token(token, &words) {
-            continue;
-        }
-
         let normalized_key = words.join(" ");
 
-        let file_count = token_file_map.get(token).map(|s| s.len()).unwrap_or(0);
-
-        // Skip single-occurrence-single-file tokens
-        if *count == 1 && file_count <= 1 {
+        // Token must be at least 4 chars
+        if token.len() < 4 {
             continue;
         }
 
-        let matches_dir =
-            normalized_key == split_into_words(dir_name).join(" ") && !dir_name.is_empty();
-        let in_config = config_values.contains(&token.to_lowercase());
-
-        let cluster = clusters
-            .entry(normalized_key.clone())
-            .or_insert_with(|| TokenCluster {
-                normalized: words.clone(),
-                literals: Vec::new(),
-                total_occurrences: 0,
-                file_count: 0,
-                matches_dir_name: false,
-                in_config_value: false,
-            });
+        let cluster = clusters.entry(normalized_key).or_insert_with(|| Cluster {
+            literals: Vec::new(),
+            total_occurrences: 0,
+            files: HashSet::new(),
+        });
 
         if !cluster.literals.contains(token) {
             cluster.literals.push(token.clone());
         }
         cluster.total_occurrences += count;
-        // Merge file sets for accurate file_count
-        let files_for_token = token_file_map.get(token).map(|s| s.len()).unwrap_or(0);
-        if files_for_token > cluster.file_count {
-            cluster.file_count = files_for_token;
+        if let Some(file_set) = token_file_map.get(token) {
+            cluster.files.extend(file_set);
         }
-        cluster.matches_dir_name = cluster.matches_dir_name || matches_dir;
-        cluster.in_config_value = cluster.in_config_value || in_config;
     }
 
-    // Merge near-misses using Levenshtein distance
-    merge_similar_clusters(&mut clusters);
-
-    // Score and convert to candidates
+    // Filter and convert to candidates
     let mut freq_candidates: Vec<DetectedCandidate> = Vec::new();
 
-    for (key, cluster) in &clusters {
+    for cluster in clusters.values() {
+        // Must have ≥2 distinct case variants (the key multi-variant heuristic)
+        if cluster.literals.len() < 2 {
+            continue;
+        }
+
+        // Must have ≥3 total occurrences
+        if cluster.total_occurrences < 3 {
+            continue;
+        }
+
+        // Must appear in ≥2 files
+        if cluster.files.len() < 2 {
+            continue;
+        }
+
         // Skip if already covered by higher tiers
         if cluster
             .literals
@@ -646,552 +610,42 @@ fn detect_frequency(
             continue;
         }
 
-        let score = score_cluster(cluster);
-
-        // Filter low-scoring candidates
-        if score < 0.30 {
-            continue;
-        }
-
         let best_literal = &cluster.literals[0];
-        let suggested_name = suggest_variable_name(&cluster.normalized, key);
+        let words = split_into_words(best_literal);
+        let suggested_name = if words.len() <= 3 {
+            words.join("_")
+        } else {
+            words[..3].join("_")
+        };
 
+        let file_count = cluster.files.len();
         freq_candidates.push(DetectedCandidate {
             suggested_name,
             value: best_literal.clone(),
             tier: ConfidenceTier::FrequencyAnalysis,
-            confidence: score,
+            confidence: 0.60,
             reason: format!(
                 "{} occurrences across {} files, {} variant(s)",
                 cluster.total_occurrences,
-                cluster.file_count,
+                file_count,
                 cluster.literals.len()
             ),
-            file_count: cluster.file_count,
+            file_count,
             total_occurrences: cluster.total_occurrences,
         });
     }
 
-    // Sort by confidence, take top 5
-    freq_candidates.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
+    // Sort by file_count * total_occurrences descending, take top 5
+    freq_candidates.sort_by(|a, b| {
+        let score_a = a.file_count * a.total_occurrences;
+        let score_b = b.file_count * b.total_occurrences;
+        score_b.cmp(&score_a)
+    });
     freq_candidates.truncate(5);
 
     freq_candidates
 }
 
-fn score_cluster(cluster: &TokenCluster) -> f64 {
-    // Occurrence count (log-scaled, 0.0..1.0)
-    let occ_score = (cluster.total_occurrences as f64).ln_1p() / 10.0_f64.ln_1p();
-    let occ_score = occ_score.min(1.0);
-
-    // File spread (log-scaled, 0.0..1.0)
-    let file_score = (cluster.file_count as f64).ln_1p() / 10.0_f64.ln_1p();
-    let file_score = file_score.min(1.0);
-
-    // Variant diversity
-    let variant_score = match cluster.literals.len() {
-        0 | 1 => 0.0,
-        2 => 0.5,
-        3 => 0.75,
-        _ => 1.0,
-    };
-
-    // Directory name match (binary)
-    let dir_score = if cluster.matches_dir_name { 1.0 } else { 0.0 };
-
-    // Config value match (binary)
-    let config_score = if cluster.in_config_value { 1.0 } else { 0.0 };
-
-    0.15 * occ_score
-        + 0.20 * file_score
-        + 0.35 * variant_score
-        + 0.20 * dir_score
-        + 0.10 * config_score
-}
-
-fn merge_similar_clusters(clusters: &mut HashMap<String, TokenCluster>) {
-    let keys: Vec<String> = clusters.keys().cloned().collect();
-    let mut merge_map: HashMap<String, String> = HashMap::new();
-
-    for i in 0..keys.len() {
-        for j in (i + 1)..keys.len() {
-            if merge_map.contains_key(&keys[j]) {
-                continue;
-            }
-            let dist = strsim::levenshtein(&keys[i], &keys[j]);
-            if dist <= 1 {
-                let size_i = clusters
-                    .get(&keys[i])
-                    .map(|c| c.total_occurrences)
-                    .unwrap_or(0);
-                let size_j = clusters
-                    .get(&keys[j])
-                    .map(|c| c.total_occurrences)
-                    .unwrap_or(0);
-                if size_i >= size_j {
-                    merge_map.insert(keys[j].clone(), keys[i].clone());
-                } else {
-                    merge_map.insert(keys[i].clone(), keys[j].clone());
-                }
-            }
-        }
-    }
-
-    // Resolve merge chains: if A→B and B→C, then A→C
-    // Use a visited set to guard against cycles.
-    let resolved: HashMap<String, String> = merge_map
-        .keys()
-        .map(|k| {
-            let mut target = merge_map[k].clone();
-            let mut seen = HashSet::new();
-            seen.insert(k.clone());
-            while let Some(next) = merge_map.get(&target) {
-                if !seen.insert(next.clone()) {
-                    break;
-                }
-                target = next.clone();
-            }
-            (k.clone(), target)
-        })
-        .collect();
-
-    for (from, to) in &resolved {
-        if let Some(removed) = clusters.remove(from) {
-            if let Some(target) = clusters.get_mut(to) {
-                for lit in removed.literals {
-                    if !target.literals.contains(&lit) {
-                        target.literals.push(lit);
-                    }
-                }
-                target.total_occurrences += removed.total_occurrences;
-                if removed.file_count > target.file_count {
-                    target.file_count = removed.file_count;
-                }
-                target.matches_dir_name = target.matches_dir_name || removed.matches_dir_name;
-                target.in_config_value = target.in_config_value || removed.in_config_value;
-            }
-        }
-    }
-}
-
-fn suggest_variable_name(words: &[String], _key: &str) -> String {
-    if words.len() <= 3 {
-        words.join("_")
-    } else {
-        // Truncate long names
-        words[..3].join("_")
-    }
-}
-
-// ── Noise filtering ──────────────────────────────────────────────────────
-
-fn is_noise_token(token: &str, words: &[String]) -> bool {
-    let lower = token.to_lowercase();
-
-    // Too short
-    if lower.len() < 3 {
-        return true;
-    }
-
-    // Language keywords
-    if LANGUAGE_KEYWORDS.contains(&lower.as_str()) {
-        return true;
-    }
-
-    // Common library names
-    if COMMON_LIBRARIES.contains(&lower.as_str()) {
-        return true;
-    }
-
-    // Stopwords (individual words)
-    if words.len() == 1 && STOPWORDS.contains(&lower.as_str()) {
-        return true;
-    }
-
-    // All words are stopwords, file-format words, or very short
-    if words.iter().all(|w| {
-        w.len() < 3 || STOPWORDS.contains(&w.as_str()) || FILE_FORMAT_WORDS.contains(&w.as_str())
-    }) {
-        return true;
-    }
-
-    false
-}
-
-const FILE_FORMAT_WORDS: &[&str] = &[
-    "toml", "json", "yaml", "yml", "xml", "csv", "html", "css", "md", "txt", "log", "cfg", "ini",
-    "env", "lock", "mod", "rs", "js", "ts", "py", "go", "rb", "java", "kt", "swift", "cpp", "hpp",
-    "vue", "jsx", "tsx",
-];
-
-const LANGUAGE_KEYWORDS: &[&str] = &[
-    // Rust
-    "async",
-    "await",
-    "break",
-    "const",
-    "continue",
-    "crate",
-    "dyn",
-    "else",
-    "enum",
-    "extern",
-    "false",
-    "fn",
-    "for",
-    "if",
-    "impl",
-    "in",
-    "let",
-    "loop",
-    "match",
-    "mod",
-    "move",
-    "mut",
-    "pub",
-    "ref",
-    "return",
-    "self",
-    "static",
-    "struct",
-    "super",
-    "trait",
-    "true",
-    "type",
-    "unsafe",
-    "use",
-    "where",
-    "while",
-    "yield",
-    // JS/TS
-    "abstract",
-    "arguments",
-    "boolean",
-    "byte",
-    "case",
-    "catch",
-    "char",
-    "class",
-    "debugger",
-    "default",
-    "delete",
-    "do",
-    "double",
-    "eval",
-    "export",
-    "extends",
-    "final",
-    "finally",
-    "float",
-    "function",
-    "goto",
-    "implements",
-    "import",
-    "instanceof",
-    "int",
-    "interface",
-    "long",
-    "native",
-    "new",
-    "null",
-    "package",
-    "private",
-    "protected",
-    "public",
-    "short",
-    "switch",
-    "synchronized",
-    "this",
-    "throw",
-    "throws",
-    "transient",
-    "try",
-    "typeof",
-    "undefined",
-    "var",
-    "void",
-    "volatile",
-    "with",
-    // Python
-    "and",
-    "as",
-    "assert",
-    "class",
-    "def",
-    "del",
-    "elif",
-    "except",
-    "exec",
-    "from",
-    "global",
-    "is",
-    "lambda",
-    "nonlocal",
-    "not",
-    "or",
-    "pass",
-    "print",
-    "raise",
-    "with",
-    "yield",
-    // Go
-    "chan",
-    "defer",
-    "fallthrough",
-    "go",
-    "goroutine",
-    "interface",
-    "map",
-    "range",
-    "select",
-    "func",
-];
-
-const COMMON_LIBRARIES: &[&str] = &[
-    "react",
-    "redux",
-    "webpack",
-    "babel",
-    "eslint",
-    "prettier",
-    "jest",
-    "mocha",
-    "chai",
-    "express",
-    "fastify",
-    "next",
-    "nuxt",
-    "vue",
-    "angular",
-    "svelte",
-    "serde",
-    "tokio",
-    "actix",
-    "axum",
-    "clap",
-    "anyhow",
-    "thiserror",
-    "tracing",
-    "reqwest",
-    "hyper",
-    "warp",
-    "rocket",
-    "diesel",
-    "sqlx",
-    "django",
-    "flask",
-    "fastapi",
-    "pytest",
-    "numpy",
-    "pandas",
-    "scipy",
-    "spring",
-    "hibernate",
-    "junit",
-    "maven",
-    "gradle",
-    "gin",
-    "echo",
-    "fiber",
-    "gorm",
-    "lodash",
-    "axios",
-    "moment",
-    "dayjs",
-    "ramda",
-    "underscore",
-    "tailwind",
-    "bootstrap",
-    "material",
-    "typescript",
-    "javascript",
-    "python",
-    "golang",
-    "rustlang",
-];
-
-const STOPWORDS: &[&str] = &[
-    // English stopwords
-    "the",
-    "and",
-    "for",
-    "are",
-    "but",
-    "not",
-    "you",
-    "all",
-    "can",
-    "had",
-    "her",
-    "was",
-    "one",
-    "our",
-    "out",
-    "get",
-    "set",
-    "has",
-    "his",
-    "how",
-    "its",
-    "let",
-    "may",
-    "new",
-    "now",
-    "old",
-    "see",
-    "way",
-    "who",
-    "did",
-    "got",
-    "has",
-    "him",
-    "into",
-    "just",
-    "like",
-    "make",
-    "many",
-    "some",
-    "than",
-    "them",
-    "then",
-    "very",
-    "when",
-    "with",
-    "have",
-    "from",
-    "been",
-    "also",
-    "each",
-    "that",
-    "this",
-    "will",
-    "your",
-    "what",
-    "which",
-    "their",
-    "about",
-    "would",
-    "there",
-    "could",
-    "other",
-    "after",
-    "first",
-    "these",
-    "those",
-    "being",
-    "where",
-    "should",
-    "because",
-    // Short generic words common in code identifiers
-    "my",
-    "no",
-    "is",
-    "on",
-    "in",
-    "to",
-    "by",
-    "do",
-    "up",
-    "so",
-    "or",
-    "app",
-    "run",
-    "dry",
-    "log",
-    "cmd",
-    "arg",
-    "env",
-    "dir",
-    "key",
-    "map",
-    "max",
-    "min",
-    "raw",
-    "ref",
-    "src",
-    "str",
-    "tmp",
-    "url",
-    "var",
-    "buf",
-    "msg",
-    "req",
-    "res",
-    "err",
-    "pkg",
-    "lib",
-    "bin",
-    "fmt",
-    "ctx",
-    "cfg",
-    "opt",
-    "val",
-    "idx",
-    "len",
-    "ptr",
-    "num",
-    "std",
-    "gen",
-    "pre",
-    "sub",
-    // Programming type/concept words
-    "string",
-    "number",
-    "bool",
-    "boolean",
-    "array",
-    "object",
-    "value",
-    "result",
-    "error",
-    "option",
-    "none",
-    "some",
-    "true",
-    "false",
-    "null",
-    "undefined",
-    "file",
-    "path",
-    "name",
-    "type",
-    "data",
-    "info",
-    "list",
-    "item",
-    "node",
-    "index",
-    "count",
-    "size",
-    "length",
-    "config",
-    "settings",
-    "options",
-    "input",
-    "output",
-    "source",
-    "target",
-    "test",
-    "main",
-    "init",
-    "setup",
-    "todo",
-    "fixme",
-    "hack",
-    "note",
-    "warning",
-    "debug",
-    "trace",
-    "level",
-    "mode",
-    "flag",
-    "status",
-    "state",
-    "cache",
-    "hook",
-    "hooks",
-];
-
 // ── Helpers ──────────────────────────────────────────────────────────────
 
 pub fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize) {
@@ -1527,11 +981,10 @@ mod tests {
         ]);
 
         let covered = HashSet::new();
-        let config_vals = HashSet::new();
-        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
+        let candidates = detect_frequency(&scan, &covered);
 
         assert!(!candidates.is_empty());
-        // Should find "data-pipeline" cluster
+        // Should find "data-pipeline" cluster (multi-variant)
         let found = candidates.iter().any(|c| {
             let words = split_into_words(&c.value);
             words == vec!["data", "pipeline"]
@@ -1543,36 +996,12 @@ mod tests {
         );
     }
 
-    #[test]
-    fn test_frequency_filters_keywords() {
-        let scan = make_scan_result(vec![
-            ("a.rs", "fn async_handler() {}"),
-            ("b.rs", "fn async_handler() {}"),
-            ("c.rs", "fn async_handler() {}"),
-        ]);
-
-        let covered = HashSet::new();
-        let config_vals = HashSet::new();
-        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
-
-        // "async" alone should be filtered
-        for c in &candidates {
-            let lower = c.value.to_lowercase();
-            assert!(
-                !LANGUAGE_KEYWORDS.contains(&lower.as_str())
-                    || c.value.contains('-')
-                    || c.value.contains('_')
-            );
-        }
-    }
-
     #[test]
     fn test_frequency_filters_short_tokens() {
         let scan = make_scan_result(vec![("a.txt", "ab cd ef gh"), ("b.txt", "ab cd ef gh")]);
 
         let covered = HashSet::new();
-        let config_vals = HashSet::new();
-        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
+        let candidates = detect_frequency(&scan, &covered);
 
         assert!(candidates.is_empty(), "short tokens should be filtered");
     }
@@ -1587,8 +1016,7 @@ mod tests {
 
         let mut covered = HashSet::new();
         covered.insert("my-widget".to_string());
-        let config_vals = HashSet::new();
-        let candidates = detect_frequency(&scan, &covered, &config_vals, "");
+        let candidates = detect_frequency(&scan, &covered);
 
         let has_widget = candidates
             .iter()
@@ -1597,83 +1025,22 @@ mod tests {
     }
 
     #[test]
-    fn test_score_cluster_multi_variant_boost() {
-        let single_variant = TokenCluster {
-            normalized: vec!["my".into(), "app".into()],
-            literals: vec!["my-app".into()],
-            total_occurrences: 10,
-            file_count: 5,
-            matches_dir_name: false,
-            in_config_value: false,
-        };
-
-        let multi_variant = TokenCluster {
-            normalized: vec!["my".into(), "app".into()],
-            literals: vec!["my-app".into(), "my_app".into(), "MyApp".into()],
-            total_occurrences: 10,
-            file_count: 5,
-            matches_dir_name: false,
-            in_config_value: false,
-        };
-
-        assert!(score_cluster(&multi_variant) > score_cluster(&single_variant));
-    }
-
-    #[test]
-    fn test_score_cluster_dir_name_boost() {
-        let no_dir = TokenCluster {
-            normalized: vec!["my".into(), "app".into()],
-            literals: vec!["my-app".into()],
-            total_occurrences: 5,
-            file_count: 3,
-            matches_dir_name: false,
-            in_config_value: false,
-        };
-
-        let with_dir = TokenCluster {
-            normalized: vec!["my".into(), "app".into()],
-            literals: vec!["my-app".into()],
-            total_occurrences: 5,
-            file_count: 3,
-            matches_dir_name: true,
-            in_config_value: false,
-        };
+    fn test_frequency_requires_multi_variant() {
+        // Single variant only — should NOT be detected even with many occurrences
+        let scan = make_scan_result(vec![
+            ("a.txt", "async_handler async_handler async_handler"),
+            ("b.txt", "async_handler async_handler"),
+            ("c.txt", "async_handler"),
+        ]);
 
-        assert!(score_cluster(&with_dir) > score_cluster(&no_dir));
-    }
+        let covered = HashSet::new();
+        let candidates = detect_frequency(&scan, &covered);
 
-    #[test]
-    fn test_levenshtein_merging() {
-        let mut clusters = HashMap::new();
-        clusters.insert(
-            "data pipeline".to_string(),
-            TokenCluster {
-                normalized: vec!["data".into(), "pipeline".into()],
-                literals: vec!["data-pipeline".into()],
-                total_occurrences: 10,
-                file_count: 5,
-                matches_dir_name: false,
-                in_config_value: false,
-            },
-        );
-        clusters.insert(
-            "data pipelin".to_string(), // typo / near miss
-            TokenCluster {
-                normalized: vec!["data".into(), "pipelin".into()],
-                literals: vec!["data-pipelin".into()],
-                total_occurrences: 2,
-                file_count: 1,
-                matches_dir_name: false,
-                in_config_value: false,
-            },
+        assert!(
+            candidates.is_empty(),
+            "single-variant tokens should be filtered, got: {:?}",
+            candidates
         );
-
-        merge_similar_clusters(&mut clusters);
-
-        // Should merge into one cluster
-        assert_eq!(clusters.len(), 1);
-        let remaining = clusters.values().next().unwrap();
-        assert_eq!(remaining.total_occurrences, 12);
     }
 
     // ── Helper tests ─────────────────────────────────────────────────
@@ -1737,21 +1104,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn test_suggest_variable_name() {
-        assert_eq!(
-            suggest_variable_name(&["my".into(), "app".into()], "my app"),
-            "my_app"
-        );
-        assert_eq!(
-            suggest_variable_name(
-                &["very".into(), "long".into(), "name".into(), "here".into()],
-                "very long name here"
-            ),
-            "very_long_name"
-        );
-    }
-
     #[test]
     fn test_strip_npm_scope() {
         assert_eq!(strip_npm_scope("@myorg/cool-widget"), "cool-widget");
diff --git a/src/extract/interactive.rs b/src/extract/interactive.rs
new file mode 100644
index 0000000..6b6c55d
--- /dev/null
+++ b/src/extract/interactive.rs
@@ -0,0 +1,411 @@
+use std::collections::BTreeMap;
+
+use console::style;
+use inquire::{Confirm, Select, Text};
+
+use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
+use crate::error::{DicecutError, Result};
+
+use super::auto_detect::DetectedCandidate;
+use super::conditional::DetectedConditional;
+use super::variants::generate_variants;
+use super::{ExtractVariable, PlannedExtractFile};
+
+pub fn confirm_variants_interactive(
+    variables: Vec<ExtractVariable>,
+) -> Result<Vec<ExtractVariable>> {
+    let mut confirmed = Vec::new();
+
+    for mut var in variables {
+        eprintln!(
+            "\n{} {} = {:?} {}",
+            style("──").dim(),
+            style(&var.name).bold(),
+            var.value,
+            style("──────────────────────────────────────").dim()
+        );
+
+        if var.variants.len() == 1 && var.variants[0].name == "verbatim" {
+            // Simple value — just show occurrence count
+            let (file_count, total_hits) = var
+                .occurrence_counts
+                .first()
+                .map(|(_, fc, th)| (*fc, *th))
+                .unwrap_or((0, 0));
+            if total_hits > 0 {
+                eprintln!(
+                    "  Found in {} files ({} occurrences)",
+                    file_count, total_hits
+                );
+            } else {
+                eprintln!(
+                    "  {} Value not found in any file (will still be added to config)",
+                    style("⚠").yellow()
+                );
+            }
+            confirmed.push(var);
+            continue;
+        }
+
+        // Show detected variants with counts
+        eprintln!("  Detected case variants:");
+        let mut found_any = false;
+        for (i, variant) in var.variants.iter().enumerate() {
+            let (_, file_count, total_hits) = &var.occurrence_counts[i];
+            let mark = if *total_hits > 0 {
+                found_any = true;
+                style("✓").green().to_string()
+            } else {
+                style("✗").dim().to_string()
+            };
+            let hits_str = if *total_hits > 0 {
+                format!(
+                    "{} {} across {} {}",
+                    total_hits,
+                    if *total_hits == 1 { "hit" } else { "hits" },
+                    file_count,
+                    if *file_count == 1 { "file" } else { "files" }
+                )
+            } else {
+                "not found".to_string()
+            };
+            eprintln!(
+                "    {} {:<16} {:<20} {}",
+                mark,
+                variant.literal,
+                variant.name,
+                style(&hits_str).dim()
+            );
+        }
+
+        if !found_any {
+            eprintln!(
+                "  {} No occurrences found for any variant (will still be added to config)",
+                style("⚠").yellow()
+            );
+            // Keep just the first variant
+            var.variants.truncate(1);
+            confirmed.push(var);
+            continue;
+        }
+
+        let keep = Confirm::new("Keep detected variants?")
+            .with_default(true)
+            .prompt()
+            .map_err(|_| DicecutError::PromptCancelled)?;
+
+        if keep {
+            // Remove variants with zero occurrences
+            let counts = var.occurrence_counts.clone();
+            var.variants.retain(|v| {
+                counts
+                    .iter()
+                    .any(|(name, _, hits)| name == v.name && *hits > 0)
+            });
+            if var.variants.is_empty() {
+                let all = generate_variants(&var.name, &var.value);
+                if let Some(first) = all.into_iter().next() {
+                    var.variants.push(first);
+                }
+            }
+        } else {
+            // Keep only the canonical variant
+            var.variants.truncate(1);
+        }
+
+        confirmed.push(var);
+    }
+
+    Ok(confirmed)
+}
+
+pub fn confirm_excludes_interactive(mut excludes: Vec<String>) -> Result<Vec<String>> {
+    eprintln!(
+        "\n{} Excludes {}",
+        style("──").dim(),
+        style("─────────────────────────────────────────────").dim()
+    );
+    if excludes.is_empty() {
+        eprintln!("  No exclude patterns needed for this template.");
+    } else {
+        eprintln!("  Patterns matching template files:");
+        for e in &excludes {
+            eprintln!("    {}", e);
+        }
+    }
+
+    let extra = Text::new("Add extra exclude patterns? (comma-separated, enter to skip)")
+        .with_default("")
+        .prompt()
+        .map_err(|_| DicecutError::PromptCancelled)?;
+
+    if !extra.is_empty() {
+        for pattern in extra.split(',') {
+            let trimmed = pattern.trim().to_string();
+            if !trimmed.is_empty() {
+                excludes.push(trimmed);
+            }
+        }
+    }
+
+    Ok(excludes)
+}
+
+pub fn confirm_conditionals_interactive(
+    detected: Vec<DetectedConditional>,
+) -> Result<Vec<DetectedConditional>> {
+    eprintln!(
+        "\n{} Conditional files {}",
+        style("──").dim(),
+        style("────────────────────────────────────").dim()
+    );
+    eprintln!("  These look optional. Make them conditional?");
+
+    let mut confirmed = Vec::new();
+    for cond in detected {
+        let prompt = format!("  {} → {}", cond.pattern, cond.variable);
+        let include = Confirm::new(&prompt)
+            .with_default(false)
+            .prompt()
+            .map_err(|_| DicecutError::PromptCancelled)?;
+
+        if include {
+            confirmed.push(cond);
+        }
+    }
+
+    Ok(confirmed)
+}
+
+pub fn resolve_candidates_yes(
+    candidates: &[DetectedCandidate],
+    explicit_vars: &[(String, String)],
+) -> Vec<(String, String)> {
+    eprintln!(
+        "\n{} Auto-detected variables {}",
+        style("──").dim(),
+        style("──────────────────────────────────").dim()
+    );
+
+    // Group candidates by suggested_name
+    let mut groups: BTreeMap<String, Vec<&DetectedCandidate>> = BTreeMap::new();
+    for c in candidates {
+        groups.entry(c.suggested_name.clone()).or_default().push(c);
+    }
+
+    let mut result = Vec::new();
+
+    for (name, mut group) in groups {
+        // Skip names already covered by explicit --var
+        if explicit_vars.iter().any(|(n, _)| n == &name) {
+            eprintln!(
+                "  {} {} (explicit --var, skipping auto-detect)",
+                style("·").dim(),
+                style(&name).dim()
+            );
+            continue;
+        }
+
+        // For name collisions, pick highest confidence
+        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
+        let winner = group[0];
+
+        eprintln!(
+            "  {} {} = {:?} ({:.0}% confidence, {})",
+            style("✓").green(),
+            style(&winner.suggested_name).bold(),
+            winner.value,
+            winner.confidence * 100.0,
+            winner.tier
+        );
+        eprintln!("    {}", style(&winner.reason).dim());
+
+        if group.len() > 1 {
+            eprintln!(
+                "    {} {} other candidates for this name (picked highest confidence)",
+                style("⚠").yellow(),
+                group.len() - 1
+            );
+        }
+
+        result.push((winner.suggested_name.clone(), winner.value.clone()));
+    }
+
+    result
+}
+
+pub fn confirm_auto_detected_interactive(
+    candidates: Vec<DetectedCandidate>,
+    explicit_vars: &[(String, String)],
+) -> Result<Vec<(String, String)>> {
+    eprintln!(
+        "\n{} Auto-detected variables {}",
+        style("──").dim(),
+        style("──────────────────────────────────").dim()
+    );
+
+    // Group candidates by suggested_name
+    let mut groups: BTreeMap<String, Vec<DetectedCandidate>> = BTreeMap::new();
+    for c in candidates {
+        groups.entry(c.suggested_name.clone()).or_default().push(c);
+    }
+
+    let mut accepted = Vec::new();
+
+    for (name, mut group) in groups {
+        // Skip names already covered by explicit --var
+        if explicit_vars.iter().any(|(n, _)| n == &name) {
+            eprintln!(
+                "\n  {} {} (provided via --var, skipping)",
+                style("·").dim(),
+                style(&name).dim()
+            );
+            continue;
+        }
+
+        // Sort by confidence descending
+        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
+
+        if group.len() == 1 {
+            // Single candidate — simple confirm
+            let candidate = &group[0];
+            eprintln!(
+                "\n  {} = {:?} ({:.0}% confidence, {})",
+                style(&candidate.suggested_name).bold(),
+                candidate.value,
+                candidate.confidence * 100.0,
+                candidate.tier
+            );
+            eprintln!("    {}", style(&candidate.reason).dim());
+            if candidate.total_occurrences > 0 {
+                eprintln!(
+                    "    {} occurrences across {} files",
+                    candidate.total_occurrences, candidate.file_count
+                );
+            }
+
+            let accept = Confirm::new(&format!("Accept \"{}\"?", candidate.suggested_name))
+                .with_default(true)
+                .prompt()
+                .map_err(|_| DicecutError::PromptCancelled)?;
+
+            if accept {
+                accepted.push((candidate.suggested_name.clone(), candidate.value.clone()));
+            }
+        } else {
+            // Name collision — show selection prompt
+            eprintln!(
+                "\n  {} Multiple candidates for {}:",
+                style("⚠").yellow(),
+                style(&name).bold()
+            );
+
+            let mut options: Vec<String> = group
+                .iter()
+                .map(|c| {
+                    format!(
+                        "{:?} ({:.0}% confidence, {})",
+                        c.value,
+                        c.confidence * 100.0,
+                        c.tier
+                    )
+                })
+                .collect();
+            options.push("Skip".to_string());
+
+            let selection = Select::new(&format!("Which value for \"{}\"?", name), options)
+                .prompt()
+                .map_err(|_| DicecutError::PromptCancelled)?;
+
+            if selection != "Skip" {
+                // Find the matching candidate
+                if let Some(chosen) = group.iter().find(|c| {
+                    format!(
+                        "{:?} ({:.0}% confidence, {})",
+                        c.value,
+                        c.confidence * 100.0,
+                        c.tier
+                    ) == selection
+                }) {
+                    accepted.push((chosen.suggested_name.clone(), chosen.value.clone()));
+                }
+            }
+        }
+    }
+
+    Ok(accepted)
+}
+
+pub fn confirm_files_interactive(files: &[PlannedExtractFile], dropped_count: usize) -> Result<()> {
+    let templated: Vec<_> = files.iter().filter(|f| f.has_replacements()).collect();
+    let boilerplate: Vec<_> = files
+        .iter()
+        .filter(|f| !f.has_replacements() && !f.stubbed && !f.is_binary())
+        .collect();
+    let stubbed: Vec<_> = files.iter().filter(|f| f.stubbed).collect();
+    let binary_count = files.iter().filter(|f| f.is_binary()).count();
+
+    eprintln!(
+        "\n{} File plan {}",
+        style("──").dim(),
+        style("──────────────────────────────────────────").dim()
+    );
+
+    // Templated files
+    eprintln!(
+        "\n  {} ({} files, {} suffix):",
+        style("Templated").bold(),
+        templated.len(),
+        DEFAULT_TEMPLATES_SUFFIX
+    );
+    for file in &templated {
+        eprintln!(
+            "    {:<50} {} replacements",
+            file.template_path.display(),
+            file.replacement_count()
+        );
+    }
+
+    // Boilerplate files
+    eprintln!(
+        "\n  {} (copied in full, {} files{}):",
+        style("Boilerplate").bold(),
+        boilerplate.len() + binary_count,
+        if binary_count > 0 {
+            format!(", {} binary", binary_count)
+        } else {
+            String::new()
+        }
+    );
+    for file in &boilerplate {
+        eprintln!("    {}", file.template_path.display());
+    }
+
+    // Stubbed files
+    if !stubbed.is_empty() {
+        eprintln!(
+            "\n  {} (structure only, {} files):",
+            style("Stubbed").bold(),
+            stubbed.len()
+        );
+        for file in &stubbed {
+            eprintln!("    {}", file.template_path.display());
+        }
+    }
+
+    // Dropped files
+    if dropped_count > 0 {
+        eprintln!("\n  {} ({} files):", style("Dropped").bold(), dropped_count);
+    }
+
+    let proceed = Confirm::new("Proceed?")
+        .with_default(true)
+        .prompt()
+        .map_err(|_| DicecutError::PromptCancelled)?;
+
+    if !proceed {
+        return Err(DicecutError::PromptCancelled);
+    }
+
+    Ok(())
+}
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index c9a6e34..a3b524d 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -2,26 +2,31 @@ pub mod auto_detect;
 pub mod conditional;
 pub mod config_gen;
 pub mod exclude;
+pub mod interactive;
 pub mod replace;
 pub mod scan;
 pub mod stub;
 pub mod variants;
 
-use std::collections::{BTreeMap, HashMap};
+use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 
 use console::style;
-use inquire::{Confirm, Select, Text};
 
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
 
-use self::auto_detect::{auto_detect, count_occurrences, DetectedCandidate};
-use self::conditional::{detect_conditional_files, patterns_for_variable, DetectedConditional};
+use self::auto_detect::{auto_detect, count_occurrences};
+use self::conditional::{detect_conditional_files, patterns_for_variable};
 use self::config_gen::{
     generate_config_toml, ComputedVariable, ConditionalEntry, ConfigGenOptions, PromptedVariable,
 };
 use self::exclude::{all_default_excludes, detect_copy_without_render, relevant_config_excludes};
+use self::interactive::{
+    confirm_auto_detected_interactive, confirm_conditionals_interactive,
+    confirm_excludes_interactive, confirm_files_interactive, confirm_variants_interactive,
+    resolve_candidates_yes,
+};
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
@@ -561,402 +566,3 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
 
     Ok(())
 }
-
-// ── Interactive helpers ──────────────────────────────────────────────────
-
-fn confirm_variants_interactive(variables: Vec<ExtractVariable>) -> Result<Vec<ExtractVariable>> {
-    let mut confirmed = Vec::new();
-
-    for mut var in variables {
-        eprintln!(
-            "\n{} {} = {:?} {}",
-            style("──").dim(),
-            style(&var.name).bold(),
-            var.value,
-            style("──────────────────────────────────────").dim()
-        );
-
-        if var.variants.len() == 1 && var.variants[0].name == "verbatim" {
-            // Simple value — just show occurrence count
-            let (file_count, total_hits) = var
-                .occurrence_counts
-                .first()
-                .map(|(_, fc, th)| (*fc, *th))
-                .unwrap_or((0, 0));
-            if total_hits > 0 {
-                eprintln!(
-                    "  Found in {} files ({} occurrences)",
-                    file_count, total_hits
-                );
-            } else {
-                eprintln!(
-                    "  {} Value not found in any file (will still be added to config)",
-                    style("⚠").yellow()
-                );
-            }
-            confirmed.push(var);
-            continue;
-        }
-
-        // Show detected variants with counts
-        eprintln!("  Detected case variants:");
-        let mut found_any = false;
-        for (i, variant) in var.variants.iter().enumerate() {
-            let (_, file_count, total_hits) = &var.occurrence_counts[i];
-            let mark = if *total_hits > 0 {
-                found_any = true;
-                style("✓").green().to_string()
-            } else {
-                style("✗").dim().to_string()
-            };
-            let hits_str = if *total_hits > 0 {
-                format!(
-                    "{} {} across {} {}",
-                    total_hits,
-                    if *total_hits == 1 { "hit" } else { "hits" },
-                    file_count,
-                    if *file_count == 1 { "file" } else { "files" }
-                )
-            } else {
-                "not found".to_string()
-            };
-            eprintln!(
-                "    {} {:<16} {:<20} {}",
-                mark,
-                variant.literal,
-                variant.name,
-                style(&hits_str).dim()
-            );
-        }
-
-        if !found_any {
-            eprintln!(
-                "  {} No occurrences found for any variant (will still be added to config)",
-                style("⚠").yellow()
-            );
-            // Keep just the first variant
-            var.variants.truncate(1);
-            confirmed.push(var);
-            continue;
-        }
-
-        let keep = Confirm::new("Keep detected variants?")
-            .with_default(true)
-            .prompt()
-            .map_err(|_| DicecutError::PromptCancelled)?;
-
-        if keep {
-            // Remove variants with zero occurrences
-            let counts = var.occurrence_counts.clone();
-            var.variants.retain(|v| {
-                counts
-                    .iter()
-                    .any(|(name, _, hits)| name == v.name && *hits > 0)
-            });
-            if var.variants.is_empty() {
-                let all = generate_variants(&var.name, &var.value);
-                if let Some(first) = all.into_iter().next() {
-                    var.variants.push(first);
-                }
-            }
-        } else {
-            // Keep only the canonical variant
-            var.variants.truncate(1);
-        }
-
-        confirmed.push(var);
-    }
-
-    Ok(confirmed)
-}
-
-fn confirm_excludes_interactive(mut excludes: Vec<String>) -> Result<Vec<String>> {
-    eprintln!(
-        "\n{} Excludes {}",
-        style("──").dim(),
-        style("─────────────────────────────────────────────").dim()
-    );
-    if excludes.is_empty() {
-        eprintln!("  No exclude patterns needed for this template.");
-    } else {
-        eprintln!("  Patterns matching template files:");
-        for e in &excludes {
-            eprintln!("    {}", e);
-        }
-    }
-
-    let extra = Text::new("Add extra exclude patterns? (comma-separated, enter to skip)")
-        .with_default("")
-        .prompt()
-        .map_err(|_| DicecutError::PromptCancelled)?;
-
-    if !extra.is_empty() {
-        for pattern in extra.split(',') {
-            let trimmed = pattern.trim().to_string();
-            if !trimmed.is_empty() {
-                excludes.push(trimmed);
-            }
-        }
-    }
-
-    Ok(excludes)
-}
-
-fn confirm_conditionals_interactive(
-    detected: Vec<DetectedConditional>,
-) -> Result<Vec<DetectedConditional>> {
-    eprintln!(
-        "\n{} Conditional files {}",
-        style("──").dim(),
-        style("────────────────────────────────────").dim()
-    );
-    eprintln!("  These look optional. Make them conditional?");
-
-    let mut confirmed = Vec::new();
-    for cond in detected {
-        let prompt = format!("  {} → {}", cond.pattern, cond.variable);
-        let include = Confirm::new(&prompt)
-            .with_default(false)
-            .prompt()
-            .map_err(|_| DicecutError::PromptCancelled)?;
-
-        if include {
-            confirmed.push(cond);
-        }
-    }
-
-    Ok(confirmed)
-}
-
-fn resolve_candidates_yes(
-    candidates: &[DetectedCandidate],
-    explicit_vars: &[(String, String)],
-) -> Vec<(String, String)> {
-    eprintln!(
-        "\n{} Auto-detected variables {}",
-        style("──").dim(),
-        style("──────────────────────────────────").dim()
-    );
-
-    // Group candidates by suggested_name
-    let mut groups: BTreeMap<String, Vec<&DetectedCandidate>> = BTreeMap::new();
-    for c in candidates {
-        groups.entry(c.suggested_name.clone()).or_default().push(c);
-    }
-
-    let mut result = Vec::new();
-
-    for (name, mut group) in groups {
-        // Skip names already covered by explicit --var
-        if explicit_vars.iter().any(|(n, _)| n == &name) {
-            eprintln!(
-                "  {} {} (explicit --var, skipping auto-detect)",
-                style("·").dim(),
-                style(&name).dim()
-            );
-            continue;
-        }
-
-        // For name collisions, pick highest confidence
-        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
-        let winner = group[0];
-
-        eprintln!(
-            "  {} {} = {:?} ({:.0}% confidence, {})",
-            style("✓").green(),
-            style(&winner.suggested_name).bold(),
-            winner.value,
-            winner.confidence * 100.0,
-            winner.tier
-        );
-        eprintln!("    {}", style(&winner.reason).dim());
-
-        if group.len() > 1 {
-            eprintln!(
-                "    {} {} other candidates for this name (picked highest confidence)",
-                style("⚠").yellow(),
-                group.len() - 1
-            );
-        }
-
-        result.push((winner.suggested_name.clone(), winner.value.clone()));
-    }
-
-    result
-}
-
-fn confirm_auto_detected_interactive(
-    candidates: Vec<DetectedCandidate>,
-    explicit_vars: &[(String, String)],
-) -> Result<Vec<(String, String)>> {
-    eprintln!(
-        "\n{} Auto-detected variables {}",
-        style("──").dim(),
-        style("──────────────────────────────────").dim()
-    );
-
-    // Group candidates by suggested_name
-    let mut groups: BTreeMap<String, Vec<DetectedCandidate>> = BTreeMap::new();
-    for c in candidates {
-        groups.entry(c.suggested_name.clone()).or_default().push(c);
-    }
-
-    let mut accepted = Vec::new();
-
-    for (name, mut group) in groups {
-        // Skip names already covered by explicit --var
-        if explicit_vars.iter().any(|(n, _)| n == &name) {
-            eprintln!(
-                "\n  {} {} (provided via --var, skipping)",
-                style("·").dim(),
-                style(&name).dim()
-            );
-            continue;
-        }
-
-        // Sort by confidence descending
-        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
-
-        if group.len() == 1 {
-            // Single candidate — simple confirm
-            let candidate = &group[0];
-            eprintln!(
-                "\n  {} = {:?} ({:.0}% confidence, {})",
-                style(&candidate.suggested_name).bold(),
-                candidate.value,
-                candidate.confidence * 100.0,
-                candidate.tier
-            );
-            eprintln!("    {}", style(&candidate.reason).dim());
-            if candidate.total_occurrences > 0 {
-                eprintln!(
-                    "    {} occurrences across {} files",
-                    candidate.total_occurrences, candidate.file_count
-                );
-            }
-
-            let accept = Confirm::new(&format!("Accept \"{}\"?", candidate.suggested_name))
-                .with_default(true)
-                .prompt()
-                .map_err(|_| DicecutError::PromptCancelled)?;
-
-            if accept {
-                accepted.push((candidate.suggested_name.clone(), candidate.value.clone()));
-            }
-        } else {
-            // Name collision — show selection prompt
-            eprintln!(
-                "\n  {} Multiple candidates for {}:",
-                style("⚠").yellow(),
-                style(&name).bold()
-            );
-
-            let mut options: Vec<String> = group
-                .iter()
-                .map(|c| {
-                    format!(
-                        "{:?} ({:.0}% confidence, {})",
-                        c.value,
-                        c.confidence * 100.0,
-                        c.tier
-                    )
-                })
-                .collect();
-            options.push("Skip".to_string());
-
-            let selection = Select::new(&format!("Which value for \"{}\"?", name), options)
-                .prompt()
-                .map_err(|_| DicecutError::PromptCancelled)?;
-
-            if selection != "Skip" {
-                // Find the matching candidate
-                if let Some(chosen) = group.iter().find(|c| {
-                    format!(
-                        "{:?} ({:.0}% confidence, {})",
-                        c.value,
-                        c.confidence * 100.0,
-                        c.tier
-                    ) == selection
-                }) {
-                    accepted.push((chosen.suggested_name.clone(), chosen.value.clone()));
-                }
-            }
-        }
-    }
-
-    Ok(accepted)
-}
-
-fn confirm_files_interactive(files: &[PlannedExtractFile], dropped_count: usize) -> Result<()> {
-    let templated: Vec<_> = files.iter().filter(|f| f.has_replacements()).collect();
-    let boilerplate: Vec<_> = files
-        .iter()
-        .filter(|f| !f.has_replacements() && !f.stubbed && !f.is_binary())
-        .collect();
-    let stubbed: Vec<_> = files.iter().filter(|f| f.stubbed).collect();
-    let binary_count = files.iter().filter(|f| f.is_binary()).count();
-
-    eprintln!(
-        "\n{} File plan {}",
-        style("──").dim(),
-        style("──────────────────────────────────────────").dim()
-    );
-
-    // Templated files
-    eprintln!(
-        "\n  {} ({} files, {} suffix):",
-        style("Templated").bold(),
-        templated.len(),
-        DEFAULT_TEMPLATES_SUFFIX
-    );
-    for file in &templated {
-        eprintln!(
-            "    {:<50} {} replacements",
-            file.template_path.display(),
-            file.replacement_count()
-        );
-    }
-
-    // Boilerplate files
-    eprintln!(
-        "\n  {} (copied in full, {} files{}):",
-        style("Boilerplate").bold(),
-        boilerplate.len() + binary_count,
-        if binary_count > 0 {
-            format!(", {} binary", binary_count)
-        } else {
-            String::new()
-        }
-    );
-    for file in &boilerplate {
-        eprintln!("    {}", file.template_path.display());
-    }
-
-    // Stubbed files
-    if !stubbed.is_empty() {
-        eprintln!(
-            "\n  {} (structure only, {} files):",
-            style("Stubbed").bold(),
-            stubbed.len()
-        );
-        for file in &stubbed {
-            eprintln!("    {}", file.template_path.display());
-        }
-    }
-
-    // Dropped files
-    if dropped_count > 0 {
-        eprintln!("\n  {} ({} files):", style("Dropped").bold(), dropped_count);
-    }
-
-    let proceed = Confirm::new("Proceed?")
-        .with_default(true)
-        .prompt()
-        .map_err(|_| DicecutError::PromptCancelled)?;
-
-    if !proceed {
-        return Err(DicecutError::PromptCancelled);
-    }
-
-    Ok(())
-}

From 23491a81cd0757680e2f469e59aae75e20a09d5c Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 14:44:51 -0500
Subject: [PATCH 23/29] fix(extract): apply stub-depth to templated files too

Files deeper than stub_depth were only dropped when they had 0 template
replacements. Deep files with incidental replacements (e.g. a project
name appearing in a nested reference doc) were still kept as .die
templates. Now the depth check applies regardless of replacement count.
---
 src/extract/mod.rs | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index a3b524d..b1fe890 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -314,17 +314,23 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
             let (replaced, count) = apply_replacements(content, &rules);
 
             if count > 0 {
-                // Has template replacements — keep content, add .die suffix
-                let mut p = template_path.as_os_str().to_string_lossy().to_string();
-                p.push_str(DEFAULT_TEMPLATES_SUFFIX);
-                planned_files.push(PlannedExtractFile {
-                    template_path: PathBuf::from(p),
-                    content: ExtractedContent::Text {
-                        content: replaced,
-                        replacement_count: count,
-                    },
-                    stubbed: false,
-                });
+                // Has template replacements — but still drop if too deep
+                let depth = file.relative_path.components().count();
+                if depth > options.stub_depth {
+                    dropped_count += 1;
+                    dropped_paths.push(file.relative_path.clone());
+                } else {
+                    let mut p = template_path.as_os_str().to_string_lossy().to_string();
+                    p.push_str(DEFAULT_TEMPLATES_SUFFIX);
+                    planned_files.push(PlannedExtractFile {
+                        template_path: PathBuf::from(p),
+                        content: ExtractedContent::Text {
+                            content: replaced,
+                            replacement_count: count,
+                        },
+                        stubbed: false,
+                    });
+                }
             } else {
                 // No replacements — classify as boilerplate, content, or dropped
                 match classify_file(&file.relative_path, options.stub_depth) {

From 0760e235f6643e0cea26c94152ef0dcf87b0499f Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 16:46:58 -0500
Subject: [PATCH 24/29] fix(extract): filter deep files before auto-detect

Non-boilerplate files deeper than stub_depth are now removed from the
scan result before frequency analysis runs. This prevents detecting
variables that only appear in files that would be dropped anyway.
---
 src/extract/mod.rs | 53 ++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index b1fe890..13c2727 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -164,11 +164,28 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         "\n{}",
         style(format!("Scanning {}...", source_dir.display())).bold()
     );
-    let scan_result = scan_project(source_dir, &scan_excludes)?;
+    let mut scan_result = scan_project(source_dir, &scan_excludes)?;
+
+    // Drop non-boilerplate files deeper than stub_depth before auto-detect sees them.
+    // This prevents frequency analysis from detecting variables that only appear in
+    // files that would be dropped anyway.
+    let pre_filter_count = scan_result.files.len();
+    scan_result.files.retain(|f| {
+        let depth = f.relative_path.components().count();
+        depth <= options.stub_depth
+            || classify_file(&f.relative_path, options.stub_depth) == FileRole::Boilerplate
+    });
+    let depth_dropped = pre_filter_count - scan_result.files.len();
+
     eprintln!(
-        "  {} files found, {} excluded",
+        "  {} files found, {} excluded{}",
         scan_result.files.len(),
-        scan_result.excluded_count
+        scan_result.excluded_count,
+        if depth_dropped > 0 {
+            format!(", {} too deep", depth_dropped)
+        } else {
+            String::new()
+        }
     );
 
     // Phase 2.5: Auto-detect variables (always runs), merge with explicit --var entries
@@ -293,7 +310,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
 
     // Phase 9: Apply replacements to files
     let mut planned_files = Vec::new();
-    let mut dropped_count = 0;
+    let mut dropped_count = depth_dropped;
     let mut dropped_paths = Vec::new();
 
     for file in &scan_result.files {
@@ -314,23 +331,17 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
             let (replaced, count) = apply_replacements(content, &rules);
 
             if count > 0 {
-                // Has template replacements — but still drop if too deep
-                let depth = file.relative_path.components().count();
-                if depth > options.stub_depth {
-                    dropped_count += 1;
-                    dropped_paths.push(file.relative_path.clone());
-                } else {
-                    let mut p = template_path.as_os_str().to_string_lossy().to_string();
-                    p.push_str(DEFAULT_TEMPLATES_SUFFIX);
-                    planned_files.push(PlannedExtractFile {
-                        template_path: PathBuf::from(p),
-                        content: ExtractedContent::Text {
-                            content: replaced,
-                            replacement_count: count,
-                        },
-                        stubbed: false,
-                    });
-                }
+                // Has template replacements — add .die suffix
+                let mut p = template_path.as_os_str().to_string_lossy().to_string();
+                p.push_str(DEFAULT_TEMPLATES_SUFFIX);
+                planned_files.push(PlannedExtractFile {
+                    template_path: PathBuf::from(p),
+                    content: ExtractedContent::Text {
+                        content: replaced,
+                        replacement_count: count,
+                    },
+                    stubbed: false,
+                });
             } else {
                 // No replacements — classify as boilerplate, content, or dropped
                 match classify_file(&file.relative_path, options.stub_depth) {

From 2f6240490e1095c4c2df54a4f7560b9f2ed2a80e Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Sat, 28 Feb 2026 17:26:09 -0500
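Subject: [PATCH 25/29] refactor: improve extraction

- Add `.claude/worktrees` and `.astro` to the default excludes.
- Add `is_copy_without_render` so lock files and other copy-without-render
  files are planned verbatim, with no replacements applied to them.
- In `-y` mode, skip frequency-analysis candidates (too noisy to
  auto-accept) and report how many were skipped.
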
---
 src/extract/exclude.rs     | 58 ++++++++++++++++++++++++++++++++++++++
 src/extract/interactive.rs | 17 ++++++++++-
 src/extract/mod.rs         | 18 +++++++++++-
 3 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
index f228830..d8bac71 100644
--- a/src/extract/exclude.rs
+++ b/src/extract/exclude.rs
@@ -29,6 +29,8 @@ const DEFAULT_EXCLUDES: &[&str] = &[
     ".output",
     ".turbo",
     ".worktrees",
+    ".claude/worktrees",
+    ".astro",
     ".diecut-answers.toml",
 ];
 
@@ -119,6 +121,27 @@ pub fn detect_copy_without_render(
     found
 }
 
+/// Check if a file should be copied without rendering (lock files, binary-like assets).
+///
+/// These files are included in the template but should never have replacements
+/// applied during extraction — they're copied verbatim.
+pub fn is_copy_without_render(path: &Path) -> bool {
+    for pattern in DEFAULT_COPY_WITHOUT_RENDER {
+        if let Some(ext) = pattern.strip_prefix("*.") {
+            if let Some(file_ext) = path.extension() {
+                if file_ext.to_string_lossy().eq_ignore_ascii_case(ext) {
+                    return true;
+                }
+            }
+        } else if let Some(file_name) = path.file_name() {
+            if file_name.to_string_lossy() == *pattern {
+                return true;
+            }
+        }
+    }
+    false
+}
+
 /// Check if a path should be excluded based on the exclude patterns.
 pub fn should_exclude(relative_path: &Path, excludes: &[String]) -> bool {
     let path_str = relative_path.to_string_lossy();
@@ -232,6 +255,41 @@ mod tests {
         assert!(!relevant.contains(&"node_modules".to_string()));
     }
 
+    #[test]
+    fn test_should_exclude_claude_worktrees() {
+        let excludes = all_default_excludes();
+        assert!(should_exclude(
+            Path::new(".claude/worktrees/agent-abc/Cargo.toml"),
+            &excludes
+        ));
+        // .claude/settings.local.json should NOT be excluded
+        assert!(!should_exclude(
+            Path::new(".claude/settings.local.json"),
+            &excludes
+        ));
+    }
+
+    #[test]
+    fn test_should_exclude_astro() {
+        let excludes = all_default_excludes();
+        assert!(should_exclude(
+            Path::new("docs/.astro/data-store.json"),
+            &excludes
+        ));
+        assert!(should_exclude(Path::new(".astro/settings.json"), &excludes));
+    }
+
+    #[test]
+    fn test_is_copy_without_render() {
+        assert!(is_copy_without_render(Path::new("Cargo.lock")));
+        assert!(is_copy_without_render(Path::new("pnpm-lock.yaml")));
+        assert!(is_copy_without_render(Path::new("package-lock.json")));
+        assert!(is_copy_without_render(Path::new("logo.png")));
+        assert!(is_copy_without_render(Path::new("deep/nested/file.lock")));
+        assert!(!is_copy_without_render(Path::new("src/main.rs")));
+        assert!(!is_copy_without_render(Path::new("README.md")));
+    }
+
     #[test]
     fn test_detect_copy_without_render() {
         let files = vec![
diff --git a/src/extract/interactive.rs b/src/extract/interactive.rs
index 6b6c55d..2f9b8e9 100644
--- a/src/extract/interactive.rs
+++ b/src/extract/interactive.rs
@@ -6,7 +6,7 @@ use inquire::{Confirm, Select, Text};
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
 
-use super::auto_detect::DetectedCandidate;
+use super::auto_detect::{ConfidenceTier, DetectedCandidate};
 use super::conditional::DetectedConditional;
 use super::variants::generate_variants;
 use super::{ExtractVariable, PlannedExtractFile};
@@ -194,6 +194,7 @@ pub fn resolve_candidates_yes(
     }
 
     let mut result = Vec::new();
+    let mut skipped_freq = 0;
 
     for (name, mut group) in groups {
         // Skip names already covered by explicit --var
@@ -210,6 +211,12 @@ pub fn resolve_candidates_yes(
         group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
         let winner = group[0];
 
+        // Skip frequency-analysis candidates in -y mode — too noisy for auto-accept
+        if winner.tier == ConfidenceTier::FrequencyAnalysis {
+            skipped_freq += 1;
+            continue;
+        }
+
         eprintln!(
             "  {} {} = {:?} ({:.0}% confidence, {})",
             style("✓").green(),
@@ -231,6 +238,14 @@ pub fn resolve_candidates_yes(
         result.push((winner.suggested_name.clone(), winner.value.clone()));
     }
 
+    if skipped_freq > 0 {
+        eprintln!(
+            "  {} {} frequency-detected candidate(s) skipped (use interactive mode to review)",
+            style("·").dim(),
+            skipped_freq
+        );
+    }
+
     result
 }
 
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 13c2727..f9d1318 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -21,7 +21,10 @@ use self::conditional::{detect_conditional_files, patterns_for_variable};
 use self::config_gen::{
     generate_config_toml, ComputedVariable, ConditionalEntry, ConfigGenOptions, PromptedVariable,
 };
-use self::exclude::{all_default_excludes, detect_copy_without_render, relevant_config_excludes};
+use self::exclude::{
+    all_default_excludes, detect_copy_without_render, is_copy_without_render,
+    relevant_config_excludes,
+};
 use self::interactive::{
     confirm_auto_detected_interactive, confirm_conditionals_interactive,
     confirm_excludes_interactive, confirm_files_interactive, confirm_variants_interactive,
@@ -328,6 +331,19 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
                 stubbed: false,
             });
         } else if let Some(ref content) = file.content {
+            // Lock files and other copy-without-render files: skip replacement
+            if is_copy_without_render(&file.relative_path) {
+                planned_files.push(PlannedExtractFile {
+                    template_path,
+                    content: ExtractedContent::Text {
+                        content: content.clone(),
+                        replacement_count: 0,
+                    },
+                    stubbed: false,
+                });
+                continue;
+            }
+
             let (replaced, count) = apply_replacements(content, &rules);
 
             if count > 0 {

From 674be8ca3b667449b48b9d2ebdc8414bde1b34a0 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Wed, 4 Mar 2026 12:05:38 -0500
Subject: [PATCH 26/29] refactor(extract): trim to engine-only for PR 1

Remove auto-detect, interactive prompts, and conditional files to
reduce PR scope. These features are preserved on feat/extract-auto-detect
for a follow-up PR.

- Delete auto_detect.rs (1,140 lines), interactive.rs (426 lines),
  conditional.rs (170 lines)
- Remove --yes and --min-confidence CLI flags
- Move count_occurrences to scan.rs (test-only)
- Remove 4 auto-detect integration tests
- Strip dead params and deduplicate DEFAULT_EXCLUDES
---
 src/cli.rs                 |    8 -
 src/commands/extract.rs    |    8 +-
 src/extract/auto_detect.rs | 1140 ------------------------------------
 src/extract/conditional.rs |  170 ------
 src/extract/exclude.rs     |   11 +-
 src/extract/interactive.rs |  426 --------------
 src/extract/mod.rs         |  159 +----
 src/extract/replace.rs     |   15 +-
 src/extract/scan.rs        |   30 +
 src/main.rs                |   13 +-
 tests/integration.rs       |  206 +------
 11 files changed, 77 insertions(+), 2109 deletions(-)
 delete mode 100644 src/extract/auto_detect.rs
 delete mode 100644 src/extract/conditional.rs
 delete mode 100644 src/extract/interactive.rs

diff --git a/src/cli.rs b/src/cli.rs
index fde16cb..92bc8e1 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -67,14 +67,6 @@ pub enum Commands {
         #[arg(long)]
         in_place: bool,
 
-        /// Accept all defaults without prompting
-        #[arg(short = 'y', long)]
-        yes: bool,
-
-        /// Minimum confidence threshold for auto-detected variables (0.0-1.0)
-        #[arg(long, default_value = "0.5")]
-        min_confidence: f64,
-
         /// Max path depth for stubbing content files (deeper files are dropped)
         #[arg(long, default_value = "2")]
         stub_depth: usize,
diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index 6251044..b7f7e9f 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -6,14 +6,11 @@ use diecut::error::DicecutError;
 use diecut::extract::{execute_extraction, plan_extraction, ExtractOptions};
 use miette::Result;
 
-#[allow(clippy::too_many_arguments)]
 pub fn run(
     source: String,
     vars: Vec<String>,
     output: Option<String>,
     in_place: bool,
-    yes: bool,
-    min_confidence: f64,
     stub_depth: usize,
     dry_run: bool,
 ) -> Result<()> {
@@ -24,10 +21,7 @@ pub fn run(
         variables,
         output_dir: output.map(PathBuf::from),
         in_place,
-        yes,
-        min_confidence,
         stub_depth,
-        dry_run,
     };
 
     let plan = plan_extraction(&options)?;
@@ -37,7 +31,7 @@ pub fn run(
         return Ok(());
     }
 
-    execute_extraction(&plan, in_place)?;
+    execute_extraction(&plan)?;
 
     Ok(())
 }
diff --git a/src/extract/auto_detect.rs b/src/extract/auto_detect.rs
deleted file mode 100644
index 193bbac..0000000
--- a/src/extract/auto_detect.rs
+++ /dev/null
@@ -1,1140 +0,0 @@
-use std::collections::{HashMap, HashSet};
-use std::path::Path;
-use std::process::Command;
-use std::sync::LazyLock;
-
-use regex_lite::Regex;
-
-static GO_MOD_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^module\s+(\S+)").unwrap());
-
-static TOKEN_RE: LazyLock<Regex> = LazyLock::new(|| {
-    Regex::new(
-        r"[a-zA-Z][a-zA-Z0-9]*(?:[-_.][a-zA-Z0-9]+)+|[A-Z][a-z]+(?:[A-Z][a-z]+)+|[a-z]+(?:[A-Z][a-z]+)+|[A-Z]{2,}(?:_[A-Z]{2,})+",
-    )
-    .unwrap()
-});
-
-use super::scan::ScanResult;
-use super::variants::split_into_words;
-
-/// Confidence tier indicating how a candidate variable was detected.
-#[derive(Debug, Clone, PartialEq)]
-pub enum ConfidenceTier {
-    DirectoryName,
-    ConfigFile,
-    GitMetadata,
-    FrequencyAnalysis,
-}
-
-impl std::fmt::Display for ConfidenceTier {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            ConfidenceTier::DirectoryName => write!(f, "directory name"),
-            ConfidenceTier::ConfigFile => write!(f, "config file"),
-            ConfidenceTier::GitMetadata => write!(f, "git metadata"),
-            ConfidenceTier::FrequencyAnalysis => write!(f, "frequency analysis"),
-        }
-    }
-}
-
-/// A candidate variable detected by auto-detection.
-#[derive(Debug, Clone)]
-pub struct DetectedCandidate {
-    pub suggested_name: String,
-    pub value: String,
-    pub tier: ConfidenceTier,
-    pub confidence: f64,
-    pub reason: String,
-    pub file_count: usize,
-    pub total_occurrences: usize,
-}
-
-/// Result of running auto-detection.
-#[derive(Debug)]
-pub struct AutoDetectResult {
-    pub candidates: Vec<DetectedCandidate>,
-}
-
-// ── Entry point ──────────────────────────────────────────────────────────
-
-/// Run all 4 auto-detection tiers against a scanned project.
-pub fn auto_detect(project_dir: &Path, scan_result: &ScanResult) -> AutoDetectResult {
-    let mut candidates = Vec::new();
-
-    // Tier 1: Directory name
-    candidates.extend(detect_directory_name(project_dir, scan_result));
-
-    // Tier 2: Ecosystem config files
-    candidates.extend(detect_config_files(project_dir, scan_result));
-
-    // Tier 3: Git metadata
-    candidates.extend(detect_git_metadata(project_dir, scan_result));
-
-    // Collect values already covered by tiers 1-3
-    let covered_values: HashSet<String> =
-        candidates.iter().map(|c| c.value.to_lowercase()).collect();
-
-    // Tier 4: Frequency analysis
-    candidates.extend(detect_frequency(scan_result, &covered_values));
-
-    // Deduplicate by normalized word list, keeping highest confidence
-    deduplicate_candidates(&mut candidates);
-
-    // Sort by confidence descending
-    candidates.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
-
-    AutoDetectResult { candidates }
-}
-
-// ── Tier 1: Directory name ───────────────────────────────────────────────
-
-const GENERIC_DIR_NAMES: &[&str] = &[
-    "src",
-    "app",
-    "project",
-    "tmp",
-    "temp",
-    "build",
-    "dist",
-    "out",
-    "output",
-    "lib",
-    "bin",
-    "test",
-    "tests",
-    "example",
-    "examples",
-    "docs",
-    "doc",
-    "assets",
-    "public",
-    "static",
-    "vendor",
-    "node_modules",
-    "target",
-    "pkg",
-    "cmd",
-    "internal",
-    "api",
-    "web",
-    "server",
-    "client",
-    "frontend",
-    "backend",
-    "service",
-    "services",
-    "workspace",
-    "repo",
-    "code",
-];
-
-fn detect_directory_name(project_dir: &Path, scan_result: &ScanResult) -> Vec<DetectedCandidate> {
-    let dir_name = match project_dir.file_name() {
-        Some(name) => name.to_string_lossy().to_string(),
-        None => return vec![],
-    };
-
-    if GENERIC_DIR_NAMES.contains(&dir_name.to_lowercase().as_str()) {
-        return vec![];
-    }
-
-    // Must have at least 2 chars
-    if dir_name.len() < 2 {
-        return vec![];
-    }
-
-    let (file_count, total_occurrences) = count_occurrences(&dir_name, scan_result);
-
-    vec![DetectedCandidate {
-        suggested_name: "project_name".to_string(),
-        value: dir_name.clone(),
-        tier: ConfidenceTier::DirectoryName,
-        confidence: 0.95,
-        reason: format!("directory name \"{}\"", dir_name),
-        file_count,
-        total_occurrences,
-    }]
-}
-
-// ── Tier 2: Ecosystem config files ───────────────────────────────────────
-
-fn detect_config_files(project_dir: &Path, scan_result: &ScanResult) -> Vec<DetectedCandidate> {
-    let mut candidates = Vec::new();
-
-    if let Some(mut c) = parse_cargo_toml(project_dir, scan_result) {
-        candidates.append(&mut c);
-    }
-    if let Some(mut c) = parse_package_json(project_dir, scan_result) {
-        candidates.append(&mut c);
-    }
-    if let Some(mut c) = parse_pyproject_toml(project_dir, scan_result) {
-        candidates.append(&mut c);
-    }
-    if let Some(mut c) = parse_go_mod(project_dir, scan_result) {
-        candidates.append(&mut c);
-    }
-
-    candidates
-}
-
-fn push_config_candidate(
-    candidates: &mut Vec<DetectedCandidate>,
-    value: &str,
-    suggested_name: &str,
-    confidence: f64,
-    reason: &str,
-    scan_result: &ScanResult,
-) {
-    let (file_count, total_occurrences) = count_occurrences(value, scan_result);
-    candidates.push(DetectedCandidate {
-        suggested_name: suggested_name.to_string(),
-        value: value.to_string(),
-        tier: ConfidenceTier::ConfigFile,
-        confidence,
-        reason: reason.to_string(),
-        file_count,
-        total_occurrences,
-    });
-}
-
-fn parse_cargo_toml(
-    project_dir: &Path,
-    scan_result: &ScanResult,
-) -> Option<Vec<DetectedCandidate>> {
-    let path = project_dir.join("Cargo.toml");
-    let content = std::fs::read_to_string(&path).ok()?;
-    let parsed: toml::Value = content.parse().ok()?;
-
-    let mut candidates = Vec::new();
-
-    if let Some(name) = parsed
-        .get("package")
-        .and_then(|p| p.get("name"))
-        .and_then(|n| n.as_str())
-    {
-        push_config_candidate(
-            &mut candidates,
-            name,
-            "project_name",
-            0.90,
-            "Cargo.toml [package].name",
-            scan_result,
-        );
-    }
-
-    if let Some(version) = parsed
-        .get("package")
-        .and_then(|p| p.get("version"))
-        .and_then(|v| v.as_str())
-    {
-        if !version.is_empty() {
-            push_config_candidate(
-                &mut candidates,
-                version,
-                "version",
-                0.85,
-                "Cargo.toml [package].version",
-                scan_result,
-            );
-        }
-    }
-
-    if let Some(authors) = parsed
-        .get("package")
-        .and_then(|p| p.get("authors"))
-        .and_then(|a| a.as_array())
-    {
-        if let Some(first) = authors.first().and_then(|a| a.as_str()) {
-            let author = strip_email(first);
-            if !author.is_empty() {
-                push_config_candidate(
-                    &mut candidates,
-                    &author,
-                    "author",
-                    0.85,
-                    "Cargo.toml [package].authors[0]",
-                    scan_result,
-                );
-            }
-        }
-    }
-
-    Some(candidates)
-}
-
-fn parse_package_json(
-    project_dir: &Path,
-    scan_result: &ScanResult,
-) -> Option<Vec<DetectedCandidate>> {
-    let path = project_dir.join("package.json");
-    let content = std::fs::read_to_string(&path).ok()?;
-    let parsed: serde_json::Value = serde_json::from_str(&content).ok()?;
-
-    let mut candidates = Vec::new();
-
-    if let Some(name) = parsed.get("name").and_then(|n| n.as_str()) {
-        let clean_name = strip_npm_scope(name);
-        push_config_candidate(
-            &mut candidates,
-            clean_name,
-            "project_name",
-            0.90,
-            "package.json \"name\"",
-            scan_result,
-        );
-    }
-
-    if let Some(version) = parsed.get("version").and_then(|v| v.as_str()) {
-        if !version.is_empty() {
-            push_config_candidate(
-                &mut candidates,
-                version,
-                "version",
-                0.85,
-                "package.json \"version\"",
-                scan_result,
-            );
-        }
-    }
-
-    if let Some(author) = parsed.get("author") {
-        let author_str = match author {
-            serde_json::Value::String(s) => Some(strip_email(s)),
-            serde_json::Value::Object(obj) => {
-                obj.get("name").and_then(|n| n.as_str()).map(String::from)
-            }
-            _ => None,
-        };
-        if let Some(author_name) = author_str {
-            if !author_name.is_empty() {
-                push_config_candidate(
-                    &mut candidates,
-                    &author_name,
-                    "author",
-                    0.85,
-                    "package.json \"author\"",
-                    scan_result,
-                );
-            }
-        }
-    }
-
-    Some(candidates)
-}
-
-fn parse_pyproject_toml(
-    project_dir: &Path,
-    scan_result: &ScanResult,
-) -> Option<Vec<DetectedCandidate>> {
-    let path = project_dir.join("pyproject.toml");
-    let content = std::fs::read_to_string(&path).ok()?;
-    let parsed: toml::Value = content.parse().ok()?;
-
-    let mut candidates = Vec::new();
-
-    if let Some(name) = parsed
-        .get("project")
-        .and_then(|p| p.get("name"))
-        .and_then(|n| n.as_str())
-    {
-        push_config_candidate(
-            &mut candidates,
-            name,
-            "project_name",
-            0.90,
-            "pyproject.toml [project].name",
-            scan_result,
-        );
-    }
-
-    if let Some(version) = parsed
-        .get("project")
-        .and_then(|p| p.get("version"))
-        .and_then(|v| v.as_str())
-    {
-        if !version.is_empty() {
-            push_config_candidate(
-                &mut candidates,
-                version,
-                "version",
-                0.85,
-                "pyproject.toml [project].version",
-                scan_result,
-            );
-        }
-    }
-
-    if let Some(authors) = parsed
-        .get("project")
-        .and_then(|p| p.get("authors"))
-        .and_then(|a| a.as_array())
-    {
-        if let Some(first) = authors.first() {
-            let author_name = first
-                .get("name")
-                .and_then(|n| n.as_str())
-                .or_else(|| first.as_str())
-                .map(strip_email);
-            if let Some(name) = author_name {
-                if !name.is_empty() {
-                    push_config_candidate(
-                        &mut candidates,
-                        &name,
-                        "author",
-                        0.85,
-                        "pyproject.toml [project].authors[0].name",
-                        scan_result,
-                    );
-                }
-            }
-        }
-    }
-
-    Some(candidates)
-}
-
-fn parse_go_mod(project_dir: &Path, scan_result: &ScanResult) -> Option<Vec<DetectedCandidate>> {
-    let path = project_dir.join("go.mod");
-    let content = std::fs::read_to_string(&path).ok()?;
-
-    let module_path = GO_MOD_RE.captures(&content)?.get(1)?.as_str();
-
-    let segments: Vec<&str> = module_path.split('/').collect();
-
-    // Extract last path segment as project name
-    let name = segments.last().copied()?;
-    if name.is_empty() {
-        return None;
-    }
-
-    let mut candidates = Vec::new();
-
-    push_config_candidate(
-        &mut candidates,
-        name,
-        "project_name",
-        0.90,
-        &format!("go.mod module \"{}\"", module_path),
-        scan_result,
-    );
-
-    // Extract org name (second-to-last segment for github.com/org/repo patterns)
-    if segments.len() >= 3 {
-        let org = segments[segments.len() - 2];
-        if !org.is_empty() && org != name {
-            let (_, org_total_occurrences) = count_occurrences(org, scan_result);
-            if org_total_occurrences > 0 {
-                push_config_candidate(
-                    &mut candidates,
-                    org,
-                    "org_name",
-                    0.85,
-                    &format!("go.mod module org \"{}\"", org),
-                    scan_result,
-                );
-            }
-        }
-    }
-
-    Some(candidates)
-}
-
-// ── Tier 3: Git metadata ─────────────────────────────────────────────────
-
-fn detect_git_metadata(project_dir: &Path, scan_result: &ScanResult) -> Vec<DetectedCandidate> {
-    let mut candidates = Vec::new();
-
-    // Try to get remote origin URL
-    if let Some(url) = git_config_get(project_dir, "remote.origin.url") {
-        if let Some(org) = parse_org_from_url(&url) {
-            let (file_count, total_occurrences) = count_occurrences(&org, scan_result);
-            // Only include if org name actually appears in files
-            if total_occurrences > 0 {
-                candidates.push(DetectedCandidate {
-                    suggested_name: "org_name".to_string(),
-                    value: org.clone(),
-                    tier: ConfidenceTier::GitMetadata,
-                    confidence: 0.70,
-                    reason: format!("git remote org \"{}\"", org),
-                    file_count,
-                    total_occurrences,
-                });
-            }
-        }
-    }
-
-    // Try to get user name
-    if let Some(user_name) = git_config_get(project_dir, "user.name") {
-        if !user_name.is_empty() {
-            let (file_count, total_occurrences) = count_occurrences(&user_name, scan_result);
-            candidates.push(DetectedCandidate {
-                suggested_name: "author".to_string(),
-                value: user_name.clone(),
-                tier: ConfidenceTier::GitMetadata,
-                confidence: 0.65,
-                reason: format!("git config user.name \"{}\"", user_name),
-                file_count,
-                total_occurrences,
-            });
-        }
-    }
-
-    candidates
-}
-
-fn git_config_get(project_dir: &Path, key: &str) -> Option<String> {
-    let output = Command::new("git")
-        .arg("config")
-        .arg("--get")
-        .arg(key)
-        .current_dir(project_dir)
-        .env("GIT_TERMINAL_PROMPT", "0")
-        .output()
-        .ok()?;
-
-    if !output.status.success() {
-        return None;
-    }
-
-    let value = String::from_utf8(output.stdout).ok()?.trim().to_string();
-    if value.is_empty() {
-        None
-    } else {
-        Some(value)
-    }
-}
-
-fn parse_org_from_url(url: &str) -> Option<String> {
-    // SSH: git@github.com:org/repo.git
-    if let Some(rest) = url.strip_prefix("git@") {
-        let after_colon = rest.split(':').nth(1)?;
-        let org = after_colon.split('/').next()?;
-        if !org.is_empty() {
-            return Some(org.to_string());
-        }
-    }
-
-    // HTTPS: https://github.com/org/repo.git
-    if url.starts_with("https://") || url.starts_with("http://") {
-        let parts: Vec<&str> = url.split('/').collect();
-        // https://host/org/repo → parts[3] is org
-        if parts.len() >= 4 && !parts[3].is_empty() {
-            return Some(parts[3].to_string());
-        }
-    }
-
-    None
-}
-
-// ── Tier 4: Frequency analysis ───────────────────────────────────────────
-
-fn detect_frequency(
-    scan_result: &ScanResult,
-    covered_values: &HashSet<String>,
-) -> Vec<DetectedCandidate> {
-    // Tokenize all text file content
-    let mut token_file_map: HashMap<String, HashSet<usize>> = HashMap::new();
-    let mut token_counts: HashMap<String, usize> = HashMap::new();
-
-    for (file_idx, file) in scan_result.files.iter().enumerate() {
-        if let Some(ref content) = file.content {
-            for mat in TOKEN_RE.find_iter(content) {
-                let token = mat.as_str().to_string();
-                token_file_map
-                    .entry(token.clone())
-                    .or_default()
-                    .insert(file_idx);
-                *token_counts.entry(token).or_insert(0) += 1;
-            }
-        }
-    }
-
-    // Group tokens by normalized word list to find multi-variant clusters
-    struct Cluster {
-        literals: Vec<String>,
-        total_occurrences: usize,
-        files: HashSet<usize>,
-    }
-
-    let mut clusters: HashMap<String, Cluster> = HashMap::new();
-
-    for (token, count) in &token_counts {
-        let words = split_into_words(token);
-        let normalized_key = words.join(" ");
-
-        // Token must be at least 4 chars
-        if token.len() < 4 {
-            continue;
-        }
-
-        let cluster = clusters.entry(normalized_key).or_insert_with(|| Cluster {
-            literals: Vec::new(),
-            total_occurrences: 0,
-            files: HashSet::new(),
-        });
-
-        if !cluster.literals.contains(token) {
-            cluster.literals.push(token.clone());
-        }
-        cluster.total_occurrences += count;
-        if let Some(file_set) = token_file_map.get(token) {
-            cluster.files.extend(file_set);
-        }
-    }
-
-    // Filter and convert to candidates
-    let mut freq_candidates: Vec<DetectedCandidate> = Vec::new();
-
-    for cluster in clusters.values() {
-        // Must have ≥2 distinct case variants (the key multi-variant heuristic)
-        if cluster.literals.len() < 2 {
-            continue;
-        }
-
-        // Must have ≥3 total occurrences
-        if cluster.total_occurrences < 3 {
-            continue;
-        }
-
-        // Must appear in ≥2 files
-        if cluster.files.len() < 2 {
-            continue;
-        }
-
-        // Skip if already covered by higher tiers
-        if cluster
-            .literals
-            .iter()
-            .any(|l| covered_values.contains(&l.to_lowercase()))
-        {
-            continue;
-        }
-
-        let best_literal = &cluster.literals[0];
-        let words = split_into_words(best_literal);
-        let suggested_name = if words.len() <= 3 {
-            words.join("_")
-        } else {
-            words[..3].join("_")
-        };
-
-        let file_count = cluster.files.len();
-        freq_candidates.push(DetectedCandidate {
-            suggested_name,
-            value: best_literal.clone(),
-            tier: ConfidenceTier::FrequencyAnalysis,
-            confidence: 0.60,
-            reason: format!(
-                "{} occurrences across {} files, {} variant(s)",
-                cluster.total_occurrences,
-                file_count,
-                cluster.literals.len()
-            ),
-            file_count,
-            total_occurrences: cluster.total_occurrences,
-        });
-    }
-
-    // Sort by file_count * total_occurrences descending, take top 5
-    freq_candidates.sort_by(|a, b| {
-        let score_a = a.file_count * a.total_occurrences;
-        let score_b = b.file_count * b.total_occurrences;
-        score_b.cmp(&score_a)
-    });
-    freq_candidates.truncate(5);
-
-    freq_candidates
-}
-
-// ── Helpers ──────────────────────────────────────────────────────────────
-
-pub fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize) {
-    let mut file_count = 0;
-    let mut total = 0;
-
-    for file in &scan_result.files {
-        let mut counted_file = false;
-
-        if let Some(ref content) = file.content {
-            let hits = content.matches(value).count();
-            if hits > 0 {
-                file_count += 1;
-                counted_file = true;
-                total += hits;
-            }
-        }
-
-        let path_str = file.relative_path.to_string_lossy();
-        let path_hits = path_str.matches(value).count();
-        if path_hits > 0 {
-            total += path_hits;
-            if !counted_file {
-                file_count += 1;
-            }
-        }
-    }
-
-    (file_count, total)
-}
-
-pub fn strip_email(s: &str) -> String {
-    // "Jane Doe <jane@example.com>" → "Jane Doe"
-    if let Some(idx) = s.find('<') {
-        s[..idx].trim().to_string()
-    } else if s.contains('@') {
-        // Bare email — use part before @
-        s.split('@').next().unwrap_or("").trim().to_string()
-    } else {
-        s.trim().to_string()
-    }
-}
-
-fn strip_npm_scope(name: &str) -> &str {
-    if let Some(rest) = name.strip_prefix('@') {
-        rest.split('/').nth(1).unwrap_or(name)
-    } else {
-        name
-    }
-}
-
-fn deduplicate_candidates(candidates: &mut Vec<DetectedCandidate>) {
-    // Only deduplicate by value (same literal from multiple tiers → keep highest confidence).
-    // Name collisions (e.g., two different "author" candidates) are preserved
-    // for the interactive/yes layer to resolve.
-    let mut seen_value: HashMap<String, usize> = HashMap::new();
-    let mut to_remove = Vec::new();
-
-    for (i, candidate) in candidates.iter().enumerate() {
-        let value_key = candidate.value.to_lowercase();
-        if let Some(&prev_idx) = seen_value.get(&value_key) {
-            if candidate.confidence > candidates[prev_idx].confidence {
-                to_remove.push(prev_idx);
-                seen_value.insert(value_key, i);
-            } else {
-                to_remove.push(i);
-            }
-        } else {
-            seen_value.insert(value_key, i);
-        }
-    }
-
-    to_remove.sort_unstable();
-    to_remove.dedup();
-    for idx in to_remove.into_iter().rev() {
-        candidates.remove(idx);
-    }
-}
-
-// ── Tests ────────────────────────────────────────────────────────────────
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::extract::scan::ScannedFile;
-    use std::path::PathBuf;
-
-    fn make_scan_result(files: Vec<(&str, &str)>) -> ScanResult {
-        ScanResult {
-            files: files
-                .into_iter()
-                .map(|(path, content)| ScannedFile {
-                    relative_path: PathBuf::from(path),
-                    absolute_path: PathBuf::from(path),
-                    is_binary: false,
-                    content: Some(content.to_string()),
-                })
-                .collect(),
-            excluded_count: 0,
-        }
-    }
-
-    // ── Tier 1 tests ─────────────────────────────────────────────────
-
-    #[test]
-    fn test_tier1_basic_dir_name() {
-        let scan = make_scan_result(vec![
-            ("README.md", "# my-widget\nA widget project"),
-            ("src/lib.rs", "// my-widget core"),
-        ]);
-        let dir = PathBuf::from("/projects/my-widget");
-        let candidates = detect_directory_name(&dir, &scan);
-
-        assert_eq!(candidates.len(), 1);
-        assert_eq!(candidates[0].value, "my-widget");
-        assert_eq!(candidates[0].suggested_name, "project_name");
-        assert_eq!(candidates[0].confidence, 0.95);
-        assert!(candidates[0].total_occurrences >= 2);
-    }
-
-    #[test]
-    fn test_tier1_generic_name_skipped() {
-        let scan = make_scan_result(vec![("main.rs", "fn main() {}")]);
-        let dir = PathBuf::from("/projects/src");
-        let candidates = detect_directory_name(&dir, &scan);
-        assert!(candidates.is_empty());
-    }
-
-    #[test]
-    fn test_tier1_occurrence_counting() {
-        let scan = make_scan_result(vec![
-            ("a.txt", "hello hello hello"),
-            ("b.txt", "hello world"),
-        ]);
-        let dir = PathBuf::from("/projects/hello");
-        let candidates = detect_directory_name(&dir, &scan);
-        assert_eq!(candidates.len(), 1);
-        assert_eq!(candidates[0].file_count, 2);
-        assert!(candidates[0].total_occurrences >= 4);
-    }
-
-    // ── Tier 2 tests ─────────────────────────────────────────────────
-
-    #[test]
-    fn test_tier2_cargo_toml() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(
-            dir.path().join("Cargo.toml"),
-            "[package]\nname = \"data-pipeline\"\nversion = \"0.3.1\"\nauthors = [\"Alice <alice@example.com>\"]\n",
-        )
-        .unwrap();
-
-        let scan = make_scan_result(vec![("src/main.rs", "data-pipeline runs here")]);
-        let candidates = parse_cargo_toml(dir.path(), &scan).unwrap();
-
-        assert!(candidates.iter().any(|c| c.value == "data-pipeline"));
-        assert!(candidates
-            .iter()
-            .any(|c| c.value == "0.3.1" && c.suggested_name == "version" && c.confidence == 0.85));
-        assert!(candidates.iter().any(|c| c.value == "Alice"));
-    }
-
-    #[test]
-    fn test_tier2_package_json_with_scope() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(
-            dir.path().join("package.json"),
-            r#"{"name": "@myorg/cool-widget", "version": "2.1.0", "author": "Bob Smith <bob@example.com>"}"#,
-        )
-        .unwrap();
-
-        let scan = make_scan_result(vec![("index.js", "cool-widget stuff")]);
-        let candidates = parse_package_json(dir.path(), &scan).unwrap();
-
-        let name_candidate = candidates
-            .iter()
-            .find(|c| c.suggested_name == "project_name")
-            .unwrap();
-        assert_eq!(name_candidate.value, "cool-widget");
-
-        let version_candidate = candidates
-            .iter()
-            .find(|c| c.suggested_name == "version")
-            .unwrap();
-        assert_eq!(version_candidate.value, "2.1.0");
-        assert_eq!(version_candidate.confidence, 0.85);
-
-        let author_candidate = candidates
-            .iter()
-            .find(|c| c.suggested_name == "author")
-            .unwrap();
-        assert_eq!(author_candidate.value, "Bob Smith");
-    }
-
-    #[test]
-    fn test_tier2_pyproject_toml() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(
-            dir.path().join("pyproject.toml"),
-            "[project]\nname = \"my-tool\"\nversion = \"1.0.0\"\n\n[[project.authors]]\nname = \"Charlie\"\n",
-        )
-        .unwrap();
-
-        let scan = make_scan_result(vec![("setup.py", "my-tool setup")]);
-        let candidates = parse_pyproject_toml(dir.path(), &scan).unwrap();
-
-        assert!(candidates.iter().any(|c| c.value == "my-tool"));
-        assert!(candidates
-            .iter()
-            .any(|c| c.value == "1.0.0" && c.suggested_name == "version" && c.confidence == 0.85));
-        assert!(candidates.iter().any(|c| c.value == "Charlie"));
-    }
-
-    #[test]
-    fn test_tier2_go_mod() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(
-            dir.path().join("go.mod"),
-            "module github.com/acme/my-service\n\ngo 1.21\n",
-        )
-        .unwrap();
-
-        let scan = make_scan_result(vec![("main.go", "package main // my-service by acme")]);
-        let candidates = parse_go_mod(dir.path(), &scan).unwrap();
-
-        let project = candidates
-            .iter()
-            .find(|c| c.suggested_name == "project_name");
-        assert!(project.is_some());
-        assert_eq!(project.unwrap().value, "my-service");
-
-        let org = candidates.iter().find(|c| c.suggested_name == "org_name");
-        assert!(org.is_some(), "should extract org from go.mod module path");
-        assert_eq!(org.unwrap().value, "acme");
-    }
-
-    #[test]
-    fn test_tier2_missing_file() {
-        let dir = tempfile::tempdir().unwrap();
-        let scan = make_scan_result(vec![]);
-
-        assert!(parse_cargo_toml(dir.path(), &scan).is_none());
-        assert!(parse_package_json(dir.path(), &scan).is_none());
-        assert!(parse_pyproject_toml(dir.path(), &scan).is_none());
-        assert!(parse_go_mod(dir.path(), &scan).is_none());
-    }
-
-    #[test]
-    fn test_tier2_malformed_cargo_toml() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(
-            dir.path().join("Cargo.toml"),
-            "this is not valid toml {{{}}}",
-        )
-        .unwrap();
-        let scan = make_scan_result(vec![]);
-        assert!(parse_cargo_toml(dir.path(), &scan).is_none());
-    }
-
-    #[test]
-    fn test_tier2_version_missing() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(
-            dir.path().join("Cargo.toml"),
-            "[package]\nname = \"no-version-crate\"\n",
-        )
-        .unwrap();
-        std::fs::write(
-            dir.path().join("package.json"),
-            r#"{"name": "no-version-pkg"}"#,
-        )
-        .unwrap();
-        std::fs::write(
-            dir.path().join("pyproject.toml"),
-            "[project]\nname = \"no-version-py\"\n",
-        )
-        .unwrap();
-
-        let scan = make_scan_result(vec![]);
-
-        let cargo = parse_cargo_toml(dir.path(), &scan).unwrap();
-        assert!(!cargo.iter().any(|c| c.suggested_name == "version"));
-
-        let pkg = parse_package_json(dir.path(), &scan).unwrap();
-        assert!(!pkg.iter().any(|c| c.suggested_name == "version"));
-
-        let pyproj = parse_pyproject_toml(dir.path(), &scan).unwrap();
-        assert!(!pyproj.iter().any(|c| c.suggested_name == "version"));
-    }
-
-    // ── Tier 3 tests ─────────────────────────────────────────────────
-
-    #[test]
-    fn test_parse_org_from_url_ssh() {
-        assert_eq!(
-            parse_org_from_url("git@github.com:acme-corp/my-repo.git"),
-            Some("acme-corp".to_string())
-        );
-    }
-
-    #[test]
-    fn test_parse_org_from_url_https() {
-        assert_eq!(
-            parse_org_from_url("https://github.com/acme-corp/my-repo.git"),
-            Some("acme-corp".to_string())
-        );
-    }
-
-    #[test]
-    fn test_strip_email_with_angle_brackets() {
-        assert_eq!(strip_email("Jane Doe <jane@example.com>"), "Jane Doe");
-    }
-
-    #[test]
-    fn test_strip_email_bare_email() {
-        assert_eq!(strip_email("jane@example.com"), "jane");
-    }
-
-    #[test]
-    fn test_strip_email_no_email() {
-        assert_eq!(strip_email("Jane Doe"), "Jane Doe");
-    }
-
-    // ── Tier 4 tests ─────────────────────────────────────────────────
-
-    #[test]
-    fn test_frequency_finds_repeated_identifier() {
-        let scan = make_scan_result(vec![
-            ("a.txt", "data-pipeline is great\ndata-pipeline rocks"),
-            ("b.txt", "use data_pipeline here\ndata_pipeline again"),
-            ("c.txt", "DataPipeline class\nDataPipeline impl"),
-            ("d.txt", "DATA_PIPELINE env var\nDATA_PIPELINE config"),
-        ]);
-
-        let covered = HashSet::new();
-        let candidates = detect_frequency(&scan, &covered);
-
-        assert!(!candidates.is_empty());
-        // Should find "data-pipeline" cluster (multi-variant)
-        let found = candidates.iter().any(|c| {
-            let words = split_into_words(&c.value);
-            words == vec!["data", "pipeline"]
-        });
-        assert!(
-            found,
-            "should find data-pipeline cluster, got: {:?}",
-            candidates
-        );
-    }
-
-    #[test]
-    fn test_frequency_filters_short_tokens() {
-        let scan = make_scan_result(vec![("a.txt", "ab cd ef gh"), ("b.txt", "ab cd ef gh")]);
-
-        let covered = HashSet::new();
-        let candidates = detect_frequency(&scan, &covered);
-
-        assert!(candidates.is_empty(), "short tokens should be filtered");
-    }
-
-    #[test]
-    fn test_frequency_skips_covered_values() {
-        let scan = make_scan_result(vec![
-            ("a.txt", "my-widget rocks"),
-            ("b.txt", "my-widget is great"),
-            ("c.txt", "my_widget too"),
-        ]);
-
-        let mut covered = HashSet::new();
-        covered.insert("my-widget".to_string());
-        let candidates = detect_frequency(&scan, &covered);
-
-        let has_widget = candidates
-            .iter()
-            .any(|c| c.value.to_lowercase().contains("widget"));
-        assert!(!has_widget, "covered values should be skipped");
-    }
-
-    #[test]
-    fn test_frequency_requires_multi_variant() {
-        // Single variant only — should NOT be detected even with many occurrences
-        let scan = make_scan_result(vec![
-            ("a.txt", "async_handler async_handler async_handler"),
-            ("b.txt", "async_handler async_handler"),
-            ("c.txt", "async_handler"),
-        ]);
-
-        let covered = HashSet::new();
-        let candidates = detect_frequency(&scan, &covered);
-
-        assert!(
-            candidates.is_empty(),
-            "single-variant tokens should be filtered, got: {:?}",
-            candidates
-        );
-    }
-
-    // ── Helper tests ─────────────────────────────────────────────────
-
-    #[test]
-    fn test_deduplication_keeps_highest_confidence() {
-        let mut candidates = vec![
-            DetectedCandidate {
-                suggested_name: "project_name".to_string(),
-                value: "my-app".to_string(),
-                tier: ConfidenceTier::ConfigFile,
-                confidence: 0.90,
-                reason: "Cargo.toml".to_string(),
-                file_count: 3,
-                total_occurrences: 10,
-            },
-            DetectedCandidate {
-                suggested_name: "project_name".to_string(),
-                value: "my-app".to_string(),
-                tier: ConfidenceTier::DirectoryName,
-                confidence: 0.95,
-                reason: "directory name".to_string(),
-                file_count: 3,
-                total_occurrences: 10,
-            },
-        ];
-
-        deduplicate_candidates(&mut candidates);
-        assert_eq!(candidates.len(), 1);
-        assert_eq!(candidates[0].confidence, 0.95);
-    }
-
-    #[test]
-    fn test_name_collisions_preserved() {
-        let mut candidates = vec![
-            DetectedCandidate {
-                suggested_name: "author".to_string(),
-                value: "Alice Johnson".to_string(),
-                tier: ConfidenceTier::ConfigFile,
-                confidence: 0.85,
-                reason: "package.json".to_string(),
-                file_count: 3,
-                total_occurrences: 5,
-            },
-            DetectedCandidate {
-                suggested_name: "author".to_string(),
-                value: "Robert Roskam".to_string(),
-                tier: ConfidenceTier::GitMetadata,
-                confidence: 0.65,
-                reason: "git config".to_string(),
-                file_count: 0,
-                total_occurrences: 0,
-            },
-        ];
-
-        deduplicate_candidates(&mut candidates);
-        assert_eq!(
-            candidates.len(),
-            2,
-            "name collisions should be preserved for interactive resolution"
-        );
-    }
-
-    #[test]
-    fn test_strip_npm_scope() {
-        assert_eq!(strip_npm_scope("@myorg/cool-widget"), "cool-widget");
-        assert_eq!(strip_npm_scope("plain-package"), "plain-package");
-    }
-
-    #[test]
-    fn test_auto_detect_integration() {
-        let dir = tempfile::tempdir().unwrap();
-        let project_dir = dir.path().join("my-widget");
-        std::fs::create_dir(&project_dir).unwrap();
-        std::fs::write(
-            project_dir.join("README.md"),
-            "# my-widget\nWelcome to my-widget",
-        )
-        .unwrap();
-        std::fs::write(
-            project_dir.join("lib.rs"),
-            "pub mod my_widget;\nstruct MyWidget;",
-        )
-        .unwrap();
-
-        let scan = crate::extract::scan::scan_project(&project_dir, &[]).unwrap();
-        let result = auto_detect(&project_dir, &scan);
-
-        assert!(!result.candidates.is_empty());
-        let project_name = result
-            .candidates
-            .iter()
-            .find(|c| c.suggested_name == "project_name");
-        assert!(project_name.is_some(), "should detect project_name");
-        assert_eq!(project_name.unwrap().value, "my-widget");
-    }
-}
diff --git a/src/extract/conditional.rs b/src/extract/conditional.rs
deleted file mode 100644
index 67e7346..0000000
--- a/src/extract/conditional.rs
+++ /dev/null
@@ -1,170 +0,0 @@
-use std::path::Path;
-
-/// A known optional file pattern that can be made conditional in the template.
-#[derive(Debug, Clone)]
-pub struct ConditionalPattern {
-    /// Glob pattern to match files.
-    pub pattern: &'static str,
-    /// Variable name to control inclusion.
-    pub variable: &'static str,
-    /// Human-readable description.
-    pub description: &'static str,
-}
-
-/// Curated list of known optional file patterns.
-const KNOWN_PATTERNS: &[ConditionalPattern] = &[
-    ConditionalPattern {
-        pattern: ".github/**",
-        variable: "use_github_actions",
-        description: "GitHub Actions CI",
-    },
-    ConditionalPattern {
-        pattern: ".gitlab-ci.yml",
-        variable: "use_gitlab_ci",
-        description: "GitLab CI",
-    },
-    ConditionalPattern {
-        pattern: "Dockerfile",
-        variable: "use_docker",
-        description: "Docker support",
-    },
-    ConditionalPattern {
-        pattern: "docker-compose.yml",
-        variable: "use_docker",
-        description: "Docker support",
-    },
-    ConditionalPattern {
-        pattern: "docker-compose.yaml",
-        variable: "use_docker",
-        description: "Docker support",
-    },
-    ConditionalPattern {
-        pattern: ".pre-commit-config.yaml",
-        variable: "use_pre_commit",
-        description: "Pre-commit hooks",
-    },
-    ConditionalPattern {
-        pattern: "Makefile",
-        variable: "use_make",
-        description: "Make build system",
-    },
-    ConditionalPattern {
-        pattern: "Justfile",
-        variable: "use_just",
-        description: "Just command runner",
-    },
-    ConditionalPattern {
-        pattern: ".editorconfig",
-        variable: "use_editorconfig",
-        description: "EditorConfig",
-    },
-    ConditionalPattern {
-        pattern: "renovate.json",
-        variable: "use_renovate",
-        description: "Renovate dependency updates",
-    },
-    ConditionalPattern {
-        pattern: ".renovaterc",
-        variable: "use_renovate",
-        description: "Renovate dependency updates",
-    },
-    ConditionalPattern {
-        pattern: ".github/dependabot.yml",
-        variable: "use_dependabot",
-        description: "Dependabot",
-    },
-    ConditionalPattern {
-        pattern: ".husky/**",
-        variable: "use_husky",
-        description: "Git hooks (JS)",
-    },
-];
-
-/// A detected conditional file in the project.
-#[derive(Debug, Clone)]
-pub struct DetectedConditional {
-    /// The pattern that matched.
-    pub pattern: String,
-    /// The variable name to control this pattern.
-    pub variable: String,
-    /// Human-readable description.
-    pub description: String,
-}
-
-/// Detect which known optional file patterns exist in the project.
-///
-/// Groups by variable name — e.g., multiple Docker files share `use_docker`.
-pub fn detect_conditional_files(project_dir: &Path) -> Vec<DetectedConditional> {
-    let mut detected = Vec::new();
-    let mut seen_variables = std::collections::HashSet::new();
-
-    for known in KNOWN_PATTERNS {
-        let exists = if known.pattern.contains("**") {
-            // Directory pattern — check if the directory exists
-            let dir_part = known.pattern.split("/**").next().unwrap_or(known.pattern);
-            project_dir.join(dir_part).exists()
-        } else {
-            project_dir.join(known.pattern).exists()
-        };
-
-        if exists && seen_variables.insert(known.variable) {
-            detected.push(DetectedConditional {
-                pattern: known.pattern.to_string(),
-                variable: known.variable.to_string(),
-                description: known.description.to_string(),
-            });
-        }
-    }
-
-    detected
-}
-
-/// Get all patterns for a given variable name from the known patterns list.
-pub fn patterns_for_variable(variable: &str) -> Vec<&'static str> {
-    KNOWN_PATTERNS
-        .iter()
-        .filter(|p| p.variable == variable)
-        .map(|p| p.pattern)
-        .collect()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_detect_conditional_files_github() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::create_dir_all(dir.path().join(".github/workflows")).unwrap();
-
-        let detected = detect_conditional_files(dir.path());
-        assert_eq!(detected.len(), 1);
-        assert_eq!(detected[0].variable, "use_github_actions");
-    }
-
-    #[test]
-    fn test_detect_conditional_files_docker() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(dir.path().join("Dockerfile"), "FROM alpine").unwrap();
-        std::fs::write(dir.path().join("docker-compose.yml"), "version: '3'").unwrap();
-
-        let detected = detect_conditional_files(dir.path());
-        // Should deduplicate by variable name
-        assert_eq!(detected.len(), 1);
-        assert_eq!(detected[0].variable, "use_docker");
-    }
-
-    #[test]
-    fn test_detect_conditional_files_empty() {
-        let dir = tempfile::tempdir().unwrap();
-        let detected = detect_conditional_files(dir.path());
-        assert!(detected.is_empty());
-    }
-
-    #[test]
-    fn test_patterns_for_variable() {
-        let docker_patterns = patterns_for_variable("use_docker");
-        assert!(docker_patterns.contains(&"Dockerfile"));
-        assert!(docker_patterns.contains(&"docker-compose.yml"));
-    }
-}
diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
index d8bac71..0f75060 100644
--- a/src/extract/exclude.rs
+++ b/src/extract/exclude.rs
@@ -3,15 +3,12 @@ use std::path::Path;
 /// Default directories and files to exclude from template extraction.
 const DEFAULT_EXCLUDES: &[&str] = &[
     ".git",
-    ".git/",
     ".hg",
     ".svn",
     "node_modules",
-    "node_modules/",
     ".DS_Store",
     "Thumbs.db",
     "__pycache__",
-    "__pycache__/",
     "*.pyc",
     ".tox",
     ".nox",
@@ -19,7 +16,6 @@ const DEFAULT_EXCLUDES: &[&str] = &[
     ".ruff_cache",
     ".pytest_cache",
     "target",
-    "target/",
     ".venv",
     ".env",
     "dist",
@@ -89,10 +85,7 @@ pub fn relevant_config_excludes(template_files: &[std::path::PathBuf]) -> Vec<St
 }
 
 /// Detect which copy-without-render patterns are relevant based on files present.
-pub fn detect_copy_without_render(
-    _project_dir: &Path,
-    files: &[std::path::PathBuf],
-) -> Vec<String> {
+pub fn detect_copy_without_render(files: &[std::path::PathBuf]) -> Vec<String> {
     let mut found = Vec::new();
 
     for pattern in DEFAULT_COPY_WITHOUT_RENDER {
@@ -297,7 +290,7 @@ mod tests {
             PathBuf::from("font.woff2"),
             PathBuf::from("README.md"),
         ];
-        let found = detect_copy_without_render(Path::new("."), &files);
+        let found = detect_copy_without_render(&files);
         assert!(found.contains(&"*.png".to_string()));
         assert!(found.contains(&"*.woff2".to_string()));
         assert!(!found.contains(&"*.jpg".to_string()));
diff --git a/src/extract/interactive.rs b/src/extract/interactive.rs
deleted file mode 100644
index 2f9b8e9..0000000
--- a/src/extract/interactive.rs
+++ /dev/null
@@ -1,426 +0,0 @@
-use std::collections::BTreeMap;
-
-use console::style;
-use inquire::{Confirm, Select, Text};
-
-use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
-use crate::error::{DicecutError, Result};
-
-use super::auto_detect::{ConfidenceTier, DetectedCandidate};
-use super::conditional::DetectedConditional;
-use super::variants::generate_variants;
-use super::{ExtractVariable, PlannedExtractFile};
-
-pub fn confirm_variants_interactive(
-    variables: Vec<ExtractVariable>,
-) -> Result<Vec<ExtractVariable>> {
-    let mut confirmed = Vec::new();
-
-    for mut var in variables {
-        eprintln!(
-            "\n{} {} = {:?} {}",
-            style("──").dim(),
-            style(&var.name).bold(),
-            var.value,
-            style("──────────────────────────────────────").dim()
-        );
-
-        if var.variants.len() == 1 && var.variants[0].name == "verbatim" {
-            // Simple value — just show occurrence count
-            let (file_count, total_hits) = var
-                .occurrence_counts
-                .first()
-                .map(|(_, fc, th)| (*fc, *th))
-                .unwrap_or((0, 0));
-            if total_hits > 0 {
-                eprintln!(
-                    "  Found in {} files ({} occurrences)",
-                    file_count, total_hits
-                );
-            } else {
-                eprintln!(
-                    "  {} Value not found in any file (will still be added to config)",
-                    style("⚠").yellow()
-                );
-            }
-            confirmed.push(var);
-            continue;
-        }
-
-        // Show detected variants with counts
-        eprintln!("  Detected case variants:");
-        let mut found_any = false;
-        for (i, variant) in var.variants.iter().enumerate() {
-            let (_, file_count, total_hits) = &var.occurrence_counts[i];
-            let mark = if *total_hits > 0 {
-                found_any = true;
-                style("✓").green().to_string()
-            } else {
-                style("✗").dim().to_string()
-            };
-            let hits_str = if *total_hits > 0 {
-                format!(
-                    "{} {} across {} {}",
-                    total_hits,
-                    if *total_hits == 1 { "hit" } else { "hits" },
-                    file_count,
-                    if *file_count == 1 { "file" } else { "files" }
-                )
-            } else {
-                "not found".to_string()
-            };
-            eprintln!(
-                "    {} {:<16} {:<20} {}",
-                mark,
-                variant.literal,
-                variant.name,
-                style(&hits_str).dim()
-            );
-        }
-
-        if !found_any {
-            eprintln!(
-                "  {} No occurrences found for any variant (will still be added to config)",
-                style("⚠").yellow()
-            );
-            // Keep just the first variant
-            var.variants.truncate(1);
-            confirmed.push(var);
-            continue;
-        }
-
-        let keep = Confirm::new("Keep detected variants?")
-            .with_default(true)
-            .prompt()
-            .map_err(|_| DicecutError::PromptCancelled)?;
-
-        if keep {
-            // Remove variants with zero occurrences
-            let counts = var.occurrence_counts.clone();
-            var.variants.retain(|v| {
-                counts
-                    .iter()
-                    .any(|(name, _, hits)| name == v.name && *hits > 0)
-            });
-            if var.variants.is_empty() {
-                let all = generate_variants(&var.name, &var.value);
-                if let Some(first) = all.into_iter().next() {
-                    var.variants.push(first);
-                }
-            }
-        } else {
-            // Keep only the canonical variant
-            var.variants.truncate(1);
-        }
-
-        confirmed.push(var);
-    }
-
-    Ok(confirmed)
-}
-
-pub fn confirm_excludes_interactive(mut excludes: Vec<String>) -> Result<Vec<String>> {
-    eprintln!(
-        "\n{} Excludes {}",
-        style("──").dim(),
-        style("─────────────────────────────────────────────").dim()
-    );
-    if excludes.is_empty() {
-        eprintln!("  No exclude patterns needed for this template.");
-    } else {
-        eprintln!("  Patterns matching template files:");
-        for e in &excludes {
-            eprintln!("    {}", e);
-        }
-    }
-
-    let extra = Text::new("Add extra exclude patterns? (comma-separated, enter to skip)")
-        .with_default("")
-        .prompt()
-        .map_err(|_| DicecutError::PromptCancelled)?;
-
-    if !extra.is_empty() {
-        for pattern in extra.split(',') {
-            let trimmed = pattern.trim().to_string();
-            if !trimmed.is_empty() {
-                excludes.push(trimmed);
-            }
-        }
-    }
-
-    Ok(excludes)
-}
-
-pub fn confirm_conditionals_interactive(
-    detected: Vec<DetectedConditional>,
-) -> Result<Vec<DetectedConditional>> {
-    eprintln!(
-        "\n{} Conditional files {}",
-        style("──").dim(),
-        style("────────────────────────────────────").dim()
-    );
-    eprintln!("  These look optional. Make them conditional?");
-
-    let mut confirmed = Vec::new();
-    for cond in detected {
-        let prompt = format!("  {} → {}", cond.pattern, cond.variable);
-        let include = Confirm::new(&prompt)
-            .with_default(false)
-            .prompt()
-            .map_err(|_| DicecutError::PromptCancelled)?;
-
-        if include {
-            confirmed.push(cond);
-        }
-    }
-
-    Ok(confirmed)
-}
-
-pub fn resolve_candidates_yes(
-    candidates: &[DetectedCandidate],
-    explicit_vars: &[(String, String)],
-) -> Vec<(String, String)> {
-    eprintln!(
-        "\n{} Auto-detected variables {}",
-        style("──").dim(),
-        style("──────────────────────────────────").dim()
-    );
-
-    // Group candidates by suggested_name
-    let mut groups: BTreeMap<String, Vec<&DetectedCandidate>> = BTreeMap::new();
-    for c in candidates {
-        groups.entry(c.suggested_name.clone()).or_default().push(c);
-    }
-
-    let mut result = Vec::new();
-    let mut skipped_freq = 0;
-
-    for (name, mut group) in groups {
-        // Skip names already covered by explicit --var
-        if explicit_vars.iter().any(|(n, _)| n == &name) {
-            eprintln!(
-                "  {} {} (explicit --var, skipping auto-detect)",
-                style("·").dim(),
-                style(&name).dim()
-            );
-            continue;
-        }
-
-        // For name collisions, pick highest confidence
-        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
-        let winner = group[0];
-
-        // Skip frequency-analysis candidates in -y mode — too noisy for auto-accept
-        if winner.tier == ConfidenceTier::FrequencyAnalysis {
-            skipped_freq += 1;
-            continue;
-        }
-
-        eprintln!(
-            "  {} {} = {:?} ({:.0}% confidence, {})",
-            style("✓").green(),
-            style(&winner.suggested_name).bold(),
-            winner.value,
-            winner.confidence * 100.0,
-            winner.tier
-        );
-        eprintln!("    {}", style(&winner.reason).dim());
-
-        if group.len() > 1 {
-            eprintln!(
-                "    {} {} other candidates for this name (picked highest confidence)",
-                style("⚠").yellow(),
-                group.len() - 1
-            );
-        }
-
-        result.push((winner.suggested_name.clone(), winner.value.clone()));
-    }
-
-    if skipped_freq > 0 {
-        eprintln!(
-            "  {} {} frequency-detected candidate(s) skipped (use interactive mode to review)",
-            style("·").dim(),
-            skipped_freq
-        );
-    }
-
-    result
-}
-
-pub fn confirm_auto_detected_interactive(
-    candidates: Vec<DetectedCandidate>,
-    explicit_vars: &[(String, String)],
-) -> Result<Vec<(String, String)>> {
-    eprintln!(
-        "\n{} Auto-detected variables {}",
-        style("──").dim(),
-        style("──────────────────────────────────").dim()
-    );
-
-    // Group candidates by suggested_name
-    let mut groups: BTreeMap<String, Vec<DetectedCandidate>> = BTreeMap::new();
-    for c in candidates {
-        groups.entry(c.suggested_name.clone()).or_default().push(c);
-    }
-
-    let mut accepted = Vec::new();
-
-    for (name, mut group) in groups {
-        // Skip names already covered by explicit --var
-        if explicit_vars.iter().any(|(n, _)| n == &name) {
-            eprintln!(
-                "\n  {} {} (provided via --var, skipping)",
-                style("·").dim(),
-                style(&name).dim()
-            );
-            continue;
-        }
-
-        // Sort by confidence descending
-        group.sort_by(|a, b| b.confidence.total_cmp(&a.confidence));
-
-        if group.len() == 1 {
-            // Single candidate — simple confirm
-            let candidate = &group[0];
-            eprintln!(
-                "\n  {} = {:?} ({:.0}% confidence, {})",
-                style(&candidate.suggested_name).bold(),
-                candidate.value,
-                candidate.confidence * 100.0,
-                candidate.tier
-            );
-            eprintln!("    {}", style(&candidate.reason).dim());
-            if candidate.total_occurrences > 0 {
-                eprintln!(
-                    "    {} occurrences across {} files",
-                    candidate.total_occurrences, candidate.file_count
-                );
-            }
-
-            let accept = Confirm::new(&format!("Accept \"{}\"?", candidate.suggested_name))
-                .with_default(true)
-                .prompt()
-                .map_err(|_| DicecutError::PromptCancelled)?;
-
-            if accept {
-                accepted.push((candidate.suggested_name.clone(), candidate.value.clone()));
-            }
-        } else {
-            // Name collision — show selection prompt
-            eprintln!(
-                "\n  {} Multiple candidates for {}:",
-                style("⚠").yellow(),
-                style(&name).bold()
-            );
-
-            let mut options: Vec<String> = group
-                .iter()
-                .map(|c| {
-                    format!(
-                        "{:?} ({:.0}% confidence, {})",
-                        c.value,
-                        c.confidence * 100.0,
-                        c.tier
-                    )
-                })
-                .collect();
-            options.push("Skip".to_string());
-
-            let selection = Select::new(&format!("Which value for \"{}\"?", name), options)
-                .prompt()
-                .map_err(|_| DicecutError::PromptCancelled)?;
-
-            if selection != "Skip" {
-                // Find the matching candidate
-                if let Some(chosen) = group.iter().find(|c| {
-                    format!(
-                        "{:?} ({:.0}% confidence, {})",
-                        c.value,
-                        c.confidence * 100.0,
-                        c.tier
-                    ) == selection
-                }) {
-                    accepted.push((chosen.suggested_name.clone(), chosen.value.clone()));
-                }
-            }
-        }
-    }
-
-    Ok(accepted)
-}
-
-pub fn confirm_files_interactive(files: &[PlannedExtractFile], dropped_count: usize) -> Result<()> {
-    let templated: Vec<_> = files.iter().filter(|f| f.has_replacements()).collect();
-    let boilerplate: Vec<_> = files
-        .iter()
-        .filter(|f| !f.has_replacements() && !f.stubbed && !f.is_binary())
-        .collect();
-    let stubbed: Vec<_> = files.iter().filter(|f| f.stubbed).collect();
-    let binary_count = files.iter().filter(|f| f.is_binary()).count();
-
-    eprintln!(
-        "\n{} File plan {}",
-        style("──").dim(),
-        style("──────────────────────────────────────────").dim()
-    );
-
-    // Templated files
-    eprintln!(
-        "\n  {} ({} files, {} suffix):",
-        style("Templated").bold(),
-        templated.len(),
-        DEFAULT_TEMPLATES_SUFFIX
-    );
-    for file in &templated {
-        eprintln!(
-            "    {:<50} {} replacements",
-            file.template_path.display(),
-            file.replacement_count()
-        );
-    }
-
-    // Boilerplate files
-    eprintln!(
-        "\n  {} (copied in full, {} files{}):",
-        style("Boilerplate").bold(),
-        boilerplate.len() + binary_count,
-        if binary_count > 0 {
-            format!(", {} binary", binary_count)
-        } else {
-            String::new()
-        }
-    );
-    for file in &boilerplate {
-        eprintln!("    {}", file.template_path.display());
-    }
-
-    // Stubbed files
-    if !stubbed.is_empty() {
-        eprintln!(
-            "\n  {} (structure only, {} files):",
-            style("Stubbed").bold(),
-            stubbed.len()
-        );
-        for file in &stubbed {
-            eprintln!("    {}", file.template_path.display());
-        }
-    }
-
-    // Dropped files
-    if dropped_count > 0 {
-        eprintln!("\n  {} ({} files):", style("Dropped").bold(), dropped_count);
-    }
-
-    let proceed = Confirm::new("Proceed?")
-        .with_default(true)
-        .prompt()
-        .map_err(|_| DicecutError::PromptCancelled)?;
-
-    if !proceed {
-        return Err(DicecutError::PromptCancelled);
-    }
-
-    Ok(())
-}
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index f9d1318..767d7ae 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -1,8 +1,5 @@
-pub mod auto_detect;
-pub mod conditional;
 pub mod config_gen;
 pub mod exclude;
-pub mod interactive;
 pub mod replace;
 pub mod scan;
 pub mod stub;
@@ -16,24 +13,17 @@ use console::style;
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
 
-use self::auto_detect::{auto_detect, count_occurrences};
-use self::conditional::{detect_conditional_files, patterns_for_variable};
 use self::config_gen::{
-    generate_config_toml, ComputedVariable, ConditionalEntry, ConfigGenOptions, PromptedVariable,
+    generate_config_toml, ComputedVariable, ConfigGenOptions, PromptedVariable,
 };
 use self::exclude::{
     all_default_excludes, detect_copy_without_render, is_copy_without_render,
     relevant_config_excludes,
 };
-use self::interactive::{
-    confirm_auto_detected_interactive, confirm_conditionals_interactive,
-    confirm_excludes_interactive, confirm_files_interactive, confirm_variants_interactive,
-    resolve_candidates_yes,
-};
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
-use self::scan::scan_project;
+use self::scan::{count_occurrences, scan_project};
 use self::stub::{classify_file, generate_stub, FileRole};
 use self::variants::{
     computed_expression, detect_separator, generate_variants, is_canonical_variant, CaseVariant,
@@ -101,7 +91,6 @@ pub struct ExtractionPlan {
     pub files: Vec<PlannedExtractFile>,
     pub config_toml: String,
     pub variables: Vec<ExtractVariable>,
-    pub conditional_entries: Vec<ConditionalEntry>,
     pub exclude_patterns: Vec<String>,
     pub copy_without_render: Vec<String>,
     pub dropped_count: usize,
@@ -114,10 +103,7 @@ pub struct ExtractOptions {
     pub variables: Vec<(String, String)>,
     pub output_dir: Option<PathBuf>,
     pub in_place: bool,
-    pub yes: bool,
-    pub min_confidence: f64,
     pub stub_depth: usize,
-    pub dry_run: bool,
 }
 
 /// Plan an extraction: scan the project, detect variants, build replacement rules.
@@ -191,45 +177,11 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         }
     );
 
-    // Phase 2.5: Auto-detect variables (always runs), merge with explicit --var entries
-    let variables = {
-        let explicit_vars = options.variables.clone();
-        let detect_result = auto_detect(source_dir, &scan_result);
-
-        // Filter candidates below min_confidence threshold
-        let candidates: Vec<_> = detect_result
-            .candidates
-            .into_iter()
-            .filter(|c| c.confidence >= options.min_confidence)
-            .collect();
-
-        if candidates.is_empty() && explicit_vars.is_empty() {
-            return Err(DicecutError::ExtractNoVariables);
-        }
-
-        // Resolve auto-detected candidates (merge with explicit vars)
-        let auto_vars = if candidates.is_empty() {
-            vec![]
-        } else if options.yes {
-            resolve_candidates_yes(&candidates, &explicit_vars)
-        } else {
-            confirm_auto_detected_interactive(candidates, &explicit_vars)?
-        };
-
-        // Merge: explicit vars first (pre-accepted), then auto-detected additions
-        let mut merged = explicit_vars;
-        for (name, value) in auto_vars {
-            if !merged.iter().any(|(n, _)| n == &name) {
-                merged.push((name, value));
-            }
-        }
-
-        if merged.is_empty() {
-            return Err(DicecutError::ExtractNoVariables);
-        }
-
-        merged
-    };
+    // Validate that at least one --var was provided
+    let variables = options.variables.clone();
+    if variables.is_empty() {
+        return Err(DicecutError::ExtractNoVariables);
+    }
 
     // Phase 3: Generate variants and count occurrences
     let mut extract_variables = Vec::new();
@@ -251,43 +203,26 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         });
     }
 
-    // Phase 4: Interactive variant confirmation
-    let confirmed_variables = if options.yes {
-        // Batch mode: auto-accept all found variants
-        extract_variables
-            .into_iter()
-            .map(|mut var| {
-                var.variants.retain(|v| {
-                    var.occurrence_counts
-                        .iter()
-                        .any(|(name, _, hits)| name == v.name && *hits > 0)
-                        || v.name == "verbatim"
-                });
-                // Always keep at least the verbatim/canonical variant
-                if var.variants.is_empty() {
-                    let all = generate_variants(&var.name, &var.value);
-                    if let Some(first) = all.into_iter().next() {
-                        var.variants.push(first);
-                    }
+    // Phase 4: Auto-accept found variants (keep those with occurrences + verbatim)
+    let confirmed_variables: Vec<ExtractVariable> = extract_variables
+        .into_iter()
+        .map(|mut var| {
+            var.variants.retain(|v| {
+                var.occurrence_counts
+                    .iter()
+                    .any(|(name, _, hits)| name == v.name && *hits > 0)
+                    || v.name == "verbatim"
+            });
+            // Always keep at least the verbatim/canonical variant
+            if var.variants.is_empty() {
+                let all = generate_variants(&var.name, &var.value);
+                if let Some(first) = all.into_iter().next() {
+                    var.variants.push(first);
                 }
-                var
-            })
-            .collect()
-    } else {
-        confirm_variants_interactive(extract_variables)?
-    };
-
-    // Phase 6: Detect conditional files
-    let detected_conditionals = if options.yes {
-        vec![] // Batch mode: no conditional files
-    } else {
-        let detected = detect_conditional_files(source_dir);
-        if detected.is_empty() {
-            vec![]
-        } else {
-            confirm_conditionals_interactive(detected)?
-        }
-    };
+            }
+            var
+        })
+        .collect();
 
     // Phase 7: Build replacement rules
     let mut rules = Vec::new();
@@ -309,7 +244,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         .iter()
         .map(|f| f.relative_path.clone())
         .collect();
-    let copy_without_render = detect_copy_without_render(source_dir, &file_paths);
+    let copy_without_render = detect_copy_without_render(&file_paths);
 
     // Phase 9: Apply replacements to files
     let mut planned_files = Vec::new();
@@ -397,34 +332,9 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         .iter()
         .map(|f| f.template_path.clone())
         .collect();
-    let mut config_excludes = relevant_config_excludes(&template_paths);
-
-    if !options.yes {
-        config_excludes = confirm_excludes_interactive(config_excludes)?;
-    }
-
-    // Phase 10: Interactive file confirmation
-    if !options.yes {
-        confirm_files_interactive(&planned_files, dropped_count)?;
-    }
-
-    // Phase 11: Build conditional entries
-    let conditional_entries: Vec<ConditionalEntry> = detected_conditionals
-        .iter()
-        .map(|d| {
-            let patterns = patterns_for_variable(&d.variable)
-                .into_iter()
-                .map(|p| p.to_string())
-                .collect();
-            ConditionalEntry {
-                patterns,
-                variable: d.variable.clone(),
-                description: d.description.clone(),
-            }
-        })
-        .collect();
+    let config_excludes = relevant_config_excludes(&template_paths);
 
-    // Phase 12: Generate config
+    // Generate config
     let canonical_seps: HashMap<String, &str> = confirmed_variables
         .iter()
         .map(|v| (v.name.clone(), detect_separator(&v.value)))
@@ -473,7 +383,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         computed_variables: computed_vars,
         exclude_patterns: config_excludes.clone(),
         copy_without_render: copy_without_render.clone(),
-        conditional_entries: conditional_entries.clone(),
+        conditional_entries: vec![],
     });
 
     Ok(ExtractionPlan {
@@ -481,7 +391,6 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         files: planned_files,
         config_toml,
         variables: confirmed_variables,
-        conditional_entries,
         exclude_patterns: config_excludes,
         copy_without_render,
         dropped_count,
@@ -490,7 +399,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
 }
 
 /// Execute an extraction plan: write files and config to the output directory.
-pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()> {
+pub fn execute_extraction(plan: &ExtractionPlan) -> Result<()> {
     let output_dir = &plan.output_dir;
     let template_dir = output_dir.join("template");
 
@@ -589,12 +498,6 @@ pub fn execute_extraction(plan: &ExtractionPlan, _in_place: bool) -> Result<()>
         "  {} files templated, {} files copied, {} files stubbed, {} files dropped",
         rendered_count, copied_count, stubbed_count, plan.dropped_count
     );
-    if !plan.conditional_entries.is_empty() {
-        eprintln!(
-            "  {} conditional patterns added",
-            plan.conditional_entries.len()
-        );
-    }
     eprintln!("  Review diecut.toml to fine-tune");
 
     Ok(())
diff --git a/src/extract/replace.rs b/src/extract/replace.rs
index 95e36ad..5cbe93c 100644
--- a/src/extract/replace.rs
+++ b/src/extract/replace.rs
@@ -133,18 +133,17 @@ pub fn apply_path_replacements(path: &Path, rules: &[ReplacementRule]) -> PathBu
     components.iter().collect()
 }
 
-/// Count occurrences of a literal in a string.
-pub fn count_occurrences(content: &str, literal: &str) -> usize {
-    if literal.is_empty() {
-        return 0;
-    }
-    content.matches(literal).count()
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    fn count_occurrences(content: &str, literal: &str) -> usize {
+        if literal.is_empty() {
+            return 0;
+        }
+        content.matches(literal).count()
+    }
+
     fn make_rule(literal: &str, replacement: &str) -> ReplacementRule {
         ReplacementRule {
             literal: literal.to_string(),
diff --git a/src/extract/scan.rs b/src/extract/scan.rs
index 544aa87..088d6dd 100644
--- a/src/extract/scan.rs
+++ b/src/extract/scan.rs
@@ -98,6 +98,36 @@ pub fn scan_project(project_dir: &Path, excludes: &[String]) -> crate::error::Re
     })
 }
 
+/// Count how many files contain `value` and the total number of hits across all files.
+pub fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize) {
+    let mut file_count = 0;
+    let mut total = 0;
+
+    for file in &scan_result.files {
+        let mut counted_file = false;
+
+        if let Some(ref content) = file.content {
+            let hits = content.matches(value).count();
+            if hits > 0 {
+                file_count += 1;
+                counted_file = true;
+                total += hits;
+            }
+        }
+
+        let path_str = file.relative_path.to_string_lossy();
+        let path_hits = path_str.matches(value).count();
+        if path_hits > 0 {
+            total += path_hits;
+            if !counted_file {
+                file_count += 1;
+            }
+        }
+    }
+
+    (file_count, total)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/main.rs b/src/main.rs
index 11dec94..0e6ef6a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -24,19 +24,8 @@ fn main() -> miette::Result<()> {
             vars,
             output,
             in_place,
-            yes,
-            min_confidence,
             stub_depth,
             dry_run,
-        } => commands::extract::run(
-            source,
-            vars,
-            output,
-            in_place,
-            yes,
-            min_confidence,
-            stub_depth,
-            dry_run,
-        ),
+        } => commands::extract::run(source, vars, output, in_place, stub_depth, dry_run),
     }
 }
diff --git a/tests/integration.rs b/tests/integration.rs
index bee61fc..f56835d 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -654,14 +654,11 @@ fn test_extract_batch_basic() {
         ],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        yes: true,
-        min_confidence: 0.5,
         stub_depth: 2,
-        dry_run: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
-    execute_extraction(&plan, false).unwrap();
+    execute_extraction(&plan).unwrap();
 
     // Verify diecut.toml was created
     assert!(output_path.join("diecut.toml").exists());
@@ -699,10 +696,7 @@ fn test_extract_detects_case_variants() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        yes: true,
-        min_confidence: 0.5,
         stub_depth: 2,
-        dry_run: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -731,7 +725,7 @@ fn test_extract_detects_case_variants() {
         "should detect screaming_snake variant"
     );
 
-    execute_extraction(&plan, false).unwrap();
+    execute_extraction(&plan).unwrap();
 
     // The config should have computed variables for variants
     let config = std::fs::read_to_string(output_path.join("diecut.toml")).unwrap();
@@ -754,10 +748,7 @@ fn test_extract_dry_run_writes_nothing() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        yes: true,
-        min_confidence: 0.5,
         stub_depth: 2,
-        dry_run: true,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -784,10 +775,7 @@ fn test_extract_rejects_already_template() {
         variables: vec![("name".to_string(), "val".to_string())],
         output_dir: None,
         in_place: false,
-        yes: true,
-        min_confidence: 0.5,
         stub_depth: 2,
-        dry_run: false,
     };
 
     let result = plan_extraction(&options);
@@ -799,17 +787,13 @@ fn test_extract_rejects_no_variables() {
     let project = tempfile::tempdir().unwrap();
     std::fs::write(project.path().join("hello.txt"), "hello").unwrap();
 
-    // With min_confidence=1.0, no auto-detected candidates can pass, and no explicit
-    // vars are given, so extraction should fail with ExtractNoVariables
+    // No --var provided → should fail with ExtractNoVariables
     let options = ExtractOptions {
         source_dir: project.path().to_path_buf(),
         variables: vec![],
         output_dir: None,
         in_place: false,
-        yes: true,
-        min_confidence: 1.0,
         stub_depth: 2,
-        dry_run: false,
     };
 
     let result = plan_extraction(&options);
@@ -830,10 +814,7 @@ fn test_extract_templates_path_components() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        yes: true,
-        min_confidence: 0.5,
         stub_depth: 2,
-        dry_run: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -849,7 +830,7 @@ fn test_extract_templates_path_components() {
         "should template path components containing the variable value"
     );
 
-    execute_extraction(&plan, false).unwrap();
+    execute_extraction(&plan).unwrap();
 }
 
 #[test]
@@ -894,14 +875,11 @@ fn test_extract_round_trip() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(extracted_path.clone()),
         in_place: false,
-        yes: true,
-        min_confidence: 0.5,
         stub_depth: 2,
-        dry_run: false,
     };
 
     let plan = plan_extraction(&options).unwrap();
-    execute_extraction(&plan, false).unwrap();
+    execute_extraction(&plan).unwrap();
 
     // Verify the extracted template has the key structure
     assert!(extracted_path.join("diecut.toml").exists());
@@ -931,177 +909,3 @@ fn test_extract_round_trip() {
         }
     }
 }
-
-// ── Auto-detect tests ────────────────────────────────────────────────────
-
-#[test]
-fn test_extract_auto_yes() {
-    let project = tempfile::tempdir().unwrap();
-    let project_dir = project.path().join("data-pipeline");
-    std::fs::create_dir(&project_dir).unwrap();
-    std::fs::write(
-        project_dir.join("Cargo.toml"),
-        "[package]\nname = \"data-pipeline\"\nversion = \"0.1.0\"\n",
-    )
-    .unwrap();
-    std::fs::write(
-        project_dir.join("README.md"),
-        "# data-pipeline\nWelcome to data-pipeline\n",
-    )
-    .unwrap();
-    std::fs::create_dir(project_dir.join("src")).unwrap();
-    std::fs::write(
-        project_dir.join("src/main.rs"),
-        "fn main() {\n    println!(\"data-pipeline starting\");\n}\n",
-    )
-    .unwrap();
-
-    let output = tempfile::tempdir().unwrap();
-    let output_path = output.path().join("auto-extracted");
-
-    let options = ExtractOptions {
-        source_dir: project_dir.clone(),
-        variables: vec![],
-        output_dir: Some(output_path.clone()),
-        in_place: false,
-        yes: true,
-        min_confidence: 0.5,
-        stub_depth: 2,
-        dry_run: false,
-    };
-
-    let plan = plan_extraction(&options).unwrap();
-    execute_extraction(&plan, false).unwrap();
-
-    let project_var = plan.variables.iter().find(|v| v.name == "project_name");
-    assert!(
-        project_var.is_some(),
-        "should auto-detect project_name, got vars: {:?}",
-        plan.variables.iter().map(|v| &v.name).collect::<Vec<_>>()
-    );
-    assert_eq!(project_var.unwrap().value, "data-pipeline");
-
-    assert!(output_path.join("diecut.toml").exists());
-    let config = std::fs::read_to_string(output_path.join("diecut.toml")).unwrap();
-    assert!(config.contains("project_name"));
-}
-
-#[test]
-fn test_extract_auto_explicit_vars_merged() {
-    let project = tempfile::tempdir().unwrap();
-    let project_dir = project.path().join("my-service");
-    std::fs::create_dir(&project_dir).unwrap();
-    std::fs::write(
-        project_dir.join("Cargo.toml"),
-        "[package]\nname = \"my-service\"\n",
-    )
-    .unwrap();
-    std::fs::write(project_dir.join("README.md"), "# my-service\n").unwrap();
-
-    let output = tempfile::tempdir().unwrap();
-    let output_path = output.path().join("explicit-extracted");
-
-    let options = ExtractOptions {
-        source_dir: project_dir.clone(),
-        variables: vec![("app_name".to_string(), "my-service".to_string())],
-        output_dir: Some(output_path.clone()),
-        in_place: false,
-        yes: true,
-        min_confidence: 0.5,
-        stub_depth: 2,
-        dry_run: false,
-    };
-
-    let plan = plan_extraction(&options).unwrap();
-
-    let has_app_name = plan.variables.iter().any(|v| v.name == "app_name");
-    assert!(has_app_name, "should use explicit var app_name");
-    // Auto-detect still runs and merges additional candidates
-    // (project_name may or may not appear depending on dedup with app_name's value)
-}
-
-#[test]
-fn test_extract_auto_frequency_fallback() {
-    let project = tempfile::tempdir().unwrap();
-    let project_dir = project.path().join("cool-widget");
-    std::fs::create_dir(&project_dir).unwrap();
-    std::fs::write(
-        project_dir.join("main.txt"),
-        "cool-widget is great\ncool_widget module\nCoolWidget class\n",
-    )
-    .unwrap();
-    std::fs::write(
-        project_dir.join("config.txt"),
-        "name = cool-widget\nmodule = cool_widget\n",
-    )
-    .unwrap();
-    std::fs::write(
-        project_dir.join("test.txt"),
-        "testing cool-widget\nCOOL_WIDGET env\n",
-    )
-    .unwrap();
-
-    let output = tempfile::tempdir().unwrap();
-    let output_path = output.path().join("freq-extracted");
-
-    let options = ExtractOptions {
-        source_dir: project_dir.clone(),
-        variables: vec![],
-        output_dir: Some(output_path.clone()),
-        in_place: false,
-        yes: true,
-        min_confidence: 0.5,
-        stub_depth: 2,
-        dry_run: false,
-    };
-
-    let plan = plan_extraction(&options).unwrap();
-
-    let has_relevant_var = plan
-        .variables
-        .iter()
-        .any(|v| v.value.contains("cool") || v.name.contains("cool"));
-    assert!(
-        has_relevant_var,
-        "should detect cool-widget related variable, got: {:?}",
-        plan.variables
-            .iter()
-            .map(|v| format!("{}={}", v.name, v.value))
-            .collect::<Vec<_>>()
-    );
-}
-
-#[test]
-fn test_extract_min_confidence_filters() {
-    let project = tempfile::tempdir().unwrap();
-    let project_dir = project.path().join("tiny-app");
-    std::fs::create_dir(&project_dir).unwrap();
-    std::fs::write(
-        project_dir.join("Cargo.toml"),
-        "[package]\nname = \"tiny-app\"\nversion = \"0.1.0\"\n",
-    )
-    .unwrap();
-    std::fs::write(
-        project_dir.join("README.md"),
-        "# tiny-app\nWelcome to tiny-app\n",
-    )
-    .unwrap();
-
-    // With a very high threshold, all auto-detected candidates should be filtered out
-    let options = ExtractOptions {
-        source_dir: project_dir.clone(),
-        variables: vec![],
-        output_dir: None,
-        in_place: false,
-        yes: true,
-        min_confidence: 0.99,
-        stub_depth: 2,
-        dry_run: true,
-    };
-
-    let result = plan_extraction(&options);
-    assert!(
-        result.is_err(),
-        "high min_confidence should filter out all candidates"
-    );
-}

From 094222a71726d5cb71ed27ea7e3077bb6ce269f3 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Wed, 4 Mar 2026 23:19:48 -0500
Subject: [PATCH 27/29] refactor(extract): trim to verbatim-only for PR review

Defer variants, stub/drop, copy-without-render, the camelCase filter,
and the config_gen module to a follow-up PR. Inline minimal config
generation. Remove the `--stub-depth` flag. Lower `rust-version` in
Cargo.toml from 1.80 to 1.75.

991 lines changed (down from 2,539).
---
 Cargo.toml                |   2 +-
 src/cli.rs                |   4 -
 src/commands/extract.rs   |  30 +---
 src/extract/config_gen.rs | 206 -------------------------
 src/extract/exclude.rs    | 185 +---------------------
 src/extract/mod.rs        | 312 ++++++++------------------------------
 src/extract/replace.rs    | 116 --------------
 src/extract/scan.rs       |  83 ----------
 src/extract/stub.rs       | 222 ---------------------------
 src/extract/variants.rs   | 309 -------------------------------------
 src/main.rs               |   3 +-
 src/prompt/engine.rs      |   2 +-
 src/render/context.rs     |  75 +--------
 src/render/mod.rs         |   2 +-
 src/render/walker.rs      |   4 +-
 tests/integration.rs      | 138 -----------------
 16 files changed, 76 insertions(+), 1617 deletions(-)
 delete mode 100644 src/extract/config_gen.rs
 delete mode 100644 src/extract/stub.rs
 delete mode 100644 src/extract/variants.rs

diff --git a/Cargo.toml b/Cargo.toml
index 09bd987..3a5902b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,7 @@ name = "diecut"
 version = "0.3.4"
 edition = "2021"
 license = "MIT"
-rust-version = "1.80"
+rust-version = "1.75"
 description = "A single binary project template generator"
 
 [lib]
diff --git a/src/cli.rs b/src/cli.rs
index 92bc8e1..d87f2e8 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -67,10 +67,6 @@ pub enum Commands {
         #[arg(long)]
         in_place: bool,
 
-        /// Max path depth for stubbing content files (deeper files are dropped)
-        #[arg(long, default_value = "2")]
-        stub_depth: usize,
-
         /// Show what would be extracted without writing files
         #[arg(long)]
         dry_run: bool,
diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index b7f7e9f..37a31be 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -11,7 +11,6 @@ pub fn run(
     vars: Vec<String>,
     output: Option<String>,
     in_place: bool,
-    stub_depth: usize,
     dry_run: bool,
 ) -> Result<()> {
     let variables = parse_vars(&vars)?;
@@ -21,7 +20,6 @@ pub fn run(
         variables,
         output_dir: output.map(PathBuf::from),
         in_place,
-        stub_depth,
     };
 
     let plan = plan_extraction(&options)?;
@@ -61,12 +59,11 @@ fn print_dry_run(plan: &diecut::extract::ExtractionPlan) {
     );
 
     let templated: Vec<_> = plan.files.iter().filter(|f| f.has_replacements()).collect();
-    let boilerplate: Vec<_> = plan
+    let copied: Vec<_> = plan
         .files
         .iter()
-        .filter(|f| !f.has_replacements() && !f.stubbed)
+        .filter(|f| !f.has_replacements())
         .collect();
-    let stubbed: Vec<_> = plan.files.iter().filter(|f| f.stubbed).collect();
 
     eprintln!("\nTemplated files ({}):", templated.len());
     for file in &templated {
@@ -77,33 +74,14 @@ fn print_dry_run(plan: &diecut::extract::ExtractionPlan) {
         );
     }
 
-    eprintln!("\nBoilerplate ({}):", boilerplate.len());
-    for file in &boilerplate {
+    eprintln!("\nCopied ({}):", copied.len());
+    for file in &copied {
         eprintln!("  {}", file.template_path.display());
     }
 
-    if !stubbed.is_empty() {
-        eprintln!("\nStubbed ({}):", stubbed.len());
-        for file in &stubbed {
-            eprintln!("  {}", file.template_path.display());
-        }
-    }
-
-    if plan.dropped_count > 0 {
-        eprintln!("\nDropped ({}):", plan.dropped_count);
-        for path in &plan.dropped_paths {
-            eprintln!("  {}", path.display());
-        }
-    }
-
     eprintln!("\nVariables:");
     for var in &plan.variables {
         eprintln!("  {} = {:?}", var.name, var.value);
-        for variant in &var.variants {
-            if variant.name != "verbatim" {
-                eprintln!("    {} → {}", variant.name, variant.literal);
-            }
-        }
     }
 
     eprintln!("\nGenerated diecut.toml:");
diff --git a/src/extract/config_gen.rs b/src/extract/config_gen.rs
deleted file mode 100644
index 91dea6c..0000000
--- a/src/extract/config_gen.rs
+++ /dev/null
@@ -1,206 +0,0 @@
-/// A prompted variable entry for the generated config.
-pub struct PromptedVariable {
-    pub name: String,
-    pub default_value: String,
-    pub prompt: String,
-}
-
-/// A computed variable entry for the generated config.
-pub struct ComputedVariable {
-    pub name: String,
-    pub expression: String,
-}
-
-/// A conditional file entry for the generated config.
-#[derive(Debug, Clone)]
-pub struct ConditionalEntry {
-    pub patterns: Vec<String>,
-    pub variable: String,
-    pub description: String,
-}
-
-/// Options for generating the diecut.toml config file.
-pub struct ConfigGenOptions {
-    pub template_name: String,
-    pub prompted_variables: Vec<PromptedVariable>,
-    pub computed_variables: Vec<ComputedVariable>,
-    pub exclude_patterns: Vec<String>,
-    pub copy_without_render: Vec<String>,
-    pub conditional_entries: Vec<ConditionalEntry>,
-}
-
-/// Generate a diecut.toml config string with comments for readability.
-///
-/// Uses manual TOML string building because the `toml` crate can't serialize comments,
-/// and users need to read and edit this file.
-pub fn generate_config_toml(options: &ConfigGenOptions) -> String {
-    let mut out = String::new();
-
-    // [template] section
-    out.push_str("[template]\n");
-    out.push_str(&format!(
-        "name = {}\n",
-        escape_toml_string(&options.template_name)
-    ));
-    out.push_str("version = \"1.0.0\"\n");
-    out.push_str("# description = \"A project template\"\n");
-    out.push('\n');
-
-    // [variables] section — prompted variables first
-    if !options.prompted_variables.is_empty() || !options.computed_variables.is_empty() {
-        out.push_str("# ── Variables ──────────────────────────────────────────\n");
-        out.push_str("# Prompted variables are asked during `diecut new`.\n");
-        out.push_str("# Computed variables are auto-derived and never prompted.\n");
-        out.push('\n');
-    }
-
-    for var in &options.prompted_variables {
-        out.push_str(&format!("[variables.{}]\n", var.name));
-        out.push_str("type = \"string\"\n");
-        out.push_str(&format!("prompt = {}\n", escape_toml_string(&var.prompt)));
-        out.push_str(&format!(
-            "default = {}\n",
-            escape_toml_string(&var.default_value)
-        ));
-        out.push('\n');
-    }
-
-    // Conditional file boolean variables
-    for entry in &options.conditional_entries {
-        out.push_str(&format!("# {} ({})\n", entry.variable, entry.description));
-        out.push_str(&format!("[variables.{}]\n", entry.variable));
-        out.push_str("type = \"bool\"\n");
-        out.push_str(&format!(
-            "prompt = {}\n",
-            escape_toml_string(&format!("Include {}?", entry.description.to_lowercase()))
-        ));
-        out.push_str("default = true\n");
-        out.push('\n');
-    }
-
-    // Computed variables
-    for var in &options.computed_variables {
-        out.push_str(&format!("[variables.{}]\n", var.name));
-        out.push_str("type = \"string\"\n");
-        out.push_str(&format!(
-            "computed = {}\n",
-            escape_toml_string(&format!("{{{{ {} }}}}", var.expression))
-        ));
-        out.push('\n');
-    }
-
-    // [files] section
-    out.push_str("# ── Files ─────────────────────────────────────────────\n");
-    out.push_str("[files]\n");
-
-    if !options.exclude_patterns.is_empty() {
-        out.push_str("exclude = [\n");
-        for pattern in &options.exclude_patterns {
-            out.push_str(&format!("    {},\n", escape_toml_string(pattern)));
-        }
-        out.push_str("]\n");
-    }
-
-    if !options.copy_without_render.is_empty() {
-        out.push_str("copy_without_render = [\n");
-        for pattern in &options.copy_without_render {
-            out.push_str(&format!("    {},\n", escape_toml_string(pattern)));
-        }
-        out.push_str("]\n");
-    }
-
-    out.push('\n');
-
-    // [[files.conditional]] entries
-    for entry in &options.conditional_entries {
-        for pattern in &entry.patterns {
-            out.push_str(&format!("# {}\n", entry.description));
-            out.push_str("[[files.conditional]]\n");
-            out.push_str(&format!("pattern = {}\n", escape_toml_string(pattern)));
-            out.push_str(&format!("when = {}\n", escape_toml_string(&entry.variable)));
-            out.push('\n');
-        }
-    }
-
-    // [hooks] section
-    out.push_str("# ── Hooks ─────────────────────────────────────────────\n");
-    out.push_str("# [hooks]\n");
-    out.push_str("# post_create = \"echo 'Project created!'\"\n");
-
-    out
-}
-
-/// Escape a string for TOML output.
-fn escape_toml_string(s: &str) -> String {
-    toml::Value::String(s.to_string()).to_string()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_generate_config_basic() {
-        let options = ConfigGenOptions {
-            template_name: "my-template".to_string(),
-            prompted_variables: vec![PromptedVariable {
-                name: "project_name".to_string(),
-                default_value: "my-app".to_string(),
-                prompt: "Project name".to_string(),
-            }],
-            computed_variables: vec![ComputedVariable {
-                name: "project_name_snake".to_string(),
-                expression: "project_name | replace(from=\"-\", to=\"_\")".to_string(),
-            }],
-            exclude_patterns: vec![".git/".to_string()],
-            copy_without_render: vec!["*.png".to_string()],
-            conditional_entries: vec![],
-        };
-
-        let toml = generate_config_toml(&options);
-
-        assert!(toml.contains("[template]"));
-        assert!(toml.contains("name = \"my-template\""));
-        assert!(toml.contains("[variables.project_name]"));
-        assert!(toml.contains("type = \"string\""));
-        assert!(toml.contains("[variables.project_name_snake]"));
-        assert!(toml.contains("computed ="));
-        assert!(toml.contains("[files]"));
-        assert!(toml.contains("\".git/\""));
-        assert!(toml.contains("\"*.png\""));
-    }
-
-    #[test]
-    fn test_generate_config_with_conditionals() {
-        let options = ConfigGenOptions {
-            template_name: "test".to_string(),
-            prompted_variables: vec![],
-            computed_variables: vec![],
-            exclude_patterns: vec![],
-            copy_without_render: vec![],
-            conditional_entries: vec![ConditionalEntry {
-                patterns: vec![".github/**".to_string()],
-                variable: "use_github_actions".to_string(),
-                description: "GitHub Actions CI".to_string(),
-            }],
-        };
-
-        let toml = generate_config_toml(&options);
-
-        assert!(toml.contains("[variables.use_github_actions]"));
-        assert!(toml.contains("type = \"bool\""));
-        assert!(toml.contains("default = true"));
-        assert!(toml.contains("[[files.conditional]]"));
-        assert!(toml.contains("pattern = \".github/**\""));
-        assert!(toml.contains("when = \"use_github_actions\""));
-    }
-
-    #[test]
-    fn test_escape_toml_string() {
-        assert_eq!(escape_toml_string("hello"), "\"hello\"");
-        // toml crate uses multi-line strings for values containing quotes
-        let escaped = escape_toml_string("it's \"fine\"");
-        assert!(escaped.contains("it's"));
-        assert!(escaped.contains("fine"));
-    }
-}
diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
index 0f75060..2f6beff 100644
--- a/src/extract/exclude.rs
+++ b/src/extract/exclude.rs
@@ -30,36 +30,6 @@ const DEFAULT_EXCLUDES: &[&str] = &[
     ".diecut-answers.toml",
 ];
 
-/// Patterns for files that should be copied without rendering (binary-like or problematic).
-const DEFAULT_COPY_WITHOUT_RENDER: &[&str] = &[
-    "*.png",
-    "*.jpg",
-    "*.jpeg",
-    "*.gif",
-    "*.ico",
-    "*.svg",
-    "*.webp",
-    "*.woff",
-    "*.woff2",
-    "*.ttf",
-    "*.eot",
-    "*.otf",
-    "*.zip",
-    "*.tar",
-    "*.gz",
-    "*.bz2",
-    "*.xz",
-    "*.pdf",
-    "*.lock",
-    "package-lock.json",
-    "yarn.lock",
-    "pnpm-lock.yaml",
-    "Cargo.lock",
-    "Gemfile.lock",
-    "poetry.lock",
-    "composer.lock",
-];
-
 /// Return all default exclude patterns for use during scanning.
 ///
 /// All DEFAULT_EXCLUDES are always used during the scan phase because patterns
@@ -68,73 +38,6 @@ pub fn all_default_excludes() -> Vec<String> {
     DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect()
 }
 
-/// Return only the DEFAULT_EXCLUDES patterns that match at least one file in the
-/// template output. These are the patterns worth writing to `diecut.toml`'s
-/// `[files] exclude` — directory patterns like `.git/` or `node_modules/` that
-/// were filtered during scan are omitted since those files never appear in the
-/// template.
-pub fn relevant_config_excludes(template_files: &[std::path::PathBuf]) -> Vec<String> {
-    let all = all_default_excludes();
-    all.into_iter()
-        .filter(|pattern| {
-            template_files
-                .iter()
-                .any(|f| should_exclude(f, std::slice::from_ref(pattern)))
-        })
-        .collect()
-}
-
-/// Detect which copy-without-render patterns are relevant based on files present.
-pub fn detect_copy_without_render(files: &[std::path::PathBuf]) -> Vec<String> {
-    let mut found = Vec::new();
-
-    for pattern in DEFAULT_COPY_WITHOUT_RENDER {
-        if pattern.starts_with('*') {
-            // Extension pattern — check if any file matches
-            let ext = pattern.trim_start_matches("*.");
-            if files.iter().any(|f| {
-                f.extension()
-                    .map(|e| e.to_string_lossy().eq_ignore_ascii_case(ext))
-                    .unwrap_or(false)
-            }) {
-                found.push(pattern.to_string());
-            }
-        } else {
-            // Exact filename — check if present
-            if files.iter().any(|f| {
-                f.file_name()
-                    .map(|n| n.to_string_lossy() == *pattern)
-                    .unwrap_or(false)
-            }) {
-                found.push(pattern.to_string());
-            }
-        }
-    }
-
-    found
-}
-
-/// Check if a file should be copied without rendering (lock files, binary-like assets).
-///
-/// These files are included in the template but should never have replacements
-/// applied during extraction — they're copied verbatim.
-pub fn is_copy_without_render(path: &Path) -> bool {
-    for pattern in DEFAULT_COPY_WITHOUT_RENDER {
-        if let Some(ext) = pattern.strip_prefix("*.") {
-            if let Some(file_ext) = path.extension() {
-                if file_ext.to_string_lossy().eq_ignore_ascii_case(ext) {
-                    return true;
-                }
-            }
-        } else if let Some(file_name) = path.file_name() {
-            if file_name.to_string_lossy() == *pattern {
-                return true;
-            }
-        }
-    }
-    false
-}
-
 /// Check if a path should be excluded based on the exclude patterns.
 pub fn should_exclude(relative_path: &Path, excludes: &[String]) -> bool {
     let path_str = relative_path.to_string_lossy();
@@ -175,7 +78,7 @@ pub fn should_exclude(relative_path: &Path, excludes: &[String]) -> bool {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use std::path::PathBuf;
+    use std::path::Path;
 
     #[test]
     fn test_should_exclude_git() {
@@ -209,90 +112,4 @@ mod tests {
         assert!(!should_exclude(Path::new("src/main.rs"), &excludes));
         assert!(!should_exclude(Path::new("README.md"), &excludes));
     }
-
-    #[test]
-    fn test_all_default_excludes() {
-        let found = all_default_excludes();
-        // All DEFAULT_EXCLUDES are always included
-        assert!(found.iter().any(|e| e.contains(".git")));
-        assert!(found.iter().any(|e| e == ".DS_Store"));
-        assert!(found.iter().any(|e| e == "*.pyc"));
-        assert!(found.iter().any(|e| e.contains("node_modules")));
-    }
-
-    #[test]
-    fn test_relevant_config_excludes_empty_when_no_matches() {
-        // Typical template files won't match any DEFAULT_EXCLUDES
-        let files = vec![
-            PathBuf::from("src/main.rs"),
-            PathBuf::from("README.md"),
-            PathBuf::from("Cargo.toml"),
-        ];
-        let relevant = relevant_config_excludes(&files);
-        assert!(relevant.is_empty());
-    }
-
-    #[test]
-    fn test_relevant_config_excludes_finds_matching_patterns() {
-        let files = vec![
-            PathBuf::from("src/main.py"),
-            PathBuf::from("src/__pycache__/main.pyc"),
-            PathBuf::from(".DS_Store"),
-        ];
-        let relevant = relevant_config_excludes(&files);
-        assert!(relevant.contains(&"*.pyc".to_string()));
-        assert!(relevant.contains(&".DS_Store".to_string()));
-        assert!(relevant.contains(&"__pycache__".to_string()));
-        // Directory excludes that don't match should not appear
-        assert!(!relevant.contains(&".git".to_string()));
-        assert!(!relevant.contains(&"node_modules".to_string()));
-    }
-
-    #[test]
-    fn test_should_exclude_claude_worktrees() {
-        let excludes = all_default_excludes();
-        assert!(should_exclude(
-            Path::new(".claude/worktrees/agent-abc/Cargo.toml"),
-            &excludes
-        ));
-        // .claude/settings.local.json should NOT be excluded
-        assert!(!should_exclude(
-            Path::new(".claude/settings.local.json"),
-            &excludes
-        ));
-    }
-
-    #[test]
-    fn test_should_exclude_astro() {
-        let excludes = all_default_excludes();
-        assert!(should_exclude(
-            Path::new("docs/.astro/data-store.json"),
-            &excludes
-        ));
-        assert!(should_exclude(Path::new(".astro/settings.json"), &excludes));
-    }
-
-    #[test]
-    fn test_is_copy_without_render() {
-        assert!(is_copy_without_render(Path::new("Cargo.lock")));
-        assert!(is_copy_without_render(Path::new("pnpm-lock.yaml")));
-        assert!(is_copy_without_render(Path::new("package-lock.json")));
-        assert!(is_copy_without_render(Path::new("logo.png")));
-        assert!(is_copy_without_render(Path::new("deep/nested/file.lock")));
-        assert!(!is_copy_without_render(Path::new("src/main.rs")));
-        assert!(!is_copy_without_render(Path::new("README.md")));
-    }
-
-    #[test]
-    fn test_detect_copy_without_render() {
-        let files = vec![
-            PathBuf::from("logo.png"),
-            PathBuf::from("font.woff2"),
-            PathBuf::from("README.md"),
-        ];
-        let found = detect_copy_without_render(&files);
-        assert!(found.contains(&"*.png".to_string()));
-        assert!(found.contains(&"*.woff2".to_string()));
-        assert!(!found.contains(&"*.jpg".to_string()));
-    }
 }
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 767d7ae..29f7cd5 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -1,11 +1,7 @@
-pub mod config_gen;
 pub mod exclude;
 pub mod replace;
 pub mod scan;
-pub mod stub;
-pub mod variants;
 
-use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 
 use console::style;
@@ -13,30 +9,17 @@ use console::style;
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
 
-use self::config_gen::{
-    generate_config_toml, ComputedVariable, ConfigGenOptions, PromptedVariable,
-};
-use self::exclude::{
-    all_default_excludes, detect_copy_without_render, is_copy_without_render,
-    relevant_config_excludes,
-};
+use self::exclude::all_default_excludes;
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
-use self::scan::{count_occurrences, scan_project};
-use self::stub::{classify_file, generate_stub, FileRole};
-use self::variants::{
-    computed_expression, detect_separator, generate_variants, is_canonical_variant, CaseVariant,
-};
+use self::scan::scan_project;
 
-/// A variable with its value and confirmed case variants.
+/// A variable with its value.
 #[derive(Debug, Clone)]
 pub struct ExtractVariable {
     pub name: String,
     pub value: String,
-    pub variants: Vec<CaseVariant>,
-    /// Per-variant occurrence counts: (variant_name, file_count, total_hits).
-    pub occurrence_counts: Vec<(String, usize, usize)>,
 }
 
 /// The content of an extracted template file.
@@ -58,8 +41,6 @@ pub struct PlannedExtractFile {
     pub template_path: PathBuf,
     /// The file content (text with replacements, or binary bytes).
     pub content: ExtractedContent,
-    /// Whether this file was stubbed (content replaced with a minimal placeholder).
-    pub stubbed: bool,
 }
 
 impl PlannedExtractFile {
@@ -91,10 +72,6 @@ pub struct ExtractionPlan {
     pub files: Vec<PlannedExtractFile>,
     pub config_toml: String,
     pub variables: Vec<ExtractVariable>,
-    pub exclude_patterns: Vec<String>,
-    pub copy_without_render: Vec<String>,
-    pub dropped_count: usize,
-    pub dropped_paths: Vec<PathBuf>,
 }
 
 /// Options for the extraction process.
@@ -103,10 +80,9 @@ pub struct ExtractOptions {
     pub variables: Vec<(String, String)>,
     pub output_dir: Option<PathBuf>,
     pub in_place: bool,
-    pub stub_depth: usize,
 }
 
-/// Plan an extraction: scan the project, detect variants, build replacement rules.
+/// Plan an extraction: scan the project, build replacement rules, apply replacements.
 pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
     let source_dir = &options.source_dir;
 
@@ -145,36 +121,18 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         });
     }
 
-    // Phase 1: All default excludes for scanning (safety — never walks into .git/, node_modules/, etc.)
+    // Scan project
     let scan_excludes = all_default_excludes();
-
-    // Phase 2: Scan project
     eprintln!(
         "\n{}",
         style(format!("Scanning {}...", source_dir.display())).bold()
     );
-    let mut scan_result = scan_project(source_dir, &scan_excludes)?;
-
-    // Drop non-boilerplate files deeper than stub_depth before auto-detect sees them.
-    // This prevents frequency analysis from detecting variables that only appear in
-    // files that would be dropped anyway.
-    let pre_filter_count = scan_result.files.len();
-    scan_result.files.retain(|f| {
-        let depth = f.relative_path.components().count();
-        depth <= options.stub_depth
-            || classify_file(&f.relative_path, options.stub_depth) == FileRole::Boilerplate
-    });
-    let depth_dropped = pre_filter_count - scan_result.files.len();
+    let scan_result = scan_project(source_dir, &scan_excludes)?;
 
     eprintln!(
-        "  {} files found, {} excluded{}",
+        "  {} files found, {} excluded",
         scan_result.files.len(),
         scan_result.excluded_count,
-        if depth_dropped > 0 {
-            format!(", {} too deep", depth_dropped)
-        } else {
-            String::new()
-        }
     );
 
     // Validate that at least one --var was provided
@@ -183,73 +141,29 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
         return Err(DicecutError::ExtractNoVariables);
     }
 
-    // Phase 3: Generate variants and count occurrences
-    let mut extract_variables = Vec::new();
-
-    for (var_name, var_value) in &variables {
-        let all_variants = generate_variants(var_name, var_value);
-
-        let mut occurrence_counts = Vec::new();
-        for variant in &all_variants {
-            let (file_count, total_hits) = count_occurrences(&variant.literal, &scan_result);
-            occurrence_counts.push((variant.name.to_string(), file_count, total_hits));
-        }
-
-        extract_variables.push(ExtractVariable {
-            name: var_name.clone(),
-            value: var_value.clone(),
-            variants: all_variants,
-            occurrence_counts,
-        });
-    }
-
-    // Phase 4: Auto-accept found variants (keep those with occurrences + verbatim)
-    let confirmed_variables: Vec<ExtractVariable> = extract_variables
-        .into_iter()
-        .map(|mut var| {
-            var.variants.retain(|v| {
-                var.occurrence_counts
-                    .iter()
-                    .any(|(name, _, hits)| name == v.name && *hits > 0)
-                    || v.name == "verbatim"
-            });
-            // Always keep at least the verbatim/canonical variant
-            if var.variants.is_empty() {
-                let all = generate_variants(&var.name, &var.value);
-                if let Some(first) = all.into_iter().next() {
-                    var.variants.push(first);
-                }
-            }
-            var
+    // Build extract variables (verbatim only)
+    let extract_variables: Vec<ExtractVariable> = variables
+        .iter()
+        .map(|(name, value)| ExtractVariable {
+            name: name.clone(),
+            value: value.clone(),
         })
         .collect();
 
-    // Phase 7: Build replacement rules
-    let mut rules = Vec::new();
-    for var in &confirmed_variables {
-        for variant in &var.variants {
-            rules.push(ReplacementRule {
-                literal: variant.literal.clone(),
-                replacement: variant.tera_expr.clone(),
-                variable: var.name.clone(),
-                variant: variant.name.to_string(),
-            });
-        }
-    }
-    build_replacement_rules(&mut rules);
-
-    // Phase 8: Detect copy_without_render patterns
-    let file_paths: Vec<PathBuf> = scan_result
-        .files
+    // Build replacement rules — one rule per variable, verbatim only
+    let mut rules: Vec<ReplacementRule> = extract_variables
         .iter()
-        .map(|f| f.relative_path.clone())
+        .map(|var| ReplacementRule {
+            literal: var.value.clone(),
+            replacement: format!("{{{{ {} }}}}", var.name),
+            variable: var.name.clone(),
+            variant: "verbatim".to_string(),
+        })
         .collect();
-    let copy_without_render = detect_copy_without_render(&file_paths);
+    build_replacement_rules(&mut rules);
 
-    // Phase 9: Apply replacements to files
+    // Apply replacements to files
     let mut planned_files = Vec::new();
-    let mut dropped_count = depth_dropped;
-    let mut dropped_paths = Vec::new();
 
     for file in &scan_result.files {
         let template_path = apply_path_replacements(&file.relative_path, &rules);
@@ -263,22 +177,8 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
             planned_files.push(PlannedExtractFile {
                 template_path,
                 content: ExtractedContent::Binary(binary_content),
-                stubbed: false,
             });
         } else if let Some(ref content) = file.content {
-            // Lock files and other copy-without-render files: skip replacement
-            if is_copy_without_render(&file.relative_path) {
-                planned_files.push(PlannedExtractFile {
-                    template_path,
-                    content: ExtractedContent::Text {
-                        content: content.clone(),
-                        replacement_count: 0,
-                    },
-                    stubbed: false,
-                });
-                continue;
-            }
-
             let (replaced, count) = apply_replacements(content, &rules);
 
             if count > 0 {
@@ -291,110 +191,33 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
                         content: replaced,
                         replacement_count: count,
                     },
-                    stubbed: false,
                 });
             } else {
-                // No replacements — classify as boilerplate, content, or dropped
-                match classify_file(&file.relative_path, options.stub_depth) {
-                    FileRole::Boilerplate => {
-                        planned_files.push(PlannedExtractFile {
-                            template_path,
-                            content: ExtractedContent::Text {
-                                content: replaced,
-                                replacement_count: 0,
-                            },
-                            stubbed: false,
-                        });
-                    }
-                    FileRole::Content => {
-                        let stub = generate_stub(&file.relative_path);
-                        planned_files.push(PlannedExtractFile {
-                            template_path,
-                            content: ExtractedContent::Text {
-                                content: stub,
-                                replacement_count: 0,
-                            },
-                            stubbed: true,
-                        });
-                    }
-                    FileRole::Dropped => {
-                        dropped_count += 1;
-                        dropped_paths.push(file.relative_path.clone());
-                    }
-                }
-            }
-        }
-    }
-
-    // Phase 9.5: Compute config-appropriate excludes from planned template files
-    // Only patterns that match files actually in the template are worth writing to diecut.toml
-    let template_paths: Vec<PathBuf> = planned_files
-        .iter()
-        .map(|f| f.template_path.clone())
-        .collect();
-    let config_excludes = relevant_config_excludes(&template_paths);
-
-    // Generate config
-    let canonical_seps: HashMap<String, &str> = confirmed_variables
-        .iter()
-        .map(|v| (v.name.clone(), detect_separator(&v.value)))
-        .collect();
-
-    let prompted_vars: Vec<PromptedVariable> = confirmed_variables
-        .iter()
-        .map(|v| PromptedVariable {
-            name: v.name.clone(),
-            default_value: v.value.clone(),
-            prompt: v.name.replace(['_', '-'], " "),
-        })
-        .collect();
-
-    let mut computed_vars = Vec::new();
-    for var in &confirmed_variables {
-        let canonical_sep = canonical_seps.get(&var.name).copied().unwrap_or("-");
-        for variant in &var.variants {
-            // Skip the canonical variant (it uses the variable directly)
-            if variant.name == "verbatim" {
-                continue;
-            }
-            // Skip the variant that matches the canonical separator
-            if is_canonical_variant(variant.name, canonical_sep) {
-                continue;
-            }
-
-            let computed_name = format!("{}_{}", var.name, variant.name);
-            let expression = computed_expression(&var.name, variant.name, canonical_sep);
-            // Don't add if expression is just the variable name
-            if expression != var.name {
-                computed_vars.push(ComputedVariable {
-                    name: computed_name,
-                    expression,
+                // No replacements — copy verbatim
+                planned_files.push(PlannedExtractFile {
+                    template_path,
+                    content: ExtractedContent::Text {
+                        content: replaced,
+                        replacement_count: 0,
+                    },
                 });
             }
         }
     }
 
-    let config_toml = generate_config_toml(&ConfigGenOptions {
-        template_name: source_dir
-            .file_name()
-            .map(|n| n.to_string_lossy().to_string())
-            .unwrap_or_else(|| "template".to_string()),
-        prompted_variables: prompted_vars,
-        computed_variables: computed_vars,
-        exclude_patterns: config_excludes.clone(),
-        copy_without_render: copy_without_render.clone(),
-        conditional_entries: vec![],
-    });
+    // Generate minimal config TOML inline
+    let template_name = source_dir
+        .file_name()
+        .map(|n| n.to_string_lossy().to_string())
+        .unwrap_or_else(|| "template".to_string());
+
+    let config_toml = generate_minimal_config(&template_name, &extract_variables);
 
     Ok(ExtractionPlan {
         output_dir,
         files: planned_files,
         config_toml,
-        variables: confirmed_variables,
-        exclude_patterns: config_excludes,
-        copy_without_render,
-        dropped_count,
-        dropped_paths,
+        variables: extract_variables,
     })
 }
 
@@ -412,7 +235,6 @@ pub fn execute_extraction(plan: &ExtractionPlan) -> Result<()> {
     // Write template files
     let mut rendered_count = 0;
     let mut copied_count = 0;
-    let mut stubbed_count = 0;
 
     for file in &plan.files {
         let dest = template_dir.join(&file.template_path);
@@ -436,8 +258,6 @@ pub fn execute_extraction(plan: &ExtractionPlan) -> Result<()> {
                 })?;
                 if *replacement_count > 0 {
                     rendered_count += 1;
-                } else if file.stubbed {
-                    stubbed_count += 1;
                 } else {
                     copied_count += 1;
                 }
@@ -460,45 +280,39 @@ pub fn execute_extraction(plan: &ExtractionPlan) -> Result<()> {
     })?;
 
     // Summary
-    let prompted_count = plan.variables.len();
-    let computed_count = plan
-        .variables
-        .iter()
-        .flat_map(|v| &v.variants)
-        .filter(|variant| {
-            variant.name != "verbatim"
-                && !matches!(
-                    (
-                        variant.name,
-                        detect_separator(
-                            plan.variables
-                                .iter()
-                                .find(|v2| v2.variants.contains(variant))
-                                .map(|v2| v2.value.as_str())
-                                .unwrap_or("")
-                        )
-                    ),
-                    ("kebab", "-") | ("snake", "_") | ("dot", ".")
-                )
-        })
-        .count();
-
     eprintln!(
         "\n{} Template extracted to {}",
         style("✓").green().bold(),
         style(output_dir.display()).cyan()
     );
     eprintln!(
-        "  {} variables ({} prompted, {} computed)",
-        prompted_count + computed_count,
-        prompted_count,
-        computed_count
-    );
-    eprintln!(
-        "  {} files templated, {} files copied, {} files stubbed, {} files dropped",
-        rendered_count, copied_count, stubbed_count, plan.dropped_count
+        "  {} variables, {} files templated, {} files copied",
+        plan.variables.len(),
+        rendered_count,
+        copied_count,
     );
     eprintln!("  Review diecut.toml to fine-tune");
 
     Ok(())
 }
+
+fn generate_minimal_config(template_name: &str, variables: &[ExtractVariable]) -> String {
+    let escape = |s: &str| toml::Value::String(s.to_string()).to_string();
+    let mut out = String::new();
+
+    out.push_str(&format!("[template]\nname = {}\n", escape(template_name)));
+    out.push_str("version = \"1.0.0\"\n\n");
+
+    for var in variables {
+        out.push_str(&format!("[variables.{}]\n", var.name));
+        out.push_str(&format!(
+            "type = \"string\"\nprompt = {}\n",
+            escape(&var.name.replace(['_', '-'], " "))
+        ));
+        out.push_str(&format!("default = {}\n\n", escape(&var.value)));
+    }
+
+    out.push_str("[files]\n# exclude = []\n# copy_without_render = []\n\n");
+    out.push_str("# [hooks]\n# post_create = \"echo 'Project created!'\"\n");
+    out
+}
diff --git a/src/extract/replace.rs b/src/extract/replace.rs
index 5cbe93c..419405a 100644
--- a/src/extract/replace.rs
+++ b/src/extract/replace.rs
@@ -132,119 +132,3 @@ pub fn apply_path_replacements(path: &Path, rules: &[ReplacementRule]) -> PathBu
 
     components.iter().collect()
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn count_occurrences(content: &str, literal: &str) -> usize {
-        if literal.is_empty() {
-            return 0;
-        }
-        content.matches(literal).count()
-    }
-
-    fn make_rule(literal: &str, replacement: &str) -> ReplacementRule {
-        ReplacementRule {
-            literal: literal.to_string(),
-            replacement: replacement.to_string(),
-            variable: "test".to_string(),
-            variant: "test".to_string(),
-        }
-    }
-
-    #[test]
-    fn test_apply_replacements_basic() {
-        let rules = vec![make_rule("my-app", "{{ project_name }}")];
-        let (result, count) = apply_replacements("Welcome to my-app!", &rules);
-        assert_eq!(result, "Welcome to {{ project_name }}!");
-        assert_eq!(count, 1);
-    }
-
-    #[test]
-    fn test_apply_replacements_multiple() {
-        let rules = vec![make_rule("my-app", "{{ project_name }}")];
-        let (result, count) = apply_replacements("my-app is great, use my-app", &rules);
-        assert_eq!(
-            result,
-            "{{ project_name }} is great, use {{ project_name }}"
-        );
-        assert_eq!(count, 2);
-    }
-
-    #[test]
-    fn test_longest_match_first() {
-        let mut rules = vec![
-            make_rule("my", "{{ org }}"),
-            make_rule("my-app", "{{ project_name }}"),
-        ];
-        build_replacement_rules(&mut rules);
-
-        // "my-app" should match before "my"
-        assert_eq!(rules[0].literal, "my-app");
-        assert_eq!(rules[1].literal, "my");
-    }
-
-    #[test]
-    fn test_apply_replacements_empty_rules() {
-        let (result, count) = apply_replacements("hello world", &[]);
-        assert_eq!(result, "hello world");
-        assert_eq!(count, 0);
-    }
-
-    #[test]
-    fn test_apply_path_replacements() {
-        let rules = vec![make_rule("my-app", "{{ project_name }}")];
-        let path = Path::new("my-app/src/main.rs");
-        let result = apply_path_replacements(path, &rules);
-        assert_eq!(result, PathBuf::from("{{ project_name }}/src/main.rs"));
-    }
-
-    #[test]
-    fn test_count_occurrences() {
-        assert_eq!(count_occurrences("my-app and my-app", "my-app"), 2);
-        assert_eq!(count_occurrences("hello world", "missing"), 0);
-        assert_eq!(count_occurrences("anything", ""), 0);
-    }
-
-    #[test]
-    fn test_no_substring_collision_suffix() {
-        let rules = vec![make_rule("app", "{{ name }}")];
-        let (result, count) = apply_replacements("application startup", &rules);
-        assert_eq!(result, "application startup");
-        assert_eq!(count, 0);
-    }
-
-    #[test]
-    fn test_no_substring_collision_prefix() {
-        let rules = vec![make_rule("app", "{{ name }}")];
-        let (result, count) = apply_replacements("webapp is cool", &rules);
-        assert_eq!(result, "webapp is cool");
-        assert_eq!(count, 0);
-    }
-
-    #[test]
-    fn test_standalone_match_with_punctuation() {
-        let rules = vec![make_rule("app", "{{ name }}")];
-        let (result, count) = apply_replacements("run app. start app!", &rules);
-        assert_eq!(result, "run {{ name }}. start {{ name }}!");
-        assert_eq!(count, 2);
-    }
-
-    #[test]
-    fn test_match_at_string_boundaries() {
-        let rules = vec![make_rule("app", "{{ name }}")];
-        let (result, count) = apply_replacements("app", &rules);
-        assert_eq!(result, "{{ name }}");
-        assert_eq!(count, 1);
-    }
-
-    #[test]
-    fn test_compound_literal_still_matches() {
-        // Multi-word literals like "my-app" should still match inside strings
-        let rules = vec![make_rule("my-app", "{{ name }}")];
-        let (result, count) = apply_replacements("name = \"my-app\"", &rules);
-        assert_eq!(result, "name = \"{{ name }}\"");
-        assert_eq!(count, 1);
-    }
-}
diff --git a/src/extract/scan.rs b/src/extract/scan.rs
index 088d6dd..c5ea216 100644
--- a/src/extract/scan.rs
+++ b/src/extract/scan.rs
@@ -127,86 +127,3 @@ pub fn count_occurrences(value: &str, scan_result: &ScanResult) -> (usize, usize
 
     (file_count, total)
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_scan_project_basic() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(dir.path().join("README.md"), "# Hello").unwrap();
-        std::fs::create_dir(dir.path().join("src")).unwrap();
-        std::fs::write(dir.path().join("src/main.rs"), "fn main() {}").unwrap();
-
-        let result = scan_project(dir.path(), &[]).unwrap();
-        assert_eq!(result.files.len(), 2);
-        assert_eq!(result.excluded_count, 0);
-    }
-
-    #[test]
-    fn test_scan_project_with_excludes() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(dir.path().join("README.md"), "# Hello").unwrap();
-        std::fs::create_dir(dir.path().join(".git")).unwrap();
-        std::fs::write(dir.path().join(".git/config"), "").unwrap();
-
-        let excludes = vec![".git".to_string()];
-        let result = scan_project(dir.path(), &excludes).unwrap();
-        assert_eq!(result.files.len(), 1);
-        assert_eq!(result.excluded_count, 1);
-        assert_eq!(result.files[0].relative_path, PathBuf::from("README.md"));
-    }
-
-    #[cfg(unix)]
-    #[test]
-    fn test_scan_project_skips_symlinks_to_directories() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(dir.path().join("real.txt"), "hello").unwrap();
-
-        // Create a subdirectory and a symlink pointing to it
-        let subdir = dir.path().join("subdir");
-        std::fs::create_dir(&subdir).unwrap();
-        std::fs::write(subdir.join("nested.txt"), "nested").unwrap();
-        std::os::unix::fs::symlink(&subdir, dir.path().join("link-to-dir")).unwrap();
-
-        let result = scan_project(dir.path(), &[]).unwrap();
-        // Should find real.txt and subdir/nested.txt, but NOT choke on link-to-dir
-        let paths: Vec<String> = result
-            .files
-            .iter()
-            .map(|f| f.relative_path.to_string_lossy().to_string())
-            .collect();
-        assert!(paths.contains(&"real.txt".to_string()));
-        assert!(paths.contains(&"subdir/nested.txt".to_string()));
-        assert!(!paths.iter().any(|p| p.contains("link-to-dir")));
-    }
-
-    #[test]
-    fn test_scan_project_binary_detection() {
-        let dir = tempfile::tempdir().unwrap();
-        std::fs::write(dir.path().join("text.txt"), "hello").unwrap();
-        std::fs::write(
-            dir.path().join("binary.bin"),
-            &(0..256).map(|i| i as u8).collect::<Vec<u8>>(),
-        )
-        .unwrap();
-
-        let result = scan_project(dir.path(), &[]).unwrap();
-        let text_file = result
-            .files
-            .iter()
-            .find(|f| f.relative_path.to_string_lossy() == "text.txt")
-            .unwrap();
-        let binary_file = result
-            .files
-            .iter()
-            .find(|f| f.relative_path.to_string_lossy() == "binary.bin")
-            .unwrap();
-
-        assert!(!text_file.is_binary);
-        assert!(text_file.content.is_some());
-        assert!(binary_file.is_binary);
-        assert!(binary_file.content.is_none());
-    }
-}
diff --git a/src/extract/stub.rs b/src/extract/stub.rs
deleted file mode 100644
index 8c6ce47..0000000
--- a/src/extract/stub.rs
+++ /dev/null
@@ -1,222 +0,0 @@
-use std::path::Path;
-
-/// Whether a file is boilerplate (copy in full), content (stub), or too deep (drop).
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum FileRole {
-    /// Config, dotfiles, CI — copy verbatim into the template.
-    Boilerplate,
-    /// Prose, docs, source — stub to minimal placeholder.
-    Content,
-    /// Content deeper than stub_depth — drop entirely.
-    Dropped,
-}
-
-/// Filenames (case-insensitive) that are always boilerplate.
-const BOILERPLATE_FILENAMES: &[&str] = &[
-    ".gitignore",
-    ".gitattributes",
-    ".editorconfig",
-    ".prettierrc",
-    ".npmrc",
-    ".nvmrc",
-    ".gitkeep",
-    "makefile",
-    "dockerfile",
-    "justfile",
-    "license",
-    "licence",
-    "procfile",
-];
-
-/// Extensions (case-insensitive, without dot) that are always boilerplate.
-const BOILERPLATE_EXTENSIONS: &[&str] = &[
-    "toml", "yaml", "yml", "json", "jsonc", "json5", "xml", "sh", "bash", "zsh", "bat", "cmd",
-    "ps1", "cfg", "ini", "conf",
-];
-
-/// Directory prefixes — files under these dirs are boilerplate.
-const BOILERPLATE_DIR_PREFIXES: &[&str] = &[".github/", ".gitlab/", ".circleci/", ".vscode/"];
-
-/// Classify a file as boilerplate, content, or dropped based on its relative path.
-///
-/// Only called for text files with 0 template replacements.
-/// Files deeper than `stub_depth` path components are dropped entirely.
-pub fn classify_file(path: &Path, stub_depth: usize) -> FileRole {
-    let path_str = path.to_string_lossy();
-
-    // Check directory prefix
-    for prefix in BOILERPLATE_DIR_PREFIXES {
-        if path_str.starts_with(prefix) {
-            return FileRole::Boilerplate;
-        }
-    }
-
-    // Check filename (case-insensitive)
-    if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
-        let lower = filename.to_lowercase();
-        if BOILERPLATE_FILENAMES.contains(&lower.as_str()) {
-            return FileRole::Boilerplate;
-        }
-    }
-
-    // Check extension (case-insensitive)
-    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
-        let lower = ext.to_lowercase();
-        if BOILERPLATE_EXTENSIONS.contains(&lower.as_str()) {
-            return FileRole::Boilerplate;
-        }
-    }
-
-    let depth = path.components().count();
-    if depth > stub_depth {
-        FileRole::Dropped
-    } else {
-        FileRole::Content
-    }
-}
-
-/// Generate a minimal stub for a content file.
-///
-/// - `.md` files get `# {Title}\n` where Title is derived from the filename.
-/// - Everything else gets an empty string.
-pub fn generate_stub(path: &Path) -> String {
-    let is_md = path
-        .extension()
-        .and_then(|e| e.to_str())
-        .is_some_and(|e| e.eq_ignore_ascii_case("md"));
-
-    if is_md {
-        let title = path
-            .file_stem()
-            .and_then(|s| s.to_str())
-            .unwrap_or("Untitled");
-        // Title-case: capitalize first letter, leave rest as-is
-        let title = title_case(title);
-        format!("# {title}\n")
-    } else {
-        String::new()
-    }
-}
-
-/// Convert a filename stem like "craft" or "SKILL" into title case.
-///
-/// Splits on `-` and `_`, capitalizes each word's first letter.
-fn title_case(s: &str) -> String {
-    s.split(['-', '_'])
-        .filter(|w| !w.is_empty())
-        .map(|word| {
-            let mut chars = word.chars();
-            match chars.next() {
-                Some(first) => {
-                    let rest: String = chars.collect::<String>().to_lowercase();
-                    format!("{}{rest}", first.to_uppercase())
-                }
-                None => String::new(),
-            }
-        })
-        .collect::<Vec<_>>()
-        .join(" ")
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use rstest::rstest;
-
-    // ── classify_file ────────────────────────────────────────────────
-
-    #[rstest]
-    #[case(".gitignore", FileRole::Boilerplate)]
-    #[case(".editorconfig", FileRole::Boilerplate)]
-    #[case("Makefile", FileRole::Boilerplate)]
-    #[case("Dockerfile", FileRole::Boilerplate)]
-    #[case("LICENSE", FileRole::Boilerplate)]
-    #[case("Procfile", FileRole::Boilerplate)]
-    fn classify_boilerplate_filenames(#[case] filename: &str, #[case] expected: FileRole) {
-        assert_eq!(classify_file(Path::new(filename), 2), expected);
-    }
-
-    #[rstest]
-    #[case("Cargo.toml", FileRole::Boilerplate)]
-    #[case("config.yaml", FileRole::Boilerplate)]
-    #[case("settings.yml", FileRole::Boilerplate)]
-    #[case("package.json", FileRole::Boilerplate)]
-    #[case("tsconfig.json", FileRole::Boilerplate)]
-    #[case("setup.cfg", FileRole::Boilerplate)]
-    #[case("build.sh", FileRole::Boilerplate)]
-    #[case("deploy.ps1", FileRole::Boilerplate)]
-    #[case("app.conf", FileRole::Boilerplate)]
-    fn classify_boilerplate_extensions(#[case] filename: &str, #[case] expected: FileRole) {
-        assert_eq!(classify_file(Path::new(filename), 2), expected);
-    }
-
-    #[rstest]
-    #[case(".github/workflows/ci.yml", FileRole::Boilerplate)]
-    #[case(".github/CODEOWNERS", FileRole::Boilerplate)]
-    #[case(".gitlab/ci/deploy.yml", FileRole::Boilerplate)]
-    #[case(".circleci/config.yml", FileRole::Boilerplate)]
-    #[case(".vscode/settings.json", FileRole::Boilerplate)]
-    fn classify_boilerplate_directories(#[case] path: &str, #[case] expected: FileRole) {
-        assert_eq!(classify_file(Path::new(path), 2), expected);
-    }
-
-    #[rstest]
-    #[case("README.md", 2)]
-    #[case("docs/guide.md", 2)]
-    #[case("src/main.rs", 2)]
-    #[case("src/lib.py", 2)]
-    #[case("index.html", 2)]
-    #[case("app.css", 2)]
-    #[case("skills/convention-mining/SKILL.md", 3)] // depth 3, stub_depth 3 → Content
-    fn classify_content(#[case] path: &str, #[case] stub_depth: usize) {
-        assert_eq!(
-            classify_file(Path::new(path), stub_depth),
-            FileRole::Content
-        );
-    }
-
-    #[rstest]
-    #[case("skills/convention-mining/SKILL.md", 2)] // depth 3 > stub_depth 2
-    #[case("skills/writing-skills/craft.md", 2)] // depth 3 > stub_depth 2
-    #[case("a/b/c/deep.md", 2)] // depth 4 > stub_depth 2
-    #[case("docs/guide.md", 1)] // depth 2 > stub_depth 1
-    fn classify_dropped(#[case] path: &str, #[case] stub_depth: usize) {
-        assert_eq!(
-            classify_file(Path::new(path), stub_depth),
-            FileRole::Dropped
-        );
-    }
-
-    // ── generate_stub ────────────────────────────────────────────────
-
-    #[rstest]
-    #[case("README.md", "# Readme\n")]
-    #[case("craft.md", "# Craft\n")]
-    #[case("SKILL.md", "# Skill\n")]
-    #[case("getting-started.md", "# Getting Started\n")]
-    #[case("my_notes.md", "# My Notes\n")]
-    fn stub_md_files(#[case] filename: &str, #[case] expected: &str) {
-        assert_eq!(generate_stub(Path::new(filename)), expected);
-    }
-
-    #[rstest]
-    #[case("src/main.rs")]
-    #[case("index.html")]
-    #[case("app.css")]
-    #[case("data.txt")]
-    fn stub_non_md_files(#[case] filename: &str) {
-        assert_eq!(generate_stub(Path::new(filename)), "");
-    }
-
-    // ── title_case ───────────────────────────────────────────────────
-
-    #[rstest]
-    #[case("craft", "Craft")]
-    #[case("SKILL", "Skill")]
-    #[case("getting-started", "Getting Started")]
-    #[case("my_notes", "My Notes")]
-    #[case("README", "Readme")]
-    fn test_title_case(#[case] input: &str, #[case] expected: &str) {
-        assert_eq!(title_case(input), expected);
-    }
-}
diff --git a/src/extract/variants.rs b/src/extract/variants.rs
deleted file mode 100644
index 525b475..0000000
--- a/src/extract/variants.rs
+++ /dev/null
@@ -1,309 +0,0 @@
-use std::sync::LazyLock;
-
-use regex_lite::Regex;
-
-static CAMEL_SPLIT_RE: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"[A-Z][a-z]*|[a-z]+|[0-9]+").unwrap());
-
-/// A case variant of a variable value, with its literal text and Tera expression.
-#[derive(Debug, Clone, PartialEq)]
-pub struct CaseVariant {
-    pub name: &'static str,
-    pub literal: String,
-    pub tera_expr: String,
-}
-
-/// Split a string value into words for case variant generation.
-///
-/// Handles kebab-case, snake_case, camelCase, PascalCase, dot.case, and space-separated.
-pub fn split_into_words(value: &str) -> Vec<String> {
-    if value.contains('-') {
-        return value.split('-').map(|s| s.to_lowercase()).collect();
-    }
-    if value.contains('_') {
-        return value.split('_').map(|s| s.to_lowercase()).collect();
-    }
-    if value.contains('.') {
-        return value.split('.').map(|s| s.to_lowercase()).collect();
-    }
-    if value.contains(' ') {
-        return value.split_whitespace().map(|s| s.to_lowercase()).collect();
-    }
-
-    // camelCase / PascalCase splitting
-    let words: Vec<String> = CAMEL_SPLIT_RE
-        .find_iter(value)
-        .map(|m| m.as_str().to_lowercase())
-        .collect();
-
-    if words.is_empty() {
-        vec![value.to_lowercase()]
-    } else {
-        words
-    }
-}
-
-/// Detect if a value is "multi-word" in a way that supports case variants.
-///
-/// Single words and space-separated phrases skip variant detection.
-fn supports_case_variants(value: &str) -> bool {
-    let words = split_into_words(value);
-    if words.len() < 2 {
-        return false;
-    }
-    // Space-separated values (like author names) skip variant detection
-    if value.contains(' ') {
-        return false;
-    }
-    true
-}
-
-fn to_kebab(words: &[String]) -> String {
-    words.join("-")
-}
-
-fn to_snake(words: &[String]) -> String {
-    words.join("_")
-}
-
-fn to_screaming_snake(words: &[String]) -> String {
-    words
-        .iter()
-        .map(|w| w.to_uppercase())
-        .collect::<Vec<_>>()
-        .join("_")
-}
-
-fn to_screaming_kebab(words: &[String]) -> String {
-    words
-        .iter()
-        .map(|w| w.to_uppercase())
-        .collect::<Vec<_>>()
-        .join("-")
-}
-
-fn to_pascal(words: &[String]) -> String {
-    words
-        .iter()
-        .map(|w| {
-            let mut chars = w.chars();
-            match chars.next() {
-                Some(c) => {
-                    let upper: String = c.to_uppercase().collect();
-                    upper + chars.as_str()
-                }
-                None => String::new(),
-            }
-        })
-        .collect()
-}
-
-fn to_camel(words: &[String]) -> String {
-    let pascal = to_pascal(words);
-    let mut chars = pascal.chars();
-    match chars.next() {
-        Some(c) => {
-            let lower: String = c.to_lowercase().collect();
-            lower + chars.as_str()
-        }
-        None => String::new(),
-    }
-}
-
-fn to_dot(words: &[String]) -> String {
-    words.join(".")
-}
-
-/// Detect the canonical separator in the original value.
-pub fn detect_separator(value: &str) -> &'static str {
-    if value.contains('-') {
-        "-"
-    } else if value.contains('_') {
-        "_"
-    } else if value.contains('.') {
-        "."
-    } else {
-        // PascalCase/camelCase — treat as kebab canonical
-        "-"
-    }
-}
-
-/// Check whether a variant is the canonical one (matches the input separator).
-///
-/// Canonical variants use the bare `{{ var_name }}` expression and do not get
-/// a computed variable in diecut.toml.
-pub fn is_canonical_variant(variant_name: &str, canonical_sep: &str) -> bool {
-    matches!(
-        (variant_name, canonical_sep),
-        ("kebab", "-") | ("snake", "_") | ("dot", ".")
-    )
-}
-
-/// Build a Tera expression for a variant, given the variable name and canonical separator.
-///
-/// Canonical variants use `{{ var_name }}` directly. Non-canonical variants reference
-/// their computed variable (e.g., `{{ var_name_snake }}`), which is defined in diecut.toml.
-fn tera_expr_for_variant(var_name: &str, variant_name: &str, canonical_sep: &str) -> String {
-    if variant_name == "verbatim" || is_canonical_variant(variant_name, canonical_sep) {
-        return format!("{{{{ {var_name} }}}}");
-    }
-    // Non-canonical variants reference their computed variable name
-    format!("{{{{ {var_name}_{variant_name} }}}}")
-}
-
-/// Generate all case variants for a given variable value.
-///
-/// Returns the canonical variant first, followed by alternatives.
-/// Only returns variants whose literal differs from the canonical form.
-/// Single-word values and space-separated phrases return only a verbatim replacement.
-pub fn generate_variants(var_name: &str, value: &str) -> Vec<CaseVariant> {
-    if !supports_case_variants(value) {
-        return vec![CaseVariant {
-            name: "verbatim",
-            literal: value.to_string(),
-            tera_expr: format!("{{{{ {var_name} }}}}"),
-        }];
-    }
-
-    let words = split_into_words(value);
-    let canonical_sep = detect_separator(value);
-
-    let candidates: Vec<(&str, String)> = vec![
-        ("kebab", to_kebab(&words)),
-        ("snake", to_snake(&words)),
-        ("screaming_snake", to_screaming_snake(&words)),
-        ("screaming_kebab", to_screaming_kebab(&words)),
-        ("pascal", to_pascal(&words)),
-        ("camel", to_camel(&words)),
-        ("dot", to_dot(&words)),
-    ];
-
-    // Deduplicate: some variants produce the same literal (e.g., single-word)
-    let mut seen = std::collections::HashSet::new();
-    let mut variants = Vec::new();
-
-    for (name, literal) in candidates {
-        if seen.insert(literal.clone()) {
-            let tera_expr = tera_expr_for_variant(var_name, name, canonical_sep);
-            variants.push(CaseVariant {
-                name,
-                literal,
-                tera_expr,
-            });
-        }
-    }
-
-    variants
-}
-
-/// Build a computed Tera expression for a named variant variable.
-///
-/// This is used in diecut.toml for computed variables like `project_name_snake`.
-pub fn computed_expression(var_name: &str, variant_name: &str, canonical_sep: &str) -> String {
-    match (variant_name, canonical_sep) {
-        ("snake", sep) if sep != "_" => {
-            format!("{var_name} | replace(from=\"{sep}\", to=\"_\")")
-        }
-        ("screaming_snake", sep) => {
-            if sep == "_" {
-                format!("{var_name} | upper")
-            } else {
-                format!("{var_name} | replace(from=\"{sep}\", to=\"_\") | upper")
-            }
-        }
-        ("screaming_kebab", sep) => {
-            if sep == "-" {
-                format!("{var_name} | upper")
-            } else {
-                format!("{var_name} | replace(from=\"{sep}\", to=\"-\") | upper")
-            }
-        }
-        ("pascal", sep) => {
-            format!("{var_name} | replace(from=\"{sep}\", to=\" \") | title | replace(from=\" \", to=\"\")")
-        }
-        ("camel", sep) => {
-            format!("{var_name} | camelcase(sep=\"{sep}\")")
-        }
-        ("kebab", sep) if sep != "-" => {
-            format!("{var_name} | replace(from=\"{sep}\", to=\"-\")")
-        }
-        ("dot", sep) if sep != "." => {
-            format!("{var_name} | replace(from=\"{sep}\", to=\".\")")
-        }
-        _ => var_name.to_string(),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use rstest::rstest;
-
-    #[rstest]
-    #[case("my-app", vec!["my", "app"])]
-    #[case("my_app", vec!["my", "app"])]
-    #[case("MyApp", vec!["my", "app"])]
-    #[case("myApp", vec!["my", "app"])]
-    #[case("my.app", vec!["my", "app"])]
-    #[case("my app", vec!["my", "app"])]
-    #[case("single", vec!["single"])]
-    fn test_split_into_words(#[case] input: &str, #[case] expected: Vec<&str>) {
-        assert_eq!(split_into_words(input), expected);
-    }
-
-    #[test]
-    fn test_generate_variants_kebab() {
-        let variants = generate_variants("project_name", "my-app");
-        let names: Vec<&str> = variants.iter().map(|v| v.name).collect();
-        assert!(names.contains(&"kebab"));
-        assert!(names.contains(&"snake"));
-        assert!(names.contains(&"pascal"));
-
-        let kebab = variants.iter().find(|v| v.name == "kebab").unwrap();
-        assert_eq!(kebab.literal, "my-app");
-
-        let snake = variants.iter().find(|v| v.name == "snake").unwrap();
-        assert_eq!(snake.literal, "my_app");
-
-        let pascal = variants.iter().find(|v| v.name == "pascal").unwrap();
-        assert_eq!(pascal.literal, "MyApp");
-    }
-
-    #[test]
-    fn test_generate_variants_single_word() {
-        let variants = generate_variants("name", "hello");
-        assert_eq!(variants.len(), 1);
-        assert_eq!(variants[0].name, "verbatim");
-        assert_eq!(variants[0].literal, "hello");
-    }
-
-    #[test]
-    fn test_generate_variants_space_separated() {
-        let variants = generate_variants("author", "Jane Doe");
-        assert_eq!(variants.len(), 1);
-        assert_eq!(variants[0].name, "verbatim");
-        assert_eq!(variants[0].literal, "Jane Doe");
-    }
-
-    #[test]
-    fn test_generate_variants_screaming_snake() {
-        let variants = generate_variants("project_name", "my-app");
-        let ss = variants
-            .iter()
-            .find(|v| v.name == "screaming_snake")
-            .unwrap();
-        assert_eq!(ss.literal, "MY_APP");
-    }
-
-    #[test]
-    fn test_tera_expr_kebab_canonical() {
-        let expr = tera_expr_for_variant("project_name", "kebab", "-");
-        assert_eq!(expr, "{{ project_name }}");
-    }
-
-    #[test]
-    fn test_tera_expr_snake_from_kebab() {
-        let expr = tera_expr_for_variant("project_name", "snake", "-");
-        assert_eq!(expr, "{{ project_name_snake }}");
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index 0e6ef6a..6a13faa 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -24,8 +24,7 @@ fn main() -> miette::Result<()> {
             vars,
             output,
             in_place,
-            stub_depth,
             dry_run,
-        } => commands::extract::run(source, vars, output, in_place, stub_depth, dry_run),
+        } => commands::extract::run(source, vars, output, in_place, dry_run),
     }
 }
diff --git a/src/prompt/engine.rs b/src/prompt/engine.rs
index 47fc847..4de7253 100644
--- a/src/prompt/engine.rs
+++ b/src/prompt/engine.rs
@@ -96,7 +96,7 @@ fn evaluate_computed(
     computed_expr: &str,
     values: &BTreeMap<String, Value>,
 ) -> Result<Value> {
-    let mut tera = crate::render::tera_with_filters();
+    let mut tera = tera::Tera::default();
     tera.add_raw_template("__computed__", computed_expr)
         .map_err(|e| DicecutError::ComputedEvaluation {
             name: name.to_string(),
diff --git a/src/render/context.rs b/src/render/context.rs
index f530022..f29f678 100644
--- a/src/render/context.rs
+++ b/src/render/context.rs
@@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, HashMap};
+use std::collections::BTreeMap;
 
 use tera::{Context, Tera, Value};
 
@@ -10,85 +10,14 @@ pub fn build_context(variables: &BTreeMap<String, Value>) -> Context {
     context
 }
 
-/// Create a Tera instance with custom filters registered.
-///
-/// This should be used instead of `Tera::default()` anywhere templates or
-/// computed expressions are evaluated, so that custom filters like `camelcase`
-/// are available.
-pub fn tera_with_filters() -> Tera {
-    let mut tera = Tera::default();
-    tera.register_filter("camelcase", camelcase_filter);
-    tera
-}
-
-/// Custom Tera filter: convert a separated string to camelCase.
-///
-/// Usage: `{{ value | camelcase }}` or `{{ value | camelcase(sep="-") }}`
-///
-/// Splits on the separator (default `-`), lowercases the first word,
-/// title-cases the rest, and joins them.
-fn camelcase_filter(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> {
-    let s = value
-        .as_str()
-        .ok_or_else(|| tera::Error::msg("camelcase filter requires a string value"))?;
-
-    let sep = args.get("sep").and_then(|v| v.as_str()).unwrap_or("-");
-
-    let words: Vec<&str> = s.split(sep).collect();
-    if words.is_empty() {
-        return Ok(Value::String(String::new()));
-    }
-
-    let mut result = words[0].to_lowercase();
-    for word in &words[1..] {
-        let mut chars = word.chars();
-        if let Some(first) = chars.next() {
-            result.extend(first.to_uppercase());
-            result.push_str(&chars.as_str().to_lowercase());
-        }
-    }
-
-    Ok(Value::String(result))
-}
-
 /// Evaluate a Tera boolean expression against a variable context.
 ///
 /// Returns `Ok(true)` if the expression evaluates to true, `Ok(false)` otherwise.
 /// Returns `Err` if the expression fails to parse or render.
 pub fn eval_bool_expr(expr: &str, context: &Context) -> std::result::Result<bool, tera::Error> {
-    let mut tera = tera_with_filters();
+    let mut tera = Tera::default();
     let template_str = format!("{{% if {expr} %}}true{{% else %}}false{{% endif %}}");
     tera.add_raw_template("__when__", &template_str)?;
     let result = tera.render("__when__", context)?;
     Ok(result.trim() == "true")
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_camelcase_filter_kebab() {
-        let val = Value::String("my-cool-app".to_string());
-        let args = HashMap::new();
-        let result = camelcase_filter(&val, &args).unwrap();
-        assert_eq!(result, Value::String("myCoolApp".to_string()));
-    }
-
-    #[test]
-    fn test_camelcase_filter_custom_sep() {
-        let val = Value::String("my_cool_app".to_string());
-        let mut args = HashMap::new();
-        args.insert("sep".to_string(), Value::String("_".to_string()));
-        let result = camelcase_filter(&val, &args).unwrap();
-        assert_eq!(result, Value::String("myCoolApp".to_string()));
-    }
-
-    #[test]
-    fn test_camelcase_filter_single_word() {
-        let val = Value::String("hello".to_string());
-        let args = HashMap::new();
-        let result = camelcase_filter(&val, &args).unwrap();
-        assert_eq!(result, Value::String("hello".to_string()));
-    }
-}
diff --git a/src/render/mod.rs b/src/render/mod.rs
index 8a87f30..5674674 100644
--- a/src/render/mod.rs
+++ b/src/render/mod.rs
@@ -2,7 +2,7 @@ pub mod context;
 pub mod file;
 pub mod walker;
 
-pub use context::{build_context, eval_bool_expr, tera_with_filters};
+pub use context::{build_context, eval_bool_expr};
 pub use walker::{
     execute_plan, plan_render, walk_and_render, GeneratedProject, GenerationPlan, PlannedFile,
 };
diff --git a/src/render/walker.rs b/src/render/walker.rs
index 97b1e96..caf9e26 100644
--- a/src/render/walker.rs
+++ b/src/render/walker.rs
@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
 use std::path::{Path, PathBuf};
 
 use globset::{Glob, GlobSet, GlobSetBuilder};
-use tera::{Context, Value};
+use tera::{Context, Tera, Value};
 use walkdir::WalkDir;
 
 use crate::adapter::ResolvedTemplate;
@@ -104,7 +104,7 @@ pub fn plan_render(
                 source: e,
             })?;
 
-            let mut tera = crate::render::tera_with_filters();
+            let mut tera = Tera::default();
             let template_name = rel_str.to_string();
             let parse_result = tera.add_raw_template(&template_name, &content);
             let render_result = parse_result.and_then(|_| tera.render(&template_name, context));
diff --git a/tests/integration.rs b/tests/integration.rs
index f56835d..024d6e4 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -654,7 +654,6 @@ fn test_extract_batch_basic() {
         ],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        stub_depth: 2,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -679,62 +678,6 @@ fn test_extract_batch_basic() {
     assert!(has_die_files, "should have files with .die suffix");
 }
 
-#[test]
-fn test_extract_detects_case_variants() {
-    let project = tempfile::tempdir().unwrap();
-    std::fs::write(
-        project.path().join("config.toml"),
-        "[package]\nname = \"my-app\"\nmodule = \"my_app\"\nclass = \"MyApp\"\nenv = \"MY_APP_PORT\"\n",
-    )
-    .unwrap();
-
-    let output = tempfile::tempdir().unwrap();
-    let output_path = output.path().join("extracted");
-
-    let options = ExtractOptions {
-        source_dir: project.path().to_path_buf(),
-        variables: vec![("project_name".to_string(), "my-app".to_string())],
-        output_dir: Some(output_path.clone()),
-        in_place: false,
-        stub_depth: 2,
-    };
-
-    let plan = plan_extraction(&options).unwrap();
-
-    // Should detect variants used in the file
-    let var = plan
-        .variables
-        .iter()
-        .find(|v| v.name == "project_name")
-        .unwrap();
-    let variant_names: Vec<&str> = var.variants.iter().map(|v| v.name).collect();
-    assert!(
-        variant_names.contains(&"kebab"),
-        "should detect kebab variant"
-    );
-    assert!(
-        variant_names.contains(&"snake"),
-        "should detect snake variant"
-    );
-    assert!(
-        variant_names.contains(&"pascal"),
-        "should detect pascal variant"
-    );
-    assert!(
-        variant_names.contains(&"screaming_snake"),
-        "should detect screaming_snake variant"
-    );
-
-    execute_extraction(&plan).unwrap();
-
-    // The config should have computed variables for variants
-    let config = std::fs::read_to_string(output_path.join("diecut.toml")).unwrap();
-    assert!(
-        config.contains("project_name_snake"),
-        "should have snake computed var"
-    );
-}
-
 #[test]
 fn test_extract_dry_run_writes_nothing() {
     let project = tempfile::tempdir().unwrap();
@@ -748,7 +691,6 @@ fn test_extract_dry_run_writes_nothing() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        stub_depth: 2,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -775,7 +717,6 @@ fn test_extract_rejects_already_template() {
         variables: vec![("name".to_string(), "val".to_string())],
         output_dir: None,
         in_place: false,
-        stub_depth: 2,
     };
 
     let result = plan_extraction(&options);
@@ -793,7 +734,6 @@ fn test_extract_rejects_no_variables() {
         variables: vec![],
         output_dir: None,
         in_place: false,
-        stub_depth: 2,
     };
 
     let result = plan_extraction(&options);
@@ -814,7 +754,6 @@ fn test_extract_templates_path_components() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
-        stub_depth: 2,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -832,80 +771,3 @@ fn test_extract_templates_path_components() {
 
     execute_extraction(&plan).unwrap();
 }
-
-#[test]
-fn test_extract_round_trip() {
-    // Step 1: Generate a project from an existing template
-    let template_dir = fixture_path("basic-template");
-    let resolved = adapter::resolve_template(&template_dir).unwrap();
-
-    let mut variables = BTreeMap::new();
-    variables.insert(
-        "project_name".to_string(),
-        tera::Value::String("my-app".to_string()),
-    );
-    variables.insert(
-        "author".to_string(),
-        tera::Value::String("Jane Doe".to_string()),
-    );
-    variables.insert("use_docker".to_string(), tera::Value::Bool(false));
-    variables.insert(
-        "license".to_string(),
-        tera::Value::String("MIT".to_string()),
-    );
-    variables.insert(
-        "project_slug".to_string(),
-        tera::Value::String("my-app".to_string()),
-    );
-
-    let context = build_context(&variables);
-    let generated = tempfile::tempdir().unwrap();
-    walk_and_render(&resolved, generated.path(), &variables, &context).unwrap();
-
-    // The generated project has files under generated/my-app/
-    let project_dir = generated.path().join("my-app");
-    assert!(project_dir.exists(), "generated project should exist");
-
-    // Step 2: Extract it back into a template
-    let extracted = tempfile::tempdir().unwrap();
-    let extracted_path = extracted.path().join("extracted-template");
-
-    let options = ExtractOptions {
-        source_dir: project_dir.clone(),
-        variables: vec![("project_name".to_string(), "my-app".to_string())],
-        output_dir: Some(extracted_path.clone()),
-        in_place: false,
-        stub_depth: 2,
-    };
-
-    let plan = plan_extraction(&options).unwrap();
-    execute_extraction(&plan).unwrap();
-
-    // Verify the extracted template has the key structure
-    assert!(extracted_path.join("diecut.toml").exists());
-    assert!(extracted_path.join("template").exists());
-
-    let config = std::fs::read_to_string(extracted_path.join("diecut.toml")).unwrap();
-    assert!(config.contains("project_name"));
-
-    // Verify template files exist and contain template syntax
-    let template_files: Vec<_> = walkdir::WalkDir::new(extracted_path.join("template"))
-        .into_iter()
-        .filter_map(|e| e.ok())
-        .filter(|e| e.file_type().is_file())
-        .collect();
-    assert!(!template_files.is_empty(), "should have template files");
-
-    // Files with .die suffix should contain template expressions
-    for entry in &template_files {
-        if entry.path().to_string_lossy().ends_with(".die") {
-            let content = std::fs::read_to_string(entry.path()).unwrap();
-            assert!(
-                content.contains("{{") || content.contains("{%"),
-                "file {} should contain template syntax, got: {}",
-                entry.path().display(),
-                content
-            );
-        }
-    }
-}

From e9e76b8f823d3a2c4fc1191e8b81086afc445574 Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Wed, 4 Mar 2026 23:24:17 -0500
Subject: [PATCH 28/29] refactor(extract): load excludes from embedded file
 with override

Move exclude patterns to an embedded default_excludes.txt and trim the
defaults to .git, Rust, macOS, and diecut entries.
Add --exclude-from flag to use a custom exclude file.
Replace all_default_excludes() with load_excludes(Option<&Path>).
---
 src/cli.rs                       |   4 ++
 src/commands/extract.rs          |   2 +
 src/extract/default_excludes.txt |  12 ++++
 src/extract/exclude.rs           | 103 +++++++++++--------------------
 src/extract/mod.rs               |   5 +-
 src/main.rs                      |   3 +-
 tests/integration.rs             |   5 ++
 7 files changed, 63 insertions(+), 71 deletions(-)
 create mode 100644 src/extract/default_excludes.txt

diff --git a/src/cli.rs b/src/cli.rs
index d87f2e8..58594f4 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -67,6 +67,10 @@ pub enum Commands {
         #[arg(long)]
         in_place: bool,
 
+        /// File with exclude patterns (one per line, # comments)
+        #[arg(long, value_name = "FILE")]
+        exclude_from: Option<String>,
+
         /// Show what would be extracted without writing files
         #[arg(long)]
         dry_run: bool,
diff --git a/src/commands/extract.rs b/src/commands/extract.rs
index 37a31be..5b6efc6 100644
--- a/src/commands/extract.rs
+++ b/src/commands/extract.rs
@@ -11,6 +11,7 @@ pub fn run(
     vars: Vec<String>,
     output: Option<String>,
     in_place: bool,
+    exclude_from: Option<String>,
     dry_run: bool,
 ) -> Result<()> {
     let variables = parse_vars(&vars)?;
@@ -20,6 +21,7 @@ pub fn run(
         variables,
         output_dir: output.map(PathBuf::from),
         in_place,
+        exclude_file: exclude_from.map(PathBuf::from),
     };
 
     let plan = plan_extraction(&options)?;
diff --git a/src/extract/default_excludes.txt b/src/extract/default_excludes.txt
new file mode 100644
index 0000000..4219dd1
--- /dev/null
+++ b/src/extract/default_excludes.txt
@@ -0,0 +1,12 @@
+# Version control
+.git
+
+# Rust
+target
+Cargo.lock
+
+# macOS
+.DS_Store
+
+# Diecut
+.diecut-answers.toml
diff --git a/src/extract/exclude.rs b/src/extract/exclude.rs
index 2f6beff..b9e7b55 100644
--- a/src/extract/exclude.rs
+++ b/src/extract/exclude.rs
@@ -1,41 +1,20 @@
 use std::path::Path;
 
-/// Default directories and files to exclude from template extraction.
-const DEFAULT_EXCLUDES: &[&str] = &[
-    ".git",
-    ".hg",
-    ".svn",
-    "node_modules",
-    ".DS_Store",
-    "Thumbs.db",
-    "__pycache__",
-    "*.pyc",
-    ".tox",
-    ".nox",
-    ".mypy_cache",
-    ".ruff_cache",
-    ".pytest_cache",
-    "target",
-    ".venv",
-    ".env",
-    "dist",
-    "build",
-    ".next",
-    ".nuxt",
-    ".output",
-    ".turbo",
-    ".worktrees",
-    ".claude/worktrees",
-    ".astro",
-    ".diecut-answers.toml",
-];
+const DEFAULT_EXCLUDES: &str = include_str!("default_excludes.txt");
 
-/// Return all default exclude patterns for use during scanning.
-///
-/// All DEFAULT_EXCLUDES are always used during the scan phase because patterns
-/// like `node_modules` can appear at any depth (e.g. `docs/node_modules/`).
-pub fn all_default_excludes() -> Vec<String> {
-    DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect()
+/// Load exclude patterns from a file; if none is given or it is unreadable (warns), use defaults.
+pub fn load_excludes(override_file: Option<&Path>) -> Vec<String> {
+    let custom = override_file.and_then(|path| {
+        std::fs::read_to_string(path)
+            .inspect_err(|e| eprintln!("warning: skipping exclude file {}: {e}", path.display()))
+            .ok()
+    });
+    let text = custom.as_deref().unwrap_or(DEFAULT_EXCLUDES);
+    text.lines()
+        .map(str::trim)
+        .filter(|line| !line.is_empty() && !line.starts_with('#'))
+        .map(String::from)
+        .collect()
 }
 
 /// Check if a path should be excluded based on the exclude patterns.
@@ -45,19 +24,15 @@ pub fn should_exclude(relative_path: &Path, excludes: &[String]) -> bool {
     for pattern in excludes {
         let clean = pattern.trim_end_matches('/');
 
-        if clean.contains('*') {
-            // Glob-style matching: *.pyc matches any .pyc file
-            if let Some(ext) = clean.strip_prefix("*.") {
-                if let Some(file_ext) = relative_path.extension() {
-                    if file_ext.to_string_lossy().eq_ignore_ascii_case(ext) {
-                        return true;
-                    }
+        if let Some(ext) = clean.strip_prefix("*.") {
+            if let Some(file_ext) = relative_path.extension() {
+                if file_ext.to_string_lossy().eq_ignore_ascii_case(ext) {
+                    return true;
                 }
             }
             continue;
         }
 
-        // Exact directory/file match at any level
         for component in relative_path.components() {
             if let std::path::Component::Normal(os_str) = component {
                 if os_str.to_string_lossy() == clean {
@@ -66,7 +41,6 @@ pub fn should_exclude(relative_path: &Path, excludes: &[String]) -> bool {
             }
         }
 
-        // Full path match
         if path_str == clean || path_str.starts_with(&format!("{clean}/")) {
             return true;
         }
@@ -78,38 +52,31 @@ pub fn should_exclude(relative_path: &Path, excludes: &[String]) -> bool {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use std::path::Path;
 
     #[test]
-    fn test_should_exclude_git() {
-        let excludes = vec![".git/".to_string()];
-        assert!(should_exclude(Path::new(".git/config"), &excludes));
-        assert!(should_exclude(Path::new(".git/HEAD"), &excludes));
+    fn test_load_defaults() {
+        let excludes = load_excludes(None);
+        assert!(excludes.contains(&".git".to_string()));
+        assert!(excludes.contains(&"target".to_string()));
+        assert!(excludes.contains(&".DS_Store".to_string()));
+        assert!(!excludes.iter().any(|e| e.starts_with('#')));
     }
 
     #[test]
-    fn test_should_exclude_node_modules() {
-        let excludes = vec!["node_modules".to_string()];
-        assert!(should_exclude(
-            Path::new("node_modules/express/index.js"),
-            &excludes
-        ));
+    fn test_should_exclude_matches() {
+        let excludes = vec![".git".to_string(), "*.pyc".to_string()];
+        assert!(should_exclude(Path::new(".git/HEAD"), &excludes));
+        assert!(should_exclude(Path::new("pkg/foo.pyc"), &excludes));
+        assert!(!should_exclude(Path::new("src/main.rs"), &excludes));
     }
 
     #[test]
-    fn test_should_exclude_glob() {
-        let excludes = vec!["*.pyc".to_string()];
-        assert!(should_exclude(
-            Path::new("module/__pycache__/foo.pyc"),
-            &excludes
-        ));
-        assert!(!should_exclude(Path::new("module/foo.py"), &excludes));
-    }
+    fn test_override_file() {
+        let dir = tempfile::tempdir().unwrap();
+        let file = dir.path().join("excludes.txt");
+        std::fs::write(&file, "# custom\nvendor\n*.log\n").unwrap();
 
-    #[test]
-    fn test_should_not_exclude_normal_file() {
-        let excludes = vec![".git/".to_string(), "node_modules".to_string()];
-        assert!(!should_exclude(Path::new("src/main.rs"), &excludes));
-        assert!(!should_exclude(Path::new("README.md"), &excludes));
+        let excludes = load_excludes(Some(&file));
+        assert_eq!(excludes, vec!["vendor", "*.log"]);
     }
 }
diff --git a/src/extract/mod.rs b/src/extract/mod.rs
index 29f7cd5..094f27c 100644
--- a/src/extract/mod.rs
+++ b/src/extract/mod.rs
@@ -9,7 +9,7 @@ use console::style;
 use crate::config::schema::DEFAULT_TEMPLATES_SUFFIX;
 use crate::error::{DicecutError, Result};
 
-use self::exclude::all_default_excludes;
+use self::exclude::load_excludes;
 use self::replace::{
     apply_path_replacements, apply_replacements, build_replacement_rules, ReplacementRule,
 };
@@ -80,6 +80,7 @@ pub struct ExtractOptions {
     pub variables: Vec<(String, String)>,
     pub output_dir: Option<PathBuf>,
     pub in_place: bool,
+    pub exclude_file: Option<PathBuf>,
 }
 
 /// Plan an extraction: scan the project, build replacement rules, apply replacements.
@@ -122,7 +123,7 @@ pub fn plan_extraction(options: &ExtractOptions) -> Result<ExtractionPlan> {
     }
 
     // Scan project
-    let scan_excludes = all_default_excludes();
+    let scan_excludes = load_excludes(options.exclude_file.as_deref());
     eprintln!(
         "\n{}",
         style(format!("Scanning {}...", source_dir.display())).bold()
diff --git a/src/main.rs b/src/main.rs
index 6a13faa..e2c0167 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -24,7 +24,8 @@ fn main() -> miette::Result<()> {
             vars,
             output,
             in_place,
+            exclude_from,
             dry_run,
-        } => commands::extract::run(source, vars, output, in_place, dry_run),
+        } => commands::extract::run(source, vars, output, in_place, exclude_from, dry_run),
     }
 }
diff --git a/tests/integration.rs b/tests/integration.rs
index 024d6e4..ca73855 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -654,6 +654,7 @@ fn test_extract_batch_basic() {
         ],
         output_dir: Some(output_path.clone()),
         in_place: false,
+        exclude_file: None,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -691,6 +692,7 @@ fn test_extract_dry_run_writes_nothing() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
+        exclude_file: None,
     };
 
     let plan = plan_extraction(&options).unwrap();
@@ -717,6 +719,7 @@ fn test_extract_rejects_already_template() {
         variables: vec![("name".to_string(), "val".to_string())],
         output_dir: None,
         in_place: false,
+        exclude_file: None,
     };
 
     let result = plan_extraction(&options);
@@ -734,6 +737,7 @@ fn test_extract_rejects_no_variables() {
         variables: vec![],
         output_dir: None,
         in_place: false,
+        exclude_file: None,
     };
 
     let result = plan_extraction(&options);
@@ -754,6 +758,7 @@ fn test_extract_templates_path_components() {
         variables: vec![("project_name".to_string(), "my-app".to_string())],
         output_dir: Some(output_path.clone()),
         in_place: false,
+        exclude_file: None,
     };
 
     let plan = plan_extraction(&options).unwrap();

From 13c313ad3454734f34bdd908cbbaac1773d6717c Mon Sep 17 00:00:00 2001
From: rroskam <raiderrobert@gmail.com>
Date: Wed, 4 Mar 2026 23:42:18 -0500
Subject: [PATCH 29/29] test(extract): add unit tests for replace.rs

Cover word-boundary matching, longest-match-first ordering, overlap
resolution, no-rescan guarantee, Unicode handling, and path replacements.
---
 src/extract/replace.rs | 274 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 274 insertions(+)

diff --git a/src/extract/replace.rs b/src/extract/replace.rs
index 419405a..1ea9092 100644
--- a/src/extract/replace.rs
+++ b/src/extract/replace.rs
@@ -132,3 +132,277 @@ pub fn apply_path_replacements(path: &Path, rules: &[ReplacementRule]) -> PathBu
 
     components.iter().collect()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rstest::rstest;
+    use std::path::Path;
+
+    /// Builds a `literal` → `replacement` rule with placeholder `variable`/`variant` values.
+    fn rule(literal: &str, replacement: &str) -> ReplacementRule {
+        ReplacementRule {
+            literal: literal.to_string(),
+            replacement: replacement.to_string(),
+            variable: "var".to_string(),
+            variant: "verbatim".to_string(),
+        }
+    }
+
+    /// Runs `build_replacement_rules` over `rules` and returns them for `apply_replacements`.
+    fn sorted(rules: Vec<ReplacementRule>) -> Vec<ReplacementRule> {
+        let mut rules = rules;
+        build_replacement_rules(&mut rules);
+        rules
+    }
+
+    // ── is_word_char ──────────────────────────────────────────────
+
+    #[rstest]
+    #[case('a', true)]
+    #[case('Z', true)]
+    #[case('0', true)]
+    #[case('_', true)]
+    #[case('-', true)]
+    #[case(' ', false)]
+    #[case('.', false)]
+    #[case('/', false)]
+    #[case('{', false)]
+    #[case('é', true)] // alphanumeric per char::is_alphanumeric
+    fn word_char(#[case] c: char, #[case] expected: bool) {
+        assert_eq!(is_word_char(c), expected, "is_word_char({c:?})");
+    }
+
+    // ── build_replacement_rules (sorting) ─────────────────────────
+
+    #[test]
+    fn sorts_longest_literal_first() {
+        let mut rules = vec![
+            rule("app", "{{ x }}"),
+            rule("my-app", "{{ y }}"),
+            rule("a", "{{ z }}"),
+        ];
+        build_replacement_rules(&mut rules);
+
+        let lengths: Vec<usize> = rules.iter().map(|r| r.literal.len()).collect();
+        assert_eq!(lengths, vec![6, 3, 1]);
+    }
+
+    // ── apply_replacements: basic ─────────────────────────────────
+
+    #[test]
+    fn no_rules_returns_original() {
+        let (out, count) = apply_replacements("hello world", &[]);
+        assert_eq!(out, "hello world");
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn no_match_returns_original() {
+        let rules = sorted(vec![rule("missing", "{{ x }}")]);
+        let (out, count) = apply_replacements("hello world", &rules);
+        assert_eq!(out, "hello world");
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn simple_replacement() {
+        let rules = sorted(vec![rule("my-app", "{{ project_name }}")]);
+        let (out, count) = apply_replacements("name = \"my-app\"", &rules);
+        assert_eq!(out, "name = \"{{ project_name }}\"");
+        assert_eq!(count, 1);
+    }
+
+    #[test]
+    fn multiple_occurrences() {
+        let rules = sorted(vec![rule("foo", "{{ x }}")]);
+        let (out, count) = apply_replacements("foo and foo again foo", &rules);
+        assert_eq!(out, "{{ x }} and {{ x }} again {{ x }}");
+        assert_eq!(count, 3);
+    }
+
+    #[test]
+    fn empty_content() {
+        let rules = sorted(vec![rule("x", "{{ x }}")]);
+        let (out, count) = apply_replacements("", &rules);
+        assert_eq!(out, "");
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn empty_literal_is_skipped() {
+        let rules = vec![rule("", "{{ x }}")];
+        let (out, count) = apply_replacements("hello", &rules);
+        assert_eq!(out, "hello");
+        assert_eq!(count, 0);
+    }
+
+    // ── apply_replacements: word boundaries ───────────────────────
+
+    #[test]
+    fn no_match_inside_longer_word() {
+        let rules = sorted(vec![rule("app", "{{ name }}")]);
+        let (out, count) = apply_replacements("the application is great", &rules);
+        assert_eq!(out, "the application is great");
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn no_match_with_prefix_attached() {
+        let rules = sorted(vec![rule("app", "{{ name }}")]);
+        let (out, count) = apply_replacements("myapp works", &rules);
+        assert_eq!(out, "myapp works");
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn no_match_with_suffix_attached() {
+        let rules = sorted(vec![rule("app", "{{ name }}")]);
+        let (out, count) = apply_replacements("apps are great", &rules);
+        assert_eq!(out, "apps are great");
+        assert_eq!(count, 0);
+    }
+
+    #[rstest]
+    #[case("app is here", "{{ n }} is here", 1)]
+    #[case("use app", "use {{ n }}", 1)]
+    #[case("app", "{{ n }}", 1)]
+    #[case("(app)", "({{ n }})", 1)]
+    #[case("\"app\"", "\"{{ n }}\"", 1)]
+    #[case("app.config", "{{ n }}.config", 1)]
+    #[case("/app/", "/{{ n }}/", 1)]
+    fn boundary_at_non_word_chars(
+        #[case] input: &str,
+        #[case] expected: &str,
+        #[case] expected_count: usize,
+    ) {
+        let rules = sorted(vec![rule("app", "{{ n }}")]);
+        let (out, count) = apply_replacements(input, &rules);
+        assert_eq!(out, expected, "input: {input:?}");
+        assert_eq!(count, expected_count);
+    }
+
+    #[test]
+    fn hyphen_is_word_boundary_blocker() {
+        // "my-app" contains "app" but hyphen is a word char, so "app" alone
+        // should NOT match inside "my-app".
+        let rules = sorted(vec![rule("app", "{{ name }}")]);
+        let (out, count) = apply_replacements("my-app", &rules);
+        assert_eq!(out, "my-app");
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn underscore_is_word_boundary_blocker() {
+        let rules = sorted(vec![rule("app", "{{ name }}")]);
+        let (out, count) = apply_replacements("my_app", &rules);
+        assert_eq!(out, "my_app");
+        assert_eq!(count, 0);
+    }
+
+    // ── apply_replacements: longest-match-first / overlap ─────────
+
+    #[test]
+    fn longest_match_wins() {
+        let rules = sorted(vec![
+            rule("my-app", "{{ full }}"),
+            rule("my", "{{ prefix }}"),
+        ]);
+        let (out, count) = apply_replacements("name: my-app", &rules);
+        assert_eq!(out, "name: {{ full }}");
+        assert_eq!(count, 1);
+    }
+
+    #[test]
+    fn shorter_rule_still_matches_elsewhere() {
+        let rules = sorted(vec![
+            rule("my-app", "{{ full }}"),
+            rule("my", "{{ prefix }}"),
+        ]);
+        let (out, count) = apply_replacements("my-app by my", &rules);
+        assert_eq!(out, "{{ full }} by {{ prefix }}");
+        assert_eq!(count, 2);
+    }
+
+    #[test]
+    fn adjacent_non_overlapping_matches() {
+        // Two rules that match at adjacent positions separated by a dot.
+        let rules = sorted(vec![rule("foo", "{{ a }}"), rule("bar", "{{ b }}")]);
+        let (out, count) = apply_replacements("foo.bar", &rules);
+        assert_eq!(out, "{{ a }}.{{ b }}");
+        assert_eq!(count, 2);
+    }
+
+    // ── apply_replacements: no re-scanning ────────────────────────
+
+    #[test]
+    fn replacement_output_is_not_rescanned() {
+        // If re-scanning occurred, the "x" in "{{ x }}" could trigger the "x" → "WRONG" rule.
+        let rules = sorted(vec![rule("foo", "{{ x }}"), rule("x", "WRONG")]);
+        let (out, count) = apply_replacements("foo", &rules);
+        assert_eq!(out, "{{ x }}");
+        assert_eq!(count, 1);
+    }
+
+    // ── apply_replacements: unicode ───────────────────────────────
+
+    #[test]
+    fn unicode_content_preserved() {
+        // CJK chars are alphanumeric (is_word_char → true), so the literal
+        // only matches because it is flanked by non-word chars (spaces) here.
+        let rules = sorted(vec![rule("my-app", "{{ name }}")]);
+        let (out, count) = apply_replacements("プロジェクト: my-app です", &rules);
+        assert_eq!(out, "プロジェクト: {{ name }} です");
+        assert_eq!(count, 1);
+    }
+
+    #[test]
+    fn cjk_neighbors_block_boundary() {
+        // CJK characters are alphanumeric → word chars, so a literal
+        // flanked by them is not at a word boundary.
+        let rules = sorted(vec![rule("名前", "{{ name }}")]);
+        let (out, count) = apply_replacements("私の名前はアプリです", &rules);
+        assert_eq!(out, "私の名前はアプリです");
+        assert_eq!(count, 0);
+    }
+
+    #[test]
+    fn multibyte_boundary_respected() {
+        // "café" contains "é" which is alphanumeric → word char.
+        // Rule for "caf" should NOT match inside "café".
+        let rules = sorted(vec![rule("caf", "{{ x }}")]);
+        let (out, count) = apply_replacements("café", &rules);
+        assert_eq!(out, "café");
+        assert_eq!(count, 0);
+    }
+
+    // ── apply_path_replacements ───────────────────────────────────
+
+    #[test]
+    fn replaces_in_path_components() {
+        let rules = sorted(vec![rule("my-app", "{{ name }}")]);
+        let path = Path::new("src/my-app/main.rs");
+        let result = apply_path_replacements(path, &rules);
+        assert_eq!(result, PathBuf::from("src/{{ name }}/main.rs"));
+    }
+
+    #[test]
+    fn replaces_in_filename() {
+        let rules = sorted(vec![rule("my-app", "{{ name }}")]);
+        let path = Path::new("my-app.toml");
+        let result = apply_path_replacements(path, &rules);
+        assert_eq!(result, PathBuf::from("{{ name }}.toml"));
+    }
+
+    #[test]
+    fn no_match_across_path_separator() {
+        // The literal "src/app" spans two path components, so it can never
+        // match — each component is replaced independently.
+        let rules = sorted(vec![rule("src/app", "{{ x }}")]);
+        let path = Path::new("src/app/main.rs");
+        let result = apply_path_replacements(path, &rules);
+        // Should be unchanged because "/" is a component separator, not part
+        // of any single component.
+        assert_eq!(result, PathBuf::from("src/app/main.rs"));
+    }
+}