Skip to content

Commit fb22906

Browse files
committed
fix(extract): handle nested excludes and symlinks to directories
`detect_excludes` only checked whether each exclude pattern existed at the project root, so it missed patterns such as `node_modules` at deeper levels (e.g. `docs/node_modules/`). It now always includes all `DEFAULT_EXCLUDES`, since `should_exclude` already handles nested matching via path components. The scan also skips symlinks that resolve to directories: pnpm's `node_modules/.pnpm` uses symlinks to directories, and walkdir reports these as non-directory entries, which caused `read_to_string` to fail with "Is a directory".
1 parent 761fcd5 commit fb22906

2 files changed

Lines changed: 36 additions & 20 deletions

File tree

src/extract/exclude.rs

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -62,22 +62,11 @@ const DEFAULT_COPY_WITHOUT_RENDER: &[&str] = &[
6262
];
6363

6464
/// Return the default exclude patterns to apply to the project.
65-
pub fn detect_excludes(project_dir: &Path) -> Vec<String> {
66-
let mut found = Vec::new();
67-
68-
for pattern in DEFAULT_EXCLUDES {
69-
let clean = pattern.trim_end_matches('/');
70-
// Skip glob patterns — they're always included
71-
if clean.contains('*') {
72-
found.push(pattern.to_string());
73-
continue;
74-
}
75-
if project_dir.join(clean).exists() {
76-
found.push(pattern.to_string());
77-
}
78-
}
79-
80-
found
65+
///
66+
/// All DEFAULT_EXCLUDES are always included because patterns like `node_modules`
67+
/// can appear at any depth (e.g. `docs/node_modules/`), not just the project root.
68+
pub fn detect_excludes(_project_dir: &Path) -> Vec<String> {
69+
DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect()
8170
}
8271

8372
/// Detect which copy-without-render patterns are relevant based on files present.
@@ -191,14 +180,13 @@ mod tests {
191180
#[test]
192181
fn test_detect_excludes() {
193182
let dir = tempfile::tempdir().unwrap();
194-
std::fs::create_dir(dir.path().join(".git")).unwrap();
195-
std::fs::write(dir.path().join(".DS_Store"), "").unwrap();
196183

197184
let found = detect_excludes(dir.path());
185+
// All DEFAULT_EXCLUDES are always included regardless of what exists on disk
198186
assert!(found.iter().any(|e| e.contains(".git")));
199187
assert!(found.iter().any(|e| e == ".DS_Store"));
200-
// Glob patterns should always be included
201188
assert!(found.iter().any(|e| e == "*.pyc"));
189+
assert!(found.iter().any(|e| e.contains("node_modules")));
202190
}
203191

204192
#[test]

src/extract/scan.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,14 @@ pub fn scan_project(project_dir: &Path, excludes: &[String]) -> crate::error::Re
4747
.unwrap_or_else(|| std::io::Error::other("walkdir error")),
4848
})?;
4949

50-
// Skip directories themselves (we only care about files)
50+
// Skip directories (including symlinks to directories, e.g. pnpm's
51+
// node_modules/.pnpm uses symlinks that point to directories).
5152
if entry.file_type().is_dir() {
5253
continue;
5354
}
55+
if entry.path_is_symlink() && entry.path().is_dir() {
56+
continue;
57+
}
5458

5559
let relative_path = entry
5660
.path()
@@ -124,6 +128,30 @@ mod tests {
124128
assert_eq!(result.files[0].relative_path, PathBuf::from("README.md"));
125129
}
126130

131+
#[cfg(unix)]
132+
#[test]
133+
fn test_scan_project_skips_symlinks_to_directories() {
134+
let dir = tempfile::tempdir().unwrap();
135+
std::fs::write(dir.path().join("real.txt"), "hello").unwrap();
136+
137+
// Create a subdirectory and a symlink pointing to it
138+
let subdir = dir.path().join("subdir");
139+
std::fs::create_dir(&subdir).unwrap();
140+
std::fs::write(subdir.join("nested.txt"), "nested").unwrap();
141+
std::os::unix::fs::symlink(&subdir, dir.path().join("link-to-dir")).unwrap();
142+
143+
let result = scan_project(dir.path(), &[]).unwrap();
144+
// Should find real.txt and subdir/nested.txt, but NOT choke on link-to-dir
145+
let paths: Vec<String> = result
146+
.files
147+
.iter()
148+
.map(|f| f.relative_path.to_string_lossy().to_string())
149+
.collect();
150+
assert!(paths.contains(&"real.txt".to_string()));
151+
assert!(paths.contains(&"subdir/nested.txt".to_string()));
152+
assert!(!paths.iter().any(|p| p.contains("link-to-dir")));
153+
}
154+
127155
#[test]
128156
fn test_scan_project_binary_detection() {
129157
let dir = tempfile::tempdir().unwrap();

0 commit comments

Comments
 (0)