Skip to content

Commit d6bffab

Browse files
committed
Add deep nested junction/symlink cycle detection (v0.7.2, #47)
CRITICAL SAFETY FIX: Prevents catastrophic data loss from nested junctions pointing to destination. Previously only checked top-level paths; now walks entire source tree to find dangerous links. Key changes: - detect_path_cycles_deep(): Full tree traversal with followlinks=False - Early cycle check in MOVE handler before file expansion - Link discovery report showing all links in source tree - Clear error messages with remediation options The fix catches scenarios like: 1. User runs: preserve MOVE C:\path\subdir -r --dst E:\backup\subdir -L junction 2. User later: preserve MOVE C:\path -r --dst E:\backup 3. Previously: Would copy through junction then delete E:\backup\subdir 4. Now: BLOCKED with clear explanation 21 new tests for nested junction scenarios. All 265 tests pass.
1 parent 2ed4ac2 commit d6bffab

5 files changed

Lines changed: 603 additions & 21 deletions

File tree

CHANGELOG.md

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,21 @@ All notable changes to this project will be documented in this file. This projec
66

77
## [Unreleased]
88

9+
## [0.7.2] - 2025-12-19
10+
911
### Added
10-
- **Path Cycle Detection** (#47)
11-
- Prevents catastrophic data loss when source and destination resolve to the same location
12-
- Detects symlinks/junctions pointing source to destination (or vice versa)
13-
- Blocks MOVE operations with CRITICAL error when cycle detected
14-
- Warns on COPY operations for destination-inside-source scenarios
15-
- New `detect_path_cycle()` function integrated into preflight checks
12+
- **Deep Path Cycle Detection** (#47)
13+
- Prevents catastrophic data loss from nested symlinks/junctions
14+
- Walks source tree to find links that resolve to destination
15+
- Blocks MOVE when any subdirectory link points to/inside destination
16+
- Link discovery report shows all links found in source tree
17+
- Handles: junctions (Windows), symlinks (all platforms), circular links
18+
- New `detect_path_cycles_deep()` function with comprehensive traversal
19+
- 21 unit tests covering nested junction/symlink scenarios
20+
21+
### Changed
22+
- MOVE preflight now uses deep cycle detection (traverses source tree)
23+
- COPY preflight uses simple cycle detection (top-level only, less critical)
1624

1725
## [0.7.1] - 2025-12-18
1826

preserve/handlers/move.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020

2121
from preservelib import operations
2222
from preservelib import links
23-
from preservelib.operations import InsufficientSpaceError, PermissionCheckError
23+
from preservelib.operations import (
24+
InsufficientSpaceError,
25+
PermissionCheckError,
26+
detect_path_cycles_deep,
27+
)
2428
from preservelib.destination import (
2529
scan_destination,
2630
format_scan_report,
@@ -127,6 +131,44 @@ def handle_move_operation(args, logger):
127131
if not dest_path.exists():
128132
dest_path.mkdir(parents=True, exist_ok=True)
129133

134+
# CRITICAL: Early cycle detection on original source paths (before file expansion)
135+
# This catches nested junctions/symlinks that would cause catastrophic data loss
136+
# The detection must happen BEFORE os.walk expands directories (which follows links)
137+
if args.sources:
138+
can_proceed, cycle_hard, cycle_soft, link_report = detect_path_cycles_deep(
139+
args.sources, str(dest_path), "MOVE"
140+
)
141+
142+
if link_report:
143+
logger.info(f"Found {len(link_report)} link(s) in source tree")
144+
for link_info in link_report:
145+
link_type = link_info.get('link_type', 'unknown')
146+
link_path = link_info.get('link_path', 'unknown')
147+
target = link_info.get('target_resolved') or link_info.get('target', 'unknown')
148+
logger.debug(f" {link_type}: {link_path} -> {target}")
149+
150+
if not can_proceed:
151+
logger.error("")
152+
logger.error("=" * 70)
153+
logger.error("CRITICAL: Cycle detected - MOVE operation BLOCKED")
154+
logger.error("=" * 70)
155+
for issue in cycle_hard:
156+
logger.error(f" {issue}")
157+
logger.error("")
158+
logger.error("A MOVE in this configuration would cause CATASTROPHIC DATA LOSS.")
159+
logger.error("The source contains links that resolve to the destination.")
160+
logger.error("")
161+
logger.error("Options:")
162+
logger.error(" 1. Remove the problematic junction/symlink from source")
163+
logger.error(" 2. Use COPY instead (data preserved, but check for duplicates)")
164+
logger.error(" 3. Move the destination to a different location")
165+
logger.error("=" * 70)
166+
return 1
167+
168+
if cycle_soft:
169+
for issue in cycle_soft:
170+
logger.warning(issue)
171+
130172
# Get preserve directory
131173
preserve_dir = get_preserve_dir(args, dest_path)
132174

preserve/version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
# Semantic version components
1818
MAJOR = 0
1919
MINOR = 7
20-
PATCH = 1
20+
PATCH = 2
2121

2222
# Optional release phase (alpha, beta, rc1, rc2, etc.)
2323
# Set to None for stable releases
@@ -27,7 +27,7 @@
2727
# DO NOT EDIT THIS LINE MANUALLY
2828
# Note: Hash reflects the commit this version builds upon (HEAD at commit time)
2929
# The hash will be one commit behind after the commit is created (git limitation)
30-
__version__ = "0.7.1_main_50-20251219-e20dc0ac"
30+
__version__ = "0.7.2_main_51-20251219-2ed4ac21"
3131

3232

3333
def get_version():

preservelib/operations.py

Lines changed: 221 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
from .manifest import PreserveManifest, calculate_file_hash, verify_file_hash
3333
from .metadata import collect_file_metadata, apply_file_metadata
34+
from .links import is_link, detect_link_type, get_link_target
3435
from .destination import (
3536
ConflictResolution,
3637
FileCategory,
@@ -371,6 +372,197 @@ def detect_path_cycle(
371372
return issues
372373

373374

375+
def detect_path_cycles_deep(
376+
source_paths: List[Union[str, Path]],
377+
dest_path: Union[str, Path],
378+
operation: str = "MOVE"
379+
) -> Tuple[bool, List[str], List[str], List[Dict[str, Any]]]:
380+
"""
381+
Deep scan for cycle conditions including nested symlinks/junctions.
382+
383+
This function performs a comprehensive check by:
384+
1. Running top-level cycle detection
385+
2. Walking the source tree WITHOUT following links
386+
3. For each link found, resolving its target and checking for cycles
387+
388+
Args:
389+
source_paths: List of source file/directory paths
390+
dest_path: Destination base path
391+
operation: Operation type ("COPY" or "MOVE")
392+
393+
Returns:
394+
Tuple of (can_proceed, hard_issues, soft_issues, link_report)
395+
- can_proceed: True if no blocking issues found
396+
- hard_issues: List of blocking issues (must abort)
397+
- soft_issues: List of warnings (can continue with confirmation)
398+
- link_report: List of dicts describing links found in source tree
399+
"""
400+
hard_issues = []
401+
soft_issues = []
402+
link_report = []
403+
is_move = operation.upper() == "MOVE"
404+
405+
dest = Path(dest_path)
406+
try:
407+
dest_resolved = dest.resolve()
408+
dest_exists = dest_resolved.exists()
409+
except OSError:
410+
dest_resolved = None
411+
dest_exists = False
412+
413+
# Phase 1: Top-level checks (existing function)
414+
top_level_issues = detect_path_cycle(source_paths, dest_path)
415+
for issue in top_level_issues:
416+
if "CRITICAL" in issue:
417+
hard_issues.append(issue)
418+
else:
419+
soft_issues.append(issue)
420+
421+
# Phase 2: Deep link discovery (walk without following links)
422+
# Track visited inodes to prevent infinite loops from circular symlinks
423+
visited_inodes = set()
424+
max_depth = 100 # Safety limit for deep trees
425+
426+
for source in source_paths:
427+
source_path = Path(source)
428+
429+
if not source_path.exists():
430+
continue
431+
432+
# For files, no traversal needed
433+
if source_path.is_file():
434+
continue
435+
436+
# Walk the directory tree WITHOUT following symlinks
437+
try:
438+
for root, dirs, files in os.walk(source_path, followlinks=False):
439+
# Check depth limit
440+
try:
441+
depth = len(Path(root).relative_to(source_path).parts)
442+
if depth > max_depth:
443+
soft_issues.append(
444+
f"WARNING: Depth limit ({max_depth}) reached at '{root}'. "
445+
f"Deeper directories not checked for cycles."
446+
)
447+
dirs.clear() # Don't descend further
448+
continue
449+
except ValueError:
450+
pass
451+
452+
root_path = Path(root)
453+
454+
# Check each subdirectory for links
455+
dirs_to_remove = []
456+
for d in dirs:
457+
dir_path = root_path / d
458+
459+
# Check if this directory is a link (symlink or junction)
460+
if is_link(dir_path):
461+
link_type = detect_link_type(dir_path)
462+
target_str = get_link_target(dir_path)
463+
464+
# Try to resolve the target
465+
try:
466+
target_resolved = dir_path.resolve()
467+
target_exists = target_resolved.exists()
468+
except OSError:
469+
target_resolved = None
470+
target_exists = False
471+
472+
# Record the link in our report
473+
link_info = {
474+
'link_path': str(dir_path),
475+
'link_type': link_type or 'unknown',
476+
'target': target_str or 'UNRESOLVABLE',
477+
'target_resolved': str(target_resolved) if target_resolved else None,
478+
'target_exists': target_exists,
479+
}
480+
link_report.append(link_info)
481+
482+
# Check for cycle conditions
483+
if target_resolved and dest_resolved and target_exists and dest_exists:
484+
try:
485+
# Check 1: Link target IS the destination
486+
if os.path.samefile(target_resolved, dest_resolved):
487+
issue = (
488+
f"CRITICAL: Link '{dir_path}' ({link_type}) points to "
489+
f"destination '{dest_path}'. Traversing it during {operation} "
490+
f"would copy files to themselves then delete them!"
491+
)
492+
if is_move:
493+
hard_issues.append(issue)
494+
else:
495+
soft_issues.append(issue.replace("CRITICAL:", "WARNING:"))
496+
497+
# Check 2: Link target is INSIDE destination
498+
elif target_resolved.is_relative_to(dest_resolved):
499+
issue = (
500+
f"CRITICAL: Link '{dir_path}' ({link_type}) points inside "
501+
f"destination at '{target_resolved}'. Traversing it during "
502+
f"{operation} would create a cycle!"
503+
)
504+
if is_move:
505+
hard_issues.append(issue)
506+
else:
507+
soft_issues.append(issue.replace("CRITICAL:", "WARNING:"))
508+
509+
# Check 3: Destination is inside link target
510+
elif dest_resolved.is_relative_to(target_resolved):
511+
soft_issues.append(
512+
f"WARNING: Link '{dir_path}' ({link_type}) target contains "
513+
f"destination. This may cause unexpected nesting behavior."
514+
)
515+
516+
except (OSError, ValueError):
517+
pass
518+
519+
# Check for circular symlinks (target points back into source tree)
520+
if target_resolved:
521+
try:
522+
target_stat = target_resolved.stat()
523+
inode_key = (target_stat.st_dev, target_stat.st_ino)
524+
if inode_key in visited_inodes:
525+
soft_issues.append(
526+
f"WARNING: Circular link detected at '{dir_path}'. "
527+
f"Target '{target_resolved}' was already visited."
528+
)
529+
else:
530+
visited_inodes.add(inode_key)
531+
except OSError:
532+
pass
533+
534+
# Don't descend into links (we've analyzed them)
535+
dirs_to_remove.append(d)
536+
537+
else:
538+
# Regular directory - track its inode to detect circular structures
539+
try:
540+
dir_stat = dir_path.stat()
541+
inode_key = (dir_stat.st_dev, dir_stat.st_ino)
542+
if inode_key in visited_inodes:
543+
soft_issues.append(
544+
f"WARNING: Directory '{dir_path}' was already visited "
545+
f"(possible hard-linked directory structure)."
546+
)
547+
dirs_to_remove.append(d)
548+
else:
549+
visited_inodes.add(inode_key)
550+
except OSError:
551+
pass
552+
553+
# Remove links and circular dirs from traversal
554+
for d in dirs_to_remove:
555+
dirs.remove(d)
556+
557+
except PermissionError as e:
558+
soft_issues.append(f"WARNING: Permission denied accessing '{source_path}': {e}")
559+
except OSError as e:
560+
soft_issues.append(f"WARNING: Error traversing '{source_path}': {e}")
561+
562+
can_proceed = len(hard_issues) == 0
563+
return can_proceed, hard_issues, soft_issues, link_report
564+
565+
374566
def preflight_checks(
375567
source_files: List[Union[str, Path]],
376568
dest_path: Union[str, Path],
@@ -407,19 +599,37 @@ def preflight_checks(
407599

408600
# CRITICAL: Check for path cycles (symlinks/junctions pointing to same location)
409601
# This must be checked first as it can cause catastrophic data loss on MOVE
410-
cycle_issues = detect_path_cycle(source_files, dest_path)
411-
for issue in cycle_issues:
412-
if issue.startswith("CRITICAL:"):
413-
# For MOVE: always block. For COPY: block on same-location, warn on others
414-
if is_move or "resolve to the same location" in issue:
415-
hard_issues.append(issue)
602+
# Use deep detection for MOVE (checks nested junctions), simple for COPY
603+
if is_move:
604+
# Deep scan: walks source tree to find nested junctions pointing to dest
605+
_, cycle_hard, cycle_soft, link_report = detect_path_cycles_deep(
606+
source_files, dest_path, operation
607+
)
608+
hard_issues.extend(cycle_hard)
609+
soft_issues.extend(cycle_soft)
610+
611+
# Log link report if any links found
612+
if link_report:
613+
logger.info(f"Found {len(link_report)} link(s) in source tree:")
614+
for link_info in link_report:
615+
logger.debug(
616+
f" - {link_info['link_type']}: {link_info['link_path']} -> "
617+
f"{link_info.get('target_resolved', link_info['target'])}"
618+
)
619+
else:
620+
# Simple check for COPY (less critical, no source deletion)
621+
cycle_issues = detect_path_cycle(source_files, dest_path)
622+
for issue in cycle_issues:
623+
if issue.startswith("CRITICAL:"):
624+
# For COPY: block on same-location, warn on others
625+
if "resolve to the same location" in issue:
626+
hard_issues.append(issue)
627+
else:
628+
soft_issues.append(issue)
629+
elif issue.startswith("WARNING:"):
630+
soft_issues.append(issue)
416631
else:
417632
soft_issues.append(issue)
418-
elif issue.startswith("WARNING:"):
419-
soft_issues.append(issue)
420-
else:
421-
# Unknown format, treat as soft issue
422-
soft_issues.append(issue)
423633

424634
# Check destination write permission
425635
if check_permissions:

0 commit comments

Comments
 (0)