|
31 | 31 |
|
32 | 32 | from .manifest import PreserveManifest, calculate_file_hash, verify_file_hash |
33 | 33 | from .metadata import collect_file_metadata, apply_file_metadata |
| 34 | +from .links import is_link, detect_link_type, get_link_target |
34 | 35 | from .destination import ( |
35 | 36 | ConflictResolution, |
36 | 37 | FileCategory, |
@@ -371,6 +372,197 @@ def detect_path_cycle( |
371 | 372 | return issues |
372 | 373 |
|
373 | 374 |
|
def _dir_identity(path: Path) -> Union[Tuple[int, int], None]:
    """Return ``(st_dev, st_ino)`` for *path*, or ``None`` if it cannot be used.

    ``None`` is returned when ``stat()`` fails or when the reported inode
    number is 0.  A zero inode is treated as "no identity available" so that
    unrelated directories can never compare equal and be pruned by mistake.
    """
    try:
        st = path.stat()
    except OSError:
        return None
    if st.st_ino == 0:
        return None
    return (st.st_dev, st.st_ino)


def _link_cycle_issue(
    link_path: Path,
    link_type: Any,
    target_resolved: Path,
    dest_resolved: Path,
    dest_path: Union[str, Path],
    operation: str,
) -> Union[Tuple[bool, str], None]:
    """Compare one link's resolved target with the resolved destination.

    Returns ``(is_critical, message)`` when the link relates to the
    destination, or ``None`` when it does not.  May raise ``OSError`` (from
    ``os.path.samefile``) or ``ValueError``; the caller handles both.
    """
    # Check 1: Link target IS the destination
    if os.path.samefile(target_resolved, dest_resolved):
        return True, (
            f"CRITICAL: Link '{link_path}' ({link_type}) points to "
            f"destination '{dest_path}'. Traversing it during {operation} "
            f"would copy files to themselves then delete them!"
        )

    # Check 2: Link target is INSIDE destination
    if target_resolved.is_relative_to(dest_resolved):
        return True, (
            f"CRITICAL: Link '{link_path}' ({link_type}) points inside "
            f"destination at '{target_resolved}'. Traversing it during "
            f"{operation} would create a cycle!"
        )

    # Check 3: Destination is inside link target
    if dest_resolved.is_relative_to(target_resolved):
        return False, (
            f"WARNING: Link '{link_path}' ({link_type}) target contains "
            f"destination. This may cause unexpected nesting behavior."
        )

    return None


def detect_path_cycles_deep(
    source_paths: List[Union[str, Path]],
    dest_path: Union[str, Path],
    operation: str = "MOVE"
) -> Tuple[bool, List[str], List[str], List[Dict[str, Any]]]:
    """
    Deep scan for cycle conditions including nested symlinks/junctions.

    This function performs a comprehensive check by:
    1. Running top-level cycle detection (``detect_path_cycle``)
    2. Walking each source directory WITHOUT following links
    3. For each directory link found, resolving its target and comparing it
       with the destination

    Link targets and walked directories are tracked in two separate sets.
    A link that points at a directory inside the source tree therefore never
    causes the real directory to be skipped, so links nested below it are
    still examined.

    Args:
        source_paths: List of source file/directory paths. Missing paths and
            plain files are skipped by the deep scan.
        dest_path: Destination base path
        operation: Operation type ("COPY" or "MOVE"); compared
            case-insensitively. Link/destination conflicts are blocking only
            for "MOVE"; otherwise they are downgraded to warnings.

    Returns:
        Tuple of (can_proceed, hard_issues, soft_issues, link_report)
        - can_proceed: True if no blocking issues found
        - hard_issues: List of blocking issues (must abort)
        - soft_issues: List of warnings (can continue with confirmation)
        - link_report: List of dicts describing links found in source tree,
          with keys 'link_path', 'link_type', 'target', 'target_resolved'
          and 'target_exists'
    """
    hard_issues: List[str] = []
    soft_issues: List[str] = []
    link_report: List[Dict[str, Any]] = []
    is_move = operation.upper() == "MOVE"

    dest = Path(dest_path)
    try:
        dest_resolved = dest.resolve()
        dest_exists = dest_resolved.exists()
    except OSError:
        dest_resolved = None
        dest_exists = False

    # Phase 1: Top-level checks (existing function)
    for issue in detect_path_cycle(source_paths, dest_path):
        if "CRITICAL" in issue:
            hard_issues.append(issue)
        else:
            soft_issues.append(issue)

    # Phase 2: Deep link discovery (walk without following links)
    # Real directories the walk has accepted; used to prune repeated visits.
    walked_dirs = set()
    # Targets of links seen so far; used only for reporting, never for
    # pruning, so a link cannot hide the real directory it points at.
    link_targets = set()
    max_depth = 100  # Safety limit for deep trees

    for source in source_paths:
        source_path = Path(source)

        if not source_path.exists():
            continue

        # For files, no traversal needed
        if source_path.is_file():
            continue

        # Record the root itself so a nested link pointing back at it is
        # reported as circular.
        root_key = _dir_identity(source_path)
        if root_key is not None:
            walked_dirs.add(root_key)

        try:
            for root, dirs, _files in os.walk(source_path, followlinks=False):
                # Check depth limit
                try:
                    depth = len(Path(root).relative_to(source_path).parts)
                    if depth > max_depth:
                        soft_issues.append(
                            f"WARNING: Depth limit ({max_depth}) reached at '{root}'. "
                            f"Deeper directories not checked for cycles."
                        )
                        dirs.clear()  # Don't descend further
                        continue
                except ValueError:
                    pass

                root_path = Path(root)
                # Subdirectories the walk is allowed to descend into.
                keep = []

                for d in dirs:
                    dir_path = root_path / d

                    if not is_link(dir_path):
                        # Regular directory: descend unless it was already walked.
                        dir_key = _dir_identity(dir_path)
                        if dir_key is not None and dir_key in walked_dirs:
                            soft_issues.append(
                                f"WARNING: Directory '{dir_path}' was already visited "
                                f"(possible hard-linked directory structure)."
                            )
                            continue
                        if dir_key is not None:
                            walked_dirs.add(dir_key)
                        keep.append(d)
                        continue

                    # Link (symlink or junction): analyse it, never descend.
                    link_type = detect_link_type(dir_path)
                    target_str = get_link_target(dir_path)

                    try:
                        target_resolved = dir_path.resolve()
                        target_exists = target_resolved.exists()
                    except OSError:
                        target_resolved = None
                        target_exists = False

                    link_report.append({
                        'link_path': str(dir_path),
                        'link_type': link_type or 'unknown',
                        'target': target_str or 'UNRESOLVABLE',
                        'target_resolved': str(target_resolved) if target_resolved else None,
                        'target_exists': target_exists,
                    })

                    # Compare with the destination only when both sides exist.
                    if (
                        target_resolved is not None
                        and dest_resolved is not None
                        and target_exists
                        and dest_exists
                    ):
                        try:
                            finding = _link_cycle_issue(
                                dir_path, link_type, target_resolved,
                                dest_resolved, dest_path, operation,
                            )
                        except (OSError, ValueError) as exc:
                            # The link could not be verified; surface that
                            # instead of silently treating it as safe.
                            soft_issues.append(
                                f"WARNING: Could not compare link '{dir_path}' "
                                f"with destination '{dest_path}': {exc}"
                            )
                        else:
                            if finding is not None:
                                critical, message = finding
                                if critical and is_move:
                                    hard_issues.append(message)
                                elif critical:
                                    soft_issues.append(
                                        message.replace("CRITICAL:", "WARNING:")
                                    )
                                else:
                                    soft_issues.append(message)

                    # Circular/duplicate link: target is an already-walked
                    # directory or the target of an earlier link.
                    if target_resolved is not None:
                        target_key = _dir_identity(target_resolved)
                        if target_key is not None:
                            if target_key in walked_dirs or target_key in link_targets:
                                soft_issues.append(
                                    f"WARNING: Circular link detected at '{dir_path}'. "
                                    f"Target '{target_resolved}' was already visited."
                                )
                            else:
                                link_targets.add(target_key)

                # In-place assignment so os.walk honours the pruned list.
                dirs[:] = keep

        except PermissionError as e:
            soft_issues.append(f"WARNING: Permission denied accessing '{source_path}': {e}")
        except OSError as e:
            soft_issues.append(f"WARNING: Error traversing '{source_path}': {e}")

    can_proceed = len(hard_issues) == 0
    return can_proceed, hard_issues, soft_issues, link_report
| 564 | + |
| 565 | + |
374 | 566 | def preflight_checks( |
375 | 567 | source_files: List[Union[str, Path]], |
376 | 568 | dest_path: Union[str, Path], |
@@ -407,19 +599,37 @@ def preflight_checks( |
407 | 599 |
|
408 | 600 | # CRITICAL: Check for path cycles (symlinks/junctions pointing to same location) |
409 | 601 | # This must be checked first as it can cause catastrophic data loss on MOVE |
410 | | - cycle_issues = detect_path_cycle(source_files, dest_path) |
411 | | - for issue in cycle_issues: |
412 | | - if issue.startswith("CRITICAL:"): |
413 | | - # For MOVE: always block. For COPY: block on same-location, warn on others |
414 | | - if is_move or "resolve to the same location" in issue: |
415 | | - hard_issues.append(issue) |
| 602 | + # Use deep detection for MOVE (checks nested junctions), simple for COPY |
| 603 | + if is_move: |
| 604 | + # Deep scan: walks source tree to find nested junctions pointing to dest |
| 605 | + _, cycle_hard, cycle_soft, link_report = detect_path_cycles_deep( |
| 606 | + source_files, dest_path, operation |
| 607 | + ) |
| 608 | + hard_issues.extend(cycle_hard) |
| 609 | + soft_issues.extend(cycle_soft) |
| 610 | + |
| 611 | + # Log link report if any links found |
| 612 | + if link_report: |
| 613 | + logger.info(f"Found {len(link_report)} link(s) in source tree:") |
| 614 | + for link_info in link_report: |
| 615 | + logger.debug( |
| 616 | + f" - {link_info['link_type']}: {link_info['link_path']} -> " |
| 617 | + f"{link_info.get('target_resolved', link_info['target'])}" |
| 618 | + ) |
| 619 | + else: |
| 620 | + # Simple check for COPY (less critical, no source deletion) |
| 621 | + cycle_issues = detect_path_cycle(source_files, dest_path) |
| 622 | + for issue in cycle_issues: |
| 623 | + if issue.startswith("CRITICAL:"): |
| 624 | + # For COPY: block on same-location, warn on others |
| 625 | + if "resolve to the same location" in issue: |
| 626 | + hard_issues.append(issue) |
| 627 | + else: |
| 628 | + soft_issues.append(issue) |
| 629 | + elif issue.startswith("WARNING:"): |
| 630 | + soft_issues.append(issue) |
416 | 631 | else: |
417 | 632 | soft_issues.append(issue) |
418 | | - elif issue.startswith("WARNING:"): |
419 | | - soft_issues.append(issue) |
420 | | - else: |
421 | | - # Unknown format, treat as soft issue |
422 | | - soft_issues.append(issue) |
423 | 633 |
|
424 | 634 | # Check destination write permission |
425 | 635 | if check_permissions: |
|
0 commit comments