|
16 | 16 | from datashuttle.configs.config_class import Configs |
17 | 17 | from datashuttle.utils.custom_types import TopLevelFolder |
18 | 18 |
|
19 | | -import glob |
| 19 | +import fnmatch |
| 20 | +import json |
20 | 21 | from pathlib import Path |
21 | 22 |
|
22 | 23 | from datashuttle.configs import canonical_folders, canonical_tags |
23 | | -from datashuttle.utils import ssh, utils, validation |
| 24 | +from datashuttle.utils import rclone, utils, validation |
24 | 25 | from datashuttle.utils.custom_exceptions import NeuroBlueprintError |
25 | 26 |
|
26 | 27 | # ----------------------------------------------------------------------------- |
@@ -598,67 +599,90 @@ def search_for_folders( |
598 | 599 | Discovered folders (`all_folder_names`) and files (`all_filenames`). |
599 | 600 |
|
600 | 601 | """ |
601 | | - if local_or_central == "central" and cfg["connection_method"] == "ssh": |
602 | | - all_folder_names, all_filenames = ssh.search_ssh_central_for_folders( |
603 | | - search_path, |
604 | | - search_prefix, |
605 | | - cfg, |
606 | | - verbose, |
607 | | - return_full_path, |
608 | | - ) |
| 602 | + if ( |
| 603 | + local_or_central == "local" |
| 604 | + or cfg["connection_method"] == "local_filesystem" |
| 605 | + ) and not search_path.exists(): |
| 606 | + if verbose: |
| 607 | + utils.log_and_message(f"No file found at {search_path.as_posix()}") |
| 608 | + return [], [] |
| 609 | + |
| 610 | + if local_or_central == "local": |
| 611 | + rclone_config_name = None |
609 | 612 | else: |
610 | | - if not search_path.exists(): |
611 | | - if verbose: |
612 | | - utils.log_and_message( |
613 | | - f"No file found at {search_path.as_posix()}" |
614 | | - ) |
615 | | - return [], [] |
616 | | - |
617 | | - all_folder_names, all_filenames = search_filesystem_path_for_folders( |
618 | | - search_path / search_prefix, return_full_path |
| 613 | + rclone_config_name = cfg.get_rclone_config_name( |
| 614 | + cfg["connection_method"] |
619 | 615 | ) |
| 616 | + |
| 617 | + all_folder_names, all_filenames = search_local_or_remote( |
| 618 | + search_path, |
| 619 | + search_prefix, |
| 620 | + rclone_config_name, |
| 621 | + return_full_path, |
| 622 | + ) |
| 623 | + |
620 | 624 | return all_folder_names, all_filenames |
621 | 625 |
|
622 | 626 |
|
623 | | -# Actual function implementation |
624 | | -def search_filesystem_path_for_folders( |
625 | | - search_path_with_prefix: Path, return_full_path: bool = False |
626 | | -) -> Tuple[List[Path | str], List[Path | str]]: |
627 | | - r"""Search a folder through the local filesystem. |
def search_local_or_remote(
    search_path: Path,
    search_prefix: str,
    rclone_config_name: str | None,
    return_full_path: bool = False,
) -> Tuple[List[Any], List[Any]]:
    """Search a local or remote path using `rclone lsjson`.

    `rclone lsjson` is called on `search_path` (no recursive flag is
    passed) and its JSON output is parsed. Each entry is read for its
    "Name" and "IsDir" fields; entries whose name does not match
    `search_prefix` are discarded and the rest are split into folders
    and files.

    Parameters
    ----------
    search_path
        The path to search (relative to the local or remote drive). For
        example, for "local_filesystem" this is the path on the local
        machine. For "ssh", this is the path on the machine that has
        been connected to.
    search_prefix
        The search string e.g. "sub-*". Matched against each entry name
        with `fnmatch.fnmatch`.
    rclone_config_name
        Name of the rclone config for the remote. If `None` (or empty),
        no remote prefix is added and `search_path` is passed to rclone
        as-is (used for local searches). `rclone config` can be used in
        the terminal to see how rclone has stored these. In datashuttle,
        these are managed by `Configs`.
    return_full_path
        If `True`, return `search_path / name` for each match, otherwise
        return only the folder / file name.

    Returns
    -------
    Discovered folders (`all_folder_names`) and files (`all_filenames`),
    each ordered by name. If the rclone call returns a non-zero exit
    code, the error is logged and two empty lists are returned.

    """
    config_prefix = f"{rclone_config_name}:" if rclone_config_name else ""

    # NOTE(review): the path is wrapped in double quotes inside a command
    # string; a path that itself contains a double quote would presumably
    # break the command - confirm against how `call_rclone` tokenizes it.
    output = rclone.call_rclone(
        f'lsjson {config_prefix}"{search_path.as_posix()}"',
        pipe_std=True,
    )

    # Elements are `Path` when `return_full_path` is set, otherwise `str`.
    all_folder_names: List[Any] = []
    all_filenames: List[Any] = []

    if output.returncode != 0:
        # Decode leniently so that undecodable bytes in stderr cannot
        # raise here and hide the original rclone failure.
        stderr_text = (
            output.stderr.decode("utf-8", errors="replace")
            if output.stderr
            else ""
        )
        utils.log_and_message(
            f"Error searching files at {search_path.as_posix()}\n"
            f"{stderr_text}"
        )
        return all_folder_names, all_filenames

    files_and_folders = json.loads(output.stdout)

    # Sort by name so the output order is deterministic and does not
    # depend on the order in which the backend lists its entries.
    ordered_entries = sorted(
        files_and_folders, key=lambda entry: entry["Name"]
    )

    for file_or_folder in ordered_entries:
        name = file_or_folder["Name"]

        if not fnmatch.fnmatch(name, search_prefix):
            continue

        to_append = search_path / name if return_full_path else name

        # Entries without an "IsDir" field are treated as files.
        if file_or_folder.get("IsDir", False):
            all_folder_names.append(to_append)
        else:
            all_filenames.append(to_append)

    return all_folder_names, all_filenames
0 commit comments