|
| 1 | +import asyncio |
| 2 | +import json |
| 3 | +import uuid |
| 4 | +from pathlib import Path |
| 5 | +from typing import Dict, List, Optional, Union |
| 6 | + |
| 7 | +from pydantic import BaseModel |
| 8 | + |
| 9 | +from codegate.db.connection import DbRecorder |
| 10 | +from codegate.db.models import Workspace |
| 11 | + |
| 12 | + |
# Files that sit directly inside one directory of a scanned repository.
class Folder(BaseModel):
    # Bare file names (no directory component); starts out empty.
    files: List[str] = []
| 15 | + |
| 16 | + |
# One discovered repository: its name plus a per-directory listing of files.
class Repository(BaseModel):
    # Name used to identify the repository.
    name: str
    # Maps a directory path (as a string) to the Folder holding its files.
    folder_tree: Dict[str, Folder]
| 20 | + |
| 21 | + |
class FolderRepoScanner:
    """Find git repositories below a directory and record their file layout."""

    def __init__(self, ignore_paths: Optional[List[str]] = None):
        """Create a scanner.

        Args:
            ignore_paths: Path component names to leave out of the recorded
                folder tree. ``None`` is treated as an empty list.
        """
        if ignore_paths is None:
            ignore_paths = []
        self.ignore_paths = ignore_paths

    def _should_skip(self, path: Path) -> bool:
        """Return True when any entry of ``ignore_paths`` is a component of ``path``.

        The comparison is made against every component of ``path`` as given,
        which includes the components of the repository root it was found under.
        """
        return any(part in path.parts for part in self.ignore_paths)

    def _read_repository_structure(self, repo_path: Path) -> Dict[str, Folder]:
        """Build the folder tree of the repository rooted at ``repo_path``.

        Args:
            repo_path: Root directory of the repository.

        Returns:
            A mapping from directory path (relative to ``repo_path``, as a
            string) to a ``Folder`` listing the names of the files directly
            inside it. Files at the repository root are stored under ``"."``.
            Paths for which ``_should_skip`` is true are omitted.
        """
        folder_tree: Dict[str, Folder] = {}
        for path in repo_path.rglob('*'):
            if self._should_skip(path):
                continue

            relative_path = path.relative_to(repo_path)
            if path.is_dir():
                # setdefault, not assignment: if a file from this directory
                # was yielded first, its entry must not be replaced by an
                # empty Folder.
                folder_tree.setdefault(str(relative_path), Folder())
            else:
                parent_dir = str(relative_path.parent)
                folder_tree.setdefault(parent_dir, Folder()).files.append(path.name)
        return folder_tree

    def read(self, path_str: Union[str, Path]) -> List[Repository]:
        """Scan ``path_str`` recursively for directories that contain ``.git``.

        Only descendants of ``path_str`` are examined; the directory itself is
        not checked for a ``.git`` entry.

        Args:
            path_str: Directory to search.

        Returns:
            One ``Repository`` per directory found with a ``.git`` entry, named
            after that directory. An empty list (after printing a message) when
            ``path_str`` is not a directory.
        """
        path_dir = Path(path_str)
        if not path_dir.is_dir():
            print(f"Path {path_dir} is not a directory")
            return []

        found_repos = []
        for child_path in path_dir.rglob('*'):
            if child_path.is_dir() and (child_path / ".git").exists():
                repo_structure = self._read_repository_structure(child_path)
                new_repo = Repository(name=child_path.name, folder_tree=repo_structure)
                found_repos.append(new_repo)
                print(f"Found repository at {child_path}.")

        return found_repos
| 64 | + |
class Workspaces:
    """Discover repositories on disk and store them as workspaces."""

    def __init__(self):
        """Create the recorder used to persist workspaces."""
        self._db_recorder = DbRecorder()

    def read_workspaces(self, path: str, ignore_paths: Optional[List[str]] = None) -> None:
        """Scan ``path`` for repositories and record each one as a workspace.

        Args:
            path: Directory handed to ``FolderRepoScanner.read``.
            ignore_paths: Path component names forwarded to the scanner.

        Each repository becomes a ``Workspace`` with a freshly generated UUID,
        the repository name, and its folder tree encoded as JSON of the form
        ``{"<directory>": {"files": ["<file name>", ...]}, ...}``.
        """
        repos = FolderRepoScanner(ignore_paths).read(path)
        workspaces = [
            Workspace(
                id=str(uuid.uuid4()),
                name=repo.name,
                # The tree's values are Folder models, which json.dumps cannot
                # encode directly; turn them into plain dicts first.
                folder_tree_json=json.dumps(
                    {
                        folder_path: {"files": list(folder.files)}
                        for folder_path, folder in repo.folder_tree.items()
                    }
                ),
            )
            for repo in repos
        ]
        # Drives the recorder's coroutine to completion from synchronous code.
        asyncio.run(self._db_recorder.record_workspaces(workspaces))
0 commit comments