Skip to content

Commit 70353d1

Browse files
WIP: feat: Initial code to load workspaces from a specific container path
Related: #454 This is the initial work to create workspaces when the server is initialized. The idea is that the user mounts a volume at a specific location, `/app/codegate_workspaces`, and the git repositories are read from there.
1 parent 09e94df commit 70353d1

File tree

7 files changed

+130
-1
lines changed

7 files changed

+130
-1
lines changed

sql/schema/schema.sql

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
11
-- Schema for codegate database using SQLite
22

3+
-- Workspaces table
4+
CREATE TABLE workspaces (
    id               TEXT PRIMARY KEY,  -- UUID stored as TEXT
    name             TEXT,              -- Name of the repository directory the workspace was created from
    folder_tree_json TEXT               -- JSON stored as TEXT: serialized folder/file tree of the repository
);
9+
310
-- Prompts table
411
CREATE TABLE prompts (
    id TEXT PRIMARY KEY,  -- UUID stored as TEXT
    workspace_id TEXT NOT NULL,  -- Workspace this prompt belongs to
    -- NOTE(review): NOT NULL means every prompt insert must supply a workspace_id;
    -- confirm the code that records prompts has been updated accordingly.
    timestamp DATETIME NOT NULL,
    provider TEXT,  -- VARCHAR(255)
    request TEXT NOT NULL,  -- Record the full request that arrived to the server
    type TEXT NOT NULL,  -- VARCHAR(50) (e.g. "fim", "chat")
    -- No trailing comma after the last clause: SQLite rejects `...,)` as a syntax error.
    FOREIGN KEY (workspace_id) REFERENCES workspaces(id)
);
1120

1221
-- Outputs table
@@ -41,6 +50,7 @@ CREATE TABLE settings (
4150
);
4251

4352
-- Create indexes for foreign keys and frequently queried columns
53+
CREATE INDEX idx_prompts_workspace_id ON prompts(workspace_id);
4454
CREATE INDEX idx_outputs_prompt_id ON outputs(prompt_id);
4555
CREATE INDEX idx_alerts_prompt_id ON alerts(prompt_id);
4656
CREATE INDEX idx_prompts_timestamp ON prompts(timestamp);

src/codegate/cli.py

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from codegate.providers.copilot.provider import CopilotProvider
2121
from codegate.server import init_app
2222
from codegate.storage.utils import restore_storage_backup
23+
from codegate.workspaces.workspaces import Workspaces
2324

2425

2526
class UvicornServer:
@@ -318,6 +319,7 @@ def serve(
318319
else:
319320
click.echo("Existing Certificates are already present.")
320321

322+
Workspaces().read_workspaces('/app/codegate_workspaces', cfg.ignore_paths_workspaces)
321323
# Initialize secrets manager and pipeline factory
322324
secrets_manager = SecretsManager()
323325
pipeline_factory = PipelineFactory(secrets_manager)

src/codegate/config.py

+3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ class Config:
5454
force_certs: bool = False
5555

5656
max_fim_hash_lifetime: int = 60 * 5 # Time in seconds. Default is 5 minutes.
57+
ignore_paths_workspaces = [
58+
".git", "__pycache__", ".venv", ".DS_Store", "node_modules", ".pytest_cache", ".ruff_cache"
59+
]
5760

5861
# Provider URLs with defaults
5962
provider_urls: Dict[str, str] = field(default_factory=lambda: DEFAULT_PROVIDER_URLS.copy())

src/codegate/db/connection.py

+28
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
GetPromptWithOutputsRow,
1616
Output,
1717
Prompt,
18+
Workspace,
1819
)
1920
from codegate.pipeline.base import PipelineContext
2021

@@ -252,6 +253,33 @@ async def record_context(self, context: Optional[PipelineContext]) -> None:
252253
except Exception as e:
253254
logger.error(f"Failed to record context: {context}.", error=str(e))
254255

256+
async def record_workspaces(self, workspaces: List[Workspace]) -> List[Workspace]:
    """Insert the given workspaces into the ``workspaces`` table concurrently.

    Args:
        workspaces: Workspace models to persist. An empty or ``None`` value
            is a no-op.

    Returns:
        The truthy results returned by the per-workspace insert, in input
        order. Workspaces whose insert failed or returned a falsy value are
        left out. An empty list is returned when there is nothing to record.
    """
    if not workspaces:
        # Honour the declared return type instead of returning None.
        return []

    sql = text(
        """
        INSERT INTO workspaces (id, name, folder_tree_json)
        VALUES (:id, :name, :folder_tree_json)
        RETURNING *
        """
    )

    async def _record_one(workspace: Workspace) -> Optional[Workspace]:
        # The exception has to be caught inside the task itself: wrapping
        # ``tg.create_task`` in try/except never sees errors raised while the
        # coroutine runs, and an uncaught error would cancel the sibling
        # inserts and surface as an ExceptionGroup from the TaskGroup.
        try:
            # presumably returns the inserted row as a model (or a falsy
            # value on failure) -- confirm against the helper's definition.
            return await self._execute_update_pydantic_model(workspace, sql)
        except Exception as e:
            logger.error(f"Failed to record workspace: {workspace}.", error=str(e))
            return None

    async with asyncio.TaskGroup() as tg:
        tasks = [tg.create_task(_record_one(workspace)) for workspace in workspaces]

    # All tasks are finished once the TaskGroup block exits.
    recorded_workspaces = []
    for task in tasks:
        recorded = task.result()
        if recorded:
            recorded_workspaces.append(recorded)

    return recorded_workspaces
282+
255283

256284
class DbReader(DbCodeGate):
257285

src/codegate/db/models.py

+6
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ class Setting(pydantic.BaseModel):
3737
other_settings: Optional[Any]
3838

3939

40+
class Workspace(pydantic.BaseModel):
    """Model for one row of the ``workspaces`` table."""

    # Primary key; the schema stores it as a UUID in a TEXT column.
    id: Any
    # Workspace name.
    name: str
    # Folder tree of the workspace, serialized as a JSON string.
    folder_tree_json: str
44+
45+
4046
# Models for select queries
4147

4248

src/codegate/workspaces/__init__.py

Whitespace-only changes.

src/codegate/workspaces/workspaces.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import asyncio
2+
import json
3+
import uuid
4+
from pathlib import Path
5+
from typing import Dict, List, Optional, Union
6+
7+
from pydantic import BaseModel
8+
9+
from codegate.db.connection import DbRecorder
10+
from codegate.db.models import Workspace
11+
12+
13+
class Folder(BaseModel):
    """A single directory of a repository, described by the files it holds."""

    # Names (not paths) of the files located directly in this directory.
    files: List[str] = []
15+
16+
17+
class Repository(BaseModel):
    """A git repository found on disk together with its folder layout."""

    # Name of the repository's top-level directory.
    name: str
    # Maps a directory path relative to the repository root to its Folder.
    folder_tree: Dict[str, Folder]
20+
21+
22+
class FolderRepoScanner:
    """Find git repositories below a directory and map out their folder layout.

    A directory counts as a repository when it contains a ``.git`` entry.
    Any path with a component listed in ``ignore_paths`` is left out of the scan.
    """

    def __init__(self, ignore_paths: Optional[List[str]] = None):
        """Store the path component names to ignore.

        Args:
            ignore_paths: Directory/file names (e.g. ``".git"``) to exclude.
                ``None`` means nothing is ignored.
        """
        if ignore_paths is None:
            ignore_paths = []
        self.ignore_paths = ignore_paths

    def _should_skip(self, path: Path) -> bool:
        """Return True when any component of *path* is an ignored name."""
        return any(part in path.parts for part in self.ignore_paths)

    def _read_repository_structure(self, repo_path: Path) -> Dict[str, Folder]:
        """Build the folder tree of the repository rooted at *repo_path*.

        Returns:
            A dict keyed by directory path relative to *repo_path* (``"."``
            for the root) whose values list the file names in that directory.
        """
        folder_tree: Dict[str, Folder] = {}
        for path in repo_path.rglob("*"):
            relative_path = path.relative_to(repo_path)
            # Match ignore names against the repo-relative path only, so an
            # ignored name in the directories *leading to* the repository
            # (e.g. a mount point under ".venv") does not hide the whole repo.
            if self._should_skip(relative_path):
                continue

            if path.is_dir():
                # setdefault: never wipe files already collected for this
                # directory, regardless of the traversal order.
                folder_tree.setdefault(str(relative_path), Folder())
            else:
                parent_dir = str(relative_path.parent)
                folder_tree.setdefault(parent_dir, Folder()).files.append(path.name)
        return folder_tree

    def read(self, path_str: Union[str, Path]) -> List[Repository]:
        """Return every repository found below *path_str*.

        Args:
            path_str: Directory to search recursively.

        Returns:
            One ``Repository`` per directory containing a ``.git`` entry, or
            an empty list when *path_str* is not a directory.
        """
        path_dir = Path(path_str)
        if not path_dir.is_dir():
            print(f"Path {path_dir} is not a directory")
            return []

        found_repos = []
        for child_path in path_dir.rglob("*"):
            # Repositories living inside ignored directories (for instance
            # packages vendored under "node_modules") are not workspaces.
            if self._should_skip(child_path.relative_to(path_dir)):
                continue
            if child_path.is_dir() and (child_path / ".git").exists():
                repo_structure = self._read_repository_structure(child_path)
                new_repo = Repository(name=child_path.name, folder_tree=repo_structure)
                found_repos.append(new_repo)
                print(f"Found repository at {child_path}.")

        return found_repos
64+
65+
class Workspaces:
    """Create workspace records from the git repositories found on disk."""

    def __init__(self):
        self._db_recorder = DbRecorder()

    def read_workspaces(self, path: str, ignore_paths: Optional[List[str]] = None) -> None:
        """Scan *path* for git repositories and record each one as a workspace.

        Args:
            path: Directory to scan for repositories.
            ignore_paths: Path component names to exclude from the scan.

        NOTE(review): every call assigns fresh UUIDs, so scanning the same
        directory again presumably inserts the same repositories a second
        time -- confirm whether de-duplication is expected.
        """
        repos = FolderRepoScanner(ignore_paths).read(path)
        workspaces = [
            Workspace(
                id=str(uuid.uuid4()),
                name=repo.name,
                # The folder tree values are pydantic ``Folder`` objects, which
                # ``json.dumps`` cannot encode (TypeError); convert them to
                # plain dicts first.
                folder_tree_json=json.dumps(
                    {
                        folder_path: {"files": folder.files}
                        for folder_path, folder in repo.folder_tree.items()
                    }
                ),
            )
            for repo in repos
        ]
        if not workspaces:
            # Nothing to store; avoid spinning up an event loop for a no-op.
            return
        asyncio.run(self._db_recorder.record_workspaces(workspaces))

0 commit comments

Comments
 (0)