diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 000000000..70bf24589 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,19 @@ +--- +name: Question +about: Ask a question about the project +title: '' +labels: 'question' +assignees: '' +--- + +## Question + +Describe your question clearly + +## Context + +Any relevant context or background information + +## What I've Tried + +Steps or research you've already done diff --git a/.github/ISSUE_TEMPLATE/todo.md b/.github/ISSUE_TEMPLATE/todo.md index 978833797..0c8c78b65 100644 --- a/.github/ISSUE_TEMPLATE/todo.md +++ b/.github/ISSUE_TEMPLATE/todo.md @@ -1,32 +1,24 @@ --- -name: TODO/Task +name: Task/Enhancement about: Track development tasks and improvements -title: '[TODO] ' -labels: 'enhancement, todo' +title: '' +labels: 'enhancement' assignees: '' --- -## 📋 Task Description +## Description + Brief description of what needs to be done -## 🎯 Acceptance Criteria +## Acceptance Criteria + - [ ] Criterion 1 - [ ] Criterion 2 -- [ ] Criterion 3 -## 🔗 Related Files/Components -- File: `path/to/file.py` -- Component: Service/Tool name +## Related Files -## 📝 Additional Context -Any additional information, screenshots, or context +- `path/to/file.py` -## 🚦 Priority -- [ ] High -- [ ] Medium -- [ ] Low +## Additional Context -## 📊 Estimated Effort -- [ ] Small (< 2 hours) -- [ ] Medium (2-8 hours) -- [ ] Large (> 8 hours) +Any additional information or context diff --git a/README.md b/README.md index 423769ae8..3b47d6dc2 100644 --- a/README.md +++ b/README.md @@ -519,7 +519,10 @@ claude mcp add --transport stdio graph-code \ | Tool | Description | |----|-----------| -| `index_repository` | Parse and ingest the repository into the Memgraph knowledge graph. This builds a comprehensive graph of functions, classes, dependencies, and relationships. | +| `list_projects` | List all indexed projects in the knowledge graph database. Returns a list of project names that have been indexed. | +| `delete_project` | Delete a specific project from the knowledge graph database. This removes all nodes associated with the project while preserving other projects. Use list_projects first to see available projects. | +| `wipe_database` | WARNING: Completely wipe the entire database, removing ALL indexed projects. This cannot be undone. Use delete_project for removing individual projects. | +| `index_repository` | Parse and ingest the repository into the Memgraph knowledge graph. This builds a comprehensive graph of functions, classes, dependencies, and relationships. Note: This preserves other projects - only the current project is re-indexed. | | `query_code_graph` | Query the codebase knowledge graph using natural language. Ask questions like 'What functions call UserService.create_user?' or 'Show me all classes that implement the Repository interface'. | | `get_code_snippet` | Retrieve source code for a function, class, or method by its qualified name. Returns the source code, file path, line numbers, and docstring. | | `surgical_replace_code` | Surgically replace an exact code block in a file using diff-match-patch. Only modifies the exact target block, leaving the rest unchanged. | diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index 623ec4a7d..8abcda648 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -169,6 +169,8 @@ class GoogleProviderType(StrEnum): KEY_FROM_VAL = "from_val" KEY_TO_VAL = "to_val" KEY_VERSION_SPEC = "version_spec" +KEY_PREFIX = "prefix" +KEY_PROJECT_NAME = "project_name" ERR_SUBSTR_ALREADY_EXISTS = "already exists" ERR_SUBSTR_CONSTRAINT = "constraint" @@ -362,6 +364,8 @@ class RelationshipType(StrEnum): CSPROJ_SUFFIX = ".csproj" # (H) Cypher queries +CYPHER_DEFAULT_LIMIT = 50 + CYPHER_QUERY_EMBEDDINGS = """ MATCH (m:Module)-[:DEFINES]->(n) WHERE n:Function OR n:Method @@ -2298,6 +2302,9 @@ class CppNodeType(StrEnum): # (H) MCP tool names class MCPToolName(StrEnum): + LIST_PROJECTS = "list_projects" + DELETE_PROJECT = "delete_project" + WIPE_DATABASE = "wipe_database" INDEX_REPOSITORY = "index_repository" QUERY_CODE_GRAPH = "query_code_graph" GET_CODE_SNIPPET = "get_code_snippet" @@ -2319,6 +2326,7 @@ class MCPSchemaType(StrEnum): OBJECT = "object" STRING = "string" INTEGER = "integer" + BOOLEAN = "boolean" # (H) MCP schema fields @@ -2332,6 +2340,8 @@ class MCPSchemaField(StrEnum): # (H) MCP parameter names class MCPParamName(StrEnum): + PROJECT_NAME = "project_name" + CONFIRM = "confirm" NATURAL_LANGUAGE_QUERY = "natural_language_query" QUALIFIED_NAME = "qualified_name" FILE_PATH = "file_path" @@ -2354,10 +2364,15 @@ class MCPParamName(StrEnum): # (H) MCP response messages MCP_INDEX_SUCCESS = "Successfully indexed repository at {path}. Knowledge graph has been updated (previous data cleared)." +MCP_INDEX_SUCCESS_PROJECT = "Successfully indexed repository at {path}. Project '{project_name}' has been updated." MCP_INDEX_ERROR = "Error indexing repository: {error}" MCP_WRITE_SUCCESS = "Successfully wrote file: {path}" MCP_UNKNOWN_TOOL_ERROR = "Unknown tool: {name}" MCP_TOOL_EXEC_ERROR = "Error executing tool '{name}': {error}" +MCP_PROJECT_DELETED = "Successfully deleted project '{project_name}'." +MCP_WIPE_CANCELLED = "Database wipe cancelled. Set confirm=true to proceed." +MCP_WIPE_SUCCESS = "Database completely wiped. All projects have been removed." +MCP_WIPE_ERROR = "Error wiping database: {error}" # (H) MCP dict keys and values MCP_KEY_RESULTS = "results" diff --git a/codebase_rag/cypher_queries.py b/codebase_rag/cypher_queries.py index 8cc225187..4afd98b2e 100644 --- a/codebase_rag/cypher_queries.py +++ b/codebase_rag/cypher_queries.py @@ -1,27 +1,53 @@ +from .constants import CYPHER_DEFAULT_LIMIT + CYPHER_DELETE_ALL = "MATCH (n) DETACH DELETE n;" -CYPHER_EXAMPLE_DECORATED_FUNCTIONS = """MATCH (n:Function|Method) +CYPHER_LIST_PROJECTS = "MATCH (p:Project) RETURN p.name AS name ORDER BY p.name" + +CYPHER_DELETE_PROJECT = """ +MATCH (p:Project {name: $project_name}) +OPTIONAL MATCH (p)-[:CONTAINS_PACKAGE|CONTAINS_FOLDER|CONTAINS_FILE|CONTAINS_MODULE*]->(container) +OPTIONAL MATCH (container)-[:DEFINES|DEFINES_METHOD*]->(defined) +DETACH DELETE p, container, defined +""" + +CYPHER_EXAMPLE_DECORATED_FUNCTIONS = f"""MATCH (n:Function|Method) WHERE ANY(d IN n.decorators WHERE toLower(d) IN ['flow', 'task']) -RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type""" +RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type +LIMIT {CYPHER_DEFAULT_LIMIT}""" -CYPHER_EXAMPLE_CONTENT_BY_PATH = """MATCH (n) +CYPHER_EXAMPLE_CONTENT_BY_PATH = f"""MATCH (n) WHERE n.path IS NOT NULL AND n.path STARTS WITH 'workflows' -RETURN n.name AS name, n.path AS path, labels(n) AS type""" +RETURN n.name AS name, n.path AS path, labels(n) AS type +LIMIT {CYPHER_DEFAULT_LIMIT}""" -CYPHER_EXAMPLE_KEYWORD_SEARCH = """MATCH (n) +CYPHER_EXAMPLE_KEYWORD_SEARCH = f"""MATCH (n) WHERE toLower(n.name) CONTAINS 'database' OR (n.qualified_name IS NOT NULL AND toLower(n.qualified_name) CONTAINS 'database') -RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type""" +RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type +LIMIT {CYPHER_DEFAULT_LIMIT}""" CYPHER_EXAMPLE_FIND_FILE = """MATCH (f:File) WHERE toLower(f.name) = 'readme.md' AND f.path = 'README.md' RETURN f.path as path, f.name as name, labels(f) as type""" -CYPHER_EXAMPLE_README = """MATCH (f:File) WHERE toLower(f.name) CONTAINS 'readme' RETURN f.path AS path, f.name AS name, labels(f) AS type""" - -CYPHER_EXAMPLE_PYTHON_FILES = """MATCH (f:File) WHERE f.extension = '.py' RETURN f.path AS path, f.name AS name, labels(f) AS type""" - -CYPHER_EXAMPLE_TASKS = """MATCH (n:Function|Method) WHERE 'task' IN n.decorators RETURN n.qualified_name AS qualified_name, n.name AS name, labels(n) AS type""" - -CYPHER_EXAMPLE_FILES_IN_FOLDER = """MATCH (f:File) WHERE f.path STARTS WITH 'services' RETURN f.path AS path, f.name AS name, labels(f) AS type""" +CYPHER_EXAMPLE_README = f"""MATCH (f:File) +WHERE toLower(f.name) CONTAINS 'readme' +RETURN f.path AS path, f.name AS name, labels(f) AS type +LIMIT {CYPHER_DEFAULT_LIMIT}""" + +CYPHER_EXAMPLE_PYTHON_FILES = f"""MATCH (f:File) +WHERE f.extension = '.py' +RETURN f.path AS path, f.name AS name, labels(f) AS type +LIMIT {CYPHER_DEFAULT_LIMIT}""" + +CYPHER_EXAMPLE_TASKS = f"""MATCH (n:Function|Method) +WHERE 'task' IN n.decorators +RETURN n.qualified_name AS qualified_name, n.name AS name, labels(n) AS type +LIMIT {CYPHER_DEFAULT_LIMIT}""" + +CYPHER_EXAMPLE_FILES_IN_FOLDER = f"""MATCH (f:File) +WHERE f.path STARTS WITH 'services' +RETURN f.path AS path, f.name AS name, labels(f) AS type +LIMIT {CYPHER_DEFAULT_LIMIT}""" CYPHER_EXAMPLE_LIMIT_ONE = """MATCH (f:File) RETURN f.path as path, f.name as name, labels(f) as type LIMIT 1""" diff --git a/codebase_rag/graph_updater.py b/codebase_rag/graph_updater.py index cfe2129d1..4592970ba 100644 --- a/codebase_rag/graph_updater.py +++ b/codebase_rag/graph_updater.py @@ -234,7 +234,7 @@ def __init__( self.repo_path = repo_path self.parsers = parsers self.queries = queries - self.project_name = repo_path.name + self.project_name = repo_path.resolve().name self.simple_name_lookup: SimpleNameLookup = defaultdict(set) self.function_registry = FunctionRegistryTrie( simple_name_lookup=self.simple_name_lookup diff --git a/codebase_rag/logs.py b/codebase_rag/logs.py index dbd0a3394..6892bd1a1 100644 --- a/codebase_rag/logs.py +++ b/codebase_rag/logs.py @@ -164,6 +164,8 @@ MG_BATCH_PARAMS_TRUNCATED = " Params (first 10 of {count}): {params}..." MG_CLEANING_DB = "--- Cleaning database... ---" MG_DB_CLEANED = "--- Database cleaned. ---" +MG_DELETING_PROJECT = "--- Deleting project: {project_name} ---" +MG_PROJECT_DELETED = "--- Project {project_name} deleted. ---" MG_ENSURING_CONSTRAINTS = "Ensuring constraints..." MG_CONSTRAINTS_DONE = "Constraints checked/created." MG_NODE_BUFFER_FLUSH = ( @@ -568,7 +570,14 @@ MCP_INDEXING_REPO = "[MCP] Indexing repository at: {path}" MCP_CLEARING_DB = "[MCP] Clearing existing database to avoid conflicts..." MCP_DB_CLEARED = "[MCP] Database cleared. Starting fresh indexing..." +MCP_CLEARING_PROJECT = "[MCP] Clearing existing data for project '{project_name}'..." MCP_ERROR_INDEXING = "[MCP] Error indexing repository: {error}" +MCP_LISTING_PROJECTS = "[MCP] Listing all projects..." +MCP_ERROR_LIST_PROJECTS = "[MCP] Error listing projects: {error}" +MCP_DELETING_PROJECT = "[MCP] Deleting project: {project_name}" +MCP_ERROR_DELETE_PROJECT = "[MCP] Error deleting project: {error}" +MCP_WIPING_DATABASE = "[MCP] Wiping entire database!" +MCP_ERROR_WIPE = "[MCP] Error wiping database: {error}" MCP_QUERY_CODE_GRAPH = "[MCP] query_code_graph: {query}" MCP_QUERY_RESULTS = "[MCP] Query returned {count} results" MCP_ERROR_QUERY = "[MCP] Error querying code graph: {error}" diff --git a/codebase_rag/mcp/tools.py b/codebase_rag/mcp/tools.py index b102a48f1..5d1d2f7f5 100644 --- a/codebase_rag/mcp/tools.py +++ b/codebase_rag/mcp/tools.py @@ -23,6 +23,12 @@ from codebase_rag.tools.file_writer import FileWriter, create_file_writer_tool from codebase_rag.types_defs import ( CodeSnippetResultDict, + DeleteProjectErrorResult, + DeleteProjectResult, + DeleteProjectSuccessResult, + ListProjectsErrorResult, + ListProjectsResult, + ListProjectsSuccessResult, MCPHandlerType, MCPInputSchema, MCPInputSchemaProperty, @@ -62,6 +68,49 @@ def __init__( ) self._tools: dict[str, ToolMetadata] = { + cs.MCPToolName.LIST_PROJECTS: ToolMetadata( + name=cs.MCPToolName.LIST_PROJECTS, + description=td.MCP_TOOLS[cs.MCPToolName.LIST_PROJECTS], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={}, + required=[], + ), + handler=self.list_projects, + returns_json=True, + ), + cs.MCPToolName.DELETE_PROJECT: ToolMetadata( + name=cs.MCPToolName.DELETE_PROJECT, + description=td.MCP_TOOLS[cs.MCPToolName.DELETE_PROJECT], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={ + cs.MCPParamName.PROJECT_NAME: MCPInputSchemaProperty( + type=cs.MCPSchemaType.STRING, + description=td.MCP_PARAM_PROJECT_NAME, + ) + }, + required=[cs.MCPParamName.PROJECT_NAME], + ), + handler=self.delete_project, + returns_json=True, + ), + cs.MCPToolName.WIPE_DATABASE: ToolMetadata( + name=cs.MCPToolName.WIPE_DATABASE, + description=td.MCP_TOOLS[cs.MCPToolName.WIPE_DATABASE], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={ + cs.MCPParamName.CONFIRM: MCPInputSchemaProperty( + type=cs.MCPSchemaType.BOOLEAN, + description=td.MCP_PARAM_CONFIRM, + ) + }, + required=[cs.MCPParamName.CONFIRM], + ), + handler=self.wipe_database, + returns_json=False, + ), cs.MCPToolName.INDEX_REPOSITORY: ToolMetadata( name=cs.MCPToolName.INDEX_REPOSITORY, description=td.MCP_TOOLS[cs.MCPToolName.INDEX_REPOSITORY], @@ -199,13 +248,53 @@ def __init__( ), } - async def index_repository(self) -> str: - logger.info(lg.MCP_INDEXING_REPO.format(path=self.project_root)) + async def list_projects(self) -> ListProjectsResult: + logger.info(lg.MCP_LISTING_PROJECTS) + try: + projects = self.ingestor.list_projects() + return ListProjectsSuccessResult(projects=projects, count=len(projects)) + except Exception as e: + logger.error(lg.MCP_ERROR_LIST_PROJECTS.format(error=e)) + return ListProjectsErrorResult(error=str(e), projects=[], count=0) + async def delete_project(self, project_name: str) -> DeleteProjectResult: + logger.info(lg.MCP_DELETING_PROJECT.format(project_name=project_name)) + try: + projects = self.ingestor.list_projects() + if project_name not in projects: + return DeleteProjectErrorResult( + success=False, + error=te.MCP_PROJECT_NOT_FOUND.format( + project_name=project_name, projects=projects + ), + ) + self.ingestor.delete_project(project_name) + return DeleteProjectSuccessResult( + success=True, + project=project_name, + message=cs.MCP_PROJECT_DELETED.format(project_name=project_name), + ) + except Exception as e: + logger.error(lg.MCP_ERROR_DELETE_PROJECT.format(error=e)) + return DeleteProjectErrorResult(success=False, error=str(e)) + + async def wipe_database(self, confirm: bool) -> str: + if not confirm: + return cs.MCP_WIPE_CANCELLED + logger.warning(lg.MCP_WIPING_DATABASE) try: - logger.info(lg.MCP_CLEARING_DB) self.ingestor.clean_database() - logger.info(lg.MCP_DB_CLEARED) + return cs.MCP_WIPE_SUCCESS + except Exception as e: + logger.error(lg.MCP_ERROR_WIPE.format(error=e)) + return cs.MCP_WIPE_ERROR.format(error=e) + + async def index_repository(self) -> str: + logger.info(lg.MCP_INDEXING_REPO.format(path=self.project_root)) + project_name = Path(self.project_root).resolve().name + try: + logger.info(lg.MCP_CLEARING_PROJECT.format(project_name=project_name)) + self.ingestor.delete_project(project_name) updater = GraphUpdater( ingestor=self.ingestor, @@ -215,7 +304,9 @@ async def index_repository(self) -> str: ) updater.run() - return cs.MCP_INDEX_SUCCESS.format(path=self.project_root) + return cs.MCP_INDEX_SUCCESS_PROJECT.format( + path=self.project_root, project_name=project_name + ) except Exception as e: logger.error(lg.MCP_ERROR_INDEXING.format(error=e)) return cs.MCP_INDEX_ERROR.format(error=e) diff --git a/codebase_rag/prompts.py b/codebase_rag/prompts.py index c075a4667..7dbc60bf0 100644 --- a/codebase_rag/prompts.py +++ b/codebase_rag/prompts.py @@ -133,8 +133,20 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: {GRAPH_SCHEMA_AND_RULES} -**3. Query Patterns & Examples** -Your goal is to return the `name`, `path`, and `qualified_name` of the found nodes. +**3. Query Optimization Rules** + +- **LIMIT Results**: ALWAYS add `LIMIT 50` to queries that list items. This prevents overwhelming responses. +- **Aggregation Queries**: When asked "how many", "count", or "total", return ONLY the count, not all items: + - CORRECT: `MATCH (c:Class) RETURN count(c) AS total` + - WRONG: `MATCH (c:Class) RETURN c.name, c.path, count(c) AS total` (returns all items!) +- **List vs Count**: If asked to "list" or "show", return items with LIMIT. If asked to "count" or "how many", return only the count. + +**4. Query Patterns & Examples** +When listing items, return the `name`, `path`, and `qualified_name` with a LIMIT. + +**Pattern: Counting Items** +cypher// "How many classes are there?" or "Count all functions" +MATCH (c:Class) RETURN count(c) AS total **Pattern: Finding Decorated Functions/Methods (e.g., Workflows, Tasks)** cypher// "Find all prefect flows" or "what are the workflows?" or "show me the tasks" @@ -172,9 +184,19 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: - For code nodes (`Class`, `Function`, etc.), return `n.qualified_name AS qualified_name`. 4. **KEEP IT SIMPLE**: Do not try to be clever. A simple query that returns a few relevant nodes is better than a complex one that fails. 5. **CLAUSE ORDER**: You MUST follow the standard Cypher clause order: `MATCH`, `WHERE`, `RETURN`, `LIMIT`. +6. **ALWAYS ADD LIMIT**: For queries that list items, ALWAYS add `LIMIT 50` to prevent overwhelming responses. +7. **AGGREGATION QUERIES**: When asked "how many" or "count", return ONLY the count: + - CORRECT: `MATCH (c:Class) RETURN count(c) AS total` + - WRONG: `MATCH (c:Class) RETURN c.name, count(c) AS total` (returns all items!) **Examples:** +* **Natural Language:** "How many classes are there?" +* **Cypher Query:** + ```cypher + MATCH (c:Class) RETURN count(c) AS total + ``` + * **Natural Language:** "Find the main README file" * **Cypher Query:** ```cypher diff --git a/codebase_rag/services/graph_service.py b/codebase_rag/services/graph_service.py index daba55026..967ac0063 100644 --- a/codebase_rag/services/graph_service.py +++ b/codebase_rag/services/graph_service.py @@ -15,6 +15,8 @@ ERR_SUBSTR_CONSTRAINT, KEY_CREATED, KEY_FROM_VAL, + KEY_NAME, + KEY_PROJECT_NAME, KEY_PROPS, KEY_TO_VAL, NODE_UNIQUE_CONSTRAINTS, @@ -22,8 +24,10 @@ ) from ..cypher_queries import ( CYPHER_DELETE_ALL, + CYPHER_DELETE_PROJECT, CYPHER_EXPORT_NODES, CYPHER_EXPORT_RELATIONSHIPS, + CYPHER_LIST_PROJECTS, build_constraint_query, build_merge_node_query, build_merge_relationship_query, @@ -164,6 +168,15 @@ def clean_database(self) -> None: self._execute_query(CYPHER_DELETE_ALL) logger.info(ls.MG_DB_CLEANED) + def list_projects(self) -> list[str]: + result = self.fetch_all(CYPHER_LIST_PROJECTS) + return [str(r[KEY_NAME]) for r in result] + + def delete_project(self, project_name: str) -> None: + logger.info(ls.MG_DELETING_PROJECT.format(project_name=project_name)) + self._execute_query(CYPHER_DELETE_PROJECT, {KEY_PROJECT_NAME: project_name}) + logger.info(ls.MG_PROJECT_DELETED.format(project_name=project_name)) + def ensure_constraints(self) -> None: logger.info(ls.MG_ENSURING_CONSTRAINTS) for label, prop in NODE_UNIQUE_CONSTRAINTS.items(): diff --git a/codebase_rag/tests/integration/test_multi_project_integration.py b/codebase_rag/tests/integration/test_multi_project_integration.py new file mode 100644 index 000000000..663bf9666 --- /dev/null +++ b/codebase_rag/tests/integration/test_multi_project_integration.py @@ -0,0 +1,219 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers + +if TYPE_CHECKING: + from codebase_rag.services.graph_service import MemgraphIngestor + +pytestmark = [pytest.mark.integration] + + +@pytest.fixture +def project1_path(tmp_path: Path) -> Path: + project = tmp_path / "project1" + project.mkdir() + (project / "main.py").write_text( + """def hello(): + return "Hello from project1" + +class Service: + def run(self): + return hello() +""", + encoding="utf-8", + ) + return project + + +@pytest.fixture +def project2_path(tmp_path: Path) -> Path: + project = tmp_path / "project2" + project.mkdir() + (project / "app.py").write_text( + """def greet(): + return "Hello from project2" + +class Handler: + def handle(self): + return greet() +""", + encoding="utf-8", + ) + return project + + +def index_project(ingestor: MemgraphIngestor, project_path: Path) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=ingestor, + repo_path=project_path, + parsers=parsers, + queries=queries, + ) + updater.run() + + +class TestListProjects: + def test_list_projects_empty_database( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + result = memgraph_ingestor.list_projects() + + assert result == [] + + def test_list_projects_after_indexing( + self, memgraph_ingestor: MemgraphIngestor, project1_path: Path + ) -> None: + index_project(memgraph_ingestor, project1_path) + + result = memgraph_ingestor.list_projects() + + assert result == ["project1"] + + def test_list_projects_multiple( + self, + memgraph_ingestor: MemgraphIngestor, + project1_path: Path, + project2_path: Path, + ) -> None: + index_project(memgraph_ingestor, project1_path) + index_project(memgraph_ingestor, project2_path) + + result = memgraph_ingestor.list_projects() + + assert sorted(result) == ["project1", "project2"] + + +class TestDeleteProject: + def test_delete_project_removes_all_project_nodes( + self, memgraph_ingestor: MemgraphIngestor, project1_path: Path + ) -> None: + index_project(memgraph_ingestor, project1_path) + assert memgraph_ingestor.list_projects() == ["project1"] + + memgraph_ingestor.delete_project("project1") + + assert memgraph_ingestor.list_projects() == [] + nodes = memgraph_ingestor.fetch_all("MATCH (n) RETURN count(n) AS count") + assert nodes[0]["count"] == 0 + + def test_delete_project_preserves_other_projects( + self, + memgraph_ingestor: MemgraphIngestor, + project1_path: Path, + project2_path: Path, + ) -> None: + index_project(memgraph_ingestor, project1_path) + index_project(memgraph_ingestor, project2_path) + assert sorted(memgraph_ingestor.list_projects()) == ["project1", "project2"] + + memgraph_ingestor.delete_project("project1") + + assert memgraph_ingestor.list_projects() == ["project2"] + + project2_nodes = memgraph_ingestor.fetch_all( + "MATCH (n) WHERE n.qualified_name STARTS WITH 'project2.' RETURN count(n) AS count" + ) + assert project2_nodes[0]["count"] > 0 + + def test_delete_project_removes_files_and_folders( + self, memgraph_ingestor: MemgraphIngestor, project1_path: Path + ) -> None: + index_project(memgraph_ingestor, project1_path) + + files_before = memgraph_ingestor.fetch_all( + "MATCH (f:File) RETURN count(f) AS count" + ) + assert files_before[0]["count"] > 0 + + memgraph_ingestor.delete_project("project1") + + files_after = memgraph_ingestor.fetch_all( + "MATCH (f:File) RETURN count(f) AS count" + ) + assert files_after[0]["count"] == 0 + + def test_delete_nonexistent_project_no_error( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + memgraph_ingestor.delete_project("nonexistent") + + assert memgraph_ingestor.list_projects() == [] + + +class TestMultiProjectIsolation: + def test_reindex_only_affects_target_project( + self, + memgraph_ingestor: MemgraphIngestor, + project1_path: Path, + project2_path: Path, + ) -> None: + index_project(memgraph_ingestor, project1_path) + index_project(memgraph_ingestor, project2_path) + + project2_functions_before = memgraph_ingestor.fetch_all( + "MATCH (f:Function) WHERE f.qualified_name STARTS WITH 'project2.' " + "RETURN f.qualified_name AS name" + ) + + memgraph_ingestor.delete_project("project1") + index_project(memgraph_ingestor, project1_path) + + project2_functions_after = memgraph_ingestor.fetch_all( + "MATCH (f:Function) WHERE f.qualified_name STARTS WITH 'project2.' " + "RETURN f.qualified_name AS name" + ) + + assert sorted([f["name"] for f in project2_functions_before]) == sorted( + [f["name"] for f in project2_functions_after] + ) + + def test_projects_have_separate_namespaces( + self, + memgraph_ingestor: MemgraphIngestor, + project1_path: Path, + project2_path: Path, + ) -> None: + index_project(memgraph_ingestor, project1_path) + index_project(memgraph_ingestor, project2_path) + + project1_functions = memgraph_ingestor.fetch_all( + "MATCH (f:Function) WHERE f.qualified_name STARTS WITH 'project1.' " + "RETURN f.name AS name" + ) + project2_functions = memgraph_ingestor.fetch_all( + "MATCH (f:Function) WHERE f.qualified_name STARTS WITH 'project2.' " + "RETURN f.name AS name" + ) + + p1_names = {f["name"] for f in project1_functions} + p2_names = {f["name"] for f in project2_functions} + + assert "hello" in p1_names + assert "greet" in p2_names + assert "hello" not in p2_names + assert "greet" not in p1_names + + +class TestCleanDatabase: + def test_clean_database_removes_all_projects( + self, + memgraph_ingestor: MemgraphIngestor, + project1_path: Path, + project2_path: Path, + ) -> None: + index_project(memgraph_ingestor, project1_path) + index_project(memgraph_ingestor, project2_path) + assert len(memgraph_ingestor.list_projects()) == 2 + + memgraph_ingestor.clean_database() + + assert memgraph_ingestor.list_projects() == [] + nodes = memgraph_ingestor.fetch_all("MATCH (n) RETURN count(n) AS count") + assert nodes[0]["count"] == 0 diff --git a/codebase_rag/tests/test_mcp_query_and_index.py b/codebase_rag/tests/test_mcp_query_and_index.py index e831f8a0c..ce9a5ffcd 100644 --- a/codebase_rag/tests/test_mcp_query_and_index.py +++ b/codebase_rag/tests/test_mcp_query_and_index.py @@ -290,10 +290,10 @@ async def test_index_repository_multiple_times( assert mock_updater.run.call_count == 2 - async def test_index_repository_clears_database_first( + async def test_index_repository_clears_project_data_first( self, mcp_registry: MCPToolsRegistry, temp_project_root: Path ) -> None: - """Test that database is cleared before indexing.""" + """Test that project data is cleared before indexing.""" with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class: mock_updater = MagicMock() mock_updater.run.return_value = None @@ -301,23 +301,23 @@ async def test_index_repository_clears_database_first( result = await mcp_registry.index_repository() - mcp_registry.ingestor.clean_database.assert_called_once() # type: ignore[attr-defined] + project_name = temp_project_root.resolve().name + mcp_registry.ingestor.delete_project.assert_called_once_with(project_name) # type: ignore[attr-defined] assert "Error:" not in result - assert "cleared" in result.lower() or "previous data" in result.lower() - async def test_index_repository_clears_before_updater_runs( + async def test_index_repository_deletes_project_before_updater_runs( self, mcp_registry: MCPToolsRegistry, temp_project_root: Path ) -> None: - """Test that database clearing happens before GraphUpdater runs.""" + """Test that project deletion happens before GraphUpdater runs.""" call_order: list[str] = [] - def mock_clean() -> None: - call_order.append("clean") + def mock_delete(project_name: str) -> None: + call_order.append("delete") def mock_run() -> None: call_order.append("run") - mcp_registry.ingestor.clean_database = MagicMock(side_effect=mock_clean) # type: ignore[method-assign] + mcp_registry.ingestor.delete_project = MagicMock(side_effect=mock_delete) # type: ignore[method-assign] with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class: mock_updater = MagicMock() @@ -326,12 +326,11 @@ def mock_run() -> None: await mcp_registry.index_repository() - assert call_order == ["clean", "run"] + assert call_order == ["delete", "run"] - async def test_sequential_index_clears_previous_repo_data( + async def test_sequential_index_only_clears_own_project_data( self, tmp_path: Path ) -> None: - """Test that indexing a second repository clears the first repository's data.""" mock_ingestor = MagicMock() mock_cypher = MagicMock() @@ -357,10 +356,12 @@ async def test_sequential_index_clears_previous_repo_data( mock_updater_class.return_value = mock_updater await registry1.index_repository() - assert mock_ingestor.clean_database.call_count == 1 + mock_ingestor.delete_project.assert_called_with("project1") await registry2.index_repository() - assert mock_ingestor.clean_database.call_count == 2 + mock_ingestor.delete_project.assert_called_with("project2") + + assert mock_ingestor.delete_project.call_count == 2 class TestQueryAndIndexIntegration: @@ -419,3 +420,89 @@ async def test_index_and_query_workflow( ) result = await mcp_registry.query_code_graph("Find all classes") assert len(result["results"]) == 1 + + +class TestListProjects: + async def test_list_projects_success(self, mcp_registry: MCPToolsRegistry) -> None: + mcp_registry.ingestor.list_projects.return_value = ["project1", "project2"] # type: ignore[attr-defined] + + result = await mcp_registry.list_projects() + + assert result["projects"] == ["project1", "project2"] + assert result["count"] == 2 + assert "error" not in result + + async def test_list_projects_empty(self, mcp_registry: MCPToolsRegistry) -> None: + mcp_registry.ingestor.list_projects.return_value = [] # type: ignore[attr-defined] + + result = await mcp_registry.list_projects() + + assert result["projects"] == [] + assert result["count"] == 0 + + async def test_list_projects_error(self, mcp_registry: MCPToolsRegistry) -> None: + mcp_registry.ingestor.list_projects.side_effect = Exception("DB error") # type: ignore[attr-defined] + + result = await mcp_registry.list_projects() + + assert "error" in result + assert result["projects"] == [] + assert result["count"] == 0 + + +class TestDeleteProject: + async def test_delete_project_success(self, mcp_registry: MCPToolsRegistry) -> None: + mcp_registry.ingestor.list_projects.return_value = ["my-project", "other"] # type: ignore[attr-defined] + + result = await mcp_registry.delete_project("my-project") + + assert result["success"] is True + assert result["project"] == "my-project" + assert "message" in result + mcp_registry.ingestor.delete_project.assert_called_once_with("my-project") # type: ignore[attr-defined] + + async def test_delete_project_not_found( + self, mcp_registry: MCPToolsRegistry + ) -> None: + mcp_registry.ingestor.list_projects.return_value = ["other-project"] # type: ignore[attr-defined] + + result = await mcp_registry.delete_project("nonexistent") + + assert result["success"] is False + assert "error" in result + assert "not found" in result["error"].lower() + mcp_registry.ingestor.delete_project.assert_not_called() # type: ignore[attr-defined] + + async def test_delete_project_error(self, mcp_registry: MCPToolsRegistry) -> None: + mcp_registry.ingestor.list_projects.return_value = ["my-project"] # type: ignore[attr-defined] + mcp_registry.ingestor.delete_project.side_effect = Exception("Delete failed") # type: ignore[attr-defined] + + result = await mcp_registry.delete_project("my-project") + + assert result["success"] is False + assert "error" in result + + +class TestWipeDatabase: + async def test_wipe_database_confirmed( + self, mcp_registry: MCPToolsRegistry + ) -> None: + result = await mcp_registry.wipe_database(confirm=True) + + assert "wiped" in result.lower() + mcp_registry.ingestor.clean_database.assert_called_once() # type: ignore[attr-defined] + + async def test_wipe_database_not_confirmed( + self, mcp_registry: MCPToolsRegistry + ) -> None: + result = await mcp_registry.wipe_database(confirm=False) + + assert "cancelled" in result.lower() + mcp_registry.ingestor.clean_database.assert_not_called() # type: ignore[attr-defined] + + async def test_wipe_database_error(self, mcp_registry: MCPToolsRegistry) -> None: + mcp_registry.ingestor.clean_database.side_effect = Exception("Wipe failed") # type: ignore[attr-defined] + + result = await mcp_registry.wipe_database(confirm=True) + + assert "error" in result.lower() diff --git a/codebase_rag/tool_errors.py b/codebase_rag/tool_errors.py index 5b3a475c2..25540a976 100644 --- a/codebase_rag/tool_errors.py +++ b/codebase_rag/tool_errors.py @@ -63,6 +63,9 @@ MCP_INVALID_RESPONSE = "Code snippet tool returned an invalid response" MCP_PATH_NOT_EXISTS = "Target repository path does not exist: {path}" MCP_PATH_NOT_DIR = "Target repository path is not a directory: {path}" +MCP_PROJECT_NOT_FOUND = ( + "Project '{project_name}' not found. Available projects: {projects}" +) # (H) CLI validation errors INVALID_POSITIVE_INT = "{value!r} is not a valid positive integer" diff --git a/codebase_rag/tools/tool_descriptions.py b/codebase_rag/tools/tool_descriptions.py index c80eb76a7..008c60bef 100644 --- a/codebase_rag/tools/tool_descriptions.py +++ b/codebase_rag/tools/tool_descriptions.py @@ -71,9 +71,26 @@ class AgenticToolName(StrEnum): ) # (H) MCP tool descriptions +MCP_LIST_PROJECTS = ( + "List all indexed projects in the knowledge graph database. " + "Returns a list of project names that have been indexed." +) + +MCP_DELETE_PROJECT = ( + "Delete a specific project from the knowledge graph database. " + "This removes all nodes associated with the project while preserving other projects. " + "Use list_projects first to see available projects." +) + +MCP_WIPE_DATABASE = ( + "WARNING: Completely wipe the entire database, removing ALL indexed projects. " + "This cannot be undone. Use delete_project for removing individual projects." +) + MCP_INDEX_REPOSITORY = ( "Parse and ingest the repository into the Memgraph knowledge graph. " - "This builds a comprehensive graph of functions, classes, dependencies, and relationships." + "This builds a comprehensive graph of functions, classes, dependencies, and relationships. " + "Note: This preserves other projects - only the current project is re-indexed." ) MCP_QUERY_CODE_GRAPH = ( @@ -100,6 +117,8 @@ class AgenticToolName(StrEnum): MCP_LIST_DIRECTORY = "List contents of a directory in the project." +MCP_PARAM_PROJECT_NAME = "Name of the project to delete (e.g., 'my-project')" +MCP_PARAM_CONFIRM = "Must be true to confirm the wipe operation" MCP_PARAM_NATURAL_LANGUAGE_QUERY = "Your question in plain English about the codebase" MCP_PARAM_QUALIFIED_NAME = ( "Fully qualified name (e.g., 'app.services.UserService.create_user')" @@ -114,6 +133,9 @@ class AgenticToolName(StrEnum): MCP_TOOLS: dict[MCPToolName, str] = { + MCPToolName.LIST_PROJECTS: MCP_LIST_PROJECTS, + MCPToolName.DELETE_PROJECT: MCP_DELETE_PROJECT, + MCPToolName.WIPE_DATABASE: MCP_WIPE_DATABASE, MCPToolName.INDEX_REPOSITORY: MCP_INDEX_REPOSITORY, MCPToolName.QUERY_CODE_GRAPH: MCP_QUERY_CODE_GRAPH, MCPToolName.GET_CODE_SNIPPET: MCP_GET_CODE_SNIPPET, diff --git a/codebase_rag/types_defs.py b/codebase_rag/types_defs.py index ddfebcc59..bc252d0c4 100644 --- a/codebase_rag/types_defs.py +++ b/codebase_rag/types_defs.py @@ -383,7 +383,41 @@ class CodeSnippetResultDict(TypedDict, total=False): error: str -MCPResultType = str | QueryResultDict | CodeSnippetResultDict +class ListProjectsSuccessResult(TypedDict): + projects: list[str] + count: int + + +class ListProjectsErrorResult(TypedDict): + projects: list[str] + count: int + error: str + + +ListProjectsResult = ListProjectsSuccessResult | ListProjectsErrorResult + + +class DeleteProjectSuccessResult(TypedDict): + success: bool + project: str + message: str + + +class DeleteProjectErrorResult(TypedDict): + success: bool + error: str + + +DeleteProjectResult = DeleteProjectSuccessResult | DeleteProjectErrorResult + + +MCPResultType = ( + str + | QueryResultDict + | CodeSnippetResultDict + | ListProjectsResult + | DeleteProjectResult +) MCPHandlerType = Callable[..., Awaitable[MCPResultType]]