diff --git a/README.md b/README.md index 423769ae8..b805dbd61 100644 --- a/README.md +++ b/README.md @@ -526,6 +526,9 @@ claude mcp add --transport stdio graph-code \ | `read_file` | Read the contents of a file from the project. Supports pagination for large files. | | `write_file` | Write content to a file, creating it if it doesn't exist. | | `list_directory` | List contents of a directory in the project. | +| `list_projects` | List all indexed projects in the knowledge graph database. Returns a list of project names that have been indexed. | +| `delete_project` | Delete a specific project from the knowledge graph database. This removes all nodes associated with the project while preserving other projects. Use list_projects first to see available projects. | +| `wipe_database` | WARNING: Completely wipe the entire database, removing ALL indexed projects. This cannot be undone. Use delete_project for removing individual projects. | ### Example Usage diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index f0d70de9b..b75ebb759 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -2301,6 +2301,9 @@ class MCPToolName(StrEnum): READ_FILE = "read_file" WRITE_FILE = "write_file" LIST_DIRECTORY = "list_directory" + LIST_PROJECTS = "list_projects" + DELETE_PROJECT = "delete_project" + WIPE_DATABASE = "wipe_database" # (H) MCP environment variables @@ -2315,6 +2318,7 @@ class MCPSchemaType(StrEnum): OBJECT = "object" STRING = "string" INTEGER = "integer" + BOOLEAN = "boolean" # (H) MCP schema fields @@ -2337,6 +2341,8 @@ class MCPParamName(StrEnum): LIMIT = "limit" CONTENT = "content" DIRECTORY_PATH = "directory_path" + PROJECT_NAME = "project_name" + CONFIRM = "confirm" # (H) MCP server constants diff --git a/codebase_rag/logs.py b/codebase_rag/logs.py index 246c1f77d..0aca23881 100644 --- a/codebase_rag/logs.py +++ b/codebase_rag/logs.py @@ -162,6 +162,9 @@ MG_BATCH_PARAMS_TRUNCATED = " Params (first 10 of {count}): {params}..." 
MG_CLEANING_DB = "--- Cleaning database... ---" MG_DB_CLEANED = "--- Database cleaned. ---" +MG_LISTING_PROJECTS = "--- Listing indexed projects... ---" +MG_DELETING_PROJECT = "--- Deleting project: {name} ---" +MG_PROJECT_DELETED = "--- Project {name} deleted. ---" MG_ENSURING_CONSTRAINTS = "Ensuring constraints..." MG_CONSTRAINTS_DONE = "Constraints checked/created." MG_NODE_BUFFER_FLUSH = ( @@ -580,6 +583,24 @@ MCP_ERROR_WRITE = "[MCP] Error writing file: {error}" MCP_LIST_DIR = "[MCP] list_directory: {path}" MCP_ERROR_LIST_DIR = "[MCP] Error listing directory: {error}" +MCP_LIST_PROJECTS = "[MCP] Listing all indexed projects..." +MCP_LIST_PROJECTS_RESULT = "[MCP] Found {count} indexed projects" +MCP_ERROR_LIST_PROJECTS = "[MCP] Error listing projects: {error}" +MCP_DELETE_PROJECT = "[MCP] Deleting project: {name}" +MCP_DELETE_PROJECT_SUCCESS = "[MCP] Successfully deleted project: {name}" +MCP_DELETE_PROJECT_NOT_FOUND = "[MCP] Project not found: {name}" +MCP_ERROR_DELETE_PROJECT = "[MCP] Error deleting project: {error}" +MCP_WIPE_DB = "[MCP] Wiping entire database!" +MCP_WIPE_DB_CANCELLED = "[MCP] Database wipe cancelled (confirm=false)" +MCP_ERROR_WIPE_DB = "[MCP] Error wiping database: {error}" +MCP_CLEARING_PROJECT = "[MCP] Clearing existing data for project '{name}'..." +MCP_INDEX_PROJECT_SUCCESS = "[MCP] Successfully indexed project '{name}'" +MCP_SCOPE_VIOLATION = ( + "[MCP] Scope violation: attempted to access '{target}' from project '{current}'" +) +MCP_WIPE_DISABLED = ( + "[MCP] wipe_database is disabled in scoped mode. Use delete_project instead." 
+) # (H) MCP server logs MCP_SERVER_INFERRED_ROOT = "[GraphCode MCP] Using inferred project root: {path}" diff --git a/codebase_rag/mcp/tools.py b/codebase_rag/mcp/tools.py index b102a48f1..f7c31eacf 100644 --- a/codebase_rag/mcp/tools.py +++ b/codebase_rag/mcp/tools.py @@ -23,6 +23,8 @@ from codebase_rag.tools.file_writer import FileWriter, create_file_writer_tool from codebase_rag.types_defs import ( CodeSnippetResultDict, + DeleteProjectResultDict, + ListProjectsResultDict, MCPHandlerType, MCPInputSchema, MCPInputSchemaProperty, @@ -37,10 +39,13 @@ def __init__( project_root: str, ingestor: MemgraphIngestor, cypher_gen: CypherGenerator, + enforce_scope: bool = True, ) -> None: self.project_root = project_root self.ingestor = ingestor self.cypher_gen = cypher_gen + self.enforce_scope = enforce_scope + self.current_project = Path(project_root).resolve().name self.parsers, self.queries = load_parsers() @@ -197,15 +202,57 @@ def __init__( handler=self.list_directory, returns_json=False, ), + cs.MCPToolName.LIST_PROJECTS: ToolMetadata( + name=cs.MCPToolName.LIST_PROJECTS, + description=td.MCP_TOOLS[cs.MCPToolName.LIST_PROJECTS], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={}, + required=[], + ), + handler=self.list_projects, + returns_json=True, + ), + cs.MCPToolName.DELETE_PROJECT: ToolMetadata( + name=cs.MCPToolName.DELETE_PROJECT, + description=td.MCP_TOOLS[cs.MCPToolName.DELETE_PROJECT], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={ + cs.MCPParamName.PROJECT_NAME: MCPInputSchemaProperty( + type=cs.MCPSchemaType.STRING, + description=td.MCP_PARAM_PROJECT_NAME, + ) + }, + required=[cs.MCPParamName.PROJECT_NAME], + ), + handler=self.delete_project, + returns_json=True, + ), + cs.MCPToolName.WIPE_DATABASE: ToolMetadata( + name=cs.MCPToolName.WIPE_DATABASE, + description=td.MCP_TOOLS[cs.MCPToolName.WIPE_DATABASE], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={ + 
+                        cs.MCPParamName.CONFIRM: MCPInputSchemaProperty(
+                            type=cs.MCPSchemaType.BOOLEAN,
+                            description=td.MCP_PARAM_CONFIRM,
+                        )
+                    },
+                    required=[cs.MCPParamName.CONFIRM],
+                ),
+                handler=self.wipe_database,
+                returns_json=False,
+            ),
         }
 
     async def index_repository(self) -> str:
         logger.info(lg.MCP_INDEXING_REPO.format(path=self.project_root))
 
         try:
-            logger.info(lg.MCP_CLEARING_DB)
-            self.ingestor.clean_database()
-            logger.info(lg.MCP_DB_CLEARED)
+            logger.info(lg.MCP_CLEARING_PROJECT.format(name=self.current_project))
+            self.ingestor.delete_project(self.current_project)
 
             updater = GraphUpdater(
                 ingestor=self.ingestor,
@@ -215,7 +262,7 @@ async def index_repository(self) -> str:
             )
             updater.run()
 
-            return cs.MCP_INDEX_SUCCESS.format(path=self.project_root)
+            return lg.MCP_INDEX_PROJECT_SUCCESS.format(name=self.current_project)
         except Exception as e:
             logger.error(lg.MCP_ERROR_INDEXING.format(error=e))
             return cs.MCP_INDEX_ERROR.format(error=e)
@@ -339,6 +386,77 @@ async def list_directory(
             logger.error(lg.MCP_ERROR_LIST_DIR.format(error=e))
             return te.ERROR_WRAPPER.format(message=e)
 
+    async def list_projects(self) -> ListProjectsResultDict:
+        """Return indexed project names, limited to this project when scoped."""
+        logger.info(lg.MCP_LIST_PROJECTS)
+        try:
+            all_projects = self.ingestor.list_projects()
+            if self.enforce_scope:
+                projects = [p for p in all_projects if p == self.current_project]
+            else:
+                projects = all_projects
+            logger.info(lg.MCP_LIST_PROJECTS_RESULT.format(count=len(projects)))
+            return ListProjectsResultDict(projects=projects, count=len(projects))
+        except Exception as e:
+            logger.error(lg.MCP_ERROR_LIST_PROJECTS.format(error=e))
+            return ListProjectsResultDict(error=str(e), projects=[], count=0)
+
+    async def delete_project(self, project_name: str) -> DeleteProjectResultDict:
+        """Delete one project's graph data, refusing other projects when scoped."""
+        logger.info(lg.MCP_DELETE_PROJECT.format(name=project_name))
+        try:
+            if self.enforce_scope and project_name != self.current_project:
+                logger.warning(
+                    lg.MCP_SCOPE_VIOLATION.format(
+                        target=project_name, current=self.current_project
+                    )
+                )
+                return DeleteProjectResultDict(
+                    success=False,
+                    error=f"Scope violation: can only delete '{self.current_project}', "
+                    f"not '{project_name}'.",
+                )
+            projects = self.ingestor.list_projects()
+            if project_name not in projects:
+                logger.warning(
+                    lg.MCP_DELETE_PROJECT_NOT_FOUND.format(name=project_name)
+                )
+                return DeleteProjectResultDict(
+                    success=False,
+                    error=f"Project '{project_name}' not found.",
+                )
+            self.ingestor.delete_project(project_name)
+            logger.info(lg.MCP_DELETE_PROJECT_SUCCESS.format(name=project_name))
+            return DeleteProjectResultDict(
+                success=True,
+                project=project_name,
+                message=f"Successfully deleted project '{project_name}'.",
+            )
+        except Exception as e:
+            logger.error(lg.MCP_ERROR_DELETE_PROJECT.format(error=e))
+            return DeleteProjectResultDict(success=False, error=str(e))
+
+    async def wipe_database(self, confirm: bool) -> str:
+        """Wipe every project; refused when scoped or unless confirm is True."""
+        if self.enforce_scope:
+            logger.warning(lg.MCP_WIPE_DISABLED)
+            return (
+                "wipe_database is disabled in scoped mode to protect other projects. "
+                f"Use delete_project('{self.current_project}') to remove this project only."
+            )
+        # Require a real boolean True: a truthy non-bool such as the string
+        # "false" must never trigger an irreversible wipe.
+        if confirm is not True:
+            logger.info(lg.MCP_WIPE_DB_CANCELLED)
+            return "Database wipe cancelled. Set confirm=true to proceed."
+        logger.warning(lg.MCP_WIPE_DB)
+        try:
+            self.ingestor.clean_database()
+            return "Database completely wiped. All projects have been removed."
+        except Exception as e:
+            logger.error(lg.MCP_ERROR_WIPE_DB.format(error=e))
+            return te.ERROR_WRAPPER.format(message=e)
+
     def get_tool_schemas(self) -> list[MCPToolSchema]:
         return [
             MCPToolSchema(
diff --git a/codebase_rag/services/graph_service.py b/codebase_rag/services/graph_service.py
index daba55026..eda0049ed 100644
--- a/codebase_rag/services/graph_service.py
+++ b/codebase_rag/services/graph_service.py
@@ -164,6 +164,33 @@ def clean_database(self) -> None:
         self._execute_query(CYPHER_DELETE_ALL)
         logger.info(ls.MG_DB_CLEANED)
 
+    def list_projects(self) -> list[str]:
+        """Return the non-empty names of all Project nodes, sorted by name."""
+        logger.info(ls.MG_LISTING_PROJECTS)
+        result = self.fetch_all(
+            "MATCH (p:Project) RETURN p.name AS name ORDER BY p.name"
+        )
+        return [str(r["name"]) for r in result if r.get("name")]
+
+    def delete_project(self, project_name: str) -> None:
+        """Delete the named Project node and every node reachable from it."""
+        logger.info(ls.MG_DELETING_PROJECT.format(name=project_name))
+        # Use graph traversal to delete all nodes connected to the project.
+        # This catches File/Folder nodes (which use 'path' not 'qualified_name')
+        # as well as all other nodes reachable from the Project.
+        # NOTE(review): `[*]` is unbounded and follows every outgoing
+        # relationship type, so nodes that other projects also link to would be
+        # deleted as well -- confirm nothing reachable from a Project is shared.
+        self._execute_query(
+            """
+            MATCH (p:Project {name: $project_name})
+            OPTIONAL MATCH (p)-[*]->(connected)
+            DETACH DELETE p, connected
+            """,
+            {"project_name": project_name},
+        )
+        logger.info(ls.MG_PROJECT_DELETED.format(name=project_name))
+
     def ensure_constraints(self) -> None:
         logger.info(ls.MG_ENSURING_CONSTRAINTS)
         for label, prop in NODE_UNIQUE_CONSTRAINTS.items():
diff --git a/codebase_rag/tests/test_mcp_query_and_index.py b/codebase_rag/tests/test_mcp_query_and_index.py
index e831f8a0c..8914193b6 100644
--- a/codebase_rag/tests/test_mcp_query_and_index.py
+++ b/codebase_rag/tests/test_mcp_query_and_index.py
@@ -219,7 +219,7 @@ async def test_index_repository_success(
 
             assert "Error:" not in result
             assert "Success" in result or "indexed" in result.lower()
-            assert str(temp_project_root) in result
+            assert temp_project_root.name in result
             mock_updater.run.assert_called_once()
 
     async def test_index_repository_creates_graph_updater(
@@ -293,7 +293,7 @@ async def test_index_repository_multiple_times(
     async def test_index_repository_clears_database_first(
         self, mcp_registry: MCPToolsRegistry, temp_project_root: Path
     ) -> None:
-        """Test that database is cleared before indexing."""
+        """Test that project data is cleared before indexing."""
         with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class:
             mock_updater = MagicMock()
             mock_updater.run.return_value = None
@@ -301,23 +301,24 @@ async def test_index_repository_clears_database_first(
 
             result = await mcp_registry.index_repository()
 
-            mcp_registry.ingestor.clean_database.assert_called_once()  # type: ignore[attr-defined]
+            mcp_registry.ingestor.delete_project.assert_called_once_with(  # type: ignore[attr-defined]
+                mcp_registry.current_project
+            )
             assert "Error:" not in result
-            assert "cleared" in result.lower() or "previous data" in result.lower()
 
     async def test_index_repository_clears_before_updater_runs(
         self, mcp_registry: MCPToolsRegistry, temp_project_root: Path
     ) -> None:
-        """Test that 
database clearing happens before GraphUpdater runs.""" + """Test that project deletion happens before GraphUpdater runs.""" call_order: list[str] = [] - def mock_clean() -> None: - call_order.append("clean") + def mock_delete(project_name: str) -> None: + call_order.append("delete") def mock_run() -> None: call_order.append("run") - mcp_registry.ingestor.clean_database = MagicMock(side_effect=mock_clean) # type: ignore[method-assign] + mcp_registry.ingestor.delete_project = MagicMock(side_effect=mock_delete) # type: ignore[method-assign] with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class: mock_updater = MagicMock() @@ -326,12 +327,12 @@ def mock_run() -> None: await mcp_registry.index_repository() - assert call_order == ["clean", "run"] + assert call_order == ["delete", "run"] async def test_sequential_index_clears_previous_repo_data( self, tmp_path: Path ) -> None: - """Test that indexing a second repository clears the first repository's data.""" + """Test that indexing repositories deletes only their own project data.""" mock_ingestor = MagicMock() mock_cypher = MagicMock() @@ -357,10 +358,12 @@ async def test_sequential_index_clears_previous_repo_data( mock_updater_class.return_value = mock_updater await registry1.index_repository() - assert mock_ingestor.clean_database.call_count == 1 + assert mock_ingestor.delete_project.call_count == 1 + mock_ingestor.delete_project.assert_called_with("project1") await registry2.index_repository() - assert mock_ingestor.clean_database.call_count == 2 + assert mock_ingestor.delete_project.call_count == 2 + mock_ingestor.delete_project.assert_called_with("project2") class TestQueryAndIndexIntegration: diff --git a/codebase_rag/tools/tool_descriptions.py b/codebase_rag/tools/tool_descriptions.py index c80eb76a7..15c7055bc 100644 --- a/codebase_rag/tools/tool_descriptions.py +++ b/codebase_rag/tools/tool_descriptions.py @@ -100,6 +100,22 @@ class AgenticToolName(StrEnum): MCP_LIST_DIRECTORY = "List contents 
of a directory in the project." +MCP_LIST_PROJECTS = ( + "List all indexed projects in the knowledge graph database. " + "Returns a list of project names that have been indexed." +) + +MCP_DELETE_PROJECT = ( + "Delete a specific project from the knowledge graph database. " + "This removes all nodes associated with the project while preserving other projects. " + "Use list_projects first to see available projects." +) + +MCP_WIPE_DATABASE = ( + "WARNING: Completely wipe the entire database, removing ALL indexed projects. " + "This cannot be undone. Use delete_project for removing individual projects." +) + MCP_PARAM_NATURAL_LANGUAGE_QUERY = "Your question in plain English about the codebase" MCP_PARAM_QUALIFIED_NAME = ( "Fully qualified name (e.g., 'app.services.UserService.create_user')" @@ -111,6 +127,8 @@ class AgenticToolName(StrEnum): MCP_PARAM_LIMIT = "Maximum number of lines to read (optional)" MCP_PARAM_CONTENT = "Content to write to the file" MCP_PARAM_DIRECTORY_PATH = "Relative path to directory from project root (default: '.')" +MCP_PARAM_PROJECT_NAME = "Name of the project to delete (e.g., 'my-project')" +MCP_PARAM_CONFIRM = "Must be true to confirm the wipe operation" MCP_TOOLS: dict[MCPToolName, str] = { @@ -121,6 +139,9 @@ class AgenticToolName(StrEnum): MCPToolName.READ_FILE: MCP_READ_FILE, MCPToolName.WRITE_FILE: MCP_WRITE_FILE, MCPToolName.LIST_DIRECTORY: MCP_LIST_DIRECTORY, + MCPToolName.LIST_PROJECTS: MCP_LIST_PROJECTS, + MCPToolName.DELETE_PROJECT: MCP_DELETE_PROJECT, + MCPToolName.WIPE_DATABASE: MCP_WIPE_DATABASE, } AGENTIC_TOOLS: dict[AgenticToolName, str] = { diff --git a/codebase_rag/types_defs.py b/codebase_rag/types_defs.py index 6e14e890a..f9414ff07 100644 --- a/codebase_rag/types_defs.py +++ b/codebase_rag/types_defs.py @@ -378,7 +378,26 @@ class CodeSnippetResultDict(TypedDict, total=False): error: str -MCPResultType = str | QueryResultDict | CodeSnippetResultDict +class ListProjectsResultDict(TypedDict, total=False): + projects: 
list[str] + count: int + error: str + + +class DeleteProjectResultDict(TypedDict, total=False): + success: bool + project: str + message: str + error: str + + +MCPResultType = ( + str + | QueryResultDict + | CodeSnippetResultDict + | ListProjectsResultDict + | DeleteProjectResultDict +) MCPHandlerType = Callable[..., Awaitable[MCPResultType]]