Skip to content
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,10 @@ claude mcp add --transport stdio graph-code \
<!-- SECTION:mcp_tools -->
| Tool | Description |
|----|-----------|
| `index_repository` | Parse and ingest the repository into the Memgraph knowledge graph. This builds a comprehensive graph of functions, classes, dependencies, and relationships. |
| `list_projects` | List all indexed projects in the knowledge graph database. Returns a list of project names that have been indexed. |
| `delete_project` | Delete a specific project from the knowledge graph database. This removes all nodes associated with the project while preserving other projects. Use list_projects first to see available projects. |
| `wipe_database` | WARNING: Completely wipe the entire database, removing ALL indexed projects. This cannot be undone. Use delete_project for removing individual projects. |
| `index_repository` | Parse and ingest the repository into the Memgraph knowledge graph. This builds a comprehensive graph of functions, classes, dependencies, and relationships. Note: This preserves other projects - only the current project is re-indexed. |
| `query_code_graph` | Query the codebase knowledge graph using natural language. Ask questions like 'What functions call UserService.create_user?' or 'Show me all classes that implement the Repository interface'. |
| `get_code_snippet` | Retrieve source code for a function, class, or method by its qualified name. Returns the source code, file path, line numbers, and docstring. |
| `surgical_replace_code` | Surgically replace an exact code block in a file using diff-match-patch. Only modifies the exact target block, leaving the rest unchanged. |
Expand Down
13 changes: 13 additions & 0 deletions codebase_rag/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ class GoogleProviderType(StrEnum):
KEY_FROM_VAL = "from_val"
KEY_TO_VAL = "to_val"
KEY_VERSION_SPEC = "version_spec"
KEY_PREFIX = "prefix"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The constant KEY_PREFIX is introduced but doesn't seem to be used anywhere in the codebase. To maintain code cleanliness and avoid dead code, it's best to remove it.

KEY_PROJECT_NAME = "project_name"

ERR_SUBSTR_ALREADY_EXISTS = "already exists"
ERR_SUBSTR_CONSTRAINT = "constraint"
Expand Down Expand Up @@ -2298,6 +2300,9 @@ class CppNodeType(StrEnum):

# (H) MCP tool names
class MCPToolName(StrEnum):
LIST_PROJECTS = "list_projects"
DELETE_PROJECT = "delete_project"
WIPE_DATABASE = "wipe_database"
INDEX_REPOSITORY = "index_repository"
QUERY_CODE_GRAPH = "query_code_graph"
GET_CODE_SNIPPET = "get_code_snippet"
Expand All @@ -2319,6 +2324,7 @@ class MCPSchemaType(StrEnum):
OBJECT = "object"
STRING = "string"
INTEGER = "integer"
BOOLEAN = "boolean"


# (H) MCP schema fields
Expand All @@ -2332,6 +2338,8 @@ class MCPSchemaField(StrEnum):

# (H) MCP parameter names
class MCPParamName(StrEnum):
PROJECT_NAME = "project_name"
CONFIRM = "confirm"
NATURAL_LANGUAGE_QUERY = "natural_language_query"
QUALIFIED_NAME = "qualified_name"
FILE_PATH = "file_path"
Expand All @@ -2354,10 +2362,15 @@ class MCPParamName(StrEnum):

# (H) MCP response messages
MCP_INDEX_SUCCESS = "Successfully indexed repository at {path}. Knowledge graph has been updated (previous data cleared)."
MCP_INDEX_SUCCESS_PROJECT = "Successfully indexed repository at {path}. Project '{project_name}' has been updated."
MCP_INDEX_ERROR = "Error indexing repository: {error}"
MCP_WRITE_SUCCESS = "Successfully wrote file: {path}"
MCP_UNKNOWN_TOOL_ERROR = "Unknown tool: {name}"
MCP_TOOL_EXEC_ERROR = "Error executing tool '{name}': {error}"
MCP_PROJECT_DELETED = "Successfully deleted project '{project_name}'."
MCP_WIPE_CANCELLED = "Database wipe cancelled. Set confirm=true to proceed."
MCP_WIPE_SUCCESS = "Database completely wiped. All projects have been removed."
MCP_WIPE_ERROR = "Error wiping database: {error}"

# (H) MCP dict keys and values
MCP_KEY_RESULTS = "results"
Expand Down
43 changes: 33 additions & 10 deletions codebase_rag/cypher_queries.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,50 @@
# (H) Matches every node in the database and deletes it together with its relationships.
CYPHER_DELETE_ALL = "MATCH (n) DETACH DELETE n;"

# (H) Returns the `name` property of every Project node as column `name`, ordered by name.
CYPHER_LIST_PROJECTS = "MATCH (p:Project) RETURN p.name AS name ORDER BY p.name"

# (H) Matches the Project node whose name equals $project_name, optionally matches
# (H) every node joined to it by a path of any length in either direction, and
# (H) detach-deletes the project node together with those connected nodes.
# (H) NOTE(review): `-[*]-` is undirected and unbounded, so `connected` is the whole
# (H) connected component around the project. If any node can be linked to more than
# (H) one project (e.g. a shared dependency node — TODO confirm against the graph
# (H) schema), nodes belonging to other projects would be deleted as well, and the
# (H) traversal may be expensive on large graphs. Consider restricting the pattern
# (H) to directed containment relationships once the schema is confirmed.
CYPHER_DELETE_PROJECT = """
MATCH (p:Project {name: $project_name})
OPTIONAL MATCH (p)-[*]-(connected)
DETACH DELETE p, connected
"""

CYPHER_EXAMPLE_DECORATED_FUNCTIONS = """MATCH (n:Function|Method)
WHERE ANY(d IN n.decorators WHERE toLower(d) IN ['flow', 'task'])
RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type"""
RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type
LIMIT 50"""

CYPHER_EXAMPLE_CONTENT_BY_PATH = """MATCH (n)
WHERE n.path IS NOT NULL AND n.path STARTS WITH 'workflows'
RETURN n.name AS name, n.path AS path, labels(n) AS type"""
RETURN n.name AS name, n.path AS path, labels(n) AS type
LIMIT 50"""

CYPHER_EXAMPLE_KEYWORD_SEARCH = """MATCH (n)
WHERE toLower(n.name) CONTAINS 'database' OR (n.qualified_name IS NOT NULL AND toLower(n.qualified_name) CONTAINS 'database')
RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type"""
RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type
LIMIT 50"""

CYPHER_EXAMPLE_FIND_FILE = """MATCH (f:File) WHERE toLower(f.name) = 'readme.md' AND f.path = 'README.md'
RETURN f.path as path, f.name as name, labels(f) as type"""

CYPHER_EXAMPLE_README = """MATCH (f:File) WHERE toLower(f.name) CONTAINS 'readme' RETURN f.path AS path, f.name AS name, labels(f) AS type"""

CYPHER_EXAMPLE_PYTHON_FILES = """MATCH (f:File) WHERE f.extension = '.py' RETURN f.path AS path, f.name AS name, labels(f) AS type"""

CYPHER_EXAMPLE_TASKS = """MATCH (n:Function|Method) WHERE 'task' IN n.decorators RETURN n.qualified_name AS qualified_name, n.name AS name, labels(n) AS type"""

CYPHER_EXAMPLE_FILES_IN_FOLDER = """MATCH (f:File) WHERE f.path STARTS WITH 'services' RETURN f.path AS path, f.name AS name, labels(f) AS type"""
CYPHER_EXAMPLE_README = """MATCH (f:File)
WHERE toLower(f.name) CONTAINS 'readme'
RETURN f.path AS path, f.name AS name, labels(f) AS type
LIMIT 50"""

CYPHER_EXAMPLE_PYTHON_FILES = """MATCH (f:File)
WHERE f.extension = '.py'
RETURN f.path AS path, f.name AS name, labels(f) AS type
LIMIT 50"""

CYPHER_EXAMPLE_TASKS = """MATCH (n:Function|Method)
WHERE 'task' IN n.decorators
RETURN n.qualified_name AS qualified_name, n.name AS name, labels(n) AS type
LIMIT 50"""

CYPHER_EXAMPLE_FILES_IN_FOLDER = """MATCH (f:File)
WHERE f.path STARTS WITH 'services'
RETURN f.path AS path, f.name AS name, labels(f) AS type
LIMIT 50"""

CYPHER_EXAMPLE_LIMIT_ONE = """MATCH (f:File) RETURN f.path as path, f.name as name, labels(f) as type LIMIT 1"""

Expand Down
2 changes: 1 addition & 1 deletion codebase_rag/graph_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def __init__(
self.repo_path = repo_path
self.parsers = parsers
self.queries = queries
self.project_name = repo_path.name
self.project_name = repo_path.resolve().name
self.simple_name_lookup: SimpleNameLookup = defaultdict(set)
self.function_registry = FunctionRegistryTrie(
simple_name_lookup=self.simple_name_lookup
Expand Down
9 changes: 9 additions & 0 deletions codebase_rag/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@
MG_BATCH_PARAMS_TRUNCATED = " Params (first 10 of {count}): {params}..."
MG_CLEANING_DB = "--- Cleaning database... ---"
MG_DB_CLEANED = "--- Database cleaned. ---"
MG_DELETING_PROJECT = "--- Deleting project: {project_name} ---"
MG_PROJECT_DELETED = "--- Project {project_name} deleted. ---"
MG_ENSURING_CONSTRAINTS = "Ensuring constraints..."
MG_CONSTRAINTS_DONE = "Constraints checked/created."
MG_NODE_BUFFER_FLUSH = (
Expand Down Expand Up @@ -568,7 +570,14 @@
MCP_INDEXING_REPO = "[MCP] Indexing repository at: {path}"
MCP_CLEARING_DB = "[MCP] Clearing existing database to avoid conflicts..."
MCP_DB_CLEARED = "[MCP] Database cleared. Starting fresh indexing..."
MCP_CLEARING_PROJECT = "[MCP] Clearing existing data for project '{project_name}'..."
MCP_ERROR_INDEXING = "[MCP] Error indexing repository: {error}"
MCP_LISTING_PROJECTS = "[MCP] Listing all projects..."
MCP_ERROR_LIST_PROJECTS = "[MCP] Error listing projects: {error}"
MCP_DELETING_PROJECT = "[MCP] Deleting project: {project_name}"
MCP_ERROR_DELETE_PROJECT = "[MCP] Error deleting project: {error}"
MCP_WIPING_DATABASE = "[MCP] Wiping entire database!"
MCP_ERROR_WIPE = "[MCP] Error wiping database: {error}"
MCP_QUERY_CODE_GRAPH = "[MCP] query_code_graph: {query}"
MCP_QUERY_RESULTS = "[MCP] Query returned {count} results"
MCP_ERROR_QUERY = "[MCP] Error querying code graph: {error}"
Expand Down
101 changes: 96 additions & 5 deletions codebase_rag/mcp/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
from codebase_rag.tools.file_writer import FileWriter, create_file_writer_tool
from codebase_rag.types_defs import (
CodeSnippetResultDict,
DeleteProjectErrorResult,
DeleteProjectResult,
DeleteProjectSuccessResult,
ListProjectsErrorResult,
ListProjectsResult,
ListProjectsSuccessResult,
MCPHandlerType,
MCPInputSchema,
MCPInputSchemaProperty,
Expand Down Expand Up @@ -62,6 +68,49 @@ def __init__(
)

self._tools: dict[str, ToolMetadata] = {
cs.MCPToolName.LIST_PROJECTS: ToolMetadata(
name=cs.MCPToolName.LIST_PROJECTS,
description=td.MCP_TOOLS[cs.MCPToolName.LIST_PROJECTS],
input_schema=MCPInputSchema(
type=cs.MCPSchemaType.OBJECT,
properties={},
required=[],
),
handler=self.list_projects,
returns_json=True,
),
cs.MCPToolName.DELETE_PROJECT: ToolMetadata(
name=cs.MCPToolName.DELETE_PROJECT,
description=td.MCP_TOOLS[cs.MCPToolName.DELETE_PROJECT],
input_schema=MCPInputSchema(
type=cs.MCPSchemaType.OBJECT,
properties={
cs.MCPParamName.PROJECT_NAME: MCPInputSchemaProperty(
type=cs.MCPSchemaType.STRING,
description=td.MCP_PARAM_PROJECT_NAME,
)
},
required=[cs.MCPParamName.PROJECT_NAME],
),
handler=self.delete_project,
returns_json=True,
),
cs.MCPToolName.WIPE_DATABASE: ToolMetadata(
name=cs.MCPToolName.WIPE_DATABASE,
description=td.MCP_TOOLS[cs.MCPToolName.WIPE_DATABASE],
input_schema=MCPInputSchema(
type=cs.MCPSchemaType.OBJECT,
properties={
cs.MCPParamName.CONFIRM: MCPInputSchemaProperty(
type=cs.MCPSchemaType.BOOLEAN,
description=td.MCP_PARAM_CONFIRM,
)
},
required=[cs.MCPParamName.CONFIRM],
),
handler=self.wipe_database,
returns_json=False,
),
cs.MCPToolName.INDEX_REPOSITORY: ToolMetadata(
name=cs.MCPToolName.INDEX_REPOSITORY,
description=td.MCP_TOOLS[cs.MCPToolName.INDEX_REPOSITORY],
Expand Down Expand Up @@ -199,13 +248,53 @@ def __init__(
),
}

async def index_repository(self) -> str:
logger.info(lg.MCP_INDEXING_REPO.format(path=self.project_root))
async def list_projects(self) -> ListProjectsResult:
    """Report the indexed projects known to the ingestor.

    Returns:
        A success result carrying the project names and how many there are.
        If anything raises while listing, the error is logged and an error
        result with the error text, an empty project list and a count of 0
        is returned instead of propagating the exception.
    """
    logger.info(lg.MCP_LISTING_PROJECTS)
    try:
        project_names = self.ingestor.list_projects()
        total = len(project_names)
        return ListProjectsSuccessResult(projects=project_names, count=total)
    except Exception as exc:
        failure_log = lg.MCP_ERROR_LIST_PROJECTS.format(error=exc)
        logger.error(failure_log)
        return ListProjectsErrorResult(error=str(exc), projects=[], count=0)

async def delete_project(self, project_name: str) -> DeleteProjectResult:
    """Delete one project after checking that it is currently listed.

    Args:
        project_name: Name of the project to remove.

    Returns:
        A success result naming the deleted project, or an error result when
        the name is not among the listed projects or when listing/deleting
        raises. Exceptions are logged and converted to an error result rather
        than propagated.
    """
    logger.info(lg.MCP_DELETING_PROJECT.format(project_name=project_name))
    try:
        known_projects = self.ingestor.list_projects()
        if project_name in known_projects:
            self.ingestor.delete_project(project_name)
            deleted_message = cs.MCP_PROJECT_DELETED.format(
                project_name=project_name
            )
            return DeleteProjectSuccessResult(
                success=True,
                project=project_name,
                message=deleted_message,
            )
        # Unknown name: report it along with the names that do exist.
        not_found = te.MCP_PROJECT_NOT_FOUND.format(
            project_name=project_name, projects=known_projects
        )
        return DeleteProjectErrorResult(success=False, error=not_found)
    except Exception as exc:
        logger.error(lg.MCP_ERROR_DELETE_PROJECT.format(error=exc))
        return DeleteProjectErrorResult(success=False, error=str(exc))

async def wipe_database(self, confirm: bool) -> str:
    """Clean the whole database, but only when explicitly confirmed.

    Args:
        confirm: Must be truthy for the wipe to run; otherwise nothing is
            touched and the cancellation message is returned.

    Returns:
        The cancellation, success, or formatted error message. Exceptions
        raised while cleaning are logged and reported in the returned text
        rather than propagated.
    """
    if not confirm:
        return cs.MCP_WIPE_CANCELLED

    logger.warning(lg.MCP_WIPING_DATABASE)
    try:
        self.ingestor.clean_database()
        return cs.MCP_WIPE_SUCCESS
    except Exception as exc:
        failure_log = lg.MCP_ERROR_WIPE.format(error=exc)
        logger.error(failure_log)
        return cs.MCP_WIPE_ERROR.format(error=exc)

async def index_repository(self) -> str:
logger.info(lg.MCP_INDEXING_REPO.format(path=self.project_root))
project_name = Path(self.project_root).resolve().name
try:
logger.info(lg.MCP_CLEARING_PROJECT.format(project_name=project_name))
self.ingestor.delete_project(project_name)

updater = GraphUpdater(
ingestor=self.ingestor,
Expand All @@ -215,7 +304,9 @@ async def index_repository(self) -> str:
)
updater.run()

return cs.MCP_INDEX_SUCCESS.format(path=self.project_root)
return cs.MCP_INDEX_SUCCESS_PROJECT.format(
path=self.project_root, project_name=project_name
)
except Exception as e:
logger.error(lg.MCP_ERROR_INDEXING.format(error=e))
return cs.MCP_INDEX_ERROR.format(error=e)
Expand Down
26 changes: 24 additions & 2 deletions codebase_rag/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,20 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str:

{GRAPH_SCHEMA_AND_RULES}

**3. Query Patterns & Examples**
Your goal is to return the `name`, `path`, and `qualified_name` of the found nodes.
**3. Query Optimization Rules**

- **LIMIT Results**: ALWAYS add `LIMIT 50` to queries that list items. This prevents overwhelming responses.
- **Aggregation Queries**: When asked "how many", "count", or "total", return ONLY the count, not all items:
- CORRECT: `MATCH (c:Class) RETURN count(c) AS total`
- WRONG: `MATCH (c:Class) RETURN c.name, c.path, count(c) AS total` (returns all items!)
- **List vs Count**: If asked to "list" or "show", return items with LIMIT. If asked to "count" or "how many", return only the count.

**4. Query Patterns & Examples**
When listing items, return the `name`, `path`, and `qualified_name` with a LIMIT.

**Pattern: Counting Items**
cypher// "How many classes are there?" or "Count all functions"
MATCH (c:Class) RETURN count(c) AS total

**Pattern: Finding Decorated Functions/Methods (e.g., Workflows, Tasks)**
cypher// "Find all prefect flows" or "what are the workflows?" or "show me the tasks"
Expand Down Expand Up @@ -172,9 +184,19 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str:
- For code nodes (`Class`, `Function`, etc.), return `n.qualified_name AS qualified_name`.
4. **KEEP IT SIMPLE**: Do not try to be clever. A simple query that returns a few relevant nodes is better than a complex one that fails.
5. **CLAUSE ORDER**: You MUST follow the standard Cypher clause order: `MATCH`, `WHERE`, `RETURN`, `LIMIT`.
6. **ALWAYS ADD LIMIT**: For queries that list items, ALWAYS add `LIMIT 50` to prevent overwhelming responses.
7. **AGGREGATION QUERIES**: When asked "how many" or "count", return ONLY the count:
- CORRECT: `MATCH (c:Class) RETURN count(c) AS total`
- WRONG: `MATCH (c:Class) RETURN c.name, count(c) AS total` (returns all items!)

**Examples:**

* **Natural Language:** "How many classes are there?"
* **Cypher Query:**
```cypher
MATCH (c:Class) RETURN count(c) AS total
```

* **Natural Language:** "Find the main README file"
* **Cypher Query:**
```cypher
Expand Down
13 changes: 13 additions & 0 deletions codebase_rag/services/graph_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,19 @@
ERR_SUBSTR_CONSTRAINT,
KEY_CREATED,
KEY_FROM_VAL,
KEY_NAME,
KEY_PROJECT_NAME,
KEY_PROPS,
KEY_TO_VAL,
NODE_UNIQUE_CONSTRAINTS,
REL_TYPE_CALLS,
)
from ..cypher_queries import (
CYPHER_DELETE_ALL,
CYPHER_DELETE_PROJECT,
CYPHER_EXPORT_NODES,
CYPHER_EXPORT_RELATIONSHIPS,
CYPHER_LIST_PROJECTS,
build_constraint_query,
build_merge_node_query,
build_merge_relationship_query,
Expand Down Expand Up @@ -164,6 +168,15 @@ def clean_database(self) -> None:
self._execute_query(CYPHER_DELETE_ALL)
logger.info(ls.MG_DB_CLEANED)

def list_projects(self) -> list[str]:
    """Run the list-projects query and return each row's name as a string."""
    rows = self.fetch_all(CYPHER_LIST_PROJECTS)
    names: list[str] = []
    for row in rows:
        names.append(str(row[KEY_NAME]))
    return names

def delete_project(self, project_name: str) -> None:
    """Execute the delete-project query for the given name, logging before and after.

    Args:
        project_name: Value bound to the query's project-name parameter.
    """
    start_msg = ls.MG_DELETING_PROJECT.format(project_name=project_name)
    logger.info(start_msg)
    query_params = {KEY_PROJECT_NAME: project_name}
    self._execute_query(CYPHER_DELETE_PROJECT, query_params)
    done_msg = ls.MG_PROJECT_DELETED.format(project_name=project_name)
    logger.info(done_msg)

def ensure_constraints(self) -> None:
logger.info(ls.MG_ENSURING_CONSTRAINTS)
for label, prop in NODE_UNIQUE_CONSTRAINTS.items():
Expand Down
Loading