From fe6c02a29a49d562da1695e0f9cfc1df3b8eff30 Mon Sep 17 00:00:00 2001 From: lux Date: Fri, 2 Jan 2026 12:08:18 +0100 Subject: [PATCH 1/2] added update_repository and semantic search to mcp tools --- codebase_rag/constants.py | 137 +++++++++++---- codebase_rag/logs.py | 221 ++++++++++++++++-------- codebase_rag/mcp/tools.py | 155 +++++++++++++++-- codebase_rag/tools/tool_descriptions.py | 32 +++- 4 files changed, 413 insertions(+), 132 deletions(-) diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index cfaf834f2..99432d811 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -229,10 +229,10 @@ class GoogleProviderType(StrEnum): UI_DIFF_FILE_HEADER = "[bold cyan]File: {path}[/bold cyan]" UI_NEW_FILE_HEADER = "[bold cyan]New file: {path}[/bold cyan]" UI_SHELL_COMMAND_HEADER = "[bold cyan]Shell command:[/bold cyan]" -UI_TOOL_APPROVAL = "[bold yellow]⚠️ Tool '{tool_name}' requires approval:[/bold yellow]" -UI_FEEDBACK_PROMPT = ( - "[bold yellow]Feedback (why rejected, or press Enter to skip)[/bold yellow]" +UI_TOOL_APPROVAL = ( + "[bold yellow]⚠️ Tool '{tool_name}' requires approval:[/bold yellow]" ) +UI_FEEDBACK_PROMPT = "[bold yellow]Feedback (why rejected, or press Enter to skip)[/bold yellow]" UI_OPTIMIZATION_START = ( "[bold green]Starting {language} optimization session...[/bold green]" ) @@ -246,7 +246,9 @@ class GoogleProviderType(StrEnum): "[bold green]Graph exported successfully to: {path}[/bold green]" ) UI_GRAPH_EXPORT_STATS = "[bold cyan]Export contains {nodes} nodes and {relationships} relationships[/bold cyan]" -UI_ERR_UNEXPECTED = "[bold red]An unexpected error occurred: {error}[/bold red]" +UI_ERR_UNEXPECTED = ( + "[bold red]An unexpected error occurred: {error}[/bold red]" +) UI_ERR_EXPORT_FAILED = "[bold red]Failed to export graph: {error}[/bold red]" UI_TOOL_ARGS_FORMAT = " Arguments: {args}" UI_REFERENCE_DOC_INFO = " using the reference document: {reference_document}" @@ -470,11 +472,18 @@ class LanguageMetadata(NamedTuple): "function_signature_item", "function_signature", ) -FUNCTION_NODES_GENERATOR = ("generator_function_declaration", "function_expression") +FUNCTION_NODES_GENERATOR = ( + "generator_function_declaration", + "function_expression", +) CLASS_NODES_BASIC = ("class_declaration", "class_definition") CLASS_NODES_STRUCT = ("struct_declaration", "struct_specifier", "struct_item") -CLASS_NODES_INTERFACE = ("interface_declaration", "trait_declaration", "trait_item") +CLASS_NODES_INTERFACE = ( + "interface_declaration", + "trait_declaration", + "trait_item", +) CLASS_NODES_ENUM = ("enum_declaration", "enum_item", "enum_specifier") CLASS_NODES_TYPE_ALIAS = ("type_alias_declaration", "type_item") CLASS_NODES_UNION = ("union_specifier", "union_item") @@ -485,8 +494,16 @@ class LanguageMetadata(NamedTuple): "member_call_expression", "field_expression", ) -CALL_NODES_OPERATOR = ("binary_expression", "unary_expression", "update_expression") -CALL_NODES_SPECIAL = ("new_expression", "delete_expression", "macro_invocation") +CALL_NODES_OPERATOR = ( + "binary_expression", + "unary_expression", + "update_expression", +) +CALL_NODES_SPECIAL = ( + "new_expression", + "delete_expression", + "macro_invocation", +) IMPORT_NODES_STANDARD = ("import_declaration", "import_statement") IMPORT_NODES_FROM = ("import_from_statement",) @@ -503,7 +520,11 @@ class LanguageMetadata(NamedTuple): "method_definition", ) JS_TS_CLASS_NODES = ("class_declaration", "class") -JS_TS_IMPORT_NODES = ("import_statement", "lexical_declaration", "export_statement") +JS_TS_IMPORT_NODES = ( + "import_statement", + "lexical_declaration", + "export_statement", +) JS_TS_LANGUAGES = frozenset({SupportedLanguage.JS, SupportedLanguage.TS}) # (H) C++ import node types @@ -718,7 +739,9 @@ class EventType(StrEnum): CREATED = "created" -CYPHER_DELETE_MODULE = "MATCH (m:Module {path: $path})-[*0..]->(c) DETACH DELETE m, c" +CYPHER_DELETE_MODULE = ( + "MATCH (m:Module {path: $path})-[*0..]->(c) DETACH DELETE m, c" +) CYPHER_DELETE_CALLS = "MATCH ()-[r:CALLS]->() DELETE r" REALTIME_LOGGER_FORMAT = ( @@ -813,7 +836,11 @@ class Architecture(StrEnum): MODULE_TRANSFORMERS = "transformers" MODULE_QDRANT_CLIENT = "qdrant_client" -SEMANTIC_DEPENDENCIES = (MODULE_QDRANT_CLIENT, MODULE_TORCH, MODULE_TRANSFORMERS) +SEMANTIC_DEPENDENCIES = ( + MODULE_QDRANT_CLIENT, + MODULE_TORCH, + MODULE_TRANSFORMERS, +) ML_DEPENDENCIES = (MODULE_TORCH, MODULE_TRANSFORMERS) @@ -848,7 +875,9 @@ class UniXcoderMode(StrEnum): CYPHER_MATCH_KEYWORD = "MATCH" # (H) Tool success messages -MSG_SURGICAL_SUCCESS = "Successfully applied surgical code replacement in: {path}" +MSG_SURGICAL_SUCCESS = ( + "Successfully applied surgical code replacement in: {path}" +) MSG_SURGICAL_FAILED = ( "Failed to apply surgical replacement in {path}. " "Target code not found or patches failed." @@ -867,7 +896,9 @@ class UniXcoderMode(StrEnum): # (H) Query tool messages QUERY_NOT_AVAILABLE = "N/A" DICT_KEY_RESULTS = "results" -QUERY_SUMMARY_SUCCESS = "Successfully retrieved {count} item(s) from the graph." +QUERY_SUMMARY_SUCCESS = ( + "Successfully retrieved {count} item(s) from the graph." +) QUERY_SUMMARY_TRANSLATION_FAILED = ( "I couldn't translate your request into a database query. Error: {error}" ) @@ -888,8 +919,12 @@ class UniXcoderMode(StrEnum): "Could not retrieve source code for node ID {id}. " "The node may not exist or source file may be unavailable." ) -MSG_SEMANTIC_SOURCE_FORMAT = "Source code for node ID {id}:\n\n```\n{code}\n```" -MSG_SEMANTIC_RESULT_HEADER = "Found {count} semantic matches for '{query}':\n\n" +MSG_SEMANTIC_SOURCE_FORMAT = ( + "Source code for node ID {id}:\n\n```\n{code}\n```" +) +MSG_SEMANTIC_RESULT_HEADER = ( + "Found {count} semantic matches for '{query}':\n\n" +) MSG_SEMANTIC_RESULT_FOOTER = "\n\nUse the qualified names above with other tools to get more details or source code." SEMANTIC_BATCH_SIZE = 100 SEMANTIC_TYPE_UNKNOWN = "Unknown" @@ -898,9 +933,7 @@ class UniXcoderMode(StrEnum): MSG_DOC_NO_CANDIDATES = "No valid text found in response candidates." MSG_DOC_NO_CONTENT = "No text content received from the API." MIME_TYPE_DEFAULT = "application/octet-stream" -DOC_PROMPT_PREFIX = ( - "Based on the document provided, please answer the following question: {question}" -) +DOC_PROMPT_PREFIX = "Based on the document provided, please answer the following question: {question}" # (H) Call processor constants MOD_RS = "mod.rs" @@ -1098,10 +1131,14 @@ class UniXcoderMode(StrEnum): LANG_MSG_CLASSES = "Classes: {nodes}" LANG_MSG_MODULES = "Modules: {nodes}" LANG_MSG_CALLS = "Calls: {nodes}" -LANG_MSG_LANG_ADDED = "\nLanguage '{name}' has been added to the configuration!" +LANG_MSG_LANG_ADDED = ( + "\nLanguage '{name}' has been added to the configuration!" +) LANG_MSG_UPDATED_CONFIG = "Updated {path}" LANG_MSG_REVIEW_PROMPT = "Please review the detected node types:" -LANG_MSG_REVIEW_HINT = " The auto-detection is good but may need manual adjustments." +LANG_MSG_REVIEW_HINT = ( + " The auto-detection is good but may need manual adjustments." +) LANG_MSG_EDIT_HINT = " Edit the configuration in: {path}" LANG_MSG_COMMON_ISSUES = "Look for these common issues:" LANG_MSG_ISSUE_MISCLASSIFIED = ( @@ -1119,7 +1156,9 @@ class UniXcoderMode(StrEnum): ) LANG_MSG_LANG_NOT_FOUND = "Language '{name}' not found." LANG_MSG_AVAILABLE_LANGS = "Available languages: {langs}" -LANG_MSG_REMOVED_FROM_CONFIG = "Removed language '{name}' from configuration file." +LANG_MSG_REMOVED_FROM_CONFIG = ( + "Removed language '{name}' from configuration file." +) LANG_MSG_REMOVING_SUBMODULE = "Removing git submodule '{path}'..." LANG_MSG_CLEANED_MODULES = "Cleaned up git modules directory: {path}" LANG_MSG_SUBMODULE_REMOVED = "Successfully removed submodule '{path}'" @@ -1135,16 +1174,24 @@ class UniXcoderMode(StrEnum): LANG_MSG_CLEANUP_CANCELLED = "Cleanup cancelled." # (H) Language CLI error messages -LANG_ERR_MISSING_ARGS = "Error: Either language_name or --grammar-url must be provided" +LANG_ERR_MISSING_ARGS = ( + "Error: Either language_name or --grammar-url must be provided" +) LANG_ERR_REINSTALL_FAILED = "Failed to reinstall submodule: {error}" -LANG_ERR_MANUAL_REMOVE_HINT = "You may need to remove it manually and try again:" +LANG_ERR_MANUAL_REMOVE_HINT = ( + "You may need to remove it manually and try again:" +) LANG_ERR_REPO_NOT_FOUND = "Error: Repository not found at {url}" -LANG_ERR_CUSTOM_URL_HINT = "Try using a custom URL with: --grammar-url " +LANG_ERR_CUSTOM_URL_HINT = ( + "Try using a custom URL with: --grammar-url " +) LANG_ERR_GIT = "Git error: {error}" LANG_ERR_NODE_TYPES_WARNING = ( "Warning: node-types.json not found in any expected location for {name}" ) -LANG_ERR_TREE_SITTER_JSON_WARNING = "Warning: tree-sitter.json not found in {path}" +LANG_ERR_TREE_SITTER_JSON_WARNING = ( + "Warning: tree-sitter.json not found in {path}" +) LANG_ERR_NO_GRAMMARS_WARNING = "Warning: No grammars found in tree-sitter.json" LANG_ERR_PARSE_NODE_TYPES = "Error parsing node-types.json: {error}" LANG_ERR_UPDATE_CONFIG = "Error updating config file: {error}" @@ -1155,13 +1202,13 @@ class UniXcoderMode(StrEnum): # (H) Language CLI prompts LANG_PROMPT_LANGUAGE_NAME = "Language name (e.g., 'c-sharp', 'python')" LANG_PROMPT_COMMON_NAME = "What is the common name for this language?" -LANG_PROMPT_EXTENSIONS = ( - "What file extensions should be associated with this language? (comma-separated)" -) +LANG_PROMPT_EXTENSIONS = "What file extensions should be associated with this language? (comma-separated)" LANG_PROMPT_FUNCTIONS = "Select nodes representing FUNCTIONS (comma-separated)" LANG_PROMPT_CLASSES = "Select nodes representing CLASSES (comma-separated)" LANG_PROMPT_MODULES = "Select nodes representing MODULES (comma-separated)" -LANG_PROMPT_CALLS = "Select nodes representing FUNCTION CALLS (comma-separated)" +LANG_PROMPT_CALLS = ( + "Select nodes representing FUNCTION CALLS (comma-separated)" +) LANG_PROMPT_CONTINUE = "Do you want to continue?" LANG_PROMPT_REMOVE_ORPHANS = "Do you want to remove these orphaned modules?" @@ -1210,8 +1257,12 @@ class CppNodeType(StrEnum): QUALIFIED_IDENTIFIER = "qualified_identifier" OPERATOR_NAME = "operator_name" DESTRUCTOR_NAME = "destructor_name" - CONSTRUCTOR_OR_DESTRUCTOR_DEFINITION = "constructor_or_destructor_definition" - CONSTRUCTOR_OR_DESTRUCTOR_DECLARATION = "constructor_or_destructor_declaration" + CONSTRUCTOR_OR_DESTRUCTOR_DEFINITION = ( + "constructor_or_destructor_definition" + ) + CONSTRUCTOR_OR_DESTRUCTOR_DECLARATION = ( + "constructor_or_destructor_declaration" + ) INLINE_METHOD_DEFINITION = "inline_method_definition" OPERATOR_CAST_DEFINITION = "operator_cast_definition" @@ -1244,7 +1295,9 @@ class CppNodeType(StrEnum): # (H) C++ keywords for class detection CPP_KEYWORD_CLASS = "class" CPP_KEYWORD_STRUCT = "struct" -CPP_EXPORTED_CLASS_KEYWORDS = frozenset({CPP_KEYWORD_CLASS, CPP_KEYWORD_STRUCT}) +CPP_EXPORTED_CLASS_KEYWORDS = frozenset( + {CPP_KEYWORD_CLASS, CPP_KEYWORD_STRUCT} +) CPP_FALLBACK_OPERATOR = "operator_unknown" CPP_FALLBACK_DESTRUCTOR = "~destructor" @@ -1988,7 +2041,9 @@ class CppNodeType(StrEnum): # (H) JS/TS module system node types TS_OBJECT_PATTERN = "object_pattern" -TS_SHORTHAND_PROPERTY_IDENTIFIER_PATTERN = "shorthand_property_identifier_pattern" +TS_SHORTHAND_PROPERTY_IDENTIFIER_PATTERN = ( + "shorthand_property_identifier_pattern" +) TS_PAIR_PATTERN = "pair_pattern" TS_FUNCTION_DECLARATION = "function_declaration" TS_GENERATOR_FUNCTION_DECLARATION = "generator_function_declaration" @@ -2109,12 +2164,14 @@ class CppNodeType(StrEnum): # (H) MCP tool names class MCPToolName(StrEnum): INDEX_REPOSITORY = "index_repository" + UPDATE_REPOSITORY = "update_repository" QUERY_CODE_GRAPH = "query_code_graph" GET_CODE_SNIPPET = "get_code_snippet" SURGICAL_REPLACE_CODE = "surgical_replace_code" READ_FILE = "read_file" WRITE_FILE = "write_file" LIST_DIRECTORY = "list_directory" + SEMANTIC_SEARCH = "semantic_search" # (H) MCP environment variables @@ -2151,6 +2208,7 @@ class MCPParamName(StrEnum): LIMIT = "limit" CONTENT = "content" DIRECTORY_PATH = "directory_path" + TOP_K = "top_k" # (H) MCP server constants @@ -2168,6 +2226,14 @@ class MCPParamName(StrEnum): MCP_WRITE_SUCCESS = "Successfully wrote file: {path}" MCP_UNKNOWN_TOOL_ERROR = "Unknown tool: {name}" MCP_TOOL_EXEC_ERROR = "Error executing tool '{name}': {error}" +MCP_UPDATE_SUCCESS = ( + "Successfully updated repository at {path} (no database wipe)." +) +MCP_UPDATE_ERROR = "Error updating repository: {error}" +MCP_SEMANTIC_NOT_AVAILABLE_RESPONSE = ( + "Semantic search is not available. Install with: uv sync --extra semantic" +) + # (H) MCP dict keys and values MCP_KEY_RESULTS = "results" @@ -2500,7 +2566,10 @@ class MCPParamName(StrEnum): ) # (H) LANGUAGE_SPECS node type tuples for Lua -SPEC_LUA_FUNCTION_TYPES = (TS_LUA_FUNCTION_DECLARATION, TS_LUA_FUNCTION_DEFINITION) +SPEC_LUA_FUNCTION_TYPES = ( + TS_LUA_FUNCTION_DECLARATION, + TS_LUA_FUNCTION_DEFINITION, +) SPEC_LUA_CLASS_TYPES: tuple[str, ...] = () SPEC_LUA_MODULE_TYPES = (TS_LUA_CHUNK,) SPEC_LUA_CALL_TYPES = (TS_LUA_FUNCTION_CALL,) diff --git a/codebase_rag/logs.py b/codebase_rag/logs.py index f97ba10c7..3e4aa30c7 100644 --- a/codebase_rag/logs.py +++ b/codebase_rag/logs.py @@ -3,20 +3,22 @@ # (H) Graph loading logs LOADING_GRAPH = "Loading graph from {path}" -LOADED_GRAPH = "Loaded {nodes} nodes and {relationships} relationships with indexes" +LOADED_GRAPH = ( + "Loaded {nodes} nodes and {relationships} relationships with indexes" +) ENSURING_PROJECT = "Ensuring Project: {name}" # (H) Pass logs PASS_1_STRUCTURE = "--- Pass 1: Identifying Packages and Folders ---" -PASS_2_FILES = ( - "\n--- Pass 2: Processing Files, Caching ASTs, and Collecting Definitions ---" -) +PASS_2_FILES = "\n--- Pass 2: Processing Files, Caching ASTs, and Collecting Definitions ---" PASS_3_CALLS = "--- Pass 3: Processing Function Calls from AST Cache ---" PASS_4_EMBEDDINGS = "--- Pass 4: Generating semantic embeddings ---" # (H) Analysis logs FOUND_FUNCTIONS = "\n--- Found {count} functions/methods in codebase ---" -ANALYSIS_COMPLETE = "\n--- Analysis complete. Flushing all data to database... ---" +ANALYSIS_COMPLETE = ( + "\n--- Analysis complete. Flushing all data to database... ---" +) REMOVING_STATE = "Removing in-memory state for: {path}" REMOVED_FROM_CACHE = " - Removed from ast_cache" REMOVING_QNS = " - Removing {count} QNs from function_registry" @@ -40,8 +42,12 @@ SEMANTIC_NOT_AVAILABLE = ( "Semantic search dependencies not available, skipping embedding generation" ) -INGESTOR_NO_QUERY = "Ingestor does not support querying, skipping embedding generation" -NO_FUNCTIONS_FOR_EMBEDDING = "No functions or methods found for embedding generation" +INGESTOR_NO_QUERY = ( + "Ingestor does not support querying, skipping embedding generation" +) +NO_FUNCTIONS_FOR_EMBEDDING = ( + "No functions or methods found for embedding generation" +) GENERATING_EMBEDDINGS = "Generating embeddings for {count} functions/methods" EMBEDDING_PROGRESS = "Generated {done}/{total} embeddings" EMBEDDING_FAILED = "Failed to embed {name}: {error}" @@ -73,16 +79,18 @@ # (H) Parser loader logs BUILDING_BINDINGS = "Building Python bindings for {lang}..." -BUILD_FAILED = "Failed to build {lang} bindings: stdout={stdout}, stderr={stderr}" +BUILD_FAILED = ( + "Failed to build {lang} bindings: stdout={stdout}, stderr={stderr}" +) BUILD_SUCCESS = "Successfully built {lang} bindings" IMPORTING_MODULE = "Attempting to import module: {module}" LOADED_FROM_SUBMODULE = ( "Successfully loaded {lang} from submodule bindings using {attr}" ) -NO_LANG_ATTR = ( - "Module {module} imported but has no language attribute. Available: {available}" +NO_LANG_ATTR = "Module {module} imported but has no language attribute. Available: {available}" +SUBMODULE_LOAD_FAILED = ( + "Failed to load {lang} from submodule bindings: {error}" ) -SUBMODULE_LOAD_FAILED = "Failed to load {lang} from submodule bindings: {error}" LIB_NOT_AVAILABLE = "Tree-sitter library for {lang} not available." LOCALS_QUERY_FAILED = "Failed to create locals query for {lang}: {error}" GRAMMAR_LOADED = "Successfully loaded {lang} grammar." @@ -91,10 +99,14 @@ # (H) File watcher logs WATCHER_ACTIVE = "File watcher is now active." -WATCHER_SKIP_NO_QUERY = "Ingestor does not support querying, skipping real-time update." +WATCHER_SKIP_NO_QUERY = ( + "Ingestor does not support querying, skipping real-time update." +) CHANGE_DETECTED = "Change detected: {event_type} on {path}. Updating graph." DELETION_QUERY = "Ran deletion query for path: {path}" -RECALC_CALLS = "Recalculating all function call relationships for consistency..." +RECALC_CALLS = ( + "Recalculating all function call relationships for consistency..." +) GRAPH_UPDATED = "Graph updated successfully for change in: {name}" INITIAL_SCAN = "Performing initial full codebase scan..." INITIAL_SCAN_DONE = "Initial scan complete. Starting real-time watcher." @@ -142,7 +154,9 @@ # (H) Source extraction logs SOURCE_FILE_NOT_FOUND = "Source file not found: {path}" SOURCE_INVALID_RANGE = "Invalid line range: {start}-{end}" -SOURCE_RANGE_EXCEEDS = "Line range {start}-{end} exceeds file length {length} in {path}" +SOURCE_RANGE_EXCEEDS = ( + "Line range {start}-{end} exceeds file length {length} in {path}" +) SOURCE_EXTRACT_FAILED = "Failed to extract source from {path}: {error}" SOURCE_AST_FAILED = "AST extraction failed for {name}: {error}" @@ -163,17 +177,23 @@ MG_NODE_BUFFER_FLUSH = ( "Node buffer reached batch size ({size}). Performing incremental flush." ) -MG_REL_BUFFER_FLUSH = ( - "Relationship buffer reached batch size ({size}). Performing incremental flush." +MG_REL_BUFFER_FLUSH = "Relationship buffer reached batch size ({size}). Performing incremental flush." +MG_NO_CONSTRAINT = ( + "No unique constraint defined for label '{label}'. Skipping flush." +) +MG_MISSING_PROP = ( + "Skipping {label} node missing required '{key}' property: {props}" ) -MG_NO_CONSTRAINT = "No unique constraint defined for label '{label}'. Skipping flush." -MG_MISSING_PROP = "Skipping {label} node missing required '{key}' property: {props}" MG_NODES_FLUSHED = "Flushed {flushed} of {total} buffered nodes." MG_NODES_SKIPPED = ( "Skipped {count} buffered nodes due to missing identifiers or constraints." ) -MG_CALLS_FAILED = "Failed to create {count} CALLS relationships - nodes may not exist" -MG_CALLS_SAMPLE = " Sample {index}: {from_label}.{from_val} -> {to_label}.{to_val}" +MG_CALLS_FAILED = ( + "Failed to create {count} CALLS relationships - nodes may not exist" +) +MG_CALLS_SAMPLE = ( + " Sample {index}: {from_label}.{from_val} -> {to_label}.{to_val}" +) MG_RELS_FLUSHED = ( "Flushed {total} relationships ({success} successful, {failed} failed)." ) @@ -194,9 +214,13 @@ TOOL_FILE_READ_SUCCESS = "[FileReader] Successfully read text from {path}" TOOL_FILE_BINARY = "[FileReader] {message}" TOOL_FILE_WRITE = "[FileWriter] Creating file: {path}" -TOOL_FILE_WRITE_SUCCESS = "[FileWriter] Successfully wrote {chars} characters to {path}" +TOOL_FILE_WRITE_SUCCESS = ( + "[FileWriter] Successfully wrote {chars} characters to {path}" +) TOOL_FILE_EDIT = "[FileEditor] Attempting full file replacement: {path}" -TOOL_FILE_EDIT_SUCCESS = "[FileEditor] Successfully replaced entire file: {path}" +TOOL_FILE_EDIT_SUCCESS = ( + "[FileEditor] Successfully replaced entire file: {path}" +) TOOL_FILE_EDIT_SURGICAL = ( "[FileEditor] Attempting surgical block replacement in: {path}" ) @@ -214,7 +238,9 @@ "Process already terminated when timeout kill was attempted." ) TOOL_SHELL_ERROR = "An error occurred while executing command: {error}" -TOOL_DOC_ANALYZE = "[DocumentAnalyzer] Analyzing '{path}' with question: '{question}'" +TOOL_DOC_ANALYZE = ( + "[DocumentAnalyzer] Analyzing '{path}' with question: '{question}'" +) # (H) Shell timing log SHELL_TIMING = "'{func}' executed in {time:.2f}ms" @@ -234,7 +260,9 @@ "or specify line number for precise targeting." ) EDITOR_FUNC_NOT_IN_FILE = "Function '{name}' not found in {path}." -EDITOR_PATCHES_NOT_CLEAN = "Patches for function '{name}' did not apply cleanly." +EDITOR_PATCHES_NOT_CLEAN = ( + "Patches for function '{name}' did not apply cleanly." +) EDITOR_NO_CHANGES = "No changes detected after replacement." EDITOR_REPLACE_SUCCESS = "Successfully replaced function '{name}' in {path}." EDITOR_PATCH_FAILED = "Some patches failed to apply cleanly to {path}" @@ -243,9 +271,7 @@ EDITOR_FILE_NOT_FOUND = "File not found: {path}" EDITOR_BLOCK_NOT_FOUND = "Target block not found in {path}" EDITOR_LOOKING_FOR = "Looking for: {block}" -EDITOR_MULTIPLE_OCCURRENCES = ( - "Multiple occurrences of target block found. Only replacing first occurrence." -) +EDITOR_MULTIPLE_OCCURRENCES = "Multiple occurrences of target block found. Only replacing first occurrence." EDITOR_NO_CHANGES_IDENTICAL = ( "No changes detected - target and replacement are identical" ) @@ -261,10 +287,14 @@ SEMANTIC_FOUND = "Found {count} semantic matches for: {query}" SEMANTIC_FAILED = "Semantic search failed for query '{query}': {error}" SEMANTIC_NODE_NOT_FOUND = "No node found with ID: {id}" -SEMANTIC_INVALID_LOCATION = "Missing or invalid source location info for node {id}" +SEMANTIC_INVALID_LOCATION = ( + "Missing or invalid source location info for node {id}" +) SEMANTIC_SOURCE_FAILED = "Failed to get source code for node {id}: {error}" SEMANTIC_TOOL_SEARCH = "[Tool:SemanticSearch] Searching for: '{query}'" -SEMANTIC_TOOL_SOURCE = "[Tool:GetFunctionSource] Retrieving source for node ID: {id}" +SEMANTIC_TOOL_SOURCE = ( + "[Tool:GetFunctionSource] Retrieving source for node ID: {id}" +) # (H) Document analyzer logs DOC_COPIED = "Copied external file to: {path}" @@ -297,7 +327,9 @@ # (H) File writer logs FILE_WRITER_INIT = "FileWriter initialized with root: {root}" FILE_WRITER_CREATE = "[FileWriter] Creating file: {path}" -FILE_WRITER_SUCCESS = "[FileWriter] Successfully wrote {chars} characters to {path}" +FILE_WRITER_SUCCESS = ( + "[FileWriter] Successfully wrote {chars} characters to {path}" +) # (H) Error logs (used with logger.error/warning) UNEXPECTED = "An unexpected error occurred: {error}" @@ -308,15 +340,15 @@ ) IMAGE_NOT_FOUND = "Image path found, but does not exist: {path}" IMAGE_COPY_FAILED = "Failed to copy image to temporary directory: {error}" -FILE_OUTSIDE_ROOT = "Security risk: Attempted to {action} file outside of project root." +FILE_OUTSIDE_ROOT = ( + "Security risk: Attempted to {action} file outside of project root." +) # (H) Call processor logs CALL_PROCESSING_FILE = "Processing calls in cached AST for: {path}" CALL_PROCESSING_FAILED = "Failed to process calls in {path}: {error}" CALL_FOUND_NODES = "Found {count} call nodes in {language} for {caller}" -CALL_FOUND = ( - "Found call from {caller} to {call_name} (resolved as {callee_type}:{callee_qn})" -) +CALL_FOUND = "Found call from {caller} to {call_name} (resolved as {callee_type}:{callee_qn})" CALL_NESTED_FOUND = "Found nested call from {caller} to {call_name} (resolved as {callee_type}:{callee_qn})" CALL_DIRECT_IMPORT = "Direct import resolved: {call_name} -> {qn}" CALL_TYPE_INFERRED = "Type-inferred object method resolved: {call_name} -> {method_qn} (via {obj}:{var_type})" @@ -334,18 +366,20 @@ "Instance-resolved inherited self-attribute call: {call_name} -> {method_qn} " "(via {attr_ref}:{var_type})" ) -CALL_IMPORT_QUALIFIED = "Import-resolved qualified call: {call_name} -> {method_qn}" +CALL_IMPORT_QUALIFIED = ( + "Import-resolved qualified call: {call_name} -> {method_qn}" +) CALL_INSTANCE_QUALIFIED = "Instance-resolved qualified call: {call_name} -> {method_qn} (via {class_name}:{var_type})" CALL_INSTANCE_INHERITED = "Instance-resolved inherited call: {call_name} -> {method_qn} (via {class_name}:{var_type})" CALL_WILDCARD = "Wildcard-resolved call: {call_name} -> {qn}" CALL_SAME_MODULE = "Same-module resolution: {call_name} -> {qn}" CALL_TRIE_FALLBACK = "Trie-based fallback resolution: {call_name} -> {qn}" CALL_UNRESOLVED = "Could not resolve call: {call_name}" -CALL_CHAINED = ( - "Resolved chained call: {call_name} -> {method_qn} (via {obj_expr}:{obj_type})" -) +CALL_CHAINED = "Resolved chained call: {call_name} -> {method_qn} (via {obj_expr}:{obj_type})" CALL_CHAINED_INHERITED = "Resolved chained inherited call: {call_name} -> {method_qn} (via {obj_expr}:{obj_type})" -CALL_SUPER_NO_CONTEXT = "No class context provided for super() call: {call_name}" +CALL_SUPER_NO_CONTEXT = ( + "No class context provided for super() call: {call_name}" +) CALL_SUPER_NO_INHERITANCE = "No inheritance info for class {class_qn}" CALL_SUPER_NO_PARENTS = "No parent classes found for {class_qn}" CALL_SUPER_RESOLVED = "Resolved super() call: {call_name} -> {method_qn}" @@ -368,7 +402,9 @@ DEP_PARSE_ERROR_CSPROJ = "Error parsing .csproj {path}: {error}" # (H) Import processor logs -IMP_TOOL_NOT_AVAILABLE = "External tool '{tool}' not available for stdlib introspection" +IMP_TOOL_NOT_AVAILABLE = ( + "External tool '{tool}' not available for stdlib introspection" +) IMP_CACHE_LOADED = "Loaded stdlib cache from {path}" IMP_CACHE_LOAD_ERROR = "Could not load stdlib cache: {error}" IMP_CACHE_SAVED = "Saved stdlib cache to {path}" @@ -376,9 +412,7 @@ IMP_CACHE_CLEARED = "Cleared stdlib cache from disk" IMP_CACHE_CLEAR_ERROR = "Could not clear stdlib cache from disk: {error}" IMP_PARSED_COUNT = "Parsed {count} imports in {module}" -IMP_CREATED_RELATIONSHIP = ( - " Created IMPORTS relationship: {from_module} -> {to_module} (from {full_name})" -) +IMP_CREATED_RELATIONSHIP = " Created IMPORTS relationship: {from_module} -> {to_module} (from {full_name})" IMP_PARSE_FAILED = "Failed to parse imports in {module}: {error}" IMP_IMPORT = " Import: {local} -> {full}" IMP_ALIASED_IMPORT = " Aliased import: {alias} -> {full}" @@ -417,12 +451,16 @@ CLASS_FOUND_TEMPLATE = " Found Template {node_type}: {name} (qn: {qn})" CLASS_FOUND_EXPORTED_STRUCT = " Found Exported Struct: {name} (qn: {qn})" CLASS_FOUND_EXPORTED_UNION = " Found Exported Union: {name} (qn: {qn})" -CLASS_FOUND_EXPORTED_TEMPLATE = " Found Exported Template Class: {name} (qn: {qn})" +CLASS_FOUND_EXPORTED_TEMPLATE = ( + " Found Exported Template Class: {name} (qn: {qn})" +) CLASS_FOUND_EXPORTED_CLASS = " Found Exported Class: {name} (qn: {qn})" CLASS_FOUND_CLASS = " Found Class: {name} (qn: {qn})" CLASS_FOUND_INLINE_MODULE = " Found Inline Module: {name} (qn: {qn})" CLASS_PASS_4 = "--- Pass 4: Processing Method Override Relationships ---" -CLASS_METHOD_OVERRIDE = "Method override: {method_qn} OVERRIDES {parent_method_qn}" +CLASS_METHOD_OVERRIDE = ( + "Method override: {method_qn} OVERRIDES {parent_method_qn}" +) CLASS_CPP_INHERITANCE = "Found C++ inheritance: {parent_name} -> {parent_qn}" # (H) Java type inference logs @@ -435,7 +473,9 @@ JAVA_CLASS_FIELD = "Class field: {name} -> {type}" JAVA_ASSIGNMENT = "Assignment: {name} -> {type}" JAVA_NO_METHOD_NAME = "No method name found in call node" -JAVA_RESOLVING_CALL = "Resolving Java method call: method={method}, object={object}" +JAVA_RESOLVING_CALL = ( + "Resolving Java method call: method={method}, object={object}" +) JAVA_RESOLVING_STATIC = "Resolving static/local method: {method}" JAVA_FOUND_STATIC = "Found static/local method: {result}" JAVA_STATIC_NOT_FOUND = "Static/local method not found: {method}" @@ -445,10 +485,14 @@ JAVA_FOUND_INSTANCE = "Found instance method: {result}" JAVA_INSTANCE_NOT_FOUND = "Instance method not found: {type}.{method}" JAVA_ENHANCED_FOR_VAR = "Enhanced for loop variable: {name} -> {type}" -JAVA_ENHANCED_FOR_VAR_ALT = "Enhanced for loop variable (alt): {name} -> {type}" +JAVA_ENHANCED_FOR_VAR_ALT = ( + "Enhanced for loop variable (alt): {name} -> {type}" +) # (H) JS type inference logs -JS_VAR_DECLARATOR_FOUND = "Found variable declarator: {var_name} in {module_qn}" +JS_VAR_DECLARATOR_FOUND = ( + "Found variable declarator: {var_name} in {module_qn}" +) JS_VAR_INFERRED = "Inferred JS variable: {var_name} -> {var_type}" JS_VAR_INFER_FAILED = "Could not infer type for variable: {var_name}" JS_VAR_TYPE_MAP_BUILT = "Built JS variable type map with {count} variables (found {declarator_count} declarators total)" @@ -497,9 +541,13 @@ PY_RECURSION_GUARD = "Recursion guard (method call): skipping {method}" PY_RECURSION_GUARD_QN = "Recursion guard: skipping {method_qn}" PY_RESOLVED_METHOD = "Resolved {class_name}.{method_name} to {method_qn}" -PY_INFER_ATTR_FAILED = "Failed to analyze instance variables for {attr}: {error}" +PY_INFER_ATTR_FAILED = ( + "Failed to analyze instance variables for {attr}: {error}" +) PY_INFER_RETURN_FAILED = "Failed to infer return type for {method}: {error}" -PY_VAR_FROM_CONTEXT = "Found variable type from method context: {var} -> {type}" +PY_VAR_FROM_CONTEXT = ( + "Found variable type from method context: {var} -> {type}" +) PY_VAR_CANNOT_INFER = "Cannot infer type for variable reference: {var}" PY_NO_CONTAINING_CLASS = "No containing class found for method" PY_NO_INIT_METHOD = "No __init__ method found in class" @@ -507,7 +555,9 @@ PY_FOUND_CLASS_AT_LEVEL = "Found class_definition at level {level}" PY_SEARCHING_LEVEL = "Level {level}: node type = {node_type}" PY_NO_CLASS_IN_HIERARCHY = "No class_definition found in parent hierarchy" -PY_SEARCHING_INIT = "Searching for __init__ method in class with {count} children" +PY_SEARCHING_INIT = ( + "Searching for __init__ method in class with {count} children" +) PY_CHILD_TYPE = " Child type: {type}" PY_NO_CLASS_BODY = " No class body (block) found" PY_SEARCHING_BODY = " Searching in class body with {count} children" @@ -517,23 +567,35 @@ PY_INIT_NOT_FOUND = " No __init__ method found in class body" # (H) JS/TS ingest logs -JS_PROTOTYPE_INHERITANCE = "Prototype inheritance: {child_qn} INHERITS {parent_qn}" -JS_PROTOTYPE_INHERITANCE_FAILED = "Failed to detect prototype inheritance: {error}" -JS_PROTOTYPE_METHOD_FOUND = " Found Prototype Method: {method_name} (qn: {method_qn})" -JS_PROTOTYPE_METHOD_DEFINES = "Prototype method: {constructor_qn} DEFINES {method_qn}" +JS_PROTOTYPE_INHERITANCE = ( + "Prototype inheritance: {child_qn} INHERITS {parent_qn}" +) +JS_PROTOTYPE_INHERITANCE_FAILED = ( + "Failed to detect prototype inheritance: {error}" +) +JS_PROTOTYPE_METHOD_FOUND = ( + " Found Prototype Method: {method_name} (qn: {method_qn})" +) +JS_PROTOTYPE_METHOD_DEFINES = ( + "Prototype method: {constructor_qn} DEFINES {method_qn}" +) JS_PROTOTYPE_METHODS_FAILED = "Failed to detect prototype methods: {error}" -JS_OBJECT_METHOD_FOUND = " Found Object Method: {method_name} (qn: {method_qn})" -JS_OBJECT_METHODS_PROCESS_FAILED = "Failed to process object literal methods: {error}" -JS_OBJECT_METHODS_DETECT_FAILED = "Failed to detect object literal methods: {error}" +JS_OBJECT_METHOD_FOUND = ( + " Found Object Method: {method_name} (qn: {method_qn})" +) +JS_OBJECT_METHODS_PROCESS_FAILED = ( + "Failed to process object literal methods: {error}" +) +JS_OBJECT_METHODS_DETECT_FAILED = ( + "Failed to detect object literal methods: {error}" +) JS_OBJECT_ARROW_FOUND = ( " Found Object Arrow Function: {function_name} (qn: {function_qn})" ) JS_ASSIGNMENT_ARROW_FOUND = ( " Found Assignment Arrow Function: {function_name} (qn: {function_qn})" ) -JS_ASSIGNMENT_FUNC_EXPR_FOUND = ( - " Found Assignment Function Expression: {function_name} (qn: {function_qn})" -) +JS_ASSIGNMENT_FUNC_EXPR_FOUND = " Found Assignment Function Expression: {function_name} (qn: {function_qn})" JS_ASSIGNMENT_ARROW_QUERY_FAILED = ( "Failed to process assignment arrow functions query: {error}" ) @@ -545,16 +607,22 @@ JS_COMMONJS_DESTRUCTURE_FAILED = ( "Failed to process CommonJS destructuring pattern: {error}" ) -JS_MISSING_IMPORT_PATTERNS_FAILED = "Failed to detect missing import patterns: {error}" +JS_MISSING_IMPORT_PATTERNS_FAILED = ( + "Failed to detect missing import patterns: {error}" +) JS_COMMONJS_VAR_DECLARATOR_FAILED = ( "Failed to process variable declarator for CommonJS: {error}" ) -JS_COMMONJS_IMPORT_FAILED = "Failed to process CommonJS import {imported_name}: {error}" -JS_MISSING_IMPORT_PATTERN = ( - "Missing pattern: {module_qn} IMPORTS {imported_name} from {resolved_source_module}" +JS_COMMONJS_IMPORT_FAILED = ( + "Failed to process CommonJS import {imported_name}: {error}" +) +JS_MISSING_IMPORT_PATTERN = "Missing pattern: {module_qn} IMPORTS {imported_name} from {resolved_source_module}" +JS_COMMONJS_EXPORTS_QUERY_FAILED = ( + "Failed to process CommonJS exports query: {error}" +) +JS_COMMONJS_EXPORTS_DETECT_FAILED = ( + "Failed to detect CommonJS exports: {error}" ) -JS_COMMONJS_EXPORTS_QUERY_FAILED = "Failed to process CommonJS exports query: {error}" -JS_COMMONJS_EXPORTS_DETECT_FAILED = "Failed to detect CommonJS exports: {error}" JS_ES6_EXPORTS_QUERY_FAILED = "Failed to process ES6 exports query: {error}" JS_ES6_EXPORTS_DETECT_FAILED = "Failed to detect ES6 exports: {error}" @@ -576,12 +644,17 @@ MCP_ERROR_WRITE = "[MCP] Error writing file: {error}" MCP_LIST_DIR = "[MCP] list_directory: {path}" MCP_ERROR_LIST_DIR = "[MCP] Error listing directory: {error}" +MCP_SEMANTIC_NOT_AVAILABLE = "[MCP] Semantic search not available. Install with: uv sync --extra semantic" +MCP_UPDATING_REPO = "[MCP] Updating repository at: {path}" +MCP_ERROR_UPDATING = "[MCP] Error updating repository: {error}" +MCP_SEMANTIC_SEARCH = "[MCP] semantic_search: {query}" + # (H) MCP server logs -MCP_SERVER_INFERRED_ROOT = "[GraphCode MCP] Using inferred project root: {path}" -MCP_SERVER_NO_ROOT = ( - "[GraphCode MCP] No project root configured, using current directory: {path}" +MCP_SERVER_INFERRED_ROOT = ( + "[GraphCode MCP] Using inferred project root: {path}" ) +MCP_SERVER_NO_ROOT = "[GraphCode MCP] No project root configured, using current directory: {path}" MCP_SERVER_ROOT_RESOLVED = "[GraphCode MCP] Project root resolved to: {path}" MCP_SERVER_USING_ROOT = "[GraphCode MCP] Using project root: {path}" MCP_SERVER_CONFIG_ERROR = "[GraphCode MCP] Configuration error: {error}" @@ -589,9 +662,13 @@ MCP_SERVER_INIT_SUCCESS = "[GraphCode MCP] Services initialized successfully" MCP_SERVER_CALLING_TOOL = "[GraphCode MCP] Calling tool: {name}" MCP_SERVER_UNKNOWN_TOOL = "[GraphCode MCP] Unknown tool: {name}" -MCP_SERVER_TOOL_ERROR = "[GraphCode MCP] Error executing tool '{name}': {error}" +MCP_SERVER_TOOL_ERROR = ( + "[GraphCode MCP] Error executing tool '{name}': {error}" +) MCP_SERVER_STARTING = "[GraphCode MCP] Starting MCP server..." -MCP_SERVER_CREATED = "[GraphCode MCP] Server created, starting stdio transport..." +MCP_SERVER_CREATED = ( + "[GraphCode MCP] Server created, starting stdio transport..." +) MCP_SERVER_CONNECTED = "[GraphCode MCP] Connected to Memgraph at {host}:{port}" MCP_SERVER_FATAL_ERROR = "[GraphCode MCP] Fatal error: {error}" MCP_SERVER_SHUTDOWN = "[GraphCode MCP] Shutting down server..." diff --git a/codebase_rag/mcp/tools.py b/codebase_rag/mcp/tools.py index b102a48f1..6ae89fce3 100644 --- a/codebase_rag/mcp/tools.py +++ b/codebase_rag/mcp/tools.py @@ -12,7 +12,10 @@ from codebase_rag.services.graph_service import MemgraphIngestor from codebase_rag.services.llm import CypherGenerator from codebase_rag.tools import tool_descriptions as td -from codebase_rag.tools.code_retrieval import CodeRetriever, create_code_retrieval_tool +from codebase_rag.tools.code_retrieval import ( + CodeRetriever, + create_code_retrieval_tool, +) from codebase_rag.tools.codebase_query import create_query_tool from codebase_rag.tools.directory_lister import ( DirectoryLister, @@ -29,6 +32,7 @@ MCPToolSchema, QueryResultDict, ) +from codebase_rag.utils.dependencies import has_semantic_dependencies class MCPToolsRegistry: @@ -53,14 +57,35 @@ def __init__( self._query_tool = create_query_tool( ingestor=ingestor, cypher_gen=cypher_gen, console=None ) - self._code_tool = create_code_retrieval_tool(code_retriever=self.code_retriever) - self._file_editor_tool = create_file_editor_tool(file_editor=self.file_editor) - self._file_reader_tool = create_file_reader_tool(file_reader=self.file_reader) - self._file_writer_tool = create_file_writer_tool(file_writer=self.file_writer) + self._code_tool = create_code_retrieval_tool( + code_retriever=self.code_retriever + ) + self._file_editor_tool = create_file_editor_tool( + file_editor=self.file_editor + ) + self._file_reader_tool = create_file_reader_tool( + file_reader=self.file_reader + ) + self._file_writer_tool = create_file_writer_tool( + file_writer=self.file_writer + ) self._directory_lister_tool = create_directory_lister_tool( directory_lister=self.directory_lister ) + self._semantic_search_tool = None + self._semantic_search_available = False + + if has_semantic_dependencies(): + from codebase_rag.tools.semantic_search import ( + create_semantic_search_tool, + ) + + self._semantic_search_tool = create_semantic_search_tool() + self._semantic_search_available = True + else: + logger.info(lg.MCP_SEMANTIC_NOT_AVAILABLE) + self._tools: dict[str, ToolMetadata] = { cs.MCPToolName.INDEX_REPOSITORY: ToolMetadata( name=cs.MCPToolName.INDEX_REPOSITORY, @@ -73,6 +98,17 @@ def __init__( handler=self.index_repository, returns_json=False, ), + cs.MCPToolName.UPDATE_REPOSITORY: ToolMetadata( + name=cs.MCPToolName.UPDATE_REPOSITORY, + description=td.MCP_TOOLS[cs.MCPToolName.UPDATE_REPOSITORY], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={}, + required=[], + ), + handler=self.update_repository, + returns_json=False, + ), cs.MCPToolName.QUERY_CODE_GRAPH: ToolMetadata( name=cs.MCPToolName.QUERY_CODE_GRAPH, description=td.MCP_TOOLS[cs.MCPToolName.QUERY_CODE_GRAPH], @@ -198,6 +234,28 @@ def __init__( returns_json=False, ), } + if self._semantic_search_available: + self._tools[cs.MCPToolName.SEMANTIC_SEARCH] = ToolMetadata( + name=cs.MCPToolName.SEMANTIC_SEARCH, + description=td.MCP_TOOLS[cs.MCPToolName.SEMANTIC_SEARCH], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={ + cs.MCPParamName.NATURAL_LANGUAGE_QUERY: MCPInputSchemaProperty( + type=cs.MCPSchemaType.STRING, + description=td.MCP_PARAM_NATURAL_LANGUAGE_QUERY, + ), + cs.MCPParamName.TOP_K: MCPInputSchemaProperty( + type=cs.MCPSchemaType.INTEGER, + description=td.MCP_PARAM_TOP_K, + default="5", + ), + }, + required=[cs.MCPParamName.NATURAL_LANGUAGE_QUERY], + ), + handler=self.semantic_search, + returns_json=False, + ) async def index_repository(self) -> str: logger.info(lg.MCP_INDEXING_REPO.format(path=self.project_root)) @@ -220,10 +278,33 @@ async def index_repository(self) -> str: logger.error(lg.MCP_ERROR_INDEXING.format(error=e)) return cs.MCP_INDEX_ERROR.format(error=e) - async def query_code_graph(self, natural_language_query: str) -> QueryResultDict: - logger.info(lg.MCP_QUERY_CODE_GRAPH.format(query=natural_language_query)) + async def update_repository(self) -> str: + logger.info(lg.MCP_UPDATING_REPO.format(path=self.project_root)) + + try: + updater = GraphUpdater( + ingestor=self.ingestor, + repo_path=Path(self.project_root), + parsers=self.parsers, + queries=self.queries, + ) + updater.run() + + return cs.MCP_UPDATE_SUCCESS.format(path=self.project_root) + except Exception as e: + logger.error(lg.MCP_ERROR_UPDATING.format(error=e)) + return cs.MCP_UPDATE_ERROR.format(error=e) + + async def query_code_graph( + self, natural_language_query: str + ) -> QueryResultDict: + logger.info( + lg.MCP_QUERY_CODE_GRAPH.format(query=natural_language_query) + ) try: - graph_data = await self._query_tool.function(natural_language_query) + graph_data = await self._query_tool.function( + natural_language_query + ) result_dict: QueryResultDict = graph_data.model_dump() logger.info( lg.MCP_QUERY_RESULTS.format( @@ -242,10 +323,14 @@ async def query_code_graph(self, natural_language_query: str) -> QueryResultDict ), ) - async def get_code_snippet(self, qualified_name: str) -> CodeSnippetResultDict: + async def get_code_snippet( + self, qualified_name: str + ) -> CodeSnippetResultDict: logger.info(lg.MCP_GET_CODE_SNIPPET.format(name=qualified_name)) try: - snippet = await self._code_tool.function(qualified_name=qualified_name) + snippet = await self._code_tool.function( + qualified_name=qualified_name + ) result: CodeSnippetResultDict | None = snippet.model_dump() if result is None: return CodeSnippetResultDict( @@ -278,9 +363,14 @@ async def surgical_replace_code( return te.ERROR_WRAPPER.format(message=e) async def read_file( - self, file_path: str, offset: int | None = None, limit: int | None = None + self, + file_path: str, + offset: int | None = None, + limit: int | None = None, ) -> str: - logger.info(lg.MCP_READ_FILE.format(path=file_path, offset=offset, limit=limit)) + logger.info( + lg.MCP_READ_FILE.format(path=file_path, offset=offset, limit=limit) + ) try: if offset is not None or limit is not None: full_path = Path(self.project_root) / file_path @@ -290,7 +380,9 @@ async def read_file( skipped_count = sum(1 for _ in itertools.islice(f, start)) if limit is not None: - sliced_lines = [line for _, line in zip(range(limit), f)] + sliced_lines = [ + line for _, line in zip(range(limit), f) + ] else: sliced_lines = list(f) @@ -298,7 +390,9 @@ async def read_file( remaining_lines_count = sum(1 for _ in f) total_lines = ( - skipped_count + len(sliced_lines) + remaining_lines_count + skipped_count + + len(sliced_lines) + + remaining_lines_count ) header = cs.MCP_PAGINATION_HEADER.format( @@ -308,7 +402,9 @@ async def read_file( ) return header + paginated_content else: - result = await self._file_reader_tool.function(file_path=file_path) + result = await self._file_reader_tool.function( + file_path=file_path + ) return str(result) except Exception as e: @@ -333,12 +429,29 @@ async def list_directory( ) -> str: logger.info(lg.MCP_LIST_DIR.format(path=directory_path)) try: - result = self._directory_lister_tool.function(directory_path=directory_path) + result = self._directory_lister_tool.function( + directory_path=directory_path + ) return str(result) except Exception as e: logger.error(lg.MCP_ERROR_LIST_DIR.format(error=e)) return te.ERROR_WRAPPER.format(message=e) + async def semantic_search( + self, natural_language_query: str, top_k: int = 5 + ) -> str: + if self._semantic_search_tool is None: + return cs.MCP_SEMANTIC_NOT_AVAILABLE_RESPONSE + + logger.info( + lg.MCP_SEMANTIC_SEARCH.format(query=natural_language_query) + ) + + result = await self._semantic_search_tool.function( + query=natural_language_query, top_k=top_k + ) + return str(result) + def get_tool_schemas(self) -> list[MCPToolSchema]: return [ MCPToolSchema( @@ -349,9 +462,15 @@ def get_tool_schemas(self) -> list[MCPToolSchema]: for metadata in self._tools.values() ] - def get_tool_handler(self, name: str) -> tuple[MCPHandlerType, bool] | None: + def get_tool_handler( + self, name: str + ) -> tuple[MCPHandlerType, bool] | None: metadata = self._tools.get(name) - return None if metadata is None else (metadata.handler, metadata.returns_json) + return ( + None + if metadata is None + else (metadata.handler, metadata.returns_json) + ) def create_mcp_tools_registry( diff --git a/codebase_rag/tools/tool_descriptions.py b/codebase_rag/tools/tool_descriptions.py index c80eb76a7..e4c8c979d 100644 --- a/codebase_rag/tools/tool_descriptions.py +++ b/codebase_rag/tools/tool_descriptions.py @@ -18,9 +18,7 @@ class AgenticToolName(StrEnum): GET_CODE_SNIPPET = "get_code_snippet" -ANALYZE_DOCUMENT = ( - "Analyzes documents (PDFs, images) to answer questions about their content." -) +ANALYZE_DOCUMENT = "Analyzes documents (PDFs, images) to answer questions about their content." CODEBASE_QUERY = ( "Query the codebase knowledge graph using natural language questions. " @@ -72,12 +70,19 @@ class AgenticToolName(StrEnum): # (H) MCP tool descriptions MCP_INDEX_REPOSITORY = ( + "WARNING: Clears the entire database including embeddings. " "Parse and ingest the repository into the Memgraph knowledge graph. " - "This builds a comprehensive graph of functions, classes, dependencies, and relationships." + "Use update_repository for incremental updates. Only use when explicitly requested." +) + +MCP_UPDATE_REPOSITORY = ( + "Update the repository in the Memgraph knowledge graph without clearing existing data. " + "Use this for incremental updates." ) MCP_QUERY_CODE_GRAPH = ( "Query the codebase knowledge graph using natural language. " + "Use semantic_search unless you know the exact names of classes/functions you are searching for. " "Ask questions like 'What functions call UserService.create_user?' or " "'Show me all classes that implement the Repository interface'." ) @@ -92,15 +97,21 @@ class AgenticToolName(StrEnum): "Only modifies the exact target block, leaving the rest unchanged." ) -MCP_READ_FILE = ( - "Read the contents of a file from the project. Supports pagination for large files." +MCP_SEMANTIC_SEARCH = ( + "Performs a semantic search for functions based on a natural language query " + "describing their purpose, returning a list of potential matches with similarity scores. " + "Requires the 'semantic' extra to be installed." ) +MCP_READ_FILE = "Read the contents of a file from the project. Supports pagination for large files." + MCP_WRITE_FILE = "Write content to a file, creating it if it doesn't exist." MCP_LIST_DIRECTORY = "List contents of a directory in the project." -MCP_PARAM_NATURAL_LANGUAGE_QUERY = "Your question in plain English about the codebase" +MCP_PARAM_NATURAL_LANGUAGE_QUERY = ( + "Your question in plain English about the codebase" +) MCP_PARAM_QUALIFIED_NAME = ( "Fully qualified name (e.g., 'app.services.UserService.create_user')" ) @@ -110,17 +121,22 @@ class AgenticToolName(StrEnum): MCP_PARAM_OFFSET = "Line number to start reading from (0-based, optional)" MCP_PARAM_LIMIT = "Maximum number of lines to read (optional)" MCP_PARAM_CONTENT = "Content to write to the file" -MCP_PARAM_DIRECTORY_PATH = "Relative path to directory from project root (default: '.')" +MCP_PARAM_DIRECTORY_PATH = ( + "Relative path to directory from project root (default: '.')" +) +MCP_PARAM_TOP_K = "Max number of results to return (optional, default: 5)" MCP_TOOLS: dict[MCPToolName, str] = { MCPToolName.INDEX_REPOSITORY: MCP_INDEX_REPOSITORY, + MCPToolName.UPDATE_REPOSITORY: MCP_UPDATE_REPOSITORY, MCPToolName.QUERY_CODE_GRAPH: MCP_QUERY_CODE_GRAPH, MCPToolName.GET_CODE_SNIPPET: MCP_GET_CODE_SNIPPET, MCPToolName.SURGICAL_REPLACE_CODE: MCP_SURGICAL_REPLACE_CODE, MCPToolName.READ_FILE: MCP_READ_FILE, MCPToolName.WRITE_FILE: MCP_WRITE_FILE, MCPToolName.LIST_DIRECTORY: MCP_LIST_DIRECTORY, + MCPToolName.SEMANTIC_SEARCH: MCP_SEMANTIC_SEARCH, } AGENTIC_TOOLS: dict[AgenticToolName, str] = { From dc48e85e5669e660434d900333c9222d60c0e157 Mon Sep 17 00:00:00 2001 From: lux Date: Fri, 2 Jan 2026 12:12:15 +0100 Subject: [PATCH 2/2] changed formatting --- codebase_rag/constants.py | 90 +++------- codebase_rag/logs.py | 220 ++++++++---------------- codebase_rag/mcp/tools.py | 74 ++------ codebase_rag/tools/tool_descriptions.py | 16 +- 4 files changed, 129 insertions(+), 271 deletions(-) diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index 99432d811..89b3195fe 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -229,10 +229,10 @@ class GoogleProviderType(StrEnum): UI_DIFF_FILE_HEADER = "[bold cyan]File: {path}[/bold cyan]" UI_NEW_FILE_HEADER = "[bold cyan]New file: {path}[/bold cyan]" UI_SHELL_COMMAND_HEADER = "[bold cyan]Shell command:[/bold cyan]" -UI_TOOL_APPROVAL = ( - "[bold yellow]⚠️ Tool '{tool_name}' requires approval:[/bold yellow]" +UI_TOOL_APPROVAL = "[bold yellow]⚠️ Tool '{tool_name}' requires approval:[/bold yellow]" +UI_FEEDBACK_PROMPT = ( + "[bold yellow]Feedback (why rejected, or press Enter to skip)[/bold yellow]" ) -UI_FEEDBACK_PROMPT = "[bold yellow]Feedback (why rejected, or press Enter to skip)[/bold yellow]" UI_OPTIMIZATION_START = ( "[bold green]Starting {language} optimization session...[/bold green]" ) @@ -246,9 +246,7 @@ class GoogleProviderType(StrEnum): "[bold green]Graph exported successfully to: {path}[/bold green]" ) UI_GRAPH_EXPORT_STATS = "[bold cyan]Export contains {nodes} nodes and {relationships} relationships[/bold cyan]" -UI_ERR_UNEXPECTED = ( - "[bold red]An unexpected error occurred: {error}[/bold red]" -) +UI_ERR_UNEXPECTED = "[bold red]An unexpected error occurred: {error}[/bold red]" UI_ERR_EXPORT_FAILED = "[bold red]Failed to export graph: {error}[/bold red]" UI_TOOL_ARGS_FORMAT = " Arguments: {args}" UI_REFERENCE_DOC_INFO = " using the reference document: {reference_document}" @@ -739,9 +737,7 @@ class EventType(StrEnum): CREATED = "created" -CYPHER_DELETE_MODULE = ( - "MATCH (m:Module {path: $path})-[*0..]->(c) DETACH DELETE m, c" -) +CYPHER_DELETE_MODULE = "MATCH (m:Module {path: $path})-[*0..]->(c) DETACH DELETE m, c" CYPHER_DELETE_CALLS = "MATCH ()-[r:CALLS]->() DELETE r" REALTIME_LOGGER_FORMAT = ( @@ -875,9 +871,7 @@ class UniXcoderMode(StrEnum): CYPHER_MATCH_KEYWORD = "MATCH" # (H) Tool success messages -MSG_SURGICAL_SUCCESS = ( - "Successfully applied surgical code replacement in: {path}" -) +MSG_SURGICAL_SUCCESS = "Successfully applied surgical code replacement in: {path}" MSG_SURGICAL_FAILED = ( "Failed to apply surgical replacement in {path}. " "Target code not found or patches failed." @@ -896,9 +890,7 @@ class UniXcoderMode(StrEnum): # (H) Query tool messages QUERY_NOT_AVAILABLE = "N/A" DICT_KEY_RESULTS = "results" -QUERY_SUMMARY_SUCCESS = ( - "Successfully retrieved {count} item(s) from the graph." -) +QUERY_SUMMARY_SUCCESS = "Successfully retrieved {count} item(s) from the graph." QUERY_SUMMARY_TRANSLATION_FAILED = ( "I couldn't translate your request into a database query. Error: {error}" ) @@ -919,12 +911,8 @@ class UniXcoderMode(StrEnum): "Could not retrieve source code for node ID {id}. " "The node may not exist or source file may be unavailable." ) -MSG_SEMANTIC_SOURCE_FORMAT = ( - "Source code for node ID {id}:\n\n```\n{code}\n```" -) -MSG_SEMANTIC_RESULT_HEADER = ( - "Found {count} semantic matches for '{query}':\n\n" -) +MSG_SEMANTIC_SOURCE_FORMAT = "Source code for node ID {id}:\n\n```\n{code}\n```" +MSG_SEMANTIC_RESULT_HEADER = "Found {count} semantic matches for '{query}':\n\n" MSG_SEMANTIC_RESULT_FOOTER = "\n\nUse the qualified names above with other tools to get more details or source code." SEMANTIC_BATCH_SIZE = 100 SEMANTIC_TYPE_UNKNOWN = "Unknown" @@ -933,7 +921,9 @@ class UniXcoderMode(StrEnum): MSG_DOC_NO_CANDIDATES = "No valid text found in response candidates." MSG_DOC_NO_CONTENT = "No text content received from the API." MIME_TYPE_DEFAULT = "application/octet-stream" -DOC_PROMPT_PREFIX = "Based on the document provided, please answer the following question: {question}" +DOC_PROMPT_PREFIX = ( + "Based on the document provided, please answer the following question: {question}" +) # (H) Call processor constants MOD_RS = "mod.rs" @@ -1131,14 +1121,10 @@ class UniXcoderMode(StrEnum): LANG_MSG_CLASSES = "Classes: {nodes}" LANG_MSG_MODULES = "Modules: {nodes}" LANG_MSG_CALLS = "Calls: {nodes}" -LANG_MSG_LANG_ADDED = ( - "\nLanguage '{name}' has been added to the configuration!" -) +LANG_MSG_LANG_ADDED = "\nLanguage '{name}' has been added to the configuration!" LANG_MSG_UPDATED_CONFIG = "Updated {path}" LANG_MSG_REVIEW_PROMPT = "Please review the detected node types:" -LANG_MSG_REVIEW_HINT = ( - " The auto-detection is good but may need manual adjustments." -) +LANG_MSG_REVIEW_HINT = " The auto-detection is good but may need manual adjustments." LANG_MSG_EDIT_HINT = " Edit the configuration in: {path}" LANG_MSG_COMMON_ISSUES = "Look for these common issues:" LANG_MSG_ISSUE_MISCLASSIFIED = ( @@ -1156,9 +1142,7 @@ class UniXcoderMode(StrEnum): ) LANG_MSG_LANG_NOT_FOUND = "Language '{name}' not found." LANG_MSG_AVAILABLE_LANGS = "Available languages: {langs}" -LANG_MSG_REMOVED_FROM_CONFIG = ( - "Removed language '{name}' from configuration file." -) +LANG_MSG_REMOVED_FROM_CONFIG = "Removed language '{name}' from configuration file." LANG_MSG_REMOVING_SUBMODULE = "Removing git submodule '{path}'..." LANG_MSG_CLEANED_MODULES = "Cleaned up git modules directory: {path}" LANG_MSG_SUBMODULE_REMOVED = "Successfully removed submodule '{path}'" @@ -1174,24 +1158,16 @@ class UniXcoderMode(StrEnum): LANG_MSG_CLEANUP_CANCELLED = "Cleanup cancelled." # (H) Language CLI error messages -LANG_ERR_MISSING_ARGS = ( - "Error: Either language_name or --grammar-url must be provided" -) +LANG_ERR_MISSING_ARGS = "Error: Either language_name or --grammar-url must be provided" LANG_ERR_REINSTALL_FAILED = "Failed to reinstall submodule: {error}" -LANG_ERR_MANUAL_REMOVE_HINT = ( - "You may need to remove it manually and try again:" -) +LANG_ERR_MANUAL_REMOVE_HINT = "You may need to remove it manually and try again:" LANG_ERR_REPO_NOT_FOUND = "Error: Repository not found at {url}" -LANG_ERR_CUSTOM_URL_HINT = ( - "Try using a custom URL with: --grammar-url " -) +LANG_ERR_CUSTOM_URL_HINT = "Try using a custom URL with: --grammar-url " LANG_ERR_GIT = "Git error: {error}" LANG_ERR_NODE_TYPES_WARNING = ( "Warning: node-types.json not found in any expected location for {name}" ) -LANG_ERR_TREE_SITTER_JSON_WARNING = ( - "Warning: tree-sitter.json not found in {path}" -) +LANG_ERR_TREE_SITTER_JSON_WARNING = "Warning: tree-sitter.json not found in {path}" LANG_ERR_NO_GRAMMARS_WARNING = "Warning: No grammars found in tree-sitter.json" LANG_ERR_PARSE_NODE_TYPES = "Error parsing node-types.json: {error}" LANG_ERR_UPDATE_CONFIG = "Error updating config file: {error}" @@ -1202,13 +1178,13 @@ class UniXcoderMode(StrEnum): # (H) Language CLI prompts LANG_PROMPT_LANGUAGE_NAME = "Language name (e.g., 'c-sharp', 'python')" LANG_PROMPT_COMMON_NAME = "What is the common name for this language?" -LANG_PROMPT_EXTENSIONS = "What file extensions should be associated with this language? (comma-separated)" +LANG_PROMPT_EXTENSIONS = ( + "What file extensions should be associated with this language? (comma-separated)" +) LANG_PROMPT_FUNCTIONS = "Select nodes representing FUNCTIONS (comma-separated)" LANG_PROMPT_CLASSES = "Select nodes representing CLASSES (comma-separated)" LANG_PROMPT_MODULES = "Select nodes representing MODULES (comma-separated)" -LANG_PROMPT_CALLS = ( - "Select nodes representing FUNCTION CALLS (comma-separated)" -) +LANG_PROMPT_CALLS = "Select nodes representing FUNCTION CALLS (comma-separated)" LANG_PROMPT_CONTINUE = "Do you want to continue?" LANG_PROMPT_REMOVE_ORPHANS = "Do you want to remove these orphaned modules?" @@ -1257,12 +1233,8 @@ class CppNodeType(StrEnum): QUALIFIED_IDENTIFIER = "qualified_identifier" OPERATOR_NAME = "operator_name" DESTRUCTOR_NAME = "destructor_name" - CONSTRUCTOR_OR_DESTRUCTOR_DEFINITION = ( - "constructor_or_destructor_definition" - ) - CONSTRUCTOR_OR_DESTRUCTOR_DECLARATION = ( - "constructor_or_destructor_declaration" - ) + CONSTRUCTOR_OR_DESTRUCTOR_DEFINITION = "constructor_or_destructor_definition" + CONSTRUCTOR_OR_DESTRUCTOR_DECLARATION = "constructor_or_destructor_declaration" INLINE_METHOD_DEFINITION = "inline_method_definition" OPERATOR_CAST_DEFINITION = "operator_cast_definition" @@ -1295,9 +1267,7 @@ class CppNodeType(StrEnum): # (H) C++ keywords for class detection CPP_KEYWORD_CLASS = "class" CPP_KEYWORD_STRUCT = "struct" -CPP_EXPORTED_CLASS_KEYWORDS = frozenset( - {CPP_KEYWORD_CLASS, CPP_KEYWORD_STRUCT} -) +CPP_EXPORTED_CLASS_KEYWORDS = frozenset({CPP_KEYWORD_CLASS, CPP_KEYWORD_STRUCT}) CPP_FALLBACK_OPERATOR = "operator_unknown" CPP_FALLBACK_DESTRUCTOR = "~destructor" @@ -2041,9 +2011,7 @@ class CppNodeType(StrEnum): # (H) JS/TS module system node types TS_OBJECT_PATTERN = "object_pattern" -TS_SHORTHAND_PROPERTY_IDENTIFIER_PATTERN = ( - "shorthand_property_identifier_pattern" -) +TS_SHORTHAND_PROPERTY_IDENTIFIER_PATTERN = "shorthand_property_identifier_pattern" TS_PAIR_PATTERN = "pair_pattern" TS_FUNCTION_DECLARATION = "function_declaration" TS_GENERATOR_FUNCTION_DECLARATION = "generator_function_declaration" @@ -2226,9 +2194,7 @@ class MCPParamName(StrEnum): MCP_WRITE_SUCCESS = "Successfully wrote file: {path}" MCP_UNKNOWN_TOOL_ERROR = "Unknown tool: {name}" MCP_TOOL_EXEC_ERROR = "Error executing tool '{name}': {error}" -MCP_UPDATE_SUCCESS = ( - "Successfully updated repository at {path} (no database wipe)." -) +MCP_UPDATE_SUCCESS = "Successfully updated repository at {path} (no database wipe)." MCP_UPDATE_ERROR = "Error updating repository: {error}" MCP_SEMANTIC_NOT_AVAILABLE_RESPONSE = ( "Semantic search is not available. Install with: uv sync --extra semantic" diff --git a/codebase_rag/logs.py b/codebase_rag/logs.py index 3e4aa30c7..780dd0cf0 100644 --- a/codebase_rag/logs.py +++ b/codebase_rag/logs.py @@ -3,22 +3,20 @@ # (H) Graph loading logs LOADING_GRAPH = "Loading graph from {path}" -LOADED_GRAPH = ( - "Loaded {nodes} nodes and {relationships} relationships with indexes" -) +LOADED_GRAPH = "Loaded {nodes} nodes and {relationships} relationships with indexes" ENSURING_PROJECT = "Ensuring Project: {name}" # (H) Pass logs PASS_1_STRUCTURE = "--- Pass 1: Identifying Packages and Folders ---" -PASS_2_FILES = "\n--- Pass 2: Processing Files, Caching ASTs, and Collecting Definitions ---" +PASS_2_FILES = ( + "\n--- Pass 2: Processing Files, Caching ASTs, and Collecting Definitions ---" +) PASS_3_CALLS = "--- Pass 3: Processing Function Calls from AST Cache ---" PASS_4_EMBEDDINGS = "--- Pass 4: Generating semantic embeddings ---" # (H) Analysis logs FOUND_FUNCTIONS = "\n--- Found {count} functions/methods in codebase ---" -ANALYSIS_COMPLETE = ( - "\n--- Analysis complete. Flushing all data to database... ---" -) +ANALYSIS_COMPLETE = "\n--- Analysis complete. Flushing all data to database... ---" REMOVING_STATE = "Removing in-memory state for: {path}" REMOVED_FROM_CACHE = " - Removed from ast_cache" REMOVING_QNS = " - Removing {count} QNs from function_registry" @@ -42,12 +40,8 @@ SEMANTIC_NOT_AVAILABLE = ( "Semantic search dependencies not available, skipping embedding generation" ) -INGESTOR_NO_QUERY = ( - "Ingestor does not support querying, skipping embedding generation" -) -NO_FUNCTIONS_FOR_EMBEDDING = ( - "No functions or methods found for embedding generation" -) +INGESTOR_NO_QUERY = "Ingestor does not support querying, skipping embedding generation" +NO_FUNCTIONS_FOR_EMBEDDING = "No functions or methods found for embedding generation" GENERATING_EMBEDDINGS = "Generating embeddings for {count} functions/methods" EMBEDDING_PROGRESS = "Generated {done}/{total} embeddings" EMBEDDING_FAILED = "Failed to embed {name}: {error}" @@ -79,18 +73,16 @@ # (H) Parser loader logs BUILDING_BINDINGS = "Building Python bindings for {lang}..." -BUILD_FAILED = ( - "Failed to build {lang} bindings: stdout={stdout}, stderr={stderr}" -) +BUILD_FAILED = "Failed to build {lang} bindings: stdout={stdout}, stderr={stderr}" BUILD_SUCCESS = "Successfully built {lang} bindings" IMPORTING_MODULE = "Attempting to import module: {module}" LOADED_FROM_SUBMODULE = ( "Successfully loaded {lang} from submodule bindings using {attr}" ) -NO_LANG_ATTR = "Module {module} imported but has no language attribute. Available: {available}" -SUBMODULE_LOAD_FAILED = ( - "Failed to load {lang} from submodule bindings: {error}" +NO_LANG_ATTR = ( + "Module {module} imported but has no language attribute. Available: {available}" ) +SUBMODULE_LOAD_FAILED = "Failed to load {lang} from submodule bindings: {error}" LIB_NOT_AVAILABLE = "Tree-sitter library for {lang} not available." LOCALS_QUERY_FAILED = "Failed to create locals query for {lang}: {error}" GRAMMAR_LOADED = "Successfully loaded {lang} grammar." @@ -99,14 +91,10 @@ # (H) File watcher logs WATCHER_ACTIVE = "File watcher is now active." -WATCHER_SKIP_NO_QUERY = ( - "Ingestor does not support querying, skipping real-time update." -) +WATCHER_SKIP_NO_QUERY = "Ingestor does not support querying, skipping real-time update." CHANGE_DETECTED = "Change detected: {event_type} on {path}. Updating graph." DELETION_QUERY = "Ran deletion query for path: {path}" -RECALC_CALLS = ( - "Recalculating all function call relationships for consistency..." -) +RECALC_CALLS = "Recalculating all function call relationships for consistency..." GRAPH_UPDATED = "Graph updated successfully for change in: {name}" INITIAL_SCAN = "Performing initial full codebase scan..." INITIAL_SCAN_DONE = "Initial scan complete. Starting real-time watcher." @@ -154,9 +142,7 @@ # (H) Source extraction logs SOURCE_FILE_NOT_FOUND = "Source file not found: {path}" SOURCE_INVALID_RANGE = "Invalid line range: {start}-{end}" -SOURCE_RANGE_EXCEEDS = ( - "Line range {start}-{end} exceeds file length {length} in {path}" -) +SOURCE_RANGE_EXCEEDS = "Line range {start}-{end} exceeds file length {length} in {path}" SOURCE_EXTRACT_FAILED = "Failed to extract source from {path}: {error}" SOURCE_AST_FAILED = "AST extraction failed for {name}: {error}" @@ -177,23 +163,17 @@ MG_NODE_BUFFER_FLUSH = ( "Node buffer reached batch size ({size}). Performing incremental flush." ) -MG_REL_BUFFER_FLUSH = "Relationship buffer reached batch size ({size}). Performing incremental flush." -MG_NO_CONSTRAINT = ( - "No unique constraint defined for label '{label}'. Skipping flush." -) -MG_MISSING_PROP = ( - "Skipping {label} node missing required '{key}' property: {props}" +MG_REL_BUFFER_FLUSH = ( + "Relationship buffer reached batch size ({size}). Performing incremental flush." ) +MG_NO_CONSTRAINT = "No unique constraint defined for label '{label}'. Skipping flush." +MG_MISSING_PROP = "Skipping {label} node missing required '{key}' property: {props}" MG_NODES_FLUSHED = "Flushed {flushed} of {total} buffered nodes." MG_NODES_SKIPPED = ( "Skipped {count} buffered nodes due to missing identifiers or constraints." ) -MG_CALLS_FAILED = ( - "Failed to create {count} CALLS relationships - nodes may not exist" -) -MG_CALLS_SAMPLE = ( - " Sample {index}: {from_label}.{from_val} -> {to_label}.{to_val}" -) +MG_CALLS_FAILED = "Failed to create {count} CALLS relationships - nodes may not exist" +MG_CALLS_SAMPLE = " Sample {index}: {from_label}.{from_val} -> {to_label}.{to_val}" MG_RELS_FLUSHED = ( "Flushed {total} relationships ({success} successful, {failed} failed)." ) @@ -214,13 +194,9 @@ TOOL_FILE_READ_SUCCESS = "[FileReader] Successfully read text from {path}" TOOL_FILE_BINARY = "[FileReader] {message}" TOOL_FILE_WRITE = "[FileWriter] Creating file: {path}" -TOOL_FILE_WRITE_SUCCESS = ( - "[FileWriter] Successfully wrote {chars} characters to {path}" -) +TOOL_FILE_WRITE_SUCCESS = "[FileWriter] Successfully wrote {chars} characters to {path}" TOOL_FILE_EDIT = "[FileEditor] Attempting full file replacement: {path}" -TOOL_FILE_EDIT_SUCCESS = ( - "[FileEditor] Successfully replaced entire file: {path}" -) +TOOL_FILE_EDIT_SUCCESS = "[FileEditor] Successfully replaced entire file: {path}" TOOL_FILE_EDIT_SURGICAL = ( "[FileEditor] Attempting surgical block replacement in: {path}" ) @@ -238,9 +214,7 @@ "Process already terminated when timeout kill was attempted." ) TOOL_SHELL_ERROR = "An error occurred while executing command: {error}" -TOOL_DOC_ANALYZE = ( - "[DocumentAnalyzer] Analyzing '{path}' with question: '{question}'" -) +TOOL_DOC_ANALYZE = "[DocumentAnalyzer] Analyzing '{path}' with question: '{question}'" # (H) Shell timing log SHELL_TIMING = "'{func}' executed in {time:.2f}ms" @@ -260,9 +234,7 @@ "or specify line number for precise targeting." ) EDITOR_FUNC_NOT_IN_FILE = "Function '{name}' not found in {path}." -EDITOR_PATCHES_NOT_CLEAN = ( - "Patches for function '{name}' did not apply cleanly." -) +EDITOR_PATCHES_NOT_CLEAN = "Patches for function '{name}' did not apply cleanly." EDITOR_NO_CHANGES = "No changes detected after replacement." EDITOR_REPLACE_SUCCESS = "Successfully replaced function '{name}' in {path}." EDITOR_PATCH_FAILED = "Some patches failed to apply cleanly to {path}" @@ -271,7 +243,9 @@ EDITOR_FILE_NOT_FOUND = "File not found: {path}" EDITOR_BLOCK_NOT_FOUND = "Target block not found in {path}" EDITOR_LOOKING_FOR = "Looking for: {block}" -EDITOR_MULTIPLE_OCCURRENCES = "Multiple occurrences of target block found. Only replacing first occurrence." +EDITOR_MULTIPLE_OCCURRENCES = ( + "Multiple occurrences of target block found. Only replacing first occurrence." +) EDITOR_NO_CHANGES_IDENTICAL = ( "No changes detected - target and replacement are identical" ) @@ -287,14 +261,10 @@ SEMANTIC_FOUND = "Found {count} semantic matches for: {query}" SEMANTIC_FAILED = "Semantic search failed for query '{query}': {error}" SEMANTIC_NODE_NOT_FOUND = "No node found with ID: {id}" -SEMANTIC_INVALID_LOCATION = ( - "Missing or invalid source location info for node {id}" -) +SEMANTIC_INVALID_LOCATION = "Missing or invalid source location info for node {id}" SEMANTIC_SOURCE_FAILED = "Failed to get source code for node {id}: {error}" SEMANTIC_TOOL_SEARCH = "[Tool:SemanticSearch] Searching for: '{query}'" -SEMANTIC_TOOL_SOURCE = ( - "[Tool:GetFunctionSource] Retrieving source for node ID: {id}" -) +SEMANTIC_TOOL_SOURCE = "[Tool:GetFunctionSource] Retrieving source for node ID: {id}" # (H) Document analyzer logs DOC_COPIED = "Copied external file to: {path}" @@ -327,9 +297,7 @@ # (H) File writer logs FILE_WRITER_INIT = "FileWriter initialized with root: {root}" FILE_WRITER_CREATE = "[FileWriter] Creating file: {path}" -FILE_WRITER_SUCCESS = ( - "[FileWriter] Successfully wrote {chars} characters to {path}" -) +FILE_WRITER_SUCCESS = "[FileWriter] Successfully wrote {chars} characters to {path}" # (H) Error logs (used with logger.error/warning) UNEXPECTED = "An unexpected error occurred: {error}" @@ -340,15 +308,15 @@ ) IMAGE_NOT_FOUND = "Image path found, but does not exist: {path}" IMAGE_COPY_FAILED = "Failed to copy image to temporary directory: {error}" -FILE_OUTSIDE_ROOT = ( - "Security risk: Attempted to {action} file outside of project root." -) +FILE_OUTSIDE_ROOT = "Security risk: Attempted to {action} file outside of project root." # (H) Call processor logs CALL_PROCESSING_FILE = "Processing calls in cached AST for: {path}" CALL_PROCESSING_FAILED = "Failed to process calls in {path}: {error}" CALL_FOUND_NODES = "Found {count} call nodes in {language} for {caller}" -CALL_FOUND = "Found call from {caller} to {call_name} (resolved as {callee_type}:{callee_qn})" +CALL_FOUND = ( + "Found call from {caller} to {call_name} (resolved as {callee_type}:{callee_qn})" +) CALL_NESTED_FOUND = "Found nested call from {caller} to {call_name} (resolved as {callee_type}:{callee_qn})" CALL_DIRECT_IMPORT = "Direct import resolved: {call_name} -> {qn}" CALL_TYPE_INFERRED = "Type-inferred object method resolved: {call_name} -> {method_qn} (via {obj}:{var_type})" @@ -366,20 +334,18 @@ "Instance-resolved inherited self-attribute call: {call_name} -> {method_qn} " "(via {attr_ref}:{var_type})" ) -CALL_IMPORT_QUALIFIED = ( - "Import-resolved qualified call: {call_name} -> {method_qn}" -) +CALL_IMPORT_QUALIFIED = "Import-resolved qualified call: {call_name} -> {method_qn}" CALL_INSTANCE_QUALIFIED = "Instance-resolved qualified call: {call_name} -> {method_qn} (via {class_name}:{var_type})" CALL_INSTANCE_INHERITED = "Instance-resolved inherited call: {call_name} -> {method_qn} (via {class_name}:{var_type})" CALL_WILDCARD = "Wildcard-resolved call: {call_name} -> {qn}" CALL_SAME_MODULE = "Same-module resolution: {call_name} -> {qn}" CALL_TRIE_FALLBACK = "Trie-based fallback resolution: {call_name} -> {qn}" CALL_UNRESOLVED = "Could not resolve call: {call_name}" -CALL_CHAINED = "Resolved chained call: {call_name} -> {method_qn} (via {obj_expr}:{obj_type})" -CALL_CHAINED_INHERITED = "Resolved chained inherited call: {call_name} -> {method_qn} (via {obj_expr}:{obj_type})" -CALL_SUPER_NO_CONTEXT = ( - "No class context provided for super() call: {call_name}" +CALL_CHAINED = ( + "Resolved chained call: {call_name} -> {method_qn} (via {obj_expr}:{obj_type})" ) +CALL_CHAINED_INHERITED = "Resolved chained inherited call: {call_name} -> {method_qn} (via {obj_expr}:{obj_type})" +CALL_SUPER_NO_CONTEXT = "No class context provided for super() call: {call_name}" CALL_SUPER_NO_INHERITANCE = "No inheritance info for class {class_qn}" CALL_SUPER_NO_PARENTS = "No parent classes found for {class_qn}" CALL_SUPER_RESOLVED = "Resolved super() call: {call_name} -> {method_qn}" @@ -402,9 +368,7 @@ DEP_PARSE_ERROR_CSPROJ = "Error parsing .csproj {path}: {error}" # (H) Import processor logs -IMP_TOOL_NOT_AVAILABLE = ( - "External tool '{tool}' not available for stdlib introspection" -) +IMP_TOOL_NOT_AVAILABLE = "External tool '{tool}' not available for stdlib introspection" IMP_CACHE_LOADED = "Loaded stdlib cache from {path}" IMP_CACHE_LOAD_ERROR = "Could not load stdlib cache: {error}" IMP_CACHE_SAVED = "Saved stdlib cache to {path}" @@ -412,7 +376,9 @@ IMP_CACHE_CLEARED = "Cleared stdlib cache from disk" IMP_CACHE_CLEAR_ERROR = "Could not clear stdlib cache from disk: {error}" IMP_PARSED_COUNT = "Parsed {count} imports in {module}" -IMP_CREATED_RELATIONSHIP = " Created IMPORTS relationship: {from_module} -> {to_module} (from {full_name})" +IMP_CREATED_RELATIONSHIP = ( + " Created IMPORTS relationship: {from_module} -> {to_module} (from {full_name})" +) IMP_PARSE_FAILED = "Failed to parse imports in {module}: {error}" IMP_IMPORT = " Import: {local} -> {full}" IMP_ALIASED_IMPORT = " Aliased import: {alias} -> {full}" @@ -451,16 +417,12 @@ CLASS_FOUND_TEMPLATE = " Found Template {node_type}: {name} (qn: {qn})" CLASS_FOUND_EXPORTED_STRUCT = " Found Exported Struct: {name} (qn: {qn})" CLASS_FOUND_EXPORTED_UNION = " Found Exported Union: {name} (qn: {qn})" -CLASS_FOUND_EXPORTED_TEMPLATE = ( - " Found Exported Template Class: {name} (qn: {qn})" -) +CLASS_FOUND_EXPORTED_TEMPLATE = " Found Exported Template Class: {name} (qn: {qn})" CLASS_FOUND_EXPORTED_CLASS = " Found Exported Class: {name} (qn: {qn})" CLASS_FOUND_CLASS = " Found Class: {name} (qn: {qn})" CLASS_FOUND_INLINE_MODULE = " Found Inline Module: {name} (qn: {qn})" CLASS_PASS_4 = "--- Pass 4: Processing Method Override Relationships ---" -CLASS_METHOD_OVERRIDE = ( - "Method override: {method_qn} OVERRIDES {parent_method_qn}" -) +CLASS_METHOD_OVERRIDE = "Method override: {method_qn} OVERRIDES {parent_method_qn}" CLASS_CPP_INHERITANCE = "Found C++ inheritance: {parent_name} -> {parent_qn}" # (H) Java type inference logs @@ -473,9 +435,7 @@ JAVA_CLASS_FIELD = "Class field: {name} -> {type}" JAVA_ASSIGNMENT = "Assignment: {name} -> {type}" JAVA_NO_METHOD_NAME = "No method name found in call node" -JAVA_RESOLVING_CALL = ( - "Resolving Java method call: method={method}, object={object}" -) +JAVA_RESOLVING_CALL = "Resolving Java method call: method={method}, object={object}" JAVA_RESOLVING_STATIC = "Resolving static/local method: {method}" JAVA_FOUND_STATIC = "Found static/local method: {result}" JAVA_STATIC_NOT_FOUND = "Static/local method not found: {method}" @@ -485,14 +445,10 @@ JAVA_FOUND_INSTANCE = "Found instance method: {result}" JAVA_INSTANCE_NOT_FOUND = "Instance method not found: {type}.{method}" JAVA_ENHANCED_FOR_VAR = "Enhanced for loop variable: {name} -> {type}" -JAVA_ENHANCED_FOR_VAR_ALT = ( - "Enhanced for loop variable (alt): {name} -> {type}" -) +JAVA_ENHANCED_FOR_VAR_ALT = "Enhanced for loop variable (alt): {name} -> {type}" # (H) JS type inference logs -JS_VAR_DECLARATOR_FOUND = ( - "Found variable declarator: {var_name} in {module_qn}" -) +JS_VAR_DECLARATOR_FOUND = "Found variable declarator: {var_name} in {module_qn}" JS_VAR_INFERRED = "Inferred JS variable: {var_name} -> {var_type}" JS_VAR_INFER_FAILED = "Could not infer type for variable: {var_name}" JS_VAR_TYPE_MAP_BUILT = "Built JS variable type map with {count} variables (found {declarator_count} declarators total)" @@ -541,13 +497,9 @@ PY_RECURSION_GUARD = "Recursion guard (method call): skipping {method}" PY_RECURSION_GUARD_QN = "Recursion guard: skipping {method_qn}" PY_RESOLVED_METHOD = "Resolved {class_name}.{method_name} to {method_qn}" -PY_INFER_ATTR_FAILED = ( - "Failed to analyze instance variables for {attr}: {error}" -) +PY_INFER_ATTR_FAILED = "Failed to analyze instance variables for {attr}: {error}" PY_INFER_RETURN_FAILED = "Failed to infer return type for {method}: {error}" -PY_VAR_FROM_CONTEXT = ( - "Found variable type from method context: {var} -> {type}" -) +PY_VAR_FROM_CONTEXT = "Found variable type from method context: {var} -> {type}" PY_VAR_CANNOT_INFER = "Cannot infer type for variable reference: {var}" PY_NO_CONTAINING_CLASS = "No containing class found for method" PY_NO_INIT_METHOD = "No __init__ method found in class" @@ -555,9 +507,7 @@ PY_FOUND_CLASS_AT_LEVEL = "Found class_definition at level {level}" PY_SEARCHING_LEVEL = "Level {level}: node type = {node_type}" PY_NO_CLASS_IN_HIERARCHY = "No class_definition found in parent hierarchy" -PY_SEARCHING_INIT = ( - "Searching for __init__ method in class with {count} children" -) +PY_SEARCHING_INIT = "Searching for __init__ method in class with {count} children" PY_CHILD_TYPE = " Child type: {type}" PY_NO_CLASS_BODY = " No class body (block) found" PY_SEARCHING_BODY = " Searching in class body with {count} children" @@ -567,35 +517,23 @@ PY_INIT_NOT_FOUND = " No __init__ method found in class body" # (H) JS/TS ingest logs -JS_PROTOTYPE_INHERITANCE = ( - "Prototype inheritance: {child_qn} INHERITS {parent_qn}" -) -JS_PROTOTYPE_INHERITANCE_FAILED = ( - "Failed to detect prototype inheritance: {error}" -) -JS_PROTOTYPE_METHOD_FOUND = ( - " Found Prototype Method: {method_name} (qn: {method_qn})" -) -JS_PROTOTYPE_METHOD_DEFINES = ( - "Prototype method: {constructor_qn} DEFINES {method_qn}" -) +JS_PROTOTYPE_INHERITANCE = "Prototype inheritance: {child_qn} INHERITS {parent_qn}" +JS_PROTOTYPE_INHERITANCE_FAILED = "Failed to detect prototype inheritance: {error}" +JS_PROTOTYPE_METHOD_FOUND = " Found Prototype Method: {method_name} (qn: {method_qn})" +JS_PROTOTYPE_METHOD_DEFINES = "Prototype method: {constructor_qn} DEFINES {method_qn}" JS_PROTOTYPE_METHODS_FAILED = "Failed to detect prototype methods: {error}" -JS_OBJECT_METHOD_FOUND = ( - " Found Object Method: {method_name} (qn: {method_qn})" -) -JS_OBJECT_METHODS_PROCESS_FAILED = ( - "Failed to process object literal methods: {error}" -) -JS_OBJECT_METHODS_DETECT_FAILED = ( - "Failed to detect object literal methods: {error}" -) +JS_OBJECT_METHOD_FOUND = " Found Object Method: {method_name} (qn: {method_qn})" +JS_OBJECT_METHODS_PROCESS_FAILED = "Failed to process object literal methods: {error}" +JS_OBJECT_METHODS_DETECT_FAILED = "Failed to detect object literal methods: {error}" JS_OBJECT_ARROW_FOUND = ( " Found Object Arrow Function: {function_name} (qn: {function_qn})" ) JS_ASSIGNMENT_ARROW_FOUND = ( " Found Assignment Arrow Function: {function_name} (qn: {function_qn})" ) -JS_ASSIGNMENT_FUNC_EXPR_FOUND = " Found Assignment Function Expression: {function_name} (qn: {function_qn})" +JS_ASSIGNMENT_FUNC_EXPR_FOUND = ( + " Found Assignment Function Expression: {function_name} (qn: {function_qn})" +) JS_ASSIGNMENT_ARROW_QUERY_FAILED = ( "Failed to process assignment arrow functions query: {error}" ) @@ -607,22 +545,16 @@ JS_COMMONJS_DESTRUCTURE_FAILED = ( "Failed to process CommonJS destructuring pattern: {error}" ) -JS_MISSING_IMPORT_PATTERNS_FAILED = ( - "Failed to detect missing import patterns: {error}" -) +JS_MISSING_IMPORT_PATTERNS_FAILED = "Failed to detect missing import patterns: {error}" JS_COMMONJS_VAR_DECLARATOR_FAILED = ( "Failed to process variable declarator for CommonJS: {error}" ) -JS_COMMONJS_IMPORT_FAILED = ( - "Failed to process CommonJS import {imported_name}: {error}" -) -JS_MISSING_IMPORT_PATTERN = "Missing pattern: {module_qn} IMPORTS {imported_name} from {resolved_source_module}" -JS_COMMONJS_EXPORTS_QUERY_FAILED = ( - "Failed to process CommonJS exports query: {error}" -) -JS_COMMONJS_EXPORTS_DETECT_FAILED = ( - "Failed to detect CommonJS exports: {error}" +JS_COMMONJS_IMPORT_FAILED = "Failed to process CommonJS import {imported_name}: {error}" +JS_MISSING_IMPORT_PATTERN = ( + "Missing pattern: {module_qn} IMPORTS {imported_name} from {resolved_source_module}" ) +JS_COMMONJS_EXPORTS_QUERY_FAILED = "Failed to process CommonJS exports query: {error}" +JS_COMMONJS_EXPORTS_DETECT_FAILED = "Failed to detect CommonJS exports: {error}" JS_ES6_EXPORTS_QUERY_FAILED = "Failed to process ES6 exports query: {error}" JS_ES6_EXPORTS_DETECT_FAILED = "Failed to detect ES6 exports: {error}" @@ -644,17 +576,19 @@ MCP_ERROR_WRITE = "[MCP] Error writing file: {error}" MCP_LIST_DIR = "[MCP] list_directory: {path}" MCP_ERROR_LIST_DIR = "[MCP] Error listing directory: {error}" -MCP_SEMANTIC_NOT_AVAILABLE = "[MCP] Semantic search not available. Install with: uv sync --extra semantic" +MCP_SEMANTIC_NOT_AVAILABLE = ( + "[MCP] Semantic search not available. Install with: uv sync --extra semantic" +) MCP_UPDATING_REPO = "[MCP] Updating repository at: {path}" MCP_ERROR_UPDATING = "[MCP] Error updating repository: {error}" MCP_SEMANTIC_SEARCH = "[MCP] semantic_search: {query}" # (H) MCP server logs -MCP_SERVER_INFERRED_ROOT = ( - "[GraphCode MCP] Using inferred project root: {path}" +MCP_SERVER_INFERRED_ROOT = "[GraphCode MCP] Using inferred project root: {path}" +MCP_SERVER_NO_ROOT = ( + "[GraphCode MCP] No project root configured, using current directory: {path}" ) -MCP_SERVER_NO_ROOT = "[GraphCode MCP] No project root configured, using current directory: {path}" MCP_SERVER_ROOT_RESOLVED = "[GraphCode MCP] Project root resolved to: {path}" MCP_SERVER_USING_ROOT = "[GraphCode MCP] Using project root: {path}" MCP_SERVER_CONFIG_ERROR = "[GraphCode MCP] Configuration error: {error}" @@ -662,13 +596,9 @@ MCP_SERVER_INIT_SUCCESS = "[GraphCode MCP] Services initialized successfully" MCP_SERVER_CALLING_TOOL = "[GraphCode MCP] Calling tool: {name}" MCP_SERVER_UNKNOWN_TOOL = "[GraphCode MCP] Unknown tool: {name}" -MCP_SERVER_TOOL_ERROR = ( - "[GraphCode MCP] Error executing tool '{name}': {error}" -) +MCP_SERVER_TOOL_ERROR = "[GraphCode MCP] Error executing tool '{name}': {error}" MCP_SERVER_STARTING = "[GraphCode MCP] Starting MCP server..." -MCP_SERVER_CREATED = ( - "[GraphCode MCP] Server created, starting stdio transport..." -) +MCP_SERVER_CREATED = "[GraphCode MCP] Server created, starting stdio transport..." MCP_SERVER_CONNECTED = "[GraphCode MCP] Connected to Memgraph at {host}:{port}" MCP_SERVER_FATAL_ERROR = "[GraphCode MCP] Fatal error: {error}" MCP_SERVER_SHUTDOWN = "[GraphCode MCP] Shutting down server..." diff --git a/codebase_rag/mcp/tools.py b/codebase_rag/mcp/tools.py index 6ae89fce3..6cedb5f96 100644 --- a/codebase_rag/mcp/tools.py +++ b/codebase_rag/mcp/tools.py @@ -57,18 +57,10 @@ def __init__( self._query_tool = create_query_tool( ingestor=ingestor, cypher_gen=cypher_gen, console=None ) - self._code_tool = create_code_retrieval_tool( - code_retriever=self.code_retriever - ) - self._file_editor_tool = create_file_editor_tool( - file_editor=self.file_editor - ) - self._file_reader_tool = create_file_reader_tool( - file_reader=self.file_reader - ) - self._file_writer_tool = create_file_writer_tool( - file_writer=self.file_writer - ) + self._code_tool = create_code_retrieval_tool(code_retriever=self.code_retriever) + self._file_editor_tool = create_file_editor_tool(file_editor=self.file_editor) + self._file_reader_tool = create_file_reader_tool(file_reader=self.file_reader) + self._file_writer_tool = create_file_writer_tool(file_writer=self.file_writer) self._directory_lister_tool = create_directory_lister_tool( directory_lister=self.directory_lister ) @@ -295,16 +287,10 @@ async def update_repository(self) -> str: logger.error(lg.MCP_ERROR_UPDATING.format(error=e)) return cs.MCP_UPDATE_ERROR.format(error=e) - async def query_code_graph( - self, natural_language_query: str - ) -> QueryResultDict: - logger.info( - lg.MCP_QUERY_CODE_GRAPH.format(query=natural_language_query) - ) + async def query_code_graph(self, natural_language_query: str) -> QueryResultDict: + logger.info(lg.MCP_QUERY_CODE_GRAPH.format(query=natural_language_query)) try: - graph_data = await self._query_tool.function( - natural_language_query - ) + graph_data = await self._query_tool.function(natural_language_query) result_dict: QueryResultDict = graph_data.model_dump() logger.info( lg.MCP_QUERY_RESULTS.format( @@ -323,14 +309,10 @@ async def query_code_graph( ), ) - async def get_code_snippet( - self, qualified_name: str - ) -> CodeSnippetResultDict: + async def get_code_snippet(self, qualified_name: str) -> CodeSnippetResultDict: logger.info(lg.MCP_GET_CODE_SNIPPET.format(name=qualified_name)) try: - snippet = await self._code_tool.function( - qualified_name=qualified_name - ) + snippet = await self._code_tool.function(qualified_name=qualified_name) result: CodeSnippetResultDict | None = snippet.model_dump() if result is None: return CodeSnippetResultDict( @@ -368,9 +350,7 @@ async def read_file( offset: int | None = None, limit: int | None = None, ) -> str: - logger.info( - lg.MCP_READ_FILE.format(path=file_path, offset=offset, limit=limit) - ) + logger.info(lg.MCP_READ_FILE.format(path=file_path, offset=offset, limit=limit)) try: if offset is not None or limit is not None: full_path = Path(self.project_root) / file_path @@ -380,9 +360,7 @@ async def read_file( skipped_count = sum(1 for _ in itertools.islice(f, start)) if limit is not None: - sliced_lines = [ - line for _, line in zip(range(limit), f) - ] + sliced_lines = [line for _, line in zip(range(limit), f)] else: sliced_lines = list(f) @@ -390,9 +368,7 @@ async def read_file( remaining_lines_count = sum(1 for _ in f) total_lines = ( - skipped_count - + len(sliced_lines) - + remaining_lines_count + skipped_count + len(sliced_lines) + remaining_lines_count ) header = cs.MCP_PAGINATION_HEADER.format( @@ -402,9 +378,7 @@ async def read_file( ) return header + paginated_content else: - result = await self._file_reader_tool.function( - file_path=file_path - ) + result = await self._file_reader_tool.function(file_path=file_path) return str(result) except Exception as e: @@ -429,23 +403,17 @@ async def list_directory( ) -> str: logger.info(lg.MCP_LIST_DIR.format(path=directory_path)) try: - result = self._directory_lister_tool.function( - directory_path=directory_path - ) + result = self._directory_lister_tool.function(directory_path=directory_path) return str(result) except Exception as e: logger.error(lg.MCP_ERROR_LIST_DIR.format(error=e)) return te.ERROR_WRAPPER.format(message=e) - async def semantic_search( - self, natural_language_query: str, top_k: int = 5 - ) -> str: + async def semantic_search(self, natural_language_query: str, top_k: int = 5) -> str: if self._semantic_search_tool is None: return cs.MCP_SEMANTIC_NOT_AVAILABLE_RESPONSE - logger.info( - lg.MCP_SEMANTIC_SEARCH.format(query=natural_language_query) - ) + logger.info(lg.MCP_SEMANTIC_SEARCH.format(query=natural_language_query)) result = await self._semantic_search_tool.function( query=natural_language_query, top_k=top_k @@ -462,15 +430,9 @@ def get_tool_schemas(self) -> list[MCPToolSchema]: for metadata in self._tools.values() ] - def get_tool_handler( - self, name: str - ) -> tuple[MCPHandlerType, bool] | None: + def get_tool_handler(self, name: str) -> tuple[MCPHandlerType, bool] | None: metadata = self._tools.get(name) - return ( - None - if metadata is None - else (metadata.handler, metadata.returns_json) - ) + return None if metadata is None else (metadata.handler, metadata.returns_json) def create_mcp_tools_registry( diff --git a/codebase_rag/tools/tool_descriptions.py b/codebase_rag/tools/tool_descriptions.py index e4c8c979d..1919cacb5 100644 --- a/codebase_rag/tools/tool_descriptions.py +++ b/codebase_rag/tools/tool_descriptions.py @@ -18,7 +18,9 @@ class AgenticToolName(StrEnum): GET_CODE_SNIPPET = "get_code_snippet" -ANALYZE_DOCUMENT = "Analyzes documents (PDFs, images) to answer questions about their content." +ANALYZE_DOCUMENT = ( + "Analyzes documents (PDFs, images) to answer questions about their content." +) CODEBASE_QUERY = ( "Query the codebase knowledge graph using natural language questions. " @@ -103,15 +105,15 @@ class AgenticToolName(StrEnum): "Requires the 'semantic' extra to be installed." ) -MCP_READ_FILE = "Read the contents of a file from the project. Supports pagination for large files." +MCP_READ_FILE = ( + "Read the contents of a file from the project. Supports pagination for large files." +) MCP_WRITE_FILE = "Write content to a file, creating it if it doesn't exist." MCP_LIST_DIRECTORY = "List contents of a directory in the project." -MCP_PARAM_NATURAL_LANGUAGE_QUERY = ( - "Your question in plain English about the codebase" -) +MCP_PARAM_NATURAL_LANGUAGE_QUERY = "Your question in plain English about the codebase" MCP_PARAM_QUALIFIED_NAME = ( "Fully qualified name (e.g., 'app.services.UserService.create_user')" ) @@ -121,9 +123,7 @@ class AgenticToolName(StrEnum): MCP_PARAM_OFFSET = "Line number to start reading from (0-based, optional)" MCP_PARAM_LIMIT = "Maximum number of lines to read (optional)" MCP_PARAM_CONTENT = "Content to write to the file" -MCP_PARAM_DIRECTORY_PATH = ( - "Relative path to directory from project root (default: '.')" -) +MCP_PARAM_DIRECTORY_PATH = "Relative path to directory from project root (default: '.')" MCP_PARAM_TOP_K = "Max number of results to return (optional, default: 5)"