-
Notifications
You must be signed in to change notification settings - Fork 1.5k
feat: Implement the documented "add_repo_metadata" functionality #2304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 9 commits
1ef62f9
3b25b61
d1b8507
bebb2f3
10994bc
772dbaa
38af09e
7b9f60e
9a85654
5bdc1a9
7454325
87672ea
524ef53
29b6a1d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -174,6 +174,27 @@ def get_repo_settings(self): | |
| get_logger().error(f"Failed to get repo settings, error: {e}") | ||
| return "" | ||
|
|
||
| def get_repo_file(self, file_path: str) -> str: | ||
| try: | ||
| head_sha = self.pr.last_merge_commit | ||
| version = GitVersionDescriptor( | ||
| version=head_sha.commit_id, version_type="commit" | ||
| ) if head_sha else None | ||
| contents = self.azure_devops_client.get_item_content( | ||
| repository_id=self.repo_slug, | ||
| project=self.workspace_slug, | ||
| download=False, | ||
| include_content_metadata=False, | ||
| include_content=True, | ||
| path=file_path, | ||
| version_descriptor=version, | ||
| ) | ||
| content = list(contents)[0] | ||
| return content.decode("utf-8") if isinstance(content, bytes) else content | ||
|
Comment on lines
+177
to
+195
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1. Azure reads merged commit AzureDevOpsProvider.get_repo_file() reads files at self.pr.last_merge_commit (merge result) instead of the PR source/head commit, so metadata may reflect the merged preview rather than the branch under review. This is inconsistent with other providers in this PR that explicitly read from the PR/MR source branch or head SHA. Agent Prompt
|
||
| except Exception as e: | ||
| get_logger().debug(f"Failed to get repo file '{file_path}': {e}") | ||
| return "" | ||
|
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
|
||
|
|
||
| def get_files(self): | ||
| files = [] | ||
| for i in self.azure_devops_client.get_pull_request_commits( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -89,6 +89,23 @@ def get_repo_settings(self): | |
| except Exception: | ||
| return "" | ||
|
|
||
| def get_repo_file(self, file_path: str) -> str: | ||
| try: | ||
| # Read from the PR's source branch so metadata files reflect the branch under review | ||
| url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/" | ||
| f"{self.pr.source_branch}/{file_path}") | ||
| response = requests.request("GET", url, headers=self.headers) | ||
| if response.status_code == 404: | ||
| return "" | ||
| response.raise_for_status() | ||
| return response.text | ||
|
Comment on lines
+92
to
+101
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 4. Bitbucket file fetch can hang BitbucketProvider.get_repo_file performs an HTTP GET without a timeout, so enabling add_repo_metadata can block apply_repo_settings indefinitely on stalled connections. Because apply_repo_settings is called before request handling logic, this can stall the whole PR-agent flow. Agent Prompt
|
||
| except requests.exceptions.HTTPError as e: | ||
| get_logger().warning(f"Failed to get repo file '{file_path}': {e}") | ||
| return "" | ||
| except requests.exceptions.ConnectionError as e: | ||
| get_logger().warning(f"Connection error getting repo file '{file_path}': {e}") | ||
| return "" | ||
|
|
||
| def get_git_repo_url(self, pr_url: str=None) -> str: #bitbucket does not support issue url, so ignore param | ||
| try: | ||
| parsed_url = urlparse(self.pr_url) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -116,6 +116,19 @@ def get_repo_settings(self): | |
| get_logger().error(f"Failed to load .pr_agent.toml file, error: {e}") | ||
| return "" | ||
|
|
||
| def get_repo_file(self, file_path: str) -> str: | ||
| try: | ||
| head_sha = self.pr.fromRef['latestCommit'] | ||
| content = self.get_file(file_path, head_sha) | ||
|
Comment on lines
+121
to
+122
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 3. fromref['latestcommit'] unguarded access bitbucket_server_provider.get_repo_file() directly indexes self.pr.fromRef['latestCommit'] without checking the key exists or the shape is as expected. This can raise KeyError/TypeError on unexpected webhook/provider payload shapes. Agent Prompt
|
||
| return content.decode("utf-8") if isinstance(content, bytes) else (content or "") | ||
| except HTTPError as e: | ||
| if e.response.status_code != 404: | ||
| get_logger().error(f"Failed to load {file_path} file, error: {e}") | ||
| return "" | ||
| except Exception as e: | ||
| get_logger().error(f"Failed to load {file_path} file, error: {e}") | ||
| return "" | ||
|
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
|
||
|
|
||
| def get_pr_id(self): | ||
| return self.pr_num | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| import copy | ||
| import os | ||
| import posixpath | ||
| import tempfile | ||
| import traceback | ||
|
|
||
|
|
@@ -11,6 +12,28 @@ | |
| from pr_agent.log import get_logger | ||
|
|
||
|
|
||
| def _is_safe_repo_file_path(file_path: str) -> bool: | ||
| """ | ||
| Validate that a file path is safe to read from a repository root. | ||
| Rejects absolute paths, paths with '..' traversal components, and backslashes. | ||
| """ | ||
| if not file_path or not file_path.strip(): | ||
| return False | ||
|
Comment on lines
+20
to
+27
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2. _is_safe_repo_file_path() lacks type guard _is_safe_repo_file_path() calls .strip() on file_path without confirming it is a string, which can raise AttributeError if non-string values reach it (e.g., from configuration). The checklist requires defensive type checks before calling methods on external inputs. Agent Prompt
|
||
| # Reject absolute paths (Unix and Windows-style) | ||
| if os.path.isabs(file_path) or file_path.startswith("/") or file_path.startswith("\\"): | ||
| return False | ||
| if len(file_path) >= 2 and file_path[1] == ":": # e.g. C:\... | ||
| return False | ||
| # Reject backslashes (non-standard on most git providers, potential traversal vector) | ||
| if "\\" in file_path: | ||
| return False | ||
| # Normalize and reject any ".." components | ||
| normalized = posixpath.normpath(file_path) | ||
| if normalized.startswith("..") or "/.." in normalized: | ||
| return False | ||
| return True | ||
|
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
|
||
|
|
||
|
|
||
| def apply_repo_settings(pr_url): | ||
| os.environ["AUTO_CAST_FOR_DYNACONF"] = "false" | ||
| git_provider = get_git_provider_with_context(pr_url) | ||
|
|
@@ -85,6 +108,66 @@ def apply_repo_settings(pr_url): | |
| except Exception as e: | ||
| get_logger().error(f"Failed to remove temporary settings file {repo_settings_file}", e) | ||
|
|
||
| # Repository metadata: fetch well-known instruction files (AGENTS.md, QODO.md, CLAUDE.md, …) | ||
| # from the PR's head branch root and inject their contents into every tool's extra_instructions. | ||
| # See: https://qodo-merge-docs.qodo.ai/usage-guide/additional_configurations/#bringing-additional-repository-metadata-to-pr-agent | ||
| # | ||
| # Guard: apply_repo_settings() can be called multiple times per request (e.g. once in the | ||
| # server handler and again inside PRAgent.handle_request). The TOML settings are idempotent | ||
| # (set/overwrite), but metadata is *appended* to extra_instructions, so we must skip on | ||
| # repeated calls to avoid duplicating content in prompts. | ||
| repo_metadata_applied = False | ||
| try: | ||
| repo_metadata_applied = context.get("repo_metadata_applied", False) | ||
| except Exception: | ||
| # No request context (e.g. CLI mode) — fall back to a flag on the settings object | ||
| repo_metadata_applied = get_settings().get("config._repo_metadata_applied", False) | ||
| if not repo_metadata_applied and get_settings().config.get("add_repo_metadata", False): | ||
| try: | ||
| metadata_files = get_settings().config.get("add_repo_metadata_file_list", | ||
| ["AGENTS.md", "QODO.md", "CLAUDE.md"]) | ||
|
|
||
| # Collect contents of all metadata files that exist in the repo | ||
| metadata_content_parts = [] | ||
| for file_name in metadata_files: | ||
|
Comment on lines
+142
to
+179
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1. add_repo_metadata_file_list not validated add_repo_metadata_file_list is used directly without validating/normalizing its type/contents, which can lead to incorrect behavior (e.g., iterating over characters if a string is provided) or runtime errors. The checklist requires normalizing and validating user-provided settings before using them in logic. Agent Prompt
|
||
| if not _is_safe_repo_file_path(file_name): | ||
| get_logger().warning(f"Skipping unsafe metadata file path: '{file_name}'") | ||
| continue | ||
| content = git_provider.get_repo_file(file_name) | ||
| if content and content.strip(): | ||
| metadata_content_parts.append(content.strip()) | ||
| get_logger().info(f"Loaded repository metadata file: {file_name}") | ||
|
|
||
| # Append combined metadata to extra_instructions for every tool that supports it. | ||
| if metadata_content_parts: | ||
| combined_metadata = "\n\n".join(metadata_content_parts) | ||
| tool_sections = [ | ||
| "pr_reviewer", | ||
| "pr_description", | ||
| "pr_code_suggestions", | ||
| "pr_add_docs", | ||
| "pr_update_changelog", | ||
| "pr_test", | ||
| "pr_improve_component", | ||
| ] | ||
| for section in tool_sections: | ||
| section_obj = get_settings().get(section, None) | ||
| if section_obj is not None and hasattr(section_obj, "extra_instructions"): | ||
| existing = section_obj.extra_instructions or "" | ||
| if existing: | ||
| new_value = f"{existing}\n\n{combined_metadata}" | ||
| else: | ||
| new_value = combined_metadata | ||
| get_settings().set(f"{section}.extra_instructions", new_value) | ||
| # Mark as applied so repeated calls within the same request don't re-append | ||
| try: | ||
| context["repo_metadata_applied"] = True | ||
| except Exception: | ||
| pass | ||
| get_settings().set("config._repo_metadata_applied", True) | ||
| except Exception as e: | ||
|
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
|
||
| get_logger().debug(f"Failed to load repository metadata files: {e}") | ||
|
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
|
||
|
|
||
| # enable switching models with a short definition | ||
| if get_settings().config.model.lower() == 'claude-3-5-sonnet': | ||
| set_claude_model() | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.