Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions pr_agent/git_providers/azuredevops_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,27 @@ def get_repo_settings(self):
get_logger().error(f"Failed to get repo settings, error: {e}")
return ""

def get_repo_file(self, file_path: str) -> str:
    """
    Read a text file from the pull request's source (head) commit.

    The file is requested at ``self.pr.last_merge_source_commit`` so that the
    content reflects the branch under review. If that field is missing, the
    method falls back to ``self.pr.last_merge_commit`` and finally to the
    client's default version (no version descriptor).

    Args:
        file_path: Path of the file, relative to the repository root.

    Returns:
        The file content decoded as UTF-8, or "" if the file is empty,
        missing, or could not be fetched for any reason.
    """
    try:
        # `last_merge_commit` is the result of merging source into target, so
        # it can contain target-branch content. Prefer the source commit.
        head_commit = (
            getattr(self.pr, "last_merge_source_commit", None)
            or getattr(self.pr, "last_merge_commit", None)
        )
        version = GitVersionDescriptor(
            version=head_commit.commit_id, version_type="commit"
        ) if head_commit else None
        contents = self.azure_devops_client.get_item_content(
            repository_id=self.repo_slug,
            project=self.workspace_slug,
            download=False,
            include_content_metadata=False,
            include_content=True,
            path=file_path,
            version_descriptor=version,
        )
        # The client hands back an iterable of chunks; keeping only the first
        # one would truncate larger files. Join before decoding so a multi-byte
        # character split across two chunks still decodes correctly.
        chunks = list(contents)
        if not chunks:
            return ""
        if isinstance(chunks[0], bytes):
            return b"".join(chunks).decode("utf-8")
        return "".join(chunks)
    except Exception as e:
        # Best effort: a missing or unreadable metadata file must never break the flow.
        get_logger().debug(f"Failed to get repo file '{file_path}': {e}")
        return ""
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.

def get_files(self):
files = []
for i in self.azure_devops_client.get_pull_request_commits(
Expand Down
17 changes: 17 additions & 0 deletions pr_agent/git_providers/bitbucket_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,23 @@ def get_repo_settings(self):
except Exception:
return ""

def get_repo_file(self, file_path: str) -> str:
    """
    Read a text file from the pull request's source branch on Bitbucket Cloud.

    Args:
        file_path: Path of the file, relative to the repository root.

    Returns:
        The response body as text, or "" if the file does not exist (HTTP 404)
        or the request failed (HTTP error, connection error, timeout, ...).
    """
    # Upper bound in seconds for the request. Without it a stalled connection
    # would block the caller indefinitely.
    request_timeout = 30
    try:
        # Read from the PR's source branch so metadata files reflect the branch under review
        url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/"
               f"{self.pr.source_branch}/{file_path}")
        response = requests.request("GET", url, headers=self.headers, timeout=request_timeout)
        if response.status_code == 404:
            # A missing metadata file is the normal case, not an error.
            return ""
        response.raise_for_status()
        return response.text
    except requests.exceptions.HTTPError as e:
        get_logger().warning(f"Failed to get repo file '{file_path}': {e}")
        return ""
    except requests.exceptions.ConnectionError as e:
        get_logger().warning(f"Connection error getting repo file '{file_path}': {e}")
        return ""
    except requests.exceptions.RequestException as e:
        # Covers read timeouts and any other transport-level failure that is
        # neither an HTTPError nor a ConnectionError.
        get_logger().warning(f"Failed to get repo file '{file_path}': {e}")
        return ""

def get_git_repo_url(self, pr_url: str=None) -> str: #bitbucket does not support issue url, so ignore param
try:
parsed_url = urlparse(self.pr_url)
Expand Down
13 changes: 13 additions & 0 deletions pr_agent/git_providers/bitbucket_server_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,19 @@ def get_repo_settings(self):
get_logger().error(f"Failed to load .pr_agent.toml file, error: {e}")
return ""

def get_repo_file(self, file_path: str) -> str:
    """
    Read a text file at the latest commit of the pull request's source ref.

    Args:
        file_path: Path of the file, relative to the repository root.

    Returns:
        The file content as a string (bytes are decoded as UTF-8), or "" if
        the source commit is unknown, the file is missing, or the request
        failed.
    """
    try:
        # The PR payload comes from the server; do not assume its shape.
        try:
            head_sha = self.pr.fromRef['latestCommit']
        except (AttributeError, KeyError, TypeError):
            head_sha = None
        if not head_sha:
            get_logger().warning(
                f"Failed to load {file_path} file, error: PR source commit is unavailable")
            return ""
        content = self.get_file(file_path, head_sha)
        return content.decode("utf-8") if isinstance(content, bytes) else (content or "")
    except HTTPError as e:
        # An HTTPError may carry no response object; reading `.status_code`
        # directly would raise inside this handler and escape the method.
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        if status_code != 404:
            get_logger().error(f"Failed to load {file_path} file, error: {e}")
        return ""
    except Exception as e:
        get_logger().error(f"Failed to load {file_path} file, error: {e}")
        return ""
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.

def get_pr_id(self):
return self.pr_num

Expand Down
8 changes: 8 additions & 0 deletions pr_agent/git_providers/codecommit_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,14 @@ def get_repo_settings(self):
settings_filename = ".pr_agent.toml"
return self.codecommit_client.get_file(self.repo_name, settings_filename, self.pr.source_commit, optional=True)

def get_repo_file(self, file_path: str) -> str:
    """
    Read a text file from the pull request's source commit in CodeCommit.

    Args:
        file_path: Path of the file, relative to the repository root.

    Returns:
        The file content as a string (bytes are decoded as UTF-8), "" when the
        client returns nothing, or "" when a ValueError is raised.
    """
    try:
        raw = self.codecommit_client.get_file(
            self.repo_name,
            file_path,
            self.pr.source_commit,
            optional=True,
        )
        if isinstance(raw, bytes):
            return raw.decode("utf-8")
        return raw or ""
    except ValueError as e:
        get_logger().debug(f"Failed to get repo file '{file_path}': {e}")
        return ""

def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
get_logger().info("CodeCommit provider does not support eyes reaction yet")
return True
Expand Down
7 changes: 7 additions & 0 deletions pr_agent/git_providers/gerrit_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,13 @@ def get_repo_settings(self):
except OSError:
return b""

def get_repo_file(self, file_path: str) -> str:
    """
    Read a text file from the local repository checkout.

    Args:
        file_path: Path of the file, relative to ``self.repo_path``.

    Returns:
        The file content decoded as UTF-8, or "" if the file does not exist,
        cannot be opened, or is not valid UTF-8.
    """
    try:
        with open(self.repo_path / file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is not an OSError. Without it a binary or
        # mis-encoded file would raise instead of yielding "".
        return ""
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.

def get_diff_files(self) -> list[FilePatchInfo]:
diffs = self.repo.head.commit.diff(
self.repo.head.commit.parents[0], # previous commit
Expand Down
13 changes: 13 additions & 0 deletions pr_agent/git_providers/git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,19 @@ def _is_generated_by_pr_agent(self, description_lowercase: str) -> bool:
def get_repo_settings(self):
pass

def get_repo_file(self, file_path: str) -> str:
    """
    Return the text of a repository file as seen on the PR's head branch.

    This default implementation does not fetch anything and always reports
    the file as absent.

    Args:
        file_path: Location of the file, given relative to the repository root.

    Returns:
        The file's content as a UTF-8 string; "" when the file is missing or
        cannot be read.
    """
    return ""

def get_workspace_name(self):
return ""

Expand Down
18 changes: 18 additions & 0 deletions pr_agent/git_providers/gitea_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,24 @@ def get_repo_settings(self) -> str:

return response

def get_repo_file(self, file_path: str) -> str:
    """
    Fetch a file from the repository at the commit stored in ``self.sha``.

    Returns the content handed back by the repo API, or "" when the API
    returns nothing or raises. Only non-404 API errors are logged as warnings.
    """
    try:
        file_text = self.repo_api.get_file_content(
            owner=self.owner,
            repo=self.repo,
            commit_sha=self.sha,
            filepath=file_path,
        )
        return file_text or ""
    except ApiException as e:
        # 404 simply means the file is not there; stay quiet in that case.
        not_found = e.status == 404
        if not not_found:
            self.logger.warning(f"Failed to get repo file '{file_path}': {e}")
        return ""
    except Exception as e:
        self.logger.debug(f"Failed to get repo file '{file_path}': {e}")
        return ""

def get_user_id(self) -> str:
"""Get the ID of the authenticated user"""
return f"{self.pr.user.id}" if self.pr else ""
Expand Down
13 changes: 13 additions & 0 deletions pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,19 @@ def get_repo_settings(self):
except Exception:
return ""

def get_repo_file(self, file_path: str) -> str:
    """
    Fetch a file from the repository at the PR's head commit SHA.

    Returns the decoded content (bytes are decoded as UTF-8), or "" when the
    lookup raises. Only non-404 GitHub errors are logged as warnings.
    """
    try:
        # Use the head SHA so metadata files come from the branch under review
        head_ref = self.pr.head.sha
        file_obj = self.repo_obj.get_contents(file_path, ref=head_ref)
        payload = file_obj.decoded_content
        if isinstance(payload, bytes):
            return payload.decode("utf-8")
        return payload
    except GithubException as e:
        # A 404 means the file is absent, which is expected and not worth a log line.
        if e.status != 404:
            get_logger().warning(f"Failed to get repo file '{file_path}': {e}")
        return ""
    except Exception as e:
        get_logger().debug(f"Failed to get repo file '{file_path}': {e}")
        return ""
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.

def get_workspace_name(self):
return self.repo.split('/')[0]

Expand Down
13 changes: 13 additions & 0 deletions pr_agent/git_providers/gitlab_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,19 @@ def get_repo_settings(self):
except Exception:
return ""

def get_repo_file(self, file_path: str) -> str:
    """
    Read a text file from the merge request's source branch.

    Args:
        file_path: Path of the file, relative to the repository root.

    Returns:
        The file content as a string (bytes are decoded as UTF-8), or "" if
        the file is missing or could not be fetched.
    """
    try:
        # Read from the MR's source branch so metadata files reflect the branch under review
        contents = self.gl.projects.get(self.id_project).files.get(
            file_path=file_path, ref=self.mr.source_branch).decode()
        return contents.decode("utf-8") if isinstance(contents, bytes) else contents
    except GitlabGetError as e:
        # 404 is expected when the metadata file doesn't exist. Anything else
        # (permissions, server error) is logged, matching the sibling providers
        # that warn on non-404 failures.
        if getattr(e, "response_code", None) != 404:
            get_logger().warning(f"Failed to get repo file '{file_path}': {e}")
        return ""
    except Exception as e:
        get_logger().debug(f"Failed to get repo file '{file_path}': {e}")
        return ""
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.

def get_workspace_name(self):
return self.id_project.split('/')[0]

Expand Down
83 changes: 83 additions & 0 deletions pr_agent/git_providers/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import copy
import os
import posixpath
import tempfile
import traceback

Expand All @@ -11,6 +12,28 @@
from pr_agent.log import get_logger


def _is_safe_repo_file_path(file_path: str) -> bool:
"""
Validate that a file path is safe to read from a repository root.
Rejects absolute paths, paths with '..' traversal components, and backslashes.
"""
if not file_path or not file_path.strip():
return False
Comment on lines +20 to +27
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

2. _is_safe_repo_file_path() lacks type guard 📘 Rule violation ☼ Reliability

_is_safe_repo_file_path() calls .strip() on file_path without confirming it is a string, which
can raise AttributeError if non-string values reach it (e.g., from configuration). The checklist
requires defensive type checks before calling methods on external inputs.
Agent Prompt
## Issue description
`_is_safe_repo_file_path()` calls `file_path.strip()` without first ensuring `file_path` is a string. This can raise at runtime if configuration contains non-string entries.

## Issue Context
Even though the type annotation is `str`, configuration values are external inputs and can be malformed.

## Fix Focus Areas
- pr_agent/git_providers/utils.py[20-49]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

# Reject absolute paths (Unix and Windows-style)
if os.path.isabs(file_path) or file_path.startswith("/") or file_path.startswith("\\"):
return False
if len(file_path) >= 2 and file_path[1] == ":": # e.g. C:\...
return False
# Reject backslashes (non-standard on most git providers, potential traversal vector)
if "\\" in file_path:
return False
# Normalize and reject any ".." components
normalized = posixpath.normpath(file_path)
if normalized.startswith("..") or "/.." in normalized:
return False
return True
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.


def apply_repo_settings(pr_url):
os.environ["AUTO_CAST_FOR_DYNACONF"] = "false"
git_provider = get_git_provider_with_context(pr_url)
Expand Down Expand Up @@ -85,6 +108,66 @@ def apply_repo_settings(pr_url):
except Exception as e:
get_logger().error(f"Failed to remove temporary settings file {repo_settings_file}", e)

# Repository metadata: fetch well-known instruction files (AGENTS.md, QODO.md, CLAUDE.md, …)
# from the PR's head branch root and inject their contents into every tool's extra_instructions.
# See: https://qodo-merge-docs.qodo.ai/usage-guide/additional_configurations/#bringing-additional-repository-metadata-to-pr-agent
#
# Guard: apply_repo_settings() can be called multiple times per request (e.g. once in the
# server handler and again inside PRAgent.handle_request). The TOML settings are idempotent
# (set/overwrite), but metadata is *appended* to extra_instructions, so we must skip on
# repeated calls to avoid duplicating content in prompts.
repo_metadata_applied = False
try:
repo_metadata_applied = context.get("repo_metadata_applied", False)
except Exception:
# No request context (e.g. CLI mode) — fall back to a flag on the settings object
repo_metadata_applied = get_settings().get("config._repo_metadata_applied", False)
if not repo_metadata_applied and get_settings().config.get("add_repo_metadata", False):
try:
metadata_files = get_settings().config.get("add_repo_metadata_file_list",
["AGENTS.md", "QODO.md", "CLAUDE.md"])

# Collect contents of all metadata files that exist in the repo
metadata_content_parts = []
for file_name in metadata_files:
Comment on lines +142 to +179
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. add_repo_metadata_file_list not validated 📘 Rule violation ≡ Correctness

add_repo_metadata_file_list is used directly without validating/normalizing its type/contents,
which can lead to incorrect behavior (e.g., iterating over characters if a string is provided) or
runtime errors. The checklist requires normalizing and validating user-provided settings before
using them in logic.
Agent Prompt
## Issue description
`config.add_repo_metadata_file_list` is consumed without validation. If the setting is mis-typed (e.g., a string, `None`, or a list containing non-strings), metadata loading can behave incorrectly or crash.

## Issue Context
This setting is user-provided via TOML/env/config overrides and must be normalized to a predictable shape before iterating.

## Fix Focus Areas
- pr_agent/git_providers/utils.py[142-186]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

if not _is_safe_repo_file_path(file_name):
get_logger().warning(f"Skipping unsafe metadata file path: '{file_name}'")
continue
content = git_provider.get_repo_file(file_name)
if content and content.strip():
metadata_content_parts.append(content.strip())
get_logger().info(f"Loaded repository metadata file: {file_name}")

# Append combined metadata to extra_instructions for every tool that supports it.
if metadata_content_parts:
combined_metadata = "\n\n".join(metadata_content_parts)
tool_sections = [
"pr_reviewer",
"pr_description",
"pr_code_suggestions",
"pr_add_docs",
"pr_update_changelog",
"pr_test",
"pr_improve_component",
]
for section in tool_sections:
section_obj = get_settings().get(section, None)
if section_obj is not None and hasattr(section_obj, "extra_instructions"):
existing = section_obj.extra_instructions or ""
if existing:
new_value = f"{existing}\n\n{combined_metadata}"
else:
new_value = combined_metadata
get_settings().set(f"{section}.extra_instructions", new_value)
# Mark as applied so repeated calls within the same request don't re-append
try:
context["repo_metadata_applied"] = True
except Exception:
pass
get_settings().set("config._repo_metadata_applied", True)
except Exception as e:
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
get_logger().debug(f"Failed to load repository metadata files: {e}")
Comment thread
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.

# enable switching models with a short definition
if get_settings().config.model.lower() == 'claude-3-5-sonnet':
set_claude_model()
Expand Down
2 changes: 2 additions & 0 deletions pr_agent/settings/configuration.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ log_level="DEBUG"
use_wiki_settings_file=true
use_repo_settings_file=true
use_global_settings_file=true
add_repo_metadata=false # when true, searches the PR's head branch root for metadata files (by default: AGENTS.md, QODO.md, CLAUDE.md) and appends their content as extra instructions to all tools
add_repo_metadata_file_list=["AGENTS.md", "QODO.md", "CLAUDE.md"] # override the default list of metadata filenames to search for when add_repo_metadata is true
disable_auto_feedback = false
ai_timeout=120 # 2 minutes
skip_keys = []
Expand Down
Loading