Skip to content

Commit cdeff89

Browse files
committed
feat: 支持批量审查功能 (PR sunmh207#148 + PR sunmh207#130)
- 新增批量审查功能,按文件分批次审查代码 - 支持通过环境变量控制批量审查行为 * BATCH_REVIEW_ENABLED: 启用/禁用批量审查(默认启用) * BATCH_REVIEW_FILES_PER_BATCH: 每批次文件数量(默认1) - 新增 summary_merge_review_prompt 提示词模板用于汇总多批次审查结果 - 批量审查功能完美结合项目级 prompt 模板支持 - 更新所有 webhook handler(GitLab/GitHub/Gitea)使用批量审查方法 - 保留 Gitea 平台支持
1 parent 0367cf9 commit cdeff89

File tree

3 files changed

+183
-8
lines changed

3 files changed

+183
-8
lines changed

biz/queue/worker.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ def handle_push_event(webhook_data: dict, gitlab_token: str, gitlab_url: str, gi
4141
if len(changes) > 0:
4242
project_name = webhook_data['project']['name']
4343
commits_text = ';'.join(commit.get('message', '').strip() for commit in commits)
44-
review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text, project_name)
44+
code_reviewer = CodeReviewer()
45+
review_result = code_reviewer.review_changes_in_batches(changes, commits_text, project_name)
4546
score = CodeReviewer.parse_review_score(review_text=review_result)
4647
for item in changes:
4748
additions += item['additions']
@@ -134,10 +135,11 @@ def handle_merge_request_event(webhook_data: dict, gitlab_token: str, gitlab_url
134135
logger.error('Failed to get commits')
135136
return
136137

137-
# review 代码
138+
# review 代码 - 使用批量审查方法
138139
project_name = webhook_data['project']['name']
139140
commits_text = ';'.join(commit['title'] for commit in commits)
140-
review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text, project_name)
141+
code_reviewer = CodeReviewer()
142+
review_result = code_reviewer.review_changes_in_batches(changes, commits_text, project_name)
141143

142144
# 将review结果提交到Gitlab的 notes
143145
handler.add_merge_request_notes(f'Auto Review Result: \n{review_result}')
@@ -193,7 +195,8 @@ def handle_github_push_event(webhook_data: dict, github_token: str, github_url:
193195
if len(changes) > 0:
194196
project_name = webhook_data['repository']['name']
195197
commits_text = ';'.join(commit.get('message', '').strip() for commit in commits)
196-
review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text, project_name)
198+
code_reviewer = CodeReviewer()
199+
review_result = code_reviewer.review_changes_in_batches(changes, commits_text, project_name)
197200
score = CodeReviewer.parse_review_score(review_text=review_result)
198201
for item in changes:
199202
additions += item.get('additions', 0)
@@ -276,10 +279,11 @@ def handle_github_pull_request_event(webhook_data: dict, github_token: str, gith
276279
logger.error('Failed to get commits')
277280
return
278281

279-
# review 代码
282+
# review 代码 - 使用批量审查方法
280283
project_name = webhook_data['repository']['name']
281284
commits_text = ';'.join(commit['title'] for commit in commits)
282-
review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text, project_name)
285+
code_reviewer = CodeReviewer()
286+
review_result = code_reviewer.review_changes_in_batches(changes, commits_text, project_name)
283287

284288
# 将review结果提交到GitHub的 notes
285289
handler.add_pull_request_notes(f'Auto Review Result: \n{review_result}')
@@ -334,7 +338,8 @@ def handle_gitea_push_event(webhook_data: dict, gitea_token: str, gitea_url: str
334338
if len(changes) > 0:
335339
project_name = webhook_data.get('repository', {}).get('name')
336340
commits_text = ';'.join(commit.get('message', '').strip() for commit in commits)
337-
review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text, project_name)
341+
code_reviewer = CodeReviewer()
342+
review_result = code_reviewer.review_changes_in_batches(changes, commits_text, project_name)
338343
score = CodeReviewer.parse_review_score(review_text=review_result)
339344
for item in changes:
340345
additions += item.get('additions', 0)
@@ -413,7 +418,8 @@ def handle_gitea_pull_request_event(webhook_data: dict, gitea_token: str, gitea_
413418

414419
project_name = webhook_data.get('repository', {}).get('name')
415420
commits_text = ';'.join(commit.get('title', '') for commit in commits)
416-
review_result = CodeReviewer().review_and_strip_code(str(changes), commits_text, project_name)
421+
code_reviewer = CodeReviewer()
422+
review_result = code_reviewer.review_changes_in_batches(changes, commits_text, project_name)
417423

418424
handler.add_pull_request_notes(f'Auto Review Result: \n{review_result}')
419425

biz/utils/code_reviewer.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,123 @@ def review_code(self, diffs_text: str, commits_text: str = "", project_name: str
115115
]
116116
return self.call_llm(messages)
117117

118+
def review_changes_in_batches(self, changes: List[Dict[str, Any]], commits_text: str = "", project_name: str = "") -> str:
    """
    Review code changes batch by batch (grouped by file), then merge the results.

    Behaviour is controlled by environment variables:

    * ``BATCH_REVIEW_ENABLED``: batching is used only when the value is "1"
      (the default); otherwise everything is reviewed in one call through
      ``review_and_strip_code``.
    * ``BATCH_REVIEW_FILES_PER_BATCH``: number of files per batch (default 1).
      Values that are not integers, or that are smaller than 1, fall back to 1.
    * ``REVIEW_MAX_TOKENS``: per-batch token limit (default 10000); a batch
      whose text exceeds it is truncated before being reviewed.

    :param changes: list of change dicts, one per file
    :param commits_text: commit messages passed through to the reviewer
    :param project_name: project name passed through to the reviewer
    :return: the review text. A single batch is returned as-is (without a batch
        header); several batches are merged through ``_summarize_reviews``.
    """
    if not changes:
        logger.info("代码变更为空")
        return "代码为空"

    # Batching disabled: keep the original one-shot review path.
    if os.getenv("BATCH_REVIEW_ENABLED", "1") != "1":
        logger.info("批量审查功能未启用,使用传统一次性审查方式")
        return self.review_and_strip_code(str(changes), commits_text, project_name)

    review_max_tokens = int(os.getenv("REVIEW_MAX_TOKENS", 10000))

    # A batch size of 0 would make range() raise ValueError and a negative one
    # would produce no batches at all, so sanitise the configured value.
    raw_files_per_batch = os.getenv("BATCH_REVIEW_FILES_PER_BATCH", 1)
    try:
        files_per_batch = int(raw_files_per_batch)
    except ValueError:
        logger.warning(f"BATCH_REVIEW_FILES_PER_BATCH 配置无效 ({raw_files_per_batch!r}),将使用默认值 1")
        files_per_batch = 1
    if files_per_batch < 1:
        logger.warning(f"BATCH_REVIEW_FILES_PER_BATCH 必须大于等于 1 (当前为 {files_per_batch}),将使用 1")
        files_per_batch = 1
    logger.info(f"批量审查已启用,每批次审查 {files_per_batch} 个文件")

    total_files = len(changes)
    # Ceiling division; invariant across the loop, so computed once.
    total_batches = (total_files + files_per_batch - 1) // files_per_batch

    fence_open = "```markdown"
    fence_close = "```"

    # Each entry is (header, body) so the single-batch case can return the
    # body directly instead of re-parsing a concatenated string.
    batch_sections = []

    for batch_num, batch_start in enumerate(range(0, total_files, files_per_batch), start=1):
        batch_end = min(batch_start + files_per_batch, total_files)
        batch_changes = changes[batch_start:batch_end]

        logger.info(f"正在审查第 {batch_num}/{total_batches} 批次 (文件 {batch_start + 1}-{batch_end}/{total_files})")

        # Paths shown in the batch header; fall back to 'unknown' when both
        # path keys are missing or empty so that join() never sees None.
        batch_file_paths = [
            str(change.get('new_path') or change.get('old_path') or 'unknown')
            for change in batch_changes
        ]

        batch_text = str(batch_changes)

        # Truncate an oversized batch to the configured token limit.
        if count_tokens(batch_text) > review_max_tokens:
            logger.warning(f"批次 {batch_num} 的变更超过 {review_max_tokens} tokens,将截断")
            batch_text = truncate_text_by_tokens(batch_text, review_max_tokens)

        # Best effort: a failing batch is recorded and the remaining batches
        # are still reviewed.
        try:
            review_result = self.review_code(batch_text, commits_text, project_name).strip()
            if review_result.startswith(fence_open) and review_result.endswith(fence_close):
                # Drop the surrounding ```markdown ... ``` fence.
                review_result = review_result[len(fence_open):-len(fence_close)].strip()
        except Exception as e:
            logger.error(f"审查批次 {batch_num} 时出错: {e}")
            batch_sections.append((f"### 批次 {batch_num}\n", f"审查失败: {str(e)}"))
        else:
            batch_header = f"### 批次 {batch_num} (文件: {', '.join(batch_file_paths)})\n"
            batch_sections.append((batch_header, review_result))
            logger.info(f"批次 {batch_num} 审查完成")

    # A single batch needs no merging: return its body without the header.
    if len(batch_sections) == 1:
        return batch_sections[0][1]

    partial_reviews = [f"{header}{body}" for header, body in batch_sections]
    logger.info(f"开始汇总 {len(partial_reviews)} 个批次的审查结果")
    return self._summarize_reviews(partial_reviews, project_name)
195+
196+
def _summarize_reviews(self, partial_reviews: List[str], project_name: str = "") -> str:
    """
    Merge several per-batch review texts into one report using the
    ``summary_merge_review_prompt`` template.

    The prompt template comes from the file named by the environment variable
    ``<PROJECT>_PROMPT`` (project name upper-cased, "-" replaced by "_") when
    that variable is set; otherwise it is loaded with the style from
    ``REVIEW_STYLE`` (default "professional").

    :param partial_reviews: review text of each batch
    :param project_name: project name used to look up a project-level prompt
        file; ``None`` is treated like an empty name
    :return: the merged report returned by the LLM, with a surrounding
        ```markdown fence removed if present
    """
    # project_name may be None (e.g. a webhook payload without a repository
    # name); normalise it first so .upper() cannot raise AttributeError.
    normalized_project_name = (project_name or "").replace("-", "_")
    project_prompts_path = os.getenv(f"{normalized_project_name.upper()}_PROMPT", None)

    if project_prompts_path:
        summary_prompts = self._load_prompts(
            prompt_key="summary_merge_review_prompt",
            prompt_templates_file=project_prompts_path,
        )
    else:
        summary_prompts = self._load_prompts(
            "summary_merge_review_prompt",
            os.getenv("REVIEW_STYLE", "professional"),
        )

    # Join the batch reports with a horizontal rule between them.
    partial_reviews_text = "\n\n---\n\n".join(partial_reviews)

    messages = [
        summary_prompts["system_message"],
        {
            "role": "user",
            "content": summary_prompts["user_message"]["content"].format(
                partial_reviews_text=partial_reviews_text
            ),
        },
    ]

    summary_result = self.call_llm(messages).strip()

    fence_open = "```markdown"
    fence_close = "```"
    if summary_result.startswith(fence_open) and summary_result.endswith(fence_close):
        # Drop the surrounding ```markdown ... ``` fence.
        summary_result = summary_result[len(fence_open):-len(fence_close)].strip()

    logger.info("审查结果汇总完成")
    return summary_result
234+
118235
@staticmethod
119236
def parse_review_score(review_text: str) -> int:
120237
"""解析 AI 返回的 Review 结果,返回评分"""

conf/prompt_templates.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,55 @@ code_review_prompt:
4141
4242
提交历史(commits):
4343
{commits_text}
44+
45+
summary_merge_review_prompt:
46+
system_prompt: |-
47+
你是一位高级软件架构师,现在需要对多个分批完成的代码审查结果进行整合成一个完整的总审查报告。
48+
49+
你的职责:
50+
1. 重新整合多个批次的审查结果,形成"统一评分的总报告"
51+
2. 不得丢失开发者定位问题所需的"批次级详细描述"
52+
3. 在顶层总结问题趋势及关键风险
53+
4. 根据全量问题重新统一打分
54+
55+
---
56+
57+
汇总结构必须包含以下 3 部分:
58+
59+
### 第一部分:全局问题总结与优化建议(进行归类整合,去重问题)
60+
- 从所有批次报告中抽取共性问题进行分类总结
61+
- 以整体角度提出优化方向,而不是重复粘贴批次内容
62+
63+
### 第二部分:分批次详细问题保留区(必须原样结构化保留)
64+
你的任务是按以下格式保留批次细节,不得简化或省略:
65+
```
66+
#### 批次 X(文件范围/来源说明)
67+
<保留该批完整的"问题描述与评分明细",不得删减内容>
68+
```
69+
70+
这样开发者能快速找到"哪个文件在哪个批次出了什么问题"。
71+
72+
### 第三部分:统一评分明细与总分(你必须重新评分)
73+
- 你需要结合多个批次的影响范围重新量化总评分,而不是平均或取最大值
74+
- 格式如下:
75+
```
76+
- 功能实现的正确性与健壮性:XX分
77+
- 安全性与潜在风险:XX分
78+
- 是否符合最佳实践:XX分
79+
- 性能与资源效率:XX分
80+
- 提交信息清晰性与准确性:XX分
81+
```
82+
83+
最后一行必须为:**总分:XX分**
84+
85+
---
86+
87+
自检规则:
88+
- 若未包含"分批次详细问题保留区"则需重新生成
89+
- 若未重新统一评分,而直接引用批次数值,则需重新评分
90+
- 若缺少"总分:XX分",必须重新生成
91+
- 若全局总结部分只是重复批次内容,必须进行整合后重新生成
92+
93+
user_prompt: |-
94+
以下是分批次代码审查结果,请将其整合为一个完整的总审查报告,并统一量化评分:
95+
{partial_reviews_text}

0 commit comments

Comments
 (0)