Skip to content

Commit c74c6b5

Browse files
committed
Add script to validate upstream references in PR branch commits
This script scans each commit in a PR branch for upstream Linux kernel commit references and validates those references. If a commit references an upstream commit, the script checks that the referenced commit exists in mainline and reports whether it has been referenced by a Fixes: tag in the upstream kernel. Usage: python3 check_kernel_commits.py <repo_path> <pr_branch> <base_branch> [--markdown] By default, the script outputs results for terminal display. Use the --markdown flag to format output for GitHub PR comments.
1 parent ab098bb commit c74c6b5

File tree

1 file changed

+165
-0
lines changed

1 file changed

+165
-0
lines changed

check_kernel_commits.py

Lines changed: 165 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,165 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import subprocess
5+
import re
6+
import sys
7+
import textwrap
8+
9+
def run_git(repo, args):
    """Execute ``git -C <repo> <args...>`` and return what it wrote to stdout.

    Raises:
        RuntimeError: if git exits with a non-zero status. The message holds
            the space-joined arguments followed by git's stderr output.
    """
    command = ['git', '-C', repo] + args
    completed = subprocess.run(command, check=False, capture_output=True, text=True)
    if completed.returncode == 0:
        return completed.stdout
    raise RuntimeError(f"Git command failed: {' '.join(args)}\n{completed.stderr}")
15+
16+
def ref_exists(repo, ref):
    """Tell whether ``ref`` resolves to an object in the repository at ``repo``.

    Runs ``git rev-parse --verify --quiet`` with all output discarded and
    reports success purely from the exit status.
    """
    probe = ['git', '-C', repo, 'rev-parse', '--verify', '--quiet', ref]
    completed = subprocess.run(
        probe,
        stderr=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
    )
    resolved = completed.returncode == 0
    return resolved
24+
25+
def get_pr_commits(repo, pr_branch, base_branch):
    """List the SHAs reachable from ``pr_branch`` but not from ``base_branch``.

    The SHAs are returned in the order ``git rev-list`` prints them, one list
    entry per output line.
    """
    revision_range = f'{base_branch}..{pr_branch}'
    listing = run_git(repo, ['rev-list', revision_range])
    shas = listing.strip().splitlines()
    return shas
29+
30+
def get_commit_message(repo, sha):
    """Return the raw message (``%B``) of the single commit ``sha``."""
    log_args = ['log', '-n', '1', '--format=%B', sha]
    message = run_git(repo, log_args)
    return message
33+
34+
def get_short_hash_and_subject(repo, sha):
    """Return ``(abbreviated_hash, subject)`` for the commit ``sha``.

    git is asked to emit both fields separated by a NUL byte, so the output
    is split on the first NUL only.
    """
    log_args = ['log', '-n', '1', '--format=%h%x00%s', sha]
    raw = run_git(repo, log_args).strip()
    abbrev, title = raw.split('\x00', 1)
    return abbrev, title
39+
40+
def hash_exists_in_mainline(repo, upstream_ref, hash_):
    """Return True if commit ``hash_`` is reachable from ``upstream_ref``.

    Args:
        repo: Path to the git repository to query.
        upstream_ref: Ref naming the upstream history (e.g. a remote branch).
        hash_: Full or abbreviated commit hash referenced by a PR commit.

    Returns:
        True when ``hash_`` names a commit that is an ancestor of (or equal
        to) ``upstream_ref``; False when it is not an ancestor, or when git
        cannot resolve the hash at all.
    """
    try:
        # `<ref>^<hash>` is not a valid revision expression (`^<n>` selects
        # the n-th parent), so it can never be used to test membership.
        # `merge-base --is-ancestor` exits 0 only when hash_ is in the
        # history of upstream_ref; any non-zero exit (not an ancestor, or an
        # unknown/ambiguous hash) surfaces as RuntimeError from run_git.
        run_git(repo, ['merge-base', '--is-ancestor', hash_, upstream_ref])
    except RuntimeError:
        return False
    return True
48+
49+
def find_fixes_in_mainline(repo, upstream_ref, hash_):
    """
    Search the history of upstream_ref for commits whose message contains
    "Fixes: <first 12 chars of hash_>" (matched case-insensitively).

    Returns the stripped ``git log`` output, one "<abbrev> <subject> (<author>)"
    line per matching commit; an empty string means nothing matched.
    """
    needle = f'Fixes: {hash_[:12]}'
    log_args = ['log', upstream_ref, '--grep', needle, '-i', '--format=%h %s (%an)']
    matches = run_git(repo, log_args)
    return matches.strip()
59+
60+
def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''):
    """Re-flow ``text`` into lines of at most ``width`` columns.

    The first line is prefixed with ``initial_indent`` and every following
    line with ``subsequent_indent``. Words are never split, neither in the
    middle of a long word nor at hyphens, so an over-long word ends up on a
    line of its own that may exceed ``width``.
    """
    options = {
        'initial_indent': initial_indent,
        'subsequent_indent': subsequent_indent,
        'break_long_words': False,
        'break_on_hyphens': False,
    }
    return textwrap.fill(text, width=width, **options)
68+
69+
def main():
    """Command-line entry point: validate upstream references in a PR branch.

    For every commit that is in the PR branch but not in the base branch
    (oldest first), each ``commit <hash>`` line in the commit message is
    treated as an upstream reference. A reference is reported when it is not
    found in the upstream ref, or when upstream commits carry a ``Fixes:``
    tag pointing at it.

    Output goes to stdout, as plain text by default or as Markdown with
    ``--markdown``. Exits with status 1 when a required ref is missing and
    with status 0 when the PR branch has no commits of its own.
    """
    parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.")
    parser.add_argument("repo", help="Path to the git repo")
    parser.add_argument("pr_branch", help="Name of the PR branch")
    parser.add_argument("base_branch", help="Name of the base branch")
    parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments")
    args = parser.parse_args()

    upstream_ref = 'origin/kernel-mainline'

    # Validate that all required refs exist before continuing
    required_refs = [('upstream reference', upstream_ref),
                     ('PR branch', args.pr_branch),
                     ('base branch', args.base_branch)]
    missing_refs = [(refname, refval) for refname, refval in required_refs
                    if not ref_exists(args.repo, refval)]
    if missing_refs:
        for refname, refval in missing_refs:
            print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.")
        print("Please fetch or create the required references before running this script.")
        sys.exit(1)

    pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch)
    if not pr_commits:
        if args.markdown:
            print("> ℹ️ **No commits found in PR branch that are not in base branch.**")
        else:
            print("No commits found in PR branch that are not in base branch.")
        sys.exit(0)

    # Hanging indents for wrapped plain-text findings: continuation lines are
    # aligned under the text that follows the leading tag.
    notfound_indent = ' ' * len('[NOTFOUND] ')
    fixes_indent = ' ' * len('[FIXES] ')

    # A line starting with "commit <12-40 hex digits>" names an upstream commit.
    upstream_ref_pattern = re.compile(r'^commit\s+([0-9a-fA-F]{12,40})', re.MULTILINE)

    # Findings accumulate here; an empty list means nothing to report.
    out_lines = []

    for sha in reversed(pr_commits):  # oldest first
        short_hash, subject = get_short_hash_and_subject(args.repo, sha)
        pr_commit_desc = f"{short_hash} ({subject})"
        msg = get_commit_message(args.repo, sha)
        for uhash in upstream_ref_pattern.findall(msg):
            short_uhash = uhash[:12]
            # Ensure the referenced commit in the PR actually exists in the upstream ref.
            if not hash_exists_in_mainline(args.repo, upstream_ref, uhash):
                if args.markdown:
                    out_lines.append(
                        f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n"
                        f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n"
                    )
                else:
                    header = (f"[NOTFOUND] PR commit {pr_commit_desc} references upstream commit "
                              f"{short_uhash}, which does not exist in kernel-mainline.")
                    out_lines.append(
                        wrap_paragraph(header, width=80, initial_indent='',
                                       subsequent_indent=notfound_indent)
                    )
                    out_lines.append("")  # blank line
                # A commit that is not upstream cannot have upstream Fixes: tags.
                continue

            fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash)
            if not fixes:
                continue
            if args.markdown:
                fixes_block = " " + fixes.replace("\n", "\n ")
                out_lines.append(
                    f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n"
                    f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n"
                    f" Linux kernel:\n\n"
                    f"```text\n{fixes_block}\n```\n"
                )
            else:
                header = (f"[FIXES] PR commit {pr_commit_desc} references upstream commit "
                          f"{short_uhash}, which has Fixes tags:")
                out_lines.append(
                    wrap_paragraph(header, width=80, initial_indent='',
                                   subsequent_indent=fixes_indent)
                )
                out_lines.append("")  # blank line after 'Fixes tags:'
                out_lines.extend(' ' + line for line in fixes.splitlines())
                out_lines.append("")  # blank line

    if out_lines:
        if args.markdown:
            print("## :mag: Upstream Linux Kernel Commit Check\n")
            print('\n'.join(out_lines))
            print("*This is an automated message from the kernel commit checker workflow.*")
        else:
            print('\n'.join(out_lines))
    else:
        if args.markdown:
            print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**")
        else:
            print("All referenced commits exist upstream and have no Fixes: tags.")
163+
164+
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)