Skip to content

Commit 2198c68

Browse files
committed
Add script to validate upstream references in PR branch commits
This script scans each commit in a PR branch for upstream Linux kernel commit references and validates those references. If a commit references an upstream commit, the script checks that the referenced commit exists in mainline and reports whether any upstream kernel commit carries a Fixes: tag pointing at it.

Usage:
    python3 check_kernel_commits.py <repo_path> <pr_branch> <base_branch> [--pretty]

By default, the script outputs results for terminal display. Use the --pretty flag to format output for GitHub PR comments.
1 parent ab098bb commit 2198c68

File tree

1 file changed

+161
-0
lines changed

1 file changed

+161
-0
lines changed

check_kernel_commits.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import subprocess
5+
import re
6+
import sys
7+
import textwrap
8+
9+
def run_git(repo, args):
    """Execute ``git -C <repo> <args...>`` and return what it wrote to stdout.

    Args:
        repo: Path handed to ``git -C``.
        args: List of git arguments (sub-command first).

    Returns:
        The captured standard output, decoded as text.

    Raises:
        RuntimeError: If git exits with a non-zero status. The message holds
            the space-joined arguments followed by git's stderr.
    """
    command = ['git', '-C', repo] + args
    completed = subprocess.run(command, text=True, capture_output=True, check=False)
    if completed.returncode == 0:
        return completed.stdout
    # Non-zero exit: surface the failing arguments together with git's own diagnostics.
    raise RuntimeError(f"Git command failed: {' '.join(args)}\n{completed.stderr}")
15+
16+
def ref_exists(repo, ref):
    """Tell whether ``git rev-parse --verify --quiet <ref>`` succeeds in *repo*.

    Args:
        repo: Path handed to ``git -C``.
        ref: Ref name (or any revision expression) to verify.

    Returns:
        True when git exits with status 0, False for any other exit status.
    """
    probe = ['git', '-C', repo, 'rev-parse', '--verify', '--quiet', ref]
    # Only the exit status matters here, so both output streams are discarded.
    completed = subprocess.run(probe, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return completed.returncode == 0
24+
25+
def get_pr_commits(repo, pr_branch, base_branch):
    """List the commits reachable from *pr_branch* but not from *base_branch*.

    Args:
        repo: Path to the git repository.
        pr_branch: Revision naming the PR side of the range.
        base_branch: Revision naming the base side of the range.

    Returns:
        A list of SHA strings, one per line of ``git rev-list`` output and in
        the order git printed them; an empty list when git prints nothing.

    Raises:
        RuntimeError: Propagated from ``run_git`` when git fails.
    """
    revision_range = f'{base_branch}..{pr_branch}'
    listing = run_git(repo, ['rev-list', revision_range])
    return listing.strip().splitlines()
29+
30+
def get_commit_message(repo, sha):
    """Fetch the raw commit message (``%B``: subject plus body) of commit *sha*.

    Args:
        repo: Path to the git repository.
        sha: Commit to inspect.

    Returns:
        The message exactly as ``git log`` printed it (not stripped).

    Raises:
        RuntimeError: Propagated from ``run_git`` when git fails.
    """
    log_args = ['log', '-n', '1', '--format=%B', sha]
    return run_git(repo, log_args)
33+
34+
def get_short_hash_and_subject(repo, sha):
    """Look up the abbreviated hash and the subject line of commit *sha*.

    Args:
        repo: Path to the git repository.
        sha: Commit to inspect.

    Returns:
        A ``(short_hash, subject)`` tuple of strings.

    Raises:
        RuntimeError: Propagated from ``run_git`` when git fails.
        ValueError: If the output does not contain the NUL separator.
    """
    # A NUL byte separates the two fields so that any character in the
    # subject line is kept intact when splitting.
    raw = run_git(repo, ['log', '-n', '1', '--format=%h%x00%s', sha])
    fields = raw.strip().split('\x00', 1)
    abbreviated, title = fields
    return abbreviated, title
39+
40+
def hash_exists_in_mainline(repo, upstream_ref, hash_):
    """Return True if commit *hash_* is part of the history of *upstream_ref*.

    The check runs ``git merge-base --is-ancestor <hash_> <upstream_ref>``,
    which exits 0 only when the commit is reachable from the upstream ref.

    The previous implementation passed ``<upstream_ref>^<hash_>`` to
    ``git cat-file -e``. That is not a valid revision expression (``^`` takes
    a decimal parent number), so the check failed for every real hash and
    every reference was reported as missing.

    Args:
        repo: Path to the git repository.
        upstream_ref: Ref whose history is searched (e.g. a mainline branch).
        hash_: Full or abbreviated commit hash to look for.

    Returns:
        True when git exits with status 0. False otherwise, which covers both
        "not an ancestor" and "hash cannot be resolved to a commit".
    """
    result = subprocess.run(
        ['git', '-C', repo, 'merge-base', '--is-ancestor', hash_, upstream_ref],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    return result.returncode == 0
48+
49+
def find_fixes_in_mainline(repo, upstream_ref, hash_):
    """Search *upstream_ref* for commits whose message mentions ``Fixes: <hash>``.

    Only the first 12 characters of *hash_* are used in the search pattern.

    Args:
        repo: Path to the git repository.
        upstream_ref: Ref whose log is searched.
        hash_: Hash of the commit that may have been fixed upstream.

    Returns:
        The stripped ``git log`` output, one ``<short-hash> <subject> (<author>)``
        line per matching commit, or an empty string when nothing matches.

    Raises:
        RuntimeError: Propagated from ``run_git`` when git fails.
    """
    needle = f'Fixes: {hash_[:12]}'
    log_args = ['log', upstream_ref, '--grep', needle, '--format=%h %s (%an)']
    return run_git(repo, log_args).strip()
56+
57+
def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''):
    """Re-flow *text* into lines of at most *width* columns.

    Words longer than *width* are kept whole and hyphenated words are never
    split, so such a line may exceed *width*.

    Args:
        text: Paragraph to wrap.
        width: Target maximum line length.
        initial_indent: Prefix for the first output line.
        subsequent_indent: Prefix for every following line.

    Returns:
        The wrapped paragraph as one newline-joined string.
    """
    return textwrap.fill(
        text,
        width=width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=False,
        break_on_hyphens=False,
    )
65+
66+
def main():
    """Command-line entry point: validate upstream references in PR commits.

    Parses ``repo``, ``pr_branch``, ``base_branch`` and the optional
    ``--pretty`` flag. It verifies that the upstream ref and both branches
    exist, then walks the PR-only commits oldest first. For every
    ``commit <hash>`` line (12-40 hex digits) in a commit message it reports:

    * ``[NOTFOUND]`` when the hash is not found in the upstream ref;
    * ``[FIXES]`` when upstream commits mention ``Fixes: <hash>``.

    Output is plain text by default and Markdown with ``--pretty``.

    Exits with status 1 when a required ref is missing and with status 0 when
    the PR branch has no commits beyond the base branch.
    """
    parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.")
    parser.add_argument("repo", help="Path to the git repo")
    parser.add_argument("pr_branch", help="Name of the PR branch")
    parser.add_argument("base_branch", help="Name of the base branch")
    parser.add_argument("--pretty", action='store_true', help="Output in Markdown, suitable for GitHub PR comments")
    args = parser.parse_args()

    upstream_ref = 'origin/kernel-mainline'

    # Validate that all required refs exist before continuing
    required_refs = [('upstream reference', upstream_ref),
                     ('PR branch', args.pr_branch),
                     ('base branch', args.base_branch)]
    missing_refs = [(refname, refval) for refname, refval in required_refs
                    if not ref_exists(args.repo, refval)]
    if missing_refs:
        for refname, refval in missing_refs:
            print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.")
        print("Please fetch or create the required references before running this script.")
        sys.exit(1)

    pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch)
    if not pr_commits:
        if args.pretty:
            print("> ℹ️ **No commits found in PR branch that are not in base branch.**")
        else:
            print("No commits found in PR branch that are not in base branch.")
        sys.exit(0)

    # Hanging indents for wrapped plain-text findings: continuation lines are
    # aligned under the text that follows the tag. Deriving the width from the
    # tag keeps the indent and the tag in sync.
    notfound_indent = ' ' * len('[NOTFOUND] ')
    fixes_indent = ' ' * len('[FIXES] ')

    # Compiled once, outside the per-commit loop.
    upstream_commit_re = re.compile(r'^commit\s+([0-9a-fA-F]{12,40})', re.MULTILINE)

    any_findings = False
    out_lines = []

    for sha in reversed(pr_commits):  # oldest first
        short_hash, subject = get_short_hash_and_subject(args.repo, sha)
        pr_commit_desc = f"{short_hash} ({subject})"
        msg = get_commit_message(args.repo, sha)
        upstream_hashes = upstream_commit_re.findall(msg)
        for uhash in upstream_hashes:
            short_uhash = uhash[:12]
            exists = hash_exists_in_mainline(args.repo, upstream_ref, uhash)
            if not exists:
                any_findings = True
                if args.pretty:
                    out_lines.append(
                        f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n"
                        f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n"
                    )
                else:
                    header = (f"[NOTFOUND] PR commit {pr_commit_desc} references upstream commit "
                              f"{short_uhash}, which does not exist in kernel-mainline.")
                    out_lines.append(
                        wrap_paragraph(header, width=80, initial_indent='',
                                       subsequent_indent=notfound_indent)
                    )
                    out_lines.append("")  # blank line
                # A commit that is missing upstream cannot have upstream fixes.
                continue
            fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash)
            if fixes:
                any_findings = True
                if args.pretty:
                    fixes_block = " " + fixes.replace("\n", "\n ")
                    out_lines.append(
                        f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n"
                        f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n"
                        f" Linux kernel:\n\n"
                        f"```text\n{fixes_block}\n```\n"
                    )
                else:
                    header = (f"[FIXES] PR commit {pr_commit_desc} references upstream commit "
                              f"{short_uhash}, which has Fixes tags:")
                    out_lines.append(
                        wrap_paragraph(header, width=80, initial_indent='',
                                       subsequent_indent=fixes_indent)
                    )
                    out_lines.append("")  # blank line after 'Fixes tags:'
                    for line in fixes.splitlines():
                        out_lines.append(' ' + line)
                    out_lines.append("")  # blank line

    if any_findings:
        if args.pretty:
            print("## :mag: Upstream Linux Kernel Commit Check\n")
            print('\n'.join(out_lines))
            print("*This is an automated message from the kernel commit checker workflow.*")
        else:
            print('\n'.join(out_lines))
    else:
        if args.pretty:
            print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**")
        else:
            print("All referenced commits exist upstream and have no Fixes: tags.")
159+
160+
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)