-
-
Notifications
You must be signed in to change notification settings - Fork 90
Expand file tree
/
Copy pathvalidate_pr.py
More file actions
319 lines (268 loc) · 11.7 KB
/
validate_pr.py
File metadata and controls
319 lines (268 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
"""
Assortment of checks run against new PRs when git-in-here.yml is appended to.
The script formats the results as markdown and posts them as a comment on the PR.
It checks:
- The YAML is still valid and parsable
- The user's username matches their submission
- A valid question was selected
- The response was appended to the end of the list
- The length of the response is within recommended bounds
- PR template has not been deleted
- PR type has been filled in
- PR checklist has been completed
- The user has starred the repo
"""
# Dependency imports
import os
import yaml
import json
import requests
import logging
from typing import Dict, List, Union, Optional
from requests.exceptions import RequestException
# Constants

# The username / org where the repository is located
REPO_OWNER = os.getenv("REPO_OWNER", "lissy93")

# The name of the repository
REPO_NAME = os.getenv("REPO_NAME", "git-into-open-source")

# A GitHub access token, required for higher rate-limit when fetching data
GH_ACCESS_TOKEN = os.getenv("GH_ACCESS_TOKEN")

# The directory where this script is located
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# The relative path to the YAML file containing the user-contributed content
CONTRIBUTORS_FILE_PATH = os.path.join(SCRIPT_DIR, "..", "git-in-here.yml")

# Configure Logging (level is taken from the LOG_LEVEL env var, default INFO)
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=LOG_LEVEL)
logger = logging.getLogger(__name__)
def read_yaml_data(file_path: str) -> Union[Dict, List]:
    """
    Reads and returns the content of a YAML file.

    A file that is missing, unreadable, not valid UTF-8, or not valid YAML is
    logged and reported as an empty dict instead of raising. An empty YAML
    document (which the parser yields as ``None``) is also returned as an
    empty dict, so callers never receive ``None``.

    :param file_path: The path to the YAML file.
    :return: Parsed content of the YAML file, or ``{}`` on failure / empty file.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError):
        # I/O and decoding problems are reported separately from syntax errors
        logger.error(f"Error reading {file_path}: file is missing or unreadable.")
        return {}
    except yaml.YAMLError:
        logger.error(f"Error reading or parsing {file_path}.")
        return {}
    return {} if data is None else data
def check_valid_yaml() -> bool:
    """
    Checks if the contributors file (CONTRIBUTORS_FILE_PATH) contains valid YAML.

    A file that cannot be opened or decoded is treated as invalid rather than
    raising, so the caller can still report a result.

    :return: True if the file could be read and parsed, False otherwise.
    """
    logger.info("Checking if git-in-here is still valid YAML")
    try:
        with open(CONTRIBUTORS_FILE_PATH, "r", encoding="utf-8") as f:
            yaml.safe_load(f)
    except (OSError, UnicodeDecodeError):
        logger.error("Error: git-in-here could not be read.")
        return False
    except yaml.YAMLError:
        logger.error("Error: git-in-here does not contain valid YAML.")
        return False
    return True
def username_matches_submission(username: str, data: Union[Dict, List]) -> bool:
    """
    Checks if the username provided matches a submission in the YAML data.

    The comparison is case-insensitive. Malformed input (data that is not a
    mapping, a missing/null `contributors` list, entries that are not
    mappings) is skipped instead of raising. Usernames that YAML parsed as a
    non-string (e.g. an all-digit name) are compared by their string form.

    :param username: The username of the contributor.
    :param data: The parsed content of the contributors YAML file.
    :return: Boolean indicating whether the provided username matches the submission.
    """
    logger.info(f"Checking if {username} is included in the submission")
    contributors = data.get('contributors') if isinstance(data, dict) else None
    if not isinstance(contributors, list):
        return False
    target = username.lower()
    for contributor in contributors:
        if not isinstance(contributor, dict):
            continue
        name = contributor.get('username')
        if name is not None and str(name).lower() == target:
            logger.info(f"Found {username}")
            return True
    return False
def has_appended_to_end(username: str, data: Union[Dict, List]) -> bool:
    """
    Checks if the user has appended their contribution to the end of the contributors array.

    Returns False (instead of raising) when the contributors list is missing,
    empty, or its last entry is not a mapping.

    :param username: The username of the contributor.
    :param data: The parsed content of the contributors YAML file.
    :return: Boolean indicating whether the contribution is at the end of the array.
    """
    logger.info(f"Checking if {username} has appended their contribution to the end of the array")
    contributors = data.get('contributors') if isinstance(data, dict) else None
    if not isinstance(contributors, list) or not contributors:
        return False
    last_entry = contributors[-1]
    if not isinstance(last_entry, dict):
        return False
    name = last_entry.get('username')
    return name is not None and str(name).lower() == username.lower()
def question_is_valid(username: str, data: Union[Dict, List]) -> bool:
    """
    Checks if the question answered by the given user is one of the listed questions.

    The listed questions are the values of every top-level key starting with
    'Q'. Only the first contributor entry matching the username
    (case-insensitive) is considered. Entries without a username, or that are
    not mappings, are skipped rather than raising.

    :param username: The username of the contributor.
    :param data: The parsed content of the contributors YAML file.
    :return: Boolean indicating whether the question is valid.
    """
    logger.info(f"Checking if {username} has answered a valid question")
    if not isinstance(data, dict):
        return False
    valid_questions = [
        text for key, text in data.items()
        if isinstance(key, str) and key.startswith('Q')
    ]
    contributors = data.get('contributors')
    if not isinstance(contributors, list):
        return False
    target = username.lower()
    for contributor in contributors:
        if not isinstance(contributor, dict):
            continue
        name = contributor.get('username')
        if name is not None and str(name).lower() == target:
            return contributor.get('question', '') in valid_questions
    return False  # If the user is not in the contributors list
def check_if_stargazer(username) -> bool:
    """
    Checks if a given user has starred the repository.

    Pages through the repository's stargazers via the GitHub API and stops as
    soon as the user is found. If a request fails, times out, or returns a
    non-200 status, the search stops and the user is reported as not having
    starred the repo.

    :param username: The username to check (compared case-insensitively).
    :return: Boolean indicating whether the user has starred the repo.
    """
    logger.info("Fetching all stargazers of the repository")
    target = username.lower()
    url = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/stargazers"
    headers = {"Accept": "application/vnd.github.v3+json"}
    if GH_ACCESS_TOKEN:
        headers["Authorization"] = f"token {GH_ACCESS_TOKEN}"
    # Only sent with the first request; follow-up URLs come from the Link
    # header and are presumed to already carry the paging parameters.
    params = {"per_page": 100}
    while url:
        try:
            # A timeout stops a stalled connection from hanging the run
            res = requests.get(url, headers=headers, params=params, timeout=10)
            if res.status_code != 200:
                logger.warning(f"Stargazers request returned status {res.status_code}.")
                break
            page = res.json()
        except (RequestException, ValueError):
            # ValueError covers JSON decoding errors on older requests versions
            logger.warning("Request error while fetching stargazers.")
            break
        for stargazer in page:
            if isinstance(stargazer, dict) and str(stargazer.get("login", "")).lower() == target:
                return True
        # requests parses the Link header; there is no "next" entry on the last page
        url = res.links.get("next", {}).get("url")
        params = None
    return False
def response_length_is_valid(username: str, data: Union[Dict, List]) -> bool:
    """
    Checks if the response for the given username is between 64 and 512 characters.

    Length is measured after stripping surrounding whitespace. Only the first
    contributor entry matching the username (case-insensitive) is considered.
    Entries without a username, and a missing or non-string response, yield
    False instead of raising.

    :param username: The username of the contributor.
    :param data: The parsed content of the contributors YAML file.
    :return: Boolean indicating whether the response length is within bounds.
    """
    logger.info(f"Checking if {username} has a valid response length")
    contributors = data.get('contributors') if isinstance(data, dict) else None
    if not isinstance(contributors, list):
        return False
    target = username.lower()
    for contributor in contributors:
        if not isinstance(contributor, dict):
            continue
        name = contributor.get('username')
        if name is not None and str(name).lower() == target:
            response = contributor.get('response')
            if not isinstance(response, str):
                return False
            return 64 <= len(response.strip()) <= 512
    return False  # Return False if username is not found
def make_final_comment(user: str, errors: List[str]) -> str:
    """
    Builds the markdown comment that is posted on the PR.

    :param user: GitHub username to greet; the greeting is omitted if empty.
    :param errors: Markdown bullet lines describing failed checks. They are
        listed in reverse order in the comment; the list passed in is left
        unmodified.
    :return: The full markdown comment body.
    """
    result = ""
    if user:
        result += f"Hello @{user}! 👋\n\n"
    result += f"Thanks for contributing to {REPO_NAME}! 🎉\n"
    if errors:
        result += "\n\nI've spotted a few possible issues for you to address:\n"
        # reversed() gives the same ordering as before without mutating the caller's list
        result += '\n'.join(reversed(errors))
        result += (
            "\n\nPlease ensure you've read and followed the "
            "[Contributing Guidelines](https://github.com/Lissy93/git-into-open-source/blob/main/.github/CONTRIBUTING.md#guidelines)"
        )
    else:
        result += "\n\nAll automated checks have passed, a human will review your PR soon :)"
    result += "\n\n<sup>🤖 I'm a bot, and this message was automated. Follow me for updates.</sup>"
    return result
def _pr_body_problems(pr_body):
    """Returns the list of issues found in the PR description (empty if none)."""
    if not pr_body:
        # Skipping future checks, as don't have PR body
        logger.info("Skipping PR checks, as no PR body passed")
        return []

    if len(pr_body) < 200:
        # If PR body is so short, no point checking for the rest...
        return [
            "- The PR body seems to be missing some content. Please make sure you didn't delete the PR template."
        ]

    found = []
    if '___' in pr_body:
        found.append("- Please specify the PR type (and delete the `___` placeholder).")
    if '[x]' not in pr_body and '[X]' not in pr_body:
        found.append("- Please ensure you've checked the boxes in the PR template (use `[x]`).")
    return found


def run_checks(user, contributor_data, pr_body):
    """
    Runs the automated checks for a PR and collects a message for each failure.

    The checks stop early when there is no user, when the YAML file is
    invalid, or when the user's entry cannot be found.

    :param user: The username of the PR author.
    :param contributor_data: The parsed content of the contributors YAML file.
    :param pr_body: The PR description, or a falsy value to skip the PR body checks.
    :return: List of markdown bullet strings, one per failed check.
    """
    if not user:
        # If we don't have a user associated with the PR, we can't continue
        logger.error("Error: GITHUB_ACTOR environment variable not set.")
        return []

    problems = []

    starred = check_if_stargazer(user)
    if not starred:
        problems.append(
            "- You've not yet starred the repo. Dropping us a ⭐ helps other users find us :)"
        )

    yaml_ok = check_valid_yaml()
    if not yaml_ok:
        problems.append(
            "- It looks like there is a syntax error in git-in-here.yml. "
            "You'll need to fix that before your PR can be reviewed. "
            "Using a [YAML Validator](https://appdevtools.com/yaml-validator) might help."
        )
        # We return early here, because can't continue if the YAML is invalid
        return problems

    entry_found = username_matches_submission(user, contributor_data)
    if not entry_found:
        problems.append(
            "- I couldn't find your response, ensure that your `username` matches your GitHub username."
        )
        # If we don't have the users response, we can't continue
        return problems

    if not question_is_valid(user, contributor_data):
        problems.append(
            "- Please ensure that the question you've answered is in the list."
        )

    if not has_appended_to_end(user, contributor_data):
        problems.append(
            "- Please append your contribution to the end of the `contributors` list. "
            "Do not add it to the top or in between other entries."
        )

    if not response_length_is_valid(user, contributor_data):
        problems.append(
            "- Ideally, the length of your response should be between 64 and 512 characters."
        )

    problems.extend(_pr_body_problems(pr_body))
    return problems
def get_pr_body(pr_number: int) -> Optional[str]:
    """
    Fetch the body of the PR using the GitHub API.

    :param pr_number: The number of the pull request.
    :return: The PR description, or None if it is empty or could not be fetched.
    """
    url = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/pulls/{pr_number}"
    headers = {"Accept": "application/vnd.github.v3+json"}
    # Only authenticate when a token exists, rather than sending "token None"
    if GH_ACCESS_TOKEN:
        headers["Authorization"] = f"token {GH_ACCESS_TOKEN}"
    try:
        # A timeout stops a stalled connection from hanging the run
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.json().get("body")
        logger.error(f"Failed to fetch PR body for PR #{pr_number}.")
        return None
    except (RequestException, ValueError):
        # ValueError covers JSON decoding errors on older requests versions
        logger.error(f"Request error while fetching PR body for PR #{pr_number}.")
        return None
def get_pr_number_from_event() -> Optional[int]:
    """
    Reads the PR number from the GitHub Actions event payload.

    The payload location is taken from the GITHUB_EVENT_PATH environment
    variable. The top-level "number" field is used when present, falling back
    to the "number" field of a nested "pull_request" object.

    :return: The PR number, or None if the variable is unset, the file cannot
        be read or parsed as JSON, or no integer PR number is present.
    """
    event_path = os.getenv('GITHUB_EVENT_PATH')
    if not event_path:
        return None
    try:
        with open(event_path, 'r', encoding='utf-8') as f:
            event = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        return None
    if not isinstance(event, dict):
        return None
    number = event.get("number")
    if number is None:
        pull_request = event.get("pull_request")
        if isinstance(pull_request, dict):
            number = pull_request.get("number")
    return number if isinstance(number, int) else None
def post_comment_to_pr(pr_number: int, comment_body: str) -> bool:
    """
    Posts a comment on the given PR using the GitHub API.

    :param pr_number: The number of the pull request to comment on.
    :param comment_body: The markdown content of the comment.
    :return: True if GitHub responded with 201, False otherwise (including
        when no PR number is given or the request fails).
    """
    if not pr_number:
        # Without a PR number there is nothing valid to post to
        return False
    url = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/issues/{pr_number}/comments"
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"token {GH_ACCESS_TOKEN}",
    }
    data = {"body": comment_body}
    try:
        # A timeout stops a stalled connection from hanging the run
        response = requests.post(url, headers=headers, json=data, timeout=10)
        return response.status_code == 201
    except RequestException:
        return False
def main():
    """
    Entry point: runs the checks for the current PR and posts the result as a comment.

    Requires the GH_ACCESS_TOKEN environment variable. If the PR number cannot
    be determined, the checks that do not need the PR body still run, and the
    resulting comment is logged instead of being posted.
    """
    # Double check we've got the access token
    if not GH_ACCESS_TOKEN:
        logger.error("The GH_ACCESS_TOKEN environment variable not set.")
        return

    # Get the PR number for this request
    pr_number = get_pr_number_from_event()
    if not pr_number:
        logger.error("Could not get PR number from GitHub event.")

    # Get the body of the pull request (the description completed by user)
    pr_body = get_pr_body(pr_number) if pr_number else None

    # Get the username of the user who submitted the PR
    user = os.getenv('GITHUB_ACTOR')

    # Read the YAML data
    contributor_data = read_yaml_data(CONTRIBUTORS_FILE_PATH)

    # Run the checks
    errors = run_checks(user, contributor_data, pr_body)

    # Generate the markdown comment
    markdown = make_final_comment(user, errors)

    if not pr_number:
        # There is no PR to comment on, so don't send a request to ".../issues/None/comments"
        logger.info("No PR number available, so the comment was not posted:\n%s", markdown)
        return

    # Post the comment to the PR
    if not post_comment_to_pr(pr_number, markdown):
        logger.error(f"Failed to post comment to PR #{pr_number}.")


if __name__ == "__main__":
    main()