Skip to content

Commit 595ad68

Browse files
committed
created first draft of check_copyright file in python
1 parent 5d51084 commit 595ad68

File tree

1 file changed

+118
-0
lines changed

1 file changed

+118
-0
lines changed

python/scripts/check_copyright.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#!/usr/bin/env python3
2+
3+
import re
4+
import subprocess
5+
import sys
6+
from pathlib import Path
7+
8+
import click
9+
10+
# File suffixes that are subject to the copyright check.
EXTENSIONS = (".py", ".sh", ".ts")

# Any tracked path containing this substring is skipped.
EXCLUDED_PATH = "tests_data"

# Only paths starting with one of these prefixes are inspected.
DIRECTORIES = ("python/", "rust/", "js/")

# Case-insensitive match for the word "Copyright" anywhere in a line.
COPYRIGHT_PATTERN = re.compile(r"Copyright", flags=re.IGNORECASE)
14+
15+
16+
def get_git_files():
    """
    Retrieve the git-tracked files that are subject to the copyright check.

    A file is selected when all of the following hold:
    - Its path ends with one of the allowed extensions (EXTENSIONS).
    - Its path does not contain the excluded path (EXCLUDED_PATH) anywhere.
    - Its path starts with one of the designated directories (DIRECTORIES).

    The listing is requested with ``git ls-files -z``. In the default
    newline-separated format git wraps paths containing unusual characters
    (non-ASCII bytes, quotes, backslashes, ...) in double quotes with
    escapes, so such files would fail the suffix/prefix filters and silently
    escape the check. The NUL-separated format emits every path verbatim.

    NOTE(review): the prefix filter compares against paths as printed by
    git, which are relative to the current working directory -- presumably
    the script is meant to be run from the repository root; confirm.

    Returns:
        List[Path]: A list of Path objects for files meeting the criteria.

    Raises:
        SystemExit: Exits with code 2 if git cannot be started or the git
            command fails.
    """
    # Keep the try body limited to the call that can actually fail.
    try:
        result = subprocess.run(
            ["git", "ls-files", "-z"], capture_output=True, text=True, check=True
        )
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers a missing or non-executable git binary, which would
        # otherwise surface as a traceback instead of the documented exit code.
        click.secho(f"Git command failed: {e}", fg="red", bold=True)
        sys.exit(2)

    # The output ends with a NUL, so split() yields a trailing empty string;
    # it is dropped naturally because "" matches none of the extensions.
    return [
        Path(f)
        for f in result.stdout.split("\0")
        if f.endswith(EXTENSIONS)
        and EXCLUDED_PATH not in f
        and f.startswith(DIRECTORIES)
    ]
44+
45+
46+
def has_copyright(path: Path, max_lines: int = 5) -> bool:
    """
    Check whether the file at the given path contains a copyright notice.

    Only the first ``max_lines`` lines are inspected; each one is searched
    for a match of COPYRIGHT_PATTERN. Reading stops at the first match or
    at end of file, whichever comes first.

    Args:
        path (Path): The file path to check.
        max_lines (int): How many leading lines to inspect. Defaults to 5.

    Returns:
        bool: True if the copyright notice is found within the inspected
        lines; False otherwise. A file that cannot be opened or cannot be
        decoded as UTF-8 is also reported as False (best effort).
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            # range() comes first in zip() so iteration stops after
            # max_lines without pulling an extra line from the file, and it
            # also stops early at end of file.
            return any(
                COPYRIGHT_PATTERN.search(line)
                for _, line in zip(range(max_lines), f)
            )
    except (OSError, UnicodeError):
        # Unreadable or non-UTF-8 files count as "no copyright", while
        # genuine programming errors are no longer silently swallowed.
        return False
67+
68+
69+
@click.group()
def cli():
    """
    A Click command group to define copyright-related commands.

    This function serves as an entry point for the command-line interface.
    """
    # Intentionally empty: the group only acts as a container for the
    # sub-commands registered on it. The docstring wording is kept as-is
    # because Click displays a callback's docstring as its help text.
77+
78+
79+
@cli.command("check")
def check_command():
    """
    Check for missing copyright headers in the target files.

    The command performs the following steps:
    - Retrieves the list of git-tracked files meeting the criteria.
    - Checks each file for a valid copyright header.
    - Prints any files that are missing the header.
    - Exits with status code 1 if any file is missing the header; otherwise exits successfully.
    """
    # Collect every candidate file whose header check fails.
    missing_header = [
        candidate for candidate in get_git_files() if not has_copyright(candidate)
    ]

    # Nothing to report: print the success message and fall through to the
    # normal (successful) end of the command.
    if not missing_header:
        click.secho("All files have valid copyright.", fg="green")
        return

    # Report every offending file, then signal failure through the exit code.
    click.secho("Missing copyright in:", fg="red", bold=True)
    for offender in missing_header:
        click.echo(f" - {offender}")
    sys.exit(1)
102+
103+
104+
def copyright_found(file_path: Path) -> bool:
    """
    Report whether a file carries a copyright banner.

    This is a thin alias that forwards directly to has_copyright() and
    returns its result unchanged.

    Args:
        file_path (Path): The file to inspect.

    Returns:
        bool: Whatever has_copyright() returns for ``file_path``.
    """
    return has_copyright(file_path)
115+
116+
117+
# Run the Click command group only when executed as a script, not on import.
if __name__ == "__main__":
    cli()

0 commit comments

Comments
 (0)