-
Notifications
You must be signed in to change notification settings - Fork 4
Initial version of a git_bdiff module #100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
10f789d
Initial version of a git_bdiff module
t00sa 56813be
Add the ability to specify a repository directory
t00sa 447dd57
Include review changes from @ericaneininger
t00sa 4f97911
Include review changes from @r-sharp
t00sa 35f4311
Improve tests of error handling
t00sa 538323a
Improve hash pattern match
t00sa 2ee2454
Minor change to remove some dead test code
t00sa 521adf7
Update branch pattern somewhat
t00sa 00af69a
Merge branch 'main' into git_bdiff
ericaneininger File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
#!/usr/bin/env python3 | ||
# *********************************COPYRIGHT************************************ | ||
# (C) Crown copyright Met Office. All rights reserved. | ||
# For further details please refer to the file COPYRIGHT.txt | ||
# which you should have received as part of this distribution. | ||
# *********************************COPYRIGHT************************************ | ||
""" | ||
Module to obtain a list of all altered files on a git branch, from the | ||
point where it diverged from the parent branch to the most recent | ||
commit. | ||
|
||
Usage is as follows: | ||
|
||
>>> bdiff = GitBDiff() | ||
|
||
And then: | ||
|
||
>>> for change in bdiff.files(): | ||
... print(change) | ||
""" | ||
|
||
import re | ||
import subprocess | ||
from pathlib import Path | ||
|
||
|
||
class GitBDiffError(Exception):
    """Root of the exception hierarchy for branch diff failures."""
|
||
|
||
class GitBDiffNotGit(GitBDiffError):
    """Raised when the target location is not inside a git repository."""

    def __init__(self, cmd):
        # Render every element of the command as text so that the
        # failing invocation can be quoted in the error message
        command = " ".join(map(str, cmd))
        super().__init__("not a repository (cmd:" + command + ")")
|
||
|
||
class GitBDiff:
    """Class which generates a branch diff.

    Creating an instance runs git to look up the commit where HEAD
    diverged from the parent branch (``ancestor``), the most recent
    commit (``current``), and the name of the checked-out branch
    (``branch``).

    Args:
        parent: name of the parent branch; defaults to ``primary_branch``.
        repo: directory in which the git commands are run; ``None``
            means the current working directory.

    Raises:
        GitBDiffError: if ``repo`` is not a directory or one of the
            git lookups fails.
        GitBDiffNotGit: if git reports that it is not in a repository.
    """

    # Name of primary branch - default is main
    primary_branch = "main"

    # Match hex commit IDs
    _hash_pattern = re.compile(r"^\s*([0-9a-f]{40})\s*$")

    # Match branch names.  This should catch all valid names but may
    # also let some invalid names through.  This should not matter
    # given that it is being used to match git command output.  The
    # final character is matched separately so that a name may be a
    # single character, may not end with a dot, and never absorbs
    # trailing whitespace (such as a carriage return) into the capture.
    # For a complete overview of the naming scheme, see man git
    # check-ref-format
    _branch_pattern = re.compile(r"^\s*([^\s~^:?*\[]*[^\s~^:?*\[.])\s*$")

    def __init__(self, parent=None, repo=None):
        self.parent = parent or self.primary_branch

        if repo is None:
            self._repo = None
        else:
            self._repo = Path(repo)
            if not self._repo.is_dir():
                raise GitBDiffError(f"{repo} is not a directory")

        self.ancestor = self.get_branch_point()
        self.current = self.get_latest_commit()
        self.branch = self.get_branch_name()

    def _first_match(self, args, pattern, message):
        """Return group 1 of the first git output line matching pattern.

        Runs ``git`` with ``args`` and raises GitBDiffError with
        ``message`` if no line of the output matches.
        """

        for line in self.run_git(args):
            if m := pattern.match(line):
                return m.group(1)
        raise GitBDiffError(message)

    def get_branch_point(self):
        """Get the branch point from the parent repo.

        Find the commit which marks the point of divergence from the
        parent repository.  If there are no changes or this is the
        trunk, the branch point will be the same as the most recent
        commit.

        Raises GitBDiffError if git does not report a commit ID.
        """

        return self._first_match(
            ["merge-base", self.parent, "HEAD"],
            self._hash_pattern,
            "branch point not found",
        )

    def get_latest_commit(self):
        """Get the last commit ID on the branch.

        Raises GitBDiffError if git does not report a commit ID.
        """

        return self._first_match(
            ["show", "--pretty=%H", "--no-patch"],
            self._hash_pattern,
            "current revision not found",
        )

    def get_branch_name(self):
        """Get the name of the current branch.

        Raises GitBDiffError if git does not print a usable branch
        name.
        """

        return self._first_match(
            ["branch", "--show-current"],
            self._branch_pattern,
            "unable to get branch name",
        )

    @property
    def is_branch(self):
        """Whether this is a branch or main."""
        return self.branch != self.primary_branch

    @property
    def has_diverged(self):
        """Whether the branch has diverged from its parent."""
        return self.ancestor != self.current

    def files(self):
        """Iterate over files changed on the branch.

        Yields each non-empty line printed by ``git diff --name-only
        --diff-filter=AMX`` run against the branch point.
        """

        for line in self.run_git(
            ["diff", "--name-only", "--diff-filter=AMX", self.ancestor]
        ):
            if line != "":
                yield line

    def run_git(self, args):
        """Run a git command and yield the output.

        This is a generator, so nothing is executed (and no error is
        raised) until the result is iterated.

        Args:
            args: list of arguments to pass to the git executable.

        Yields:
            Each line of the command's standard output; the final item
            is an empty string when the output ends with a newline.

        Raises:
            TypeError: if ``args`` is not a list.
            GitBDiffNotGit: if git reports that it is not in a repository.
            GitBDiffError: if git reports any other fatal error or
                exits with a non-zero status.
        """

        if not isinstance(args, list):
            raise TypeError("args must be a list")
        cmd = ["git"] + args

        # Run the command in the repo directory, capture the output,
        # and check for errors.  The built-in error check is not used
        # to allow specific git errors to be treated more precisely
        proc = subprocess.run(
            cmd, capture_output=True, check=False, shell=False, cwd=self._repo
        )

        for line in proc.stderr.decode("utf-8").split("\n"):
            if line.startswith("fatal: not a git repository"):
                raise GitBDiffNotGit(cmd)
            if line.startswith("fatal: "):
                # Strip the "fatal: " prefix from the reported message
                raise GitBDiffError(line[7:])

        if proc.returncode != 0:
            raise GitBDiffError(f"command returned {proc.returncode}")

        yield from proc.stdout.decode("utf-8").split("\n")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
#!/usr/bin/env python3 | ||
# *********************************COPYRIGHT************************************ | ||
# (C) Crown copyright Met Office. All rights reserved. | ||
# For further details please refer to the file COPYRIGHT.txt | ||
# which you should have received as part of this distribution. | ||
# *********************************COPYRIGHT************************************ | ||
""" | ||
Test suite for git_bdiff module. | ||
""" | ||
|
||
import os | ||
import subprocess | ||
import pytest | ||
|
||
from git_bdiff import GitBDiff, GitBDiffError, GitBDiffNotGit | ||
|
||
|
||
# Disable warnings caused by the use of pytest fixtures | ||
# pylint: disable=redefined-outer-name | ||
|
||
|
||
def add_to_repo(start, end, message, mode="wt"):
    """Write dummy files file<start>..file<end-1> and commit them.

    The files are opened with ``mode`` in the current working
    directory, then everything is staged and committed with
    ``message``.
    """

    for index in range(start, end):
        with open(f"file{index}", mode, encoding="utf-8") as handle:
            handle.write(f"Lorem ipsum dolor sit amet {index}\n")

    # Stage everything, then commit without requiring a GPG signature
    for git_args in (["add", "-A"], ["commit", "--no-gpg-sign", "-m", message]):
        subprocess.run(["git", *git_args], check=True)
|
||
|
||
@pytest.fixture(scope="session")
def git_repo(tmpdir_factory):
    """Create and populate a test git repo.

    The repository is created in a fresh temporary directory, which
    also becomes the current working directory.  It contains:

    * main - files 0 to 9
    * mybranch - created from main, adds files 20 to 29
    * subbranch - created from mybranch, adds files 40 to 49
    * unchanged - created from main with no further commits
    * overwrite - created from main, appends to files 0 to 9

    The main branch is left checked out.  Returns the repository
    directory.
    """

    location = tmpdir_factory.mktemp("data")
    os.chdir(location)

    # Create the repo and add some files.  The unborn HEAD is pointed
    # at main explicitly because the branch created by a plain "git
    # init" depends on the git version and the init.defaultBranch
    # setting, while the checkouts below rely on it being called main
    subprocess.run(["git", "init"], check=True)
    subprocess.run(["git", "symbolic-ref", "HEAD", "refs/heads/main"], check=True)
    add_to_repo(0, 10, "Testing")

    # Create a branch and add some files
    subprocess.run(["git", "checkout", "-b", "mybranch"], check=True)
    add_to_repo(20, 30, "Commit to mybranch")

    # Create a branch-of-branch and add more files
    subprocess.run(["git", "checkout", "-b", "subbranch"], check=True)
    add_to_repo(40, 50, "Commit to subbranch")

    # Create a branch from main without any changes
    subprocess.run(["git", "checkout", "main"], check=True)
    subprocess.run(["git", "checkout", "-b", "unchanged"], check=True)

    # Create a branch from main and overwrite some things
    subprocess.run(["git", "checkout", "main"], check=True)
    subprocess.run(["git", "checkout", "-b", "overwrite"], check=True)
    add_to_repo(0, 10, "Overwriting", "at")

    # Switch back to the main branch ready for testing
    subprocess.run(["git", "checkout", "main"], check=True)

    return location
|
||
|
||
def test_init(git_repo):
    """A default GitBDiff in the test repo reports an undiverged main."""

    os.chdir(git_repo)
    main_diff = GitBDiff()
    name = main_diff.branch

    assert name is not None
    assert name == "main"
    assert not main_diff.is_branch
    assert not main_diff.has_diverged
|
||
|
||
def test_repo_selection(git_repo):
    """The repo argument selects the repository directory."""

    # Work from the filesystem root and pass the repository explicitly
    os.chdir("/")
    selected = GitBDiff(repo=git_repo)
    name = selected.branch

    assert name is not None
    assert name == "main"
    assert not selected.is_branch
    assert not selected.has_diverged
|
||
|
||
def test_invalid_repo_selection(git_repo):
    """A missing path or a plain file is rejected as a repository."""

    # First a path that should not exist, then one expected to be a file
    for bad_location in ("/nosuch", "/etc/hosts"):
        with pytest.raises(GitBDiffError):
            GitBDiff(repo=bad_location)
|
||
|
||
def test_branch_diff(git_repo):
    """Diff a branch that adds files directly on top of main."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "mybranch"], check=True)

    try:
        branch_diff = GitBDiff()
        changed = [*branch_diff.files()]
    finally:
        # Return to main whatever happens so later tests start there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert branch_diff.branch == "mybranch"
    assert branch_diff.is_branch
    assert branch_diff.has_diverged
    assert len(changed) == 10
    assert changed[0] == "file20"
|
||
|
||
def test_branch_of_branch_diff(git_repo):
    """Diff a branch that was itself created from another branch.

    Every change since the branch point with main is expected, so the
    files from both the intermediate branch and the sub-branch should
    be reported.
    """

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "subbranch"], check=True)

    try:
        branch_diff = GitBDiff()
        changed = [*branch_diff.files()]
    finally:
        # Return to main whatever happens so later tests start there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert branch_diff.branch == "subbranch"
    assert branch_diff.is_branch
    assert branch_diff.has_diverged
    assert len(changed) == 20
    assert changed[0] == "file20"
    assert changed[-1] == "file49"
|
||
|
||
def test_overwritten_branch(git_repo):
    """Diff a branch that modifies files which already exist on main."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "overwrite"], check=True)

    try:
        branch_diff = GitBDiff()
        changed = [*branch_diff.files()]
    finally:
        # Return to main whatever happens so later tests start there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert branch_diff.branch == "overwrite"
    assert branch_diff.is_branch
    assert branch_diff.has_diverged
    assert len(changed) == 10
|
||
|
||
def test_unchanged_branch(git_repo):
    """A branch without commits of its own reports no changes."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "unchanged"], check=True)

    try:
        branch_diff = GitBDiff()
        changed = [*branch_diff.files()]
    finally:
        # Return to main whatever happens so later tests start there
        subprocess.run(["git", "checkout", "main"], check=True)

    assert branch_diff.branch == "unchanged"
    assert branch_diff.is_branch
    assert not branch_diff.has_diverged
    assert not changed
|
||
|
||
def test_non_repo(tmpdir):
    """GitBDiffNotGit is raised when run outside of a git repository."""

    os.chdir(tmpdir)

    with pytest.raises(GitBDiffNotGit) as caught:
        GitBDiff()

    message = str(caught.value)
    assert "not a repository" in message
|
||
|
||
def test_nonexistent_parent(git_repo):
    """An unknown parent branch results in a GitBDiffError.

    This stands in for the detection of fatal git errors in general.
    """

    os.chdir(git_repo)

    with pytest.raises(GitBDiffError) as caught:
        GitBDiff(parent="nosuch")

    message = str(caught.value)
    assert "Not a valid object name nosuch" in message
|
||
|
||
def test_git_run(git_repo):
    """Test git interface and error handling."""

    # Enter the repository explicitly rather than relying on whichever
    # directory the previously executed test happened to leave behind
    os.chdir(git_repo)
    bdiff = GitBDiff()

    # run_git is a generator, so it has to be consumed before any of
    # its errors are raised
    with pytest.raises(TypeError) as exc:
        # Use a string in place of a list
        list(bdiff.run_git("commit -m ''"))
    assert "args must be a list" in str(exc.value)

    with pytest.raises(GitBDiffError) as exc:
        # Run a command that should return non-zero
        list(bdiff.run_git(["commit", "-m", "''"]))
    assert "command returned 1" in str(exc.value)
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.