Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions bdiff/git_bdiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# *********************************COPYRIGHT************************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file COPYRIGHT.txt
# which you should have received as part of this distribution.
# *********************************COPYRIGHT************************************
"""
Module to obtain a list of all altered files on a git branch, from the
point where it diverged from the parent branch to the most recent
commit.

Usage is as follows:

>>> bdiff = GitBDiff()

And then:

>>> for change in bdiff.files():
... print(change)
"""

import re
import subprocess
from pathlib import Path


class GitBDiffError(Exception):
    """Common base class for the errors raised by the bdiff code."""


class GitBDiffNotGit(GitBDiffError):
    """Raised when a git command reports it is not inside a repository."""

    def __init__(self, cmd):
        # Stringify each element so Path objects and the like can be
        # included in the reported command line
        command = " ".join(map(str, cmd))
        super().__init__(f"not a repository (cmd:{command})")


class GitBDiff:
"""Class which generates a branch diff."""

# Name of primary branch - default is main
primary_branch = "main"

# Match hex commit IDs
_hash_pattern = re.compile(r"^\s*([0-9a-f]{40})\s*$")

# Match branch names. This should catch all valid names but may
# also some invalid names through. This should matter given that
# it is being used to match git command output. For a complete
# overview of the naming scheme, see man git check-ref-format
_branch_pattern = re.compile(r"^\s*([^\s~\^\:\?\*\[]+[^.])\s*$")

def __init__(self, parent=None, repo=None):
self.parent = parent or self.primary_branch

if repo is None:
self._repo = None
else:
self._repo = Path(repo)
if not self._repo.is_dir():
raise GitBDiffError(f"{repo} is not a directory")

self.ancestor = self.get_branch_point()
self.current = self.get_latest_commit()
self.branch = self.get_branch_name()

def get_branch_point(self):
"""Get the branch point from the parent repo.

Find the commit which marks the point of divergence from the
parent repository. If there are no changes or this is the
trunk, the branch point will be the same as the most recent
commit.
"""

result = None
for line in self.run_git(["merge-base", self.parent, "HEAD"]):
if m := self._hash_pattern.match(line):
result = m.group(1)
break
else:
raise GitBDiffError("branch point not found")
return result

def get_latest_commit(self):
"""Get the last commit ID on the branch."""

result = None
for line in self.run_git(["show", "--pretty=%H", "--no-patch"]):
if m := self._hash_pattern.match(line):
result = m.group(1)
break
else:
raise GitBDiffError("current revision not found")
return result

def get_branch_name(self):
"""Get the name of the current branch."""
result = None
for line in self.run_git(["branch", "--show-current"]):
if m := self._branch_pattern.match(line):
result = m.group(1)
break
else:
raise GitBDiffError("unable to get branch name")
return result

@property
def is_branch(self):
"""Whether this is a branch or main."""
return self.branch != self.primary_branch

@property
def has_diverged(self):
"""Whether the branch has diverged from its parent."""
return self.ancestor != self.current

def files(self):
"""Iterate over files changed on the branch."""

for line in self.run_git(
["diff", "--name-only", "--diff-filter=AMX", self.ancestor]
):
if line != "":
yield line

def run_git(self, args):
"""Run a git command and yield the output."""

if not isinstance(args, list):
raise TypeError("args must be a list")
cmd = ["git"] + args

# Run the the command in the repo directory, capture the
# output, and check for errors. The build in error check is
# not used to allow specific git errors to be treated more
# precisely
proc = subprocess.run(
cmd, capture_output=True, check=False, shell=False, cwd=self._repo
)

for line in proc.stderr.decode("utf-8").split("\n"):
if line.startswith("fatal: not a git repository"):
raise GitBDiffNotGit(cmd)
if line.startswith("fatal: "):
raise GitBDiffError(line[7:])

if proc.returncode != 0:
raise GitBDiffError(f"command returned {proc.returncode}")

yield from proc.stdout.decode("utf-8").split("\n")
216 changes: 216 additions & 0 deletions bdiff/tests/test_git_bdiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#!/usr/bin/env python3
# *********************************COPYRIGHT************************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file COPYRIGHT.txt
# which you should have received as part of this distribution.
# *********************************COPYRIGHT************************************
"""
Test suite for git_bdiff module.
"""

import os
import subprocess
import pytest

from git_bdiff import GitBDiff, GitBDiffError, GitBDiffNotGit


# Disable warnings caused by the use of pytest fixtures
# pylint: disable=redefined-outer-name


def add_to_repo(start, end, message, mode="wt"):
    """Write dummy files numbered start to end - 1 and commit them.

    The files are opened in the current working directory using the
    given mode, then everything is staged and committed with the
    given message.
    """

    for index in range(start, end):
        with open(f"file{index}", mode, encoding="utf-8") as handle:
            handle.write(f"Lorem ipsum dolor sit amet {index}\n")

    for git_args in (
        ["add", "-A"],
        ["commit", "--no-gpg-sign", "-m", message],
    ):
        subprocess.run(["git", *git_args], check=True)


@pytest.fixture(scope="session")
def git_repo(tmpdir_factory):
    """Create and populate a test git repo.

    The repo is created once per test session in a temporary
    directory, which also becomes the working directory.  It has the
    following branches:

    * main - files 0 to 9
    * mybranch - from main, adds files 20 to 29
    * subbranch - from mybranch, adds files 40 to 49
    * unchanged - from main, no further commits
    * overwrite - from main, appends to files 0 to 9

    The repo is left on main and its location is returned.
    """

    location = tmpdir_factory.mktemp("data")
    os.chdir(location)

    # Create the repo.  Point HEAD at main explicitly because the
    # name git gives to the initial branch depends on its version and
    # on the init.defaultBranch setting of whoever runs the tests
    subprocess.run(["git", "init"], check=True)
    subprocess.run(["git", "symbolic-ref", "HEAD", "refs/heads/main"], check=True)

    # Set a repo-local identity so that commits succeed on machines
    # with no global git user configuration
    subprocess.run(["git", "config", "user.name", "Test User"], check=True)
    subprocess.run(["git", "config", "user.email", "test@example.com"], check=True)

    # Add some files to main
    add_to_repo(0, 10, "Testing")

    # Create a branch and add some files
    subprocess.run(["git", "checkout", "-b", "mybranch"], check=True)
    add_to_repo(20, 30, "Commit to mybranch")

    # Create a branch-of-branch and add more files
    subprocess.run(["git", "checkout", "-b", "subbranch"], check=True)
    add_to_repo(40, 50, "Commit to subbranch")

    # Create a branch from main without any changes
    subprocess.run(["git", "checkout", "main"], check=True)
    subprocess.run(["git", "checkout", "-b", "unchanged"], check=True)

    # Create a branch from main and overwrite some things
    subprocess.run(["git", "checkout", "main"], check=True)
    subprocess.run(["git", "checkout", "-b", "overwrite"], check=True)
    add_to_repo(0, 10, "Overwriting", "at")

    # Switch back to the main branch ready for testing
    subprocess.run(["git", "checkout", "main"], check=True)

    return location


def test_init(git_repo):
    """Check a default instance created inside the repo on main."""

    os.chdir(git_repo)
    diff = GitBDiff()

    assert diff.branch is not None
    assert diff.branch == "main"
    assert not diff.is_branch
    assert not diff.has_diverged


def test_repo_selection(git_repo):
    """Check the repo argument is used instead of the working directory."""

    # Move out of the repo so only the repo argument can locate it
    os.chdir("/")
    diff = GitBDiff(repo=git_repo)

    assert diff.branch is not None
    assert diff.branch == "main"
    assert not diff.is_branch
    assert not diff.has_diverged


def test_invalid_repo_selection(git_repo):
    """Check a missing path or a plain file is rejected as a repo."""

    for bad_path in ("/nosuch", "/etc/hosts"):
        with pytest.raises(GitBDiffError):
            GitBDiff(repo=bad_path)


def test_branch_diff(git_repo):
    """Check the diff of a branch taken directly from main."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "mybranch"], check=True)

    try:
        diff = GitBDiff()
        changed = [*diff.files()]
    finally:
        # Always go back to main so other tests see a known branch
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "mybranch"
    assert diff.is_branch
    assert diff.has_diverged
    assert len(changed) == 10
    assert changed[0] == "file20"


def test_branch_of_branch_diff(git_repo):
    """Check the diff of a branch created from another branch.

    The expected changes include the commits on the intermediate
    branch, so this checks that everything since the branch point
    with main is picked up.
    """

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "subbranch"], check=True)

    try:
        diff = GitBDiff()
        changed = [*diff.files()]
    finally:
        # Always go back to main so other tests see a known branch
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "subbranch"
    assert diff.is_branch
    assert diff.has_diverged
    assert len(changed) == 20
    assert changed[0] == "file20"
    assert changed[-1] == "file49"


def test_overwritten_branch(git_repo):
    """Check the diff of a branch which modifies existing files."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "overwrite"], check=True)

    try:
        diff = GitBDiff()
        changed = [*diff.files()]
    finally:
        # Always go back to main so other tests see a known branch
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "overwrite"
    assert diff.is_branch
    assert diff.has_diverged
    assert len(changed) == 10


def test_unchanged_branch(git_repo):
    """Check a branch without any commits of its own."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "unchanged"], check=True)

    try:
        diff = GitBDiff()
        changed = [*diff.files()]
    finally:
        # Always go back to main so other tests see a known branch
        subprocess.run(["git", "checkout", "main"], check=True)

    assert diff.branch == "unchanged"
    assert diff.is_branch
    assert not diff.has_diverged
    assert not changed


def test_non_repo(tmpdir):
    """Check the error raised when the working directory is not a repo."""

    os.chdir(tmpdir)

    with pytest.raises(GitBDiffNotGit) as caught:
        GitBDiff()
    assert "not a repository" in str(caught.value)


def test_nonexistent_parent(git_repo):
    """Check the error raised when the parent branch does not exist.

    This stands in for the handling of fatal git errors in general.
    """

    os.chdir(git_repo)

    with pytest.raises(GitBDiffError) as caught:
        GitBDiff(parent="nosuch")
    assert "Not a valid object name nosuch" in str(caught.value)


def test_git_run(git_repo):
    """Test git interface and error handling."""

    # Enter the repo explicitly rather than relying on the working
    # directory left behind by whichever test happened to run before
    os.chdir(git_repo)
    bdiff = GitBDiff()

    with pytest.raises(TypeError) as exc:
        # Use a string in place of a list
        list(bdiff.run_git("commit -m ''"))
    assert "args must be a list" in str(exc.value)

    with pytest.raises(GitBDiffError) as exc:
        # Run a command that should return non-zero
        list(bdiff.run_git(["commit", "-m", "''"]))
    assert "command returned 1" in str(exc.value)