Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions bdiff/git_bdiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
# *********************************COPYRIGHT************************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file COPYRIGHT.txt
# which you should have received as part of this distribution.
# *********************************COPYRIGHT************************************
"""
Module to obtain a list of all altered files on a git branch, from the
point where it diverged from the parent branch to the most recent
commit.

Usage is as follows:

>>> bdiff = GitBDiff()

And then:

>>> for change in bdiff.files():
... print(change)
"""

import os
import re
import subprocess


class GitBDiffError(Exception):
    """Root of the bdiff exception hierarchy.

    Catch this type to handle any error reported by the module.
    """


class GitBDiffNotGit(GitBDiffError):
    """Raised when the target is not inside a git repository.

    The exception message embeds the command line that was attempted.
    """

    def __init__(self, cmd):
        # cmd is an iterable of strings forming the command line
        command_line = " ".join(cmd)
        super().__init__(f"not a repository (cmd:{command_line})")


class GitBDiff:
    """Generate a branch diff for the git repository in the working directory.

    Constructing an instance runs git and records:

    * ``parent`` - the branch compared against (``primary_branch`` when
      no parent is given);
    * ``ancestor`` - the merge base of ``parent`` and ``HEAD``;
    * ``current`` - the commit ID of ``HEAD``;
    * ``branch`` - the name of the checked-out branch.

    Raises:
        GitBDiffNotGit: if git reports that this is not a repository.
        GitBDiffError: if a git command fails or produces no usable
            output.
    """

    # Name of primary branch - default is main
    primary_branch = "main"

    # Match hex commit IDs
    _hash_pattern = re.compile(r"^([0-9a-f]+)$")

    # Match branch names
    _branch_pattern = re.compile(r"^(\S+)$")

    def __init__(self, parent=None):
        self.parent = parent or self.primary_branch
        self.ancestor = self.get_branch_point()
        self.current = self.get_latest_commit()
        self.branch = self.get_branch_name()

    def _first_match(self, args, pattern, error):
        """Return group 1 of the first git output line matching pattern.

        Runs ``git`` with ``args`` and scans the output in order.
        Raises GitBDiffError with the ``error`` message when no line
        matches.
        """

        for line in self.run_git(args):
            if m := pattern.match(line):
                return m.group(1)
        raise GitBDiffError(error)

    def get_branch_point(self):
        """Get the branch point from the parent repo.

        Find the commit which marks the point of divergence from the
        parent repository.  If there are no changes or this is the
        trunk, the branch point will be the same as the most recent
        commit.

        Returns the commit ID as a hex string and raises GitBDiffError
        if git does not report one.
        """

        return self._first_match(
            ["merge-base", self.parent, "HEAD"],
            self._hash_pattern,
            "branch point not found",
        )

    def get_latest_commit(self):
        """Get the last commit ID on the branch.

        Returns the commit ID as a hex string and raises GitBDiffError
        if git does not report one.
        """

        return self._first_match(
            ["show", "--pretty=%H", "--no-patch"],
            self._hash_pattern,
            "current revision not found",
        )

    def get_branch_name(self):
        """Get the name of the current branch.

        Raises GitBDiffError if git does not print a branch name.
        """

        return self._first_match(
            ["branch", "--show-current"],
            self._branch_pattern,
            "unable to get branch name",
        )

    @property
    def is_branch(self):
        """Whether this is a branch or main."""
        return self.branch != self.primary_branch

    @property
    def has_diverged(self):
        """Whether the branch has diverged from its parent."""
        return self.ancestor != self.current

    def files(self):
        """Iterate over files changed on the branch.

        Yields one path per non-empty line of ``git diff --name-only``
        output, restricted by ``--diff-filter=AMX``.
        """

        # NOTE(review): only the ancestor is passed to git diff, so the
        # comparison is presumably against the working tree rather than
        # the latest commit - confirm that this is intended.
        for line in self.run_git(
            ["diff", "--name-only", "--diff-filter=AMX", self.ancestor]
        ):
            if line:
                yield line

    @staticmethod
    def run_git(args):
        """Run a git command and yield the output.

        ``args`` is either a whitespace-separated string or an iterable
        of argument strings; ``git`` is prepended to form the command.
        Each line of standard output is yielded in turn, including any
        empty final line.

        This is a generator, so the command is not run until the result
        is first iterated.

        Raises:
            GitBDiffNotGit: if git reports that this is not a repository.
            GitBDiffError: if git prints any other fatal error or exits
                with a non-zero status.
        """

        if isinstance(args, str):
            args = args.split()
        cmd = ["git", *args]

        # Failures are recognised from the English "fatal: " prefix, so
        # force the C locale to stop git translating its messages.
        env = {**os.environ, "LC_ALL": "C"}
        proc = subprocess.run(cmd, capture_output=True, check=False, env=env)

        # Decode stderr leniently so an undecodable byte cannot hide the
        # real error behind a UnicodeDecodeError.
        for line in proc.stderr.decode("utf-8", errors="replace").split("\n"):
            if line.startswith("fatal: not a git repository"):
                raise GitBDiffNotGit(cmd)
            if line.startswith("fatal: "):
                raise GitBDiffError(line[7:].rstrip())

        if proc.returncode != 0:
            raise GitBDiffError(f"command returned {proc.returncode}")

        yield from proc.stdout.decode("utf-8").split("\n")
154 changes: 154 additions & 0 deletions bdiff/tests/test_git_bdiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#!/usr/bin/env python3
# *********************************COPYRIGHT************************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file COPYRIGHT.txt
# which you should have received as part of this distribution.
# *********************************COPYRIGHT************************************
"""
Test suite for git_bdiff module.
"""

import os
import subprocess
import pytest

from git_bdiff import GitBDiff, GitBDiffError, GitBDiffNotGit


# Disable warnings caused by the use of pytest fixtures
# pylint: disable=redefined-outer-name


def add_to_repo(start, end, message):
    """Add and commit dummy files to a repo.

    Writes ``file<i>`` containing ``Hello <i>`` for each ``i`` in
    ``range(start, end)`` in the working directory, stages everything
    and commits it with the given message.
    """

    for i in range(start, end):
        with open(f"file{i}", "wt", encoding="utf-8") as fd:
            print(f"Hello {i}", file=fd)

    subprocess.run(["git", "add", "-A"], check=True)

    # Supply an identity on the command line so that the commit does not
    # depend on user.name/user.email being configured on the machine
    # running the tests.
    subprocess.run(
        [
            "git",
            "-c",
            "user.name=bdiff test",
            "-c",
            "user.email=bdiff-test@example.com",
            "commit",
            "--no-gpg-sign",
            "-m",
            message,
        ],
        check=True,
    )


@pytest.fixture(scope="session")
def git_repo(tmpdir_factory):
    """Create and populate a test git repo.

    The repository is built once per test session in a temporary
    directory and holds four branches: main, mybranch (from main),
    subbranch (from mybranch) and unchanged (from main, no commits).
    The working directory is changed to the repository, main is left
    checked out, and the repository location is returned.
    """

    location = tmpdir_factory.mktemp("data")
    os.chdir(location)

    # Create the repo and add some files.  Point HEAD at main before the
    # first commit so the branch name does not depend on the user's
    # init.defaultBranch setting.
    subprocess.run(["git", "init"], check=True)
    subprocess.run(["git", "symbolic-ref", "HEAD", "refs/heads/main"], check=True)
    add_to_repo(0, 10, "Testing")

    # Create a branch and add some files
    subprocess.run(["git", "checkout", "-b", "mybranch"], check=True)
    add_to_repo(20, 30, "Commit to mybranch")

    # Create a branch-of-branch and add more files
    subprocess.run(["git", "checkout", "-b", "subbranch"], check=True)
    add_to_repo(40, 50, "Commit to subbranch")

    # Create a branch from main without any changes
    subprocess.run(["git", "checkout", "main"], check=True)
    subprocess.run(["git", "checkout", "-b", "unchanged"], check=True)

    # Switch back to the main branch ready for testing
    subprocess.run(["git", "checkout", "main"], check=True)

    return location


def test_init(git_repo):
    """Check the state of a GitBDiff created on the main branch."""

    os.chdir(git_repo)
    main_diff = GitBDiff()

    # On main there is a branch name, but no branch and no divergence
    assert main_diff.branch is not None
    assert main_diff.branch == "main"
    assert not main_diff.is_branch
    assert not main_diff.has_diverged


def test_branch_diff(git_repo):
    """Check the diff of a branch taken directly from main."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "mybranch"], check=True)

    # Always return to main so later tests start from a known state
    try:
        branch_diff = GitBDiff()
        changed = [*branch_diff.files()]
    finally:
        subprocess.run(["git", "checkout", "main"], check=True)

    assert branch_diff.branch == "mybranch"
    assert branch_diff.is_branch
    assert branch_diff.has_diverged
    assert len(changed) == 10
    assert changed[0] == "file20"


def test_branch_of_branch_diff(git_repo):
    """Check the diff of a branch taken from another branch.

    The changes from both the intermediate branch and the
    sub-branch should be reported, i.e. everything since the
    branch point with main.
    """

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "subbranch"], check=True)

    # Always return to main so later tests start from a known state
    try:
        sub_diff = GitBDiff()
        changed = [*sub_diff.files()]
    finally:
        subprocess.run(["git", "checkout", "main"], check=True)

    assert sub_diff.branch == "subbranch"
    assert sub_diff.is_branch
    assert sub_diff.has_diverged
    assert len(changed) == 20
    assert changed[0] == "file20"
    assert changed[-1] == "file49"


def test_unchanged_branch(git_repo):
    """Check a branch which has no commits of its own."""

    os.chdir(git_repo)
    subprocess.run(["git", "checkout", "unchanged"], check=True)

    # Always return to main so later tests start from a known state
    try:
        idle_diff = GitBDiff()
        changed = [*idle_diff.files()]
    finally:
        subprocess.run(["git", "checkout", "main"], check=True)

    assert idle_diff.branch == "unchanged"
    assert idle_diff.is_branch
    assert not idle_diff.has_diverged
    assert not changed


def test_non_repo(tmpdir):
    """Check that GitBDiffNotGit is raised outside a git repo."""

    # Work from the empty temporary directory provided by the fixture
    os.chdir(tmpdir)

    with pytest.raises(GitBDiffNotGit):
        GitBDiff()


def test_unknown_parent(git_repo):
    """Check that a non-existent parent branch raises an error.

    This stands in for the detection of git failures in
    general.
    """

    os.chdir(git_repo)

    # No branch called "nosuch" is created by the fixture
    with pytest.raises(GitBDiffError):
        GitBDiff(parent="nosuch")