Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/usage/diff.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,11 @@ Examples
{"path": "file4", "changes": [{"type": "added", "size": 0}]}
{"path": "file3", "changes": [{"type": "removed", "size": 0}]}


# Use --sort-by with a comma-separated list; sorts apply stably from last to first.
# Here: primary by net size change descending, tie-breaker by path ascending
$ borg diff --sort-by=">size_diff,path" archive1 archive2
+17 B -5 B [-rw-r--r-- -> -rwxr-xr-x] file1
removed 0 B file3
added 0 B file4
+135 B -252 B file2
3 changes: 2 additions & 1 deletion src/borg/archiver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ def preprocess_args(self, args):
]
for i, arg in enumerate(args[:]):
for old_name, new_name, warning in deprecations:
if arg.startswith(old_name):
# either --old_name or --old_name=...
if arg == old_name or (arg.startswith(old_name) and arg[len(old_name)] == "="):
if new_name is not None:
args[i] = arg.replace(old_name, new_name)
print(warning, file=sys.stderr)
Expand Down
156 changes: 152 additions & 4 deletions src/borg/archiver/diff_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from ..archive import Archive
from ..constants import * # NOQA
from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, PathSpec, BorgJsonEncoder
from ..helpers import IncludePatternNeverMatchedWarning, remove_surrogates
from ..item import ItemDiff
from ..manifest import Manifest
from ..logger import create_logger

Expand Down Expand Up @@ -87,11 +89,75 @@ def print_text_output(diff, formatter):
diffs_iter = Archive.compare_archives_iter(
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
)
# Conversion to string and filtering for diff.equal to save memory if sorting
# Filter out equal items early (keep as generator; listify only if sorting)
diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))

if args.sort:
diffs = sorted(diffs, key=lambda diff: diff.path)
sort_specs = []
if args.sort_by:
for spec in args.sort_by.split(","):
spec = spec.strip()
if spec:
sort_specs.append(spec)

def key_for(field: str, d: "ItemDiff"):
    """Return the sort key of diff *d* for the given --sort-by field name.

    A single leading "<" or ">" direction marker on *field* is ignored here;
    the caller translates direction into ``reverse=`` for the sort itself.
    Raises ValueError for an unknown field name.
    """
    # Drop an optional direction marker; only one is ever present.
    if field and field[0] in ("<", ">"):
        field = field[1:]

    # Default / explicit path sort.
    if field in (None, "", "path"):
        return remove_surrogates(d.path)

    # Byte counts derived from the "content" change entry, if any.
    if field in ("size_diff", "size_added", "size_removed"):
        added = removed = 0
        content = d.changes().get("content")
        if content is not None:
            details = content.to_dict()
            change_type = details.get("type")
            if change_type == "modified":
                added = details.get("added", 0)
                removed = details.get("removed", 0)
            elif change_type and change_type.startswith("added"):
                added = details.get("added", details.get("size", 0))
            elif change_type and change_type.startswith("removed"):
                removed = details.get("removed", details.get("size", 0))
        return {"size_diff": added - removed, "size_added": added, "size_removed": removed}[field]

    # Timestamp delta: archive2 value minus archive1 value.
    if field in ("ctime_diff", "mtime_diff"):
        attr = field.split("_")[0]
        return d._item2.get(attr, 0) - d._item1.get(attr, 0)

    # Size of the item as stored in archive2; removed items count as 0.
    if field == "size":
        item = d._item2
        if item is None or item.get("deleted"):
            return 0
        return item.get_size()

    # Plain item attributes, read from archive2's item when it exists,
    # falling back to archive1's item for removed entries.
    item = d._item2 or d._item1
    defaults = {"user": "", "group": "", "uid": -1, "gid": -1, "ctime": 0, "mtime": 0}
    if field in defaults:
        return defaults[field] if item is None else item.get(field, defaults[field])
    raise ValueError(f"Invalid field name: {field}")

if sort_specs:
diffs = list(diffs)
# Apply stable sorts from last to first
for spec in reversed(sort_specs):
desc = False
field = spec
if field and field[0] in ("<", ">"):
desc = field[0] == ">"
diffs.sort(key=lambda di: key_for(field, di), reverse=desc)

formatter = DiffFormatter(format, args.content_only)
for diff in diffs:
Expand Down Expand Up @@ -149,7 +215,84 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
"""
)
+ DiffFormatter.keys_help()
+ textwrap.dedent(
"""

What is compared
+++++++++++++++++
For each matching item in both archives, Borg reports:

- Content changes: total added/removed bytes within files. If chunker parameters are comparable,
Borg compares chunk IDs quickly; otherwise, it compares the content.
- Metadata changes: user, group, mode, and other metadata shown inline like
"[old_mode -> new_mode]" for mode changes. Use ``--content-only`` to suppress metadata changes.
- Added/removed items: printed as "added SIZE path" or "removed SIZE path".

Output formats
++++++++++++++
The default (text) output shows one line per changed path, e.g.::

+135 B -252 B [ -rw-r--r-- -> -rwxr-xr-x ] path/to/file

JSON Lines output (``--json-lines``) prints one JSON object per changed path, e.g.::

{"path": "PATH", "changes": [
{"type": "modified", "added": BYTES, "removed": BYTES},
{"type": "mode", "old_mode": "-rw-r--r--", "new_mode": "-rwxr-xr-x"},
{"type": "added", "size": SIZE},
{"type": "removed", "size": SIZE}
]}

Sorting
++++++++
Use ``--sort-by FIELDS`` where FIELDS is a comma-separated list of fields.
Sorts are applied stably from last to first in the given list. Prepend ">" for
descending, "<" (or no prefix) for ascending, for example ``--sort-by=">size_added,path"``.
Supported fields include:

- path: the item path
- size_added: total bytes added for the item content
- size_removed: total bytes removed for the item content
- size_diff: size_added - size_removed (net content change)
- size: size of the item as stored in ARCHIVE2 (0 for removed items)
- user, group, uid, gid, ctime, mtime: taken from the item state in ARCHIVE2 when present
- ctime_diff, mtime_diff: timestamp difference (archive2 - archive1)

Performance considerations
++++++++++++++++++++++++++
diff automatically detects whether the archives were created with the same chunker
parameters. If so, only chunk IDs are compared, which is very fast.
"""
)
)

def diff_sort_spec_validator(s):
    """Validate a --sort-by spec string and return it in normalized form.

    Accepts a comma-separated list of field names, each optionally prefixed
    with ">" (descending) or "<" (ascending). Whitespace around entries is
    stripped; empty entries are dropped. Raises argparse.ArgumentTypeError
    for anything else.
    """
    if not isinstance(s, str):
        raise argparse.ArgumentTypeError("unsupported sort field (not a string)")
    valid_fields = frozenset(
        (
            "path",
            "size_added",
            "size_removed",
            "size_diff",
            "size",
            "user",
            "group",
            "uid",
            "gid",
            "ctime",
            "mtime",
            "ctime_diff",
            "mtime_diff",
        )
    )
    specs = []
    for raw in s.split(","):
        raw = raw.strip()
        if raw:
            specs.append(raw)
    if not specs:
        raise argparse.ArgumentTypeError("unsupported sort field: empty spec")
    for spec in specs:
        # Exactly one leading direction marker may precede the field name.
        name = spec[1:] if spec[0] in (">", "<") else spec
        if name not in valid_fields:
            raise argparse.ArgumentTypeError(f"unsupported sort field: {name}")
    return ",".join(specs)

subparser = subparsers.add_parser(
"diff",
parents=[common_parser],
Expand All @@ -172,7 +315,6 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
action="store_true",
help="override the check of chunker parameters",
)
subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
subparser.add_argument(
"--format",
metavar="FORMAT",
Expand All @@ -181,6 +323,12 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
help='specify format for differences between archives (default: "{change} {path}{NL}")',
)
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines.")
subparser.add_argument(
"--sort-by",
dest="sort_by",
type=diff_sort_spec_validator,
help="Sort output by comma-separated fields (e.g., '>size_added,path').",
)
subparser.add_argument(
"--content-only",
action="store_true",
Expand Down
6 changes: 6 additions & 0 deletions src/borg/item.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,12 @@ class DiffChange:

class ItemDiff:
path: str
_item1: Item
_item2: Item
_chunk_1: Iterator
_chunk_2: Iterator
_numeric_ids: bool
_can_compare_chunk_ids: bool
def __init__(
self,
path: str,
Expand Down
114 changes: 112 additions & 2 deletions src/borg/testsuite/archiver/diff_cmd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def test_time_diffs(archivers, request):
assert "ctime" not in output


def test_sort_option(archivers, request):
def test_sort_by_option(archivers, request):
archiver = request.getfixturevalue(archivers)
cmd(archiver, "repo-create", RK_ENCRYPTION)

Expand All @@ -313,14 +313,124 @@ def test_sort_option(archivers, request):
create_regular_file(archiver.input_path, "d_file_added", size=256)
cmd(archiver, "create", "test1", "input")

output = cmd(archiver, "diff", "test0", "test1", "--sort", "--content-only")
output = cmd(archiver, "diff", "test0", "test1", "--sort-by=path", "--content-only")
expected = ["a_file_removed", "b_file_added", "c_file_changed", "d_file_added", "e_file_changed", "f_file_removed"]
assert isinstance(output, str)
outputs = output.splitlines()
assert len(outputs) == len(expected)
assert all(x in line for x, line in zip(expected, outputs))


def test_sort_by_invalid_field_is_rejected(archivers, request):
    """An unknown --sort-by field must be rejected at argument-parsing time."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # Two minimal archives that differ in one file's size.
    for archive_name, nbytes in (("a1", 1), ("a2", 2)):
        create_regular_file(archiver.input_path, "file", size=nbytes)
        cmd(archiver, "create", archive_name, "input")

    # Unsupported field should cause argument parsing error
    cmd(archiver, "diff", "a1", "a2", "--sort-by=not_a_field", exit_code=EXIT_ERROR)


def test_sort_by_size_added_then_path(archivers, request):
    """Ascending size_added with path tie-breaker puts removed items first."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # First archive holds only the two files that will later disappear.
    create_regular_file(archiver.input_path, "r_big_removed", size=50)
    create_regular_file(archiver.input_path, "r_small_removed", size=5)
    cmd(archiver, "create", "base", "input")

    # Second archive: both files removed, two new files of differing size added.
    os.unlink("input/r_big_removed")
    os.unlink("input/r_small_removed")
    create_regular_file(archiver.input_path, "a_small_added", size=10)
    create_regular_file(archiver.input_path, "b_large_added", size=30)
    cmd(archiver, "create", "next", "input")

    # Removals contribute size_added=0 and therefore sort first; within each
    # group, the path tie-breaker makes the order deterministic.
    output = cmd(archiver, "diff", "base", "next", "--sort-by=size_added,path", "--content-only")
    result_lines = output.splitlines()
    patterns = [
        "removed:.*input/r_big_removed",  # size_added=0
        "removed:.*input/r_small_removed",  # size_added=0
        "added:.*10 B.*input/a_small_added",
        "added:.*30 B.*input/b_large_added",
    ]
    assert len(result_lines) == len(patterns)
    for line, pattern in zip(result_lines, patterns):
        assert_line_exists([line], pattern)


@pytest.mark.parametrize(
    "sort_key",
    [
        "path",
        "size",
        "size_added",
        "size_removed",
        "size_diff",
        "user",
        "group",
        "uid",
        "gid",
        "ctime",
        "mtime",
        "ctime_diff",
        "mtime_diff",
    ],
)
def test_sort_by_all_keys_with_directions(archivers, request, sort_key):
    """Smoke-test --sort-by for every supported field in both directions.

    Only checks that each sort spec is accepted and that exactly the expected
    set of changed paths is reported; the resulting order is deliberately not
    asserted (it depends on filesystem timestamps, uid/gid, etc.).
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # Prepare initial files
    create_regular_file(archiver.input_path, "a_removed", size=11)
    create_regular_file(archiver.input_path, "f_removed", size=22)
    create_regular_file(archiver.input_path, "c_changed", size=33)
    create_regular_file(archiver.input_path, "e_changed", size=44)
    cmd(archiver, "create", "s0", "input")

    # Ensure that subsequent modifications happen on a later timestamp tick than s0
    time.sleep(1.0 if is_darwin else 0.1)  # HFS+ has ~1s timestamp granularity on macOS

    # Create differences for second archive
    os.unlink("input/a_removed")
    os.unlink("input/f_removed")
    os.unlink("input/c_changed")
    os.unlink("input/e_changed")
    # Recreate changed files with different sizes
    create_regular_file(archiver.input_path, "c_changed", size=333)
    create_regular_file(archiver.input_path, "e_changed", size=444)
    # Added files
    create_regular_file(archiver.input_path, "b_added", size=55)
    create_regular_file(archiver.input_path, "d_added", size=66)
    cmd(archiver, "create", "s1", "input")

    # Every changed/added/removed path that diff must report, independent of sort.
    expected_paths = {
        "input/a_removed",
        "input/b_added",
        "input/c_changed",
        "input/d_added",
        "input/e_changed",
        "input/f_removed",
    }

    # Exercise both ascending and descending for each key.
    for direction in ("<", ">"):
        sort_spec = f"{direction}{sort_key},path"
        output = cmd(archiver, "diff", "s0", "s1", f"--sort-by={sort_spec}", "--content-only")
        lines = output.splitlines()
        assert len(lines) == len(expected_paths)
        # Validate that we got exactly the expected items regardless of order.
        # As we do not test the order, this is mostly for test coverage.
        seen_paths = {line.split()[-1] for line in lines}
        assert seen_paths == expected_paths


@pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported")
def test_hard_link_deletion_and_replacement(archivers, request):
archiver = request.getfixturevalue(archivers)
Expand Down
Loading