From 6b0badce91e5826415acca359181cd52507811bf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Oct 2025 18:04:47 +0200 Subject: [PATCH 1/2] preprocess_args: fix option name matching previously, it also matched on --sort-by when it only should have matched on --sort. --- src/borg/archiver/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/borg/archiver/__init__.py b/src/borg/archiver/__init__.py index 481b286b09..e23f6adf6a 100644 --- a/src/borg/archiver/__init__.py +++ b/src/borg/archiver/__init__.py @@ -180,7 +180,8 @@ def preprocess_args(self, args): ] for i, arg in enumerate(args[:]): for old_name, new_name, warning in deprecations: - if arg.startswith(old_name): + # either --old_name or --old_name=... + if arg == old_name or (arg.startswith(old_name) and arg[len(old_name)] == "="): if new_name is not None: args[i] = arg.replace(old_name, new_name) print(warning, file=sys.stderr) From b10711753ed569188fe1a26584511f199048de88 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Oct 2025 17:37:14 +0200 Subject: [PATCH 2/2] diff --sort-by: enhanced sorting, fixes #8998 use borg diff --sort-by=spec1,spec2,spec2 for enhanced sorting. remove legacy --sort behaviour (sort by path), this was deprecated since 1.4.2. Co-authored-by: Daniel Rudolf This is a port of #9005 to master branch. --- docs/usage/diff.rst | 8 + src/borg/archiver/diff_cmd.py | 156 ++++++++++++++++++- src/borg/item.pyi | 6 + src/borg/testsuite/archiver/diff_cmd_test.py | 114 +++++++++++++- 4 files changed, 278 insertions(+), 6 deletions(-) diff --git a/docs/usage/diff.rst b/docs/usage/diff.rst index c372c3c068..9404326c12 100644 --- a/docs/usage/diff.rst +++ b/docs/usage/diff.rst @@ -16,3 +16,11 @@ Examples {"path": "file4", "changes": [{"type": "added", "size": 0}]} {"path": "file3", "changes": [{"type": "removed", "size": 0}]} + + # Use --sort-by with a comma-separated list; sorts apply stably from last to first. + # Here: primary by net size change descending, tie-breaker by path ascending + $ borg diff --sort-by=">size_diff,path" archive1 archive2 + +17 B -5 B [-rw-r--r-- -> -rwxr-xr-x] file1 + removed 0 B file3 + added 0 B file4 + +135 B -252 B file2 diff --git a/src/borg/archiver/diff_cmd.py b/src/borg/archiver/diff_cmd.py index eb0a10d289..65e11bd867 100644 --- a/src/borg/archiver/diff_cmd.py +++ b/src/borg/archiver/diff_cmd.py @@ -8,6 +8,8 @@ from ..archive import Archive from ..constants import * # NOQA from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, PathSpec, BorgJsonEncoder +from ..helpers import IncludePatternNeverMatchedWarning, remove_surrogates +from ..item import ItemDiff from ..manifest import Manifest from ..logger import create_logger @@ -87,11 +89,75 @@ def print_text_output(diff, formatter): diffs_iter = Archive.compare_archives_iter( archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids ) - # Conversion to string and filtering for diff.equal to save memory if sorting + # Filter out equal items early (keep as generator; listify only if sorting) diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only)) - if args.sort: - diffs = sorted(diffs, key=lambda diff: diff.path) + sort_specs = [] + if args.sort_by: + for spec in args.sort_by.split(","): + spec = spec.strip() + if spec: + sort_specs.append(spec) + + def key_for(field: str, d: "ItemDiff"): + # strip direction markers if present + if field and field[0] in ("<", ">"): + field = field[1:] + # path + if field in (None, "", "path"): + return remove_surrogates(d.path) + # compute size_* from changes + if field in ("size_diff", "size_added", "size_removed"): + added = removed = 0 + ch = d.changes().get("content") + if ch is not None: + info = ch.to_dict() + t = info.get("type") + if t == "modified": + added = info.get("added", 0) + removed = info.get("removed", 0) + elif t and t.startswith("added"): + added = info.get("added", info.get("size", 0)) + removed = 0 + elif t and t.startswith("removed"): + added = 0 + removed = info.get("removed", info.get("size", 0)) + if field == "size_diff": + return added - removed + if field == "size_added": + return added + if field == "size_removed": + return removed + # timestamp diffs + if field in ("ctime_diff", "mtime_diff"): + ts = field.split("_")[0] + t1 = d._item1.get(ts, 0) + t2 = d._item2.get(ts, 0) + return t2 - t1 + # size of item in archive2 + if field == "size": + it = d._item2 + if it is None or it.get("deleted"): + return 0 + return it.get_size() + # direct attributes from current item (prefer item2) + it = d._item2 or d._item1 + attr_defaults = {"user": "", "group": "", "uid": -1, "gid": -1, "ctime": 0, "mtime": 0} + if field in attr_defaults: + if it is None: + return attr_defaults[field] + return it.get(field, attr_defaults[field]) + raise ValueError(f"Invalid field name: {field}") + + if sort_specs: + diffs = list(diffs) + # Apply stable sorts from last to first + for spec in reversed(sort_specs): + desc = False + field = spec + if field and field[0] in ("<", ">"): + desc = field[0] == ">" + diffs.sort(key=lambda di: key_for(field, di), reverse=desc) formatter = DiffFormatter(format, args.content_only) for diff in diffs: @@ -149,7 +215,84 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser): """ ) + DiffFormatter.keys_help() + + textwrap.dedent( + """ + + What is compared + +++++++++++++++++ + For each matching item in both archives, Borg reports: + + - Content changes: total added/removed bytes within files. If chunker parameters are comparable, + Borg compares chunk IDs quickly; otherwise, it compares the content. + - Metadata changes: user, group, mode, and other metadata shown inline like + "[old_mode -> new_mode]" for mode changes. Use ``--content-only`` to suppress metadata changes. + - Added/removed items: printed as "added SIZE path" or "removed SIZE path". + + Output formats + ++++++++++++++ + The default (text) output shows one line per changed path, e.g.:: + + +135 B -252 B [ -rw-r--r-- -> -rwxr-xr-x ] path/to/file + + JSON Lines output (``--json-lines``) prints one JSON object per changed path, e.g.:: + + {"path": "PATH", "changes": [ + {"type": "modified", "added": BYTES, "removed": BYTES}, + {"type": "mode", "old_mode": "-rw-r--r--", "new_mode": "-rwxr-xr-x"}, + {"type": "added", "size": SIZE}, + {"type": "removed", "size": SIZE} + ]} + + Sorting + ++++++++ + Use ``--sort-by FIELDS`` where FIELDS is a comma-separated list of fields. + Sorts are applied stably from last to first in the given list. Prepend ">" for + descending, "<" (or no prefix) for ascending, for example ``--sort-by=">size_added,path"``. + Supported fields include: + + - path: the item path + - size_added: total bytes added for the item content + - size_removed: total bytes removed for the item content + - size_diff: size_added - size_removed (net content change) + - size: size of the item as stored in ARCHIVE2 (0 for removed items) + - user, group, uid, gid, ctime, mtime: taken from the item state in ARCHIVE2 when present + - ctime_diff, mtime_diff: timestamp difference (archive2 - archive1) + + Performance considerations + ++++++++++++++++++++++++++ + diff automatically detects whether the archives were created with the same chunker + parameters. If so, only chunk IDs are compared, which is very fast. + """ + ) ) + + def diff_sort_spec_validator(s): + if not isinstance(s, str): + raise argparse.ArgumentTypeError("unsupported sort field (not a string)") + allowed = { + "path", + "size_added", + "size_removed", + "size_diff", + "size", + "user", + "group", + "uid", + "gid", + "ctime", + "mtime", + "ctime_diff", + "mtime_diff", + } + parts = [p.strip() for p in s.split(",") if p.strip()] + if not parts: + raise argparse.ArgumentTypeError("unsupported sort field: empty spec") + for spec in parts: + field = spec[1:] if spec and spec[0] in (">", "<") else spec + if field not in allowed: + raise argparse.ArgumentTypeError(f"unsupported sort field: {field}") + return ",".join(parts) + subparser = subparsers.add_parser( "diff", parents=[common_parser], @@ -172,7 +315,6 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser): action="store_true", help="override the check of chunker parameters", ) - subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.") subparser.add_argument( "--format", metavar="FORMAT", @@ -181,6 +323,12 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser): help='specify format for differences between archives (default: "{change} {path}{NL}")', ) subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines.") + subparser.add_argument( + "--sort-by", + dest="sort_by", + type=diff_sort_spec_validator, + help="Sort output by comma-separated fields (e.g., '>size_added,path').", + ) subparser.add_argument( "--content-only", action="store_true", diff --git a/src/borg/item.pyi b/src/borg/item.pyi index 97de37904b..b5026df0f1 100644 --- a/src/borg/item.pyi +++ b/src/borg/item.pyi @@ -263,6 +263,12 @@ class DiffChange: class ItemDiff: path: str + _item1: Item + _item2: Item + _chunk_1: Iterator + _chunk_2: Iterator + _numeric_ids: bool + _can_compare_chunk_ids: bool def __init__( self, path: str, diff --git a/src/borg/testsuite/archiver/diff_cmd_test.py b/src/borg/testsuite/archiver/diff_cmd_test.py index e07f575f69..92f1af0fa8 100644 --- a/src/borg/testsuite/archiver/diff_cmd_test.py +++ b/src/borg/testsuite/archiver/diff_cmd_test.py @@ -293,7 +293,7 @@ def test_time_diffs(archivers, request): assert "ctime" not in output -def test_sort_option(archivers, request): +def test_sort_by_option(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION) @@ -313,7 +313,7 @@ def test_sort_option(archivers, request): create_regular_file(archiver.input_path, "d_file_added", size=256) cmd(archiver, "create", "test1", "input") - output = cmd(archiver, "diff", "test0", "test1", "--sort", "--content-only") + output = cmd(archiver, "diff", "test0", "test1", "--sort-by=path", "--content-only") expected = ["a_file_removed", "b_file_added", "c_file_changed", "d_file_added", "e_file_changed", "f_file_removed"] assert isinstance(output, str) outputs = output.splitlines() @@ -321,6 +321,116 @@ def test_sort_option(archivers, request): assert all(x in line for x, line in zip(expected, outputs)) +def test_sort_by_invalid_field_is_rejected(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + create_regular_file(archiver.input_path, "file", size=1) + cmd(archiver, "create", "a1", "input") + create_regular_file(archiver.input_path, "file", size=2) + cmd(archiver, "create", "a2", "input") + + # Unsupported field should cause argument parsing error + cmd(archiver, "diff", "a1", "a2", "--sort-by=not_a_field", exit_code=EXIT_ERROR) + + +def test_sort_by_size_added_then_path(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + # Base archive with two files that will be removed later + create_regular_file(archiver.input_path, "r_big_removed", size=50) + create_regular_file(archiver.input_path, "r_small_removed", size=5) + cmd(archiver, "create", "base", "input") + + # Second archive: remove both above and add two new files of different sizes + os.unlink("input/r_big_removed") + os.unlink("input/r_small_removed") + create_regular_file(archiver.input_path, "a_small_added", size=10) + create_regular_file(archiver.input_path, "b_large_added", size=30) + cmd(archiver, "create", "next", "input") + + # Sort by size added (ascending), then path to break ties deterministically + output = cmd(archiver, "diff", "base", "next", "--sort-by=size_added,path", "--content-only") + lines = output.splitlines() + # Expect removed entries first (size_added=0), ordered by path, then added entries by increasing size + expected_order = [ + "removed:.*input/r_big_removed", # size_added=0 + "removed:.*input/r_small_removed", # size_added=0 + "added:.*10 B.*input/a_small_added", + "added:.*30 B.*input/b_large_added", + ] + assert len(lines) == len(expected_order) + for pattern, line in zip(expected_order, lines): + assert_line_exists([line], pattern) + + +@pytest.mark.parametrize( + "sort_key", + [ + "path", + "size", + "size_added", + "size_removed", + "size_diff", + "user", + "group", + "uid", + "gid", + "ctime", + "mtime", + "ctime_diff", + "mtime_diff", + ], +) +def test_sort_by_all_keys_with_directions(archivers, request, sort_key): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + + # Prepare initial files + create_regular_file(archiver.input_path, "a_removed", size=11) + create_regular_file(archiver.input_path, "f_removed", size=22) + create_regular_file(archiver.input_path, "c_changed", size=33) + create_regular_file(archiver.input_path, "e_changed", size=44) + cmd(archiver, "create", "s0", "input") + + # Ensure that subsequent modifications happen on a later timestamp tick than s0 + time.sleep(1.0 if is_darwin else 0.1) # HFS+ has ~1s timestamp granularity on macOS + + # Create differences for second archive + os.unlink("input/a_removed") + os.unlink("input/f_removed") + os.unlink("input/c_changed") + os.unlink("input/e_changed") + # Recreate changed files with different sizes + create_regular_file(archiver.input_path, "c_changed", size=333) + create_regular_file(archiver.input_path, "e_changed", size=444) + # Added files + create_regular_file(archiver.input_path, "b_added", size=55) + create_regular_file(archiver.input_path, "d_added", size=66) + cmd(archiver, "create", "s1", "input") + + expected_paths = { + "input/a_removed", + "input/b_added", + "input/c_changed", + "input/d_added", + "input/e_changed", + "input/f_removed", + } + + # Exercise both ascending and descending for each key. + for direction in ("<", ">"): + sort_spec = f"{direction}{sort_key},path" + output = cmd(archiver, "diff", "s0", "s1", f"--sort-by={sort_spec}", "--content-only") + lines = output.splitlines() + assert len(lines) == len(expected_paths) + # Validate that we got exactly the expected items regardless of order. + # As we do not test the order, this is mostly for test coverage. + seen_paths = {line.split()[-1] for line in lines} + assert seen_paths == expected_paths + + @pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported") def test_hard_link_deletion_and_replacement(archivers, request): archiver = request.getfixturevalue(archivers)