Skip to content

Commit 77f6a55

Browse files
diff --sort-by: enhanced sorting, fixes #8998
Use borg diff --sort-by=spec1,spec2,spec3 for enhanced sorting. Remove the legacy --sort behaviour (sort by path), which has been deprecated since 1.4.2. Co-authored-by: Daniel Rudolf <[email protected]> This is a port of #9005 to master branch.
1 parent 18d1c8a commit 77f6a55

File tree

4 files changed

+278
-6
lines changed

4 files changed

+278
-6
lines changed

docs/usage/diff.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,11 @@ Examples
1616
{"path": "file4", "changes": [{"type": "added", "size": 0}]}
1717
{"path": "file3", "changes": [{"type": "removed", "size": 0}]}
1818

19+
20+
# Use --sort-by with a comma-separated list; sorts apply stably from last to first.
21+
# Here: primary by net size change descending, tie-breaker by path ascending
22+
$ borg diff --sort-by=">size_diff,path" archive1 archive2
23+
+17 B -5 B [-rw-r--r-- -> -rwxr-xr-x] file1
24+
removed 0 B file3
25+
added 0 B file4
26+
+135 B -252 B file2

src/borg/archiver/diff_cmd.py

Lines changed: 152 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from ..archive import Archive
99
from ..constants import * # NOQA
1010
from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, PathSpec, BorgJsonEncoder
11+
from ..helpers import IncludePatternNeverMatchedWarning, remove_surrogates
12+
from ..item import ItemDiff
1113
from ..manifest import Manifest
1214
from ..logger import create_logger
1315

@@ -87,11 +89,75 @@ def print_text_output(diff, formatter):
8789
diffs_iter = Archive.compare_archives_iter(
8890
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
8991
)
90-
# Conversion to string and filtering for diff.equal to save memory if sorting
92+
# Filter out equal items early (keep as generator; listify only if sorting)
9193
diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))
9294

93-
if args.sort:
94-
diffs = sorted(diffs, key=lambda diff: diff.path)
95+
sort_specs = []
96+
if args.sort_by:
97+
for spec in args.sort_by.split(","):
98+
spec = spec.strip()
99+
if spec:
100+
sort_specs.append(spec)
101+
102+
def key_for(field: str, d: "ItemDiff"):
    """Return the sort key of diff entry *d* for one ``--sort-by`` field.

    *field* may carry a leading "<" or ">" direction marker; the marker is
    dropped here, the sort direction itself is handled by the caller.

    Keys by field:

    - ``path`` (also ``None`` / empty): the path with surrogates removed.
    - ``size_added`` / ``size_removed`` / ``size_diff``: byte counts taken from
      the "content" change of the diff (both 0 when there is no such change).
    - ``ctime_diff`` / ``mtime_diff``: timestamp of item2 minus timestamp of
      item1, a missing timestamp counting as 0.
    - ``size``: size of item2, or 0 if item2 is missing or flagged as deleted.
    - ``user``, ``group``, ``uid``, ``gid``, ``ctime``, ``mtime``: read from
      item2 if it is truthy, otherwise from item1, with a per-field default.

    Raises ValueError for any other field name.
    """
    # The direction marker is not part of the field name.
    name = field[1:] if field and field[0] in ("<", ">") else field

    if name in (None, "", "path"):
        return remove_surrogates(d.path)

    if name in ("size_diff", "size_added", "size_removed"):
        grown = shrunk = 0
        content_change = d.changes().get("content")
        if content_change is not None:
            details = content_change.to_dict()
            kind = details.get("type")
            if kind == "modified":
                grown = details.get("added", 0)
                shrunk = details.get("removed", 0)
            elif kind and kind.startswith("added"):
                # fall back to the plain "size" entry if there is no "added" entry
                grown = details.get("added", details.get("size", 0))
                shrunk = 0
            elif kind and kind.startswith("removed"):
                grown = 0
                shrunk = details.get("removed", details.get("size", 0))
        if name == "size_diff":
            return grown - shrunk
        if name == "size_added":
            return grown
        if name == "size_removed":
            return shrunk

    if name in ("ctime_diff", "mtime_diff"):
        # "ctime_diff" -> "ctime", "mtime_diff" -> "mtime"
        stamp = name.split("_")[0]
        # NOTE(review): both items are dereferenced without a None check here,
        # unlike in the branches below - presumably they are always set; confirm.
        before = d._item1.get(stamp, 0)
        after = d._item2.get(stamp, 0)
        return after - before

    if name == "size":
        newer = d._item2
        if newer is None or newer.get("deleted"):
            return 0
        return newer.get_size()

    # Plain attributes: prefer the item from the second archive.
    source = d._item2 or d._item1
    fallbacks = {"user": "", "group": "", "uid": -1, "gid": -1, "ctime": 0, "mtime": 0}
    if name in fallbacks:
        default = fallbacks[name]
        return default if source is None else source.get(name, default)

    raise ValueError(f"Invalid field name: {name}")
151+
152+
if sort_specs:
153+
diffs = list(diffs)
154+
# Apply stable sorts from last to first
155+
for spec in reversed(sort_specs):
156+
desc = False
157+
field = spec
158+
if field and field[0] in ("<", ">"):
159+
desc = field[0] == ">"
160+
diffs.sort(key=lambda di: key_for(field, di), reverse=desc)
95161

96162
formatter = DiffFormatter(format, args.content_only)
97163
for diff in diffs:
@@ -149,7 +215,84 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
149215
"""
150216
)
151217
+ DiffFormatter.keys_help()
218+
+ textwrap.dedent(
219+
"""
220+
221+
What is compared
222+
+++++++++++++++++
223+
For each matching item in both archives, Borg reports:
224+
225+
- Content changes: total added/removed bytes within files. If chunker parameters are comparable,
226+
Borg compares chunk IDs quickly; otherwise, it compares the content.
227+
- Metadata changes: user, group, mode, and other metadata shown inline like
228+
"[old_mode -> new_mode]" for mode changes. Use ``--content-only`` to suppress metadata changes.
229+
- Added/removed items: printed as "added SIZE path" or "removed SIZE path".
230+
231+
Output formats
232+
++++++++++++++
233+
The default (text) output shows one line per changed path, e.g.::
234+
235+
+135 B -252 B [ -rw-r--r-- -> -rwxr-xr-x ] path/to/file
236+
237+
JSON Lines output (``--json-lines``) prints one JSON object per changed path, e.g.::
238+
239+
{"path": "PATH", "changes": [
240+
{"type": "modified", "added": BYTES, "removed": BYTES},
241+
{"type": "mode", "old_mode": "-rw-r--r--", "new_mode": "-rwxr-xr-x"},
242+
{"type": "added", "size": SIZE},
243+
{"type": "removed", "size": SIZE}
244+
]}
245+
246+
Sorting
247+
++++++++
248+
Use ``--sort-by FIELDS`` where FIELDS is a comma-separated list of fields.
249+
Sorts are applied stably from last to first in the given list. Prepend ">" for
250+
descending, "<" (or no prefix) for ascending, for example ``--sort-by=">size_added,path"``.
251+
Supported fields include:
252+
253+
- path: the item path
254+
- size_added: total bytes added for the item content
255+
- size_removed: total bytes removed for the item content
256+
- size_diff: size_added - size_removed (net content change)
257+
- size: size of the item as stored in ARCHIVE2 (0 for removed items)
258+
- user, group, uid, gid, ctime, mtime: taken from the item state in ARCHIVE2 when present
259+
- ctime_diff, mtime_diff: timestamp difference (archive2 - archive1)
260+
261+
Performance considerations
262+
++++++++++++++++++++++++++
263+
diff automatically detects whether the archives were created with the same chunker
264+
parameters. If so, only chunk IDs are compared, which is very fast.
265+
"""
266+
)
152267
)
268+
269+
def diff_sort_spec_validator(s):
    """Validate and normalize a ``--sort-by`` value (argparse ``type=`` callable).

    *s* is a comma-separated list of sort specs. Each spec is a field name,
    optionally prefixed with ">" or "<". Whitespace around specs and empty
    list entries are dropped.

    Returns the cleaned specs joined by "," (direction markers are kept).

    Raises argparse.ArgumentTypeError if *s* is not a string, contains no
    spec at all, contains a direction marker without a field name, or names
    a field that is not supported.
    """
    if not isinstance(s, str):
        raise argparse.ArgumentTypeError("unsupported sort field (not a string)")
    # A tuple (not a set) so the list of fields in error messages has a stable order.
    allowed = (
        "path",
        "size_added",
        "size_removed",
        "size_diff",
        "size",
        "user",
        "group",
        "uid",
        "gid",
        "ctime",
        "mtime",
        "ctime_diff",
        "mtime_diff",
    )
    parts = [part for part in (raw.strip() for raw in s.split(",")) if part]
    if not parts:
        raise argparse.ArgumentTypeError("unsupported sort field: empty spec")
    supported = ", ".join(allowed)
    for spec in parts:
        field = spec[1:] if spec[0] in (">", "<") else spec
        if not field:
            # A bare ">" or "<": show the spec itself, the field name would print as nothing.
            raise argparse.ArgumentTypeError(
                f"unsupported sort field: {spec!r} has no field name (supported: {supported})"
            )
        if field not in allowed:
            raise argparse.ArgumentTypeError(f"unsupported sort field: {field} (supported: {supported})")
    return ",".join(parts)
295+
153296
subparser = subparsers.add_parser(
154297
"diff",
155298
parents=[common_parser],
@@ -172,7 +315,6 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
172315
action="store_true",
173316
help="override the check of chunker parameters",
174317
)
175-
subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
176318
subparser.add_argument(
177319
"--format",
178320
metavar="FORMAT",
@@ -181,6 +323,12 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
181323
help='specify format for differences between archives (default: "{change} {path}{NL}")',
182324
)
183325
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines.")
326+
subparser.add_argument(
327+
"--sort-by",
328+
dest="sort_by",
329+
type=diff_sort_spec_validator,
330+
help="Sort output by comma-separated fields (e.g., '>size_added,path').",
331+
)
184332
subparser.add_argument(
185333
"--content-only",
186334
action="store_true",

src/borg/item.pyi

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,12 @@ class DiffChange:
263263

264264
class ItemDiff:
265265
path: str
266+
_item1: Item
267+
_item2: Item
268+
_chunk_1: Iterator
269+
_chunk_2: Iterator
270+
_numeric_ids: bool
271+
_can_compare_chunk_ids: bool
266272
def __init__(
267273
self,
268274
path: str,

src/borg/testsuite/archiver/diff_cmd_test.py

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def test_time_diffs(archivers, request):
293293
assert "ctime" not in output
294294

295295

296-
def test_sort_option(archivers, request):
296+
def test_sort_by_option(archivers, request):
297297
archiver = request.getfixturevalue(archivers)
298298
cmd(archiver, "repo-create", RK_ENCRYPTION)
299299

@@ -313,14 +313,124 @@ def test_sort_option(archivers, request):
313313
create_regular_file(archiver.input_path, "d_file_added", size=256)
314314
cmd(archiver, "create", "test1", "input")
315315

316-
output = cmd(archiver, "diff", "test0", "test1", "--sort", "--content-only")
316+
output = cmd(archiver, "diff", "test0", "test1", "--sort-by=path", "--content-only")
317317
expected = ["a_file_removed", "b_file_added", "c_file_changed", "d_file_added", "e_file_changed", "f_file_removed"]
318318
assert isinstance(output, str)
319319
outputs = output.splitlines()
320320
assert len(outputs) == len(expected)
321321
assert all(x in line for x, line in zip(expected, outputs))
322322

323323

324+
def test_sort_by_invalid_field_is_rejected(archivers, request):
    """``borg diff --sort-by`` with an unsupported field name must exit with EXIT_ERROR."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # Two archives holding the same file name with different sizes.
    for archive_name, file_size in (("a1", 1), ("a2", 2)):
        create_regular_file(archiver.input_path, "file", size=file_size)
        cmd(archiver, "create", archive_name, "input")

    # The unsupported field is expected to be refused as an argument parsing error.
    cmd(archiver, "diff", "a1", "a2", "--sort-by=not_a_field", exit_code=EXIT_ERROR)
335+
336+
337+
def test_sort_by_size_added_then_path(archivers, request):
    """Check the output order for ``--sort-by=size_added,path`` (both ascending)."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # First archive: two files that will be gone in the second one.
    for name, size in (("r_big_removed", 50), ("r_small_removed", 5)):
        create_regular_file(archiver.input_path, name, size=size)
    cmd(archiver, "create", "base", "input")

    # Second archive: drop both files, add two new ones of different sizes.
    for stale in ("input/r_big_removed", "input/r_small_removed"):
        os.unlink(stale)
    for name, size in (("a_small_added", 10), ("b_large_added", 30)):
        create_regular_file(archiver.input_path, name, size=size)
    cmd(archiver, "create", "next", "input")

    output = cmd(archiver, "diff", "base", "next", "--sort-by=size_added,path", "--content-only")
    lines = output.splitlines()

    # Removed entries have size_added=0, so they come first (tie broken by path),
    # followed by the added entries in order of increasing size.
    expected_order = [
        "removed:.*input/r_big_removed",
        "removed:.*input/r_small_removed",
        "added:.*10 B.*input/a_small_added",
        "added:.*30 B.*input/b_large_added",
    ]
    assert len(lines) == len(expected_order)
    # Match line by line, so the position of every entry is verified.
    for pattern, line in zip(expected_order, lines):
        assert_line_exists([line], pattern)
366+
367+
368+
@pytest.mark.parametrize(
    "sort_key",
    [
        "path",
        "size",
        "size_added",
        "size_removed",
        "size_diff",
        "user",
        "group",
        "uid",
        "gid",
        "ctime",
        "mtime",
        "ctime_diff",
        "mtime_diff",
    ],
)
def test_sort_by_all_keys_with_directions(archivers, request, sort_key):
    """Run ``borg diff --sort-by`` with every sort key, ascending and descending.

    Only the set of reported paths is checked, not their order, so this is
    mostly a coverage test for the sort key computation.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # Initial state: two files to be removed and two to be changed later.
    initial_files = (("a_removed", 11), ("f_removed", 22), ("c_changed", 33), ("e_changed", 44))
    for name, size in initial_files:
        create_regular_file(archiver.input_path, name, size=size)
    cmd(archiver, "create", "s0", "input")

    # Make sure the following modifications get a later timestamp tick than s0.
    time.sleep(1.0 if is_darwin else 0.1)  # HFS+ has ~1s timestamp granularity on macOS

    # Second state: delete everything, then recreate the "changed" files with
    # other sizes and add two entirely new files.
    for stale in ("input/a_removed", "input/f_removed", "input/c_changed", "input/e_changed"):
        os.unlink(stale)
    second_files = (("c_changed", 333), ("e_changed", 444), ("b_added", 55), ("d_added", 66))
    for name, size in second_files:
        create_regular_file(archiver.input_path, name, size=size)
    cmd(archiver, "create", "s1", "input")

    expected_paths = {
        "input/a_removed",
        "input/b_added",
        "input/c_changed",
        "input/d_added",
        "input/e_changed",
        "input/f_removed",
    }

    # "<" is ascending, ">" is descending; path is the tie-breaker in both runs.
    for direction in ("<", ">"):
        output = cmd(archiver, "diff", "s0", "s1", f"--sort-by={direction}{sort_key},path", "--content-only")
        lines = output.splitlines()
        assert len(lines) == len(expected_paths)
        # The path is the last whitespace-separated token of each output line.
        seen_paths = {line.split()[-1] for line in lines}
        assert seen_paths == expected_paths
432+
433+
324434
@pytest.mark.skipif(not are_hardlinks_supported(), reason="hardlinks not supported")
325435
def test_hard_link_deletion_and_replacement(archivers, request):
326436
archiver = request.getfixturevalue(archivers)

0 commit comments

Comments
 (0)