From 44cc6c81842d009d53d18b05d5f188a87a1ddaba Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 19 Apr 2025 20:43:10 +0200 Subject: [PATCH 1/3] Adds int_or_interval format parser Accepts either int or interval, first tries parsing int then tries parsing as interval if that fails. Returns a timedelta for easy date math later. Now allows intervals of length 0 as a 0-length timedelta is perfectly fine to work with. --- src/borg/helpers/__init__.py | 2 +- src/borg/helpers/parseformat.py | 18 +++++- .../testsuite/helpers/parseformat_test.py | 56 ++++++++++++++++--- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index 6473f7dc69..b6902d01e3 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -28,7 +28,7 @@ from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper from .parseformat import bin_to_hex, hex_to_bin, safe_encode, safe_decode from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd -from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval +from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval, int_or_interval from .parseformat import PathSpec, SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper from .parseformat import format_file_size, parse_file_size, FileSize from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Location, text_validator diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index cb481685ab..6cde35dd45 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -12,7 +12,7 @@ import uuid from typing import ClassVar, Any, TYPE_CHECKING, Literal from collections import OrderedDict -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from functools import partial from 
string import Formatter @@ -154,12 +154,24 @@ def interval(s): except ValueError: seconds = -1 - if seconds <= 0: - raise argparse.ArgumentTypeError(f'Invalid number "{number}": expected positive integer') + if seconds < 0: + raise argparse.ArgumentTypeError(f'Invalid number "{number}": expected nonnegative integer') return seconds +def int_or_interval(s): + try: + return int(s) + except ValueError: + pass + + try: + return timedelta(seconds=interval(s)) + except argparse.ArgumentTypeError as e: + raise argparse.ArgumentTypeError(f"Value is neither an integer nor an interval: {e}") + + def ChunkerParams(s): params = s.strip().split(",") count = len(params) diff --git a/src/borg/testsuite/helpers/parseformat_test.py b/src/borg/testsuite/helpers/parseformat_test.py index ef39e6714c..256bc07199 100644 --- a/src/borg/testsuite/helpers/parseformat_test.py +++ b/src/borg/testsuite/helpers/parseformat_test.py @@ -1,7 +1,8 @@ import base64 import os +import re from argparse import ArgumentTypeError -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone import pytest @@ -16,6 +17,7 @@ format_file_size, parse_file_size, interval, + int_or_interval, partial_format, clean_lines, format_line, @@ -351,6 +353,7 @@ def test_format_timedelta(): @pytest.mark.parametrize( "timeframe, num_secs", [ + ("0S", 0), ("5S", 5), ("2M", 2 * 60), ("1H", 60 * 60), @@ -367,9 +370,9 @@ def test_interval(timeframe, num_secs): @pytest.mark.parametrize( "invalid_interval, error_tuple", [ - ("H", ('Invalid number "": expected positive integer',)), - ("-1d", ('Invalid number "-1": expected positive integer',)), - ("food", ('Invalid number "foo": expected positive integer',)), + ("H", ('Invalid number "": expected nonnegative integer',)), + ("-1d", ('Invalid number "-1": expected nonnegative integer',)), + ("food", ('Invalid number "foo": expected nonnegative integer',)), ], ) def test_interval_time_unit(invalid_interval, error_tuple): @@ -378,10 +381,49 @@ def 
test_interval_time_unit(invalid_interval, error_tuple): assert exc.value.args == error_tuple -def test_interval_number(): +@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("x", r'^Unexpected time unit "x": choose from'), + ("-1t", r'^Unexpected time unit "t": choose from'), + ("fool", r'^Unexpected time unit "l": choose from'), + ("abc", r'^Unexpected time unit "c": choose from'), + (" abc ", r'^Unexpected time unit " ": choose from'), + ], +) +def test_interval_invalid_time_format(invalid_input, error_regex): + with pytest.raises(ArgumentTypeError) as exc: + interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) + + +@pytest.mark.parametrize( + "input, result", + [ + ("0", 0), + ("5", 5), + (" 999 ", 999), + ("0S", timedelta(seconds=0)), + ("5S", timedelta(seconds=5)), + ("1m", timedelta(days=31)), + ], +) +def test_int_or_interval(input, result): + assert int_or_interval(input) == result + + +@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("H", r"Value is neither an integer nor an interval:"), + ("-1d", r"Value is neither an integer nor an interval:"), + ("food", r"Value is neither an integer nor an interval:"), + ], +) +def test_int_or_interval_time_unit(invalid_input, error_regex): with pytest.raises(ArgumentTypeError) as exc: - interval("5") - assert exc.value.args == ('Unexpected time unit "5": choose from y, m, w, d, H, M, S',) + int_or_interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) def test_parse_timestamp(): From 3f3d39a8b296c65e09288eca552aea2ace6a559b Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Sat, 19 Apr 2025 21:11:05 +0200 Subject: [PATCH 2/3] Adds optional interval support for all prune retention flags Support is added for setting prune retention with either an int (keep n archives) or an interval (keep within). This works much like --keep-within currently does, but extends support to all retention filters. 
Additionally adds a generic --keep flag to take over (or live alongside) both --keep-last and --keep-within. --keep-last is no longer an alias of --keep-secondly, now keeps archives made on the same second. Comparisons against archive timestamp are made to use local timezone instead of UTC. Should be equal result in practice, but allows for easier testing with frozen local time. --- requirements.d/development.txt | 1 + src/borg/archiver/prune_cmd.py | 168 ++++++----- src/borg/constants.py | 2 + src/borg/testsuite/archiver/prune_cmd_test.py | 271 +++++++++++++++--- 4 files changed, 331 insertions(+), 111 deletions(-) diff --git a/requirements.d/development.txt b/requirements.d/development.txt index 10d7b55bf0..09a7416599 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -10,6 +10,7 @@ pytest pytest-xdist pytest-cov pytest-benchmark +freezegun Cython pre-commit bandit[toml] diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index a4922b46a0..1c112c4d5b 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -1,16 +1,17 @@ import argparse from collections import OrderedDict -from datetime import datetime, timezone, timedelta +from datetime import datetime, timezone import logging from operator import attrgetter import os +import itertools from ._common import with_repository, Highlander from ..archive import Archive from ..cache import Cache from ..constants import * # NOQA -from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error -from ..helpers import archivename_validator +from ..helpers import interval, int_or_interval, sig_int, archivename_validator +from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error from ..manifest import Manifest from ..logger import create_logger @@ -18,20 +19,23 @@ logger = create_logger() -def prune_within(archives, seconds, kept_because): - target = 
datetime.now(timezone.utc) - timedelta(seconds=seconds) - kept_counter = 0 - result = [] - for a in archives: - if a.ts > target: - kept_counter += 1 - kept_because[a.id] = ("within", kept_counter) - result.append(a) - return result +# The *_period_func group of functions create period grouping keys to group together archives falling within a certain +# period. Among archives in each of these groups, only the latest (by creation timestamp) is kept. -def default_period_func(pattern): +def unique_period_func(): + counter = itertools.count() + + def unique_values(_a): + """Group archives by an incrementing counter, practically making each archive a group of 1""" + return next(counter) + + return unique_values + + +def pattern_period_func(pattern): def inner(a): + """Group archives by extracting given strftime-pattern from their creation timestamp""" # compute in local timezone return a.ts.astimezone().strftime(pattern) @@ -39,6 +43,7 @@ def inner(a): def quarterly_13weekly_period_func(a): + """Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp""" (year, week, _) = a.ts.astimezone().isocalendar() # local time if week <= 13: # Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7) @@ -60,6 +65,7 @@ def quarterly_13weekly_period_func(a): def quarterly_3monthly_period_func(a): + """Group archives by extracting the 3-month quarter from their creation timestamp""" lt = a.ts.astimezone() # local time if lt.month <= 3: # 1-1 to 3-31 @@ -77,42 +83,64 @@ def quarterly_3monthly_period_func(a): PRUNING_PATTERNS = OrderedDict( [ - ("secondly", default_period_func("%Y-%m-%d %H:%M:%S")), - ("minutely", default_period_func("%Y-%m-%d %H:%M")), - ("hourly", default_period_func("%Y-%m-%d %H")), - ("daily", default_period_func("%Y-%m-%d")), - ("weekly", default_period_func("%G-%V")), - ("monthly", default_period_func("%Y-%m")), + # Each archive is considered for keeping + ("within", unique_period_func()), + ("last", unique_period_func()), 
+ ("keep", unique_period_func()), + # Last archive (by creation timestamp) within period group is considered for keeping + ("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")), + ("minutely", pattern_period_func("%Y-%m-%d %H:%M")), + ("hourly", pattern_period_func("%Y-%m-%d %H")), + ("daily", pattern_period_func("%Y-%m-%d")), + ("weekly", pattern_period_func("%G-%V")), + ("monthly", pattern_period_func("%Y-%m")), ("quarterly_13weekly", quarterly_13weekly_period_func), ("quarterly_3monthly", quarterly_3monthly_period_func), - ("yearly", default_period_func("%Y")), + ("yearly", pattern_period_func("%Y")), ] ) -def prune_split(archives, rule, n, kept_because=None): - last = None +# Datetime cannot represent times before datetime.min. NOTE(review): no day of margin for zone offsets is added here. +DATETIME_MIN_WITH_ZONE = datetime.min.replace(tzinfo=timezone.utc) + + +def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because={}): + if isinstance(n_or_interval, int): + n, earliest_timestamp = n_or_interval, None + else: + n, earliest_timestamp = None, base_timestamp - n_or_interval + + def can_retain(a, keep): + if n is not None: + return len(keep) < n + else: + return a.ts > earliest_timestamp + keep = [] - period_func = PRUNING_PATTERNS[rule] - if kept_because is None: - kept_because = {} - if n == 0: + if n == 0 or len(archives) == 0: return keep a = None - for a in sorted(archives, key=attrgetter("ts"), reverse=True): + last = None + period_func = PRUNING_PATTERNS[rule] + sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True) + for a in sorted_archives: + if not can_retain(a, keep): + break period = period_func(a) if period != last: last = period if a.id not in kept_because: keep.append(a) kept_because[a.id] = (rule, len(keep)) - if len(keep) == n: - break + # Keep oldest archive if we didn't reach the target retention count - if a is not None and len(keep) < n and a.id not in kept_because: + a = sorted_archives[-1] + if a is not None and a.id
not in kept_because and can_retain(a, keep): keep.append(a) kept_because[a.id] = (rule + "[oldest]", len(keep)) + return keep @@ -120,8 +148,12 @@ class PruneMixIn: @with_repository(compatibility=(Manifest.Operation.DELETE,)) def do_prune(self, args, repository, manifest): """Prune repository archives according to specified rules""" - if not any( - ( + if all( + e is None + for e in ( + args.keep, + args.within, + args.last, args.secondly, args.minutely, args.hourly, @@ -131,11 +163,10 @@ def do_prune(self, args, repository, manifest): args.quarterly_13weekly, args.quarterly_3monthly, args.yearly, - args.within, ) ): raise CommandError( - 'At least one of the "keep-within", "keep-last", ' + 'At least one of the "keep", "keep-within", "keep-last", ' '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", ' 'or "keep-yearly" settings must be specified.' @@ -159,15 +190,12 @@ def do_prune(self, args, repository, manifest): # (, ) kept_because = {} - # find archives which need to be kept because of the keep-within rule - if args.within: - keep += prune_within(archives, args.within, kept_because) - + base_timestamp = datetime.now().astimezone() # find archives which need to be kept because of the various time period rules for rule in PRUNING_PATTERNS.keys(): - num = getattr(args, rule, None) - if num is not None: - keep += prune_split(archives, rule, num, kept_because) + num_or_interval = getattr(args, rule, None) + if num_or_interval is not None: + keep += prune_split(archives, rule, num_or_interval, base_timestamp, kept_because) to_delete = set(archives) - set(keep) with Cache(repository, manifest, iec=args.iec) as cache: @@ -310,81 +338,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): help="keep all archives within this time interval", ) subparser.add_argument( - "--keep-last", + "--keep-last", dest="last", type=int, action=Highlander, help="number of archives 
to keep" + ) + subparser.add_argument( + "--keep", + dest="keep", + type=int_or_interval, + action=Highlander, + help="number or time interval of archives to keep", + ) + subparser.add_argument( "--keep-secondly", dest="secondly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of secondly archives to keep", + help="number or time interval of secondly archives to keep", ) subparser.add_argument( "--keep-minutely", dest="minutely", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of minutely archives to keep", + help="number or time interval of minutely archives to keep", ) subparser.add_argument( "-H", "--keep-hourly", dest="hourly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of hourly archives to keep", + help="number or time interval of hourly archives to keep", ) subparser.add_argument( "-d", "--keep-daily", dest="daily", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of daily archives to keep", + help="number or time interval of daily archives to keep", ) subparser.add_argument( "-w", "--keep-weekly", dest="weekly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of weekly archives to keep", + help="number or time interval of weekly archives to keep", ) subparser.add_argument( "-m", "--keep-monthly", dest="monthly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of monthly archives to keep", + help="number or time interval of monthly archives to keep", ) quarterly_group = subparser.add_mutually_exclusive_group() quarterly_group.add_argument( "--keep-13weekly", dest="quarterly_13weekly", - type=int, - default=0, - help="number of quarterly archives to keep (13 week strategy)", + type=int_or_interval, + help="number or time interval of quarterly archives to keep (13 week strategy)", ) quarterly_group.add_argument( "--keep-3monthly", 
dest="quarterly_3monthly", - type=int, - default=0, - help="number of quarterly archives to keep (3 month strategy)", + type=int_or_interval, + help="number or time interval of quarterly archives to keep (3 month strategy)", ) subparser.add_argument( "-y", "--keep-yearly", dest="yearly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of yearly archives to keep", + help="number or time interval of yearly archives to keep", ) define_archive_filters_group(subparser, sort_by=False, first_last=False) subparser.add_argument( diff --git a/src/borg/constants.py b/src/borg/constants.py index 911a8f1bef..3fed99c131 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -137,7 +137,9 @@ EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S" +ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z" ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f" +ISO_FORMAT_ZONE = ISO_FORMAT + "%z" DASHES = "-" * 78 diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index 9dda19a0b5..c4d63e9132 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -1,22 +1,22 @@ -import re -from datetime import datetime, timezone, timedelta - import pytest +import re +from datetime import datetime, timezone +from freezegun import freeze_time from ...constants import * # NOQA -from ...archiver.prune_cmd import prune_split, prune_within +from ...archiver.prune_cmd import prune_split +from ...helpers import CommandError from . 
import cmd, RK_ENCRYPTION, src_dir, generate_archiver_tests -from ...helpers import interval pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA -def _create_archive_ts(archiver, name, y, m, d, H=0, M=0, S=0): +def _create_archive_ts(archiver, name, y, m, d, H=0, M=0, S=0, us=0, tzinfo=None): cmd( archiver, "create", "--timestamp", - datetime(y, m, d, H, M, S, 0).strftime(ISO_FORMAT_NO_USECS), # naive == local time / local tz + datetime(y, m, d, H, M, S, us, tzinfo=tzinfo).strftime(ISO_FORMAT_ZONE), name, src_dir, ) @@ -256,7 +256,7 @@ def test_prune_ignore_protected(archivers, request): cmd(archiver, "create", "archive3", archiver.input_path) output = cmd(archiver, "prune", "--list", "--keep-last=1", "--match-archives=sh:archive*") assert "archive1" not in output # @PROT archives are completely ignored. - assert re.search(r"Keeping archive \(rule: secondly #1\):\s+archive3", output) + assert re.search(r"Keeping archive \(rule: last #1\):\s+archive3", output) assert re.search(r"Pruning archive \(.*?\):\s+archive2", output) output = cmd(archiver, "repo-list") assert "archive1" in output # @PROT protected archive1 from deletion @@ -283,38 +283,6 @@ def __repr__(self): local_tz = datetime.now(tz=timezone.utc).astimezone(tz=None).tzinfo -def test_prune_within(): - def subset(lst, indices): - return {lst[i] for i in indices} - - def dotest(test_archives, within, indices): - for ta in test_archives, reversed(test_archives): - kept_because = {} - keep = prune_within(ta, interval(within), kept_because) - assert set(keep) == subset(test_archives, indices) - assert all("within" == kept_because[a.id][0] for a in keep) - - # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours - test_offsets = [60, 90 * 60, 150 * 60, 210 * 60, 25 * 60 * 60, 49 * 60 * 60] - now = datetime.now(timezone.utc) - test_dates = [now - timedelta(seconds=s) for s in test_offsets] - test_archives = [MockArchive(date, i) for i, date in 
enumerate(test_dates)] - - dotest(test_archives, "15S", []) - dotest(test_archives, "2M", [0]) - dotest(test_archives, "1H", [0]) - dotest(test_archives, "2H", [0, 1]) - dotest(test_archives, "3H", [0, 1, 2]) - dotest(test_archives, "24H", [0, 1, 2, 3]) - dotest(test_archives, "26H", [0, 1, 2, 3, 4]) - dotest(test_archives, "2d", [0, 1, 2, 3, 4]) - dotest(test_archives, "50H", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "3d", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1w", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1m", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1y", [0, 1, 2, 3, 4, 5]) - - @pytest.mark.parametrize( "rule,num_to_keep,expected_ids", [ @@ -353,7 +321,7 @@ def subset(lst, ids): MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=local_tz), 13), ] kept_because = {} - keep = prune_split(archives, rule, num_to_keep, kept_because) + keep = prune_split(archives, rule, num_to_keep, None, kept_because) assert set(keep) == subset(archives, expected_ids) for item in keep: @@ -400,3 +368,224 @@ def test_prune_split_no_archives(): assert keep == [] assert kept_because == {} + +def test_prune_keep_last_same_second(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + cmd(archiver, "create", "test1", src_dir) + cmd(archiver, "create", "test2", src_dir) + output = cmd(archiver, "prune", "--list", "--dry-run", "--keep-last=2") + # Both archives are kept even though they have the same timestamp to the second. Would previously have failed with + # old behavior of --keep-last. Archives sorted on seconds, order is undefined. 
+ assert re.search(r"Keeping archive \(rule: last #\d\):\s+test1", output) + assert re.search(r"Keeping archive \(rule: last #\d\):\s+test2", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) # Non-leap year ending on a Sunday +def test_prune_keep_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 23, 59, 59) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 23, 59, 59) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 23, 59, 58) + for keep_arg in ["--keep=2", "--keep=1S"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) +def test_prune_keep_int_or_interval_zero(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test", 2023, 12, 31, 23, 59, 59) + for keep_arg in ["--keep=0", "--keep=0S"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Would prune:\s+test", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) +def test_prune_keep_secondly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 23, 59, 58) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 23, 59, 57, 1) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 23, 59, 57) + _create_archive_ts(archiver, "test-4", 2023, 12, 31, 23, 59, 56, 999999) + for keep_arg in ["--keep-secondly=2", "--keep-secondly=2S"]: + output = cmd(archiver, "prune", "--list", 
"--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: secondly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: secondly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Would prune:\s+test-4", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 0, tzinfo=None)) +def test_prune_keep_minutely_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 23, 58) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 23, 57, 1) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 23, 57) + _create_archive_ts(archiver, "test-4", 2023, 12, 31, 23, 56, 0, 1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 31, 23, 56) + for keep_arg in ["--keep-minutely=3", "--keep-minutely=3M"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: minutely #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: minutely #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: minutely #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 0, 0, tzinfo=None)) +def test_prune_keep_hourly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 22) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 21, us=1) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 21) + _create_archive_ts(archiver, "test-4", 2023, 12, 31, 20, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 31, 20) + for keep_arg in ["--keep-hourly=3", "--keep-hourly=3H"]: + output = cmd(archiver, "prune", 
"--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: hourly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_daily_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 30) + _create_archive_ts(archiver, "test-2", 2023, 12, 29, S=1) + _create_archive_ts(archiver, "test-3", 2023, 12, 29) + _create_archive_ts(archiver, "test-4", 2023, 12, 28, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 28) + for keep_arg in ["--keep-daily=3", "--keep-daily=3d"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_weekly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 24) + _create_archive_ts(archiver, "test-2", 2023, 12, 17, us=1) + _create_archive_ts(archiver, "test-3", 2023, 12, 17) + _create_archive_ts(archiver, "test-4", 2023, 12, 10, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 10) + for keep_arg in ["--keep-weekly=3", "--keep-weekly=3w"]: + output = cmd(archiver, "prune", 
"--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_monthly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 11, 30) + _create_archive_ts(archiver, "test-2", 2023, 10, 30, us=1) # Month defined as 31 days, so not Oct 31st + _create_archive_ts(archiver, "test-3", 2023, 10, 30) + _create_archive_ts(archiver, "test-4", 2023, 9, 29, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 9, 29) + for keep_arg in ["--keep-monthly=3", "--keep-monthly=3m"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +# 2023-12-31 is Sunday, week 52. Makes these week calculations a little easier. 
+@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_13weekly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 10, 1) + _create_archive_ts(archiver, "test-2", 2023, 7, 2, us=1) + _create_archive_ts(archiver, "test-3", 2023, 7, 2) + _create_archive_ts(archiver, "test-4", 2023, 4, 2, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 4, 2) + for keep_arg in ["--keep-13weekly=3", "--keep-13weekly=39w"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_3monthly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 9, 30) + _create_archive_ts(archiver, "test-2", 2023, 6, 30, us=1) + _create_archive_ts(archiver, "test-3", 2023, 6, 30) + _create_archive_ts(archiver, "test-4", 2023, 3, 31, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 3, 31) + for keep_arg in ["--keep-3monthly=3", f"--keep-3monthly={(datetime.now()-datetime(2023, 3, 31)).days}d"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert 
re.search(r"Keeping archive \(rule: quarterly_3monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_yearly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2022, 12, 31) + _create_archive_ts(archiver, "test-2", 2021, 12, 31, us=1) + _create_archive_ts(archiver, "test-3", 2021, 12, 31) + _create_archive_ts(archiver, "test-4", 2020, 12, 31, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2020, 12, 31) + for keep_arg in ["--keep-yearly=3", "--keep-yearly=3y"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: yearly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +def test_prune_no_args(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune") + output = str(error.value) + assert re.search(r"At least one of the .* settings must be specified.", output) + assert re.search(r"keep(?!-)", output) + flags = [ + "last", + "within", + "secondly", + "minutely", + "hourly", + "daily", + "weekly", + "monthly", + "yearly", + "13weekly", + "3monthly", + ] + for flag in flags: + assert f"keep-{flag}" in output From dfcee1dd1c1ac40c645071f7249916c471accd2e Mon Sep 17 00:00:00 2001 From: Hugo Wallenburg Date: Tue, 3 Jun 2025 23:11:55 +0200 Subject: [PATCH 3/3] Removes unnecessarily complicated local timezone in test Default with tzinfo=None is local timezone anyway, no need to set it manually. 
--- src/borg/testsuite/archiver/prune_cmd_test.py | 55 ++++++++----------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/src/borg/testsuite/archiver/prune_cmd_test.py b/src/borg/testsuite/archiver/prune_cmd_test.py index c4d63e9132..ae9696d3cf 100644 --- a/src/borg/testsuite/archiver/prune_cmd_test.py +++ b/src/borg/testsuite/archiver/prune_cmd_test.py @@ -265,24 +265,14 @@ def test_prune_ignore_protected(archivers, request): class MockArchive: def __init__(self, ts, id): - self.ts = ts + # Real archive objects have UTC zoned timestamps + self.ts = ts.replace(tzinfo=timezone.utc) self.id = id def __repr__(self): return f"{self.id}: {self.ts.isoformat()}" -# This is the local timezone of the system running the tests. -# We need this e.g. to construct archive timestamps for the prune tests, -# because borg prune operates in the local timezone (it first converts the -# archive timestamp to the local timezone). So, if we want the y/m/d/h/m/s -# values which prune uses to be exactly the ones we give [and NOT shift them -# by tzoffset], we need to give the timestamps in the same local timezone. -# Please note that the timestamps in a real borg archive or manifest are -# stored in UTC timezone. 
-local_tz = datetime.now(tz=timezone.utc).astimezone(tz=None).tzinfo - - @pytest.mark.parametrize( "rule,num_to_keep,expected_ids", [ @@ -302,23 +292,23 @@ def subset(lst, ids): archives = [ # years apart - MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), - MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=local_tz), 2), - MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=local_tz), 3), + MockArchive(datetime(2015, 1, 1, 10, 0, 0), 1), + MockArchive(datetime(2016, 1, 1, 10, 0, 0), 2), + MockArchive(datetime(2017, 1, 1, 10, 0, 0), 3), # months apart - MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=local_tz), 4), - MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=local_tz), 5), + MockArchive(datetime(2017, 2, 1, 10, 0, 0), 4), + MockArchive(datetime(2017, 3, 1, 10, 0, 0), 5), # days apart - MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=local_tz), 6), - MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=local_tz), 7), - MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=local_tz), 8), + MockArchive(datetime(2017, 3, 2, 10, 0, 0), 6), + MockArchive(datetime(2017, 3, 3, 10, 0, 0), 7), + MockArchive(datetime(2017, 3, 4, 10, 0, 0), 8), # minutes apart - MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=local_tz), 9), - MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=local_tz), 10), + MockArchive(datetime(2017, 10, 1, 9, 45, 0), 9), + MockArchive(datetime(2017, 10, 1, 9, 55, 0), 10), # seconds apart - MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=local_tz), 11), - MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=local_tz), 12), - MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=local_tz), 13), + MockArchive(datetime(2017, 10, 1, 10, 0, 1), 11), + MockArchive(datetime(2017, 10, 1, 10, 0, 3), 12), + MockArchive(datetime(2017, 10, 1, 10, 0, 5), 13), ] kept_because = {} keep = prune_split(archives, rule, num_to_keep, None, kept_because) @@ -334,17 +324,17 @@ def subset(lst, ids): archives = [ # oldest backup, but not last in its year - 
MockArchive(datetime(2018, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), + MockArchive(datetime(2018, 1, 1, 10, 0, 0), 1), # an interim backup - MockArchive(datetime(2018, 12, 30, 10, 0, 0, tzinfo=local_tz), 2), + MockArchive(datetime(2018, 12, 30, 10, 0, 0), 2), # year-end backups - MockArchive(datetime(2018, 12, 31, 10, 0, 0, tzinfo=local_tz), 3), - MockArchive(datetime(2019, 12, 31, 10, 0, 0, tzinfo=local_tz), 4), + MockArchive(datetime(2018, 12, 31, 10, 0, 0), 3), + MockArchive(datetime(2019, 12, 31, 10, 0, 0), 4), ] # Keep oldest when retention target can't otherwise be met kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune_split(archives, "yearly", 3, None, kept_because) assert set(keep) == subset(archives, [1, 3, 4]) assert kept_because[1][0] == "yearly[oldest]" @@ -353,7 +343,7 @@ def subset(lst, ids): # Otherwise, prune it kept_because = {} - keep = prune_split(archives, "yearly", 2, kept_because) + keep = prune_split(archives, "yearly", 2, None, kept_because) assert set(keep) == subset(archives, [3, 4]) assert kept_because[3][0] == "yearly" @@ -364,11 +354,12 @@ def test_prune_split_no_archives(): archives = [] kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune_split(archives, "yearly", 3, None, kept_because) assert keep == [] assert kept_because == {} + def test_prune_keep_last_same_second(archivers, request): archiver = request.getfixturevalue(archivers) cmd(archiver, "repo-create", RK_ENCRYPTION)