Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
282d70c
Add preliminary support for ISO-8601 timestamps (no timezones at the …
c-herz Apr 19, 2025
db46cdb
reformatted to pass style checks
degabe Apr 21, 2025
4363bf7
Applied recommended changes from ThomasWald, still working as intende…
degabe Apr 21, 2025
69e8608
fix bug with local timezone attachment not correctly respecting DST
c-herz Apr 21, 2025
5c20d8f
Reformatted for consistency with code style guide
c-herz Apr 22, 2025
6f1bcd4
Added basic test suite for ISO-8601 and Unix timestamp matching
c-herz Apr 22, 2025
4060e94
Merge remote-tracking branch 'origin/dateFilterImprov' into datefilter
c-herz Apr 22, 2025
e9a8c5f
add day-precision filter test for `date:YYYY-MM-DD`
c-herz Apr 22, 2025
470758d
support timezone suffixes in date: patterns and add tests
c-herz Apr 22, 2025
df2d33d
Wildcard working. Done some manual testing, will focus on more rigoro…
degabe Apr 23, 2025
870bf7a
add tests for wildcard support in date: archive match patterns; refor…
c-herz Apr 25, 2025
461df75
fix bug with wildcards in date: match patterns not respecting supplie…
c-herz Apr 25, 2025
9553c35
remove stray testfile.txt
c-herz Apr 25, 2025
409733b
refactor date: pattern parser to use structured bottom-up regex, per …
c-herz Apr 25, 2025
de03806
refactor date: pattern parsing to use helper functions for datetime c…
c-herz Apr 25, 2025
796981c
add explicit time interval matching in date: archive match pattern (w…
c-herz Apr 25, 2025
7b8a194
add duration-based interval support for date: archive match patterns;…
c-herz Apr 25, 2025
8e3f1e4
add support for keyword-based date intervals in archive date: matchin…
c-herz Apr 25, 2025
904853d
refactor time.py: rename internal functions for clarity and consistency
c-herz Apr 25, 2025
6032c4a
add support for ISO week-date and ordinal-date matching in date: arch…
c-herz Apr 25, 2025
9cb5e5f
enhance compile_date_pattern docstring: clarify TIMESTAMP and DURATIO…
c-herz Apr 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions src/borg/helpers/time.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import re
from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo


def parse_timestamp(timestamp, tzinfo=timezone.utc):
Expand Down Expand Up @@ -185,3 +186,142 @@ def isoformat(self):
def archive_ts_now():
"""return tz-aware datetime obj for current time for usage as archive timestamp"""
return datetime.now(timezone.utc) # utc time / utc timezone


class DatePatternError(ValueError):
    """Signals that a ``date:`` archive match pattern could not be parsed."""


def exact_predicate(dt: datetime):
    """Build a matcher that accepts only the instant described by *dt*.

    *dt* is converted to UTC once, up front. The returned callable converts
    each candidate timestamp to UTC as well and reports whether the two
    instants are equal.
    """
    target = dt.astimezone(timezone.utc)

    def matches(ts):
        return ts.astimezone(timezone.utc) == target

    return matches


def interval_predicate(start: datetime, end: datetime):
    """Build a matcher for the half-open interval ``[start, end)``.

    Both bounds are converted to UTC once. The returned callable converts
    each candidate timestamp to UTC and accepts it when it is at or after
    *start* and strictly before *end*.
    """
    lower = start.astimezone(timezone.utc)
    upper = end.astimezone(timezone.utc)

    def in_interval(ts):
        ts_utc = ts.astimezone(timezone.utc)
        return lower <= ts_utc < upper

    return in_interval


def parse_tz(tzstr: str):
    """
    Parse the timezone suffix of a date: pattern into a tzinfo object.

    Accepted forms:
        "" or None      -> None (no timezone given; the caller picks the default)
        "Z"             -> UTC
        "+HH:MM"/"-HH:MM" -> fixed UTC offset, restricted to -12:00 .. +14:00
        "[Region/Name]" -> zone looked up via zoneinfo (brackets are stripped)

    Raises DatePatternError for a malformed offset, an offset outside the
    allowed range, or a zone name that zoneinfo cannot load.
    """
    if not tzstr:
        return None
    if tzstr == "Z":
        return timezone.utc
    if tzstr[0] in "+-":
        # Validate with a strict pattern instead of relying on int(): int()
        # also accepts a nested sign, surrounding whitespace and underscores,
        # so input such as "+-08:30" used to slip through and silently
        # produce a wrong offset.
        m = re.fullmatch(r"([+-])([0-9]{1,2}):([0-9]{1,2})", tzstr)
        if m is None:
            raise DatePatternError("invalid UTC offset format")
        hh, mm = int(m.group(2)), int(m.group(3))
        if mm >= 60:
            raise DatePatternError("invalid UTC offset format")
        # apply the sign to the whole offset so that, for example, -8:30 is
        # -8 hours and -30 minutes, not -8 hours and +30 minutes
        sign = -1 if m.group(1) == "-" else 1
        total_minutes = sign * (hh * 60 + mm)
        # enforce ISO-8601 bounds (-12:00 to +14:00)
        if not (-12 * 60 <= total_minutes <= 14 * 60):
            raise DatePatternError("UTC offset outside ISO-8601 bounds")
        return timezone(timedelta(minutes=total_minutes))
    # [Region/Name]
    try:
        return ZoneInfo(tzstr.strip("[]"))
    except Exception as err:
        # ZoneInfo signals bad keys with several exception types (lookup,
        # value and OS errors); all of them mean "unusable zone name" here.
        raise DatePatternError("invalid timezone format") from err


def compile_date_pattern(expr: str):
"""
Compile a date: archive match expression into a timestamp predicate.

Accepts any of:
YYYY
YYYY-MM
YYYY-MM-DD
YYYY-MM-DDTHH
YYYY-MM-DDTHH:MM
YYYY-MM-DDTHH:MM:SS
YYYY-MM-DDTHH:MM:SS.fff (fractional seconds)
Unix epoch (@123456789)
…with an optional trailing timezone (Z or ±HH:MM or [Region/City]).

Returns a predicate that takes a datetime. For the fractional-seconds form
the predicate is True only for that exact instant. For every other form it
is True for timestamps in the half-open interval starting at the given
point and lasting one unit of the given precision (one second for the
Unix epoch form).

Unix epoch patterns are UTC and must not carry a timezone suffix.
When no timezone suffix is given, tzinfo=None is passed to parse_timestamp.

Raises DatePatternError if expr does not match any accepted form, if the
timezone suffix is rejected by parse_tz, or if a Unix epoch is combined
with a timezone suffix.
"""
expr = expr.strip()
# One alternative per supported precision, listed from most to least
# specific; on a match exactly one of the named groups fraction..epoch is set.
pattern = r"""
^
(?:
(?P<fraction>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+) # full timestamp with fraction
| (?P<second> \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}) # no fraction
| (?P<minute> \d{4}-\d{2}-\d{2}T\d{2}:\d{2}) # minute precision
| (?P<hour> \d{4}-\d{2}-\d{2}T\d{2}) # hour precision
| (?P<day> \d{4}-\d{2}-\d{2}) # day precision
| (?P<month> \d{4}-\d{2}) # month precision
| (?P<year> \d{4}) # year precision
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting approach.

What I meant was rather something like (simplified to cover only YYYY and YYYY-MM here as an example):

(?P<year>\d{4})
(-
 (?P<month>\d{2})
)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes this would've been much simpler. Will try to work on refactoring it to this approach tomorrow.

| @(?P<epoch>\d+) # unix epoch
)
(?P<tz>Z|[+\-]\d{2}:\d{2}|\[[^\]]+\])? # optional timezone or [Region/City]
$
"""
m = re.match(pattern, expr, re.VERBOSE)
if not m:
raise DatePatternError(f"unrecognised date: {expr!r}")

gd = m.groupdict()
# NOTE(review): the local-timezone fallback mentioned below happens inside
# parse_timestamp, which is outside this view - confirm.
tz = parse_tz(gd.get("tz")) # will be None if tzstr is empty -> local timezone

# unix epoch and user-specified timezone are mutually exclusive
if gd["epoch"] and tz is not None:
raise DatePatternError("unix‐epoch patterns (@123456789) are UTC and must not include a timezone suffix")

# 1) fractional‐second exact match
if gd["fraction"]:
ts = gd["fraction"]
dt = parse_timestamp(ts, tzinfo=tz)
return exact_predicate(dt)

# 2) second‐precision interval
if gd["second"]:
ts = gd["second"]
start = parse_timestamp(ts, tzinfo=tz)
# within one second
return interval_predicate(start, start + timedelta(seconds=1))

# 3) minute‐precision interval
if gd["minute"]:
# pad the missing seconds so parse_timestamp receives a full timestamp
ts = gd["minute"] + ":00"
start = parse_timestamp(ts, tzinfo=tz)
return interval_predicate(start, start + timedelta(minutes=1))

# 4) hour‐precision interval
if gd["hour"]:
# pad the missing minutes and seconds
ts = gd["hour"] + ":00:00"
start = parse_timestamp(ts, tzinfo=tz)
return interval_predicate(start, start + timedelta(hours=1))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe just use 1 regex with group names ((?P<name>...) that covers 1) .. 4) and also YYYY, YYYY-MM, YYYY-MM-DD cases from below.

After a single m = re.match(regex, expr), you can check m.groupdict() in the right order (fraction, S, M, H, d, m, y) to determine which case you have.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use re.VERBOSE so you can have a multi-line, commented regex for this.


# 5a) day‐precision interval
if gd["day"]:
# the interval starts at midnight of the given day
ts = gd["day"] + "T00:00:00"
start = parse_timestamp(ts, tzinfo=tz)
return interval_predicate(start, start + timedelta(days=1))

# 5b) month‐precision interval
if gd["month"]:
ts = gd["month"] + "-01T00:00:00"
start = parse_timestamp(ts, tzinfo=tz)
# offset_n_months is defined outside this view; presumably it returns
# start shifted by n calendar months - TODO confirm
return interval_predicate(start, offset_n_months(start, 1))

# 5c) year‐precision interval
if gd["year"]:
ts = gd["year"] + "-01-01T00:00:00"
start = parse_timestamp(ts, tzinfo=tz)
# a year is expressed as 12 months via the same helper as the month case
return interval_predicate(start, offset_n_months(start, 12))

# 6) unix‐epoch exact‐second match
if gd["epoch"]:
epoch = int(gd["epoch"])
# epoch seconds are interpreted as UTC; the match covers that one second
start = datetime.fromtimestamp(epoch, tz=timezone.utc)
return interval_predicate(start, start + timedelta(seconds=1))

# should never get here
raise DatePatternError(f"unrecognised date: {expr!r}")
15 changes: 14 additions & 1 deletion src/borg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
from .constants import * # NOQA
from .helpers.datastruct import StableDict
from .helpers.parseformat import bin_to_hex, hex_to_bin
from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now
from .helpers.time import (
parse_timestamp,
calculate_relative_offset,
archive_ts_now,
compile_date_pattern,
DatePatternError,
)
from .helpers.errors import Error, CommandError
from .item import ArchiveItem
from .patterns import get_regex_from_pattern
Expand Down Expand Up @@ -198,6 +204,13 @@ def _matching_info_tuples(self, match_patterns, match_end, *, deleted=False):
elif match.startswith("host:"):
wanted_host = match.removeprefix("host:")
archive_infos = [x for x in archive_infos if x.host == wanted_host]
elif match.startswith("date:"):
wanted_date = match.removeprefix("date:")
try:
pred = compile_date_pattern(wanted_date)
except DatePatternError as e:
raise CommandError(f"Invalid date pattern: {match} ({e})")
archive_infos = [x for x in archive_infos if pred(x.ts)]
else: # do a match on the name
match = match.removeprefix("name:") # accept optional name: prefix
regex = get_regex_from_pattern(match)
Expand Down
Loading
Loading