Skip to content

Commit 7d70970

Browse files
authored
Merge branch 'main' into lazebnyi/add-json-loads-to-jwt-authentication-params
2 parents 76908e3 + a6d55be commit 7d70970

File tree

2 files changed

+117
-116
lines changed

2 files changed

+117
-116
lines changed

airbyte_cdk/utils/datetime_helpers.py

Lines changed: 48 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@
7676
assert ab_datetime_try_parse("2023-03-14T15:09:26Z") # Basic UTC format
7777
assert ab_datetime_try_parse("2023-03-14T15:09:26-04:00") # With timezone offset
7878
assert ab_datetime_try_parse("2023-03-14T15:09:26+00:00") # With explicit UTC offset
79-
assert not ab_datetime_try_parse("2023-03-14 15:09:26Z") # Invalid: missing T delimiter
80-
assert not ab_datetime_try_parse("foo") # Invalid: not a datetime
79+
assert ab_datetime_try_parse("2023-03-14 15:09:26Z") # Missing T delimiter but still parsable
80+
assert not ab_datetime_try_parse("foo") # Invalid: not parsable, returns `None`
8181
```
8282
"""
8383

@@ -138,6 +138,14 @@ def from_datetime(cls, dt: datetime) -> "AirbyteDateTime":
138138
dt.tzinfo or timezone.utc,
139139
)
140140

141+
def to_datetime(self) -> datetime:
142+
"""Converts this AirbyteDateTime to a standard datetime object.
143+
144+
Today, this just returns `self` because AirbyteDateTime is a subclass of `datetime`.
145+
In the future, we may modify our internal representation to use a different base class.
146+
"""
147+
return self
148+
141149
def __str__(self) -> str:
142150
"""Returns the datetime in ISO8601/RFC3339 format with 'T' delimiter.
143151
@@ -148,12 +156,7 @@ def __str__(self) -> str:
148156
str: ISO8601/RFC3339 formatted string.
149157
"""
150158
aware_self = self if self.tzinfo else self.replace(tzinfo=timezone.utc)
151-
base = self.strftime("%Y-%m-%dT%H:%M:%S")
152-
if self.microsecond:
153-
base = f"{base}.{self.microsecond:06d}"
154-
# Format timezone as ±HH:MM
155-
offset = aware_self.strftime("%z")
156-
return f"{base}{offset[:3]}:{offset[3:]}"
159+
return aware_self.isoformat(sep="T", timespec="auto")
157160

158161
def __repr__(self) -> str:
159162
"""Returns the same string representation as __str__ for consistency.
@@ -358,15 +361,15 @@ def ab_datetime_now() -> AirbyteDateTime:
358361
def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
359362
"""Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness.
360363
361-
Previously named: parse()
364+
This implementation is as flexible as possible to handle various datetime formats.
365+
Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
362366
363367
Handles:
364-
- ISO8601/RFC3339 format strings (with 'T' delimiter)
368+
- ISO8601/RFC3339 format strings (with ' ' or 'T' delimiter)
365369
- Unix timestamps (as integers or strings)
366370
- Date-only strings (YYYY-MM-DD)
367371
- Timezone-aware formats (+00:00 for UTC, or ±HH:MM offset)
368-
369-
Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
372+
- Anything that can be parsed by `dateutil.parser.parse()`
370373
371374
Args:
372375
dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
@@ -416,15 +419,16 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
416419
except (ValueError, TypeError):
417420
raise ValueError(f"Invalid date format: {dt_str}")
418421

419-
# Validate datetime format
420-
if "/" in dt_str or " " in dt_str or "GMT" in dt_str:
421-
raise ValueError(f"Could not parse datetime string: {dt_str}")
422+
# Reject time-only strings without date
423+
if ":" in dt_str and dt_str.count("-") < 2 and dt_str.count("/") < 2:
424+
raise ValueError(f"Missing date part in datetime string: {dt_str}")
422425

423426
# Try parsing with dateutil for timezone handling
424427
try:
425428
parsed = parser.parse(dt_str)
426429
if parsed.tzinfo is None:
427430
parsed = parsed.replace(tzinfo=timezone.utc)
431+
428432
return AirbyteDateTime.from_datetime(parsed)
429433
except (ValueError, TypeError):
430434
raise ValueError(f"Could not parse datetime string: {dt_str}")
@@ -438,7 +442,29 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
438442
raise ValueError(f"Could not parse datetime string: {dt_str}")
439443

440444

441-
def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
445+
def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
446+
"""Try to parse the input as a datetime, failing gracefully instead of raising an exception.
447+
448+
This is a thin wrapper around `ab_datetime_parse()` that catches parsing errors and
449+
returns `None` instead of raising an exception.
450+
The implementation is as flexible as possible to handle various datetime formats.
451+
Always returns a timezone-aware datetime (defaults to `UTC` if no timezone specified).
452+
453+
Example:
454+
>>> ab_datetime_try_parse("2023-03-14T15:09:26Z") # Returns AirbyteDateTime
455+
>>> ab_datetime_try_parse("2023-03-14 15:09:26Z") # Missing 'T' delimiter still parsable
456+
>>> ab_datetime_try_parse("2023-03-14") # Returns midnight UTC time
457+
"""
458+
try:
459+
return ab_datetime_parse(dt_str)
460+
except (ValueError, TypeError):
461+
return None
462+
463+
464+
def ab_datetime_format(
465+
dt: Union[datetime, AirbyteDateTime],
466+
format: str | None = None,
467+
) -> str:
442468
"""Formats a datetime object as an ISO8601/RFC3339 string with 'T' delimiter and timezone.
443469
444470
Previously named: format()
@@ -449,6 +475,8 @@ def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
449475
450476
Args:
451477
dt: Any datetime object to format.
478+
format: Optional format string. If provided, calls `strftime()` with this format.
479+
Otherwise, uses the default ISO8601/RFC3339 format, adapted for available precision.
452480
453481
Returns:
454482
str: ISO8601/RFC3339 formatted datetime string.
@@ -464,54 +492,8 @@ def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
464492
if dt.tzinfo is None:
465493
dt = dt.replace(tzinfo=timezone.utc)
466494

467-
# Format with consistent timezone representation
468-
base = dt.strftime("%Y-%m-%dT%H:%M:%S")
469-
if dt.microsecond:
470-
base = f"{base}.{dt.microsecond:06d}"
471-
offset = dt.strftime("%z")
472-
return f"{base}{offset[:3]}:{offset[3:]}"
473-
474-
475-
def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
476-
"""Try to parse the input string as an ISO8601/RFC3339 datetime, failing gracefully instead of raising an exception.
477-
478-
Requires strict ISO8601/RFC3339 format with:
479-
- 'T' delimiter between date and time components
480-
- Valid timezone (Z for UTC or ±HH:MM offset)
481-
- Complete datetime representation (date and time)
495+
if format:
496+
return dt.strftime(format)
482497

483-
Returns None for any non-compliant formats including:
484-
- Space-delimited datetimes
485-
- Date-only strings
486-
- Missing timezone
487-
- Invalid timezone format
488-
- Wrong date/time separators
489-
490-
Example:
491-
>>> ab_datetime_try_parse("2023-03-14T15:09:26Z") # Returns AirbyteDateTime
492-
>>> ab_datetime_try_parse("2023-03-14 15:09:26Z") # Returns None (invalid format)
493-
>>> ab_datetime_try_parse("2023-03-14") # Returns None (missing time and timezone)
494-
"""
495-
if not isinstance(dt_str, str):
496-
return None
497-
try:
498-
# Validate format before parsing
499-
if "T" not in dt_str:
500-
return None
501-
if not any(x in dt_str for x in ["Z", "+", "-"]):
502-
return None
503-
if "/" in dt_str or " " in dt_str or "GMT" in dt_str:
504-
return None
505-
506-
# Try parsing with dateutil
507-
parsed = parser.parse(dt_str)
508-
if parsed.tzinfo is None:
509-
return None
510-
511-
# Validate time components
512-
if not (0 <= parsed.hour <= 23 and 0 <= parsed.minute <= 59 and 0 <= parsed.second <= 59):
513-
return None
514-
515-
return AirbyteDateTime.from_datetime(parsed)
516-
except (ValueError, TypeError):
517-
return None
498+
# Format with consistent timezone representation and "T" delimiter
499+
return dt.isoformat(sep="T", timespec="auto")

unit_tests/utils/test_datetime_helpers.py

Lines changed: 69 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,63 @@ def test_now():
5151
@pytest.mark.parametrize(
5252
"input_value,expected_output,error_type,error_match",
5353
[
54+
# Valid formats - must have T delimiter and timezone
55+
("2023-03-14T15:09:26+00:00", "2023-03-14T15:09:26+00:00", None, None), # Basic UTC format
56+
(
57+
"2023-03-14T15:09:26.123+00:00",
58+
"2023-03-14T15:09:26.123000+00:00",
59+
None,
60+
None,
61+
), # With milliseconds
62+
(
63+
"2023-03-14T15:09:26.123456+00:00",
64+
"2023-03-14T15:09:26.123456+00:00",
65+
None,
66+
None,
67+
), # With microseconds
68+
(
69+
"2023-03-14T15:09:26-04:00",
70+
"2023-03-14T15:09:26-04:00",
71+
None,
72+
None,
73+
), # With timezone offset
74+
("2023-03-14T15:09:26Z", "2023-03-14T15:09:26+00:00", None, None), # With Z timezone
75+
(
76+
"2023-03-14T00:00:00+00:00",
77+
"2023-03-14T00:00:00+00:00",
78+
None,
79+
None,
80+
), # Full datetime with zero time
81+
(
82+
"2023-03-14T15:09:26GMT",
83+
"2023-03-14T15:09:26+00:00",
84+
None,
85+
None,
86+
), # Non-standard timezone name ok
87+
(
88+
"2023-03-14T15:09:26",
89+
"2023-03-14T15:09:26+00:00",
90+
None,
91+
None,
92+
), # Missing timezone, assume UTC
93+
(
94+
"2023-03-14 15:09:26",
95+
"2023-03-14T15:09:26+00:00",
96+
None,
97+
None,
98+
), # Missing T delimiter ok, assume UTC
99+
(
100+
"2023-03-14",
101+
"2023-03-14T00:00:00+00:00",
102+
None,
103+
None,
104+
), # Date only, missing time and timezone
105+
(
106+
"2023/03/14T15:09:26Z",
107+
"2023-03-14T15:09:26+00:00",
108+
None,
109+
None,
110+
), # Wrong date separator, ok
54111
# Valid formats
55112
("2023-03-14T15:09:26Z", "2023-03-14T15:09:26+00:00", None, None),
56113
("2023-03-14T15:09:26-04:00", "2023-03-14T15:09:26-04:00", None, None),
@@ -71,20 +128,10 @@ def test_now():
71128
("2023-12-32", None, ValueError, "Invalid date format: 2023-12-32"),
72129
("2023-00-14", None, ValueError, "Invalid date format: 2023-00-14"),
73130
("2023-12-00", None, ValueError, "Invalid date format: 2023-12-00"),
74-
# Invalid separators and formats
75-
("2023/12/14", None, ValueError, "Could not parse datetime string: 2023/12/14"),
76-
(
77-
"2023-03-14 15:09:26Z",
78-
None,
79-
ValueError,
80-
"Could not parse datetime string: 2023-03-14 15:09:26Z",
81-
),
82-
(
83-
"2023-03-14T15:09:26GMT",
84-
None,
85-
ValueError,
86-
"Could not parse datetime string: 2023-03-14T15:09:26GMT",
87-
),
131+
# Non-standard separators and formats, ok
132+
("2023/12/14", "2023-12-14T00:00:00+00:00", None, None),
133+
("2023-03-14 15:09:26Z", "2023-03-14T15:09:26+00:00", None, None),
134+
("2023-03-14T15:09:26GMT", "2023-03-14T15:09:26+00:00", None, None),
88135
# Invalid time components
89136
(
90137
"2023-03-14T25:09:26Z",
@@ -105,16 +152,24 @@ def test_now():
105152
"Could not parse datetime string: 2023-03-14T15:09:99Z",
106153
),
107154
],
155+
# ("invalid datetime", None), # Completely invalid
156+
# ("15:09:26Z", None), # Missing date component
157+
# ("2023-03-14T25:09:26Z", None), # Invalid hour
158+
# ("2023-03-14T15:99:26Z", None), # Invalid minute
159+
# ("2023-03-14T15:09:99Z", None), # Invalid second
160+
# ("2023-02-30T00:00:00Z", None), # Impossible date
108161
)
109162
def test_parse(input_value, expected_output, error_type, error_match):
110163
"""Test parsing various datetime string formats."""
111164
if error_type:
112165
with pytest.raises(error_type, match=error_match):
113166
ab_datetime_parse(input_value)
167+
assert not ab_datetime_try_parse(input_value)
114168
else:
115169
dt = ab_datetime_parse(input_value)
116170
assert isinstance(dt, AirbyteDateTime)
117171
assert str(dt) == expected_output
172+
assert ab_datetime_try_parse(input_value) and ab_datetime_try_parse(input_value) == dt
118173

119174

120175
@pytest.mark.parametrize(
@@ -194,42 +249,6 @@ def test_operator_overloading():
194249
_ = "invalid" - dt
195250

196251

197-
@pytest.mark.parametrize(
198-
"input_value,expected_output",
199-
[
200-
# Valid formats - must have T delimiter and timezone
201-
("2023-03-14T15:09:26+00:00", "2023-03-14T15:09:26+00:00"), # Basic UTC format
202-
("2023-03-14T15:09:26.123+00:00", "2023-03-14T15:09:26.123000+00:00"), # With milliseconds
203-
(
204-
"2023-03-14T15:09:26.123456+00:00",
205-
"2023-03-14T15:09:26.123456+00:00",
206-
), # With microseconds
207-
("2023-03-14T15:09:26-04:00", "2023-03-14T15:09:26-04:00"), # With timezone offset
208-
("2023-03-14T15:09:26Z", "2023-03-14T15:09:26+00:00"), # With Z timezone
209-
("2023-03-14T00:00:00+00:00", "2023-03-14T00:00:00+00:00"), # Full datetime with zero time
210-
# Invalid formats - reject anything without proper ISO8601/RFC3339 format
211-
("invalid datetime", None), # Completely invalid
212-
("2023-03-14 15:09:26", None), # Missing T delimiter
213-
("2023-03-14", None), # Date only, missing time and timezone
214-
("15:09:26Z", None), # Missing date component
215-
("2023-03-14T15:09:26", None), # Missing timezone
216-
("2023-03-14T15:09:26GMT", None), # Invalid timezone format
217-
("2023/03/14T15:09:26Z", None), # Wrong date separator
218-
("2023-03-14T25:09:26Z", None), # Invalid hour
219-
("2023-03-14T15:99:26Z", None), # Invalid minute
220-
("2023-03-14T15:09:99Z", None), # Invalid second
221-
],
222-
)
223-
def test_ab_datetime_try_parse(input_value, expected_output):
224-
"""Test datetime string format validation."""
225-
result = ab_datetime_try_parse(input_value)
226-
if expected_output is None:
227-
assert result is None
228-
else:
229-
assert isinstance(result, AirbyteDateTime)
230-
assert str(result) == expected_output
231-
232-
233252
def test_epoch_millis():
234253
"""Test Unix epoch millisecond timestamp conversion methods."""
235254
# Test to_epoch_millis()

0 commit comments

Comments
 (0)