Skip to content

Commit

Permalink
Merge branch 'main' into lazebnyi/add-json-loads-to-jwt-authenticatio…
Browse files Browse the repository at this point in the history
…n-params
  • Loading branch information
lazebnyi authored Jan 30, 2025
2 parents 76908e3 + a6d55be commit 7d70970
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 116 deletions.
114 changes: 48 additions & 66 deletions airbyte_cdk/utils/datetime_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@
assert ab_datetime_try_parse("2023-03-14T15:09:26Z") # Basic UTC format
assert ab_datetime_try_parse("2023-03-14T15:09:26-04:00") # With timezone offset
assert ab_datetime_try_parse("2023-03-14T15:09:26+00:00") # With explicit UTC offset
assert not ab_datetime_try_parse("2023-03-14 15:09:26Z") # Invalid: missing T delimiter
assert not ab_datetime_try_parse("foo") # Invalid: not a datetime
assert ab_datetime_try_parse("2023-03-14 15:09:26Z") # Missing T delimiter but still parsable
assert not ab_datetime_try_parse("foo") # Invalid: not parsable, returns `None`
```
"""

Expand Down Expand Up @@ -138,6 +138,14 @@ def from_datetime(cls, dt: datetime) -> "AirbyteDateTime":
dt.tzinfo or timezone.utc,
)

def to_datetime(self) -> datetime:
    """Return this value as a standard library `datetime` object.

    `AirbyteDateTime` is currently a subclass of `datetime`, so the instance
    itself is returned unchanged. The internal representation may move to a
    different base class in the future.

    Returns:
        datetime: This same object.
    """
    return self

def __str__(self) -> str:
"""Returns the datetime in ISO8601/RFC3339 format with 'T' delimiter.
Expand All @@ -148,12 +156,7 @@ def __str__(self) -> str:
str: ISO8601/RFC3339 formatted string.
"""
aware_self = self if self.tzinfo else self.replace(tzinfo=timezone.utc)
base = self.strftime("%Y-%m-%dT%H:%M:%S")
if self.microsecond:
base = f"{base}.{self.microsecond:06d}"
# Format timezone as ±HH:MM
offset = aware_self.strftime("%z")
return f"{base}{offset[:3]}:{offset[3:]}"
return aware_self.isoformat(sep="T", timespec="auto")

def __repr__(self) -> str:
"""Returns the same string representation as __str__ for consistency.
Expand Down Expand Up @@ -358,15 +361,15 @@ def ab_datetime_now() -> AirbyteDateTime:
def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
"""Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness.
Previously named: parse()
This implementation is as flexible as possible to handle various datetime formats.
Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
Handles:
- ISO8601/RFC3339 format strings (with 'T' delimiter)
- ISO8601/RFC3339 format strings (with ' ' or 'T' delimiter)
- Unix timestamps (as integers or strings)
- Date-only strings (YYYY-MM-DD)
- Timezone-aware formats (+00:00 for UTC, or ±HH:MM offset)
Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
- Anything that can be parsed by `dateutil.parser.parse()`
Args:
dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
Expand Down Expand Up @@ -416,15 +419,16 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
except (ValueError, TypeError):
raise ValueError(f"Invalid date format: {dt_str}")

# Validate datetime format
if "/" in dt_str or " " in dt_str or "GMT" in dt_str:
raise ValueError(f"Could not parse datetime string: {dt_str}")
# Reject time-only strings without date
if ":" in dt_str and dt_str.count("-") < 2 and dt_str.count("/") < 2:
raise ValueError(f"Missing date part in datetime string: {dt_str}")

# Try parsing with dateutil for timezone handling
try:
parsed = parser.parse(dt_str)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)

return AirbyteDateTime.from_datetime(parsed)
except (ValueError, TypeError):
raise ValueError(f"Could not parse datetime string: {dt_str}")
Expand All @@ -438,7 +442,29 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
raise ValueError(f"Could not parse datetime string: {dt_str}")


def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
    """Parse the input as a datetime, returning `None` when it cannot be parsed.

    Delegates to `ab_datetime_parse()` and turns the `ValueError` / `TypeError`
    it raises into a `None` result instead of propagating the exception. Any
    format accepted by `ab_datetime_parse()` is therefore accepted here, and a
    successful result is timezone-aware (`UTC` when the input has no timezone).

    Args:
        dt_str: The value to parse.

    Returns:
        The parsed `AirbyteDateTime`, or `None` if parsing failed.

    Example:
        >>> ab_datetime_try_parse("2023-03-14T15:09:26Z")  # Returns AirbyteDateTime
        >>> ab_datetime_try_parse("2023-03-14 15:09:26Z")  # Missing 'T' delimiter still parsable
        >>> ab_datetime_try_parse("2023-03-14")  # Returns midnight UTC time
    """
    try:
        result = ab_datetime_parse(dt_str)
    except (ValueError, TypeError):
        # Parsing failures are reported as `None` rather than raised.
        return None
    return result


def ab_datetime_format(
dt: Union[datetime, AirbyteDateTime],
format: str | None = None,
) -> str:
"""Formats a datetime object as an ISO8601/RFC3339 string with 'T' delimiter and timezone.
Previously named: format()
Expand All @@ -449,6 +475,8 @@ def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
Args:
dt: Any datetime object to format.
format: Optional format string. If provided, calls `strftime()` with this format.
Otherwise, uses the default ISO8601/RFC3339 format, adapted for available precision.
Returns:
str: ISO8601/RFC3339 formatted datetime string.
Expand All @@ -464,54 +492,8 @@ def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)

# Format with consistent timezone representation
base = dt.strftime("%Y-%m-%dT%H:%M:%S")
if dt.microsecond:
base = f"{base}.{dt.microsecond:06d}"
offset = dt.strftime("%z")
return f"{base}{offset[:3]}:{offset[3:]}"


def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
"""Try to parse the input string as an ISO8601/RFC3339 datetime, failing gracefully instead of raising an exception.
Requires strict ISO8601/RFC3339 format with:
- 'T' delimiter between date and time components
- Valid timezone (Z for UTC or ±HH:MM offset)
- Complete datetime representation (date and time)
if format:
return dt.strftime(format)

Returns None for any non-compliant formats including:
- Space-delimited datetimes
- Date-only strings
- Missing timezone
- Invalid timezone format
- Wrong date/time separators
Example:
>>> ab_datetime_try_parse("2023-03-14T15:09:26Z") # Returns AirbyteDateTime
>>> ab_datetime_try_parse("2023-03-14 15:09:26Z") # Returns None (invalid format)
>>> ab_datetime_try_parse("2023-03-14") # Returns None (missing time and timezone)
"""
if not isinstance(dt_str, str):
return None
try:
# Validate format before parsing
if "T" not in dt_str:
return None
if not any(x in dt_str for x in ["Z", "+", "-"]):
return None
if "/" in dt_str or " " in dt_str or "GMT" in dt_str:
return None

# Try parsing with dateutil
parsed = parser.parse(dt_str)
if parsed.tzinfo is None:
return None

# Validate time components
if not (0 <= parsed.hour <= 23 and 0 <= parsed.minute <= 59 and 0 <= parsed.second <= 59):
return None

return AirbyteDateTime.from_datetime(parsed)
except (ValueError, TypeError):
return None
# Format with consistent timezone representation and "T" delimiter
return dt.isoformat(sep="T", timespec="auto")
119 changes: 69 additions & 50 deletions unit_tests/utils/test_datetime_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,63 @@ def test_now():
@pytest.mark.parametrize(
"input_value,expected_output,error_type,error_match",
[
# Valid formats - 'T' delimiter and timezone are optional (UTC is assumed when no timezone is given)
("2023-03-14T15:09:26+00:00", "2023-03-14T15:09:26+00:00", None, None), # Basic UTC format
(
"2023-03-14T15:09:26.123+00:00",
"2023-03-14T15:09:26.123000+00:00",
None,
None,
), # With milliseconds
(
"2023-03-14T15:09:26.123456+00:00",
"2023-03-14T15:09:26.123456+00:00",
None,
None,
), # With microseconds
(
"2023-03-14T15:09:26-04:00",
"2023-03-14T15:09:26-04:00",
None,
None,
), # With timezone offset
("2023-03-14T15:09:26Z", "2023-03-14T15:09:26+00:00", None, None), # With Z timezone
(
"2023-03-14T00:00:00+00:00",
"2023-03-14T00:00:00+00:00",
None,
None,
), # Full datetime with zero time
(
"2023-03-14T15:09:26GMT",
"2023-03-14T15:09:26+00:00",
None,
None,
), # Non-standard timezone name ok
(
"2023-03-14T15:09:26",
"2023-03-14T15:09:26+00:00",
None,
None,
), # Missing timezone, assume UTC
(
"2023-03-14 15:09:26",
"2023-03-14T15:09:26+00:00",
None,
None,
), # Missing T delimiter ok, assume UTC
(
"2023-03-14",
"2023-03-14T00:00:00+00:00",
None,
None,
), # Date only, missing time and timezone
(
"2023/03/14T15:09:26Z",
"2023-03-14T15:09:26+00:00",
None,
None,
), # Wrong date separator, ok
# Valid formats
("2023-03-14T15:09:26Z", "2023-03-14T15:09:26+00:00", None, None),
("2023-03-14T15:09:26-04:00", "2023-03-14T15:09:26-04:00", None, None),
Expand All @@ -71,20 +128,10 @@ def test_now():
("2023-12-32", None, ValueError, "Invalid date format: 2023-12-32"),
("2023-00-14", None, ValueError, "Invalid date format: 2023-00-14"),
("2023-12-00", None, ValueError, "Invalid date format: 2023-12-00"),
# Invalid separators and formats
("2023/12/14", None, ValueError, "Could not parse datetime string: 2023/12/14"),
(
"2023-03-14 15:09:26Z",
None,
ValueError,
"Could not parse datetime string: 2023-03-14 15:09:26Z",
),
(
"2023-03-14T15:09:26GMT",
None,
ValueError,
"Could not parse datetime string: 2023-03-14T15:09:26GMT",
),
# Non-standard separators and formats, ok
("2023/12/14", "2023-12-14T00:00:00+00:00", None, None),
("2023-03-14 15:09:26Z", "2023-03-14T15:09:26+00:00", None, None),
("2023-03-14T15:09:26GMT", "2023-03-14T15:09:26+00:00", None, None),
# Invalid time components
(
"2023-03-14T25:09:26Z",
Expand All @@ -105,16 +152,24 @@ def test_now():
"Could not parse datetime string: 2023-03-14T15:09:99Z",
),
],
# ("invalid datetime", None), # Completely invalid
# ("15:09:26Z", None), # Missing date component
# ("2023-03-14T25:09:26Z", None), # Invalid hour
# ("2023-03-14T15:99:26Z", None), # Invalid minute
# ("2023-03-14T15:09:99Z", None), # Invalid second
# ("2023-02-30T00:00:00Z", None), # Impossible date
)
def test_parse(input_value, expected_output, error_type, error_match):
    """Check `ab_datetime_parse()` and `ab_datetime_try_parse()` against one parametrized case.

    Args:
        input_value: The raw value handed to both parsers.
        expected_output: Expected `str()` of the parsed result; unused for error cases.
        error_type: Exception class `ab_datetime_parse()` must raise, or `None` for
            cases that are expected to parse.
        error_match: Pattern passed to `pytest.raises(match=...)` for error cases.
    """
    if error_type:
        with pytest.raises(error_type, match=error_match):
            ab_datetime_parse(input_value)
        # The non-raising variant must report the same failure as `None`.
        assert ab_datetime_try_parse(input_value) is None
        return

    dt = ab_datetime_parse(input_value)
    assert isinstance(dt, AirbyteDateTime)
    assert str(dt) == expected_output

    # Parse once and check each condition separately, so a failure shows
    # whether the result was `None` or merely unequal.
    tried = ab_datetime_try_parse(input_value)
    assert tried is not None
    assert tried == dt


@pytest.mark.parametrize(
Expand Down Expand Up @@ -194,42 +249,6 @@ def test_operator_overloading():
_ = "invalid" - dt


@pytest.mark.parametrize(
    "input_value,expected_output",
    [
        # Accepted inputs: 'T' delimiter and a timezone are both present
        ("2023-03-14T15:09:26+00:00", "2023-03-14T15:09:26+00:00"),  # Basic UTC format
        ("2023-03-14T15:09:26.123+00:00", "2023-03-14T15:09:26.123000+00:00"),  # With milliseconds
        ("2023-03-14T15:09:26.123456+00:00", "2023-03-14T15:09:26.123456+00:00"),  # With microseconds
        ("2023-03-14T15:09:26-04:00", "2023-03-14T15:09:26-04:00"),  # With timezone offset
        ("2023-03-14T15:09:26Z", "2023-03-14T15:09:26+00:00"),  # With Z timezone
        ("2023-03-14T00:00:00+00:00", "2023-03-14T00:00:00+00:00"),  # Full datetime with zero time
        # Rejected inputs: anything that is not proper ISO8601/RFC3339
        ("invalid datetime", None),  # Completely invalid
        ("2023-03-14 15:09:26", None),  # Missing T delimiter
        ("2023-03-14", None),  # Date only, missing time and timezone
        ("15:09:26Z", None),  # Missing date component
        ("2023-03-14T15:09:26", None),  # Missing timezone
        ("2023-03-14T15:09:26GMT", None),  # Invalid timezone format
        ("2023/03/14T15:09:26Z", None),  # Wrong date separator
        ("2023-03-14T25:09:26Z", None),  # Invalid hour
        ("2023-03-14T15:99:26Z", None),  # Invalid minute
        ("2023-03-14T15:09:99Z", None),  # Invalid second
    ],
)
def test_ab_datetime_try_parse(input_value, expected_output):
    """Verify which datetime strings `ab_datetime_try_parse()` accepts and rejects.

    An `expected_output` of `None` means the input must be rejected (the function
    returns `None`); otherwise it is the expected `str()` of the parsed result.

    NOTE(review): the rejected cases encode a strict ISO8601/RFC3339 contract
    ('T' delimiter and timezone required). Confirm this still matches the
    intended behavior of `ab_datetime_try_parse()`.
    """
    parsed = ab_datetime_try_parse(input_value)
    if expected_output is not None:
        assert isinstance(parsed, AirbyteDateTime)
        assert str(parsed) == expected_output
        return
    assert parsed is None


def test_epoch_millis():
"""Test Unix epoch millisecond timestamp conversion methods."""
# Test to_epoch_millis()
Expand Down

0 comments on commit 7d70970

Please sign in to comment.