From efe0cf4f3d9230f466ec585845e157ed68d3d477 Mon Sep 17 00:00:00 2001
From: Walison Filipe
Date: Sun, 22 Jun 2025 22:28:26 -0300
Subject: [PATCH] Make multi-day partitions deterministic and aligned

For day-sized partitions, PostgresTimePartitionSize.start() used to derive
the partition start directly from the datetime it was given, so partitions
spanning more than one day followed the input date instead of a fixed grid.

start() now floors the datetime to a multiple of `value` days counted from
a fixed anchor. The anchor is a new optional constructor argument that
defaults to the Unix epoch (1970-01-01 UTC).

Test expectations for days=2 and days=7 are updated to the epoch-aligned
boundaries, a new test covers days=5, and an unused enumerate() index is
dropped from test_partitioning_time_delete.
---
Review note (this section is ignored by git-am):
start() evaluates `dt - self.anchor`, and the default anchor is
timezone-aware. Subtracting an aware datetime from a naive one raises
TypeError, so callers of start() that are not visible in this patch should
be confirmed to pass aware datetimes for day-sized partitions.

 psqlextra/partitioning/time_partition_size.py | 13 ++++-
 tests/test_partitioning_time.py               | 49 +++++++++++++++++--
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/psqlextra/partitioning/time_partition_size.py b/psqlextra/partitioning/time_partition_size.py
index 3d013bcd..46ef3691 100644
--- a/psqlextra/partitioning/time_partition_size.py
+++ b/psqlextra/partitioning/time_partition_size.py
@@ -1,6 +1,6 @@
 import enum
 
-from datetime import date, datetime
+from datetime import date, datetime, timedelta, timezone
 from typing import Optional, Union
 
 from dateutil.relativedelta import relativedelta
@@ -15,11 +15,15 @@ class PostgresTimePartitionUnit(enum.Enum):
     DAYS = "days"
 
 
+UNIX_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
+
+
 class PostgresTimePartitionSize:
     """Size of a time-based range partition table."""
 
     unit: PostgresTimePartitionUnit
     value: int
+    anchor: datetime
 
     def __init__(
         self,
@@ -27,6 +31,7 @@ def __init__(
         months: Optional[int] = None,
         weeks: Optional[int] = None,
         days: Optional[int] = None,
+        anchor: datetime = UNIX_EPOCH,
     ) -> None:
         sizes = [years, months, weeks, days]
 
@@ -38,6 +43,7 @@ def __init__(
                 "Partition can only have on size unit."
             )
 
+        self.anchor = anchor
         if years:
             self.unit = PostgresTimePartitionUnit.YEARS
             self.value = years
@@ -82,7 +88,10 @@ def start(self, dt: datetime) -> datetime:
         if self.unit == PostgresTimePartitionUnit.WEEKS:
             return self._ensure_datetime(dt - relativedelta(days=dt.weekday()))
 
-        return self._ensure_datetime(dt)
+        diff_days = (dt - self.anchor).days
+        partition_index = diff_days // self.value
+        start = self.anchor + timedelta(days=partition_index * self.value)
+        return self._ensure_datetime(start)
 
     @staticmethod
     def _ensure_datetime(dt: Union[date, datetime]) -> datetime:
diff --git a/tests/test_partitioning_time.py b/tests/test_partitioning_time.py
index 9f6b5bf1..6d190b3d 100644
--- a/tests/test_partitioning_time.py
+++ b/tests/test_partitioning_time.py
@@ -254,6 +254,49 @@ def test_partitioning_time_daily_apply():
     assert table.partitions[6].name == "2019_jun_04"
 
 
+@pytest.mark.postgres_version(lt=110000)
+def test_partitioning_time_consistent_daily_apply():
+    """Ensures that automatic daily partition creation is consistent and aligned
+    when the partition size spans multiple days (e.g., days > 1)"""
+
+    model = define_fake_partitioned_model(
+        {"timestamp": models.DateTimeField()}, {"key": ["timestamp"]}
+    )
+
+    schema_editor = connection.schema_editor()
+    schema_editor.create_partitioned_model(model)
+
+    with freezegun.freeze_time("2025-06-20"):
+        manager = PostgresPartitioningManager(
+            [partition_by_current_time(model, days=5, count=3)]
+        )
+        manager.plan().apply()
+
+    table = _get_partitioned_table(model)
+    assert len(table.partitions) == 3
+
+    # Partitions are aligned based on the fixed anchor (Unix Epoch by default).
+    # 2025-06-20 falls within the partition starting at 2025-06-16,
+    # since it's the most recent multiple of 5 days since 1970-01-01.
+    assert table.partitions[0].name == "2025_jun_16"
+    assert table.partitions[1].name == "2025_jun_21"
+    assert table.partitions[2].name == "2025_jun_26"
+
+    # re-running it another day only creates the next one needed.
+    with freezegun.freeze_time("2025-06-22"):
+        manager = PostgresPartitioningManager(
+            [partition_by_current_time(model, days=5, count=3)]
+        )
+        manager.plan().apply()
+
+    table = _get_partitioned_table(model)
+    assert len(table.partitions) == 4
+    assert table.partitions[0].name == "2025_jun_16"
+    assert table.partitions[1].name == "2025_jun_21"
+    assert table.partitions[2].name == "2025_jun_26"
+    assert table.partitions[3].name == "2025_jul_01"
+
+
 @pytest.mark.postgres_version(lt=110000)
 def test_partitioning_time_monthly_apply_insert():
     """Tests whether automatically created monthly partitions line up
@@ -376,7 +419,7 @@ def test_partitioning_time_daily_apply_insert():
 @pytest.mark.parametrize(
     "kwargs,partition_names",
     [
-        (dict(days=2), ["2019_jan_01", "2019_jan_03"]),
+        (dict(days=2), ["2018_dec_31", "2019_jan_02"]),
         (dict(weeks=2), ["2018_week_53", "2019_week_02"]),
         (dict(months=2), ["2019_jan", "2019_mar"]),
         (dict(years=2), ["2019", "2021"]),
@@ -422,7 +465,7 @@ def test_partitioning_time_multiple(kwargs, partition_names):
             dict(days=7, max_age=relativedelta(weeks=1)),
             [
                 ("2019-1-1", 6),
-                ("2019-1-4", 6),
+                ("2019-1-4", 5),
                 ("2019-1-8", 5),
                 ("2019-1-15", 4),
                 ("2019-1-16", 4),
@@ -450,7 +493,7 @@ def test_partitioning_time_delete(kwargs, timepoints):
     with freezegun.freeze_time(timepoints[0][0]):
         manager.plan().apply()
 
-    for index, (dt, partition_count) in enumerate(timepoints):
+    for dt, partition_count in timepoints:
         with freezegun.freeze_time(dt):
             manager.plan(skip_create=True).apply()
 