From 268d3c653cb1d57d4ccbe432098eb1727f8291d1 Mon Sep 17 00:00:00 2001 From: Stuart Leitch Date: Thu, 5 Oct 2023 15:02:23 +0100 Subject: [PATCH 1/4] Update gitignore. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 97ebaa67..7db697ef 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,6 @@ dist/ # Ignore PyCharm / IntelliJ files .idea/ +build/ +.python-version +docker-compose.yml \ No newline at end of file From c329d853076ee3437ca35a98c3142c7afc325d2f Mon Sep 17 00:00:00 2001 From: Stuart Leitch Date: Thu, 5 Oct 2023 16:05:45 +0100 Subject: [PATCH 2/4] Hourly partitions pass tests --- docs/source/table_partitioning.rst | 11 +++ .../partitioning/current_time_strategy.py | 3 +- psqlextra/partitioning/shorthands.py | 6 +- psqlextra/partitioning/time_partition.py | 5 +- psqlextra/partitioning/time_partition_size.py | 17 +++- tests/test_partitioning_time.py | 92 +++++++++++++++++++ 6 files changed, 127 insertions(+), 7 deletions(-) diff --git a/docs/source/table_partitioning.rst b/docs/source/table_partitioning.rst index 1bb5ba6f..a173d1ce 100644 --- a/docs/source/table_partitioning.rst +++ b/docs/source/table_partitioning.rst @@ -174,6 +174,17 @@ Time-based partitioning count=12, ), ), + + # 24 partitions ahead, each partition is 1 hour + # old partitions are never deleted, `max_age` is not set + # partitions will be named `[table_name]_[year]_[month]_[month day number]_[hour (24h)]`. + PostgresPartitioningConfig( + model=MyPartitionedModel, + strategy=PostgresCurrentTimePartitioningStrategy( + size=PostgresTimePartitionSize(hours=1), + count=24, + ), + ), ]) diff --git a/psqlextra/partitioning/current_time_strategy.py b/psqlextra/partitioning/current_time_strategy.py index 114a1aaf..6400e77f 100644 --- a/psqlextra/partitioning/current_time_strategy.py +++ b/psqlextra/partitioning/current_time_strategy.py @@ -16,7 +16,8 @@ class PostgresCurrentTimePartitioningStrategy( All buckets will be equal in size and start at the start of the unit. With monthly partitioning, partitions start on the 1st and - with weekly partitioning, partitions start on monday. + with weekly partitioning, partitions start on monday, with hourly + partitioning, partitions start at 00:00. """ def __init__( diff --git a/psqlextra/partitioning/shorthands.py b/psqlextra/partitioning/shorthands.py index 30175273..f263e362 100644 --- a/psqlextra/partitioning/shorthands.py +++ b/psqlextra/partitioning/shorthands.py @@ -16,6 +16,7 @@ def partition_by_current_time( months: Optional[int] = None, weeks: Optional[int] = None, days: Optional[int] = None, + hours: Optional[int] = None, max_age: Optional[relativedelta] = None, name_format: Optional[str] = None, ) -> PostgresPartitioningConfig: @@ -43,6 +44,9 @@ def partition_by_current_time( days: The amount of days each partition should contain. + hours: + The amount of hours each partition should contain. + max_age: The maximum age of a partition (calculated from the start of the partition). @@ -56,7 +60,7 @@ def partition_by_current_time( """ size = PostgresTimePartitionSize( - years=years, months=months, weeks=weeks, days=days + years=years, months=months, weeks=weeks, days=days, hours=hours ) return PostgresPartitioningConfig( diff --git a/psqlextra/partitioning/time_partition.py b/psqlextra/partitioning/time_partition.py index 3c8a4d87..64a8cf8d 100644 --- a/psqlextra/partitioning/time_partition.py +++ b/psqlextra/partitioning/time_partition.py @@ -20,6 +20,7 @@ class PostgresTimePartition(PostgresRangePartition): PostgresTimePartitionUnit.MONTHS: "%Y_%b", PostgresTimePartitionUnit.WEEKS: "%Y_week_%W", PostgresTimePartitionUnit.DAYS: "%Y_%b_%d", + PostgresTimePartitionUnit.HOURS: "%Y_%b_%d_%H:00:00", } def __init__( @@ -31,8 +32,8 @@ def __init__( end_datetime = start_datetime + size.as_delta() super().__init__( - from_values=start_datetime.strftime("%Y-%m-%d"), - to_values=end_datetime.strftime("%Y-%m-%d"), + from_values=start_datetime.strftime("%Y-%m-%d %H:00:00"), + to_values=end_datetime.strftime("%Y-%m-%d %H:00:00"), ) self.size = size diff --git a/psqlextra/partitioning/time_partition_size.py b/psqlextra/partitioning/time_partition_size.py index 3d013bcd..13f96692 100644 --- a/psqlextra/partitioning/time_partition_size.py +++ b/psqlextra/partitioning/time_partition_size.py @@ -13,6 +13,7 @@ class PostgresTimePartitionUnit(enum.Enum): MONTHS = "months" WEEKS = "weeks" DAYS = "days" + HOURS = "hours" class PostgresTimePartitionSize: @@ -27,8 +28,9 @@ def __init__( months: Optional[int] = None, weeks: Optional[int] = None, days: Optional[int] = None, + hours: Optional[int] = None, ) -> None: - sizes = [years, months, weeks, days] + sizes = [years, months, weeks, days, hours] if not any(sizes): raise PostgresPartitioningError("Partition cannot be 0 in size.") @@ -50,6 +52,9 @@ def __init__( elif days: self.unit = PostgresTimePartitionUnit.DAYS self.value = days + elif hours: + self.unit = PostgresTimePartitionUnit.HOURS + self.value = hours else: raise PostgresPartitioningError( "Unsupported time partitioning unit" @@ -68,6 +73,9 @@ def as_delta(self) -> relativedelta: if self.unit == PostgresTimePartitionUnit.DAYS: return relativedelta(days=self.value) + if self.unit == PostgresTimePartitionUnit.HOURS: + return relativedelta(hours=self.value) + raise PostgresPartitioningError( "Unsupported time partitioning unit: %s" % self.unit ) @@ -81,12 +89,15 @@ def start(self, dt: datetime) -> datetime: if self.unit == PostgresTimePartitionUnit.WEEKS: return self._ensure_datetime(dt - relativedelta(days=dt.weekday())) + + if self.unit == PostgresTimePartitionUnit.DAYS: + return self._ensure_datetime(dt) - return self._ensure_datetime(dt) + return self._ensure_datetime(dt.replace(hour=0)) @staticmethod def _ensure_datetime(dt: Union[date, datetime]) -> datetime: - return datetime(year=dt.year, month=dt.month, day=dt.day) + return datetime(year=dt.year, month=dt.month, day=dt.day, hour = dt.hour) def __repr__(self) -> str: return "PostgresTimePartitionSize<%s, %s>" % (self.unit, self.value) diff --git a/tests/test_partitioning_time.py b/tests/test_partitioning_time.py index 9f6b5bf1..ebc469b4 100644 --- a/tests/test_partitioning_time.py +++ b/tests/test_partitioning_time.py @@ -254,6 +254,57 @@ def test_partitioning_time_daily_apply(): assert table.partitions[6].name == "2019_jun_04" + +@pytest.mark.postgres_version(lt=110000) +def test_partitioning_time_hourly_apply(): + """Tests whether automatically creating new partitions ahead hourly works as + expected.""" + + model = define_fake_partitioned_model( + {"timestamp": models.DateTimeField()}, {"key": ["timestamp"]} + ) + + schema_editor = connection.schema_editor() + schema_editor.create_partitioned_model(model) + + # create partitions for the next 4 hours (including the current) + with freezegun.freeze_time("2019-1-23"): + manager = PostgresPartitioningManager( + [partition_by_current_time(model, hours=1, count=4)] + ) + manager.plan().apply() + + table = _get_partitioned_table(model) + assert len(table.partitions) == 4 + assert table.partitions[0].name == "2019_jan_23_00:00:00" + assert table.partitions[1].name == "2019_jan_23_01:00:00" + assert table.partitions[2].name == "2019_jan_23_02:00:00" + assert table.partitions[3].name == "2019_jan_23_03:00:00" + + # re-running it with 5, should just create one additional partition + with freezegun.freeze_time("2019-1-23"): + manager = PostgresPartitioningManager( + [partition_by_current_time(model, hours=1, count=5)] + ) + manager.plan().apply() + + table = _get_partitioned_table(model) + assert len(table.partitions) == 5 + assert table.partitions[4].name == "2019_jan_23_04:00:00" + + # it's june now, we want to partition two hours ahead + with freezegun.freeze_time("2019-06-03"): + manager = PostgresPartitioningManager( + [partition_by_current_time(model, hours=1, count=2)] + ) + manager.plan().apply() + + table = _get_partitioned_table(model) + assert len(table.partitions) == 7 + assert table.partitions[5].name == "2019_jun_03_00:00:00" + assert table.partitions[6].name == "2019_jun_03_01:00:00" + + @pytest.mark.postgres_version(lt=110000) def test_partitioning_time_monthly_apply_insert(): """Tests whether automatically created monthly partitions line up @@ -372,10 +423,51 @@ def test_partitioning_time_daily_apply_insert(): model.objects.create(timestamp=datetime.date(2019, 1, 10)) +@pytest.mark.postgres_version(lt=110000) +def test_partitioning_time_hourly_apply_insert(): + """Tests whether automatically created hourly partitions line up + perfectly.""" + + model = define_fake_partitioned_model( + {"timestamp": models.DateTimeField()}, {"key": ["timestamp"]} + ) + + schema_editor = connection.schema_editor() + schema_editor.create_partitioned_model(model) + + # that's a monday + with freezegun.freeze_time("2019-1-07"): + manager = PostgresPartitioningManager( + [partition_by_current_time(model, hours=1, count=2)] + ) + manager.plan().apply() + + table = _get_partitioned_table(model) + assert len(table.partitions) == 2 + + model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 0)) + model.objects.create(timestamp=datetime.datetime(2019, 1, 7 , 1)) + + with transaction.atomic(): + with pytest.raises(IntegrityError): + model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 2)) + model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 3)) + + with freezegun.freeze_time("2019-1-07"): + manager = PostgresPartitioningManager( + [partition_by_current_time(model, hours=1, count=4)] + ) + manager.plan().apply() + + model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 2)) + model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 3)) + + @pytest.mark.postgres_version(lt=110000) @pytest.mark.parametrize( "kwargs,partition_names", [ + (dict(hours=2), ["2019_jan_01_00:00:00", "2019_jan_01_02:00:00"]), (dict(days=2), ["2019_jan_01", "2019_jan_03"]), (dict(weeks=2), ["2018_week_53", "2019_week_02"]), (dict(months=2), ["2019_jan", "2019_mar"]), From ae24684f2fab2eddb99f34af71e34eb75400e25e Mon Sep 17 00:00:00 2001 From: Stuart Leitch Date: Fri, 6 Oct 2023 15:09:47 +0100 Subject: [PATCH 3/4] Update partitioning example in table_partitioning.rst --- docs/source/table_partitioning.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/table_partitioning.rst b/docs/source/table_partitioning.rst index a173d1ce..a5122fb9 100644 --- a/docs/source/table_partitioning.rst +++ b/docs/source/table_partitioning.rst @@ -175,9 +175,9 @@ Time-based partitioning ), ), - # 24 partitions ahead, each partition is 1 hour + # 24 partitions ahead, each partition is 1 hour, for a total of 24 hours. Starting with hour 0 of current day # old partitions are never deleted, `max_age` is not set - # partitions will be named `[table_name]_[year]_[month]_[month day number]_[hour (24h)]`. + # partitions will be named `[table_name]_[year]_[month]_[month day number]_[hour (24h)]:00:00`. PostgresPartitioningConfig( model=MyPartitionedModel, strategy=PostgresCurrentTimePartitioningStrategy( From d8fd2c14042c23b84607b4d33727ba1bc7df5bfd Mon Sep 17 00:00:00 2001 From: Stuart Leitch Date: Wed, 11 Oct 2023 09:24:26 +0100 Subject: [PATCH 4/4] Formatting. --- psqlextra/partitioning/current_time_strategy.py | 2 +- psqlextra/partitioning/time_partition_size.py | 4 ++-- tests/test_partitioning_time.py | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/psqlextra/partitioning/current_time_strategy.py b/psqlextra/partitioning/current_time_strategy.py index 6400e77f..795f60ca 100644 --- a/psqlextra/partitioning/current_time_strategy.py +++ b/psqlextra/partitioning/current_time_strategy.py @@ -16,7 +16,7 @@ class PostgresCurrentTimePartitioningStrategy( All buckets will be equal in size and start at the start of the unit. With monthly partitioning, partitions start on the 1st and - with weekly partitioning, partitions start on monday, with hourly + with weekly partitioning, partitions start on monday, with hourly partitioning, partitions start at 00:00. """ diff --git a/psqlextra/partitioning/time_partition_size.py b/psqlextra/partitioning/time_partition_size.py index 13f96692..6c6cc5fe 100644 --- a/psqlextra/partitioning/time_partition_size.py +++ b/psqlextra/partitioning/time_partition_size.py @@ -89,7 +89,7 @@ def start(self, dt: datetime) -> datetime: if self.unit == PostgresTimePartitionUnit.WEEKS: return self._ensure_datetime(dt - relativedelta(days=dt.weekday())) - + if self.unit == PostgresTimePartitionUnit.DAYS: return self._ensure_datetime(dt) @@ -97,7 +97,7 @@ def start(self, dt: datetime) -> datetime: @staticmethod def _ensure_datetime(dt: Union[date, datetime]) -> datetime: - return datetime(year=dt.year, month=dt.month, day=dt.day, hour = dt.hour) + return datetime(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour) def __repr__(self) -> str: return "PostgresTimePartitionSize<%s, %s>" % (self.unit, self.value) diff --git a/tests/test_partitioning_time.py b/tests/test_partitioning_time.py index ebc469b4..1353f59c 100644 --- a/tests/test_partitioning_time.py +++ b/tests/test_partitioning_time.py @@ -254,7 +254,6 @@ def test_partitioning_time_daily_apply(): assert table.partitions[6].name == "2019_jun_04" - @pytest.mark.postgres_version(lt=110000) def test_partitioning_time_hourly_apply(): """Tests whether automatically creating new partitions ahead hourly works as @@ -446,7 +445,7 @@ def test_partitioning_time_hourly_apply_insert(): assert len(table.partitions) == 2 model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 0)) - model.objects.create(timestamp=datetime.datetime(2019, 1, 7 , 1)) + model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 1)) with transaction.atomic(): with pytest.raises(IntegrityError):