Skip to content

Commit c7888f4

Browse files
committed
Fix: Mirror message duplication due to retries on SQS throttling (#7419, PR #7426)
2 parents dbdfb60 + a5ace29 commit c7888f4

File tree

6 files changed

+72
-22
lines changed

6 files changed

+72
-22
lines changed

src/azul/azulclient.py

Lines changed: 6 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -262,7 +262,12 @@ def mirror_queue(self):
262262
return aws.sqs_queue(name)
263263

264264
def queue_mirror_messages(self, messages: Iterable[SQSMessage]) -> int:
265-
return self.queues.send_messages(self.mirror_queue(), messages)
265+
rate_limit = float(aws.sqs_fifo_rate_limit)
266+
if config.is_in_lambda:
267+
rate_limit /= config.mirroring_concurrency
268+
return self.queues.send_messages(self.mirror_queue(),
269+
messages,
270+
rate_limit=rate_limit)
266271

267272
def delete_all_indices(self, catalog: CatalogName):
268273
self.index_service.delete_indices(catalog)

src/azul/deployment.py

Lines changed: 8 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -745,6 +745,14 @@ def sqs_resource(self) -> 'SQSServiceResource':
745745
def sqs_queue(self, queue_name: str) -> 'Queue':
746746
return self.sqs_resource.get_queue_by_name(QueueName=queue_name)
747747

748+
#: The maximum number of SendMessage, ReceiveMessage, or DeleteMessage API
749+
#: calls per second supported for normal-throughput (as opposed to high-
750+
#: throughput) FIFO queues.
751+
#:
752+
#: https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/quotas-messages.html
753+
#:
754+
sqs_fifo_rate_limit = 300
755+
748756

749757
aws = AWS()
750758
del AWS

src/azul/indexer/__init__.py

Lines changed: 16 additions & 14 deletions
Original file line number · Diff line number · Diff line change
@@ -239,38 +239,40 @@ def parse(cls, prefix: str) -> Self:
239239
return cls(common=entry, partition=partition)
240240

241241
@classmethod
242-
def for_main_deployment(cls, num_subgraphs: int) -> Self:
242+
def for_main_deployment(cls, num_elements: int, partition_size: int) -> Self:
243243
"""
244-
A prefix that is expected to rarely exceed 8192 subgraphs per partition
244+
A prefix that divides a source containing the given number of elements
245+
(subgraphs, files, …) into partitions that rarely exceed the given size.
245246
246-
>>> str(Prefix.for_main_deployment(0))
247+
>>> n = 8192
248+
249+
>>> str(Prefix.for_main_deployment(0, n))
247250
Traceback (most recent call last):
248251
...
249252
ValueError: math domain error
250253
251-
>>> str(Prefix.for_main_deployment(1))
254+
>>> str(Prefix.for_main_deployment(1, n))
252255
'/0'
253256
254257
>>> cases = [-1, 0, 1, 2]
255258
256-
>>> n = 8192
257-
>>> [str(Prefix.for_main_deployment(n + i)) for i in cases]
259+
>>> [str(Prefix.for_main_deployment(n + i, n)) for i in cases]
258260
['/0', '/0', '/1', '/1']
259261
260262
Sources with this many bundles are very rare, so we have a generous
261263
margin of error surrounding this cutoff point
262264
263-
>>> n = 8192 * 16
264-
>>> [str(Prefix.for_main_deployment(n + i)) for i in cases]
265+
>>> m = n * 16
266+
>>> [str(Prefix.for_main_deployment(m + i, n)) for i in cases]
265267
['/1', '/1', '/2', '/2']
266268
"""
267-
partition = cls._prefix_length(num_subgraphs, 8192)
269+
partition = cls._prefix_length(num_elements, partition_size)
268270
return cls(common='', partition=partition)
269271

270272
@classmethod
271-
def for_lesser_deployment(cls, num_subgraphs: int) -> Self:
273+
def for_lesser_deployment(cls, num_elements: int) -> Self:
272274
"""
273-
A prefix that yields an average of approximately 24 subgraphs per
275+
A prefix that yields an average of approximately 24 elements per
274276
source, using an experimentally derived heuristic formula designed to
275277
minimize manual adjustment of the computed common prefixes. The
276278
partition prefix length is always 1, even though some partitions may be
@@ -294,9 +296,9 @@ def for_lesser_deployment(cls, num_subgraphs: int) -> Self:
294296
>>> [str(Prefix.for_lesser_deployment(n + i)) for i in cases]
295297
['e/1', 'f/1', '00/1', '10/1']
296298
"""
297-
digits = f'{num_subgraphs - 1:x}'[::-1]
298-
length = cls._prefix_length(num_subgraphs, 64)
299-
assert length < len(digits), num_subgraphs
299+
digits = f'{num_elements - 1:x}'[::-1]
300+
length = cls._prefix_length(num_elements, 64)
301+
assert length < len(digits), num_elements
300302
return cls(common=digits[:length], partition=1)
301303

302304
@classmethod

src/azul/indexer/mirror_controller.py

Lines changed: 15 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -28,6 +28,9 @@
2828
from azul.chalice import (
2929
LambdaMetric,
3030
)
31+
from azul.deployment import (
32+
aws,
33+
)
3134
from azul.digests import (
3235
Hasher,
3336
get_resumable_hasher,
@@ -131,7 +134,18 @@ def _mirror(self, action: MirrorAction, message: JSON):
131134
def mirror_source(self, catalog: CatalogName, source_json: JSON):
132135
plugin = self.repository_plugin(catalog)
133136
source = plugin.source_ref_cls.from_json(source_json)
134-
source = plugin.partition_source_for_mirroring(catalog, source)
137+
# The desired partition size depends on the maximum number of messages
138+
# we can send in one Lambda invocation, because queueing the individual
139+
# mirror_file messages turns out to dominate the running time of
140+
# handling a mirror_source message.
141+
partition_size = int(
142+
aws.sqs_fifo_rate_limit # max. # of SendMessage calls per second
143+
* self.client.queues.batch_size # number of messages per call
144+
* config.mirror_lambda_timeout # max. duration of the invocation
145+
/ config.mirroring_concurrency # number of concurrent invocations
146+
/ 2 # safety margin
147+
)
148+
source = plugin.partition_source_for_mirroring(catalog, source, partition_size)
135149
prefix = source.spec.prefix
136150
log.info('Queueing %d partitions of source %r in catalog %r',
137151
prefix.num_partitions, str(source.spec), catalog)

src/azul/plugins/__init__.py

Lines changed: 14 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -697,23 +697,26 @@ def partition_source_for_indexing(self,
697697
an updated copy of the source with a heuristically computed prefix that
698698
should be appropriate for indexing in the given catalog.
699699
"""
700-
return self._partition_source(catalog, source, self.count_bundles)
700+
partition_size = 8192
701+
return self._partition_source(catalog, source, self.count_bundles, partition_size)
701702

702703
def partition_source_for_mirroring(self,
703704
catalog: CatalogName,
704-
source: SOURCE_REF
705+
source: SOURCE_REF,
706+
partition_size: int,
705707
) -> SOURCE_REF:
706708
"""
707709
If the source already has a prefix, return the source. Otherwise, return
708710
an updated copy of the source with a heuristically computed prefix that
709711
should be appropriate for mirroring in the given catalog.
710712
"""
711-
return self._partition_source(catalog, source, self.count_files)
713+
return self._partition_source(catalog, source, self.count_files, partition_size)
712714

713715
def _partition_source(self,
714716
catalog: CatalogName,
715717
source: SOURCE_REF,
716-
counter: Callable[[SOURCE_SPEC], int]
718+
counter: Callable[[SOURCE_SPEC], int],
719+
partition_size: int
717720
) -> SOURCE_REF:
718721
if source.spec.prefix is None:
719722
count = counter(source.spec)
@@ -722,7 +725,13 @@ def _partition_source(self,
722725
# We use the "lesser" heuristic during IT to keep the cost and
723726
# performance of the tests within reasonable limits
724727
if is_main and not is_it:
725-
prefix = Prefix.for_main_deployment(count)
728+
# Sanity-check the partition size. We know the upper bound
729+
# caused some mirror Lambda invocations to time out. The lower
730+
# bound is hypothetical. It'll likely still work for mirroring
731+
# but we'd like to know if partitions get that small. For
732+
# indexing, the partition size is fixed at the upper bound.
733+
assert 512 <= partition_size <= 8192, partition_size
734+
prefix = Prefix.for_main_deployment(count, partition_size)
726735
else:
727736
prefix = Prefix.for_lesser_deployment(count)
728737
return source.with_prefix(prefix)

src/azul/queues.py

Lines changed: 13 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -194,11 +194,23 @@ def read_messages(self, queue: 'Queue') -> builtins.list['Message']:
194194
self._cleanup_messages(queue, messages)
195195
return messages
196196

197-
def send_messages(self, queue: 'Queue', messages: Iterable[SQSMessage]) -> int:
197+
def send_messages(self,
198+
queue: 'Queue',
199+
messages: Iterable[SQSMessage],
200+
rate_limit: float | None = None
201+
) -> int:
198202
num_messages = 0
199203
for batch in chunked(messages, self.batch_size):
200204
entries = [message.to_batch_entry(i) for i, message in enumerate(batch)]
205+
start = time.time()
201206
queue.send_messages(Entries=entries)
207+
if rate_limit is not None:
208+
period = 1 / rate_limit
209+
time_spent = time.time() - start
210+
time_to_sleep = period - time_spent
211+
if time_to_sleep > 0:
212+
log.debug('Sleeping %.3fs to prevent exceeding rate limit', time_to_sleep)
213+
time.sleep(time_to_sleep)
202214
num_messages += len(batch)
203215
return num_messages
204216

0 commit comments

Comments (0)