diff --git a/dojo/db_migrations/0246_endpoint_idx_ep_product_lower_host_and_more.py b/dojo/db_migrations/0246_endpoint_idx_ep_product_lower_host_and_more.py new file mode 100644 index 00000000000..70ae2bd5fe1 --- /dev/null +++ b/dojo/db_migrations/0246_endpoint_idx_ep_product_lower_host_and_more.py @@ -0,0 +1,26 @@ +# Generated by Django 5.1.13 on 2025-10-23 22:01 + +import django.db.models.functions.text +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dojo', '0245_alter_jira_instance_accepted_mapping_resolution'), + ] + + operations = [ + migrations.AddIndex( + model_name='endpoint', + index=models.Index(models.F('product'), django.db.models.functions.text.Lower('host'), name='idx_ep_product_lower_host'), + ), + migrations.AddIndex( + model_name='endpoint_status', + index=models.Index(condition=models.Q(('false_positive', False), ('mitigated', False), ('out_of_scope', False), ('risk_accepted', False)), fields=['endpoint'], name='idx_eps_active_by_endpoint'), + ), + migrations.AddIndex( + model_name='endpoint_status', + index=models.Index(condition=models.Q(('false_positive', False), ('mitigated', False), ('out_of_scope', False), ('risk_accepted', False)), fields=['finding'], name='idx_eps_active_by_finding'), + ), + ] diff --git a/dojo/endpoint/utils.py b/dojo/endpoint/utils.py index 2cc835aa974..75f81e60827 100644 --- a/dojo/endpoint/utils.py +++ b/dojo/endpoint/utils.py @@ -6,7 +6,6 @@ from django.contrib import messages from django.core.exceptions import ValidationError from django.core.validators import validate_ipv46_address -from django.db import transaction from django.db.models import Count, Q from django.http import HttpResponseRedirect from django.urls import reverse @@ -55,21 +54,27 @@ def endpoint_filter(**kwargs): def endpoint_get_or_create(**kwargs): - with transaction.atomic(): - qs = endpoint_filter(**kwargs) - count = qs.count() - if count == 0: - return Endpoint.objects.get_or_create(**kwargs) - if count == 1: - return qs.order_by("id").first(), False - logger.warning( - f"Endpoints in your database are broken. " - f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them.", - ) - # Get the oldest endpoint first, and return that instead - # a datetime is not captured on the endpoint model, so ID - # will have to work here instead - return qs.order_by("id").first(), False + # This code looks a bit ugly/complicated. + # But this method is called so frequently that we need to optimize it. + # It executes at most one SELECT and one optional INSERT. + qs = endpoint_filter(**kwargs) + # Fetch up to two matches in a single round-trip. This covers + # the common cases efficiently: zero (create) or one (reuse). + matches = list(qs.order_by("id")[:2]) + if not matches: + # Most common case: nothing exists yet + return Endpoint.objects.create(**kwargs), True + if len(matches) == 1: + # Common case: exactly one existing endpoint + return matches[0], False + logger.warning( + f"Endpoints in your database are broken. " + f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them.", + ) + # Get the oldest endpoint first, and return that instead + # a datetime is not captured on the endpoint model, so ID + # will have to work here instead + return matches[0], False def clean_hosts_run(apps, change): diff --git a/dojo/importers/endpoint_manager.py b/dojo/importers/endpoint_manager.py index f733d5c9e5a..ccfff345c40 100644 --- a/dojo/importers/endpoint_manager.py +++ b/dojo/importers/endpoint_manager.py @@ -31,6 +31,7 @@ def add_endpoints_to_unsaved_finding( self.clean_unsaved_endpoints(endpoints) for endpoint in endpoints: ep = None + eps = [] try: ep, _ = endpoint_get_or_create( protocol=endpoint.protocol, @@ -41,6 +42,7 @@ def add_endpoints_to_unsaved_finding( query=endpoint.query, fragment=endpoint.fragment, product=finding.test.engagement.product) + eps.append(ep) except (MultipleObjectsReturned): msg = ( f"Endpoints in your database are broken. " @@ -48,10 +50,12 @@ def add_endpoints_to_unsaved_finding( ) raise Exception(msg) - Endpoint_Status.objects.get_or_create( - finding=finding, - endpoint=ep, - defaults={"date": finding.date}) + # bulk_create will translate to INSERT WITH IGNORE CONFLICTS + # much faster than get_or_create which issues two queries per endpoint + # bulk_create will not trigger endpoint_status.save and signals which is fine for now + rows = [Endpoint_Status(finding=finding, endpoint=e, date=finding.date) for e in eps] + Endpoint_Status.objects.bulk_create(rows, ignore_conflicts=True, batch_size=1000) + logger.debug(f"IMPORT_SCAN: {len(endpoints)} endpoints imported") @dojo_async_task diff --git a/dojo/models.py b/dojo/models.py index 2c283c8d795..eed0fb0ad12 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -25,7 +25,7 @@ from django.core.files.base import ContentFile from django.core.validators import MaxValueValidator, MinValueValidator, RegexValidator, validate_ipv46_address from django.db import connection, models -from django.db.models import Count, JSONField, Q +from django.db.models import Count, F, JSONField, Q from django.db.models.expressions import Case, When from django.db.models.functions import Lower from django.urls import reverse @@ -1690,6 +1690,17 @@ class Meta: indexes = [ models.Index(fields=["finding", "mitigated"]), models.Index(fields=["endpoint", "mitigated"]), + # Optimize frequent lookups of "active" statuses (mitigated/flags all False) + models.Index( + name="idx_eps_active_by_endpoint", + fields=["endpoint"], + condition=Q(mitigated=False, false_positive=False, out_of_scope=False, risk_accepted=False), + ), + models.Index( + name="idx_eps_active_by_finding", + fields=["finding"], + condition=Q(mitigated=False, false_positive=False, out_of_scope=False, risk_accepted=False), + ), ] constraints = [ models.UniqueConstraint(fields=["finding", "endpoint"], name="endpoint-finding relation"), @@ -1749,6 +1760,12 @@ class Meta: ordering = ["product", "host", "protocol", "port", "userinfo", "path", "query", "fragment"] indexes = [ models.Index(fields=["product"]), + # Fast case-insensitive equality on host within product scope + models.Index( + F("product"), + Lower("host"), + name="idx_ep_product_lower_host", + ), ] def __hash__(self): diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 3b4ce357c85..b3a3709961d 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -178,11 +178,11 @@ def test_import_reimport_reimport_performance_async(self): configure_pghistory_triggers() self._import_reimport_performance( - expected_num_queries1=593, + expected_num_queries1=340, expected_num_async_tasks1=10, - expected_num_queries2=498, + expected_num_queries2=288, expected_num_async_tasks2=22, - expected_num_queries3=289, + expected_num_queries3=175, expected_num_async_tasks3=20, ) @@ -196,11 +196,11 @@ def test_import_reimport_reimport_performance_pghistory_async(self): configure_pghistory_triggers() self._import_reimport_performance( - expected_num_queries1=559, + expected_num_queries1=306, expected_num_async_tasks1=10, - expected_num_queries2=491, + expected_num_queries2=281, expected_num_async_tasks2=22, - expected_num_queries3=284, + expected_num_queries3=170, expected_num_async_tasks3=20, ) @@ -220,11 +220,11 @@ def test_import_reimport_reimport_performance_no_async(self): testuser.usercontactinfo.block_execution = True testuser.usercontactinfo.save() self._import_reimport_performance( - expected_num_queries1=603, + expected_num_queries1=350, expected_num_async_tasks1=10, - expected_num_queries2=515, + expected_num_queries2=305, expected_num_async_tasks2=22, - expected_num_queries3=304, + expected_num_queries3=190, expected_num_async_tasks3=20, ) @@ -242,11 +242,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._import_reimport_performance( - expected_num_queries1=569, + expected_num_queries1=316, expected_num_async_tasks1=10, - expected_num_queries2=508, + expected_num_queries2=298, expected_num_async_tasks2=22, - expected_num_queries3=299, + expected_num_queries3=185, expected_num_async_tasks3=20, ) @@ -268,11 +268,11 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self self.system_settings(enable_product_grade=True) self._import_reimport_performance( - expected_num_queries1=604, + expected_num_queries1=351, expected_num_async_tasks1=11, - expected_num_queries2=516, + expected_num_queries2=306, expected_num_async_tasks2=23, - expected_num_queries3=305, + expected_num_queries3=191, expected_num_async_tasks3=21, ) @@ -291,11 +291,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr self.system_settings(enable_product_grade=True) self._import_reimport_performance( - expected_num_queries1=570, + expected_num_queries1=317, expected_num_async_tasks1=11, - expected_num_queries2=509, + expected_num_queries2=299, expected_num_async_tasks2=23, - expected_num_queries3=300, + expected_num_queries3=186, expected_num_async_tasks3=21, ) @@ -414,9 +414,9 @@ def test_deduplication_performance_async(self): self.system_settings(enable_deduplication=True) self._deduplication_performance( - expected_num_queries1=660, + expected_num_queries1=311, expected_num_async_tasks1=12, - expected_num_queries2=519, + expected_num_queries2=204, expected_num_async_tasks2=12, check_duplicates=False, # Async mode - deduplication happens later ) @@ -431,9 +431,9 @@ def test_deduplication_performance_pghistory_async(self): self.system_settings(enable_deduplication=True) self._deduplication_performance( - expected_num_queries1=624, + expected_num_queries1=275, expected_num_async_tasks1=12, - expected_num_queries2=500, + expected_num_queries2=185, expected_num_async_tasks2=12, check_duplicates=False, # Async mode - deduplication happens later ) @@ -452,9 +452,9 @@ def test_deduplication_performance_no_async(self): testuser.usercontactinfo.save() self._deduplication_performance( - expected_num_queries1=672, + expected_num_queries1=323, expected_num_async_tasks1=12, - expected_num_queries2=633, + expected_num_queries2=318, expected_num_async_tasks2=12, ) @@ -472,8 +472,8 @@ def test_deduplication_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._deduplication_performance( - expected_num_queries1=636, + expected_num_queries1=287, expected_num_async_tasks1=12, - expected_num_queries2=596, + expected_num_queries2=281, expected_num_async_tasks2=12, )