Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 5.1.13 on 2025-10-23 22:01

import django.db.models.functions.text
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add indexes that speed up frequent endpoint lookups.

    - ``idx_ep_product_lower_host``: functional index on
      (product, LOWER(host)) for case-insensitive host matching
      scoped to a product.
    - ``idx_eps_active_by_endpoint`` / ``idx_eps_active_by_finding``:
      partial indexes covering only "active" statuses, i.e. rows where
      all of the mitigated/false_positive/out_of_scope/risk_accepted
      flags are False.
    """

    dependencies = [
        ("dojo", "0245_alter_jira_instance_accepted_mapping_resolution"),
    ]

    operations = [
        migrations.AddIndex(
            model_name="endpoint",
            index=models.Index(
                models.F("product"),
                django.db.models.functions.text.Lower("host"),
                name="idx_ep_product_lower_host",
            ),
        ),
        migrations.AddIndex(
            model_name="endpoint_status",
            index=models.Index(
                fields=["endpoint"],
                name="idx_eps_active_by_endpoint",
                condition=models.Q(
                    false_positive=False,
                    mitigated=False,
                    out_of_scope=False,
                    risk_accepted=False,
                ),
            ),
        ),
        migrations.AddIndex(
            model_name="endpoint_status",
            index=models.Index(
                fields=["finding"],
                name="idx_eps_active_by_finding",
                condition=models.Q(
                    false_positive=False,
                    mitigated=False,
                    out_of_scope=False,
                    risk_accepted=False,
                ),
            ),
        ),
    ]
37 changes: 21 additions & 16 deletions dojo/endpoint/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from django.contrib import messages
from django.core.exceptions import ValidationError
from django.core.validators import validate_ipv46_address
from django.db import transaction
from django.db.models import Count, Q
from django.http import HttpResponseRedirect
from django.urls import reverse
Expand Down Expand Up @@ -55,21 +54,27 @@ def endpoint_filter(**kwargs):


def endpoint_get_or_create(**kwargs):
    """Return ``(endpoint, created)`` for the endpoint matching ``**kwargs``.

    Hot path: called once per imported endpoint, so it is written to issue
    at most one SELECT plus one optional INSERT (the previous version ran
    a COUNT plus a second query inside a transaction).

    Returns:
        tuple[Endpoint, bool]: the matching (or newly created) endpoint and
        a flag that is True only when a new row was inserted.
    """
    # Fetch up to two matches in a single round-trip. LIMIT 2 is enough to
    # distinguish the zero / exactly-one / many cases without COUNT(*).
    matches = list(endpoint_filter(**kwargs).order_by("id")[:2])
    if not matches:
        # Most common case: nothing exists yet, so insert it.
        return Endpoint.objects.create(**kwargs), True
    if len(matches) == 1:
        # Common case: exactly one existing endpoint — reuse it.
        return matches[0], False
    # More than one match means the endpoint data is inconsistent; warn the
    # operator and fall back deterministically to the oldest row. Endpoint
    # has no creation timestamp, so the lowest id stands in for "oldest".
    logger.warning(
        f"Endpoints in your database are broken. "
        f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them.",
    )
    return matches[0], False


def clean_hosts_run(apps, change):
Expand Down
12 changes: 8 additions & 4 deletions dojo/importers/endpoint_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def add_endpoints_to_unsaved_finding(
self.clean_unsaved_endpoints(endpoints)
for endpoint in endpoints:
ep = None
eps = []
try:
ep, _ = endpoint_get_or_create(
protocol=endpoint.protocol,
Expand All @@ -41,17 +42,20 @@ def add_endpoints_to_unsaved_finding(
query=endpoint.query,
fragment=endpoint.fragment,
product=finding.test.engagement.product)
eps.append(ep)
except (MultipleObjectsReturned):
msg = (
f"Endpoints in your database are broken. "
f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them."
)
raise Exception(msg)

Endpoint_Status.objects.get_or_create(
finding=finding,
endpoint=ep,
defaults={"date": finding.date})
# bulk_create will translate to INSERT WITH IGNORE CONFLICTS
# much faster than get_or_create which issues two queries per endpoint
# bulk_create will not trigger endpoint_status.save and signals which is fine for now
rows = [Endpoint_Status(finding=finding, endpoint=e, date=finding.date) for e in eps]
Endpoint_Status.objects.bulk_create(rows, ignore_conflicts=True, batch_size=1000)

logger.debug(f"IMPORT_SCAN: {len(endpoints)} endpoints imported")

@dojo_async_task
Expand Down
19 changes: 18 additions & 1 deletion dojo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from django.core.files.base import ContentFile
from django.core.validators import MaxValueValidator, MinValueValidator, RegexValidator, validate_ipv46_address
from django.db import connection, models
from django.db.models import Count, JSONField, Q
from django.db.models import Count, F, JSONField, Q
from django.db.models.expressions import Case, When
from django.db.models.functions import Lower
from django.urls import reverse
Expand Down Expand Up @@ -1690,6 +1690,17 @@ class Meta:
indexes = [
models.Index(fields=["finding", "mitigated"]),
models.Index(fields=["endpoint", "mitigated"]),
# Optimize frequent lookups of "active" statuses (mitigated/flags all False)
models.Index(
name="idx_eps_active_by_endpoint",
fields=["endpoint"],
condition=Q(mitigated=False, false_positive=False, out_of_scope=False, risk_accepted=False),
),
models.Index(
name="idx_eps_active_by_finding",
fields=["finding"],
condition=Q(mitigated=False, false_positive=False, out_of_scope=False, risk_accepted=False),
),
]
constraints = [
models.UniqueConstraint(fields=["finding", "endpoint"], name="endpoint-finding relation"),
Expand Down Expand Up @@ -1749,6 +1760,12 @@ class Meta:
ordering = ["product", "host", "protocol", "port", "userinfo", "path", "query", "fragment"]
indexes = [
models.Index(fields=["product"]),
# Fast case-insensitive equality on host within product scope
models.Index(
F("product"),
Lower("host"),
name="idx_ep_product_lower_host",
),
]

def __hash__(self):
Expand Down
52 changes: 26 additions & 26 deletions unittests/test_importers_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,11 @@ def test_import_reimport_reimport_performance_async(self):
configure_pghistory_triggers()

self._import_reimport_performance(
expected_num_queries1=593,
expected_num_queries1=340,
expected_num_async_tasks1=10,
expected_num_queries2=498,
expected_num_queries2=288,
expected_num_async_tasks2=22,
expected_num_queries3=289,
expected_num_queries3=175,
expected_num_async_tasks3=20,
)

Expand All @@ -196,11 +196,11 @@ def test_import_reimport_reimport_performance_pghistory_async(self):
configure_pghistory_triggers()

self._import_reimport_performance(
expected_num_queries1=559,
expected_num_queries1=306,
expected_num_async_tasks1=10,
expected_num_queries2=491,
expected_num_queries2=281,
expected_num_async_tasks2=22,
expected_num_queries3=284,
expected_num_queries3=170,
expected_num_async_tasks3=20,
)

Expand All @@ -220,11 +220,11 @@ def test_import_reimport_reimport_performance_no_async(self):
testuser.usercontactinfo.block_execution = True
testuser.usercontactinfo.save()
self._import_reimport_performance(
expected_num_queries1=603,
expected_num_queries1=350,
expected_num_async_tasks1=10,
expected_num_queries2=515,
expected_num_queries2=305,
expected_num_async_tasks2=22,
expected_num_queries3=304,
expected_num_queries3=190,
expected_num_async_tasks3=20,
)

Expand All @@ -242,11 +242,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
testuser.usercontactinfo.save()

self._import_reimport_performance(
expected_num_queries1=569,
expected_num_queries1=316,
expected_num_async_tasks1=10,
expected_num_queries2=508,
expected_num_queries2=298,
expected_num_async_tasks2=22,
expected_num_queries3=299,
expected_num_queries3=185,
expected_num_async_tasks3=20,
)

Expand All @@ -268,11 +268,11 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
self.system_settings(enable_product_grade=True)

self._import_reimport_performance(
expected_num_queries1=604,
expected_num_queries1=351,
expected_num_async_tasks1=11,
expected_num_queries2=516,
expected_num_queries2=306,
expected_num_async_tasks2=23,
expected_num_queries3=305,
expected_num_queries3=191,
expected_num_async_tasks3=21,
)

Expand All @@ -291,11 +291,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
self.system_settings(enable_product_grade=True)

self._import_reimport_performance(
expected_num_queries1=570,
expected_num_queries1=317,
expected_num_async_tasks1=11,
expected_num_queries2=509,
expected_num_queries2=299,
expected_num_async_tasks2=23,
expected_num_queries3=300,
expected_num_queries3=186,
expected_num_async_tasks3=21,
)

Expand Down Expand Up @@ -414,9 +414,9 @@ def test_deduplication_performance_async(self):
self.system_settings(enable_deduplication=True)

self._deduplication_performance(
expected_num_queries1=660,
expected_num_queries1=311,
expected_num_async_tasks1=12,
expected_num_queries2=519,
expected_num_queries2=204,
expected_num_async_tasks2=12,
check_duplicates=False, # Async mode - deduplication happens later
)
Expand All @@ -431,9 +431,9 @@ def test_deduplication_performance_pghistory_async(self):
self.system_settings(enable_deduplication=True)

self._deduplication_performance(
expected_num_queries1=624,
expected_num_queries1=275,
expected_num_async_tasks1=12,
expected_num_queries2=500,
expected_num_queries2=185,
expected_num_async_tasks2=12,
check_duplicates=False, # Async mode - deduplication happens later
)
Expand All @@ -452,9 +452,9 @@ def test_deduplication_performance_no_async(self):
testuser.usercontactinfo.save()

self._deduplication_performance(
expected_num_queries1=672,
expected_num_queries1=323,
expected_num_async_tasks1=12,
expected_num_queries2=633,
expected_num_queries2=318,
expected_num_async_tasks2=12,
)

Expand All @@ -472,8 +472,8 @@ def test_deduplication_performance_pghistory_no_async(self):
testuser.usercontactinfo.save()

self._deduplication_performance(
expected_num_queries1=636,
expected_num_queries1=287,
expected_num_async_tasks1=12,
expected_num_queries2=596,
expected_num_queries2=281,
expected_num_async_tasks2=12,
)