Skip to content

Commit 236d8b1

Browse files
endpoint import optimize (#13521)
* endpoint import optimize * rebase migration * rebase migration
1 parent ea09b35 commit 236d8b1

File tree

5 files changed

+99
-47
lines changed

5 files changed

+99
-47
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Generated by Django 5.1.13 on 2025-10-23 22:01
2+
3+
import django.db.models.functions.text
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
8+
9+
dependencies = [
10+
('dojo', '0245_alter_jira_instance_accepted_mapping_resolution'),
11+
]
12+
13+
operations = [
14+
migrations.AddIndex(
15+
model_name='endpoint',
16+
index=models.Index(models.F('product'), django.db.models.functions.text.Lower('host'), name='idx_ep_product_lower_host'),
17+
),
18+
migrations.AddIndex(
19+
model_name='endpoint_status',
20+
index=models.Index(condition=models.Q(('false_positive', False), ('mitigated', False), ('out_of_scope', False), ('risk_accepted', False)), fields=['endpoint'], name='idx_eps_active_by_endpoint'),
21+
),
22+
migrations.AddIndex(
23+
model_name='endpoint_status',
24+
index=models.Index(condition=models.Q(('false_positive', False), ('mitigated', False), ('out_of_scope', False), ('risk_accepted', False)), fields=['finding'], name='idx_eps_active_by_finding'),
25+
),
26+
]

dojo/endpoint/utils.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from django.contrib import messages
77
from django.core.exceptions import ValidationError
88
from django.core.validators import validate_ipv46_address
9-
from django.db import transaction
109
from django.db.models import Count, Q
1110
from django.http import HttpResponseRedirect
1211
from django.urls import reverse
@@ -55,21 +54,27 @@ def endpoint_filter(**kwargs):
5554

5655

5756
def endpoint_get_or_create(**kwargs):
58-
with transaction.atomic():
59-
qs = endpoint_filter(**kwargs)
60-
count = qs.count()
61-
if count == 0:
62-
return Endpoint.objects.get_or_create(**kwargs)
63-
if count == 1:
64-
return qs.order_by("id").first(), False
65-
logger.warning(
66-
f"Endpoints in your database are broken. "
67-
f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them.",
68-
)
69-
# Get the oldest endpoint first, and return that instead
70-
# a datetime is not captured on the endpoint model, so ID
71-
# will have to work here instead
72-
return qs.order_by("id").first(), False
57+
# This code looks a bit ugly/complicated.
58+
# But this method is called so frequently that we need to optimize it.
59+
# It executes at most one SELECT and one optional INSERT.
60+
qs = endpoint_filter(**kwargs)
61+
# Fetch up to two matches in a single round-trip. This covers
62+
# the common cases efficiently: zero (create) or one (reuse).
63+
matches = list(qs.order_by("id")[:2])
64+
if not matches:
65+
# Most common case: nothing exists yet
66+
return Endpoint.objects.create(**kwargs), True
67+
if len(matches) == 1:
68+
# Common case: exactly one existing endpoint
69+
return matches[0], False
70+
logger.warning(
71+
f"Endpoints in your database are broken. "
72+
f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them.",
73+
)
74+
# Get the oldest endpoint first, and return that instead
75+
# a datetime is not captured on the endpoint model, so ID
76+
# will have to work here instead
77+
return matches[0], False
7378

7479

7580
def clean_hosts_run(apps, change):

dojo/importers/endpoint_manager.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def add_endpoints_to_unsaved_finding(
3131
self.clean_unsaved_endpoints(endpoints)
3232
for endpoint in endpoints:
3333
ep = None
34+
eps = []
3435
try:
3536
ep, _ = endpoint_get_or_create(
3637
protocol=endpoint.protocol,
@@ -41,17 +42,20 @@ def add_endpoints_to_unsaved_finding(
4142
query=endpoint.query,
4243
fragment=endpoint.fragment,
4344
product=finding.test.engagement.product)
45+
eps.append(ep)
4446
except (MultipleObjectsReturned):
4547
msg = (
4648
f"Endpoints in your database are broken. "
4749
f"Please access {reverse('endpoint_migrate')} and migrate them to new format or remove them."
4850
)
4951
raise Exception(msg)
5052

51-
Endpoint_Status.objects.get_or_create(
52-
finding=finding,
53-
endpoint=ep,
54-
defaults={"date": finding.date})
53+
# bulk_create with ignore_conflicts=True translates to a single INSERT that ignores conflicts (ON CONFLICT DO NOTHING on PostgreSQL)
54+
# much faster than get_or_create, which issues up to two queries (SELECT, then INSERT) per endpoint
55+
# bulk_create does not call Endpoint_Status.save() or send pre_save/post_save signals, which is fine for now
56+
rows = [Endpoint_Status(finding=finding, endpoint=e, date=finding.date) for e in eps]
57+
Endpoint_Status.objects.bulk_create(rows, ignore_conflicts=True, batch_size=1000)
58+
5559
logger.debug(f"IMPORT_SCAN: {len(endpoints)} endpoints imported")
5660

5761
@dojo_async_task

dojo/models.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from django.core.files.base import ContentFile
2626
from django.core.validators import MaxValueValidator, MinValueValidator, RegexValidator, validate_ipv46_address
2727
from django.db import connection, models
28-
from django.db.models import Count, JSONField, Q
28+
from django.db.models import Count, F, JSONField, Q
2929
from django.db.models.expressions import Case, When
3030
from django.db.models.functions import Lower
3131
from django.urls import reverse
@@ -1690,6 +1690,17 @@ class Meta:
16901690
indexes = [
16911691
models.Index(fields=["finding", "mitigated"]),
16921692
models.Index(fields=["endpoint", "mitigated"]),
1693+
# Optimize frequent lookups of "active" statuses (mitigated/flags all False)
1694+
models.Index(
1695+
name="idx_eps_active_by_endpoint",
1696+
fields=["endpoint"],
1697+
condition=Q(mitigated=False, false_positive=False, out_of_scope=False, risk_accepted=False),
1698+
),
1699+
models.Index(
1700+
name="idx_eps_active_by_finding",
1701+
fields=["finding"],
1702+
condition=Q(mitigated=False, false_positive=False, out_of_scope=False, risk_accepted=False),
1703+
),
16931704
]
16941705
constraints = [
16951706
models.UniqueConstraint(fields=["finding", "endpoint"], name="endpoint-finding relation"),
@@ -1749,6 +1760,12 @@ class Meta:
17491760
ordering = ["product", "host", "protocol", "port", "userinfo", "path", "query", "fragment"]
17501761
indexes = [
17511762
models.Index(fields=["product"]),
1763+
# Fast case-insensitive equality on host within product scope
1764+
models.Index(
1765+
F("product"),
1766+
Lower("host"),
1767+
name="idx_ep_product_lower_host",
1768+
),
17521769
]
17531770

17541771
def __hash__(self):

unittests/test_importers_performance.py

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -178,11 +178,11 @@ def test_import_reimport_reimport_performance_async(self):
178178
configure_pghistory_triggers()
179179

180180
self._import_reimport_performance(
181-
expected_num_queries1=593,
181+
expected_num_queries1=340,
182182
expected_num_async_tasks1=10,
183-
expected_num_queries2=498,
183+
expected_num_queries2=288,
184184
expected_num_async_tasks2=22,
185-
expected_num_queries3=289,
185+
expected_num_queries3=175,
186186
expected_num_async_tasks3=20,
187187
)
188188

@@ -196,11 +196,11 @@ def test_import_reimport_reimport_performance_pghistory_async(self):
196196
configure_pghistory_triggers()
197197

198198
self._import_reimport_performance(
199-
expected_num_queries1=559,
199+
expected_num_queries1=306,
200200
expected_num_async_tasks1=10,
201-
expected_num_queries2=491,
201+
expected_num_queries2=281,
202202
expected_num_async_tasks2=22,
203-
expected_num_queries3=284,
203+
expected_num_queries3=170,
204204
expected_num_async_tasks3=20,
205205
)
206206

@@ -220,11 +220,11 @@ def test_import_reimport_reimport_performance_no_async(self):
220220
testuser.usercontactinfo.block_execution = True
221221
testuser.usercontactinfo.save()
222222
self._import_reimport_performance(
223-
expected_num_queries1=603,
223+
expected_num_queries1=350,
224224
expected_num_async_tasks1=10,
225-
expected_num_queries2=515,
225+
expected_num_queries2=305,
226226
expected_num_async_tasks2=22,
227-
expected_num_queries3=304,
227+
expected_num_queries3=190,
228228
expected_num_async_tasks3=20,
229229
)
230230

@@ -242,11 +242,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
242242
testuser.usercontactinfo.save()
243243

244244
self._import_reimport_performance(
245-
expected_num_queries1=569,
245+
expected_num_queries1=316,
246246
expected_num_async_tasks1=10,
247-
expected_num_queries2=508,
247+
expected_num_queries2=298,
248248
expected_num_async_tasks2=22,
249-
expected_num_queries3=299,
249+
expected_num_queries3=185,
250250
expected_num_async_tasks3=20,
251251
)
252252

@@ -268,11 +268,11 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
268268
self.system_settings(enable_product_grade=True)
269269

270270
self._import_reimport_performance(
271-
expected_num_queries1=604,
271+
expected_num_queries1=351,
272272
expected_num_async_tasks1=11,
273-
expected_num_queries2=516,
273+
expected_num_queries2=306,
274274
expected_num_async_tasks2=23,
275-
expected_num_queries3=305,
275+
expected_num_queries3=191,
276276
expected_num_async_tasks3=21,
277277
)
278278

@@ -291,11 +291,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
291291
self.system_settings(enable_product_grade=True)
292292

293293
self._import_reimport_performance(
294-
expected_num_queries1=570,
294+
expected_num_queries1=317,
295295
expected_num_async_tasks1=11,
296-
expected_num_queries2=509,
296+
expected_num_queries2=299,
297297
expected_num_async_tasks2=23,
298-
expected_num_queries3=300,
298+
expected_num_queries3=186,
299299
expected_num_async_tasks3=21,
300300
)
301301

@@ -414,9 +414,9 @@ def test_deduplication_performance_async(self):
414414
self.system_settings(enable_deduplication=True)
415415

416416
self._deduplication_performance(
417-
expected_num_queries1=660,
417+
expected_num_queries1=311,
418418
expected_num_async_tasks1=12,
419-
expected_num_queries2=519,
419+
expected_num_queries2=204,
420420
expected_num_async_tasks2=12,
421421
check_duplicates=False, # Async mode - deduplication happens later
422422
)
@@ -431,9 +431,9 @@ def test_deduplication_performance_pghistory_async(self):
431431
self.system_settings(enable_deduplication=True)
432432

433433
self._deduplication_performance(
434-
expected_num_queries1=624,
434+
expected_num_queries1=275,
435435
expected_num_async_tasks1=12,
436-
expected_num_queries2=500,
436+
expected_num_queries2=185,
437437
expected_num_async_tasks2=12,
438438
check_duplicates=False, # Async mode - deduplication happens later
439439
)
@@ -452,9 +452,9 @@ def test_deduplication_performance_no_async(self):
452452
testuser.usercontactinfo.save()
453453

454454
self._deduplication_performance(
455-
expected_num_queries1=672,
455+
expected_num_queries1=323,
456456
expected_num_async_tasks1=12,
457-
expected_num_queries2=633,
457+
expected_num_queries2=318,
458458
expected_num_async_tasks2=12,
459459
)
460460

@@ -472,8 +472,8 @@ def test_deduplication_performance_pghistory_no_async(self):
472472
testuser.usercontactinfo.save()
473473

474474
self._deduplication_performance(
475-
expected_num_queries1=636,
475+
expected_num_queries1=287,
476476
expected_num_async_tasks1=12,
477-
expected_num_queries2=596,
477+
expected_num_queries2=281,
478478
expected_num_async_tasks2=12,
479479
)

0 commit comments

Comments
 (0)