Skip to content

Commit 57aeffa

Browse files
Dav1ddeandrewshie-sentry
authored andcommitted
ref(dsyms): Prefer debug ids over code ids (#95861)
Only fallback to a code id search if there is no 'perfect' match for a debug id. Example SQL query: ```sql ( SELECT "sentry_projectdsymfile"."id" AS "col1", "sentry_projectdsymfile"."file_id" AS "col2", "sentry_projectdsymfile"."checksum" AS "col3", "sentry_projectdsymfile"."object_name" AS "col4", "sentry_projectdsymfile"."cpu_name" AS "col5", "sentry_projectdsymfile"."project_id" AS "col6", "sentry_projectdsymfile"."uuid" AS "col7", "sentry_projectdsymfile"."code_id" AS "col8", "sentry_projectdsymfile"."data" AS "col9", "sentry_projectdsymfile"."date_accessed" AS "col10", "sentry_file"."id" AS "col11", "sentry_file"."name" AS "col12", "sentry_file"."type" AS "col13", "sentry_file"."timestamp" AS "col14", "sentry_file"."headers" AS "col15", "sentry_file"."size" AS "col16", "sentry_file"."checksum" AS "col17", "sentry_file"."blob_id" AS "col18", "sentry_file"."path" AS "col19" FROM "sentry_projectdsymfile" INNER JOIN "sentry_file" ON ( "sentry_projectdsymfile"."file_id" = "sentry_file"."id") WHERE ( "sentry_projectdsymfile"."uuid" = dfb8e43a-f242-3d73-a453-aeb6a777ef75 AND "sentry_projectdsymfile"."project_id" = 4556436849360897)) UNION ( SELECT "sentry_projectdsymfile"."id" AS "col1", "sentry_projectdsymfile"."file_id" AS "col2", "sentry_projectdsymfile"."checksum" AS "col3", "sentry_projectdsymfile"."object_name" AS "col4", "sentry_projectdsymfile"."cpu_name" AS "col5", "sentry_projectdsymfile"."project_id" AS "col6", "sentry_projectdsymfile"."uuid" AS "col7", "sentry_projectdsymfile"."code_id" AS "col8", "sentry_projectdsymfile"."data" AS "col9", "sentry_projectdsymfile"."date_accessed" AS "col10", "sentry_file"."id" AS "col11", "sentry_file"."name" AS "col12", "sentry_file"."type" AS "col13", "sentry_file"."timestamp" AS "col14", "sentry_file"."headers" AS "col15", "sentry_file"."size" AS "col16", "sentry_file"."checksum" AS "col17", "sentry_file"."blob_id" AS "col18", "sentry_file"."path" AS "col19" FROM "sentry_projectdsymfile" INNER JOIN "sentry_file" ON ( "sentry_projectdsymfile"."file_id" = "sentry_file"."id") WHERE ( "sentry_projectdsymfile"."code_id" = ae0459704fc7256 AND "sentry_projectdsymfile"."project_id" = 4556436849360897 AND NOT EXISTS ( SELECT 1 AS "a" FROM "sentry_projectdsymfile" u0 WHERE ( u0."uuid" = dfb8e43a-f242-3d73-a453-aeb6a777ef75 AND u0."project_id" = 4556436849360897) limit 1))) ```
1 parent 10ac3b3 commit 57aeffa

File tree

4 files changed

+68
-23
lines changed

4 files changed

+68
-23
lines changed

src/sentry/api/endpoints/debug_files.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import jsonschema
88
import orjson
99
from django.db import IntegrityError, router
10-
from django.db.models import Q
10+
from django.db.models import Exists, Q
1111
from django.http import Http404, HttpResponse, StreamingHttpResponse
1212
from rest_framework import status
1313
from rest_framework.request import Request
@@ -265,20 +265,37 @@ def get(self, request: Request, project: Project) -> Response:
265265
except SymbolicError:
266266
pass
267267

268+
q = Q(project_id=project.id)
269+
if file_formats:
270+
file_format_q = Q()
271+
for file_format in file_formats:
272+
known_file_format = DIF_MIMETYPES.get(file_format)
273+
if known_file_format:
274+
file_format_q |= Q(file__headers__icontains=known_file_format)
275+
q &= file_format_q
276+
277+
queryset = None
268278
if debug_id and code_id:
269279
# Be lenient when searching for debug files, check either for a matching debug id
270-
# or a matching code id, as both of these are unique identifiers for an object.
280+
# or a matching code id. We only fallback to code id if there is no debug id match.
281+
# While both identifiers should be unique, in practice they are not and the debug id
282+
# yields better results.
271283
#
272284
# Ideally debug- and code-id yield the same files, but especially on Windows it is possible
273285
# that the debug id does not perfectly match due to 'age' differences, but the code-id
274286
# will match.
275-
q = Q(debug_id__exact=debug_id) | Q(code_id__exact=code_id)
287+
debug_id_qs = ProjectDebugFile.objects.filter(Q(debug_id__exact=debug_id) & q)
288+
queryset = debug_id_qs.select_related("file").union(
289+
ProjectDebugFile.objects.filter(Q(code_id__exact=code_id) & q)
290+
# Only return any code id matches if there are *no* debug id matches.
291+
.filter(~Exists(debug_id_qs)).select_related("file")
292+
)
276293
elif debug_id:
277-
q = Q(debug_id__exact=debug_id)
294+
q &= Q(debug_id__exact=debug_id)
278295
elif code_id:
279-
q = Q(code_id__exact=code_id)
296+
q &= Q(code_id__exact=code_id)
280297
elif query:
281-
q = (
298+
query_q = (
282299
Q(object_name__icontains=query)
283300
| Q(debug_id__icontains=query)
284301
| Q(code_id__icontains=query)
@@ -288,25 +305,17 @@ def get(self, request: Request, project: Project) -> Response:
288305

289306
known_file_format = DIF_MIMETYPES.get(query)
290307
if known_file_format:
291-
q |= Q(file__headers__icontains=known_file_format)
292-
else:
293-
q = Q()
308+
query_q |= Q(file__headers__icontains=known_file_format)
294309

295-
if file_formats:
296-
file_format_q = Q()
297-
for file_format in file_formats:
298-
known_file_format = DIF_MIMETYPES.get(file_format)
299-
if known_file_format:
300-
file_format_q |= Q(file__headers__icontains=known_file_format)
301-
q &= file_format_q
310+
q &= query_q
302311

303-
q &= Q(project_id=project.id)
304-
queryset = ProjectDebugFile.objects.filter(q).select_related("file")
312+
if queryset is None:
313+
queryset = ProjectDebugFile.objects.filter(q).select_related("file")
305314

306315
def on_results(difs: Sequence[ProjectDebugFile]):
307316
# NOTE: we are only refreshing files if there is direct query for specific files
308317
if debug_id and not query and not file_formats:
309-
maybe_renew_debug_files(q, difs)
318+
maybe_renew_debug_files(difs)
310319

311320
return serialize(difs, request.user)
312321

src/sentry/debug_files/debug_files.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from datetime import timedelta
55

66
from django.db import router
7-
from django.db.models import Q
87
from django.utils import timezone
98

109
from sentry.models.debugfile import ProjectDebugFile
@@ -16,7 +15,7 @@
1615
AVAILABLE_FOR_RENEWAL_DAYS = 30
1716

1817

19-
def maybe_renew_debug_files(query: Q, debug_files: Sequence[ProjectDebugFile]):
18+
def maybe_renew_debug_files(debug_files: Sequence[ProjectDebugFile]):
2019
# We take a snapshot in time that MUST be consistent across all updates.
2120
now = timezone.now()
2221
# We compute the threshold used to determine whether we want to renew the specific bundle.
@@ -27,10 +26,11 @@ def maybe_renew_debug_files(query: Q, debug_files: Sequence[ProjectDebugFile]):
2726
if not needs_bump:
2827
return
2928

29+
ids = [dif.id for dif in debug_files]
3030
with metrics.timer("debug_files_renewal"):
3131
with atomic_transaction(using=(router.db_for_write(ProjectDebugFile),)):
3232
updated_rows_count = ProjectDebugFile.objects.filter(
33-
query, date_accessed__lte=threshold_date
33+
id__in=ids, date_accessed__lte=threshold_date
3434
).update(date_accessed=now)
3535
if updated_rows_count > 0:
3636
metrics.incr("debug_files_renewal.were_renewed", updated_rows_count)

src/sentry/models/debugfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def find_by_debug_ids(
8787
# because otherwise this would be a circular import:
8888
from sentry.debug_files.debug_files import maybe_renew_debug_files
8989

90-
maybe_renew_debug_files(query, difs)
90+
maybe_renew_debug_files(difs)
9191

9292
difs_by_id: dict[str, list[ProjectDebugFile]] = {}
9393
for dif in difs:

tests/sentry/api/endpoints/test_debug_files.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,42 @@ def test_dsyms_search(self):
8585
dsyms = response.data
8686
assert len(dsyms) == 20
8787

88+
def test_dsyms_debugid_codeid_full_match(self):
89+
self._do_test_dsyms_by_debugid_and_codeid(
90+
("dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"),
91+
[(True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256")],
92+
)
93+
94+
def test_dsyms_debugid_codeid_full_match_and_partials(self):
95+
self._do_test_dsyms_by_debugid_and_codeid(
96+
("dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"),
97+
[
98+
(True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"),
99+
(False, "00000000-000000000-0000-000000000000", "ae0459704fc7256"),
100+
(True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "000000000000000"),
101+
],
102+
)
103+
104+
def test_dsyms_debugid_codeid_only_codeid(self):
105+
self._do_test_dsyms_by_debugid_and_codeid(
106+
("22222222-000000000-0000-000000000000", "ae0459704fc7256"),
107+
[
108+
(True, "10000000-000000000-0000-000000000000", "ae0459704fc7256"),
109+
(True, "00000000-000000000-0000-000000000000", "ae0459704fc7256"),
110+
(False, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "000000000000000"),
111+
],
112+
)
113+
114+
def _do_test_dsyms_by_debugid_and_codeid(self, query, files):
115+
for _, debug_id, code_id in files:
116+
self.create_dif_file(debug_id=debug_id, code_id=code_id)
117+
118+
response = self.client.get(f"{self.url}?debug_id={query[0]}&code_id={query[1]}")
119+
assert response.status_code == 200, response.content
120+
121+
actual = sorted((dsym["debugId"], dsym["codeId"]) for dsym in response.data)
122+
assert actual == sorted((debug_id, code_id) for (exp, debug_id, code_id) in files if exp)
123+
88124
def test_access_control(self):
89125
# create a debug files such as proguard:
90126
response = self._upload_proguard(self.url, PROGUARD_UUID)

0 commit comments

Comments
 (0)