diff --git a/src/sentry/api/endpoints/debug_files.py b/src/sentry/api/endpoints/debug_files.py index 20061399166c8d..6eef6cb710f21b 100644 --- a/src/sentry/api/endpoints/debug_files.py +++ b/src/sentry/api/endpoints/debug_files.py @@ -7,7 +7,7 @@ import jsonschema import orjson from django.db import IntegrityError, router -from django.db.models import Q +from django.db.models import Exists, Q from django.http import Http404, HttpResponse, StreamingHttpResponse from rest_framework import status from rest_framework.request import Request @@ -265,20 +265,37 @@ def get(self, request: Request, project: Project) -> Response: except SymbolicError: pass + q = Q(project_id=project.id) + if file_formats: + file_format_q = Q() + for file_format in file_formats: + known_file_format = DIF_MIMETYPES.get(file_format) + if known_file_format: + file_format_q |= Q(file__headers__icontains=known_file_format) + q &= file_format_q + + queryset = None if debug_id and code_id: # Be lenient when searching for debug files, check either for a matching debug id - # or a matching code id, as both of these are unique identifiers for an object. + # or a matching code id. We only fallback to code id if there is no debug id match. + # While both identifiers should be unique, in practice they are not and the debug id + # yields better results. # # Ideally debug- and code-id yield the same files, but especially on Windows it is possible # that the debug id does not perfectly match due to 'age' differences, but the code-id # will match. - q = Q(debug_id__exact=debug_id) | Q(code_id__exact=code_id) + debug_id_qs = ProjectDebugFile.objects.filter(Q(debug_id__exact=debug_id) & q) + queryset = debug_id_qs.select_related("file").union( + ProjectDebugFile.objects.filter(Q(code_id__exact=code_id) & q) + # Only return any code id matches if there are *no* debug id matches. + .filter(~Exists(debug_id_qs)).select_related("file") + ) elif debug_id: - q = Q(debug_id__exact=debug_id) + q &= Q(debug_id__exact=debug_id) elif code_id: - q = Q(code_id__exact=code_id) + q &= Q(code_id__exact=code_id) elif query: - q = ( + query_q = ( Q(object_name__icontains=query) | Q(debug_id__icontains=query) | Q(code_id__icontains=query) @@ -288,25 +305,17 @@ def get(self, request: Request, project: Project) -> Response: known_file_format = DIF_MIMETYPES.get(query) if known_file_format: - q |= Q(file__headers__icontains=known_file_format) - else: - q = Q() + query_q |= Q(file__headers__icontains=known_file_format) - if file_formats: - file_format_q = Q() - for file_format in file_formats: - known_file_format = DIF_MIMETYPES.get(file_format) - if known_file_format: - file_format_q |= Q(file__headers__icontains=known_file_format) - q &= file_format_q + q &= query_q - q &= Q(project_id=project.id) - queryset = ProjectDebugFile.objects.filter(q).select_related("file") + if queryset is None: + queryset = ProjectDebugFile.objects.filter(q).select_related("file") def on_results(difs: Sequence[ProjectDebugFile]): # NOTE: we are only refreshing files if there is direct query for specific files if debug_id and not query and not file_formats: - maybe_renew_debug_files(q, difs) + maybe_renew_debug_files(difs) return serialize(difs, request.user) diff --git a/src/sentry/debug_files/debug_files.py b/src/sentry/debug_files/debug_files.py index 002cd4b98e4fb8..9302a932ed2eda 100644 --- a/src/sentry/debug_files/debug_files.py +++ b/src/sentry/debug_files/debug_files.py @@ -4,7 +4,6 @@ from datetime import timedelta from django.db import router -from django.db.models import Q from django.utils import timezone from sentry.models.debugfile import ProjectDebugFile @@ -16,7 +15,7 @@ AVAILABLE_FOR_RENEWAL_DAYS = 30 -def maybe_renew_debug_files(query: Q, debug_files: Sequence[ProjectDebugFile]): +def maybe_renew_debug_files(debug_files: Sequence[ProjectDebugFile]): # We take a snapshot in time that MUST be consistent across all updates. now = timezone.now() # We compute the threshold used to determine whether we want to renew the specific bundle. @@ -27,10 +26,11 @@ def maybe_renew_debug_files(query: Q, debug_files: Sequence[ProjectDebugFile]): if not needs_bump: return + ids = [dif.id for dif in debug_files] with metrics.timer("debug_files_renewal"): with atomic_transaction(using=(router.db_for_write(ProjectDebugFile),)): updated_rows_count = ProjectDebugFile.objects.filter( - query, date_accessed__lte=threshold_date + id__in=ids, date_accessed__lte=threshold_date ).update(date_accessed=now) if updated_rows_count > 0: metrics.incr("debug_files_renewal.were_renewed", updated_rows_count) diff --git a/src/sentry/models/debugfile.py b/src/sentry/models/debugfile.py index 8b065b3bd5d16b..1bf83a30944962 100644 --- a/src/sentry/models/debugfile.py +++ b/src/sentry/models/debugfile.py @@ -87,7 +87,7 @@ def find_by_debug_ids( # because otherwise this would be a circular import: from sentry.debug_files.debug_files import maybe_renew_debug_files - maybe_renew_debug_files(query, difs) + maybe_renew_debug_files(difs) difs_by_id: dict[str, list[ProjectDebugFile]] = {} for dif in difs: diff --git a/tests/sentry/api/endpoints/test_debug_files.py b/tests/sentry/api/endpoints/test_debug_files.py index dd8ce9fef777a1..c38581999512a0 100644 --- a/tests/sentry/api/endpoints/test_debug_files.py +++ b/tests/sentry/api/endpoints/test_debug_files.py @@ -85,6 +85,42 @@ def test_dsyms_search(self): dsyms = response.data assert len(dsyms) == 20 + def test_dsyms_debugid_codeid_full_match(self): + self._do_test_dsyms_by_debugid_and_codeid( + ("dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"), + [(True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256")], + ) + + def test_dsyms_debugid_codeid_full_match_and_partials(self): + self._do_test_dsyms_by_debugid_and_codeid( + ("dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"), + [ + (True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "ae0459704fc7256"), + (False, "00000000-000000000-0000-000000000000", "ae0459704fc7256"), + (True, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "000000000000000"), + ], + ) + + def test_dsyms_debugid_codeid_only_codeid(self): + self._do_test_dsyms_by_debugid_and_codeid( + ("22222222-000000000-0000-000000000000", "ae0459704fc7256"), + [ + (True, "10000000-000000000-0000-000000000000", "ae0459704fc7256"), + (True, "00000000-000000000-0000-000000000000", "ae0459704fc7256"), + (False, "dfb8e43a-f242-3d73-a453-aeb6a777ef75", "000000000000000"), + ], + ) + + def _do_test_dsyms_by_debugid_and_codeid(self, query, files): + for _, debug_id, code_id in files: + self.create_dif_file(debug_id=debug_id, code_id=code_id) + + response = self.client.get(f"{self.url}?debug_id={query[0]}&code_id={query[1]}") + assert response.status_code == 200, response.content + + actual = sorted((dsym["debugId"], dsym["codeId"]) for dsym in response.data) + assert actual == sorted((debug_id, code_id) for (exp, debug_id, code_id) in files if exp) + def test_access_control(self): # create a debug files such as proguard: response = self._upload_proguard(self.url, PROGUARD_UUID)