Skip to content

Make ReleaseFile TTI by tracking its last access time #95867

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ preprod: 0012_installablepreprod

replays: 0006_add_bulk_delete_job

sentry: 0952_fix_span_item_event_type_alerts
sentry: 0953_make_releasefiles_tti

social_auth: 0003_social_auth_json_field

Expand Down
13 changes: 9 additions & 4 deletions src/sentry/api/endpoints/artifact_lookup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
from collections.abc import Iterable
from typing import NotRequired, TypedDict

from django.db.models.query import QuerySet
Expand All @@ -23,6 +22,7 @@
MAX_BUNDLES_QUERY,
query_artifact_bundles_containing_file,
)
from sentry.debug_files.release_files import maybe_renew_releasefiles, renew_releasefiles_by_id
from sentry.lang.native.sources import get_internal_artifact_lookup_source_url
from sentry.models.artifactbundle import NULL_STRING, ArtifactBundle
from sentry.models.distribution import Distribution
Expand Down Expand Up @@ -160,14 +160,19 @@ def get(self, request: Request, project: Project) -> Response:

# If no `ArtifactBundle`s were found matching the file, we fall back to
# looking up the file using the legacy `ReleaseFile` infrastructure.
individual_files: Iterable[ReleaseFile] = []
individual_files: list[ReleaseFile] = []
if not artifact_bundles:
release, dist = try_resolve_release_dist(project, release_name, dist_name)
if release:
metrics.incr("sourcemaps.lookup.release_file")
for releasefile_id in get_legacy_release_bundles(release, dist):
releasefile_ids = list(get_legacy_release_bundles(release, dist))
for releasefile_id in releasefile_ids:
all_bundles[f"release_file/{releasefile_id}"] = "release-old"
individual_files = get_legacy_releasefile_by_file_url(release, dist, url)
individual_files = list(get_legacy_releasefile_by_file_url(release, dist, url))

maybe_renew_releasefiles(individual_files)
if releasefile_ids:
renew_releasefiles_by_id(releasefile_ids)

# Then: Construct our response
url_constructor = UrlConstructor(request, project)
Expand Down
5 changes: 4 additions & 1 deletion src/sentry/api/endpoints/project_release_file_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from sentry.api.exceptions import ResourceDoesNotExist
from sentry.api.serializers import serialize
from sentry.api.serializers.models.release_file import decode_release_file_id
from sentry.debug_files.release_files import maybe_renew_releasefiles
from sentry.models.distribution import Distribution
from sentry.models.release import Release
from sentry.models.releasefile import ReleaseFile, delete_from_artifact_index, read_artifact_index
Expand Down Expand Up @@ -114,7 +115,9 @@ def _get_releasefile(release: Release, file_id: str, index_op=_get_from_index):
raise ResourceDoesNotExist
if isinstance(id, int):
try:
return ReleaseFile.public_objects.get(release_id=release.id, id=file_id)
releasefile = ReleaseFile.public_objects.get(release_id=release.id, id=file_id)
maybe_renew_releasefiles([releasefile])
return releasefile
except ReleaseFile.DoesNotExist:
raise ResourceDoesNotExist
else:
Expand Down
8 changes: 6 additions & 2 deletions src/sentry/api/endpoints/project_release_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from sentry.api.paginator import ChainPaginator
from sentry.api.serializers import serialize
from sentry.constants import MAX_RELEASE_FILES_OFFSET
from sentry.debug_files.release_files import maybe_renew_releasefiles
from sentry.models.distribution import Distribution
from sentry.models.files.file import File
from sentry.models.release import Release
Expand Down Expand Up @@ -93,8 +94,11 @@ def get_releasefiles(self, request: Request, release, organization_id):
source = ArtifactSource(dist, files, query, checksums)
data_sources.append(source)

def on_results(r):
return serialize(load_dist(r), request.user)
def on_results(release_files: list[ReleaseFile]):
# this should filter out all the "pseudo-ReleaseFile"s
maybe_renew_releasefiles([rf for rf in release_files if rf.id])

return serialize(load_dist(release_files), request.user)

# NOTE: Returned release files are ordered by name within their block,
# (i.e. per index file), but not overall
Expand Down
54 changes: 32 additions & 22 deletions src/sentry/api/endpoints/source_map_debug_blue_thunder_edition.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import Literal, TypedDict

import sentry_sdk
from django.db.models import QuerySet
from django.utils.encoding import force_bytes, force_str
from drf_spectacular.utils import extend_schema
from packaging.version import Version
Expand All @@ -17,6 +16,7 @@
from sentry.apidocs.constants import RESPONSE_FORBIDDEN, RESPONSE_NOT_FOUND, RESPONSE_UNAUTHORIZED
from sentry.apidocs.parameters import EventParams, GlobalParams
from sentry.apidocs.utils import inline_sentry_response_serializer
from sentry.debug_files.release_files import maybe_renew_releasefiles
from sentry.models.artifactbundle import (
ArtifactBundle,
ArtifactBundleArchive,
Expand Down Expand Up @@ -326,7 +326,7 @@ def __init__(self, abs_path: str, project: Project, release: Release, event):
self.matching_source_map_name: str | None = None

# Cached db objects across operations
self.artifact_index_release_files: QuerySet | list[ReleaseFile] | None = None
self.artifact_index_release_files: list[ReleaseFile] | None = None
self.dist_matched_artifact_index_release_file: ReleaseFile | None = None

self._find_source_file_in_basic_uploaded_files()
Expand Down Expand Up @@ -365,15 +365,18 @@ def _find_source_file_in_basic_uploaded_files(self) -> None:
if self.source_file_lookup_result == "found":
return

basic_release_source_files = ReleaseFile.objects.filter(
organization_id=self.project.organization_id,
release_id=self.release.id,
name__in=self.matching_source_file_names,
artifact_count=1, # Filter for un-zipped files
).select_related("file")
basic_release_source_files = list(
ReleaseFile.objects.filter(
organization_id=self.project.organization_id,
release_id=self.release.id,
name__in=self.matching_source_file_names,
artifact_count=1, # Filter for un-zipped files
).select_related("file")
)

if len(basic_release_source_files) > 0:
self.source_file_lookup_result = "wrong-dist"
maybe_renew_releasefiles(basic_release_source_files)

for possible_release_file in basic_release_source_files:
# Check if dist matches
Expand Down Expand Up @@ -427,6 +430,7 @@ def _find_source_file_in_artifact_indexes(self):
file__type="release.bundle",
ident=archive_ident,
)
maybe_renew_releasefiles([archive_file])
with ReleaseArchive(archive_file.file.getfile()) as archive:
source_file, headers = archive.get_file_by_url(
self.found_source_file_name
Expand Down Expand Up @@ -506,15 +510,18 @@ def _find_source_map_in_basic_uploaded_files(self, matching_source_map_name: str
if self.source_map_lookup_result == "found":
return

basic_release_source_map_files = ReleaseFile.objects.filter(
organization_id=self.project.organization_id,
release_id=self.release.id,
name=matching_source_map_name,
artifact_count=1, # Filter for un-zipped files
).select_related("file")
basic_release_source_map_files = list(
ReleaseFile.objects.filter(
organization_id=self.project.organization_id,
release_id=self.release.id,
name=matching_source_map_name,
artifact_count=1, # Filter for un-zipped files
).select_related("file")
)

if len(basic_release_source_map_files) > 0:
self.source_map_lookup_result = "wrong-dist"
maybe_renew_releasefiles(basic_release_source_map_files)
for basic_release_source_map_file in basic_release_source_map_files:
if basic_release_source_map_file.ident == ReleaseFile.get_ident(
basic_release_source_map_file.name, self.event.dist
Expand Down Expand Up @@ -561,18 +568,19 @@ def _find_source_map_in_artifact_bundles(self, matching_source_map_name: str):
self.source_map_lookup_result = "found"
return

def _get_artifact_index_release_files(self):
def _get_artifact_index_release_files(self) -> list[ReleaseFile]:
# Cache result
if self.artifact_index_release_files is not None:
return self.artifact_index_release_files

self.artifact_index_release_files = ReleaseFile.objects.filter(
organization_id=self.project.organization_id,
release_id=self.release.id,
file__type="release.artifact-index",
).select_related("file")[
:ARTIFACT_INDEX_LOOKUP_LIMIT
] # limit by something sane in case people have a large number of dists for the same release
self.artifact_index_release_files = list(
ReleaseFile.objects.filter(
organization_id=self.project.organization_id,
release_id=self.release.id,
file__type="release.artifact-index",
).select_related("file")[:ARTIFACT_INDEX_LOOKUP_LIMIT]
) # limit by something sane in case people have a large number of dists for the same release
maybe_renew_releasefiles(self.artifact_index_release_files)

return self.artifact_index_release_files

Expand All @@ -591,6 +599,8 @@ def _get_dist_matched_artifact_index_release_file(self):
.select_related("file")
.first()
)
if self.dist_matched_artifact_index_release_file:
maybe_renew_releasefiles([self.dist_matched_artifact_index_release_file])

return self.dist_matched_artifact_index_release_file

Expand Down
2 changes: 2 additions & 0 deletions src/sentry/api/helpers/source_map_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from sentry import eventstore
from sentry.api.endpoints.project_release_files import ArtifactSource
from sentry.debug_files.release_files import maybe_renew_releasefiles
from sentry.eventstore.models import BaseEvent
from sentry.interfaces.exception import Exception as ExceptionInterface
from sentry.interfaces.stacktrace import Frame
Expand Down Expand Up @@ -293,6 +294,7 @@ def _get_releasefiles(release: Release, organization_id: int) -> list[ReleaseFil
file_list = file_list.select_related("file").order_by("name")

data_sources.extend(list(file_list.order_by("name")))
maybe_renew_releasefiles(data_sources)

dists = Distribution.objects.filter(organization_id=organization_id, release=release)
for dist in list(dists) + [None]:
Expand Down
40 changes: 40 additions & 0 deletions src/sentry/debug_files/release_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations

from datetime import timedelta

from django.db import router
from django.utils import timezone

from sentry.models.releasefile import ReleaseFile
from sentry.utils import metrics
from sentry.utils.db import atomic_transaction

# Minimum number of days since a release file's `date_accessed` before it is eligible for renewal.
AVAILABLE_FOR_RENEWAL_DAYS = 30


def maybe_renew_releasefiles(releasefiles: list[ReleaseFile]):
    """Renew the already-loaded release files whose last access is old enough.

    The staleness check is done in memory on each object's ``date_accessed``
    so that the database is only touched when at least one file was last
    accessed ``AVAILABLE_FOR_RENEWAL_DAYS`` or more days ago.

    :param releasefiles: ``ReleaseFile`` instances to consider for renewal.
    """
    # Files accessed at or before this instant are eligible for renewal.
    cutoff = timezone.now() - timedelta(days=AVAILABLE_FOR_RENEWAL_DAYS)

    stale_ids = []
    for releasefile in releasefiles:
        if releasefile.date_accessed <= cutoff:
            stale_ids.append(releasefile.id)

    # Only go to the database when there is something to bump.
    if stale_ids:
        renew_releasefiles_by_id(stale_ids)


def renew_releasefiles_by_id(releasefile_ids: list[int]) -> None:
    """Set ``date_accessed`` to now for the given release files that are stale.

    The staleness condition (``date_accessed`` at least
    ``AVAILABLE_FOR_RENEWAL_DAYS`` days old) is part of the ``UPDATE`` filter,
    so ids of recently accessed files can be passed in and are left untouched.

    :param releasefile_ids: primary keys of the ``ReleaseFile`` rows to consider.
        An empty list is a no-op.
    """
    # Nothing to renew: avoid starting the timer and opening a write
    # transaction for an update that cannot match any row.
    if not releasefile_ids:
        return

    # Use a single timestamp for both the threshold and the new value.
    now = timezone.now()
    threshold_date = now - timedelta(days=AVAILABLE_FOR_RENEWAL_DAYS)

    with metrics.timer("release_files_renewal"):
        with atomic_transaction(using=(router.db_for_write(ReleaseFile),)):
            updated_rows_count = ReleaseFile.objects.filter(
                id__in=releasefile_ids, date_accessed__lte=threshold_date
            ).update(date_accessed=now)
            # Only report when something was actually renewed.
            if updated_rows_count > 0:
                metrics.incr("release_files_renewal.were_renewed", updated_rows_count)
38 changes: 38 additions & 0 deletions src/sentry/migrations/0953_make_releasefiles_tti.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 5.2.1 on 2025-07-18 10:28

import django.db.models.functions.datetime
import django.utils.timezone
from django.db import migrations, models

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):
    """Add the ``date_accessed`` column to the ``releasefile`` model."""

    # `is_post_deployment` marks a migration that shouldn't be automatically run in
    # production. Only use it for operations that are safe to run after the code has
    # deployed, which rules out most operations that alter the schema of a table.
    # Things that make sense to mark as post deployment:
    # - Large data migrations: these are typically run manually so they can be monitored
    #   and don't block the deploy for a long period of time while they run.
    # - Adding indexes to large tables: this can take a long time, so it is preferably run
    #   outside deployments. Although adding an index is a schema change, it is completely
    #   safe to run after the code has deployed.
    # Once deployed, run these manually via:
    # https://develop.sentry.dev/database-migrations/#migration-deployment

    is_post_deployment = False

    dependencies = [("sentry", "0952_fix_span_item_event_type_alerts")]

    operations = [
        migrations.AddField(
            model_name="releasefile",
            name="date_accessed",
            field=models.DateTimeField(
                default=django.utils.timezone.now,
                db_default=django.db.models.functions.datetime.Now(),
            ),
        ),
    ]
5 changes: 5 additions & 0 deletions src/sentry/models/releasefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

import sentry_sdk
from django.db import models, router
from django.db.models.functions import Now
from django.utils import timezone

from sentry.backup.scopes import RelocationScope
from sentry.db.models import (
Expand Down Expand Up @@ -74,6 +76,8 @@ class ReleaseFile(Model):
name = models.TextField()
dist_id = BoundedBigIntegerField(null=True, db_index=True)

date_accessed = models.DateTimeField(default=timezone.now, db_default=Now())

#: For classic file uploads, this field is 1.
#: For release archives, this field is 0.
#: For artifact indexes, this field is the number of artifacts contained
Expand Down Expand Up @@ -111,6 +115,7 @@ def update(self, *args, **kwargs):
0
]
kwargs["ident"] = self.ident = type(self).get_ident(kwargs["name"], dist_name)
kwargs["date_accessed"] = timezone.now()
return super().update(*args, **kwargs)

@classmethod
Expand Down
44 changes: 44 additions & 0 deletions tests/sentry/api/endpoints/test_project_artifact_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,50 @@ def test_renewal_with_url(self):
== expected_date_added
)

def test_renewal_of_releasefiles(self):
    """An artifact lookup bumps ``date_accessed`` on stale release files.

    Both the individually uploaded file and the archive created through
    ``create_archive`` start out 45 days old and must be newer afterwards.
    """
    stale_date = datetime.now(tz=timezone.utc) - timedelta(days=45)

    js_file = make_file(
        "application.js", b"wat", "release.file", {"Sourcemap": "application.js.map"}
    )
    individual_releasefile = ReleaseFile.objects.create(
        organization_id=self.project.organization_id,
        release_id=self.release.id,
        file=js_file,
        name="http://example.com/application.js",
        date_accessed=stale_date,
    )

    archive_releasefile, _archive_file = self.create_archive(
        fields={},
        files={"foo": "foo1", "bar": "bar1"},
    )
    archive_releasefile.date_accessed = stale_date
    archive_releasefile.save()

    self.login_as(user=self.user)

    lookup_url = reverse(
        "sentry-api-0-project-artifact-lookup",
        kwargs={
            "organization_id_or_slug": self.project.organization.slug,
            "project_id_or_slug": self.project.slug,
        },
    )
    payload = self.client.get(
        f"{lookup_url}?release={self.release.version}&url=application.js"
    ).json()

    # the lookup finds both, as the bundle is resolved only by the release
    assert len(payload) == 2
    assert [entry["type"] for entry in payload] == ["file", "bundle"]

    # both rows must have been renewed by the lookup
    for renewed_id in (individual_releasefile.id, archive_releasefile.id):
        assert ReleaseFile.objects.get(id=renewed_id).date_accessed > stale_date

def test_access_control(self):
# release file
file_a = make_file("application.js", b"wat", "release.file", {})
Expand Down
Loading