@@ -33,6 +33,17 @@ def get_project(value: str) -> int | None:
33
33
return None
34
34
35
35
36
+ def get_organization (value : str ) -> int | None :
37
+ from sentry .models .organization import Organization
38
+
39
+ try :
40
+ if value .isdigit ():
41
+ return int (value )
42
+ return Organization .objects .get (slug = value ).id
43
+ except Organization .DoesNotExist :
44
+ return None
45
+
46
+
36
47
# We need a unique value to indicate when to stop multiprocessing queue
37
48
# an identity on an object() isn't guaranteed to work between parent
38
49
# and child proc
@@ -104,6 +115,7 @@ def multiprocess_worker(task_queue: _WorkQueue) -> None:
104
115
@click .command ()
105
116
@click .option ("--days" , default = 30 , show_default = True , help = "Numbers of days to truncate on." )
106
117
@click .option ("--project" , help = "Limit truncation to only entries from project." )
118
+ @click .option ("--organization" , help = "Limit truncation to only entries from organization." )
107
119
@click .option (
108
120
"--concurrency" ,
109
121
type = int ,
@@ -127,6 +139,7 @@ def multiprocess_worker(task_queue: _WorkQueue) -> None:
127
139
def cleanup (
128
140
days : int ,
129
141
project : str | None ,
142
+ organization : str | None ,
130
143
concurrency : int ,
131
144
silent : bool ,
132
145
model : tuple [str , ...],
@@ -137,9 +150,9 @@ def cleanup(
137
150
138
151
All data that is older than `--days` will be deleted. The default for
139
152
this is 30 days. In the default setting all projects will be truncated
140
- but if you have a specific project you want to limit this to this can be
141
- done with the `--project` flag which accepts a project ID or a string
142
- with the form `org/project` where both are slugs.
153
+ but if you have a specific project or organization you want to limit this to,
154
+ this can be done with the `--project` or `--organization` flags respectively,
155
+ which accepts a project/organization ID or a string with the form `org/project` where both are slugs.
143
156
"""
144
157
if concurrency < 1 :
145
158
click .echo ("Error: Minimum concurrency is 1" , err = True )
@@ -209,10 +222,13 @@ def is_filtered(model: type[Model]) -> bool:
209
222
exported_data (is_filtered , silent )
210
223
211
224
project_id = None
225
+ organization_id = None
212
226
if SiloMode .get_current_mode () != SiloMode .CONTROL :
213
227
if project :
214
228
remove_cross_project_models (deletes )
215
229
project_id = get_project_id_or_fail (project )
230
+ elif organization :
231
+ organization_id = get_organization_id_or_fail (organization )
216
232
else :
217
233
remove_old_nodestore_values (days )
218
234
@@ -240,6 +256,34 @@ def is_filtered(model: type[Model]) -> bool:
240
256
241
257
task_queue .join ()
242
258
259
+ organization_deletion_query , to_delete_by_organization = prepare_deletes_by_organization (
260
+ organization , organization_id , is_filtered
261
+ )
262
+
263
+ if organization_deletion_query is not None and len (to_delete_by_organization ):
264
+ debug_output ("Running bulk deletes in DELETES_BY_ORGANIZATION" )
265
+ for organization_id_for_deletion in RangeQuerySetWrapper (
266
+ organization_deletion_query .values_list ("id" , flat = True ),
267
+ result_value_getter = lambda item : item ,
268
+ ):
269
+ for model_tp , dtfield , order_by in to_delete_by_organization :
270
+ debug_output (
271
+ f"Removing { model_tp .__name__ } for days={ days } organization={ organization_id_for_deletion } "
272
+ )
273
+
274
+ imp = "." .join ((model_tp .__module__ , model_tp .__name__ ))
275
+
276
+ q = BulkDeleteQuery (
277
+ model = model_tp ,
278
+ dtfield = dtfield ,
279
+ days = days ,
280
+ organization_id = organization_id_for_deletion ,
281
+ order_by = order_by ,
282
+ )
283
+
284
+ for chunk in q .iterator (chunk_size = 100 ):
285
+ task_queue .put ((imp , chunk ))
286
+
243
287
project_deletion_query , to_delete_by_project = prepare_deletes_by_project (
244
288
project , project_id , is_filtered
245
289
)
@@ -357,6 +401,7 @@ def models_which_use_deletions_code_path() -> list[tuple[type[Model], str, str]]
357
401
from sentry .models .artifactbundle import ArtifactBundle
358
402
from sentry .models .eventattachment import EventAttachment
359
403
from sentry .models .grouprulestatus import GroupRuleStatus
404
+ from sentry .models .releasefile import ReleaseFile
360
405
from sentry .models .rulefirehistory import RuleFireHistory
361
406
from sentry .monitors .models import MonitorCheckIn
362
407
from sentry .replays .models import ReplayRecordingSegment
@@ -370,16 +415,19 @@ def models_which_use_deletions_code_path() -> list[tuple[type[Model], str, str]]
370
415
(MonitorCheckIn , "date_added" , "date_added" ),
371
416
(GroupRuleStatus , "date_added" , "date_added" ),
372
417
(RuleFireHistory , "date_added" , "date_added" ),
418
+ (ReleaseFile , "date_accessed" , "date_accessed" ),
373
419
]
374
420
375
421
376
422
def remove_cross_project_models (
377
- deletes : list [tuple [type [Model ], str , str ]]
423
+ deletes : list [tuple [type [Model ], str , str ]],
378
424
) -> list [tuple [type [Model ], str , str ]]:
379
425
from sentry .models .artifactbundle import ArtifactBundle
426
+ from sentry .models .releasefile import ReleaseFile
380
427
381
428
# These models span across projects, so let's skip them
382
429
deletes .remove ((ArtifactBundle , "date_added" , "date_added" ))
430
+ deletes .remove ((ReleaseFile , "date_accessed" , "date_accessed" ))
383
431
return deletes
384
432
385
433
@@ -392,6 +440,15 @@ def get_project_id_or_fail(project: str) -> int:
392
440
return project_id
393
441
394
442
443
+ def get_organization_id_or_fail (organization : str ) -> int :
444
+ click .echo ("Bulk NodeStore deletion not available for organization selection" , err = True )
445
+ organization_id = get_organization (organization )
446
+ if organization_id is None :
447
+ click .echo ("Error: Organization not found" , err = True )
448
+ raise click .Abort ()
449
+ return organization_id
450
+
451
+
395
452
def remove_old_nodestore_values (days : int ) -> None :
396
453
from sentry import nodestore
397
454
@@ -486,6 +543,37 @@ def prepare_deletes_by_project(
486
543
return project_deletion_query , to_delete_by_project
487
544
488
545
546
+ def prepare_deletes_by_organization (
547
+ organization : str | None ,
548
+ organization_id : int | None ,
549
+ is_filtered : Callable [[type [Model ]], bool ],
550
+ ) -> tuple [QuerySet [Any ] | None , list [tuple [Any , str , str ]]]:
551
+ from sentry .constants import ObjectStatus
552
+ from sentry .models .organization import Organization
553
+ from sentry .models .releasefile import ReleaseFile
554
+
555
+ # Deletions that we run per organization. In some cases we can't use an index on just the date
556
+ # column, so as an alternative we use `(organization_id, <date_col>)` instead
557
+ DELETES_BY_ORGANIZATION = [
558
+ (ReleaseFile , "date_accessed" , "date_accessed" ),
559
+ ]
560
+ organization_deletion_query = None
561
+ to_delete_by_organization = []
562
+ if SiloMode .get_current_mode () != SiloMode .CONTROL :
563
+ debug_output ("Preparing DELETES_BY_ORGANIZATION context" )
564
+ organization_deletion_query = Organization .objects .filter (status = ObjectStatus .ACTIVE )
565
+ if organization :
566
+ organization_deletion_query = Organization .objects .filter (id = organization_id )
567
+
568
+ for model_tp_tup in DELETES_BY_ORGANIZATION :
569
+ if is_filtered (model_tp_tup [0 ]):
570
+ debug_output (f">> Skipping { model_tp_tup [0 ].__name__ } " )
571
+ else :
572
+ to_delete_by_organization .append (model_tp_tup )
573
+
574
+ return organization_deletion_query , to_delete_by_organization
575
+
576
+
489
577
def remove_file_blobs (is_filtered : Callable [[type [Model ]], bool ], silent : bool ) -> None :
490
578
from sentry .models .file import FileBlob
491
579
0 commit comments