2121import threading
2222import time
2323from typing import Optional , Protocol , Sequence
24+ from urllib import parse
2425
2526from absl import logging
2627from etils import epath
3132from orbax .checkpoint ._src .path import step as step_lib
3233
3334
35+ urlparse = parse .urlparse
3436PurePosixPath = pathlib .PurePosixPath
3537
3638_THREADED_DELETE_DURATION = (
@@ -183,7 +185,8 @@ def delete(self, step: int) -> None:
183185 # Attempt to rename using GCS HNS API if configured.
184186 if self ._todelete_full_path is not None :
185187 if gcs_utils .is_gcs_path (self ._directory ):
186- self ._rename_gcs_step_with_hns (step , delete_target )
188+ # This is recommended for GCS buckets with HNS enabled.
189+ self ._gcs_rename_step (step , delete_target )
187190 else :
188191 raise NotImplementedError ()
189192 # Attempt to rename to local subdirectory using `todelete_subdir`
@@ -204,88 +207,55 @@ def delete(self, step: int) -> None:
204207 time .time () - start ,
205208 )
206209
def _gcs_rename_step(
    self, step: int, delete_target: epath.Path
) -> None:
  """Renames a GCS step directory into the `_todelete_full_path` folder.

  The step directory is moved to
  `gs://<bucket>/<_todelete_full_path>/<step dir name>-<timestamp>` using
  `epath.Path.rename`. The bucket is taken from `delete_target` itself, so
  the destination always lives in the same bucket as the source.

  NOTE(review): the rename is delegated to the filesystem backend behind
  `epath`. It is expected to be a fast atomic rename on HNS-enabled buckets
  and a slower copy/delete on flat buckets -- confirm against the backend in
  use.

  Args:
    step: The checkpoint step number (used for logging only).
    delete_target: The path to the directory to be renamed.

  Raises:
    ValueError: If no bucket name can be parsed from `delete_target`.
    Exception: Any error raised while creating the destination parent or
      renaming is logged and re-raised unchanged.
  """
  # Validate and build paths before touching the filesystem so that the
  # `try` below only covers operations that can fail remotely.
  bucket_name = urlparse(str(delete_target)).netloc
  if not bucket_name:
    raise ValueError(
        f'Could not parse bucket name from path: {delete_target}'
    )

  # Strip surrounding slashes so a value such as '/trash/' does not produce
  # 'gs://bucket//trash/' (an empty path segment).
  todelete_dir = str(self._todelete_full_path).strip('/')
  destination_parent_path = epath.Path(f'gs://{bucket_name}/{todelete_dir}')

  # A microsecond-resolution timestamp suffix keeps repeated deletions of
  # the same step name from colliding in the destination folder.
  timestamp_str = datetime.datetime.now().strftime('%Y%m%d-%H%M%S-%f')
  dest_path = destination_parent_path / f'{delete_target.name}-{timestamp_str}'

  try:
    destination_parent_path.mkdir(parents=True, exist_ok=True)
    logging.info(
        'Executing filesystem-aware rename: Source=`%s`, Destination=`%s`',
        delete_target,
        dest_path,
    )
    delete_target.rename(dest_path)
  except Exception as e:
    # Log with step context, then let the caller see the original error.
    logging.error('Rename failed for step %d. Error: %s', step, e)
    raise
  logging.info('Successfully renamed step %d to %s', step, dest_path)
289259
290260 def _rename_step_to_subdir (self , step : int , delete_target : epath .Path ):
291261 """Renames a step directory to its corresponding todelete_subdir."""
0 commit comments