@@ -114,25 +114,33 @@ async def _get_chunks(url: str, chunk_size: int) -> Generator[bytes, None, None]
114
114
raise Exception (f"Failed to read dataset at { url } " ) from None
115
115
116
116
117
def _rmrf(path: Path) -> None:
    """
    Removes the file-system object at 'path'.
    :param path: A pathlib.Path pointing at a file, a symlink, or a directory.
    :returns: None
    :raises FileNotFoundError: If nothing exists at 'path'.
    """
    # is_dir() follows symlinks, but shutil.rmtree refuses to operate on a
    # symlink, so a link to a directory must be unlinked rather than rmtree'd.
    if path.is_dir() and not path.is_symlink():
        shutil.rmtree(path)
    else:
        path.unlink()
122
+
123
+
117
124
def _verify_files_dont_exist(
    paths: Iterable[Path], remove_if_exist: bool = False
) -> None:
    """
    Verifies all paths in 'paths' don't exist.
    :param paths: An iterable of pathlib.Paths (strs are also accepted and converted).
    :param remove_if_exist=False: Removes file at path if they already exist.
        A symlink is removed together with every link in its chain and the
        final target it points to.
    :returns: None
    :raises FileExistsError: On the first path found that already exists if remove_if_exist is False.
    """
    for path in paths:
        # Tolerate callers that still pass plain strings.
        path = Path(path)
        # Could be a broken symlink => path.exists() is False
        if not (path.exists() or path.is_symlink()):
            continue
        if not remove_if_exist:
            raise FileExistsError(f"Error: File '{path}' already exists.")
        while path.is_symlink():
            # readlink() returns the target verbatim; a relative target is
            # relative to the directory holding the link, not to the CWD.
            # Joining with an absolute target simply yields that target.
            target = path.parent / path.readlink()
            path.unlink(missing_ok=True)
            path = target
        if path.exists():
            _rmrf(path)
138
146
@@ -254,9 +262,9 @@ async def prepare(
254
262
path / child .name
255
263
for child in map (Path , tf .getnames ())
256
264
if len (child .parents ) == 1 and _is_file_to_symlink (child )
257
- ],
258
- overwrite ,
259
- ) # Only check if top-level fileobject
265
+ ], # Only check if top-level fileobject
266
+ remove_if_exist = overwrite ,
267
+ )
260
268
pbar = tqdm (iterable = tf .getmembers (), total = len (tf .getmembers ()))
261
269
pbar .set_description (f"Extracting { filename } " )
262
270
for member in pbar :
@@ -269,22 +277,24 @@ async def prepare(
269
277
path / child .name
270
278
for child in map (Path , zf .namelist ())
271
279
if len (child .parents ) == 1 and _is_file_to_symlink (child )
272
- ],
273
- overwrite ,
280
+ ], # Only check if top-level fileobject
281
+ remove_if_exist = overwrite ,
274
282
)
275
283
pbar = tqdm (iterable = zf .infolist (), total = len (zf .infolist ()))
276
284
pbar .set_description (f"Extracting { filename } " )
277
285
for member in pbar :
278
286
zf .extract (member = member , path = extract_dir )
279
287
tmp_download_file .unlink ()
280
288
else :
281
- _verify_files_dont_exist ([path / filename ], overwrite )
289
+ _verify_files_dont_exist ([path / filename ], remove_if_exist = overwrite )
282
290
shutil .move (tmp_download_file , extract_dir / filename )
283
291
284
292
# If in jupyterlite environment, the extract_dir = path, so the files are already there.
285
293
if not _is_jupyterlite ():
286
294
# If not in jupyterlite environment, symlink top-level file objects in extract_dir
287
295
for child in filter (_is_file_to_symlink , extract_dir .iterdir ()):
296
+ if (path / child .name ).is_symlink () and overwrite :
297
+ (path / child .name ).unlink ()
288
298
(path / child .name ).symlink_to (child , target_is_directory = child .is_dir ())
289
299
290
300
if verbose :
0 commit comments