4
4
from __future__ import print_function
5
5
from __future__ import unicode_literals
6
6
7
+ import operator
7
8
import os
8
9
import tarfile
9
10
import typing
10
11
from collections import OrderedDict
11
12
from typing import cast , IO
12
13
13
14
import six
15
+ from six .moves import map
14
16
15
17
from . import errors
16
18
from .base import FS
22
24
from .opener import open_fs
23
25
from .permissions import Permissions
24
26
from ._url_tools import url_quote
25
- from .path import relpath , basename , isbase , normpath , parts , frombase
27
+ from .path import (
28
+ dirname ,
29
+ join ,
30
+ relpath ,
31
+ basename ,
32
+ isbase ,
33
+ normpath ,
34
+ parts ,
35
+ frombase ,
36
+ recursepath ,
37
+ relativefrom ,
38
+ )
26
39
from .wrapfs import WrapFS
27
40
28
41
if typing .TYPE_CHECKING :
@@ -255,6 +268,8 @@ class ReadTarFS(FS):
255
268
tarfile .SYMTYPE : ResourceType .symlink ,
256
269
tarfile .CONTTYPE : ResourceType .file ,
257
270
tarfile .LNKTYPE : ResourceType .symlink ,
271
+ # this is how we mark implicit directories
272
+ tarfile .DIRTYPE + b"i" : ResourceType .directory ,
258
273
}
259
274
260
275
@errors .CreateFailed .catch_all
@@ -275,24 +290,66 @@ def _directory_entries(self):
275
290
"""Lazy directory cache."""
276
291
if self ._directory_cache is None :
277
292
_decode = self ._decode
293
+ _encode = self ._encode
294
+
295
+ # collect all directory entries and remove slashes
278
296
_directory_entries = (
279
297
(_decode (info .name ).strip ("/" ), info ) for info in self ._tar
280
298
)
281
299
282
- def _list_tar ():
283
- for name , info in _directory_entries :
284
- try :
285
- _name = normpath (name )
286
- except IllegalBackReference :
287
- # Back references outside root, must be up to no good.
288
- pass
289
- else :
290
- if _name :
291
- yield _name , info
292
-
293
- self ._directory_cache = OrderedDict (_list_tar ())
300
+ # build the cache first before updating it to reduce chances
301
+ # of data races
302
+ _cache = OrderedDict ()
303
+ for name , info in _directory_entries :
304
+ # check for any invalid back references
305
+ try :
306
+ _name = normpath (name )
307
+ except IllegalBackReference :
308
+ continue
309
+
310
+ # add all implicit dirnames if not in the cache already
311
+ for partial_name in map (relpath , recursepath (_name )):
312
+ dirinfo = tarfile .TarInfo (self ._encode (partial_name ))
313
+ dirinfo .type = tarfile .DIRTYPE
314
+ _cache .setdefault (partial_name , dirinfo )
315
+
316
+ # add the entry itself, potentially overwriting implicit entries
317
+ _cache [_name ] = info
318
+
319
+ self ._directory_cache = _cache
294
320
return self ._directory_cache
295
321
322
+ def _follow_symlink (self , entry ):
323
+ """Follow a symlink `TarInfo` to find a concrete entry."""
324
+ _entry = entry
325
+ while _entry .issym ():
326
+ linkname = normpath (
327
+ join (dirname (self ._decode (_entry .name )), self ._decode (_entry .linkname ))
328
+ )
329
+ resolved = self ._resolve (linkname )
330
+ if resolved is None :
331
+ raise errors .ResourceNotFound (linkname )
332
+ _entry = self ._directory_entries [resolved ]
333
+
334
+ return _entry
335
+
336
+ def _resolve (self , path ):
337
+ """Replace path components that are symlinks with concrete components.
338
+
339
+ Returns:
340
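+ str or None: the path with symlink components replaced by their targets (returned unchanged when it is empty or already a known entry), or `None` if it matches no entry.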
341
+
342
+ """
343
+ if path in self ._directory_entries or not path :
344
+ return path
345
+ for prefix in map (relpath , reversed (recursepath (path ))):
346
+ suffix = relativefrom (prefix , path )
347
+ entry = self ._directory_entries .get (prefix )
348
+ if entry is not None and entry .issym ():
349
+ entry = self ._follow_symlink (entry )
350
+ return self ._resolve (join (self ._decode (entry .name ), suffix ))
351
+ return None
352
+
296
353
def __repr__ (self ):
297
354
# type: () -> Text
298
355
return "ReadTarFS({!r})" .format (self ._file )
@@ -327,31 +384,35 @@ def getinfo(self, path, namespaces=None):
327
384
namespaces = namespaces or ()
328
385
raw_info = {} # type: Dict[Text, Dict[Text, object]]
329
386
387
+ # special case for root
330
388
if not _path :
331
389
raw_info ["basic" ] = {"name" : "" , "is_dir" : True }
332
390
if "details" in namespaces :
333
391
raw_info ["details" ] = {"type" : int (ResourceType .directory )}
334
392
335
393
else :
336
- try :
337
- implicit = False
338
- member = self ._directory_entries [_path ]
339
- except KeyError :
340
- if not self .isdir (_path ):
341
- raise errors .ResourceNotFound (path )
342
- implicit = True
343
- member = tarfile .TarInfo (_path )
344
- member .type = tarfile .DIRTYPE
394
+
395
+ _realpath = self ._resolve (_path )
396
+ if _realpath is None :
397
+ raise errors .ResourceNotFound (path )
398
+
399
+ implicit = False
400
+ member = self ._directory_entries [_realpath ]
345
401
346
402
raw_info ["basic" ] = {
347
403
"name" : basename (self ._decode (member .name )),
348
- "is_dir" : member .isdir (),
404
+ "is_dir" : self .isdir (_path ), # is_dir should follow symlinks
349
405
}
350
406
351
407
if "link" in namespaces :
352
- raw_info ["link" ] = {
353
- "target" : self ._decode (member .linkname ) if member .issym () else None
354
- }
408
+ if member .issym ():
409
+ target = join (
410
+ dirname (self ._decode (member .name )),
411
+ self ._decode (member .linkname ),
412
+ )
413
+ else :
414
+ target = None
415
+ raw_info ["link" ] = {"target" : target }
355
416
if "details" in namespaces :
356
417
raw_info ["details" ] = {
357
418
"size" : member .size ,
@@ -381,23 +442,29 @@ def getinfo(self, path, namespaces=None):
381
442
382
443
def isdir (self , path ):
383
444
_path = relpath (self .validatepath (path ))
384
- try :
385
- return self ._directory_entries [_path ].isdir ()
386
- except KeyError :
387
- return any (isbase (_path , name ) for name in self ._directory_entries )
445
+ realpath = self ._resolve (_path )
446
+ if realpath is not None :
447
+ entry = self ._directory_entries [realpath ]
448
+ return self ._follow_symlink (entry ).isdir ()
449
+ else :
450
+ return False
388
451
389
452
def isfile (self , path ):
390
453
_path = relpath (self .validatepath (path ))
391
- try :
392
- return self ._directory_entries [_path ].isfile ()
393
- except KeyError :
454
+ realpath = self ._resolve (_path )
455
+ if realpath is not None :
456
+ entry = self ._directory_entries [realpath ]
457
+ return self ._follow_symlink (entry ).isfile ()
458
+ else :
394
459
return False
395
460
396
461
def islink (self , path ):
397
462
_path = relpath (self .validatepath (path ))
398
- try :
399
- return self ._directory_entries [_path ].issym ()
400
- except KeyError :
463
+ realpath = self ._resolve (_path )
464
+ if realpath is not None :
465
+ entry = self ._directory_entries [realpath ]
466
+ return entry .issym ()
467
+ else :
401
468
return False
402
469
403
470
def setinfo (self , path , info ):
@@ -409,13 +476,28 @@ def listdir(self, path):
409
476
# type: (Text) -> List[Text]
410
477
_path = relpath (self .validatepath (path ))
411
478
412
- if not self .gettype (path ) is ResourceType .directory :
413
- raise errors .DirectoryExpected (path )
479
+ # check the given path exists
480
+ realpath = self ._resolve (_path )
481
+ if realpath is None :
482
+ raise errors .ResourceNotFound (path )
483
+ elif realpath :
484
+ target = self ._follow_symlink (self ._directory_entries [realpath ])
485
+ # check the path is either a symlink mapping to a directory or a directory
486
+ if target .isdir ():
487
+ base = target .name
488
+ elif target .issym ():
489
+ base = target .linkname
490
+ else :
491
+ raise errors .DirectoryExpected (path )
492
+ else :
493
+ base = ""
414
494
495
+ # find all entries in the actual directory
415
496
children = (
416
- frombase (_path , n ) for n in self ._directory_entries if isbase (_path , n )
497
+ frombase (base , n ) for n in self ._directory_entries if isbase (base , n )
417
498
)
418
499
content = (parts (child )[1 ] for child in children if relpath (child ))
500
+
419
501
return list (OrderedDict .fromkeys (content ))
420
502
421
503
def makedir (
@@ -432,17 +514,18 @@ def openbin(self, path, mode="r", buffering=-1, **options):
432
514
# type: (Text, Text, int, **Any) -> BinaryIO
433
515
_path = relpath (self .validatepath (path ))
434
516
517
+ # check the requested mode is only a reading mode
435
518
if "w" in mode or "+" in mode or "a" in mode :
436
519
raise errors .ResourceReadOnly (path )
437
520
438
- try :
439
- member = self ._directory_entries [ _path ]
440
- except KeyError :
441
- six . raise_from ( errors .ResourceNotFound (path ), None )
521
+ # check the path actually resolves after following symlinks
522
+ _realpath = self ._resolve ( _path )
523
+ if _realpath is None :
524
+ raise errors .ResourceNotFound (path )
442
525
443
- # TarFile.extractfile returns None if the entry is
526
+ # TarFile.extractfile returns None if the entry is
444
527
# neither a file nor a symlink
445
- reader = self ._tar .extractfile (member )
528
+ reader = self ._tar .extractfile (self . _directory_entries [ _realpath ] )
446
529
if reader is None :
447
530
raise errors .FileExpected (path )
448
531
0 commit comments