Skip to content

Commit c27cfd9

Browse files
althonos authored and willmcgugan committed
Revert back to emulate ZipExtFile.seek (#111)
* Implement emulated seeking for `ZipExtFile` objects
* Add regression test for #108
* Fix documentation in `ZipExtFile.seek`
1 parent 92e985c commit c27cfd9

File tree

2 files changed

+30
-50
lines changed

2 files changed

+30
-50
lines changed

fs/zipfs.py

Lines changed: 14 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -33,34 +33,12 @@ def __init__(self, fs, name):
3333
super(_ZipExtFile, self).__init__(_zip.open(name), 'r', name)
3434

3535
def read(self, size=-1):
36-
if self._pos >= self._end:
37-
return b''
38-
elif size is None or size < 0:
39-
size = self._end - self._pos
40-
# NB(@althonos): do NOT replace by self._f.read() !
41-
buf = self._f.read(size-1) + self._f._readbuffer[-1:]
42-
self._f._offset += 1
43-
elif self._f._offset + size <= len(self._f._readbuffer):
44-
buf = self._f._readbuffer[self._f._offset:size+self._f._offset]
45-
self._f._offset += size
46-
else:
47-
buf = self._f.read(size)
36+
buf = self._f.read(-1 if size is None else size)
4837
self._pos += len(buf)
4938
return buf
5039

5140
def read1(self, size=-1):
52-
if self._pos >= self._end:
53-
return b''
54-
if size is None or size < 0:
55-
size = self._end - self._pos
56-
# NB(@althonos): do NOT replace by self._f.read1() !
57-
buf = self._f.read1(size-1) + self._f._readbuffer[-1:]
58-
self._f._offset += 1
59-
elif self._f._offset + size <= len(self._f._readbuffer):
60-
buf = self._f._readbuffer[self._f._offset:size+self._f._offset]
61-
self._f._offset += size
62-
else:
63-
buf = self._f.read1(size)
41+
buf = self._f.read1(-1 if size is None else size)
6442
self._pos += len(buf)
6543
return buf
6644

@@ -87,46 +65,32 @@ def seek(self, offset, whence=Seek.set):
8765
8866
Note:
8967
Zip compression does not support seeking, so the seeking
90-
is emulated. The internal decompression buffer will be used
91-
as much as possible, but sometimes it way be necessary to:
68+
is emulated. Seeking somewhere else than the current position
69+
will need to either:
9270
* reopen the file and restart decompression
9371
* read and discard data to advance in the file
9472
95-
The size of the zip buffer can be changed by setting the
96-
`zipfile.ZipExtFile.MIN_READ_SIZE` attribute.
97-
9873
"""
99-
if whence == Seek.set:
74+
if whence == Seek.current:
75+
offset += self._pos
76+
if whence == Seek.current or whence == Seek.set:
10077
if offset < 0:
101-
raise ValueError("Negative seek position {}".format(offset))
102-
elif offset >= self._pos:
103-
self.seek(offset - self._pos, Seek.current)
104-
else:
105-
self._f = self._zip.open(self.name)
106-
self._pos = 0
107-
self.seek(offset, Seek.set)
108-
elif whence == Seek.current:
109-
if offset > 0:
110-
if self._f._offset + offset < len(self._f._readbuffer):
111-
self._f._offset += offset
112-
else:
113-
self._f.read(offset)
114-
self._pos += offset
115-
elif self._f._offset + offset >= 0:
116-
self._f._offset += offset
117-
self._pos += offset
118-
else:
119-
self.seek(self._pos + offset, Seek.set)
78+
raise ValueError("Negative seek position {}".format(offset))
12079
elif whence == Seek.end:
12180
if offset > 0:
12281
raise ValueError("Positive seek position {}".format(offset))
123-
self.seek(self._end + offset, Seek.set)
82+
offset += self._end
12483
else:
12584
raise ValueError(
12685
"Invalid whence ({}, should be {}, {} or {})".format(
12786
whence, Seek.set, Seek.current, Seek.end
12887
)
12988
)
89+
90+
if offset < self._pos:
91+
self._f = self._zip.open(self.name)
92+
self._pos = 0
93+
self.read(offset - self._pos)
13094
return self._pos
13195

13296
def tell(self):

tests/test_zipfs.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,22 @@ def load_archive(self):
5151
def remove_archive(self):
5252
os.remove(self._temp_path)
5353

54+
def test_large(self):
55+
test_fs = open_fs('mem://')
56+
test_fs.setbytes('test.bin', b'a'*50000)
57+
write_zip(test_fs, self._temp_path)
58+
59+
self.fs = self.load_archive()
60+
61+
with self.fs.openbin('test.bin') as f:
62+
self.assertEqual(f.read(), b'a'*50000)
63+
with self.fs.openbin('test.bin') as f:
64+
self.assertEqual(f.read(50000), b'a'*50000)
65+
with self.fs.openbin('test.bin') as f:
66+
self.assertEqual(f.read1(), b'a'*50000)
67+
with self.fs.openbin('test.bin') as f:
68+
self.assertEqual(f.read1(50000), b'a'*50000)
69+
5470
def test_getinfo(self):
5571
super(TestReadZipFS, self).test_getinfo()
5672
top = self.fs.getinfo('top.txt', ['zip'])

0 commit comments

Comments
 (0)