Skip to content
This repository was archived by the owner on Apr 10, 2024. It is now read-only.

Commit 3c74744

Browse files
Make file caching more robust after seeing MemoryErrors on some workers
1 parent d523bce commit 3c74744

File tree

1 file changed

+19
-4
lines changed

1 file changed

+19
-4
lines changed

lucid/misc/io/reading.py

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,9 @@
3232
from tensorflow import gfile
3333
from tempfile import gettempdir
3434
from io import BytesIO, StringIO
35+
import gc
3536

36-
from lucid.misc.io.writing import write
37+
from lucid.misc.io.writing import write, write_handle
3738

3839

3940
# create logger with module name, e.g. lucid.misc.io.reading
@@ -149,6 +150,20 @@ def _read_and_cache(url):
149150
return _handle_gfile(local_path)
150151
else:
151152
log.info("Caching URL '%s' locally at '%s'.", url, local_path)
152-
data = read(url, cache=False) # important to avoid endless loop
153-
write(data, local_path)
154-
return BytesIO(data)
153+
with write_handle(local_path, 'wb') as output, read_handle(url, cache=False) as input:
154+
for chunk in _file_chunk_iterator(input):
155+
output.write(chunk)
156+
gc.collect()
157+
return _handle_gfile(local_path)
158+
159+
160+
from functools import partial
161+
from io import DEFAULT_BUFFER_SIZE
162+
import sys
163+
164+
def _file_chunk_iterator(file_handle):
    """Yield successive chunks read from `file_handle` until it is exhausted.

    Each chunk is requested with a size of `DEFAULT_BUFFER_SIZE`, so a caller
    can copy a stream without holding its whole content in memory at once.

    Args:
      file_handle: an open file-like object exposing `read1(size)` or
        `read(size)`.

    Yields:
      Non-empty chunks, in order, exactly as returned by the handle.
    """
    # Prefer `read1` when the handle offers it, but fall back to plain `read`:
    # `read1` is only defined on buffered binary streams, so calling it
    # unconditionally raises AttributeError for handles that lack it.
    read_chunk = getattr(file_handle, "read1", None) or file_handle.read
    while True:
        chunk = read_chunk(DEFAULT_BUFFER_SIZE)
        # Any empty result marks end of stream. Comparing against b"" alone
        # would never terminate for a handle that returns "" at EOF.
        if not chunk:
            return
        yield chunk

0 commit comments

Comments
 (0)