|
32 | 32 | from tensorflow import gfile |
33 | 33 | from tempfile import gettempdir |
34 | 34 | from io import BytesIO, StringIO |
| 35 | +import gc |
35 | 36 |
|
36 | | -from lucid.misc.io.writing import write |
| 37 | +from lucid.misc.io.writing import write, write_handle |
37 | 38 |
|
38 | 39 |
|
39 | 40 | # create logger with module name, e.g. lucid.misc.io.reading |
@@ -149,6 +150,20 @@ def _read_and_cache(url): |
149 | 150 | return _handle_gfile(local_path) |
150 | 151 | else: |
151 | 152 | log.info("Caching URL '%s' locally at '%s'.", url, local_path) |
152 | | - data = read(url, cache=False) # important to avoid endless loop |
153 | | - write(data, local_path) |
154 | | - return BytesIO(data) |
| 153 | + with write_handle(local_path, 'wb') as output, read_handle(url, cache=False) as input: |
| 154 | + for chunk in _file_chunk_iterator(input): |
| 155 | + output.write(chunk) |
| 156 | + gc.collect() |
| 157 | + return _handle_gfile(local_path) |
| 158 | + |
| 159 | + |
| 160 | +from functools import partial |
| 161 | +from io import DEFAULT_BUFFER_SIZE |
| 162 | +import sys |
| 163 | + |
| 164 | +def _file_chunk_iterator(file_handle): |
| 165 | + reader = partial(file_handle.read1, DEFAULT_BUFFER_SIZE) |
| 166 | + file_iterator = iter(reader, bytes()) |
| 167 | + # TODO: once dropping Python <3.3 compat, update to `yield from ...` |
| 168 | + for chunk in file_iterator: |
| 169 | + yield chunk |
0 commit comments