Skip to content

Commit 695b879

Browse files
committed
do not start multipart upload for small writes
1 parent 3696a90 commit 695b879

File tree

1 file changed

+27
-25
lines changed

1 file changed

+27
-25
lines changed

fs_s3fs/_s3fs_file.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,12 @@ def __init__(
139139

140140
self._object = s3_object
141141
self._min_part_size = min_part_size
142-
self._mp = self._object.initiate_multipart_upload(**self._upload_kwargs)
143-
144-
self._buf = b''
142+
self._mp = None
143+
self._buf = bytearray()
145144
self._total_bytes = 0
146145
self._total_parts = 0
147146
self._parts = []
147+
self._closed = False
148148

149149
#
150150
# This member is part of the io.BufferedIOBase interface.
@@ -156,7 +156,7 @@ def flush(self):
156156

157157
@property
158158
def closed(self):
159-
return self._mp is None
159+
return self._closed
160160

161161
def writable(self):
162162
"""Return True if the stream supports writing."""
@@ -180,10 +180,7 @@ def write(self, b):
180180
There's buffering happening under the covers, so this may not actually
181181
do any HTTP transfer right away."""
182182

183-
if self._buf:
184-
self._buf += b
185-
else:
186-
self._buf = b
183+
self._buf.extend(b)
187184

188185
length = len(b)
189186
self._total_bytes += length
@@ -199,36 +196,41 @@ def close(self):
199196

200197
if tuple(sys.exc_info()) != (None, None, None):
201198
self.terminate()
199+
self._closed = True
202200
return
203201

204-
if self._buf:
205-
self._upload_next_part()
206-
207-
if self._total_bytes:
208-
self._mp.complete(MultipartUpload={"Parts": self._parts})
209-
logger.debug("completed multipart upload")
210-
else:
202+
if self._total_bytes < self._min_part_size:
203+
# if we wrote less than min_part_size bytes
204+
# then directly put buffer contents instead of starting
205+
# multipart upload. It also fixes the following:
211206
#
212207
# AWS complains with "The XML you provided was not well-formed or
213208
# did not validate against our published schema" when the input is
214209
# completely empty => abort the upload, no file created.
215210
#
216-
# We work around this by creating an empty file explicitly.
217-
#
218-
logger.debug("empty input, ignoring multipart upload")
219-
self.terminate()
220-
self._object.put(Body=b"", **self._upload_kwargs)
211+
assert not self._mp
212+
logger.debug("small input, ignoring multipart upload")
213+
self._object.put(Body=self._buf, **self._upload_kwargs)
214+
else:
215+
if self._buf:
216+
self._upload_next_part()
217+
self._mp.complete(MultipartUpload={"Parts": self._parts})
218+
logger.debug("completed multipart upload")
219+
221220
self._mp = None
221+
self._closed = True
222222
logger.debug("successfully closed")
223223

224224
@check_if_open
225225
def terminate(self):
226-
"""Cancel the underlying multipart upload."""
227-
assert self._mp, "no multipart upload in progress"
228-
self._mp.abort()
229-
self._mp = None
226+
"""Cancel the underlying multipart upload if any"""
227+
if self._mp:
228+
self._mp.abort()
229+
self._mp = None
230230

231231
def _upload_next_part(self):
232+
if not self._mp:
233+
self._mp = self._object.initiate_multipart_upload(**self._upload_kwargs)
232234
part_num = self._total_parts + 1
233235
logger.info(
234236
"uploading part #%i, %i bytes (total %.3fGB)",
@@ -242,4 +244,4 @@ def _upload_next_part(self):
242244
logger.debug("upload of part #%i finished" % part_num)
243245

244246
self._total_parts += 1
245-
self._buf = bytes()
247+
self._buf.clear()

0 commit comments

Comments
 (0)