Skip to content

Commit 31ac1ae

Browse files
authored
fix: incorrect checksum headers for both CreateMultipartUpload and UploadPart (#1529)
1 parent a929d3a commit 31ac1ae

File tree

4 files changed

+164
-12
lines changed

4 files changed

+164
-12
lines changed

minio/api.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,6 +2832,14 @@ def _complete_multipart_upload(
28322832
tag = SubElement(element, "Part")
28332833
SubElement(tag, "PartNumber", str(part.part_number))
28342834
SubElement(tag, "ETag", '"' + part.etag + '"')
2835+
if part.checksum_crc32:
2836+
SubElement(tag, "ChecksumCRC32", part.checksum_crc32)
2837+
elif part.checksum_crc32c:
2838+
SubElement(tag, "ChecksumCRC32C", part.checksum_crc32c)
2839+
elif part.checksum_sha1:
2840+
SubElement(tag, "ChecksumSHA1", part.checksum_sha1)
2841+
elif part.checksum_sha256:
2842+
SubElement(tag, "ChecksumSHA256", part.checksum_sha256)
28352843
body = getbytes(element)
28362844
headers = HTTPHeaderDict(
28372845
{
@@ -2921,7 +2929,7 @@ def _upload_part(
29212929
region: Optional[str] = None,
29222930
extra_headers: Optional[HTTPHeaderDict] = None,
29232931
extra_query_params: Optional[HTTPQueryDict] = None,
2924-
) -> str:
2932+
) -> ObjectWriteResult:
29252933
"""Execute UploadPart S3 API."""
29262934
query_params = HTTPQueryDict({
29272935
"partNumber": str(part_number),
@@ -2937,7 +2945,7 @@ def _upload_part(
29372945
extra_headers=extra_headers,
29382946
extra_query_params=extra_query_params,
29392947
)
2940-
return cast(str, result.etag)
2948+
return result
29412949

29422950
def _upload_part_task(self, kwargs):
29432951
"""Upload_part task for ThreadPool."""
@@ -3221,7 +3229,10 @@ def put_object(
32213229
)
32223230

32233231
if not upload_id:
3224-
headers.extend(checksum_headers)
3232+
headers.extend(make_headers(
3233+
hashers, add_content_sha256, add_sha256_checksum,
3234+
algorithm_only=True,
3235+
))
32253236
upload_id = self._create_multipart_upload(
32263237
bucket_name=bucket_name,
32273238
object_name=object_name,
@@ -3251,28 +3262,45 @@ def put_object(
32513262
self._upload_part_task, kwargs,
32523263
)
32533264
else:
3254-
etag = self._upload_part(
3265+
result = self._upload_part(
32553266
bucket_name=bucket_name,
32563267
object_name=object_name,
32573268
data=part_data,
32583269
headers=headers,
32593270
upload_id=upload_id,
32603271
part_number=part_number,
32613272
)
3262-
parts.append(Part(part_number, etag))
3273+
parts.append(Part(
3274+
part_number=part_number,
3275+
etag=result.etag,
3276+
checksum_crc32=result.checksum_crc32,
3277+
checksum_crc32c=result.checksum_crc32c,
3278+
checksum_sha1=result.checksum_sha1,
3279+
checksum_sha256=result.checksum_sha256,
3280+
))
32633281

32643282
if pool:
3265-
result = pool.result()
3283+
result_queue = pool.result()
32663284
parts = [Part(0, "")] * part_count
3267-
while not result.empty():
3268-
part_number, etag = result.get()
3269-
parts[part_number - 1] = Part(part_number, etag)
3285+
while not result_queue.empty():
3286+
part_number, upload_result = result_queue.get()
3287+
parts[part_number - 1] = Part(
3288+
part_number=part_number,
3289+
etag=upload_result.etag,
3290+
checksum_crc32=upload_result.checksum_crc32,
3291+
checksum_crc32c=upload_result.checksum_crc32c,
3292+
checksum_sha1=upload_result.checksum_sha1,
3293+
checksum_sha256=upload_result.checksum_sha256,
3294+
)
32703295

32713296
upload_result = self._complete_multipart_upload(
32723297
bucket_name=bucket_name,
32733298
object_name=object_name,
32743299
upload_id=cast(str, upload_id),
32753300
parts=parts,
3301+
extra_headers=HTTPHeaderDict(
3302+
sse.headers() if isinstance(sse, SseCustomerKey) else None
3303+
),
32763304
)
32773305
return ObjectWriteResult.new(
32783306
headers=upload_result.headers,

minio/checksum.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,9 +402,18 @@ def reset_hashers(hashers: Optional[Dict[Algorithm, "Hasher"]]):
402402
def make_headers(
403403
hashers: Optional[Dict[Algorithm, "Hasher"]],
404404
add_content_sha256: bool,
405-
add_sha256_checksum: bool
405+
add_sha256_checksum: bool,
406+
algorithm_only: bool = False
406407
) -> Dict[str, str]:
407-
"""Makes headers for hashers."""
408+
"""Makes headers for hashers.
409+
410+
Args:
411+
hashers: Dictionary of algorithm to hasher instances
412+
add_content_sha256: Whether to add x-amz-content-sha256 header
413+
add_sha256_checksum: Whether to add SHA256 checksum header
414+
algorithm_only: If True, only include algorithm declaration header,
415+
not checksum value headers
416+
"""
408417
headers = {}
409418
if hashers:
410419
for algo, hasher in hashers.items():
@@ -415,5 +424,6 @@ def make_headers(
415424
if not add_sha256_checksum:
416425
continue
417426
headers["x-amz-sdk-checksum-algorithm"] = str(algo)
418-
headers[algo.header()] = base64_string(sum_bytes)
427+
if not algorithm_only:
428+
headers[algo.header()] = base64_string(sum_bytes)
419429
return headers

minio/datatypes.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,10 @@ class Part:
284284
etag: str
285285
last_modified: Optional[datetime] = None
286286
size: Optional[int] = None
287+
checksum_crc32: Optional[str] = None
288+
checksum_crc32c: Optional[str] = None
289+
checksum_sha1: Optional[str] = None
290+
checksum_sha256: Optional[str] = None
287291

288292
@classmethod
289293
def fromxml(cls: Type[C], element: ET.Element) -> C:

tests/functional/tests.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from urllib3._collections import HTTPHeaderDict
4343

4444
from minio import Minio
45+
from minio.checksum import Algorithm
4546
from minio.commonconfig import ENABLED, REPLACE, CopySource, SnowballObject
4647
from minio.datatypes import PostPolicy
4748
from minio.deleteobjects import DeleteObject
@@ -908,6 +909,114 @@ def test_negative_put_object_with_path_segment( # pylint: disable=invalid-name
908909
_client.remove_bucket(bucket_name=bucket_name)
909910

910911

912+
def test_put_object_multipart_with_checksum( # pylint: disable=invalid-name
913+
log_entry):
914+
"""Test put_object() multipart upload with checksum validation.
915+
916+
This test validates the AWS S3 compliant checksum implementation for
917+
multipart uploads:
918+
- CreateMultipartUpload receives algorithm header only (not values)
919+
- UploadPart includes checksum value headers
920+
- CompleteMultipartUpload includes checksums in XML body
921+
"""
922+
923+
# Get a unique bucket_name and object_name
924+
bucket_name = _gen_bucket_name()
925+
object_name = f"{uuid4()}-checksum"
926+
object_name_sha256 = None # Initialize for cleanup
927+
# Use 6 MB to trigger multipart upload (> 5 MB threshold)
928+
length = 6 * MB
929+
930+
log_entry["args"] = {
931+
"bucket_name": bucket_name,
932+
"object_name": object_name,
933+
"length": length,
934+
"data": "LimitedRandomReader(6 * MB)",
935+
"checksum": "Algorithm.CRC32C",
936+
}
937+
938+
try:
939+
_client.make_bucket(bucket_name=bucket_name)
940+
941+
# Upload with CRC32C checksum - triggers multipart upload
942+
reader = LimitedRandomReader(length)
943+
result = _client.put_object(
944+
bucket_name=bucket_name,
945+
object_name=object_name,
946+
data=reader,
947+
length=length,
948+
checksum=Algorithm.CRC32C,
949+
)
950+
951+
# Verify upload succeeded and returned valid result
952+
if not result.etag:
953+
raise ValueError("Upload did not return valid ETag")
954+
955+
# Verify ETag indicates multipart upload (contains dash and part count)
956+
if '-' not in result.etag:
957+
raise ValueError(
958+
f"Expected multipart ETag (with dash), got: {result.etag}")
959+
960+
# Stat the object to verify it exists and has correct size
961+
st_obj = _client.stat_object(
962+
bucket_name=bucket_name,
963+
object_name=object_name,
964+
)
965+
966+
if st_obj.size != length:
967+
raise ValueError(
968+
f"Size mismatch: expected {length}, got {st_obj.size}")
969+
970+
# Test with SHA256 checksum algorithm
971+
object_name_sha256 = f"{uuid4()}-checksum-sha256"
972+
log_entry["args"]["object_name"] = object_name_sha256
973+
log_entry["args"]["checksum"] = "Algorithm.SHA256"
974+
975+
reader = LimitedRandomReader(length)
976+
result = _client.put_object(
977+
bucket_name=bucket_name,
978+
object_name=object_name_sha256,
979+
data=reader,
980+
length=length,
981+
checksum=Algorithm.SHA256,
982+
)
983+
984+
if not result.etag:
985+
raise ValueError("Upload with SHA256 did not return valid ETag")
986+
987+
if '-' not in result.etag:
988+
raise ValueError(
989+
f"Expected multipart ETag for SHA256, got: {result.etag}")
990+
991+
st_obj = _client.stat_object(
992+
bucket_name=bucket_name,
993+
object_name=object_name_sha256,
994+
)
995+
996+
if st_obj.size != length:
997+
raise ValueError(
998+
f"Size mismatch: expected {length}, got {st_obj.size}")
999+
1000+
finally:
1001+
try:
1002+
_client.remove_object(
1003+
bucket_name=bucket_name, object_name=object_name)
1004+
except: # pylint: disable=bare-except
1005+
pass
1006+
if object_name_sha256:
1007+
try:
1008+
_client.remove_object(
1009+
bucket_name=bucket_name,
1010+
object_name=object_name_sha256,
1011+
)
1012+
except: # pylint: disable=bare-except
1013+
pass
1014+
try:
1015+
_client.remove_bucket(bucket_name=bucket_name)
1016+
except: # pylint: disable=bare-except
1017+
pass
1018+
1019+
9111020
def _test_stat_object(log_entry, sse=None, version_check=False):
9121021
"""Test stat_object()."""
9131022

@@ -2393,6 +2502,7 @@ def main():
23932502
test_copy_object_unmodified_since: None,
23942503
test_put_object: {"sse": ssec} if ssec else None,
23952504
test_negative_put_object_with_path_segment: None,
2505+
test_put_object_multipart_with_checksum: None,
23962506
test_stat_object: {"sse": ssec} if ssec else None,
23972507
test_stat_object_version: {"sse": ssec} if ssec else None,
23982508
test_get_object: {"sse": ssec} if ssec else None,

0 commit comments

Comments
 (0)