From a0ed3f258df83ce79345c4907d431d791965832f Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 28 Jan 2026 11:38:04 +0100 Subject: [PATCH 1/5] Validate WHEEL file tags It can happen that the filename and the dist-info `WHEEL` file mismatch, either because the author manually changed the wheel filename, or due to a bug in the build backend implementation. This caused a number of bugs in uv, which expected the `WHEEL` file to contain accurate tags in the basic `Key: Value` format: * https://github.com/astral-sh/uv/issues/2149 * https://github.com/astral-sh/uv/issues/6615 * https://github.com/astral-sh/uv/issues/15832 * https://github.com/astral-sh/uv/issues/16581 * https://github.com/astral-sh/uv/issues/16823 Another notable example is https://github.com/ziglang/zig-pypi/pull/40, where the author wasn't aware of the expanded tags requirement. This PR adds validation that enforces the basic `Key: Value` format, that the tags in the `WHEEL` file are expanded and that the tags match. --- tests/unit/forklift/test_legacy.py | 142 +++++++++++++++++++++++------ warehouse/forklift/legacy.py | 92 ++++++++++++++++--- 2 files changed, 193 insertions(+), 41 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 4edd47d98013..7a22d7488ca4 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -14,6 +14,7 @@ from textwrap import dedent from unittest import mock +import packaging import pretend import psycopg import pytest @@ -76,10 +77,18 @@ def _get_tar_testdata(compression_type=""): return temp_f.getvalue() -def _get_whl_testdata(name="fake_package", version="1.0"): +def _format_tags(tags: str) -> str: + tags = packaging.tags.parse_tag(tags) + return "\n".join(f"Tag: {tag}" for tag in tags) + + +def _get_whl_testdata( + name="fake_package", version="1.0", tags=_format_tags("py3-none-any") +): temp_f = io.BytesIO() with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr(f"{name}-{version}.dist-info/METADATA", "Fake metadata") + zfp.writestr(f"{name}-{version}.dist-info/WHEEL", tags) zfp.writestr(f"{name}-{version}.dist-info/licenses/LICENSE.MIT", "Fake License") zfp.writestr( f"{name}-{version}.dist-info/licenses/LICENSE.APACHE", "Fake License" @@ -89,6 +98,7 @@ def _get_whl_testdata(name="fake_package", version="1.0"): dedent( f"""\ {name}-{version}.dist-info/METADATA, + {name}-{version}.dist-info/WHEEL, {name}-{version}.dist-info/licenses/LICENSE.MIT, {name}-{version}.dist-info/licenses/LICENSE.APACHE, {name}-{version}.dist-info/RECORD, @@ -349,37 +359,88 @@ def test_zipfile_exceeds_compression_threshold(self, tmpdir): "File exceeds compression ratio of 50", ) - def test_wheel_no_wheel_file(self, tmpdir): - f = str(tmpdir.join("test-1.0-py3-none-any.whl")) - - with zipfile.ZipFile(f, "w") as zfp: - zfp.writestr("something.txt", b"Just a placeholder file") + @pytest.mark.parametrize( + ("tags_filename", "tags_file", "error"), + [ + ( + "py3-none-macosx_11_0_arm64", + "Tag: py3-none-macosx_13_0_arm64", + "Wheel filename and WHEEL file tags mismatch: " + + "py3-none-macosx_11_0_arm64 vs. py3-none-macosx_13_0_arm64", + ), + ( + "py2-none-any", + "Tag: py2-none-any\nTag: py3-none-any", + "WHEEL file has tags not in wheel filename: py3-none-any", + ), + ( + "py2.py3-none-any", + "Tag: py3-none-any", + "Wheel filename has tags not in WHEEL file: py2-none-any", + ), + ( + "py2.py3-none-any", + "Tag: py2.py3-none-any", + "Tags in WHEEL file must be expanded, not compressed: py2.py3-none-any", + ), + ], + ) + def test_upload_wheel_tag_mismatches( + self, + pyramid_config, + db_request, + tags_filename, + tags_file, + error, + ): + user = UserFactory.create() + EmailFactory.create(user=user) + project = ProjectFactory.create() + release = ReleaseFactory.create(project=project, version="1.0") + RoleFactory.create(user=user, project=project) - assert legacy._is_valid_dist_file(f, "bdist_wheel") == ( - False, - "WHEEL not found at test-1.0.dist-info/WHEEL", + filename = "{}-{}-{}.whl".format( + project.normalized_name.replace("-", "_"), release.version, tags_filename ) + data = _get_whl_testdata( + name=project.normalized_name.replace("-", "_"), + version="1.0", + tags=tags_file, + ) + digest = hashlib.md5(data).hexdigest() - def test_wheel_has_wheel_file(self, tmpdir): - f = str(tmpdir.join("test-1.0-py3-none-any.whl")) - - with zipfile.ZipFile(f, "w") as zfp: - zfp.writestr("something.txt", b"Just a placeholder file") - zfp.writestr("test-1.0.dist-info/WHEEL", b"this is the package info") + pyramid_config.testing_securitypolicy(identity=user) + db_request.user = user + db_request.user_agent = "warehouse-tests/6.6.6" + db_request.POST = MultiDict( + { + "metadata_version": "2.4", + "name": project.name, + "version": "1.0", + "summary": "This is my summary!", + "filetype": "bdist_wheel", + "md5_digest": digest, + "content": pretend.stub( + filename=filename, + file=io.BytesIO(data), + type="application/zip", + ), + } + ) + db_request.POST.extend([("pyversion", "py3")]) - assert legacy._is_valid_dist_file(f, "bdist_wheel") == (True, None) + storage_service = pretend.stub(store=lambda path, filepath, meta: None) + db_request.find_service = lambda svc, name=None, context=None: { + IFileStorage: storage_service, + }.get(svc) - def test_invalid_wheel_filename(self, tmpdir): - f = str(tmpdir.join("cheese.whl")) + with pytest.raises(HTTPBadRequest) as excinfo: + legacy.file_upload(db_request) - with zipfile.ZipFile(f, "w") as zfp: - zfp.writestr("something.txt", b"Just a placeholder file") - zfp.writestr("test-1.0.dist-info/WHEEL", b"this is the package info") + resp = excinfo.value - assert legacy._is_valid_dist_file(f, "bdist_wheel") == ( - False, - "Unable to parse name and version from wheel filename", - ) + assert resp.status_code == 400 + assert resp.status == f"400 Invalid distribution file. {error}" def test_incorrect_number_of_top_level_directories_sdist_tar(self, tmpdir): tar_fn = str(tmpdir.join("test.tar.gz")) @@ -2875,7 +2936,9 @@ def test_upload_succeeds_with_wheel( project.normalized_name.replace("-", "_"), release.version, plat ) filebody = _get_whl_testdata( - name=project.normalized_name.replace("-", "_"), version=release.version + name=project.normalized_name.replace("-", "_"), + version=release.version, + tags=_format_tags(f"cp34-none-{plat}"), ) filestoragehash = _storage_hash(filebody) @@ -3225,7 +3288,9 @@ def test_upload_succeeds_with_wheel_after_sdist( release.version, ) filebody = _get_whl_testdata( - name=project.normalized_name.replace("-", "_"), version=release.version + name=project.normalized_name.replace("-", "_"), + version=release.version, + tags=_format_tags("cp34-none-any"), ) filestoragehash = _storage_hash(filebody) @@ -3524,6 +3589,10 @@ def test_upload_warns_with_mismatched_wheel_and_zip_contents( with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr("some_file", "some_data") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", f"{project_name}-{release.version}.dist-info/RECORD,", @@ -3599,11 +3668,16 @@ def test_upload_record_does_not_warn_with_zip_dir( zfp.mkdir(f"{project_name}-{release.version}.dist-info/") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", dedent( f"""\ {project_name}-{release.version}.dist-info/METADATA, + {project_name}-{release.version}.dist-info/WHEEL, {project_name}-{release.version}.dist-info/RECORD, """, ), @@ -3669,11 +3743,16 @@ def test_upload_record_does_not_warn_windows_path_separators( project_name = project.normalized_name.replace("-", "_") with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", dedent( f"""\ {project_name}-{release.version}.dist-info\\METADATA, + {project_name}-{release.version}.dist-info\\WHEEL, {project_name}-{release.version}.dist-info\\RECORD, """, ), @@ -3744,6 +3823,10 @@ def test_upload_record_check_does_not_include_jws_p7s( zfp.mkdir(f"{project_name}-{release.version}.dist-info/") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr( f"{project_name}-{release.version}.dist-info/{exempt_filename}", "" ) @@ -3752,6 +3835,7 @@ def test_upload_record_check_does_not_include_jws_p7s( dedent( f"""\ {project_name}-{release.version}.dist-info/METADATA, + {project_name}-{release.version}.dist-info/WHEEL, {project_name}-{release.version}.dist-info/RECORD, """, ), @@ -3818,6 +3902,10 @@ def test_upload_record_check_exclusions_only_in_dist_info( with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr("some_file", "some_data") zfp.writestr(f"{project_name}-{release.version}.dist-info/METADATA", "") + zfp.writestr( + f"{project_name}-{release.version}.dist-info/WHEEL", + "Tag: cp34-none-any", + ) zfp.writestr(f"not-dist-info/{exempt_filename}", "") zfp.writestr( f"{project_name}-{release.version}.dist-info/RECORD", diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index c6d1f4677036..d26ad36316b7 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -11,6 +11,7 @@ import packaging.requirements import packaging.specifiers +import packaging.tags import packaging.utils import packaging.version import packaging_legacy.version @@ -280,7 +281,7 @@ def _validate_filename(filename, filetype): ) -def _is_valid_dist_file(filename, filetype): +def _is_valid_dist_file(filename: str, filetype: str): """ Perform some basic checks to see whether the indicated file could be a valid distribution file. @@ -335,19 +336,6 @@ def _is_valid_dist_file(filename, filetype): zfp.read(target_file) except KeyError: return False, f"PKG-INFO not found at {target_file}" - if filename.endswith(".whl"): - try: - name, version, _ = os.path.basename(filename).split("-", 2) - except ValueError: - return ( - False, - "Unable to parse name and version from wheel filename", - ) - target_file = os.path.join(f"{name}-{version}.dist-info", "WHEEL") - try: - zfp.read(target_file) - except KeyError: - return False, f"WHEEL not found at {target_file}" # Check the ZIP file record framing # to avoid parser differentials. @@ -390,6 +378,61 @@ def _is_valid_dist_file(filename, filetype): return True, None +def _is_valid_wheel_file( + contents: bytes, tags: frozenset[packaging.tags.Tag] +) -> tuple[bool, str | None]: + """ + Check whether the WHEEL file matches the wheel filename tags. + + A mismatch can happen when manually editing the wheel filename without also editing + the dist-info WHEEL file. + """ + filename_tags = {str(tag) for tag in tags} + dist_info_tags = set() + try: + decoded = contents.decode("utf-8") + except UnicodeDecodeError: + return False, "WHEEL file is not valid UTF-8" + + for line in decoded.splitlines(): + if line.startswith("Tag:"): + tag = line.removeprefix("Tag:").strip() + if "." in tag: + return ( + False, + f"Tags in WHEEL file must be expanded, not compressed: {tag}", + ) + dist_info_tags.add(tag) + + # Handle the case that there's no overlap, usually two different tags. + if not (dist_info_tags & filename_tags): + formatted_filename_tags = ", ".join(sorted(filename_tags)) + formatted_dist_info_tags = ", ".join(sorted(dist_info_tags)) + return ( + False, + "Wheel filename and WHEEL file tags mismatch: " + + f"{formatted_filename_tags} vs. {formatted_dist_info_tags}", + ) + + only_dist_info_tags = dist_info_tags - filename_tags + if only_dist_info_tags: + formatted_tags = ", ".join(sorted(only_dist_info_tags)) + return ( + False, + f"WHEEL file has tags not in wheel filename: {formatted_tags}", + ) + + only_filename_tags = filename_tags - dist_info_tags + if only_filename_tags: + formatted_tags = ", ".join(sorted(only_filename_tags)) + return ( + False, + f"Wheel filename has tags not in WHEEL file: {formatted_tags}", + ) + + return True, None + + def _is_duplicate_file(db_session, filename, hashes): """ Check to see if file already exists, and if it's content matches. @@ -1500,6 +1543,27 @@ def file_upload(request): k: h.hexdigest().lower() for k, h in metadata_file_hashes.items() } + with zipfile.ZipFile(temporary_filename) as zfp: + target_file = os.path.join(f"{name}-{version}.dist-info", "WHEEL") + try: + wheel_file = zfp.read(target_file) + valid, reason = _is_valid_wheel_file(wheel_file, tags) + except KeyError: + valid = False + reason = f"WHEEL not found at {target_file}" + + if not valid: + request.metrics.increment( + "warehouse.upload.failed", + tags=[ + "reason:invalid-distribution-file", + f"filetype:{form.filetype.data}", + ], + ) + raise _exc_with_message( + HTTPBadRequest, f"Invalid distribution file. {reason}" + ) + # If the user provided attestations, verify them # We persist these attestations subsequently, only after the # release file is persisted. From 708826a0221f2a1c00ea2ce25a68dc4624707066 Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 28 Jan 2026 12:30:01 +0100 Subject: [PATCH 2/5] Improve coverage --- tests/unit/forklift/test_legacy.py | 68 +++++++++++++++++++----------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 7a22d7488ca4..21c1851f7678 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -77,34 +77,40 @@ def _get_tar_testdata(compression_type=""): return temp_f.getvalue() -def _format_tags(tags: str) -> str: +def _format_tags(tags: str) -> bytes: + """ + Build a mock WHEEL file from the given tags. + """ tags = packaging.tags.parse_tag(tags) - return "\n".join(f"Tag: {tag}" for tag in tags) + formatted = "Wheel-Version: 1.0\n" + "\n".join(f"Tag: {tag}" for tag in tags) + return formatted.encode("utf-8") def _get_whl_testdata( - name="fake_package", version="1.0", tags=_format_tags("py3-none-any") + name="fake_package", + version="1.0", + tags: bytes | None = _format_tags("py3-none-any"), ): temp_f = io.BytesIO() with zipfile.ZipFile(file=temp_f, mode="w") as zfp: zfp.writestr(f"{name}-{version}.dist-info/METADATA", "Fake metadata") - zfp.writestr(f"{name}-{version}.dist-info/WHEEL", tags) + if tags: + zfp.writestr(f"{name}-{version}.dist-info/WHEEL", tags) zfp.writestr(f"{name}-{version}.dist-info/licenses/LICENSE.MIT", "Fake License") zfp.writestr( f"{name}-{version}.dist-info/licenses/LICENSE.APACHE", "Fake License" ) - zfp.writestr( - f"{name}-{version}.dist-info/RECORD", - dedent( - f"""\ - {name}-{version}.dist-info/METADATA, - {name}-{version}.dist-info/WHEEL, - {name}-{version}.dist-info/licenses/LICENSE.MIT, - {name}-{version}.dist-info/licenses/LICENSE.APACHE, - {name}-{version}.dist-info/RECORD, - """, - ), - ) + record = dedent( + f"""\ + {name}-{version}.dist-info/METADATA, + {name}-{version}.dist-info/licenses/LICENSE.MIT, + {name}-{version}.dist-info/licenses/LICENSE.APACHE, + {name}-{version}.dist-info/RECORD, + """, + ) + if tags: + record += f"{name}-{version}.dist-info/WHEEL,\n" + zfp.writestr(f"{name}-{version}.dist-info/RECORD", record) return temp_f.getvalue() @@ -364,25 +370,35 @@ def test_zipfile_exceeds_compression_threshold(self, tmpdir): [ ( "py3-none-macosx_11_0_arm64", - "Tag: py3-none-macosx_13_0_arm64", + b"Tag: py3-none-macosx_13_0_arm64", "Wheel filename and WHEEL file tags mismatch: " + "py3-none-macosx_11_0_arm64 vs. py3-none-macosx_13_0_arm64", ), ( "py2-none-any", - "Tag: py2-none-any\nTag: py3-none-any", + b"Tag: py2-none-any\nTag: py3-none-any", "WHEEL file has tags not in wheel filename: py3-none-any", ), ( "py2.py3-none-any", - "Tag: py3-none-any", + b"Tag: py3-none-any", "Wheel filename has tags not in WHEEL file: py2-none-any", ), ( "py2.py3-none-any", - "Tag: py2.py3-none-any", + b"Tag: py2.py3-none-any", "Tags in WHEEL file must be expanded, not compressed: py2.py3-none-any", ), + ( + "py3-none-macosx_11_0_arm64", + b"\xff\x00\xff", + "WHEEL file is not valid UTF-8", + ), + ( + "py3-none-macosx_11_0_arm64", + None, + "WHEEL not found at -1.0.dist-info/WHEEL", + ), ], ) def test_upload_wheel_tag_mismatches( @@ -399,11 +415,10 @@ def test_upload_wheel_tag_mismatches( release = ReleaseFactory.create(project=project, version="1.0") RoleFactory.create(user=user, project=project) - filename = "{}-{}-{}.whl".format( - project.normalized_name.replace("-", "_"), release.version, tags_filename - ) + name = project.normalized_name.replace("-", "_") + filename = f"{name}-{release.version}-{tags_filename}.whl" data = _get_whl_testdata( - name=project.normalized_name.replace("-", "_"), + name=name, version="1.0", tags=tags_file, ) @@ -440,7 +455,10 @@ def test_upload_wheel_tag_mismatches( resp = excinfo.value assert resp.status_code == 400 - assert resp.status == f"400 Invalid distribution file. {error}" + rendered_error = f"400 Invalid distribution file. {error}".replace( + "", name + ) + assert resp.status == rendered_error def test_incorrect_number_of_top_level_directories_sdist_tar(self, tmpdir): tar_fn = str(tmpdir.join("test.tar.gz")) From 32f8aa7fa208e5e2f78c0f90e16af8ad8cbfffca Mon Sep 17 00:00:00 2001 From: konstin Date: Sat, 7 Feb 2026 14:58:45 +0100 Subject: [PATCH 3/5] Review --- tests/unit/forklift/test_legacy.py | 12 +++-- warehouse/forklift/legacy.py | 73 +++++++++++++++--------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 21c1851f7678..cfa07889bdf3 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -372,27 +372,29 @@ def test_zipfile_exceeds_compression_threshold(self, tmpdir): "py3-none-macosx_11_0_arm64", b"Tag: py3-none-macosx_13_0_arm64", "Wheel filename and WHEEL file tags mismatch: " - + "py3-none-macosx_11_0_arm64 vs. py3-none-macosx_13_0_arm64", + + "'py3-none-macosx_11_0_arm64' vs. 'py3-none-macosx_13_0_arm64'", ), ( "py2-none-any", b"Tag: py2-none-any\nTag: py3-none-any", - "WHEEL file has tags not in wheel filename: py3-none-any", + "WHEEL file has tags not in wheel filename: 'py3-none-any'", ), ( "py2.py3-none-any", b"Tag: py3-none-any", - "Wheel filename has tags not in WHEEL file: py2-none-any", + "Wheel filename has tags not in WHEEL file: 'py2-none-any'", ), ( "py2.py3-none-any", b"Tag: py2.py3-none-any", - "Tags in WHEEL file must be expanded, not compressed: py2.py3-none-any", + "Tags in WHEEL file must be expanded, not compressed: " + + "'py2.py3-none-any'", ), ( "py3-none-macosx_11_0_arm64", b"\xff\x00\xff", - "WHEEL file is not valid UTF-8", + "Wheel filename and WHEEL file tags mismatch: " + + "'py3-none-macosx_11_0_arm64' vs. ''", ), ( "py3-none-macosx_11_0_arm64", diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index d26ad36316b7..5b0cf1f9018e 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -1,4 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +import email.parser +import email.policy import hashlib import hmac import os.path @@ -378,9 +380,7 @@ def _is_valid_dist_file(filename: str, filetype: str): return True, None -def _is_valid_wheel_file( - contents: bytes, tags: frozenset[packaging.tags.Tag] -) -> tuple[bool, str | None]: +def _validate_wheel_file(contents: bytes, tags: frozenset[packaging.tags.Tag]): """ Check whether the WHEEL file matches the wheel filename tags. @@ -389,49 +389,40 @@ def _is_valid_wheel_file( """ filename_tags = {str(tag) for tag in tags} dist_info_tags = set() - try: - decoded = contents.decode("utf-8") - except UnicodeDecodeError: - return False, "WHEEL file is not valid UTF-8" - - for line in decoded.splitlines(): - if line.startswith("Tag:"): - tag = line.removeprefix("Tag:").strip() - if "." in tag: - return ( - False, - f"Tags in WHEEL file must be expanded, not compressed: {tag}", - ) - dist_info_tags.add(tag) + + data = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(contents) + for key, value in data.items(): + if key != "Tag": + continue + if "." in value: + raise ValueError( + f"Tags in WHEEL file must be expanded, not compressed: '{value}'", + ) + dist_info_tags.add(value) # Handle the case that there's no overlap, usually two different tags. if not (dist_info_tags & filename_tags): - formatted_filename_tags = ", ".join(sorted(filename_tags)) - formatted_dist_info_tags = ", ".join(sorted(dist_info_tags)) - return ( - False, + formatted_filename_tags = "', '".join(sorted(filename_tags)) + formatted_dist_info_tags = "', '".join(sorted(dist_info_tags)) + raise ValueError( "Wheel filename and WHEEL file tags mismatch: " - + f"{formatted_filename_tags} vs. {formatted_dist_info_tags}", + + f"'{formatted_filename_tags}' vs. '{formatted_dist_info_tags}'", ) only_dist_info_tags = dist_info_tags - filename_tags if only_dist_info_tags: - formatted_tags = ", ".join(sorted(only_dist_info_tags)) - return ( - False, - f"WHEEL file has tags not in wheel filename: {formatted_tags}", + formatted_tags = "', '".join(sorted(only_dist_info_tags)) + raise ValueError( + f"WHEEL file has tags not in wheel filename: '{formatted_tags}'", ) only_filename_tags = filename_tags - dist_info_tags if only_filename_tags: - formatted_tags = ", ".join(sorted(only_filename_tags)) - return ( - False, - f"Wheel filename has tags not in WHEEL file: {formatted_tags}", + formatted_tags = "', '".join(sorted(only_filename_tags)) + raise ValueError( + f"Wheel filename has tags not in WHEEL file: '{formatted_tags}'", ) - return True, None - def _is_duplicate_file(db_session, filename, hashes): """ @@ -1547,12 +1538,20 @@ def file_upload(request): target_file = os.path.join(f"{name}-{version}.dist-info", "WHEEL") try: wheel_file = zfp.read(target_file) - valid, reason = _is_valid_wheel_file(wheel_file, tags) + _validate_wheel_file(wheel_file, tags) except KeyError: - valid = False - reason = f"WHEEL not found at {target_file}" - - if not valid: + request.metrics.increment( + "warehouse.upload.failed", + tags=[ + "reason:invalid-distribution-file", + f"filetype:{form.filetype.data}", + ], + ) + raise _exc_with_message( + HTTPBadRequest, + f"Invalid distribution file. WHEEL not found at {target_file}", + ) + except ValueError as reason: request.metrics.increment( "warehouse.upload.failed", tags=[ From 6847c1342ae3af1341c362e5b39d0949f3cb5a0d Mon Sep 17 00:00:00 2001 From: konstin Date: Sat, 7 Feb 2026 20:49:58 +0100 Subject: [PATCH 4/5] Reject non-UTF-8 WHEEL files --- tests/unit/forklift/test_legacy.py | 3 +-- warehouse/forklift/legacy.py | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index cfa07889bdf3..da0b14660668 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -393,8 +393,7 @@ def test_zipfile_exceeds_compression_threshold(self, tmpdir): ( "py3-none-macosx_11_0_arm64", b"\xff\x00\xff", - "Wheel filename and WHEEL file tags mismatch: " - + "'py3-none-macosx_11_0_arm64' vs. ''", + "WHEEL file is not UTF-8 encoded", ), ( "py3-none-macosx_11_0_arm64", diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index 5b0cf1f9018e..44bff460dced 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -390,7 +390,12 @@ def _validate_wheel_file(contents: bytes, tags: frozenset[packaging.tags.Tag]): filename_tags = {str(tag) for tag in tags} dist_info_tags = set() - data = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(contents) + try: + content = contents.decode("utf-8") + except UnicodeDecodeError: + raise ValueError("WHEEL file is not UTF-8 encoded") + + data = email.parser.Parser(policy=email.policy.compat32).parsestr(content) for key, value in data.items(): if key != "Tag": continue From 0ee6a6b57eeb67408a9612a65b71b76823f53cc3 Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 11 Feb 2026 18:49:06 +0100 Subject: [PATCH 5/5] Use HeaderParser instead of Parser --- warehouse/forklift/legacy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index 44bff460dced..292387ebf31c 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -395,7 +395,7 @@ def _validate_wheel_file(contents: bytes, tags: frozenset[packaging.tags.Tag]): except UnicodeDecodeError: raise ValueError("WHEEL file is not UTF-8 encoded") - data = email.parser.Parser(policy=email.policy.compat32).parsestr(content) + data = email.parser.HeaderParser(policy=email.policy.compat32).parsestr(content) for key, value in data.items(): if key != "Tag": continue