From fe28c2b06a504981ace1c72f24cc5dcabc53e6a7 Mon Sep 17 00:00:00 2001 From: Helge Wehder Date: Tue, 15 Jul 2025 11:50:27 +0100 Subject: [PATCH 1/7] review draft Signed-off-by: Helge Wehder --- .../CWE-703/CWE-476/compliant03.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py b/docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py new file mode 100644 index 00000000..d63fd463 --- /dev/null +++ b/docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: OpenSSF project contributors +# SPDX-License-Identifier: MIT +"""Compliant Code Example""" + + +def print_number_of_students(classroom: list[str]): + """Print the number of students in a classroom""" + if not isinstance(classroom, list): + raise ValueError("classroom is not a list") + # TODO: also check each entry + print(f"The classroom has {len(classroom)} students.") + + +##################### +# Attempting to exploit above code example +##################### +print_number_of_students(["student 1", "student 2", "Student 3"]) +print_number_of_students(None) From 274d21b8d5f1c511f2c0b3ff84fcfe3a149b3ea4 Mon Sep 17 00:00:00 2001 From: Helge Wehder Date: Tue, 15 Jul 2025 13:22:02 +0100 Subject: [PATCH 2/7] pySCG: adding doc for CWE-664_CWE-409 as part of #531 Signed-off-by: Helge Wehder --- .../CWE-664/CWE-409/README.md | 355 ++++++++++++++++++ .../CWE-664/CWE-409/compliant01.py | 122 +++--- .../CWE-664/CWE-409/example01.py | 108 ++++-- .../{noncomplian01.py => noncompliant01.py} | 3 +- .../CWE-664/CWE-409/noncompliant02.py | 5 +- docs/Secure-Coding-Guide-for-Python/readme.md | 2 +- .../templates/README_TEMPLATE.md | 74 +++- 7 files changed, 565 insertions(+), 104 deletions(-) create mode 100644 docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md rename 
docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/{noncomplian01.py => noncompliant01.py} (84%) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md new file mode 100644 index 00000000..238d8693 --- /dev/null +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md @@ -0,0 +1,355 @@ +# CWE-409: Improper Handling of Highly Compressed Data (Data Amplification) + +Prevent slip and bomb attacks when decompressing and unpacking compressed data such as `ZIP`, `TAR.GZ`, `JAR`, `WAR`, `RPM` or `DOCX`. + +Zip is used representatively in this rule for all compression formats. + +Zip Slip, or directory traversal attacks, use files with a relative path such as `../../../../../bin/bash` or full path `/bin/bash` in order to extract malicious code into an unwanted location [[2018 Snyk](https://snyk.io/blog/zip-slip-vulnerability/)]. Zip slip attacks can be prevented by sanitizing path names as described in _[CWE-180: Incorrect behavior order: Validate before Canonicalize](https://github.com/ossf/wg-best-practices-os-developers/blob/main/docs/Secure-Coding-Guide-for-Python/CWE-707/CWE-180/README.md)_. + +Zip bomb attacks try to overload a system that tries to unpack it for a denial-of-service attack by either containing: + +* Files with easy to compress patterns such as continuation of zeros [[Luisfontes19 2021](https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/)] +* Ridiculously deep folder structures +* Nested symbolic or hard-links +* Nested zip files +* Manipulated zip headers [[Woot 2019](https://www.bamsoftware.com/talks/woot19-zipbomb/)] + +Language specific packaging formats, such as Java `.jar` or Python `.whl`, also use zip for compression. + +To run the examples in the page, you can prepare a file by running the script on the page. 
+ +_Simple zip bomb generator in [example01.py](example01.py):_ + +```py +"""Code Example""" +# SPDX-FileCopyrightText: OpenSSF project contributors +# SPDX-License-Identifier: MIT +"""Code to create a simple zip bomb in python for test purposes""" + +import os +import zipfile + + +def create_zip_slip_and_bomb(zip_filename: str): + """Create a zip file with zip slip and zip bomb content for testing + + Args: + zip_filename (str): name of zip file + """ + with zipfile.ZipFile(zip_filename, "w", compression=zipfile.ZIP_DEFLATED) as zf: + # Adding a normal safe file at the start + zip_normal = "safe_dir/zip_normal.txt" + print(f"Adding a harmless normal file {zip_normal}") + zf.writestr(zip_normal, b"Just a safe file\n") + + print("Creating zip_slip example files") + ##################################################### + # Zip Slip attempts: + # - file with path traversal for unix + slip_file_name = "../" * 10 + "tmp/zip_slip_posix.txt" + slip_file_data = b"This test file tries to escape the extraction directory!\n" + zf.writestr(slip_file_name, slip_file_data) + + # - File with path traversal for Windows + # Internally we have zip use slash, even for Windows + slip_file_name = "../" * 10 + "tmp/zip_slip_windows0.txt" + zf.writestr(slip_file_name, slip_file_data) + + # - File with path traversal for Windows + # Old extractors mishandle backslash + slip_file_name = ".." 
* 10 + "Temp\\zip_slip_windows1.txt" + zf.writestr(slip_file_name, slip_file_data) + + # - Traversal attack with mixed slashes for Windows + # Old extractors mishandling mixed slashes + slip_file_name = "..\\/" * 10 + "Temp\\zip_slip_windows2.txt" + zf.writestr(slip_file_name, slip_file_data) + + # - Absolute path with drive letter for Windows + slip_file_name = "C:/Temp/zip_slip_windows3.txt" + zf.writestr(slip_file_name, slip_file_data) + + ################################################## + # Zip Bomb Attempts: + # - With 150MB files filled with zeros + bomb_uncompressed_size = 150 * 1024 * 1024 # 150 MB + large_data = b"\0" * bomb_uncompressed_size + filename = "zipbombfile" + + # - trying to fake the metadata size to 1KB for the first one + print(f"trying to add fake 1KB metadata for {filename}0.txt") + info = zipfile.ZipInfo(f"{filename}0.txt)") + info.compress_type = zipfile.ZIP_DEFLATED + info.file_size = 1024 # 1 KB (fake size) + zf.writestr(f"{filename}0.txt", large_data) + + # - Some more large files + print("Writing more large zipbombfile's") + for i in range(1, 4): + zf.writestr(f"{filename}{i}.txt", large_data) + + filesize = os.path.getsize(zip_filename) / float(1024 * 1024) + print(f"created \n{zip_filename} : {filesize:.2f} MB") + + +if __name__ == "__main__": + create_zip_slip_and_bomb("zip_attack_test.zip") +``` + +Prefabricated zip bombs and zip slip archives for testing can be found on: [[port9org 2025](https://github.com/port9org/SecLists/tree/master/Payloads/Zip-Bombs)] + +## Non-Compliant Code Example - No File Validation + +The `noncompliant01.py` example simply extracts all the files in the archive without performing any verification. The `extractall()` method will attempt to normalize the path name. Any archive from an untrusted source should be inspected prior to extraction. There is no attempt to control where the files are extracted, which is the script current working directory. 
+ +_[noncompliant01.py](noncompliant01.py):_ + +```python +# SPDX-FileCopyrightText: OpenSSF project contributors +# SPDX-License-Identifier: MIT +"""Non-compliant Code Example""" + +import zipfile + +with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive: + archive.extractall() +``` + +The `noncompliant01.py` code will extract any quantity of payloads. With an unmodified `example01.py` we get only `4 x 150MB` `zipbombfileX.txt`'s that are much bigger than the `0.58MB` `zip_attack_test.zip` archive. + +The directory traversal payload will try to extract a `\Temp\zip_slip_windows.txt` for Windows and a `/tmp/zip_slip_posix.txt` for Unix based systems. Depending on the zip library in use the files may either end up in their intended target, under the same directory as the `zipbombfile.txt` files, or not at all. + +## Non-Compliant Code Example - Incorrect File Validation + +Experiment with the code by varying the `MAXSIZE`. + +The `noncompliant02.py` code example tries to check the file_size from the `ZipInfo` instances provided by the `infolist()` method from `ZipFile`. This information is read from the `zip` archive metadata, so it is not reliable and can be forged by an attacker. The `extract()` method will attempt to normalize the path name. Again, there is no attempt to control where the files are extracted to in order to prevent traversal attacks. The underlying zip library may or may not prevent traversal attacks. 
+ +_[noncompliant02.py](noncompliant02.py):_ + +```python +# SPDX-FileCopyrightText: OpenSSF project contributors +# SPDX-License-Identifier: MIT +"""Non-compliant Code Example""" + +import zipfile + +MAXSIZE = 100 * 1024 * 1024 # limit is in bytes + +with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive: + for member in archive.infolist(): + if member.file_size >= MAXSIZE: + print(f"Unable to extract {member.filename}, exceeds size {MAXSIZE}") + else: + print(f"Extracting {member.filename}") + archive.extract(member.filename) + +``` + +Depending on the underlaying zip library we should see `noncompliant02.py` prevent a zip bomb but not a traversal attack. + +__Example `noncompliant02.py` output:__ + +```bash +Extracting safe_dir/zip_normal.txt +Extracting ../../../../../../../../../../tmp/zip_slip_posix.txt +Extracting ../../../../../../../../../../tmp/zip_slip_windows0.txt +Extracting ....................Temp\zip_slip_windows1.txt +Extracting ..\/..\/..\/..\/..\/..\/..\/..\/..\/..\/Temp\zip_slip_windows2.txt +Extracting C:/Temp/zip_slip_windows3.txt +Unable to extract zipbombfile0.txt, exceeds size 104857600 +Unable to extract zipbombfile1.txt, exceeds size 104857600 +Unable to extract zipbombfile2.txt, exceeds size 104857600 +Unable to extract zipbombfile3.txt, exceeds size 104857600 +``` + +A well manipulated zip archive may be able to fool the `noncompliant02.py` code. + +## Compliant Solution + +In this example, a base path location needs to be provided from the back-end. This should be isolated on the server using an appropriate mechanism such as a separate partition. To limit the amount of files extracted, the amount of entries is incremented after each file and is monitored until it reaches the limit `MAXAMT`. If `MAXAMT` is reached, an exception is thrown. To check the file size of each file as it is being extracted, the `IO` interface `read()` is used. It will attempt to read until `MAXSIZE` plus one byte. 
If the length of the read data exceeds `MAXSIZE`, the file is deemed to be too large and an exception is raised. + +Please note the following aspects of the compliant solution: + +* The `path_validation()` function will check if the path of each ZIP archive member is in the `base_path`, and if it is not, an exception is thrown. A less strict option has also been included to permit subdirectories. +* `MAXSIZE` will need enough RAM dimensioned on the host to hold it in memory +* The code will not retain any permissions on the archive members (for example, some implementations such as `unzip` will store permissions in the external attributes field) + +Change the `MAXSIZE` and `MAXAMT` variables to explore the protection code. + +_[compliant01.py](compliant01.py):_ + +```python +# SPDX-FileCopyrightText: OpenSSF project contributors +# SPDX-License-Identifier: MIT +"""Compliant Code Example""" + +import zipfile +from pathlib import Path + +MAXSIZE = 100 * 1024 * 1024 # limit is in bytes +MAXAMT = 1000 # max amount of files, includes directories in the archive + + +class ZipExtractException(Exception): + """Custom Exception""" + + +def path_validation(filepath: Path, base_path: Path): + """Ensure to have only allowed path names + + Args: + filepath (Path): path to archive + base_path (Path): path to folder for extracting archives + + Raises: + ZipExtractException: if a directory traversal is detected + """ + input_path_resolved = (base_path / filepath).resolve() + base_path_resolved = base_path.resolve() + + if not str(input_path_resolved).startswith(str(base_path_resolved)): + raise ZipExtractException( + f"Filename {str(input_path_resolved)} not in {str(base_path)} directory" + ) + + +def extract_files(filepath: str, base_path: str, exist_ok: bool = True): + """Extract archive below base_path + + Args: + filepath (str): path to archive + base_path (str): path to folder for extracting archives + exist_ok (bool, optional): Overwrite existing. Defaults to True. 
+ + Raises: + ZipExtractException: If there are to many files + ZipExtractException: If there are to big files + ZipExtractException: If a directory traversal is detected + """ + # TODO: avoid exposing sensitive data to a lesser trusted entity via errors + with zipfile.ZipFile(filepath, mode="r") as archive: + # limit number of files: + if len(archive.infolist()) > MAXAMT: + raise ZipExtractException( + f"Metadata check: too many files, limit is {MAXAMT}" + ) + + # validate by iterating over meta data: + for item in archive.infolist(): + # limit file size using meta data: + if item.file_size > MAXSIZE: + raise ZipExtractException( + f"Metadata check: {item.filename} is bigger than {MAXSIZE}" + ) + + path_validation(Path(item.filename), Path(base_path)) + + # create target folder + Path(base_path).mkdir(exist_ok=exist_ok) + + # preparing for extraction, need to create directories first + # as they may come in random order to the files + for item in archive.infolist(): + if item.is_dir(): + xpath = Path(base_path).joinpath(item.filename).resolve() + xpath.mkdir(exist_ok=exist_ok) + + # start of extracting files: + for item in archive.infolist(): + if item.is_dir(): + continue + # we got a file + with archive.open(item.filename, mode="r") as filehandle: + read_data = filehandle.read(MAXSIZE + 1) + if len(read_data) > MAXSIZE: + # meta data was lying to us, actual size is bigger: + raise ZipExtractException( + f"Reality check, {item.filename} bigger than {MAXSIZE}" + ) + xpath = Path(base_path).joinpath(filehandle.name).resolve() + with open(xpath, mode="wb") as filehandle: + filehandle.write(read_data) + print(f"extracted successfully below {base_path}") + + +##################### +# Trying to exploit above code example +##################### + +extract_files("zip_attack_test.zip", "ziptemp") + +``` + +The `compliant01.py` code will extract everything below the provided `base_path` unless it detects a to big file or attempt to extract to a different parent then 
`base_path`. + +## Automated Detection + + +
+ + + + + + + + + + + +
ToolVersionCheckerDescription
SonarQube6.7S5042Expanding archive files without controlling resource consumption is security-sensitive
+ +## Related Guidelines + + + + + + + + + + + + + + + + + + + + + + + + + + +
MITRE CWEPillar: [CWE-664, Improper Control of a Resource Through its Lifetime]
MITRE CWEBase: CWE-409, Improper Handling of Highly Compressed Data (Data Amplification)
MITRE CWEBase: CWE-180: Incorrect behavior order: Validate before Canonicalize
Secure Coding in PythonBase: CWE-180: Incorrect behavior order: Validate before Canonicalize
[SEI CERT Oracle Coding Standard for Java]IDS04-J, Safely extract files from ZipInputStream
OWASP FoundationWSTG-BUSL-09 Test Upload of Malicious Files, Available from: https://owasp.org/www-project-web-security-testing-guide/latest/4-Web_Application_Security_Testing/10-Business_Logic_Testing/09-Test_Upload_of_Malicious_Files
+ +## Bibliography + + + + + + + + + + + + + + + + + + + + + + +
[Snyk 2018]Public Disclosure of a Critical Arbitrary File Overwrite Vulnerability: Zip Slip | Snyk [online]. Available from: https://snyk.io/blog/zip-slip-vulnerability/, [Accessed 15 July 2025]
Python Docszipfile — Work with ZIP archives — Python 3.10.5 documentation [online]. Available from: https://docs.python.org/3/library/zipfile.html [Accessed 15 July 2025]
[Woot 2019]https://www.bamsoftware.com/talks/woot19-zipbomb/
[Luisfontes19 2021]https://thesecurityvault.com/attacks-with-zip-files-and-mitigations/
[port9org 2025]https://github.com/port9org/SecLists/tree/master/Payloads/Zip-Bombs
diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py index 6702ac95..41173bd9 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py @@ -1,63 +1,101 @@ # SPDX-FileCopyrightText: OpenSSF project contributors # SPDX-License-Identifier: MIT -""" Compliant Code Example """ +# SPDX-FileCopyrightText: OpenSSF project contributors +# SPDX-License-Identifier: MIT +"""Compliant Code Example""" + import zipfile from pathlib import Path MAXSIZE = 100 * 1024 * 1024 # limit is in bytes -MAXAMT = 5 # max amount of files, includes directories in the archive +MAXAMT = 1000 # max amount of files, includes directories in the archive class ZipExtractException(Exception): """Custom Exception""" -def path_validation(input_path, base_path, permit_subdirs=True): - """Ensure to have only allowed path names""" - test_path = (Path(base_path) / input_path).resolve() - if permit_subdirs: - if not Path(base_path).resolve() in test_path.resolve().parents: - raise ZipExtractException(f"Filename {test_path} not in {Path(base_path)} directory") - else: - if test_path.parent != Path(base_path).resolve(): - raise ZipExtractException(f"Filename {test_path} not in {Path(base_path)} directory") +def path_validation(filepath: Path, base_path: Path): + """Ensure to have only allowed path names + + Args: + filepath (Path): path to archive + base_path (Path): path to folder for extracting archives + + Raises: + ZipExtractException: if a directory traversal is detected + """ + input_path_resolved = (base_path / filepath).resolve() + base_path_resolved = base_path.resolve() + + if not str(input_path_resolved).startswith(str(base_path_resolved)): + raise ZipExtractException( + f"Filename {str(input_path_resolved)} not in {str(base_path)} directory" + ) + +def extract_files(filepath: str, base_path: str, exist_ok: bool 
= True): + """Extract archive below base_path -def extract_files(file, base_path): - """Unpack zip file into base_path""" - with zipfile.ZipFile(file, mode="r") as archive: - dirs = [] - # Validation: + Args: + filepath (str): path to archive + base_path (str): path to folder for extracting archives + exist_ok (bool, optional): Overwrite existing. Defaults to True. + + Raises: + ZipExtractException: If there are to many files + ZipExtractException: If there are to big files + ZipExtractException: If a directory traversal is detected + """ + # TODO: avoid exposing sensitive data to a lesser trusted entity via errors + + with zipfile.ZipFile(filepath, mode="r") as archive: + # limit number of files: if len(archive.infolist()) > MAXAMT: - raise ZipExtractException(f"Metadata check: too many files, limit is {MAXAMT}") - for zm in archive.infolist(): - if zm.file_size > MAXSIZE: - raise ZipExtractException(f"Metadata check: {zm.filename} is too big, limit is {MAXSIZE}") - path_validation(zm.filename, base_path) - with archive.open(zm.filename, mode='r') as mte: - read_data = mte.read(MAXSIZE + 1) - if len(read_data) > MAXSIZE: - raise ZipExtractException(f"File {zm.filename} bigger than {MAXSIZE}") + raise ZipExtractException( + f"Metadata check: too many files, limit is {MAXAMT}" + ) - if not Path(base_path).resolve().exists(): - Path(base_path).resolve().mkdir(exist_ok=True) + # validate by iterating over meta data: + for item in archive.infolist(): + # limit file size using meta data: + if item.file_size > MAXSIZE: + raise ZipExtractException( + f"Metadata check: {item.filename} is bigger than {MAXSIZE}" + ) - for zm in archive.infolist(): - # Extraction - create directories - if zm.is_dir(): - dirs.append(Path(base_path).resolve().joinpath(zm.filename)) + path_validation(Path(item.filename), Path(base_path)) - for directory in dirs: - Path.mkdir(directory) + # create target folder + Path(base_path).mkdir(exist_ok=exist_ok) + + # preparing for extraction, need to 
create directories first + # as they may come in random order to the files + for item in archive.infolist(): + if item.is_dir(): + xpath = Path(base_path).joinpath(item.filename).resolve() + xpath.mkdir(exist_ok=exist_ok) + + # start of extracting files: + for item in archive.infolist(): + if item.is_dir(): + continue + # we got a file + with archive.open(item.filename, mode="r") as filehandle: + read_data = filehandle.read(MAXSIZE + 1) + if len(read_data) > MAXSIZE: + # meta data was lying to us, actual size is bigger: + raise ZipExtractException( + f"Reality check, {item.filename} bigger than {MAXSIZE}" + ) + xpath = Path(base_path).joinpath(filehandle.name).resolve() + with open(xpath, mode="wb") as filehandle: + filehandle.write(read_data) + print(f"extracted successfully below {base_path}") - for zm in archive.infolist(): - with archive.open(zm.filename, mode='r') as mte: - xpath = Path(base_path).joinpath(mte.name).resolve() - print(f"Writing file {xpath}") - # Skip if directory - if xpath not in dirs: # check if file is a directory - with open(xpath, mode="wb") as filehandle: - filehandle.write(read_data) +##################### +# Trying to exploit above code example +##################### -extract_files("zip_attack_test.zip", "ziptemp") \ No newline at end of file +extract_files("zip_attack_test.zip", "ziptemp") diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/example01.py b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/example01.py index 82a51a80..d342f9ee 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/example01.py +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/example01.py @@ -1,44 +1,72 @@ # SPDX-FileCopyrightText: OpenSSF project contributors # SPDX-License-Identifier: MIT """Code to create a simple zip bomb in python for test purposes""" -import zipfile + import os -import sys - -ZIPBOMB = "zipbombfile.txt" -ZIPTRAVERSAL = "zipslipfile.txt" -ZIPFILENAME = "zip_attack_test.zip" - -# preparing zip bomb 
file -with open(ZIPBOMB, 'w', encoding="utf-8") as filehandle: - for line in range(1023 * 128): - sys.stdout.write(f"Preparing bombfile by writing lines of zero's to {ZIPBOMB}: {line}\r") - sys.stdout.flush() - filehandle.write("0" * 1023 + "\n") - filehandle.close() -filesize = os.path.getsize(ZIPBOMB) / float(1024 * 1024) -print(f"\n{ZIPBOMB} : {filesize:.2f} MB") - -# preparing zip slip file -with open(ZIPTRAVERSAL, 'a', encoding="utf-8") as filehandle: - filehandle.write("Testfile, filename shows if its good or evil") - filehandle.close() -traversal_files = ["zip_normal.txt"] -traversal_files.append("../" * 39 + "tmp/zip_slip_posix.txt") -traversal_files.append(r"..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\Temp\zip_slip_windows.txt") - -with zipfile.ZipFile(ZIPFILENAME, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: - for clone in range(4): - print(f"Adding {ZIPBOMB + str(clone)} as {ZIPFILENAME}") - zf.write(ZIPBOMB, ZIPBOMB + str(clone)) - print("Adding multiple zip slip file's:") - for item in traversal_files: - print(f"Adding traversal attack file TRAVERSAL_FILE as {item}") - zf.write(ZIPTRAVERSAL, item) - -print(f"Removing temporary files: {ZIPBOMB}, {ZIPTRAVERSAL}") -os.remove(ZIPBOMB) -os.remove(ZIPTRAVERSAL) - -filesize = os.path.getsize(ZIPFILENAME) / float(1024 * 1024) -print(f"\n{ZIPFILENAME} : {filesize:.2f} MB") +import zipfile + + +def create_zip_slip_and_bomb(zip_filename: str): + """Create a zip file with zip slip and zip bomb content for testing + + Args: + zip_filename (str): name of zip file + """ + with zipfile.ZipFile(zip_filename, "w", compression=zipfile.ZIP_DEFLATED) as zf: + # Adding a normal safe file at the start + zip_normal = "safe_dir/zip_normal.txt" + print(f"Adding a harmless normal file {zip_normal}") + zf.writestr(zip_normal, b"Just a safe file\n") + + print("Creating zip_slip example files") + ##################################################### + 
# Zip Slip attempts: + # - file with path traversal for unix + slip_file_name = "../" * 10 + "tmp/zip_slip_posix.txt" + slip_file_data = b"This test file tries to escape the extraction directory!\n" + zf.writestr(slip_file_name, slip_file_data) + + # - File with path traversal for Windows + # Internally we have zip use slash, even for Windows + slip_file_name = "../" * 10 + "tmp/zip_slip_windows0.txt" + zf.writestr(slip_file_name, slip_file_data) + + # - File with path traversal for Windows + # Old extractors mishandle backslash + slip_file_name = ".." * 10 + "Temp\\zip_slip_windows1.txt" + zf.writestr(slip_file_name, slip_file_data) + + # - Traversal attack with mixed slashes for Windows + # Old extractors mishandling mixed slashes + slip_file_name = "..\\/" * 10 + "Temp\\zip_slip_windows2.txt" + zf.writestr(slip_file_name, slip_file_data) + + # - Absolute path with drive letter for Windows + slip_file_name = "C:/Temp/zip_slip_windows3.txt" + zf.writestr(slip_file_name, slip_file_data) + + ################################################## + # Zip Bomb Attempts: + # - With 150MB files filled with zeros + bomb_uncompressed_size = 150 * 1024 * 1024 # 150 MB + large_data = b"\0" * bomb_uncompressed_size + filename = "zipbombfile" + + # - trying to fake the metadata size to 1KB for the first one + print(f"trying to add fake 1KB metadata for {filename}0.txt") + info = zipfile.ZipInfo(f"{filename}0.txt)") + info.compress_type = zipfile.ZIP_DEFLATED + info.file_size = 1024 # 1 KB (fake size) + zf.writestr(f"{filename}0.txt", large_data) + + # - Some more large files + print("Writing more large zipbombfile's") + for i in range(1, 4): + zf.writestr(f"{filename}{i}.txt", large_data) + + filesize = os.path.getsize(zip_filename) / float(1024 * 1024) + print(f"created \n{zip_filename} : {filesize:.2f} MB") + + +if __name__ == "__main__": + create_zip_slip_and_bomb("zip_attack_test.zip") diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncomplian01.py 
b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncompliant01.py similarity index 84% rename from docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncomplian01.py rename to docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncompliant01.py index f34fbff7..923a0fe5 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncomplian01.py +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncompliant01.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: OpenSSF project contributors # SPDX-License-Identifier: MIT -""" Non-compliant Code Example """ +"""Non-compliant Code Example""" + import zipfile with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive: diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncompliant02.py b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncompliant02.py index 8092d128..d2e79b92 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncompliant02.py +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/noncompliant02.py @@ -1,9 +1,10 @@ # SPDX-FileCopyrightText: OpenSSF project contributors # SPDX-License-Identifier: MIT -""" Non-compliant Code Example """ +"""Non-compliant Code Example""" + import zipfile -MAXSIZE = 100 * 1024 * 1024 # limit is in bytes +MAXSIZE = 100 * 1024 * 1024 # limit is in bytes with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive: for member in archive.infolist(): diff --git a/docs/Secure-Coding-Guide-for-Python/readme.md b/docs/Secure-Coding-Guide-for-Python/readme.md index c398eddd..5fc4ba5d 100644 --- a/docs/Secure-Coding-Guide-for-Python/readme.md +++ b/docs/Secure-Coding-Guide-for-Python/readme.md @@ -52,7 +52,7 @@ It is __not production code__ and requires code-style or python best practices t |[CWE-197: Numeric Truncation Error](CWE-664/CWE-197/README.md)|| |[CWE-197: Control rounding when converting to less precise numbers](CWE-664/CWE-197/01/README.md)|| |[CWE-400: Uncontrolled Resource 
Consumption](CWE-664/CWE-400/README.md)|| -|[CWE-409: Improper Handling of Highly Compressed Data (Data Amplification)](CWE-664/CWE-409/.)|| +|[CWE-409: Improper Handling of Highly Compressed Data (Data Amplification)](CWE-664/CWE-409/README.md)|| |[CWE-410: Insufficient Resource Pool](CWE-664/CWE-410/README.md)|| |[CWE-426: Untrusted Search Path](CWE-664/CWE-426/README.md)|[CVE-2015-1326](https://www.cvedetails.com/cve/CVE-2015-1326),
CVSSv3.0: __8.8__,
EPSS: __00.20__ (23.11.2023)| |[CWE-459: Incomplete Cleanup](CWE-664/CWE-459/README.md)|| diff --git a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md index d06d0d9d..2ff863ab 100644 --- a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md +++ b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md @@ -4,7 +4,13 @@ Introduction sentence, this will be displayed in search engines. Introduction paragraph, expanding on the introduction sentence... -[*example01.py:*](example01.py) +* Use bullet points instead of commas. +* Be brief. +* Avoid duplicated content. + +An `example01.py` code is optional and useful to demonstrate behaviour that does __not__ work well in the compliant or noncompliant code. + +_[example01.py:](example01.py)_ ```py """Code Example""" @@ -22,7 +28,7 @@ Console output... Introduction to the code example... -*[noncompliant01.py](noncompliant01.py):* +**[noncompliant01.py](noncompliant01.py):** ```python """Non-compliant Code Example""" @@ -60,25 +66,57 @@ Short explanation of expected outcome of running the code example, e.g. "The cod ## Automated Detection -|Tool|Version|Checker|Description| -|:---|:---|:---|:---| -|Bandit|1.7.4 on Python 3.10.4|Not Available|| -|Flake8|8-4.0.1 on Python 3.10.4|Not Available|| +
+
+ + + + + + + + + + + + + + + + + +
ToolVersionCheckerDescription
Bandit1.7.4 on Python 3.10.4Not Available
Flake88-4.0.1 on Python 3.10.4Not Available
## Related Guidelines -||| -|:---|:---| -|[MITRE CWE](http://cwe.mitre.org/)|Pillar: [CWE-000: Name of the pillar (4.13) (mitre.org)](https://cwe.mitre.org/data/definitions/000.html)| -|[MITRE CWE](http://cwe.mitre.org/)|Base/Class (choose which one it is based on the abstraction on the CWE page): [CWE-000: Numeric Truncation Error](https://cwe.mitre.org/data/definitions/000.html)| -|[SEI CERT Coding Standard for Java](https://wiki.sei.cmu.edu/confluence/display/java/SEI+CERT+Oracle+Coding+Standard+for+Java)|[Reference Goes here](http://YOUR_LINK)| -|[SEI CERT C Coding Standard](https://web.archive.org/web/20220511061752/https://wiki.sei.cmu.edu/confluence/display/c/SEI+CERT+C+Coding+Standard)|[Reference Goes here](http://YOUR_LINK)| -|[ISO/IEC TR 24772:2019]|[Reference Goes here](http://YOUR_LINK)| + + + + + + + + + + + + + + + + + +
MITRE CWEPillar: [CWE-682: Incorrect Calculation]
MITRE CWEBase: [CWE-1335: Incorrect Bitwise Shift of Integer (4.12)]
[SEI CERT Oracle Coding Standard for Java][NUM14-J. Use shift operators correctly]
[CERT C Coding Standard][INT34-C. Do not shift an expression by a negative number of bits or by greater than or equal to the number of bits that exist in the operand]
## Bibliography -||| -|:---|:---| -|[[Python docs](https://docs.python.org/3/reference/expressions.html#binary-arithmetic-operations)]|Python Software Foundation. (2024). Expressions, Title goes here [online]. Available from: [https://docs.python.org/3/reference/YOUR_LINK](https://docs.python.org/3/reference/YOUR_LINK) [accessed 1 January 2024] | - -When writing bibligraphy, follow the [Harvard reference guide](https://dkit.ie.libguides.com/harvard/citing-referencing) + + + + + + + + + +
[SEI CERT JAVA 2024] NUM01-J. Do not perform bitwise and arithmetic operations on the same data [online]. Available from: https://wiki.sei.cmu.edu/confluence/display/java/NUM01-J.+Do+not+perform+bitwise+and+arithmetic+operations+on+the+same+data [Accessed 6 May 2025]
[SEI CERT C 2025] CERT C Coding Standard [online]. Available from: https://www.securecoding.cert.org/confluence/display/seccode/CERT+C+Coding+Standard [Accessed 6 May 2025]
From c5cd02e4aad3fcff81b711368787ab6c76ea527d Mon Sep 17 00:00:00 2001 From: Helge Wehder Date: Tue, 15 Jul 2025 13:24:05 +0100 Subject: [PATCH 3/7] pySCG: adding doc for CWE-664_CWE-409 as part of #531 Signed-off-by: Helge Wehder --- docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md index 238d8693..98545f88 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md @@ -352,4 +352,4 @@ The `compliant01.py` code will extract everything below the provided `base_path` -
[port9org 2025] https://github.com/port9org/SecLists/tree/master/Payloads/Zip-Bombs
+
From 6adc36d65f67c90ae258833c38746b482f856f17 Mon Sep 17 00:00:00 2001 From: Helge Wehder Date: Tue, 15 Jul 2025 13:26:49 +0100 Subject: [PATCH 4/7] pySCG: adding doc for CWE-664_CWE-409 as part of #531 Signed-off-by: Helge Wehder --- .../Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md index 2ff863ab..ccf9b7a3 100644 --- a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md +++ b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md @@ -119,4 +119,4 @@ Short explanation of expected outcome of running the code example, e.g. "The cod [SEI CERT C 2025] CERT C Coding Standard [online]. Available from: https://www.securecoding.cert.org/confluence/display/seccode/CERT+C+Coding+Standard [Accessed 6 May 2025] - +
From c83dc5e483ede6c41f6c31e30ad6150ff91c33ed Mon Sep 17 00:00:00 2001 From: Helge Wehder Date: Tue, 15 Jul 2025 13:37:17 +0100 Subject: [PATCH 5/7] fixed md lint errors Signed-off-by: Helge Wehder --- .../CWE-664/CWE-409/README.md | 2 +- .../templates/README_TEMPLATE.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md index 98545f88..059a487b 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md @@ -146,7 +146,7 @@ with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive: ``` -Depending on the underlaying zip library we should see `noncompliant02.py` prevent a zip bomb but not a traversal attack. +Depending on the underlaying zip library we should see `noncompliant02.py` prevent a zip bomb but not a traversal attack. __Example `noncompliant02.py` output:__ diff --git a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md index ccf9b7a3..988566dc 100644 --- a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md +++ b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md @@ -18,7 +18,7 @@ _[example01.py:](example01.py)_ # Code goes here ``` - **Output of example01.py:** + __Output of example01.py:__ ```bash Console output... @@ -28,7 +28,7 @@ Console output... Introduction to the code example... -**[noncompliant01.py](noncompliant01.py):** +_[noncompliant01.py](noncompliant01.py):_ ```python """Non-compliant Code Example""" @@ -48,7 +48,7 @@ Short explanation of expected outcome of running the code example, e.g. "The cod Introduction to the code example... 
-*[compliant01.py](compliant01.py):* +_[compliant01.py](compliant01.py):_ ```python """Compliant Code Example""" From ac80b10686c8aac9848e6016e5696f5989934b26 Mon Sep 17 00:00:00 2001 From: Helge Wehder Date: Tue, 22 Jul 2025 12:15:34 +0100 Subject: [PATCH 6/7] removing wrongly merged file Signed-off-by: Helge Wehder --- .../CWE-703/CWE-476/compliant03.py | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py b/docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py deleted file mode 100644 index d63fd463..00000000 --- a/docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py +++ /dev/null @@ -1,18 +0,0 @@ -# SPDX-FileCopyrightText: OpenSSF project contributors -# SPDX-License-Identifier: MIT -"""Compliant Code Example""" - - -def print_number_of_students(classroom: list[str]): - """Print the number of students in a classroom""" - if not isinstance(classroom, list): - raise ValueError("classroom is not a list") - # TODO: also check each entry - print(f"The classroom has {len(classroom)} students.") - - -##################### -# Attempting to exploit above code example -##################### -print_number_of_students(["student 1", "student 2", "Student 3"]) -print_number_of_students(None) From 0f36af2178a47294d96f64be63e318153e23fcd2 Mon Sep 17 00:00:00 2001 From: Helge Wehder Date: Tue, 22 Jul 2025 12:34:42 +0100 Subject: [PATCH 7/7] added dummy 209 to allow referencing and linking, updated template with suggested changes Signed-off-by: Helge Wehder --- .../CWE-664/CWE-209/README.md | 3 +++ .../CWE-664/CWE-409/README.md | 18 +++++++++++++----- .../CWE-664/CWE-409/compliant01.py | 2 +- .../templates/README_TEMPLATE.md | 3 ++- 4 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-209/README.md diff --git
a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-209/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-209/README.md new file mode 100644 index 00000000..a4734934 --- /dev/null +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-209/README.md @@ -0,0 +1,3 @@ +# CWE-209: Generation of Error Message Containing Sensitive Information + +Dummy file to be replaced during PR diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md index 059a487b..03af28b5 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md @@ -100,7 +100,7 @@ Prefabricated zip bombs and zip slip archives for testing can be found on: [[por ## Non-Compliant Code Example - No File Validation -The `noncompliant01.py` example simply extracts all the files in the archive without performing any verification. The `extractall()` method will attempt to normalize the path name. Any archive from an untrusted source should be inspected prior to extraction. There is no attempt to control where the files are extracted, which is the script current working directory. +The `extractall()` method in `noncompliant01.py` will attempt to normalize the path name while making no attempt to control where the files are extracted to. The script uses the current working directory as a starting point and allows archive entries to escape it. Any archive from an untrusted source must be inspected prior to extraction, and extraction must be forced below a specific path in order to prevent traversal attacks. _[noncompliant01.py](noncompliant01.py):_ @@ -117,13 +117,13 @@ with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive: The `noncompliant01.py` code will extract any quantity of payloads. With a unmodified `example01.py` we get only `4 x 150MB` `zipbombfileX.txt`'s that are much bigger than the `0.58MB` `zip_attack_test.zip` archive.
-The directory traversal payload will try to extract a `\Temp\zip_slip_windows.txt` for Windows and a `/tmp/zip_slip_posix.txt` for Unix based systems. Depending on the zip library in use the files may either end up in their indented target, under the same directory as the `zipbombfile.txt` files, or not at all. +The directory traversal payload will try to extract a `\Temp\zip_slip_windows.txt` for Windows and a `/tmp/zip_slip_posix.txt` for Unix based systems. Depending on the zip library in use the files may either end up in their intended target, under the same directory as the `zipbombfile.txt` files, or not at all. ## Non-Compliant Code Example - Incorrect File Validation Experiment with the code by varying the `MAXSIZE`. -The `noncompliant02.py` code example tries to check the file_size from the `ZipInfo` instances provided by the `infolist()` method from `ZipFile`. This information is read from the `zip` archive metadata, so it is not reliable and can be forged by an attacker. The `extract()` method will attempt to normalize the path name. Again, there is no attempt to control where the files are extracted to in order to prevent traversal attacks. The underlaying zip library may or may not prevent traversal attacks. +The `noncompliant02.py` code example tries to check the `file_size` from the `ZipInfo` instances provided by the `infolist()` method from `ZipFile`. This information is read from the `zip` archive metadata, so it is not reliable and can be forged by an attacker. The `extract()` method will attempt to normalize the path name. Again, there is no attempt to control where the files are extracted to in order to prevent traversal attacks. The underlying zip library may or may not prevent traversal attacks. 
_[noncompliant02.py](noncompliant02.py):_ @@ -146,7 +146,7 @@ with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive: ``` -Depending on the underlaying zip library we should see `noncompliant02.py` prevent a zip bomb but not a traversal attack. +Depending on the underlying zip library we should see `noncompliant02.py` prevent a zip bomb but not a traversal attack. __Example `noncompliant02.py` output:__ @@ -227,7 +227,7 @@ def extract_files(filepath: str, base_path: str, exist_ok: bool = True): ZipExtractException: If there are to big files ZipExtractException: If a directory traversal is detected """ - # TODO: avoid exposing sensitive data to a lesser trusted entity via errors + # TODO: avoid CWE-209: Generation of Error Message Containing Sensitive Information with zipfile.ZipFile(filepath, mode="r") as archive: # limit number of files: if len(archive.infolist()) > MAXAMT: @@ -315,10 +315,18 @@ The `compliant01.py` code will extract everything below the provided `base_path` MITRE CWE Base: CWE-180: Incorrect behavior order: Validate before Canonicalize + + MITRE CWE + Base: CWE-209: Generation of Error Message Containing Sensitive Information + Secure Coding in Python Base: CWE-180: Incorrect behavior order: Validate before Canonicalize + + Secure Coding in Python + Base: CWE-209: Generation of Error Message Containing Sensitive Information + [SEI CERT Oracle Coding Standard for Java] IDS04-J, Safely extract files from ZipInputStream diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py index 41173bd9..61c2be38 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py +++ b/docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py @@ -47,7 +47,7 @@ def extract_files(filepath: str, base_path: str, exist_ok: bool = True): ZipExtractException: If there are to big files ZipExtractException: If a directory traversal is detected """ - 
# TODO: avoid exposing sensitive data to a lesser trusted entity via errors + # TODO: avoid CWE-209: Generation of Error Message Containing Sensitive Information with zipfile.ZipFile(filepath, mode="r") as archive: # limit number of files: diff --git a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md index 988566dc..bcea3cd7 100644 --- a/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md +++ b/docs/Secure-Coding-Guide-for-Python/templates/README_TEMPLATE.md @@ -96,7 +96,7 @@ Short explanation of expected outcome of running the code example, e.g. "The cod MITRE CWE - Base: [CWE-1335: Incorrect Bitwise Shift of Integer (4.12)] + Base or Class (choose which one it is based on the abstraction on the CWE page): [CWE-1335: Incorrect Bitwise Shift of Integer (4.12)] [SEI CERT Oracle Coding Standard for Java] @@ -120,3 +120,4 @@ Short explanation of expected outcome of running the code example, e.g. "The cod CERT C Coding Standard [online]. Available from: https://www.securecoding.cert.org/confluence/display/seccode/CERT+C+Coding+Standard [Accessed 6 May 2025] +When writing bibliography, follow the [Harvard reference guide](https://dkit.ie.libguides.com/harvard/citing-referencing)