Skip to content

pySCG pillar 664 base 409 #944

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
355 changes: 355 additions & 0 deletions docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/README.md

Large diffs are not rendered by default.

122 changes: 80 additions & 42 deletions docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/compliant01.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,101 @@
# SPDX-FileCopyrightText: OpenSSF project contributors
# SPDX-License-Identifier: MIT
""" Compliant Code Example """
# SPDX-FileCopyrightText: OpenSSF project contributors
# SPDX-License-Identifier: MIT
"""Compliant Code Example"""

import zipfile
from pathlib import Path

MAXSIZE = 100 * 1024 * 1024 # limit is in bytes
MAXAMT = 5 # max amount of files, includes directories in the archive
MAXAMT = 1000 # max amount of files, includes directories in the archive


class ZipExtractException(Exception):
"""Custom Exception"""
Comment on lines 14 to 15
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't we use the built-in ValueError instead of creating a custom exception with no distinct characteristics?



def path_validation(input_path, base_path, permit_subdirs=True):
"""Ensure to have only allowed path names"""
test_path = (Path(base_path) / input_path).resolve()
if permit_subdirs:
if not Path(base_path).resolve() in test_path.resolve().parents:
raise ZipExtractException(f"Filename {test_path} not in {Path(base_path)} directory")
else:
if test_path.parent != Path(base_path).resolve():
raise ZipExtractException(f"Filename {test_path} not in {Path(base_path)} directory")
def path_validation(filepath: Path, base_path: Path):
"""Ensure to have only allowed path names

Args:
filepath (Path): path to archive
base_path (Path): path to folder for extracting archives

Raises:
ZipExtractException: if a directory traversal is detected
"""
input_path_resolved = (base_path / filepath).resolve()
base_path_resolved = base_path.resolve()

if not str(input_path_resolved).startswith(str(base_path_resolved)):
raise ZipExtractException(
f"Filename {str(input_path_resolved)} not in {str(base_path)} directory"
)


def extract_files(filepath: str, base_path: str, exist_ok: bool = True):
"""Extract archive below base_path

def extract_files(file, base_path):
"""Unpack zip file into base_path"""
with zipfile.ZipFile(file, mode="r") as archive:
dirs = []
# Validation:
Args:
filepath (str): path to archive
base_path (str): path to folder for extracting archives
exist_ok (bool, optional): Overwrite existing. Defaults to True.

Raises:
ZipExtractException: If there are to many files
ZipExtractException: If there are to big files
ZipExtractException: If a directory traversal is detected
"""
# TODO: avoid exposing sensitive data to a lesser trusted entity via errors
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could reference CWE-209 to give an example of how to do it once that rule gets merged :D


with zipfile.ZipFile(filepath, mode="r") as archive:
# limit number of files:
if len(archive.infolist()) > MAXAMT:
raise ZipExtractException(f"Metadata check: too many files, limit is {MAXAMT}")
for zm in archive.infolist():
if zm.file_size > MAXSIZE:
raise ZipExtractException(f"Metadata check: {zm.filename} is too big, limit is {MAXSIZE}")
path_validation(zm.filename, base_path)
with archive.open(zm.filename, mode='r') as mte:
read_data = mte.read(MAXSIZE + 1)
if len(read_data) > MAXSIZE:
raise ZipExtractException(f"File {zm.filename} bigger than {MAXSIZE}")
raise ZipExtractException(
f"Metadata check: too many files, limit is {MAXAMT}"
)

if not Path(base_path).resolve().exists():
Path(base_path).resolve().mkdir(exist_ok=True)
# validate by iterating over meta data:
for item in archive.infolist():
# limit file size using meta data:
if item.file_size > MAXSIZE:
raise ZipExtractException(
f"Metadata check: {item.filename} is bigger than {MAXSIZE}"
)

for zm in archive.infolist():
# Extraction - create directories
if zm.is_dir():
dirs.append(Path(base_path).resolve().joinpath(zm.filename))
path_validation(Path(item.filename), Path(base_path))

for directory in dirs:
Path.mkdir(directory)
# create target folder
Path(base_path).mkdir(exist_ok=exist_ok)

# preparing for extraction, need to create directories first
# as they may come in random order to the files
for item in archive.infolist():
if item.is_dir():
xpath = Path(base_path).joinpath(item.filename).resolve()
xpath.mkdir(exist_ok=exist_ok)

# start of extracting files:
for item in archive.infolist():
if item.is_dir():
continue
# we got a file
with archive.open(item.filename, mode="r") as filehandle:
read_data = filehandle.read(MAXSIZE + 1)
if len(read_data) > MAXSIZE:
# meta data was lying to us, actual size is bigger:
raise ZipExtractException(
f"Reality check, {item.filename} bigger than {MAXSIZE}"
)
xpath = Path(base_path).joinpath(filehandle.name).resolve()
with open(xpath, mode="wb") as filehandle:
filehandle.write(read_data)
print(f"extracted successfully below {base_path}")

for zm in archive.infolist():
with archive.open(zm.filename, mode='r') as mte:
xpath = Path(base_path).joinpath(mte.name).resolve()
print(f"Writing file {xpath}")
# Skip if directory
if xpath not in dirs: # check if file is a directory
with open(xpath, mode="wb") as filehandle:
filehandle.write(read_data)

#####################
# Trying to exploit above code example
#####################

extract_files("zip_attack_test.zip", "ziptemp")
extract_files("zip_attack_test.zip", "ziptemp")
108 changes: 68 additions & 40 deletions docs/Secure-Coding-Guide-for-Python/CWE-664/CWE-409/example01.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,72 @@
# SPDX-FileCopyrightText: OpenSSF project contributors
# SPDX-License-Identifier: MIT
"""Code to create a simple zip bomb in python for test purposes"""
import zipfile

import os
import sys

ZIPBOMB = "zipbombfile.txt"
ZIPTRAVERSAL = "zipslipfile.txt"
ZIPFILENAME = "zip_attack_test.zip"

# preparing zip bomb file
with open(ZIPBOMB, 'w', encoding="utf-8") as filehandle:
for line in range(1023 * 128):
sys.stdout.write(f"Preparing bombfile by writing lines of zero's to {ZIPBOMB}: {line}\r")
sys.stdout.flush()
filehandle.write("0" * 1023 + "\n")
filehandle.close()
filesize = os.path.getsize(ZIPBOMB) / float(1024 * 1024)
print(f"\n{ZIPBOMB} : {filesize:.2f} MB")

# preparing zip slip file
with open(ZIPTRAVERSAL, 'a', encoding="utf-8") as filehandle:
filehandle.write("Testfile, filename shows if its good or evil")
filehandle.close()
traversal_files = ["zip_normal.txt"]
traversal_files.append("../" * 39 + "tmp/zip_slip_posix.txt")
traversal_files.append(r"..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\..\Temp\zip_slip_windows.txt")

with zipfile.ZipFile(ZIPFILENAME, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
for clone in range(4):
print(f"Adding {ZIPBOMB + str(clone)} as {ZIPFILENAME}")
zf.write(ZIPBOMB, ZIPBOMB + str(clone))
print("Adding multiple zip slip file's:")
for item in traversal_files:
print(f"Adding traversal attack file TRAVERSAL_FILE as {item}")
zf.write(ZIPTRAVERSAL, item)

print(f"Removing temporary files: {ZIPBOMB}, {ZIPTRAVERSAL}")
os.remove(ZIPBOMB)
os.remove(ZIPTRAVERSAL)

filesize = os.path.getsize(ZIPFILENAME) / float(1024 * 1024)
print(f"\n{ZIPFILENAME} : {filesize:.2f} MB")
import zipfile


def create_zip_slip_and_bomb(zip_filename: str):
"""Create a zip file with zip slip and zip bomb content for testing

Args:
zip_filename (str): name of zip file
"""
with zipfile.ZipFile(zip_filename, "w", compression=zipfile.ZIP_DEFLATED) as zf:
# Adding a normal safe file at the start
zip_normal = "safe_dir/zip_normal.txt"
print(f"Adding a harmless normal file {zip_normal}")
zf.writestr(zip_normal, b"Just a safe file\n")

print("Creating zip_slip example files")
#####################################################
# Zip Slip attempts:
# - file with path traversal for unix
slip_file_name = "../" * 10 + "tmp/zip_slip_posix.txt"
slip_file_data = b"This test file tries to escape the extraction directory!\n"
zf.writestr(slip_file_name, slip_file_data)

# - File with path traversal for Windows
# Internally we have zip use slash, even for Windows
slip_file_name = "../" * 10 + "tmp/zip_slip_windows0.txt"
zf.writestr(slip_file_name, slip_file_data)

# - File with path traversal for Windows
# Old extractors mishandle backslash
slip_file_name = ".." * 10 + "Temp\\zip_slip_windows1.txt"
zf.writestr(slip_file_name, slip_file_data)

# - Traversal attack with mixed slashes for Windows
# Old extractors mishandling mixed slashes
slip_file_name = "..\\/" * 10 + "Temp\\zip_slip_windows2.txt"
zf.writestr(slip_file_name, slip_file_data)

# - Absolute path with drive letter for Windows
slip_file_name = "C:/Temp/zip_slip_windows3.txt"
zf.writestr(slip_file_name, slip_file_data)

##################################################
# Zip Bomb Attempts:
# - With 150MB files filled with zeros
bomb_uncompressed_size = 150 * 1024 * 1024 # 150 MB
large_data = b"\0" * bomb_uncompressed_size
filename = "zipbombfile"

# - trying to fake the metadata size to 1KB for the first one
print(f"trying to add fake 1KB metadata for {filename}0.txt")
info = zipfile.ZipInfo(f"{filename}0.txt)")
info.compress_type = zipfile.ZIP_DEFLATED
info.file_size = 1024 # 1 KB (fake size)
zf.writestr(f"{filename}0.txt", large_data)

# - Some more large files
print("Writing more large zipbombfile's")
for i in range(1, 4):
zf.writestr(f"{filename}{i}.txt", large_data)

filesize = os.path.getsize(zip_filename) / float(1024 * 1024)
print(f"created \n{zip_filename} : {filesize:.2f} MB")


if __name__ == "__main__":
create_zip_slip_and_bomb("zip_attack_test.zip")
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: OpenSSF project contributors
# SPDX-License-Identifier: MIT
""" Non-compliant Code Example """
"""Non-compliant Code Example"""

import zipfile

with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# SPDX-FileCopyrightText: OpenSSF project contributors
# SPDX-License-Identifier: MIT
""" Non-compliant Code Example """
"""Non-compliant Code Example"""

import zipfile

MAXSIZE = 100 * 1024 * 1024 # limit is in bytes
MAXSIZE = 100 * 1024 * 1024 # limit is in bytes

with zipfile.ZipFile("zip_attack_test.zip", mode="r") as archive:
for member in archive.infolist():
Expand Down
18 changes: 18 additions & 0 deletions docs/Secure-Coding-Guide-for-Python/CWE-703/CWE-476/compliant03.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file seems unrelated to the pull request. I can't find this CWE on the main branch. Was it added by accident or will the readme and the other code examples be added later?

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-FileCopyrightText: OpenSSF project contributors
# SPDX-License-Identifier: MIT
"""Compliant Code Example"""


def print_number_of_students(classroom: list[str]):
"""Print the number of students in a classroom"""
if not isinstance(classroom, list):
raise ValueError("classroom is not a list")
# TODO: also check each entry
print(f"The classroom has {len(classroom)} students.")


#####################
# Attempting to exploit above code example
#####################
print_number_of_students(["student 1", "student 2", "Student 3"])
print_number_of_students(None)
2 changes: 1 addition & 1 deletion docs/Secure-Coding-Guide-for-Python/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ It is __not production code__ and requires code-style or python best practices t
|[CWE-197: Numeric Truncation Error](CWE-664/CWE-197/README.md)||
|[CWE-197: Control rounding when converting to less precise numbers](CWE-664/CWE-197/01/README.md)||
|[CWE-400: Uncontrolled Resource Consumption](CWE-664/CWE-400/README.md)||
|[CWE-409: Improper Handling of Highly Compressed Data (Data Amplification)](CWE-664/CWE-409/.)||
|[CWE-409: Improper Handling of Highly Compressed Data (Data Amplification)](CWE-664/CWE-409/README.md)||
|[CWE-410: Insufficient Resource Pool](CWE-664/CWE-410/README.md)||
|[CWE-426: Untrusted Search Path](CWE-664/CWE-426/README.md)|[CVE-2015-1326](https://www.cvedetails.com/cve/CVE-2015-1326),<br/>CVSSv3.0: __8.8__,<br/>EPSS: __00.20__ (23.11.2023)|
|[CWE-459: Incomplete Cleanup](CWE-664/CWE-459/README.md)||
Expand Down
Loading