py-pdf · stefan6419846 · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026
diff --git a/docs/user/security.md b/docs/user/security.md
@@ -18,6 +18,9 @@ aware of the possible side effects, you can modify the following constants which
 For JBIG2 images, there is a similar parameter to limit the memory usage during decoding: `pypdf.filters.JBIG2_MAX_OUTPUT_LENGTH`
 It defaults to 75 MB as well.
 
+For all streams, the value declared in the `/Length` field is limited to `pypdf.filters.MAX_DECLARED_STREAM_LENGTH`, which
+defaults to 75 MB as well.
+
 For the *FlateDecode* filter, the number of bytes to attempt recovery with can be set by `pypdf.filters.ZLIB_MAX_RECOVERY_INPUT_LENGTH`.
 It defaults to 5 MB due to the much more complex recovery approach.
 

diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -72,6 +72,8 @@
     is_null_or_none,
 )
 
+MAX_DECLARED_STREAM_LENGTH = 75_000_000  # Upper bound accepted for a stream's declared /Length value.
+
 JBIG2_MAX_OUTPUT_LENGTH = 75_000_000
 LZW_MAX_OUTPUT_LENGTH = 75_000_000
 RUN_LENGTH_MAX_OUTPUT_LENGTH = 75_000_000

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
@@ -63,7 +63,7 @@
 from ..constants import StreamAttributes as SA
 from ..constants import TypArguments as TA
 from ..constants import TypFitArguments as TF
-from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError
+from ..errors import STREAM_TRUNCATED_PREMATURELY, LimitReachedError, PdfReadError, PdfStreamError
 from ._base import (
     BooleanObject,
     ByteStringObject,
@@ -637,6 +637,10 @@ def read_from_stream(
                 length = -1
             pstart = stream.tell()
             if length >= 0:
+                from ..filters import MAX_DECLARED_STREAM_LENGTH  # noqa: PLC0415
+                if length > MAX_DECLARED_STREAM_LENGTH:
+                    raise LimitReachedError(f"Declared stream length of {length} exceeds maximum allowed length.")
+
                 data["__streamdata__"] = stream.read(length)
             else:
                 data["__streamdata__"] = read_until_regex(

diff --git a/tests/generic/test_data_structures.py b/tests/generic/test_data_structures.py
@@ -1,12 +1,23 @@
 """Test the pypdf.generic._data_structures module."""
+import os
+import subprocess
+import sys
 from io import BytesIO
+from pathlib import Path
+from typing import Callable
 
 import pytest
 
 from pypdf import PdfReader, PdfWriter
+from pypdf.errors import LimitReachedError
 from pypdf.generic import DictionaryObject, NameObject, RectangleObject, TreeObject
 from tests import RESOURCE_ROOT, get_data_from_url
 
+try:
+    import resource
+except ImportError:
+    resource = None
+
 
 def test_dictionary_object__get_next_object_position():
     reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
@@ -54,3 +65,107 @@ def test_array_object__clone_same_object_multiple_times(caplog):
         page2 = writer.add_page(page)
         assert page2.mediabox == RectangleObject((0, 0, 595, 841))
     assert caplog.messages == []
+
+
+@pytest.mark.enable_socket
+def test_dictionary_object__read_from_stream__limit():
+    name = "read_from_stream__length_2gb.pdf"  # Sample expected to declare a stream length of 2147483647.
+    url = "https://github.com/user-attachments/files/25842437/read_from_stream__length_2gb.pdf"
+
+    reader = PdfReader(BytesIO(get_data_from_url(url=url, name=name)))
+    page = reader.pages[0]
+
+    with pytest.raises(
+            expected_exception=LimitReachedError,
+            match=r"^Declared stream length of 2147483647 exceeds maximum allowed length\.$"
+    ):
+        page.extract_text()  # The oversized declared length must be rejected with the default limit.
+
+
+def _prepare_test_dictionary_object__read_from_stream__no_limit(
+        path: Path
+) -> tuple[str, dict[str, str], Callable[[], None]]:
+    env = os.environ.copy()  # Environment for the child process; the parent's stays untouched.
+    env["COVERAGE_PROCESS_START"] = "pyproject.toml"  # Presumably enables coverage in the subprocess.
+
+    name = "read_from_stream__length_2gb.pdf"
+    url = "https://github.com/user-attachments/files/25842437/read_from_stream__length_2gb.pdf"
+    data = get_data_from_url(url=url, name=name)
+    pdf_path = path / name
+    pdf_path.write_bytes(data)
+    pdf_path_str = pdf_path.resolve().as_posix()  # Absolute path with forward slashes.
+
+    try:
+        env["PYTHONPATH"] = "." + os.pathsep + env["PYTHONPATH"]  # Prepend the current directory.
+    except KeyError:
+        env["PYTHONPATH"] = "."  # No PYTHONPATH was set before.
+
+    def limit_virtual_memory() -> None:
+        limit_kb = 1_000_000
+        limit_bytes = limit_kb * 1024  # 1_024_000_000 bytes.
+        resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))  # Same soft and hard limit.
+
+    return pdf_path_str, env, limit_virtual_memory
+
+
+@pytest.mark.enable_socket
+@pytest.mark.skipif(condition=resource is None, reason="Does not have 'resource' module.")
+@pytest.mark.skipif(sys.platform == "darwin", reason="RLIMIT_AS is unreliable.")
+def test_dictionary_object__read_from_stream__no_limit(tmp_path):
+    pdf_path_str, env, limit_virtual_memory = _prepare_test_dictionary_object__read_from_stream__no_limit(tmp_path)
+
+    source_file = tmp_path / "script.py"
+    source_file.write_text(  # Script lifts the length limit and reads the PDF from an open file handle.
+        f"""
+import sys
+from pypdf import filters, PdfReader
+
+filters.MAX_DECLARED_STREAM_LENGTH = sys.maxsize
+
+with open({pdf_path_str!r}, mode="rb") as fd:
+    reader = PdfReader(fd)
+    print(reader.pages[0].extract_text())
+"""
+    )
+
+    result = subprocess.run(  # noqa: S603  # We have the control here.
+        [sys.executable, source_file],
+        capture_output=True,
+        env=env,
+        text=True,
+        preexec_fn=limit_virtual_memory,  # Runs in the child process before the script is executed.
+    )
+    assert result.returncode == 1
+    assert result.stdout == ""
+    assert result.stderr.replace("\r", "").endswith("\nMemoryError\n")  # Uncaught MemoryError expected.
+
+
+@pytest.mark.enable_socket
+@pytest.mark.skipif(condition=resource is None, reason="Does not have 'resource' module.")
+@pytest.mark.skipif(sys.platform == "darwin", reason="RLIMIT_AS is unreliable.")
+def test_dictionary_object__read_from_stream__no_limit__path(tmp_path):
+    pdf_path_str, env, limit_virtual_memory = _prepare_test_dictionary_object__read_from_stream__no_limit(tmp_path)
+
+    source_file = tmp_path / "script.py"
+    source_file.write_text(  # Script lifts the length limit and hands the file path to PdfReader.
+        f"""
+import sys
+from pypdf import filters, PdfReader
+
+filters.MAX_DECLARED_STREAM_LENGTH = sys.maxsize
+
+reader = PdfReader({pdf_path_str!r})
+print(reader.pages[0].extract_text())
+"""
+    )
+
+    result = subprocess.run(  # noqa: S603  # We have the control here.
+        [sys.executable, source_file],
+        capture_output=True,
+        env=env,
+        text=True,
+        preexec_fn=limit_virtual_memory,  # Runs in the child process before the script is executed.
+    )
+    assert result.returncode == 0  # With a path, extraction is expected to succeed under the memory cap.
+    assert result.stdout.replace("\r", "") == "Hello from pypdf\n"
+    assert result.stderr == ""