|
8 | 8 | import pypdf.xmp |
9 | 9 | from pypdf import PdfReader, PdfWriter |
10 | 10 | from pypdf.errors import PdfReadError, XmpDocumentError |
11 | | -from pypdf.generic import NameObject, StreamObject |
| 11 | +from pypdf.generic import ContentStream, NameObject, StreamObject |
12 | 12 | from pypdf.xmp import XmpInformation |
13 | 13 |
|
14 | 14 | from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url |
@@ -887,3 +887,57 @@ def test_xmp_information__create_and_set_metadata(): |
887 | 887 | assert xmp.dc_contributor == ["test1"] |
888 | 888 | assert xmp.dc_creator == ["test2"] |
889 | 889 | assert xmp.dc_title == {"x-default": "test3"} |
| 890 | + |
| 891 | + |
def test_xmp_information__external_entity_expansion(tmpdir):
    """Check that an external ``SYSTEM`` entity is not expanded into XMP values.

    The document declares ``&xxe;`` as a reference to a file created inside
    ``tmpdir`` and uses it in ``dc:creator`` directly in front of the literal
    text ``abc``. Only that literal text is expected in the parsed creator
    list.
    """
    secret_file = tmpdir / "secret.txt"
    secret_file.write("VERY SECRET")

    # The XML starts at column 0 on purpose: the prolog must be the very
    # first bytes of the stream data.
    xml_document = f"""<?xml version="1.0"?>
<!DOCTYPE foo [
  <!ENTITY xxe SYSTEM "file://{secret_file}">
]>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="">
      <dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">&xxe;abc</dc:creator>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>"""

    metadata_stream = ContentStream(pdf=None, stream=None)
    metadata_stream.set_data(xml_document.encode())

    creators = XmpInformation(metadata_stream).dc_creator
    assert creators == ["abc"]
| 911 | + |
| 912 | + |
@pytest.mark.timeout(10)
def test_xmp_information__exponential_entity_expansion():
    """Check that nested entity expansion is rejected with ``PdfReadError``.

    The DTD defines nine entity levels, each referencing the previous level
    ten times, and ``dc:title`` references the outermost one. Constructing
    ``XmpInformation`` from this data is expected to raise ``PdfReadError``
    with the exact message below; the timeout marker bounds the run time.
    """
    # The expected message pins a line and column inside this document, so
    # its line breaks and indentation are significant.
    xml_document = b"""<?xml version="1.0"?>
<!DOCTYPE lolz [
  <!ENTITY lol "lol">
  <!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
  <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
  <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
  <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
  <!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
  <!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
  <!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
  <!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="">
      <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">&lol9;</dc:title>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>"""

    # Anchored at both ends so the full error text has to match.
    expected_message = (
        r"^XML in XmpInformation was invalid: limit on input amplification factor "
        r"\(from DTD and entities\) breached: line 16, column 60$"
    )

    metadata_stream = ContentStream(pdf=None, stream=None)
    metadata_stream.set_data(xml_document)

    with pytest.raises(PdfReadError, match=expected_message):
        XmpInformation(metadata_stream)
0 commit comments