-
-
Notifications
You must be signed in to change notification settings - Fork 32.6k
gh-117779: Fix reading duplicated entries in zipfile by name #129254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2415,7 +2415,36 @@ def test_decompress_without_3rd_party_library(self): | |
self.assertRaises(RuntimeError, zf.extract, 'a.txt') | ||
|
||
@requires_zlib() | ||
def test_full_overlap(self): | ||
def test_full_overlap_different_names(self): | ||
# Hand-built archive layout: | ||
#   offset 0: one local file header named 'b' (deflate, 16 compressed | ||
#             bytes, 1033 uncompressed bytes) followed by its data | ||
#   then:     two central directory entries, 'a' and 'b', which both | ||
#             record header offset 0, i.e. they share the same member | ||
#   then:     end of central directory record listing 2 entries | ||
# Only 'b' matches the name stored in the local header, so 'b' must be | ||
# readable while 'a' must be rejected. | ||
data = ( | ||
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' | ||
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed' | ||
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' | ||
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' | ||
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' | ||
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' | ||
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' | ||
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' | ||
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05' | ||
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' | ||
b'\x00\x00\x00' | ||
) | ||
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: | ||
self.assertEqual(zipf.namelist(), ['a', 'b']) | ||
# Both entries carry identical metadata and the same offset. | ||
zi = zipf.getinfo('a') | ||
self.assertEqual(zi.header_offset, 0) | ||
self.assertEqual(zi.compress_size, 16) | ||
self.assertEqual(zi.file_size, 1033) | ||
zi = zipf.getinfo('b') | ||
self.assertEqual(zi.header_offset, 0) | ||
self.assertEqual(zi.compress_size, 16) | ||
self.assertEqual(zi.file_size, 1033) | ||
# 'b' agrees with the local header name; 'a' does not. | ||
self.assertEqual(len(zipf.read('b')), 1033) | ||
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): | ||
zipf.read('a') | ||
|
||
@requires_zlib() | ||
def test_full_overlap_different_names2(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would have given this function a different name, something that distinguishes it from the non-2 version... or at the very least add a docstring explaining what it's asserting. Future readers are going to have a hard time discerning the motivations for these two tests based on their names alone. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These tests are very similar; the only difference is which central directory name matches the local header name -- the first or the second. I hope it will be clear from the comments. |
||
data = ( | ||
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' | ||
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' | ||
|
@@ -2439,9 +2468,43 @@ def test_full_overlap(self): | |
self.assertEqual(zi.header_offset, 0) | ||
self.assertEqual(zi.compress_size, 16) | ||
self.assertEqual(zi.file_size, 1033) | ||
self.assertEqual(len(zipf.read('a')), 1033) | ||
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): | ||
zipf.read('b') | ||
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm: | ||
self.assertEqual(len(zipf.read('a')), 1033) | ||
self.assertEqual(cm.filename, __file__) | ||
|
||
@requires_zlib() | ||
def test_full_overlap_same_name(self): | ||
# Hand-built archive layout: | ||
#   offset 0: one local file header named 'a' (deflate, 16 compressed | ||
#             bytes, 1033 uncompressed bytes) followed by its data | ||
#   then:     two central directory entries, both named 'a' and both | ||
#             recording header offset 0 (a duplicated entry) | ||
#   then:     end of central directory record listing 2 entries | ||
# Reading by name and via the last ZipInfo must return the data; | ||
# reading or opening the first ZipInfo must emit a UserWarning. | ||
data = ( | ||
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' | ||
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' | ||
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' | ||
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' | ||
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' | ||
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' | ||
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' | ||
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' | ||
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK\x05' | ||
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' | ||
b'\x00\x00\x00' | ||
) | ||
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: | ||
# Both duplicates are listed, even though they share one name. | ||
self.assertEqual(zipf.namelist(), ['a', 'a']) | ||
self.assertEqual(len(zipf.infolist()), 2) | ||
zi = zipf.getinfo('a') | ||
self.assertEqual(zi.header_offset, 0) | ||
self.assertEqual(zi.compress_size, 16) | ||
self.assertEqual(zi.file_size, 1033) | ||
# Access by name, by the getinfo() result and by the last entry. | ||
self.assertEqual(len(zipf.read('a')), 1033) | ||
self.assertEqual(len(zipf.read(zi)), 1033) | ||
self.assertEqual(len(zipf.read(zipf.infolist()[1])), 1033) | ||
# The first entry still yields the data, but with a warning that | ||
# must be attributed to this test file. | ||
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm: | ||
self.assertEqual(len(zipf.read(zipf.infolist()[0])), 1033) | ||
self.assertEqual(cm.filename, __file__) | ||
# open() on the first entry must warn the same way as read(). | ||
with self.assertWarnsRegex(UserWarning, 'Overlapped entries') as cm: | ||
zipf.open(zipf.infolist()[0]).close() | ||
self.assertEqual(cm.filename, __file__) | ||
|
||
@requires_zlib() | ||
def test_quoted_overlap(self): | ||
|
@@ -2474,6 +2537,47 @@ def test_quoted_overlap(self): | |
zipf.read('a') | ||
self.assertEqual(len(zipf.read('b')), 1033) | ||
|
||
@requires_zlib() | ||
def test_overlap_with_central_dir(self): | ||
# Hand-built archive with no local file header at all: | ||
#   offset 0:  central directory entry for 'a' (deflate, 11 compressed | ||
#              bytes, 1033 uncompressed bytes) recording header offset 0 | ||
#   offset 47: end of central directory record (1 entry, central | ||
#              directory of 47 bytes starting at offset 0) | ||
# The entry therefore points at the central directory record itself, | ||
# whose signature is not a local file header signature. | ||
data = ( | ||
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' | ||
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' | ||
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81\x00\x00\x00\x00aP' | ||
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00' | ||
b'\x00\x00\x00\x00\x00' | ||
) | ||
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: | ||
# The archive opens and lists the entry with its recorded metadata. | ||
self.assertEqual(zipf.namelist(), ['a']) | ||
self.assertEqual(len(zipf.infolist()), 1) | ||
zi = zipf.getinfo('a') | ||
self.assertEqual(zi.header_offset, 0) | ||
self.assertEqual(zi.compress_size, 11) | ||
self.assertEqual(zi.file_size, 1033) | ||
# Reading must fail because offset 0 holds no local file header. | ||
with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'): | ||
zipf.read('a') | ||
|
||
@requires_zlib() | ||
def test_overlap_with_archive_comment(self): | ||
# Hand-built archive whose member data is hidden in the archive comment: | ||
#   offset 0:  central directory entry for 'a' (deflate, 11 compressed | ||
#              bytes, 1033 uncompressed bytes) recording header offset 69 | ||
#   offset 47: end of central directory record (22 bytes) declaring a | ||
#              42-byte archive comment | ||
#   offset 69: the comment bytes, which are a local file header for 'a' | ||
#              followed by 11 bytes of compressed data | ||
# The entry thus points past the end record, into the comment. | ||
data = ( | ||
b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' | ||
b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' | ||
b'\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x81E\x00\x00\x00aP' | ||
b'K\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00/\x00\x00\x00\x00' | ||
b'\x00\x00\x00*\x00' | ||
b'PK\x03\x04\x14\x00\x00\x00\x08\x00G_|Z\xe2\x1e' | ||
b'8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00aK' | ||
b'L\x1c\x05\xa3`\x14\x8cx\x00\x00' | ||
) | ||
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: | ||
# The archive opens and lists the entry with its recorded metadata. | ||
self.assertEqual(zipf.namelist(), ['a']) | ||
self.assertEqual(len(zipf.infolist()), 1) | ||
zi = zipf.getinfo('a') | ||
self.assertEqual(zi.header_offset, 69) | ||
self.assertEqual(zi.compress_size, 11) | ||
self.assertEqual(zi.file_size, 1033) | ||
# Reading data located inside the comment must be rejected. | ||
with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): | ||
zipf.read('a') | ||
|
||
def tearDown(self): | ||
# Remove the TESTFN and TESTFN2 paths once a test has finished. | ||
unlink(TESTFN) | ||
unlink(TESTFN2) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Fix reading duplicated entries in :mod:`zipfile` by name. | ||
Reading duplicated entries (except the last one) by ``ZipInfo`` | ||
now emits a warning instead of raising an exception. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally, binary data like this should be accompanied by an explanation of what it represents and how it was constructed. Should this test start failing, how will someone know what this zip file represents?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. I'll add comments. Please see #137152.
The data was created manually. I created simple ZIP files using the `zip` command, and then edited them -- removed or moved the data, changed file names, updated offsets. I tested how it worked with unpatched `zipfile` (all was read successfully).
Only the data for `test_quoted_overlap` was generated by a script provided in the original report, and then simplified.