diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 068aa13ed70356..bea5e96ac07df8 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1541,6 +1541,12 @@ def _proc_pax(self, tarfile): except HeaderError as e: raise SubsequentHeaderError(str(e)) from None + # offset_data needs to be stored in case "size" is in pax_headers and + # the next TAR offset needs to be recomputed. next.offset_data may get + # overwritten when parsing sparse files and therefore cannot be used + # directly for the recomputation. + next_offset_data = next.offset_data + # Process GNU sparse information. if "GNU.sparse.map" in pax_headers: # GNU extended sparse format version 0.1. @@ -1563,9 +1569,10 @@ def _proc_pax(self, tarfile): # If the extended header replaces the size field, # we need to recalculate the offset where the next # header starts. - offset = next.offset_data + offset = next_offset_data if next.isreg() or next.type not in SUPPORTED_TYPES: - offset += next._block(next.size) + # Do not use next.size here because it may contain the real size for sparse files. + offset += next._block(int(pax_headers["size"])) tarfile.offset = offset return next diff --git a/Misc/NEWS.d/next/Library/2025-07-13-12-21-36.gh-issue-136602.9Q67Ns.rst b/Misc/NEWS.d/next/Library/2025-07-13-12-21-36.gh-issue-136602.9Q67Ns.rst new file mode 100644 index 00000000000000..462378974fcb26 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-13-12-21-36.gh-issue-136602.9Q67Ns.rst @@ -0,0 +1,2 @@ +Fix wrong ``offset_data`` being used for sparse files with an additional +``size`` PAX keyword.