diff --git a/crates/paksmith-core/src/container/pak/index/entry_header.rs b/crates/paksmith-core/src/container/pak/index/entry_header.rs index 23a891b..ddf62fc 100644 --- a/crates/paksmith-core/src/container/pak/index/entry_header.rs +++ b/crates/paksmith-core/src/container/pak/index/entry_header.rs @@ -14,7 +14,10 @@ use byteorder::{LittleEndian, ReadBytesExt}; use super::compression::{CompressionBlock, CompressionMethod}; use crate::container::pak::version::PakVersion; use crate::digest::Sha1Digest; -use crate::error::{BoundsUnit, EncodedFault, IndexParseFault, OverflowSite, PaksmithError}; +use crate::error::{ + AllocationContext, BoundsUnit, EncodedFault, IndexParseFault, OverflowSite, PaksmithError, + WireField, +}; /// Sanity ceiling on compression block count per entry (~16M blocks of /// 64KiB would be a 1TiB entry). @@ -194,7 +197,7 @@ impl PakEntryHeader { if block_count > MAX_BLOCKS_PER_ENTRY { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "block_count", + field: WireField::BlockCount, value: u64::from(block_count), limit: u64::from(MAX_BLOCKS_PER_ENTRY), unit: BoundsUnit::Items, @@ -214,8 +217,9 @@ impl PakEntryHeader { .try_reserve_exact(block_count as usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "compression blocks", + context: AllocationContext::InlineCompressionBlocks, requested: block_count as usize, + unit: BoundsUnit::Items, source, path: None, }, @@ -414,8 +418,9 @@ impl PakEntryHeader { .try_reserve_exact(block_count as usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "encoded compression blocks", + context: AllocationContext::EncodedCompressionBlocks, requested: block_count as usize, + unit: BoundsUnit::Items, source, path: None, }, @@ -538,7 +543,7 @@ impl PakEntryHeader { if uncompressed_size > max_uncompressed { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "uncompressed_size", + field: WireField::UncompressedSize, value: uncompressed_size, limit: max_uncompressed, unit: BoundsUnit::Bytes, @@ -581,41 +586,40 @@ impl PakEntryHeader { /// past the in-data record into the payload region; a mismatch here would /// silently shift the payload boundary. pub fn matches_payload(&self, payload: &Self, path: &str) -> crate::Result<()> { - let mismatch = - |field: &'static str, idx: String, dat: String| PaksmithError::InvalidIndex { - fault: IndexParseFault::FieldMismatch { - path: path.to_string(), - field, - index_value: idx, - payload_value: dat, - }, - }; + let mismatch = |field: WireField, idx: String, dat: String| PaksmithError::InvalidIndex { + fault: IndexParseFault::FieldMismatch { + path: path.to_string(), + field, + index_value: idx, + payload_value: dat, + }, + }; let lhs = self.common(); let rhs = payload.common(); if lhs.compressed_size != rhs.compressed_size { return Err(mismatch( - "compressed_size", + WireField::CompressedSize, lhs.compressed_size.to_string(), rhs.compressed_size.to_string(), )); } if lhs.uncompressed_size != rhs.uncompressed_size { return Err(mismatch( - "uncompressed_size", + WireField::UncompressedSize, lhs.uncompressed_size.to_string(), rhs.uncompressed_size.to_string(), )); } if lhs.compression_method != rhs.compression_method { return Err(mismatch( - "compression_method", + WireField::CompressionMethod, format!("{:?}", lhs.compression_method), format!("{:?}", rhs.compression_method), )); } if lhs.is_encrypted != rhs.is_encrypted { return Err(mismatch( - "is_encrypted", + WireField::IsEncrypted, lhs.is_encrypted.to_string(), rhs.is_encrypted.to_string(), )); @@ -637,7 +641,7 @@ impl PakEntryHeader { if let (Some(lhs_sha), Some(rhs_sha)) = (self.sha1(), payload.sha1()) { if lhs_sha != rhs_sha { return Err(mismatch( - "sha1", + WireField::Sha1, lhs_sha.short().to_string(), rhs_sha.short().to_string(), )); @@ -678,11 +682,11 @@ impl PakEntryHeader { format!("{} blocks", rhs.compression_blocks.len()), ), }; - return Err(mismatch("compression_blocks", lhs_desc, rhs_desc)); + return Err(mismatch(WireField::CompressionBlocks, lhs_desc, rhs_desc)); } if lhs.compression_block_size != rhs.compression_block_size { return Err(mismatch( - "compression_block_size", + WireField::CompressionBlockSize, lhs.compression_block_size.to_string(), rhs.compression_block_size.to_string(), )); diff --git a/crates/paksmith-core/src/container/pak/index/flat.rs b/crates/paksmith-core/src/container/pak/index/flat.rs index 0f0e662..3aeb915 100644 --- a/crates/paksmith-core/src/container/pak/index/flat.rs +++ b/crates/paksmith-core/src/container/pak/index/flat.rs @@ -17,7 +17,7 @@ use super::compression::CompressionMethod; use super::fstring::read_fstring; use super::{ENTRY_MIN_RECORD_BYTES, PakIndex, PakIndexEntry}; use crate::container::pak::version::PakVersion; -use crate::error::{BoundsUnit, IndexParseFault, PaksmithError}; +use crate::error::{AllocationContext, BoundsUnit, IndexParseFault, PaksmithError, WireField}; // Cross-file `impl PakIndex` block: adds the v3-v9 parser entry point. // The type itself, the version dispatcher, and the shared `from_entries` @@ -46,7 +46,7 @@ impl PakIndex { if u64::from(entry_count) > max_entries { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "entry_count", + field: WireField::EntryCount, value: u64::from(entry_count), limit: max_entries, unit: BoundsUnit::Items, @@ -63,8 +63,9 @@ impl PakIndex { .try_reserve_exact(entry_count as usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "entries", + context: AllocationContext::FlatIndexEntries, requested: entry_count as usize, + unit: BoundsUnit::Items, source, path: None, }, diff --git a/crates/paksmith-core/src/container/pak/index/mod.rs b/crates/paksmith-core/src/container/pak/index/mod.rs index 59c0e1f..99f8997 100644 --- a/crates/paksmith-core/src/container/pak/index/mod.rs +++ b/crates/paksmith-core/src/container/pak/index/mod.rs @@ -39,7 +39,7 @@ use std::io::{Read, Seek, SeekFrom}; use tracing::warn; use crate::container::pak::version::PakVersion; -use crate::error::{IndexParseFault, PaksmithError}; +use crate::error::{AllocationContext, BoundsUnit, IndexParseFault, PaksmithError}; use fstring::read_fstring; @@ -371,8 +371,9 @@ impl PakIndex { seen.try_reserve(entries_len) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "dedup tracker for entries", + context: AllocationContext::DedupTracker, requested: entries_len, + unit: BoundsUnit::Items, source, path: None, }, @@ -409,8 +410,9 @@ impl PakIndex { .try_reserve(entries.len()) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "by-path lookup entries", + context: AllocationContext::ByPathLookup, requested: entries.len(), + unit: BoundsUnit::Items, source, path: None, }, @@ -463,7 +465,7 @@ mod tests { use super::entry_header::encoded_entry_in_data_record_size; use super::*; use crate::digest::Sha1Digest; - use crate::error::{BoundsUnit, EncodedFault, FStringFault, OverflowSite}; + use crate::error::{BoundsUnit, EncodedFault, FStringFault, OverflowSite, WireField}; // Issue #68: V10+ fixture builder shared with the integration // proptest under `tests/index_proptest.rs`. Gated behind // `__test_utils`, which is auto-enabled during `cargo test` via @@ -2009,7 +2011,7 @@ mod tests { else { panic!("expected InvalidIndex BoundsExceeded with Bytes unit and no path, got: {err:?}") }; - assert_eq!(*field, "uncompressed_size"); + assert_eq!(*field, WireField::UncompressedSize); assert_eq!(*value, 0x10_0000_u64); // 3 blocks × 0x1000 each = 0x3000 cap. assert_eq!(*limit, 0x3000_u64); @@ -2331,7 +2333,7 @@ mod tests { &err, PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "fdi_size", + field: WireField::FdiSize, .. } } @@ -2377,7 +2379,7 @@ mod tests { &err, PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "non_encoded_count", + field: WireField::NonEncodedCount, .. } } @@ -2538,7 +2540,7 @@ mod tests { &err, PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "dir_count", + field: WireField::DirCount, .. }, } @@ -2688,7 +2690,7 @@ mod tests { &err, PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "dir_count", + field: WireField::DirCount, value: 2, limit: 1, .. diff --git a/crates/paksmith-core/src/container/pak/index/path_hash.rs b/crates/paksmith-core/src/container/pak/index/path_hash.rs index 9a611f5..f4ba3d9 100644 --- a/crates/paksmith-core/src/container/pak/index/path_hash.rs +++ b/crates/paksmith-core/src/container/pak/index/path_hash.rs @@ -31,7 +31,9 @@ use super::entry_header::PakEntryHeader; use super::fstring::read_fstring; use super::{ENTRY_MIN_RECORD_BYTES, PakIndex, PakIndexEntry}; use crate::container::pak::version::PakVersion; -use crate::error::{BoundsUnit, EncodedFault, IndexParseFault, PaksmithError}; +use crate::error::{ + AllocationContext, BoundsUnit, EncodedFault, IndexParseFault, PaksmithError, WireField, +}; /// Standalone ceiling on the v10+ FDI region byte size. A real-world /// full directory index for a 100k-file pak is typically a few MB; @@ -87,7 +89,7 @@ impl PakIndex { let index_size_usize = usize::try_from(index_size).map_err(|_| PaksmithError::InvalidIndex { fault: IndexParseFault::U64ExceedsPlatformUsize { - field: "index_size", + field: WireField::IndexSize, value: index_size, path: None, }, @@ -97,8 +99,9 @@ impl PakIndex { .try_reserve_exact(index_size_usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "bytes for v10+ index", + context: AllocationContext::V10MainIndexBytes, requested: index_size_usize, + unit: BoundsUnit::Bytes, source, path: None, }, @@ -168,7 +171,7 @@ impl PakIndex { let encoded_entries_size_usize = usize::try_from(encoded_entries_size).map_err(|_| PaksmithError::InvalidIndex { fault: IndexParseFault::U64ExceedsPlatformUsize { - field: "encoded_entries_size", + field: WireField::EncodedEntriesSize, value: u64::from(encoded_entries_size), path: None, }, @@ -179,7 +182,7 @@ impl PakIndex { if u64::from(encoded_entries_size) > index_size { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "encoded_entries_size", + field: WireField::EncodedEntriesSize, value: u64::from(encoded_entries_size), limit: index_size, unit: BoundsUnit::Bytes, @@ -192,8 +195,9 @@ impl PakIndex { .try_reserve_exact(encoded_entries_size_usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "bytes for v10+ encoded entries", + context: AllocationContext::V10EncodedEntriesBytes, requested: encoded_entries_size_usize, + unit: BoundsUnit::Bytes, source, path: None, }, @@ -209,7 +213,7 @@ impl PakIndex { if u64::from(non_encoded_count) > max_non_encoded { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "non_encoded_count", + field: WireField::NonEncodedCount, value: u64::from(non_encoded_count), limit: max_non_encoded, unit: BoundsUnit::Items, @@ -222,8 +226,9 @@ impl PakIndex { .try_reserve_exact(non_encoded_count as usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "non-encoded entries for v10+ index", + context: AllocationContext::V10NonEncodedEntries, requested: non_encoded_count as usize, + unit: BoundsUnit::Items, source, path: None, }, @@ -240,7 +245,7 @@ impl PakIndex { if fdi_size > MAX_FDI_BYTES { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "fdi_size", + field: WireField::FdiSize, value: fdi_size, limit: MAX_FDI_BYTES, unit: BoundsUnit::Bytes, @@ -252,7 +257,7 @@ impl PakIndex { let fdi_size_usize = usize::try_from(fdi_size).map_err(|_| PaksmithError::InvalidIndex { fault: IndexParseFault::U64ExceedsPlatformUsize { - field: "fdi_size", + field: WireField::FdiSize, value: fdi_size, path: None, }, @@ -262,8 +267,9 @@ impl PakIndex { .try_reserve_exact(fdi_size_usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "bytes for v10+ full directory index", + context: AllocationContext::V10FdiBytes, requested: fdi_size_usize, + unit: BoundsUnit::Bytes, source, path: None, }, @@ -282,7 +288,7 @@ impl PakIndex { if u64::from(dir_count) > max_dirs_for_fdi { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "dir_count", + field: WireField::DirCount, value: u64::from(dir_count), limit: max_dirs_for_fdi, unit: BoundsUnit::Items, @@ -299,7 +305,7 @@ impl PakIndex { if u64::from(file_count) > max_files_for_fdi { return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "file_count", + field: WireField::FileCount, value: u64::from(file_count), limit: max_files_for_fdi, unit: BoundsUnit::Items, @@ -312,8 +318,9 @@ impl PakIndex { .try_reserve_exact(file_count as usize) .map_err(|source| PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "entries for v10+ index", + context: AllocationContext::V10IndexEntries, requested: file_count as usize, + unit: BoundsUnit::Items, source, path: None, }, diff --git a/crates/paksmith-core/src/container/pak/mod.rs b/crates/paksmith-core/src/container/pak/mod.rs index 24c6e82..4778ec7 100644 --- a/crates/paksmith-core/src/container/pak/mod.rs +++ b/crates/paksmith-core/src/container/pak/mod.rs @@ -63,8 +63,8 @@ use tracing::{debug, error, warn}; use crate::container::{ContainerFormat, ContainerReader, EntryFlags, EntryMetadata}; use crate::digest::Sha1Digest; use crate::error::{ - BlockBoundsKind, BoundsUnit, DecompressionFault, HashTarget, IndexParseFault, - OffsetPastFileSizeKind, OverflowSite, PaksmithError, + AllocationContext, BlockBoundsKind, BoundsUnit, DecompressionFault, HashTarget, + IndexParseFault, OffsetPastFileSizeKind, OverflowSite, PaksmithError, WireField, }; use self::footer::PakFooter; @@ -269,7 +269,7 @@ impl PakReader { ); return Err(PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "uncompressed_size", + field: WireField::UncompressedSize, value: uncompressed, limit: MAX_UNCOMPRESSED_ENTRY_BYTES, unit: BoundsUnit::Bytes, @@ -964,18 +964,25 @@ impl PakReader { self.version(), writer, ), - // Already rejected above; unreachable in practice but keep - // the match exhaustive without an opaque _ arm. + // Already rejected at the top of `stream_entry_to`; this + // arm exists to keep the match exhaustive (per CLAUDE.md + // "no panics in core") without an opaque `_` catch-all. + // If we ever reach here, the early-reject path was bypassed + // by a refactor — surface as `InvariantViolated` so an + // operator gets a typed error rather than a panic, and the + // bug is unmistakable in logs. CompressionMethod::Gzip | CompressionMethod::Oodle | CompressionMethod::Zstd | CompressionMethod::Lz4 | CompressionMethod::Unknown(_) - | CompressionMethod::UnknownByName(_) => { - unreachable!( - "unsupported compression method should have been rejected at the top of stream_entry_to" - ) - } + | CompressionMethod::UnknownByName(_) => Err(PaksmithError::InvalidIndex { + fault: IndexParseFault::InvariantViolated { + reason: "stream_entry_to dispatch reached an unsupported \ + CompressionMethod arm — early-reject at top of \ + function was bypassed (see issue #138)", + }, + }), } } } @@ -1027,7 +1034,7 @@ impl ContainerReader for PakReader { let size_usize = usize::try_from(uncompressed_size).map_err(|_| PaksmithError::InvalidIndex { fault: IndexParseFault::U64ExceedsPlatformUsize { - field: "uncompressed_size", + field: WireField::UncompressedSize, value: uncompressed_size, path: Some(path.to_string()), }, @@ -1041,8 +1048,9 @@ impl ContainerReader for PakReader { warn!(path, size = size_usize, error = %source, "output reservation failed"); PaksmithError::InvalidIndex { fault: IndexParseFault::AllocationFailed { - context: "bytes", + context: AllocationContext::EntryPayloadBytes, requested: size_usize, + unit: BoundsUnit::Bytes, source, path: Some(path.to_string()), }, @@ -1208,7 +1216,7 @@ fn stream_zlib_to( let block_len_usize = usize::try_from(block_len).map_err(|_| PaksmithError::InvalidIndex { fault: IndexParseFault::U64ExceedsPlatformUsize { - field: "block_length", + field: WireField::BlockLength, value: block_len, path: Some(path.to_string()), }, @@ -1540,6 +1548,21 @@ impl VerifyStats { /// against the empty-but-hashed-shell substitution attack: an /// attacker who replaces a populated archive with a zero-entry /// archive whose index correctly hashes still fails this check. + /// + /// **Caveat (issue #131):** for v10+ archives, `is_fully_verified() + /// == true` only attests that the FDI/PHI region bytes hash to the + /// SHA-1 values stored in the main-index header. It does NOT prove + /// the FNV-64 keys inside the PHI table correspond to the FDI- + /// derived paths — paksmith currently has no PHI ↔ FDI cross- + /// validation primitive. (To actually exploit this gap, an attacker + /// would also need to rewrite the PHI's stored SHA-1 inside the + /// main index, the main-index hash itself, and whatever footer + /// mechanism authenticates the main-index hash; if all those are + /// under attacker control, the FNV-64-vs-FDI-path mismatch would + /// still go undetected here.) The cross-validation primitive is + /// the Phase-2 hook on top of `path_hash_seed` (#98 + #131); until + /// it lands, treat `is_fully_verified() == true` as "stored hashes + /// match stored bytes," not "the hash table is authoritative." pub fn is_fully_verified(&self) -> bool { // Region state passes if Verified or NotPresent — the latter // is the legitimate "no FDI/PHI in this archive" case for diff --git a/crates/paksmith-core/src/error.rs b/crates/paksmith-core/src/error.rs index 4f41c4e..1f48617 100644 --- a/crates/paksmith-core/src/error.rs +++ b/crates/paksmith-core/src/error.rs @@ -502,11 +502,20 @@ impl fmt::Display for DecompressionFault { /// enough machine-readable context to identify it without parsing a /// human-readable string. Tests can match exhaustively /// (`assert!(matches!(err, PaksmithError::InvalidIndex { fault: -/// IndexParseFault::BoundsExceeded { field: "file_count", .. } }))`) +/// IndexParseFault::BoundsExceeded { field: WireField::FileCount, .. } }))`) /// rather than substring-scanning a `String` reason. /// /// **Display format** mirrors the prior `reason: String` text shapes -/// so operator-visible messages are stable across the refactor. +/// so operator-visible messages are stable across the refactor — with +/// one documented exception: [`Self::AllocationFailed`] gained a +/// `unit: BoundsUnit` field in #133 (so operators can disambiguate +/// "65535 bytes" from "65535 items"), and the rendered text now reads +/// `"could not reserve N {unit} for {context}: {source}"` rather than +/// the pre-#133 `"could not reserve N {context}: {source}"`. Operator +/// log greps anchored on the *full* pre-#133 shape need a one-time +/// update; greps anchored on substrings like +/// `"could not reserve \d+ bytes"` or `"compression blocks"` keep +/// matching. /// /// `#[non_exhaustive]` because new categories will be added as new /// parse paths land (e.g., Phase 2 UAsset parsing); downstream @@ -529,9 +538,8 @@ pub enum IndexParseFault { /// /// Construction invariant (caller-enforced): `value > limit`. BoundsExceeded { - /// Wire-format field name (`"file_count"`, `"block_count"`, - /// `"fdi_size"`, etc.). - field: &'static str, + /// Wire-format field name. Closed set per [`WireField`] (#134). + field: WireField, /// The header-claimed value. value: u64, /// The cap it exceeds. @@ -540,19 +548,24 @@ pub enum IndexParseFault { /// alerts by units rather than parsing the `field` string. unit: BoundsUnit, /// Path of the entry the bound applies to, when the field is - /// per-entry (e.g. `"uncompressed_size"`). `None` for - /// archive-level bounds (e.g. `"fdi_size"`, `"file_count"`). + /// per-entry (e.g. [`WireField::UncompressedSize`]). `None` + /// for archive-level bounds (e.g. [`WireField::FdiSize`]). path: Option, }, /// A `try_reserve` / `try_reserve_exact` call returned `Err`. /// Surfaced rather than letting the allocator abort the process. AllocationFailed { - /// Free-form context label naming what we were reserving - /// (`"v10+ encoded entries"`, `"compression blocks"`, etc.). - context: &'static str, - /// Number of items or bytes (depending on context) we tried - /// to reserve. + /// What was being reserved. Closed set per + /// [`AllocationContext`] (#134). + context: AllocationContext, + /// Number of `unit`s we tried to reserve. Combine with `unit` + /// to get a typed quantity. requested: usize, + /// Unit of `requested` (bytes vs items). Operators sizing + /// budget alerts can't tell from `requested = 65535` alone + /// whether that's "65 KiB" or "65 535 entries × header size." + /// (#133 — sibling parallel of [`Self::BoundsExceeded::unit`].) + unit: BoundsUnit, /// Underlying allocator error, carrying OS-level detail. source: TryReserveError, /// Path of the entry whose payload allocation failed, when @@ -563,13 +576,13 @@ pub enum IndexParseFault { /// A header-claimed `u64` size doesn't fit in `usize` on the /// current platform. Practically a 32-bit-target concern. U64ExceedsPlatformUsize { - /// Wire-format field name. - field: &'static str, + /// Wire-format field name. Closed set per [`WireField`] (#134). + field: WireField, /// The u64 value that didn't fit. value: u64, /// Path of the entry the field applies to, when per-entry - /// (e.g. `"uncompressed_size"`). `None` for archive-level - /// (e.g. `"index_size"`, `"fdi_size"`). + /// (e.g. [`WireField::UncompressedSize`]). `None` for + /// archive-level (e.g. [`WireField::IndexSize`]). path: Option, }, /// Two views of the same entry's metadata (in-data record vs. @@ -579,9 +592,8 @@ pub enum IndexParseFault { FieldMismatch { /// Path of the entry whose records disagreed. path: String, - /// Wire-format field name (`"sha1"`, `"compressed_size"`, - /// `"compression_method"`, etc.). - field: &'static str, + /// Wire-format field name. Closed set per [`WireField`] (#134). + field: WireField, /// Display of the index-header value. index_value: String, /// Display of the in-data-record value. @@ -873,18 +885,200 @@ impl IndexParseFault { } } -/// Unit qualifier for [`IndexParseFault::BoundsExceeded`]. +/// Unit qualifier for [`IndexParseFault::BoundsExceeded`] and +/// [`IndexParseFault::AllocationFailed`]. /// Lets monitoring/dashboards group alerts by unit without parsing -/// the `field` string. +/// the `field` / `context` string. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum BoundsUnit { - /// `value`/`limit` are byte counts. + /// `value`/`limit`/`requested` are byte counts. Bytes, - /// `value`/`limit` are item counts (entries, blocks, slots, etc.). + /// `value`/`limit`/`requested` are item counts (entries, blocks, slots, etc.). Items, } +/// Wire-format field name for [`IndexParseFault`] variants that pin a +/// specific field. Replaces the prior `field: &'static str` stringly-typed +/// pattern (closes #134). +/// +/// Closed set of names rather than `&'static str` so callers and tests +/// get compile-time exhaustiveness: a typo at a callsite is a compile +/// error, and tests using `matches!(err, ... { field: +/// WireField::FileCount, .. })` cannot silently pass against a stale +/// string. Same precedent as [`OverflowSite`]. +/// +/// `Display` emits the canonical wire-stable snake_case name. Operator +/// log greps and downstream tooling that hard-coded the previous +/// `&'static str` values keep working: every variant Displays to the +/// exact string the call site previously passed. +/// +/// Derives mirror the [`OverflowSite`] / [`AllocationContext`] precedent +/// (`Debug + Clone + Copy + PartialEq + Eq`, no `Hash`). No in-tree +/// caller uses these as `HashMap` keys or in `HashSet`; add `Hash` only +/// when a real consumer materializes. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum WireField { + /// Per-entry: declared decompressed size. + UncompressedSize, + /// Per-entry: declared compressed (on-disk) size. + CompressedSize, + /// Per-entry: per-block compressed length (block end − block start). + BlockLength, + /// Per-entry: declared compression-block size (uniform per non-final block). + CompressionBlockSize, + /// Per-entry: number of compression blocks. + BlockCount, + /// Per-entry: SHA-1 digest field. + Sha1, + /// Per-entry: encryption flag. + IsEncrypted, + /// Per-entry: compression method discriminant. + CompressionMethod, + /// Per-entry: full compression-block layout (used by + /// [`IndexParseFault::FieldMismatch`] when individual blocks differ). + CompressionBlocks, + /// Archive-level: number of entries in a flat (v3-v9) index. + EntryCount, + /// Archive-level: number of non-encoded entries in a v10+ main index. + NonEncodedCount, + /// Archive-level: number of files in a v10+ Full Directory Index. + FileCount, + /// Archive-level: number of directories in a v10+ Full Directory Index. + DirCount, + /// Archive-level: byte size of the Full Directory Index region. + FdiSize, + /// Archive-level: byte size of the encoded-entries blob in a v10+ main index. + EncodedEntriesSize, + /// Archive-level: byte size of the main index (footer-declared). + IndexSize, +} + +impl fmt::Display for WireField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Wire-stable snake_case names. Match the `&'static str` values + // that the call sites passed before #134 typified the field. + let s = match self { + Self::UncompressedSize => "uncompressed_size", + Self::CompressedSize => "compressed_size", + Self::BlockLength => "block_length", + Self::CompressionBlockSize => "compression_block_size", + Self::BlockCount => "block_count", + Self::Sha1 => "sha1", + Self::IsEncrypted => "is_encrypted", + Self::CompressionMethod => "compression_method", + Self::CompressionBlocks => "compression_blocks", + Self::EntryCount => "entry_count", + Self::NonEncodedCount => "non_encoded_count", + Self::FileCount => "file_count", + Self::DirCount => "dir_count", + Self::FdiSize => "fdi_size", + Self::EncodedEntriesSize => "encoded_entries_size", + Self::IndexSize => "index_size", + }; + f.write_str(s) + } +} + +/// What was being allocated when [`IndexParseFault::AllocationFailed`] +/// fires. Replaces the prior `context: &'static str` stringly-typed +/// pattern (closes #134). +/// +/// Closed set of allocation sites rather than free-form labels so a +/// new reservation site requires explicitly extending this enum (caught +/// at compile time) rather than typing a fresh ad-hoc string. +/// +/// `Display` emits a bare noun-phrase label naming WHAT was being +/// reserved (no leading unit word). The unit is rendered separately +/// by the `AllocationFailed` Display arm via the `unit: BoundsUnit` +/// field, so the rendered shape is `"could not reserve N {unit} for +/// {context}: {source}"` — e.g. `"could not reserve 65536 bytes for +/// v10+ index: ..."` or `"could not reserve 32 items for compression +/// blocks: ..."`. The bare-label convention prevents the `"bytes for +/// bytes for v10+ index"` stutter that would result from contexts +/// whose pre-#134 strings already led with the unit word. +/// +/// **Wire-stability vs pre-PR #144 (#134):** for the `*Bytes` +/// variants, the rendered text gains a `for {label}` suffix that +/// disambiguates the unit (the operator alert grep +/// `"could not reserve \d+ bytes"` keeps matching, just with more +/// detail after). For the `*Items`-unit variants, the pre-#134 +/// `&'static str` strings already contained the noun-phrase the unit +/// suggests ("compression blocks"), so the rendered shape is +/// `"could not reserve N items for compression blocks: ..."` — also +/// a one-time text change from `"compression blocks: ..."`. Operator +/// log greps that anchored on the *full* `"could not reserve N {old- +/// context}: ..."` shape will need a one-time update. +/// +/// **Naming convention** (for new variants — applied uniformly here): +/// - Prefix `V10` for v10+-specific allocation sites (`V10MainIndexBytes`, +/// `V10IndexEntries`, etc.). +/// - Prefix `Inline`/`Encoded` for the two compression-block read paths. +/// - Prefix `Flat` for v3-v9 flat-layout sites. +/// - Suffix `Bytes` for raw byte buffers (paired with `BoundsUnit::Bytes`). +/// - Suffix with a domain plural noun for typed-element collections. +/// - Bare names (no scope prefix) for version-agnostic sites: utility +/// allocations like `DedupTracker` and `ByPathLookup`, and per-entry +/// buffers like `EntryPayloadBytes` (the `Bytes` suffix marks the +/// raw-byte-buffer shape; "bare" refers to the absent layout-version +/// prefix, not the absent suffix). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum AllocationContext { + /// Per-entry payload byte buffer. + /// Constructed in `PakReader::read_entry` (the `ContainerReader` + /// trait override at `container/pak/mod.rs`'s `read_entry` + /// implementation), which allocates the output Vec upfront before + /// streaming into it. + EntryPayloadBytes, + /// Per-entry compression-block records, inline-header read path. + InlineCompressionBlocks, + /// Per-entry compression-block records, encoded-header read path. + EncodedCompressionBlocks, + /// Flat-index entries vector (v3-v9 sequential index). + FlatIndexEntries, + /// Per-walk dedup tracker (HashSet of seen filenames). + DedupTracker, + /// `by_path` lookup HashMap for fast entry resolution. + ByPathLookup, + /// v10+ main-index byte buffer (slurp before parsing). + V10MainIndexBytes, + /// v10+ encoded-entries blob byte buffer. + V10EncodedEntriesBytes, + /// v10+ non-encoded entries vector. + V10NonEncodedEntries, + /// v10+ Full Directory Index byte buffer. + V10FdiBytes, + /// v10+ entries vector (combined encoded + non-encoded view). + V10IndexEntries, +} + +impl fmt::Display for AllocationContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Bare noun-phrase labels — the unit is rendered separately by + // the `AllocationFailed` Display arm so this string MUST NOT + // begin with the unit word, or the rendered text stutters + // (e.g. "bytes for bytes for v10+ index"). + // + // Pinned per-variant by `allocation_context_display_tokens_are_wire_stable`. + let s = match self { + Self::EntryPayloadBytes => "entry payload", + Self::InlineCompressionBlocks => "compression blocks", + Self::EncodedCompressionBlocks => "encoded compression blocks", + Self::FlatIndexEntries => "entries", + Self::DedupTracker => "dedup tracker for entries", + Self::ByPathLookup => "by-path lookup entries", + Self::V10MainIndexBytes => "v10+ index", + Self::V10EncodedEntriesBytes => "v10+ encoded entries", + Self::V10NonEncodedEntries => "non-encoded entries for v10+ index", + Self::V10FdiBytes => "v10+ full directory index", + Self::V10IndexEntries => "entries for v10+ index", + }; + f.write_str(s) + } +} + /// Parser site that produced an [`IndexParseFault::U64ArithmeticOverflow`]. /// /// Closed set of names rather than `&'static str` so callers and tests @@ -1136,16 +1330,28 @@ impl std::fmt::Display for IndexParseFault { Self::AllocationFailed { context, requested, + unit, source, path, } => { + // Wire format: include the unit so operators sizing + // budget alerts can distinguish "{N} bytes for X" from + // "{N} items for X". Pre-#133 the rendered text was + // ambiguous: "could not reserve 65535 compression + // blocks: ..." reads as 65535 BLOCKS, but "could not + // reserve 65535 bytes: ..." reads as 65535 BYTES — only + // human inference disambiguated. The `{unit}` slot + // makes it unambiguous in every variant. if let Some(p) = path { write!( f, - "could not reserve {requested} {context} for entry `{p}`: {source}" + "could not reserve {requested} {unit} for {context} for entry `{p}`: {source}" ) } else { - write!(f, "could not reserve {requested} {context}: {source}") + write!( + f, + "could not reserve {requested} {unit} for {context}: {source}" + ) } } Self::U64ExceedsPlatformUsize { field, value, path } => { @@ -1340,7 +1546,7 @@ mod tests { fn with_index_path_does_not_overwrite_existing_path() { let err = PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "uncompressed_size", + field: WireField::UncompressedSize, value: 100, limit: 50, unit: BoundsUnit::Bytes, @@ -1727,7 +1933,7 @@ mod tests { #[test] fn index_parse_fault_display_bounds_exceeded_with_path() { let s = fault_display(&IndexParseFault::BoundsExceeded { - field: "uncompressed_size", + field: WireField::UncompressedSize, value: 1_000_000_000, limit: 100_000_000, unit: BoundsUnit::Bytes, @@ -1747,7 +1953,7 @@ mod tests { // Archive-level: no per-entry path. Different format-string // branch from the per-entry case above. let s = fault_display(&IndexParseFault::BoundsExceeded { - field: "file_count", + field: WireField::FileCount, value: 999_999, limit: 1_000, unit: BoundsUnit::Items, @@ -1771,14 +1977,19 @@ mod tests { .try_reserve_exact(usize::MAX) .expect_err("reserving usize::MAX must fail"); let s = fault_display(&IndexParseFault::AllocationFailed { - context: "compression blocks", + context: AllocationContext::InlineCompressionBlocks, requested: 1_048_576, + unit: BoundsUnit::Items, source, path: Some("Content/Mid.uasset".into()), }); - assert!(s.contains("Content/Mid.uasset"), "got: {s}"); - assert!(s.contains("compression blocks"), "got: {s}"); - assert!(s.contains("1048576"), "got: {s}"); + // Pin adjacency of count + unit + context (the format-string + // shape, not just substring containment) so a reordering or + // template change is caught loudly. + assert!( + s.contains("1048576 items for compression blocks for entry `Content/Mid.uasset`"), + "got: {s}" + ); } #[test] @@ -1787,8 +1998,9 @@ mod tests { .try_reserve_exact(usize::MAX) .expect_err("reserving usize::MAX must fail"); let s = fault_display(&IndexParseFault::AllocationFailed { - context: "v10+ encoded entries", + context: AllocationContext::V10IndexEntries, requested: 100_000, + unit: BoundsUnit::Items, source, path: None, }); @@ -1796,14 +2008,73 @@ mod tests { !s.contains("entry `"), "archive-level must not include `entry`: {s}" ); - assert!(s.contains("v10+ encoded entries"), "got: {s}"); - assert!(s.contains("100000"), "got: {s}"); + assert!( + s.contains("100000 items for entries for v10+ index"), + "got: {s}" + ); + } + + /// PR #144 R1 finding (sev 6): the AllocationFailed Display tests + /// only covered `BoundsUnit::Items`, even though the byte-mode case + /// is the entire motivation of the new `unit` field. This test + /// pins the byte-mode rendering of every `*Bytes` AllocationContext + /// variant — and would have caught the R1 "bytes for bytes" Display + /// stutter before merge. + #[test] + fn index_parse_fault_display_allocation_failed_bytes_unit_no_stutter() { + let make_source = || { + Vec::::new() + .try_reserve_exact(usize::MAX) + .expect_err("reserving usize::MAX must fail") + }; + + // The four `*Bytes`-suffixed contexts. Pin the exact rendered + // shape so the bare-noun-phrase Display convention is enforced + // — any future rename that adds back the leading "bytes" to a + // context Display string would re-introduce the R1 stutter and + // fail this test. + let cases: &[(AllocationContext, &str)] = &[ + ( + AllocationContext::EntryPayloadBytes, + "could not reserve 65536 bytes for entry payload", + ), + ( + AllocationContext::V10MainIndexBytes, + "could not reserve 65536 bytes for v10+ index", + ), + ( + AllocationContext::V10EncodedEntriesBytes, + "could not reserve 65536 bytes for v10+ encoded entries", + ), + ( + AllocationContext::V10FdiBytes, + "could not reserve 65536 bytes for v10+ full directory index", + ), + ]; + for (context, expected_prefix) in cases { + let s = fault_display(&IndexParseFault::AllocationFailed { + context: *context, + requested: 65_536, + unit: BoundsUnit::Bytes, + source: make_source(), + path: None, + }); + assert!( + s.contains(expected_prefix), + "context={context:?}: expected prefix `{expected_prefix}`, got: {s}" + ); + // Negative assertion: the stutter "bytes for bytes" must NOT appear. + assert!( + !s.contains("bytes for bytes"), + "context={context:?}: rendered text contains `bytes for bytes` stutter: {s}" + ); + } } #[test] fn index_parse_fault_display_u64_exceeds_platform_usize_with_path() { let s = fault_display(&IndexParseFault::U64ExceedsPlatformUsize { - field: "uncompressed_size", + field: WireField::UncompressedSize, value: u64::MAX, path: Some("Content/Huge.uasset".into()), }); @@ -1816,7 +2087,7 @@ mod tests { #[test] fn index_parse_fault_display_u64_exceeds_platform_usize_archive_level() { let s = fault_display(&IndexParseFault::U64ExceedsPlatformUsize { - field: "index_size", + field: WireField::IndexSize, value: u64::MAX, path: None, }); @@ -1832,7 +2103,7 @@ mod tests { fn index_parse_fault_display_field_mismatch() { let s = fault_display(&IndexParseFault::FieldMismatch { path: "Content/X.uasset".into(), - field: "compressed_size", + field: WireField::CompressedSize, index_value: "100".into(), payload_value: "200".into(), }); @@ -2070,6 +2341,79 @@ mod tests { } } + /// Pin all `WireField` Display tokens. Mirror of + /// `overflow_site_display_tokens_are_wire_stable` for the + /// closed-set typed-name pattern. Per the type doc-comment, every + /// variant Displays to the exact `&'static str` the call site + /// previously passed pre-#134 — operator log greps and downstream + /// tooling that hard-coded the old strings depend on this. + /// Without this test, a typo in a Display arm + /// (`"compression_block_sze"`) would compile, pass clippy, pass + /// every other test, and silently break dashboard greps. + #[test] + fn wire_field_display_tokens_are_wire_stable() { + let cases: &[(WireField, &str)] = &[ + (WireField::UncompressedSize, "uncompressed_size"), + (WireField::CompressedSize, "compressed_size"), + (WireField::BlockLength, "block_length"), + (WireField::CompressionBlockSize, "compression_block_size"), + (WireField::BlockCount, "block_count"), + (WireField::Sha1, "sha1"), + (WireField::IsEncrypted, "is_encrypted"), + (WireField::CompressionMethod, "compression_method"), + (WireField::CompressionBlocks, "compression_blocks"), + (WireField::EntryCount, "entry_count"), + (WireField::NonEncodedCount, "non_encoded_count"), + (WireField::FileCount, "file_count"), + (WireField::DirCount, "dir_count"), + (WireField::FdiSize, "fdi_size"), + (WireField::EncodedEntriesSize, "encoded_entries_size"), + (WireField::IndexSize, "index_size"), + ]; + for (field, expected) in cases { + assert_eq!(field.to_string(), *expected); + } + } + + /// Pin all `AllocationContext` Display tokens. Same precedent as + /// `wire_field_display_tokens_are_wire_stable` / + /// `overflow_site_display_tokens_are_wire_stable`. The + /// `*Bytes`-suffixed variants Display to bare noun phrases (no + /// leading "bytes" word) — a future rename that adds back the + /// leading unit would re-introduce the PR #144 R1 "bytes for + /// bytes" stutter and fail this test. + #[test] + fn allocation_context_display_tokens_are_wire_stable() { + let cases: &[(AllocationContext, &str)] = &[ + (AllocationContext::EntryPayloadBytes, "entry payload"), + ( + AllocationContext::InlineCompressionBlocks, + "compression blocks", + ), + ( + AllocationContext::EncodedCompressionBlocks, + "encoded compression blocks", + ), + (AllocationContext::FlatIndexEntries, "entries"), + (AllocationContext::DedupTracker, "dedup tracker for entries"), + (AllocationContext::ByPathLookup, "by-path lookup entries"), + (AllocationContext::V10MainIndexBytes, "v10+ index"), + ( + AllocationContext::V10EncodedEntriesBytes, + "v10+ encoded entries", + ), + ( + AllocationContext::V10NonEncodedEntries, + "non-encoded entries for v10+ index", + ), + (AllocationContext::V10FdiBytes, "v10+ full directory index"), + (AllocationContext::V10IndexEntries, "entries for v10+ index"), + ]; + for (context, expected) in cases { + assert_eq!(context.to_string(), *expected); + } + } + /// Pin the three `FStringFault` Display arms not exercised by /// `index_parse_fault_display_fstring_malformed_delegates_to_inner` /// (`LengthExceedsMaximum`, `MissingNullTerminator`, diff --git a/crates/paksmith-core/tests/pak_integration.rs b/crates/paksmith-core/tests/pak_integration.rs index c62919a..30f69bc 100644 --- a/crates/paksmith-core/tests/pak_integration.rs +++ b/crates/paksmith-core/tests/pak_integration.rs @@ -15,7 +15,7 @@ use paksmith_core::container::{ContainerFormat, ContainerReader, EntryMetadata}; use paksmith_core::container::pak::index::CompressionMethod; use paksmith_core::error::{ BlockBoundsKind, BoundsUnit, DecompressionFault, IndexParseFault, OffsetPastFileSizeKind, - OverflowSite, + OverflowSite, WireField, }; use sha1::{Digest, Sha1}; use std::fmt::Write as _; @@ -609,7 +609,7 @@ fn read_entry_rejects_in_data_index_mismatch() { &err, paksmith_core::PaksmithError::InvalidIndex { fault: IndexParseFault::FieldMismatch { - field: "compressed_size", + field: WireField::CompressedSize, .. }, } @@ -2244,7 +2244,7 @@ fn open_rejects_oversized_uncompressed_size() { &err, paksmith_core::PaksmithError::InvalidIndex { fault: IndexParseFault::BoundsExceeded { - field: "uncompressed_size", + field: WireField::UncompressedSize, unit: BoundsUnit::Bytes, .. },