From 481e85def643d500e557a144baac8b3af13bbad1 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Tue, 13 Jan 2026 11:52:39 -0500 Subject: [PATCH 01/17] WIP --- .../Readers/MicrosoftCabinet.cs | 38 ----------------- .../Wrappers/MicrosoftCabinet.cs | 42 +++---------------- 2 files changed, 6 insertions(+), 74 deletions(-) diff --git a/SabreTools.Serialization/Readers/MicrosoftCabinet.cs b/SabreTools.Serialization/Readers/MicrosoftCabinet.cs index 0e66fdba..a2fba52d 100644 --- a/SabreTools.Serialization/Readers/MicrosoftCabinet.cs +++ b/SabreTools.Serialization/Readers/MicrosoftCabinet.cs @@ -172,47 +172,9 @@ private static CFFOLDER ParseFolder(Stream data, CFHEADER header) if (header.FolderReservedSize > 0) folder.ReservedData = data.ReadBytes(header.FolderReservedSize); - if (folder.CabStartOffset > 0) - { - long currentPosition = data.Position; - data.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); - - folder.DataBlocks = new CFDATA[folder.DataCount]; - for (int i = 0; i < folder.DataCount; i++) - { - CFDATA dataBlock = ParseDataBlock(data, header.DataReservedSize); - folder.DataBlocks[i] = dataBlock; - } - - data.SeekIfPossible(currentPosition, SeekOrigin.Begin); - } - return folder; } - /// - /// Parse a Stream into a data block - /// - /// Stream to parse - /// Reserved byte size for data blocks - /// Filled folder on success, null on error - private static CFDATA ParseDataBlock(Stream data, byte dataReservedSize) - { - var dataBlock = new CFDATA(); - - dataBlock.Checksum = data.ReadUInt32LittleEndian(); - dataBlock.CompressedSize = data.ReadUInt16LittleEndian(); - dataBlock.UncompressedSize = data.ReadUInt16LittleEndian(); - - if (dataReservedSize > 0) - data.SeekIfPossible(dataReservedSize, SeekOrigin.Current); - - if (dataBlock.CompressedSize > 0) - data.SeekIfPossible(dataBlock.CompressedSize, SeekOrigin.Current); - - return dataBlock; - } - /// /// Parse a Stream into a file /// diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs index 5a8c678b..839cffcc 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs @@ -184,7 +184,7 @@ private int GetFolderIndex(CFFILE file) public Stream? DecompressBlocks(string? filename, CFFOLDER? folder, int folderIndex, bool includeDebug) { // Ensure data blocks - var dataBlocks = GetDataBlocks(filename, folder, folderIndex); + var dataBlocks = GetFolders(filename, folder, folderIndex); if (dataBlocks == null || dataBlocks.Length == 0) return null; @@ -310,14 +310,12 @@ private static CompressionType GetCompressionType(CFFOLDER folder) /// Indicates if previous cabinets should be ignored /// Indicates if next cabinets should be ignored /// Array of data blocks on success, null otherwise - private CFDATA[]? GetDataBlocks(string? filename, CFFOLDER? folder, int folderIndex, bool skipPrev = false, bool skipNext = false) + private Tuple[]? GetFolders(string? filename, CFFOLDER? 
folder, int folderIndex, bool skipPrev = false, bool skipNext = false) { // Skip invalid folders - if (folder?.DataBlocks == null || folder.DataBlocks.Length == 0) + if (folder == null || folder.DataCount == 0) return null; - - GetData(folder); - + // Get all files for the folder var files = GetFiles(folderIndex); if (files.Length == 0) @@ -340,7 +338,7 @@ private static CompressionType GetCompressionType(CFFOLDER folder) { int prevFolderIndex = Prev.FolderCount - 1; var prevFolder = Prev.Folders[prevFolderIndex - 1]; - prevBlocks = Prev.GetDataBlocks(filename, prevFolder, prevFolderIndex, skipNext: true) ?? []; + prevBlocks = Prev.GetFolders(filename, prevFolder, prevFolderIndex, skipNext: true) ?? []; } } @@ -356,7 +354,7 @@ private static CompressionType GetCompressionType(CFFOLDER folder) if (Next?.Header != null && Next.Folders != null) { var nextFolder = Next.Folders[0]; - nextBlocks = Next.GetDataBlocks(filename, nextFolder, 0, skipPrev: true) ?? []; + nextBlocks = Next.GetFolders(filename, nextFolder, 0, skipPrev: true) ?? []; } } @@ -364,34 +362,6 @@ private static CompressionType GetCompressionType(CFFOLDER folder) return [.. prevBlocks, .. folder.DataBlocks, .. nextBlocks]; } - /// - /// Loads in all the datablocks for the current folder. - /// - /// The folder to have the datablocks loaded for - public void GetData(CFFOLDER folder) - { - if (folder.CabStartOffset <= 0) - return; - - uint offset = folder.CabStartOffset; - for (int i = 0; i < folder.DataCount; i++) - { - offset += 8; - - if (Header.DataReservedSize > 0) - { - folder.DataBlocks[i].ReservedData = ReadRangeFromSource(offset, Header.DataReservedSize); - offset += Header.DataReservedSize; - } - - if (folder.DataBlocks[i].CompressedSize > 0) - { - folder.DataBlocks[i].CompressedData = ReadRangeFromSource(offset, folder.DataBlocks[i].CompressedSize); - offset += folder.DataBlocks[i].CompressedSize; - } - } - } - /// /// Get all files for the current folder, plus connected spanned folders. 
/// From 962167aafc553a66c8218fe5a6a9b2b8a583199f Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Tue, 13 Jan 2026 12:17:40 -0500 Subject: [PATCH 02/17] WIP 2 --- .../Models/MicrosoftCabinet/FolderTuple.cs | 26 ++++++++++++++ .../Wrappers/MicrosoftCabinet.cs | 34 +++++++++++++------ 2 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 SabreTools.Serialization/Models/MicrosoftCabinet/FolderTuple.cs diff --git a/SabreTools.Serialization/Models/MicrosoftCabinet/FolderTuple.cs b/SabreTools.Serialization/Models/MicrosoftCabinet/FolderTuple.cs new file mode 100644 index 00000000..9913e72d --- /dev/null +++ b/SabreTools.Serialization/Models/MicrosoftCabinet/FolderTuple.cs @@ -0,0 +1,26 @@ +using System; + +namespace SabreTools.Data.Models.MicrosoftCabinet +{ + // TODO: Surely there's a better way to do this + /// + /// Tuple to hold what's needed to open a specific folder + /// + public sealed class FolderTuple + { + /// + /// Filename for one cabinet in the set, if available + /// + public string Filename { get; set; } = String.Empty; + + /// + /// Folder containing the blocks to decompress + /// + public CFFOLDER Folder { get; set; } = new CFFOLDER(); + + /// + /// A series of one or more cabinet file (CFFILE) entries + /// + public int FolderIndex { get; set; } + } +} \ No newline at end of file diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs index 839cffcc..8414ae07 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs @@ -184,7 +184,7 @@ private int GetFolderIndex(CFFILE file) public Stream? DecompressBlocks(string? filename, CFFOLDER? folder, int folderIndex, bool includeDebug) { // Ensure data blocks - var dataBlocks = GetFolders(filename, folder, folderIndex); + var dataBlocks = GetDataBlocks(filename, folder, folderIndex); if (dataBlocks == null || dataBlocks.Length == 0) return null; @@ -199,6 +199,7 @@ private int GetFolderIndex(CFFILE file) var ms = new MemoryStream(); for (int i = 0; i < dataBlocks.Length; i++) { + // TODO: wire up var db = dataBlocks[i]; // Get the data to be processed @@ -310,23 +311,36 @@ private static CompressionType GetCompressionType(CFFOLDER folder) /// Indicates if previous cabinets should be ignored /// Indicates if next cabinets should be ignored /// Array of data blocks on success, null otherwise - private Tuple[]? GetFolders(string? filename, CFFOLDER? folder, int folderIndex, bool skipPrev = false, bool skipNext = false) + private FolderTuple?[] GetDataBlocks(string? filename, CFFOLDER? 
folder, int folderIndex, bool skipPrev = false, bool skipNext = false) { // Skip invalid folders - if (folder == null || folder.DataCount == 0) - return null; + if (folder?.DataBlocks == null || folder.DataBlocks.Length == 0) + return []; // Get all files for the folder var files = GetFiles(folderIndex); if (files.Length == 0) - return folder.DataBlocks; + return []; + + FolderTuple?[] folderTuple = new FolderTuple[1]; + folderTuple[0] = null; + if (filename != null && folder != null) + { + folderTuple[0] = new FolderTuple + { + Filename = filename, + Folder = folder, + FolderIndex = folderIndex + }; + } + // Check if the folder spans in either direction bool spanPrev = Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_FROM_PREV || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT); bool spanNext = Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_TO_NEXT || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT); // If the folder spans backward and Prev is not being skipped - CFDATA[] prevBlocks = []; + FolderTuple?[] prevFolderTuples = []; if (!skipPrev && spanPrev) { // Try to get Prev if it doesn't exist @@ -338,12 +352,12 @@ private static CompressionType GetCompressionType(CFFOLDER folder) { int prevFolderIndex = Prev.FolderCount - 1; var prevFolder = Prev.Folders[prevFolderIndex - 1]; - prevBlocks = Prev.GetFolders(filename, prevFolder, prevFolderIndex, skipNext: true) ?? []; + prevFolderTuples = Prev.GetDataBlocks(filename, prevFolder, prevFolderIndex, skipNext: true) ?? []; } } // If the folder spans forward and Next is not being skipped - CFDATA[] nextBlocks = []; + FolderTuple?[] nextFolderTuples = []; if (!skipNext && spanNext) { // Try to get Next if it doesn't exist @@ -354,12 +368,12 @@ private static CompressionType GetCompressionType(CFFOLDER folder) if (Next?.Header != null && Next.Folders != null) { var nextFolder = Next.Folders[0]; - nextBlocks = Next.GetFolders(filename, nextFolder, 0, skipPrev: true) ?? []; + nextFolderTuples = Next.GetDataBlocks(filename, nextFolder, 0, skipPrev: true) ?? []; } } // Return all found blocks in order - return [.. prevBlocks, .. folder.DataBlocks, .. nextBlocks]; + return [.. prevFolderTuples, .. folderTuple, .. nextFolderTuples]; } /// From 25c29eb0e32e91d5dfe6648c4da29fe99b5b9a91 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Tue, 13 Jan 2026 18:04:51 -0500 Subject: [PATCH 03/17] Todo: you're missing a read somehow and getting misaligned by two bytes? 
maybe properly implementing the buffer will magically fix it --- .../Readers/MicrosoftCabinet.cs | 20 ++ .../Wrappers/MicrosoftCabinet.Extraction.cs | 192 +++++++++++++++++- .../Wrappers/MicrosoftCabinet.cs | 78 +------ 3 files changed, 205 insertions(+), 85 deletions(-) diff --git a/SabreTools.Serialization/Readers/MicrosoftCabinet.cs b/SabreTools.Serialization/Readers/MicrosoftCabinet.cs index a2fba52d..f04cde40 100644 --- a/SabreTools.Serialization/Readers/MicrosoftCabinet.cs +++ b/SabreTools.Serialization/Readers/MicrosoftCabinet.cs @@ -174,6 +174,26 @@ private static CFFOLDER ParseFolder(Stream data, CFHEADER header) return folder; } + + /// + /// Parse a Stream into a folder + /// + /// Stream to parse + /// Cabinet header to get flags and sizes from + /// Filled folder on success, null on error + private static CFFOLDER ParseFolderData(Stream data, CFHEADER header) + { + var folder = new CFFOLDER(); + + folder.CabStartOffset = data.ReadUInt32LittleEndian(); + folder.DataCount = data.ReadUInt16LittleEndian(); + folder.CompressionType = (CompressionType)data.ReadUInt16LittleEndian(); + + if (header.FolderReservedSize > 0) + folder.ReservedData = data.ReadBytes(header.FolderReservedSize); + + return folder; + } /// /// Parse a Stream into a file diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index f1923c34..52189eb2 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -3,6 +3,7 @@ using System.IO; using SabreTools.Data.Models.MicrosoftCabinet; using SabreTools.IO.Extensions; +using SabreTools.IO.Compression.MSZIP; namespace SabreTools.Serialization.Wrappers { @@ -161,6 +162,8 @@ public bool Extract(string outputDirectory, bool includeDebug) cabinet = OpenSet(Filename); ignorePrev = true; } + + // TODO: first folder idk // If the archive is invalid if (cabinet?.Folders == null || cabinet.Folders.Length == 0) @@ -184,10 +187,13 @@ public bool Extract(string outputDirectory, bool includeDebug) } // Move to the next cabinet, if possible + + /* Array.ForEach(cabinet.Folders, folder => folder.DataBlocks = []); + */ cabinet = cabinet.Next; - cabinet?.Prev = null; + /*cabinet?.Prev = null;*/ // TODO: already-extracted data isn't being cleared from memory, at least not nearly enough. if (cabinet?.Folders == null || cabinet.Folders.Length == 0) @@ -220,10 +226,6 @@ private bool ExtractFolder(string? filename, bool ignorePrev, bool includeDebug) { - // Decompress the blocks, if possible - using var blockStream = DecompressBlocks(filename, folder, folderIndex, includeDebug); - if (blockStream == null || blockStream.Length == 0) - return false; // Loop through the files bool allExtracted = true; @@ -246,11 +248,15 @@ private bool ExtractFolder(string? filename, } CFFILE[] files = fileList.ToArray(); - blockStream.SeekIfPossible(0, SeekOrigin.Begin); + byte[] leftoverBytes = []; + if (folder == null) // TODO: this should never happen + return false; + + this._dataSource.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); for (int i = 0; i < files.Length; i++) { var file = files[i]; - allExtracted &= ExtractFiles(outputDirectory, blockStream, file, includeDebug); + allExtracted &= ExtractFiles(outputDirectory, folder, file, ref leftoverBytes, includeDebug); } return allExtracted; @@ -265,11 +271,11 @@ private bool ExtractFolder(string? 
filename, /// File information /// True to include debug data, false otherwise /// True if the file extracted, false otherwise - private static bool ExtractFiles(string outputDirectory, Stream blockStream, CFFILE file, bool includeDebug) + private bool ExtractFiles(string outputDirectory, CFFOLDER? folder, CFFILE file, ref byte[] leftoverBytes, bool includeDebug) { try { - byte[] fileData = blockStream.ReadBytes((int)file.FileSize); + // byte[] fileData = blockStream.ReadBytes((int)file.FileSize); // Ensure directory separators are consistent string filename = file.Name; @@ -286,7 +292,173 @@ private static bool ExtractFiles(string outputDirectory, Stream blockStream, CFF // Open the output file for writing using var fs = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None); - fs.Write(fileData, 0, fileData.Length); + +#region workregion + + // Ensure folder contains data + // TODO: does this ever fail on spanned only folders or something + if (folder == null || folder.DataCount == 0) + return false; + + // Get the compression type + var compressionType = GetCompressionType(folder!); + + // Setup decompressors + var mszip = Decompressor.Create(); + //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); + + // Loop through the data blocks + + MicrosoftCabinet cabinet = this; + + int cabinetCount = 1; + if (this.Files[this.FileCount - 1].FolderIndex == FolderIndex.CONTINUED_TO_NEXT) + { + cabinetCount++; + MicrosoftCabinet? tempCabinet = this.Next; // TODO: what do you do if this is null, it shouldn't be + while (tempCabinet?.Files[0].FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT) + { + cabinetCount++; + tempCabinet = tempCabinet.Next; + } + } + + // TODO: do continued spanned folders ever contain another file beyond the one spanned one + + + CFFOLDER currentFolder = folder; + int currentCabinetCount = 0; + bool continuedBlock = false; + bool fileFinished = false; + CFDATA continuedDataBlock = new CFDATA(); // TODO: this wont work because it resets i think. Another ref? do in main buffer + // TODO: these probably dont need to be longs, they were ints before + int filesize = (int)file.FileSize; + int extractedSize = 0; + while (currentCabinetCount < cabinetCount) + { + lock (cabinet._dataSourceLock) + { + if (currentFolder.CabStartOffset <= 0) + return false; // TODO: why is a CabStartOffset of 0 not acceptable? header? 
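// Orientation note for the block read below (per the MS-CAB spec; describes existing behavior,
// nothing new): each CFDATA entry starts with an 8-byte header — uint32 csum, uint16 cbData
// (count of compressed bytes that follow), uint16 cbUncomp (at most 32 KiB uncompressed, and 0
// when the block is continued in the next cabinet) — followed by DataReservedSize bytes of
// reserved data, if any, and then cbData bytes of compressed payload.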
+ + /*long currentPosition = cabinet._dataSource.Position;*/ + + for (int i = 0; i < currentFolder.DataCount; i++) + { + if (leftoverBytes.Length > 0) + { + int writeSize = Math.Min(leftoverBytes.Length, filesize - extractedSize); + byte[] tempLeftoverBytes = (byte[])leftoverBytes.Clone(); + if (writeSize < leftoverBytes.Length) + { + leftoverBytes = new byte[leftoverBytes.Length - writeSize]; + Array.Copy(tempLeftoverBytes, writeSize, leftoverBytes, 0, leftoverBytes.Length); + } + else + { + leftoverBytes = []; + } + fs.Write(tempLeftoverBytes, 0, writeSize); + extractedSize += tempLeftoverBytes.Length; + if (extractedSize >= filesize) + { + fileFinished = true; + break; + } + } + // TODO: wire up + var db = new CFDATA(); + + var dataReservedSize = cabinet.Header.DataReservedSize; + + db.Checksum = cabinet._dataSource.ReadUInt32LittleEndian(); + db.CompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); + db.UncompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); + + if (dataReservedSize > 0) + db.ReservedData = cabinet._dataSource.ReadBytes(dataReservedSize); + + if (db.CompressedSize > 0) + db.CompressedData = cabinet._dataSource.ReadBytes(db.CompressedSize); + + /*data.SeekIfPossible(currentPosition, SeekOrigin.Begin);*/ + + // Get the data to be processed + byte[] blockData = db.CompressedData; + + // If the block is continued, append + if (db.UncompressedSize == 0) + { + // TODO: is this a correct assumption at all + + continuedBlock = true; + continuedDataBlock = db; + } + else + { + if (continuedBlock) + { + // TODO: why was there a continue if compressed data is null here + continuedBlock = false; + db.CompressedData = [.. continuedDataBlock.CompressedData, .. blockData]; + db.CompressedSize += continuedDataBlock.CompressedSize; + db.UncompressedSize = continuedDataBlock.UncompressedSize; + continuedDataBlock = new CFDATA(); + } + + // Get the uncompressed data block + byte[] data = compressionType switch + { + CompressionType.TYPE_NONE => blockData, + CompressionType.TYPE_MSZIP => DecompressMSZIPBlock(currentCabinetCount, mszip, i, db, blockData, + includeDebug), + + // TODO: Unsupported + CompressionType.TYPE_QUANTUM => [], + CompressionType.TYPE_LZX => [], + + // Should be impossible + _ => [], + }; + int writeSize = Math.Min(data.Length, filesize - extractedSize ); + if (writeSize < data.Length) + { + leftoverBytes = new byte[data.Length - writeSize]; + Array.Copy(data, writeSize, leftoverBytes, 0, leftoverBytes.Length); + } + fs.Write(data, 0, writeSize); + extractedSize += data.Length; + if (extractedSize >= filesize) + { + fileFinished = true; + break; + } + // TODO: do i ever need to flush before the end of the file? 
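// Regarding the flush TODO above: FileStream buffers writes internally and flushes that buffer
// when the stream is closed/disposed, so an explicit Flush() mid-file is only needed if another
// reader must observe the partially written contents while extraction is still in progress.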
+ } + } + } + + if (fileFinished) + break; + + // TODO: does this running unnecessarily on unspanned folders cause issues + // TODO: spanned folders are only across cabs and never within cabs, right + + if (cabinet.Next == null) + break; + + if (currentCabinetCount == cabinetCount - 1) + break; + + cabinet = cabinet.Next; + cabinet._dataSource.SeekIfPossible(currentFolder.CabStartOffset, SeekOrigin.Begin); + currentFolder = cabinet.Folders[0]; + currentCabinetCount++; + } + +#endregion + + //fs.Write(fileData, 0, fileData.Length); fs.Flush(); } catch (Exception ex) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs index 8414ae07..65573580 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs @@ -173,78 +173,6 @@ private int GetFolderIndex(CFFILE file) #region Folders - /// - /// Decompress all blocks for a folder - /// - /// Filename for one cabinet in the set, if available - /// Folder containing the blocks to decompress - /// Index of the folder in the cabinet - /// True to include debug data, false otherwise - /// Stream representing the decompressed data on success, null otherwise - public Stream? DecompressBlocks(string? filename, CFFOLDER? folder, int folderIndex, bool includeDebug) - { - // Ensure data blocks - var dataBlocks = GetDataBlocks(filename, folder, folderIndex); - if (dataBlocks == null || dataBlocks.Length == 0) - return null; - - // Get the compression type - var compressionType = GetCompressionType(folder!); - - // Setup decompressors - var mszip = Decompressor.Create(); - //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); - - // Loop through the data blocks - var ms = new MemoryStream(); - for (int i = 0; i < dataBlocks.Length; i++) - { - // TODO: wire up - var db = dataBlocks[i]; - - // Get the data to be processed - byte[] blockData = db.CompressedData; - - // If the block is continued, append - bool continuedBlock = false; - if (db.UncompressedSize == 0) - { - var nextBlock = dataBlocks[i + 1]; - byte[]? nextData = nextBlock.CompressedData; - if (nextData == null) - continue; - - continuedBlock = true; - blockData = [.. blockData, .. nextData]; - db.CompressedSize += nextBlock.CompressedSize; - db.UncompressedSize = nextBlock.UncompressedSize; - } - - // Get the uncompressed data block - byte[] data = compressionType switch - { - CompressionType.TYPE_NONE => blockData, - CompressionType.TYPE_MSZIP => DecompressMSZIPBlock(folderIndex, mszip, i, db, blockData, includeDebug), - - // TODO: Unsupported - CompressionType.TYPE_QUANTUM => [], - CompressionType.TYPE_LZX => [], - - // Should be impossible - _ => [], - }; - - // Write the uncompressed data block - ms.Write(data, 0, data.Length); - ms.Flush(); - - // Increment additionally if we had a continued block - if (continuedBlock) i++; - } - - return ms; - } - /// /// Decompress an MS-ZIP block using an existing decompressor /// @@ -311,7 +239,7 @@ private static CompressionType GetCompressionType(CFFOLDER folder) /// Indicates if previous cabinets should be ignored /// Indicates if next cabinets should be ignored /// Array of data blocks on success, null otherwise - private FolderTuple?[] GetDataBlocks(string? filename, CFFOLDER? folder, int folderIndex, bool skipPrev = false, bool skipNext = false) + private FolderTuple?[] GetFolders(string? filename, CFFOLDER? 
folder, int folderIndex, bool skipPrev = false, bool skipNext = false) { // Skip invalid folders if (folder?.DataBlocks == null || folder.DataBlocks.Length == 0) @@ -352,7 +280,7 @@ private static CompressionType GetCompressionType(CFFOLDER folder) { int prevFolderIndex = Prev.FolderCount - 1; var prevFolder = Prev.Folders[prevFolderIndex - 1]; - prevFolderTuples = Prev.GetDataBlocks(filename, prevFolder, prevFolderIndex, skipNext: true) ?? []; + prevFolderTuples = Prev.GetFolders(filename, prevFolder, prevFolderIndex, skipNext: true) ?? []; } } @@ -368,7 +296,7 @@ private static CompressionType GetCompressionType(CFFOLDER folder) if (Next?.Header != null && Next.Folders != null) { var nextFolder = Next.Folders[0]; - nextFolderTuples = Next.GetDataBlocks(filename, nextFolder, 0, skipPrev: true) ?? []; + nextFolderTuples = Next.GetFolders(filename, nextFolder, 0, skipPrev: true) ?? []; } } From 91e2c48a1f62a6e755fb7c35043274199c66ee87 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Tue, 13 Jan 2026 18:35:18 -0500 Subject: [PATCH 04/17] continued blocks my behated --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 52189eb2..f9e5534f 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -253,10 +253,12 @@ private bool ExtractFolder(string? filename, return false; this._dataSource.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); + // Setup decompressors + var mszip = Decompressor.Create(); for (int i = 0; i < files.Length; i++) { var file = files[i]; - allExtracted &= ExtractFiles(outputDirectory, folder, file, ref leftoverBytes, includeDebug); + allExtracted &= ExtractFiles(outputDirectory, folder, file, ref leftoverBytes, mszip, includeDebug); } return allExtracted; @@ -271,7 +273,7 @@ private bool ExtractFolder(string? filename, /// File information /// True to include debug data, false otherwise /// True if the file extracted, false otherwise - private bool ExtractFiles(string outputDirectory, CFFOLDER? folder, CFFILE file, ref byte[] leftoverBytes, bool includeDebug) + private bool ExtractFiles(string outputDirectory, CFFOLDER? folder, CFFILE file, ref byte[] leftoverBytes, Decompressor mszip, bool includeDebug) { try { @@ -303,8 +305,6 @@ private bool ExtractFiles(string outputDirectory, CFFOLDER? folder, CFFILE file, // Get the compression type var compressionType = GetCompressionType(folder!); - // Setup decompressors - var mszip = Decompressor.Create(); //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); // Loop through the data blocks @@ -393,16 +393,31 @@ private bool ExtractFiles(string outputDirectory, CFFOLDER? folder, CFFILE file, continuedBlock = true; continuedDataBlock = db; + + // TODO: these really need to never happen + if (cabinet.Next == null) + break; + + if (currentCabinetCount == cabinetCount - 1) + break; + + cabinet = cabinet.Next; + cabinet._dataSource.SeekIfPossible(currentFolder.CabStartOffset, SeekOrigin.Begin); + currentFolder = cabinet.Folders[0]; + currentCabinetCount++; } else { if (continuedBlock) { + var nextBlock = db; + db = continuedDataBlock; // TODO: why was there a continue if compressed data is null here continuedBlock = false; - db.CompressedData = [.. continuedDataBlock.CompressedData, .. 
blockData]; - db.CompressedSize += continuedDataBlock.CompressedSize; - db.UncompressedSize = continuedDataBlock.UncompressedSize; + byte[]? nextData = nextBlock.CompressedData; + blockData = [.. blockData, .. nextData]; + db.CompressedSize += nextBlock.CompressedSize; + db.UncompressedSize = nextBlock.UncompressedSize; continuedDataBlock = new CFDATA(); } From 7e66a9f18aead9738480978f5979784b2a51591b Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Wed, 14 Jan 2026 18:01:27 -0500 Subject: [PATCH 05/17] Pre-major-testing --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 558 ++++++++---------- 1 file changed, 255 insertions(+), 303 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index f9e5534f..f7c097ff 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -37,7 +37,7 @@ public partial class MicrosoftCabinet : IExtractable // If the file is invalid if (string.IsNullOrEmpty(filename)) return null; - else if (!File.Exists(filename!)) + else if (!File.Exists(filename)) return null; // Get the full file path and directory @@ -149,9 +149,6 @@ public partial class MicrosoftCabinet : IExtractable /// public bool Extract(string outputDirectory, bool includeDebug) { - // Display warning in debug runs - if (includeDebug) Console.WriteLine("WARNING: LZX and Quantum compression schemes are not supported so some files may be skipped!"); - // Do not ignore previous links by default bool ignorePrev = false; @@ -161,75 +158,55 @@ public bool Extract(string outputDirectory, bool includeDebug) { cabinet = OpenSet(Filename); ignorePrev = true; - } - - // TODO: first folder idk - - // If the archive is invalid - if (cabinet?.Folders == null || cabinet.Folders.Length == 0) - return false; - - try - { - // Loop through the folders - bool allExtracted = true; - while (true) + + // Display warning in debug runs + if (includeDebug && cabinet != null) { - // Loop through the current folders - for (int f = 0; f < cabinet.Folders.Length; f++) + var tempCabinet = cabinet; + HashSet compressionTypes = new HashSet(); + while (true) // this feels unsafe, but the existing code already did it { - if (f == 0 && (cabinet.Files[0].FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT - || cabinet.Files[0].FolderIndex == FolderIndex.CONTINUED_FROM_PREV)) - continue; - - var folder = cabinet.Folders[f]; - allExtracted &= cabinet.ExtractFolder(Filename, outputDirectory, folder, f, ignorePrev, includeDebug); + for (int i = 0; i < tempCabinet.FolderCount; i++) + compressionTypes.Add(tempCabinet.Folders[i].CompressionType & CompressionType.MASK_TYPE); // TODO: what is this mask for? + + tempCabinet = tempCabinet.Next; + + if (tempCabinet == null) // TODO: handle better + break; + + if (tempCabinet.Folders.Length == 0) + break; } + + string firstLine = "Mscab contains compression:"; + bool firstFence = true; + foreach (CompressionType compressionType in compressionTypes) + { + if (firstFence) + firstFence = false; + else + firstLine += ","; - // Move to the next cabinet, if possible - - /* - Array.ForEach(cabinet.Folders, folder => folder.DataBlocks = []); - */ - - cabinet = cabinet.Next; - /*cabinet?.Prev = null;*/ - - // TODO: already-extracted data isn't being cleared from memory, at least not nearly enough. 
- if (cabinet?.Folders == null || cabinet.Folders.Length == 0) - break; + firstLine += $" {compressionType}"; + } + + Console.WriteLine(firstLine); + if (compressionTypes.Contains(CompressionType.TYPE_QUANTUM) || compressionTypes.Contains(CompressionType.TYPE_LZX)) + Console.WriteLine("WARNING: LZX and Quantum compression schemes are not supported so some files may be skipped!"); } - - return allExtracted; } - catch (Exception ex) - { - if (includeDebug) Console.Error.WriteLine(ex); + + // If the archive is invalid + if (cabinet?.Folders == null || cabinet.Folders.Length == 0) return false; - } + + return cabinet.ExtractSet(Filename, outputDirectory, ignorePrev, includeDebug); } - /// - /// Extract the contents of a single folder - /// - /// Filename for one cabinet in the set, if available - /// Path to the output directory - /// Folder containing the blocks to decompress - /// Index of the folder in the cabinet - /// True to ignore previous links, false otherwise - /// True to include debug data, false otherwise - /// True if all files extracted, false otherwise - private bool ExtractFolder(string? filename, - string outputDirectory, - CFFOLDER? folder, - int folderIndex, - bool ignorePrev, - bool includeDebug) + private CFFILE[] GetSpannedFilesArray(string? filename, int f, bool ignorePrev) { - // Loop through the files - bool allExtracted = true; - var filterFiles = GetSpannedFiles(filename, folderIndex, ignorePrev); + var filterFiles = GetSpannedFiles(filename, f, ignorePrev); List fileList = []; // Filtering, add debug output eventually @@ -247,284 +224,259 @@ private bool ExtractFolder(string? filename, fileList.Add(file); } - CFFILE[] files = fileList.ToArray(); - byte[] leftoverBytes = []; - if (folder == null) // TODO: this should never happen - return false; - - this._dataSource.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); - // Setup decompressors - var mszip = Decompressor.Create(); - for (int i = 0; i < files.Length; i++) - { - var file = files[i]; - allExtracted &= ExtractFiles(outputDirectory, folder, file, ref leftoverBytes, mszip, includeDebug); - } + return fileList.ToArray(); + } - return allExtracted; + private FileStream GetFileStream(string filename, string outputDirectory) + { + // byte[] fileData = blockStream.ReadBytes((int)file.FileSize); + + // Ensure directory separators are consistent + if (Path.DirectorySeparatorChar == '\\') + filename = filename.Replace('/', '\\'); + else if (Path.DirectorySeparatorChar == '/') + filename = filename.Replace('\\', '/'); + + // Ensure the full output directory exists + filename = Path.Combine(outputDirectory, filename); + var directoryName = Path.GetDirectoryName(filename); + if (directoryName != null && !Directory.Exists(directoryName)) + Directory.CreateDirectory(directoryName); + + // Open the output file for writing + return File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None); } - // TODO: this will apparently improve memory usage/performance, but it's not clear if this implementation is enough for that to happen - /// - /// Extract the contents of a single file, intended to be used with all files in a straight shot - /// - /// Path to the output directory - /// Stream representing the uncompressed block data - /// File information - /// True to include debug data, false otherwise - /// True if the file extracted, false otherwise - private bool ExtractFiles(string outputDirectory, CFFOLDER? 
folder, CFFILE file, ref byte[] leftoverBytes, Decompressor mszip, bool includeDebug) + private CFDATA ReadBlock(MicrosoftCabinet cabinet) { - try - { - // byte[] fileData = blockStream.ReadBytes((int)file.FileSize); - - // Ensure directory separators are consistent - string filename = file.Name; - if (Path.DirectorySeparatorChar == '\\') - filename = filename.Replace('/', '\\'); - else if (Path.DirectorySeparatorChar == '/') - filename = filename.Replace('\\', '/'); - - // Ensure the full output directory exists - filename = Path.Combine(outputDirectory, filename); - var directoryName = Path.GetDirectoryName(filename); - if (directoryName != null && !Directory.Exists(directoryName)) - Directory.CreateDirectory(directoryName); - - // Open the output file for writing - using var fs = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None); - -#region workregion + var db = new CFDATA(); - // Ensure folder contains data - // TODO: does this ever fail on spanned only folders or something - if (folder == null || folder.DataCount == 0) - return false; + var dataReservedSize = cabinet.Header.DataReservedSize; - // Get the compression type - var compressionType = GetCompressionType(folder!); + db.Checksum = cabinet._dataSource.ReadUInt32LittleEndian(); + db.CompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); + db.UncompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); - //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); + if (dataReservedSize > 0) + db.ReservedData = cabinet._dataSource.ReadBytes(dataReservedSize); - // Loop through the data blocks + if (db.CompressedSize > 0) + db.CompressedData = cabinet._dataSource.ReadBytes(db.CompressedSize); - MicrosoftCabinet cabinet = this; - - int cabinetCount = 1; - if (this.Files[this.FileCount - 1].FolderIndex == FolderIndex.CONTINUED_TO_NEXT) - { - cabinetCount++; - MicrosoftCabinet? tempCabinet = this.Next; // TODO: what do you do if this is null, it shouldn't be - while (tempCabinet?.Files[0].FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT) - { - cabinetCount++; - tempCabinet = tempCabinet.Next; - } - } - - // TODO: do continued spanned folders ever contain another file beyond the one spanned one + return db; + } - - CFFOLDER currentFolder = folder; - int currentCabinetCount = 0; - bool continuedBlock = false; - bool fileFinished = false; - CFDATA continuedDataBlock = new CFDATA(); // TODO: this wont work because it resets i think. Another ref? do in main buffer - // TODO: these probably dont need to be longs, they were ints before - int filesize = (int)file.FileSize; - int extractedSize = 0; - while (currentCabinetCount < cabinetCount) + // TODO: cab stepping, folder stepping (I think?), 0 size continued blocks, find something that triggers exact data size + /// + /// Extract the contents of a cabinet set + /// + /// Filename for one cabinet in the set, if available + /// Path to the output directory + /// True to ignore previous links, false otherwise + /// True to include debug data, false otherwise + /// True if all files extracted, false otherwise + private bool ExtractSet(string? 
cabFilename, string outputDirectory, bool ignorePrev, bool includeDebug) + { + var cabinet = this; + var currentCabFilename = cabFilename; + try + { + // Loop through the folders + bool allExtracted = true; + while (true) { - lock (cabinet._dataSourceLock) + // Loop through the current folders + for (int f = 0; f < cabinet.Folders.Length; f++) { - if (currentFolder.CabStartOffset <= 0) - return false; // TODO: why is a CabStartOffset of 0 not acceptable? header? - - /*long currentPosition = cabinet._dataSource.Position;*/ + if (f == 0 && (cabinet.Files[0].FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT + || cabinet.Files[0].FolderIndex == FolderIndex.CONTINUED_FROM_PREV)) + continue; - for (int i = 0; i < currentFolder.DataCount; i++) + var folder = cabinet.Folders[f]; + CFFILE[] files = cabinet.GetSpannedFilesArray(currentCabFilename, f, ignorePrev); + var file = files[0]; + int bytesLeft = (int)file.FileSize; + int fileCounter = 0; + + cabinet._dataSource.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); + var mszip = Decompressor.Create(); + try { - if (leftoverBytes.Length > 0) - { - int writeSize = Math.Min(leftoverBytes.Length, filesize - extractedSize); - byte[] tempLeftoverBytes = (byte[])leftoverBytes.Clone(); - if (writeSize < leftoverBytes.Length) - { - leftoverBytes = new byte[leftoverBytes.Length - writeSize]; - Array.Copy(tempLeftoverBytes, writeSize, leftoverBytes, 0, leftoverBytes.Length); - } - else - { - leftoverBytes = []; - } - fs.Write(tempLeftoverBytes, 0, writeSize); - extractedSize += tempLeftoverBytes.Length; - if (extractedSize >= filesize) - { - fileFinished = true; - break; - } - } - // TODO: wire up - var db = new CFDATA(); + var fs = GetFileStream(file.Name, outputDirectory); - var dataReservedSize = cabinet.Header.DataReservedSize; + // Ensure folder contains data + // TODO: does this fail on spanned only folders or something? when would this happen + if (folder.DataCount == 0) + return false; - db.Checksum = cabinet._dataSource.ReadUInt32LittleEndian(); - db.CompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); - db.UncompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); + // Get the compression type + var compressionType = GetCompressionType(folder); - if (dataReservedSize > 0) - db.ReservedData = cabinet._dataSource.ReadBytes(dataReservedSize); + // TODO: what is this comment here for + //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); - if (db.CompressedSize > 0) - db.CompressedData = cabinet._dataSource.ReadBytes(db.CompressedSize); - - /*data.SeekIfPossible(currentPosition, SeekOrigin.Begin);*/ + if (folder.CabStartOffset <= 0) + return false; // TODO: why is a CabStartOffset of 0 not acceptable? header? 
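// On the TODO above: offset 0 is where the CFHEADER itself lives (a fixed 36-byte header,
// optionally followed by header reserve data and the CFFOLDER entries), so a well-formed
// folder's first CFDATA block can never start at offset 0; rejecting 0 here is a sanity
// check on the parsed header rather than an arbitrary restriction.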
- // Get the data to be processed - byte[] blockData = db.CompressedData; + var tempCabinet = cabinet; + int j = 0; - // If the block is continued, append - if (db.UncompressedSize == 0) + // Loop through the data blocks + // Has to be a while loop instead of a for loop due to cab spanning continue blocks + while (j < folder.DataCount) { - // TODO: is this a correct assumption at all - - continuedBlock = true; - continuedDataBlock = db; - - // TODO: these really need to never happen - if (cabinet.Next == null) - break; - - if (currentCabinetCount == cabinetCount - 1) - break; - - cabinet = cabinet.Next; - cabinet._dataSource.SeekIfPossible(currentFolder.CabStartOffset, SeekOrigin.Begin); - currentFolder = cabinet.Folders[0]; - currentCabinetCount++; - } - else - { - if (continuedBlock) + // TODO: since i need lock state to be maintained the whole loop, do i need to cache and reset position to be safe? + lock (tempCabinet._dataSourceLock) { - var nextBlock = db; - db = continuedDataBlock; - // TODO: why was there a continue if compressed data is null here - continuedBlock = false; - byte[]? nextData = nextBlock.CompressedData; - blockData = [.. blockData, .. nextData]; - db.CompressedSize += nextBlock.CompressedSize; - db.UncompressedSize = nextBlock.UncompressedSize; - continuedDataBlock = new CFDATA(); + var db = ReadBlock(tempCabinet); + + // Get the data to be processed + byte[] blockData = db.CompressedData; + + // If the block is continued, append + // TODO: this is specifically if and only if it's jumping between cabs on a spanned folder, I think? + bool continuedBlock = false; + if (db.UncompressedSize == 0) + { + tempCabinet = tempCabinet.Next; + if (tempCabinet == null) // TODO: handle better? + return false; + + folder = tempCabinet.Folders[0]; + lock (tempCabinet._dataSourceLock) + { + // TODO: make sure this spans? + tempCabinet._dataSource.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); + var nextBlock = ReadBlock(tempCabinet); + byte[] nextData = nextBlock.CompressedData; + if (nextData.Length == 0) // TODO: null cant happen, is it meant to be if it's empty? + continue; + + continuedBlock = true; + blockData = [.. blockData, .. nextData]; + db.CompressedSize += nextBlock.CompressedSize; + db.UncompressedSize = nextBlock.UncompressedSize; + } + } + + // Get the uncompressed data block + byte[] data = compressionType switch + { + CompressionType.TYPE_NONE => blockData, + CompressionType.TYPE_MSZIP => DecompressMSZIPBlock(f, mszip, j, db, blockData, includeDebug), + + // TODO: Unsupported + CompressionType.TYPE_QUANTUM => [], + CompressionType.TYPE_LZX => [], + + // Should be impossible + _ => [], + }; + if (bytesLeft > 0 && bytesLeft >= data.Length) + { + fs.Write(data); + bytesLeft -= data.Length; + } + else if (bytesLeft > 0 && bytesLeft < data.Length) + { + int tempBytesLeft = bytesLeft; + fs.Write(data, 0, bytesLeft); + fs.Close(); + + // reached end of folder + if (fileCounter + 1 == files.Length) + break; + + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); + // TODO: can I deduplicate this? 
probably not since I need breaks + while (bytesLeft < data.Length - tempBytesLeft) + { + fs.Write(data, tempBytesLeft, bytesLeft); + tempBytesLeft += bytesLeft; + fs.Close(); + + // reached end of folder + if (fileCounter + 1 == files.Length) + break; + + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); + } + + fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); + bytesLeft -= (data.Length - tempBytesLeft); + if (bytesLeft == 0) // Edge case on the final file of the final cab of https://dbox.tools/titles/pc/57520FA0/ + fs.Close(); + } + else // TODO: find something that can actually trigger this case + { + int tempBytesLeft = bytesLeft; + fs.Close(); + + // reached end of folder + if (fileCounter + 1 == files.Length) + break; + + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); + while (bytesLeft < data.Length - tempBytesLeft) + { + fs.Write(data, tempBytesLeft, bytesLeft); + tempBytesLeft += bytesLeft; + fs.Close(); + + // reached end of folder + if (fileCounter + 1 == files.Length) + break; + + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); + if (bytesLeft == 0) // This case is not currently observed, but presumably it can also happen like above + fs.Close(); + } + + fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); + bytesLeft -= (data.Length - tempBytesLeft); + } + + // TODO: do i ever need to flush before the end of the file? + j++; + if (continuedBlock) + j = 0; } - - // Get the uncompressed data block - byte[] data = compressionType switch - { - CompressionType.TYPE_NONE => blockData, - CompressionType.TYPE_MSZIP => DecompressMSZIPBlock(currentCabinetCount, mszip, i, db, blockData, - includeDebug), - - // TODO: Unsupported - CompressionType.TYPE_QUANTUM => [], - CompressionType.TYPE_LZX => [], - - // Should be impossible - _ => [], - }; - int writeSize = Math.Min(data.Length, filesize - extractedSize ); - if (writeSize < data.Length) - { - leftoverBytes = new byte[data.Length - writeSize]; - Array.Copy(data, writeSize, leftoverBytes, 0, leftoverBytes.Length); - } - fs.Write(data, 0, writeSize); - extractedSize += data.Length; - if (extractedSize >= filesize) - { - fileFinished = true; - break; - } - // TODO: do i ever need to flush before the end of the file? } } + catch (Exception ex) + { + if (includeDebug) Console.Error.WriteLine(ex); + return false; + } } - if (fileFinished) - break; - - // TODO: does this running unnecessarily on unspanned folders cause issues - // TODO: spanned folders are only across cabs and never within cabs, right + // Move to the next cabinet, if possible + cabinet = cabinet.Next; + if (cabinet == null) // TODO: handle better + return false; - if (cabinet.Next == null) - break; + currentCabFilename = cabinet.Filename; - if (currentCabinetCount == cabinetCount - 1) + // TODO: already-extracted data isn't being cleared from memory, at least not nearly enough. 
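// One possible answer to the memory TODO above (a sketch only, not wired in; assumes Prev/Next
// are ordinary settable managed references): at this point the finished cabinet is only reachable
// through cabinet.Prev, so severing the links would let the GC reclaim its parsed folders and
// blocks, echoing the earlier commented-out "cabinet?.Prev = null;" idea:
//
//     if (cabinet.Prev != null)
//         cabinet.Prev.Next = null;   // hypothetical: assumes Next has a setter
//     cabinet.Prev = null;            // hypothetical: assumes Prev has a setter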
+ if (cabinet.Folders.Length == 0) break; - - cabinet = cabinet.Next; - cabinet._dataSource.SeekIfPossible(currentFolder.CabStartOffset, SeekOrigin.Begin); - currentFolder = cabinet.Folders[0]; - currentCabinetCount++; } -#endregion - - //fs.Write(fileData, 0, fileData.Length); - fs.Flush(); - } - catch (Exception ex) - { - if (includeDebug) Console.Error.WriteLine(ex); - return false; - } - - return true; - } - - /// - /// Extract the contents of a single file - /// - /// Path to the output directory - /// Stream representing the uncompressed block data - /// File information - /// True to include debug data, false otherwise - /// True if the file extracted, false otherwise - private static bool ExtractFile(string outputDirectory, Stream blockStream, CFFILE file, bool includeDebug) - { - try - { - blockStream.SeekIfPossible(file.FolderStartOffset, SeekOrigin.Begin); - byte[] fileData = blockStream.ReadBytes((int)file.FileSize); - - // Ensure directory separators are consistent - string filename = file.Name; - if (Path.DirectorySeparatorChar == '\\') - filename = filename.Replace('/', '\\'); - else if (Path.DirectorySeparatorChar == '/') - filename = filename.Replace('\\', '/'); - - // Ensure the full output directory exists - filename = Path.Combine(outputDirectory, filename); - var directoryName = Path.GetDirectoryName(filename); - if (directoryName != null && !Directory.Exists(directoryName)) - Directory.CreateDirectory(directoryName); - - // Open the output file for writing - using var fs = File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None); - fs.Write(fileData, 0, fileData.Length); - fs.Flush(); + return allExtracted; } catch (Exception ex) { if (includeDebug) Console.Error.WriteLine(ex); return false; } - - return true; } #endregion @@ -567,4 +519,4 @@ private static uint S(byte[] a, int b, int x) #endregion } -} +} \ No newline at end of file From ebfa57db1b4bfecca022ecb2467bfc954a14ecb3 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Wed, 14 Jan 2026 18:05:25 -0500 Subject: [PATCH 06/17] Forgot to add summaries --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index f7c097ff..99075947 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -203,6 +203,13 @@ public bool Extract(string outputDirectory, bool includeDebug) return cabinet.ExtractSet(Filename, outputDirectory, ignorePrev, includeDebug); } + /// + /// Get filtered array of spanned files for a folder + /// + /// Filename for one cabinet in the set, if available + /// Index of the folder in the cabinet + /// True to ignore previous links, false otherwise + /// Filtered array of files private CFFILE[] GetSpannedFilesArray(string? filename, int f, bool ignorePrev) { // Loop through the files @@ -227,10 +234,14 @@ private CFFILE[] GetSpannedFilesArray(string? 
filename, int f, bool ignorePrev) return fileList.ToArray(); } + /// + /// Get filestream for a file to be extracted to + /// + /// Filename for the file that will be extracted to + /// Path to the output directory + /// Filestream for the file to be extracted to private FileStream GetFileStream(string filename, string outputDirectory) { - // byte[] fileData = blockStream.ReadBytes((int)file.FileSize); - // Ensure directory separators are consistent if (Path.DirectorySeparatorChar == '\\') filename = filename.Replace('/', '\\'); @@ -247,6 +258,11 @@ private FileStream GetFileStream(string filename, string outputDirectory) return File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None); } + /// + /// Read a datablock from a cabinet + /// + /// Cabinet to be read from + /// Read datablock private CFDATA ReadBlock(MicrosoftCabinet cabinet) { var db = new CFDATA(); From 15d9c276797a7138074780ff41c9b6454c110c22 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 15:06:22 -0500 Subject: [PATCH 07/17] Attempt to properly roll back. The state i wanted to roll back to wasn't in a commit before. --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 99075947..5c656960 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -159,6 +159,23 @@ public bool Extract(string outputDirectory, bool includeDebug) cabinet = OpenSet(Filename); ignorePrev = true; + // TOOD: reenable after confirming rollback is good + /* + if (cabinet == null) // TODO: handle better + return false; + + // If we have anything but the first file, avoid extraction to avoid repeat extracts + // TODO: handle partial sets + // TODO: is there any way for this to not spam the logs on large sets? probably not, but idk + if (this.Filename != cabinet.Filename) + { + string firstCabName = Path.GetFileName(cabinet.Filename) ?? string.Empty; + if (includeDebug) Console.WriteLine($"Only the first cabinet {firstCabName} will be extracted!"); + return false; + } + */ + + // Display warning in debug runs if (includeDebug && cabinet != null) { @@ -388,10 +405,24 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore // Should be impossible _ => [], }; + + // TODO: will 0 byte files mess things up if (bytesLeft > 0 && bytesLeft >= data.Length) { fs.Write(data); bytesLeft -= data.Length; + if (bytesLeft == 0) // Edge case on http://redump.org/disc/107833/ + { + fs.Close(); + + // reached end of folder + if (fileCounter + 1 == files.Length) + break; + + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); + } } else if (bytesLeft > 0 && bytesLeft < data.Length) { @@ -425,7 +456,17 @@ private bool ExtractSet(string? 
cabFilename, string outputDirectory, bool ignore fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); bytesLeft -= (data.Length - tempBytesLeft); if (bytesLeft == 0) // Edge case on the final file of the final cab of https://dbox.tools/titles/pc/57520FA0/ + { fs.Close(); + + // reached end of folder + if (fileCounter + 1 == files.Length) + break; + + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); + } } else // TODO: find something that can actually trigger this case { @@ -452,12 +493,22 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore file = files[++fileCounter]; bytesLeft = (int)file.FileSize; fs = GetFileStream(file.Name, outputDirectory); - if (bytesLeft == 0) // This case is not currently observed, but presumably it can also happen like above - fs.Close(); } fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); bytesLeft -= (data.Length - tempBytesLeft); + if (bytesLeft == 0) // This case is not currently observed, but presumably it can also happen like above + { + fs.Close(); + + // reached end of folder + if (fileCounter + 1 == files.Length) + break; + + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); + } } // TODO: do i ever need to flush before the end of the file? From b8be815c68000559a07d2f10257a4850f53e32d8 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 17:31:17 -0500 Subject: [PATCH 08/17] Figured out the issue with the rolled back commit, this has to be a while loop because of 0 byte files. Reimplemented clean code. --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 46 +++++-------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 5c656960..784582e6 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -411,18 +411,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore { fs.Write(data); bytesLeft -= data.Length; - if (bytesLeft == 0) // Edge case on http://redump.org/disc/107833/ - { - fs.Close(); - - // reached end of folder - if (fileCounter + 1 == files.Length) - break; - - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - } } else if (bytesLeft > 0 && bytesLeft < data.Length) { @@ -455,18 +443,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); bytesLeft -= (data.Length - tempBytesLeft); - if (bytesLeft == 0) // Edge case on the final file of the final cab of https://dbox.tools/titles/pc/57520FA0/ - { - fs.Close(); - - // reached end of folder - if (fileCounter + 1 == files.Length) - break; - - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - } } else // TODO: find something that can actually trigger this case { @@ -497,18 +473,20 @@ private bool ExtractSet(string? 
cabFilename, string outputDirectory, bool ignore fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); bytesLeft -= (data.Length - tempBytesLeft); - if (bytesLeft == 0) // This case is not currently observed, but presumably it can also happen like above - { - fs.Close(); + } + + // Top if block occurs on http://redump.org/disc/107833/ , middle on https://dbox.tools/titles/pc/57520FA0 , bottom still unobserved + while (bytesLeft == 0) + { + fs.Close(); - // reached end of folder - if (fileCounter + 1 == files.Length) - break; + // reached end of folder + if (fileCounter + 1 == files.Length) + break; - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - } + file = files[++fileCounter]; + bytesLeft = (int)file.FileSize; + fs = GetFileStream(file.Name, outputDirectory); } // TODO: do i ever need to flush before the end of the file? From 631b46b46f9822e3c645521f9e15da9136a88112 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 17:33:53 -0500 Subject: [PATCH 09/17] Comment so I don't forget why it's like this --- SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 784582e6..13049760 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -476,6 +476,7 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore } // Top if block occurs on http://redump.org/disc/107833/ , middle on https://dbox.tools/titles/pc/57520FA0 , bottom still unobserved + // While loop since this also handles 0 byte files. while (bytesLeft == 0) { fs.Close(); From b2302c1992b2291ea2ef169b2aaf85605e5a594f Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 18:51:22 -0500 Subject: [PATCH 10/17] Skip unsupported compression types before opening filestream. --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 13049760..2a4b50dd 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -184,7 +184,7 @@ public bool Extract(string outputDirectory, bool includeDebug) while (true) // this feels unsafe, but the existing code already did it { for (int i = 0; i < tempCabinet.FolderCount; i++) - compressionTypes.Add(tempCabinet.Folders[i].CompressionType & CompressionType.MASK_TYPE); // TODO: what is this mask for? + compressionTypes.Add(GetCompressionType(tempCabinet.Folders[i])); tempCabinet = tempCabinet.Next; @@ -335,15 +335,17 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore var mszip = Decompressor.Create(); try { - var fs = GetFileStream(file.Name, outputDirectory); - // Ensure folder contains data // TODO: does this fail on spanned only folders or something? when would this happen if (folder.DataCount == 0) return false; - // Get the compression type + // Skip unsupported compression types to avoid opening a blank filestream. This can be altered/removed if these types are ever supported. 
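// For reference (per the MS-CAB spec; no behavior change intended): the folder's typeCompress
// value packs the algorithm id in its low 4 bits, which is what the MASK_TYPE applied by
// GetCompressionType isolates, while bits 8-12 carry the LZX window size or Quantum memory
// level. If LZX/Quantum support is ever added, that field would be recovered roughly like the
// commented-out quantumWindowBits line elsewhere in this file:
//     uint windowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1F);  // sketch only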
var compressionType = GetCompressionType(folder); + if (compressionType == CompressionType.TYPE_QUANTUM || compressionType == CompressionType.TYPE_LZX) + continue; + + var fs = GetFileStream(file.Name, outputDirectory); // TODO: what is this comment here for //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); @@ -374,7 +376,8 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore tempCabinet = tempCabinet.Next; if (tempCabinet == null) // TODO: handle better? return false; - + + // Compressiontype not updated because there's no way it's possible that it can swap on continued blocks folder = tempCabinet.Folders[0]; lock (tempCabinet._dataSourceLock) { From 39cc07d32426179ba77cba68c075b87dc4d2ca7e Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 19:54:10 -0500 Subject: [PATCH 11/17] Reenable non-start cab skipping --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 2a4b50dd..2afea74a 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -160,7 +160,6 @@ public bool Extract(string outputDirectory, bool includeDebug) ignorePrev = true; // TOOD: reenable after confirming rollback is good - /* if (cabinet == null) // TODO: handle better return false; @@ -173,7 +172,6 @@ public bool Extract(string outputDirectory, bool includeDebug) if (includeDebug) Console.WriteLine($"Only the first cabinet {firstCabName} will be extracted!"); return false; } - */ // Display warning in debug runs From afa3d3b9cc499847bcccd5dab12b340be6d3015d Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 20:14:54 -0500 Subject: [PATCH 12/17] TODO so i don't forget --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 2afea74a..9fb2bba4 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -477,7 +477,8 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore } // Top if block occurs on http://redump.org/disc/107833/ , middle on https://dbox.tools/titles/pc/57520FA0 , bottom still unobserved - // While loop since this also handles 0 byte files. + // While loop since this also handles 0 byte files. Example file seen in http://redump.org/disc/93312/ , cab Group17.cab, file TRACKSLOC6DYNTEX_BIN + // TODO: make sure that file is actually supposed to be 0 bytes. 7z also extracts it as 0 bytes, so it probably is, but it's good to make sure. 
while (bytesLeft == 0) { fs.Close(); From e7395d0b49b21094fda3848b4d5a2ad84e1e85d8 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 20:32:44 -0500 Subject: [PATCH 13/17] TODO so I don't forget --- SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 9fb2bba4..9a87e7d9 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -166,6 +166,7 @@ public bool Extract(string outputDirectory, bool includeDebug) // If we have anything but the first file, avoid extraction to avoid repeat extracts // TODO: handle partial sets // TODO: is there any way for this to not spam the logs on large sets? probably not, but idk + // TODO: if/when full msi support is added, somehow this is going to have to take that into account, while also still handling partial sets if (this.Filename != cabinet.Filename) { string firstCabName = Path.GetFileName(cabinet.Filename) ?? string.Empty; From 91ce847111bfb0f8acdc151c7961049a513b168d Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Thu, 15 Jan 2026 21:52:39 -0500 Subject: [PATCH 14/17] iterate on continued block correctly. --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 9a87e7d9..ddb725aa 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -494,9 +494,10 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore } // TODO: do i ever need to flush before the end of the file? - j++; if (continuedBlock) j = 0; + + j++; } } } From 366cea9d23eb79cf94f1966a15a87231e42790f0 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Fri, 16 Jan 2026 14:18:15 -0500 Subject: [PATCH 15/17] Handle incomplete extraction better --- .../Wrappers/MicrosoftCabinet.Extraction.cs | 102 +++++++++++++----- .../Wrappers/MicrosoftCabinet.cs | 89 ++------------- 2 files changed, 85 insertions(+), 106 deletions(-) diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index ddb725aa..81ea30cf 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -31,8 +31,9 @@ public partial class MicrosoftCabinet : IExtractable /// Open a cabinet set for reading, if possible /// /// Filename for one cabinet in the set + /// /// Wrapper representing the set, null on error - private static MicrosoftCabinet? OpenSet(string? filename) + private static MicrosoftCabinet? OpenSet(string? 
filename, bool includeDebug) { // If the file is invalid if (string.IsNullOrEmpty(filename)) @@ -53,7 +54,7 @@ public partial class MicrosoftCabinet : IExtractable while (current.CabinetPrev != null) { // Attempt to open the previous cabinet - var prev = current.OpenPrevious(filename); + var prev = current.OpenPrevious(filename, includeDebug); if (prev?.Header == null) break; @@ -72,7 +73,7 @@ public partial class MicrosoftCabinet : IExtractable break; // Open the next cabinet and try to parse - var next = current.OpenNext(filename); + var next = current.OpenNext(filename, includeDebug); if (next?.Header == null) break; @@ -90,7 +91,8 @@ public partial class MicrosoftCabinet : IExtractable /// Open the next archive, if possible /// /// Filename for one cabinet in the set - private MicrosoftCabinet? OpenNext(string? filename) + /// + private MicrosoftCabinet? OpenNext(string? filename, bool includeDebug) { // Ignore invalid archives if (string.IsNullOrEmpty(filename)) @@ -104,21 +106,33 @@ public partial class MicrosoftCabinet : IExtractable if (string.IsNullOrEmpty(next)) return null; + string baseNext = next!; + // Get the full next path string? folder = Path.GetDirectoryName(filename); if (folder != null) next = Path.Combine(folder, next); // Open and return the next cabinet - var fs = File.Open(next, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); - return Create(fs); + // Catch exceptions due to file not existing, etc + try + { + var fs = File.Open(next, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + return Create(fs); + } + catch + { + if (includeDebug) Console.WriteLine($"Error: Cabinet set part {baseNext} could not be opened!"); + return null; + } } /// /// Open the previous archive, if possible /// /// Filename for one cabinet in the set - private MicrosoftCabinet? OpenPrevious(string? filename) + /// + private MicrosoftCabinet? OpenPrevious(string? filename, bool includeDebug) { // Ignore invalid archives if (string.IsNullOrEmpty(filename)) @@ -132,14 +146,25 @@ public partial class MicrosoftCabinet : IExtractable if (string.IsNullOrEmpty(prev)) return null; + string basePrev = prev!; + // Get the full next path string? folder = Path.GetDirectoryName(filename); if (folder != null) prev = Path.Combine(folder, prev); // Open and return the previous cabinet - var fs = File.Open(prev, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); - return Create(fs); + // Catch exceptions due to file not existing, etc + try + { + var fs = File.Open(prev, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + return Create(fs); + } + catch + { + if (includeDebug) Console.WriteLine($"Error: Cabinet set part {basePrev} could not be opened!"); + return null; + } } #endregion @@ -156,7 +181,7 @@ public bool Extract(string outputDirectory, bool includeDebug) var cabinet = this; if (Filename != null) { - cabinet = OpenSet(Filename); + cabinet = OpenSet(Filename, includeDebug); ignorePrev = true; // TOOD: reenable after confirming rollback is good @@ -225,11 +250,12 @@ public bool Extract(string outputDirectory, bool includeDebug) /// Filename for one cabinet in the set, if available /// Index of the folder in the cabinet /// True to ignore previous links, false otherwise + /// /// Filtered array of files - private CFFILE[] GetSpannedFilesArray(string? filename, int f, bool ignorePrev) + private CFFILE[] GetSpannedFilesArray(string? 
filename, int f, bool ignorePrev, bool includeDebug) { // Loop through the files - var filterFiles = GetSpannedFiles(filename, f, ignorePrev); + var filterFiles = GetSpannedFiles(filename, f, includeDebug, ignorePrev); List fileList = []; // Filtering, add debug output eventually @@ -279,23 +305,34 @@ private FileStream GetFileStream(string filename, string outputDirectory) /// /// Cabinet to be read from /// Read datablock - private CFDATA ReadBlock(MicrosoftCabinet cabinet) + private CFDATA? ReadBlock(MicrosoftCabinet cabinet) { - var db = new CFDATA(); + // Should only ever occur if it tries to read more than the file, but good to catch in general + // TODO: does putting this in a try-catch block slow things down? Thousands of readblocks will get called for any cab + // TODO: Since the file is certainly messed up in some way if this fails, should it be deleted in case it causes cascading issues for BOS? Probably not, but idk + try + { + var db = new CFDATA(); - var dataReservedSize = cabinet.Header.DataReservedSize; + var dataReservedSize = cabinet.Header.DataReservedSize; - db.Checksum = cabinet._dataSource.ReadUInt32LittleEndian(); - db.CompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); - db.UncompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); + db.Checksum = cabinet._dataSource.ReadUInt32LittleEndian(); + db.CompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); + db.UncompressedSize = cabinet._dataSource.ReadUInt16LittleEndian(); - if (dataReservedSize > 0) - db.ReservedData = cabinet._dataSource.ReadBytes(dataReservedSize); + if (dataReservedSize > 0) + db.ReservedData = cabinet._dataSource.ReadBytes(dataReservedSize); - if (db.CompressedSize > 0) - db.CompressedData = cabinet._dataSource.ReadBytes(db.CompressedSize); + if (db.CompressedSize > 0) + db.CompressedData = cabinet._dataSource.ReadBytes(db.CompressedSize); - return db; + return db; + } + catch + { + return null; + } + } // TODO: cab stepping, folder stepping (I think?), 0 size continued blocks, find something that triggers exact data size @@ -325,7 +362,7 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore continue; var folder = cabinet.Folders[f]; - CFFILE[] files = cabinet.GetSpannedFilesArray(currentCabFilename, f, ignorePrev); + CFFILE[] files = cabinet.GetSpannedFilesArray(currentCabFilename, f, ignorePrev, includeDebug); var file = files[0]; int bytesLeft = (int)file.FileSize; int fileCounter = 0; @@ -363,6 +400,11 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore lock (tempCabinet._dataSourceLock) { var db = ReadBlock(tempCabinet); + if (db == null) + { + if (includeDebug) Console.Error.WriteLine($"Error extracting file {file.Name}"); + break; + } // Get the data to be processed byte[] blockData = db.CompressedData; @@ -373,8 +415,8 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore if (db.UncompressedSize == 0) { tempCabinet = tempCabinet.Next; - if (tempCabinet == null) // TODO: handle better? - return false; + if (tempCabinet == null) + break; // Next cab is missing, continue // Compressiontype not updated because there's no way it's possible that it can swap on continued blocks folder = tempCabinet.Folders[0]; @@ -383,6 +425,12 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore // TODO: make sure this spans? 
tempCabinet._dataSource.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); var nextBlock = ReadBlock(tempCabinet); + if (nextBlock == null) + { + if (includeDebug) Console.Error.WriteLine($"Error extracting file {file.Name}"); + break; + } + byte[] nextData = nextBlock.CompressedData; if (nextData.Length == 0) // TODO: null cant happen, is it meant to be if it's empty? continue; @@ -510,7 +558,7 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore // Move to the next cabinet, if possible cabinet = cabinet.Next; - if (cabinet == null) // TODO: handle better + if (cabinet == null) // If the next cabinet is missing, there's no better way to handle this return false; currentCabFilename = cabinet.Filename; diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs index 65573580..1f17b6bc 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.cs @@ -230,87 +230,18 @@ private static CompressionType GetCompressionType(CFFOLDER folder) return (CompressionType)ushort.MaxValue; } - /// - /// Get the set of data blocks for a folder - /// - /// Filename for one cabinet in the set, if available - /// Folder containing the blocks to decompress - /// Index of the folder in the cabinet - /// Indicates if previous cabinets should be ignored - /// Indicates if next cabinets should be ignored - /// Array of data blocks on success, null otherwise - private FolderTuple?[] GetFolders(string? filename, CFFOLDER? folder, int folderIndex, bool skipPrev = false, bool skipNext = false) - { - // Skip invalid folders - if (folder?.DataBlocks == null || folder.DataBlocks.Length == 0) - return []; - - // Get all files for the folder - var files = GetFiles(folderIndex); - if (files.Length == 0) - return []; - - FolderTuple?[] folderTuple = new FolderTuple[1]; - folderTuple[0] = null; - - if (filename != null && folder != null) - { - folderTuple[0] = new FolderTuple - { - Filename = filename, - Folder = folder, - FolderIndex = folderIndex - }; - } - - // Check if the folder spans in either direction - bool spanPrev = Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_FROM_PREV || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT); - bool spanNext = Array.Exists(files, f => f.FolderIndex == FolderIndex.CONTINUED_TO_NEXT || f.FolderIndex == FolderIndex.CONTINUED_PREV_AND_NEXT); - - // If the folder spans backward and Prev is not being skipped - FolderTuple?[] prevFolderTuples = []; - if (!skipPrev && spanPrev) - { - // Try to get Prev if it doesn't exist - if (Prev?.Header == null) - Prev = OpenPrevious(filename); - - // Get all blocks from Prev - if (Prev?.Header != null && Prev.Folders != null) - { - int prevFolderIndex = Prev.FolderCount - 1; - var prevFolder = Prev.Folders[prevFolderIndex - 1]; - prevFolderTuples = Prev.GetFolders(filename, prevFolder, prevFolderIndex, skipNext: true) ?? []; - } - } - - // If the folder spans forward and Next is not being skipped - FolderTuple?[] nextFolderTuples = []; - if (!skipNext && spanNext) - { - // Try to get Next if it doesn't exist - if (Next?.Header == null) - Next = OpenNext(filename); - - // Get all blocks from Prev - if (Next?.Header != null && Next.Folders != null) - { - var nextFolder = Next.Folders[0]; - nextFolderTuples = Next.GetFolders(filename, nextFolder, 0, skipPrev: true) ?? []; - } - } - - // Return all found blocks in order - return [.. prevFolderTuples, .. folderTuple, .. 
nextFolderTuples]; - } - /// /// Get all files for the current folder, plus connected spanned folders. /// + /// /// Index of the folder in the cabinet + /// /// True to ignore previous links, false otherwise + /// + /// /// Array of all files for the folder - private CFFILE[] GetSpannedFiles(string? filename, int folderIndex, bool ignorePrev = false, bool skipPrev = false, bool skipNext = false) + private CFFILE[] GetSpannedFiles(string? filename, int folderIndex, bool includeDebug, bool ignorePrev = false, + bool skipPrev = false, bool skipNext = false) { // Ignore invalid archives if (Files.IsNullOrEmpty()) @@ -345,13 +276,13 @@ private CFFILE[] GetSpannedFiles(string? filename, int folderIndex, bool ignoreP { // Try to get Prev if it doesn't exist if (Prev?.Header == null) - Prev = OpenPrevious(filename); + Prev = OpenPrevious(filename, includeDebug); // Get all files from Prev if (Prev?.Header != null && Prev.Folders != null) { int prevFolderIndex = Prev.FolderCount - 1; - prevFiles = Prev.GetSpannedFiles(filename, prevFolderIndex, skipNext: true) ?? []; + prevFiles = Prev.GetSpannedFiles(filename, prevFolderIndex, includeDebug, skipNext: true) ?? []; } } @@ -361,13 +292,13 @@ private CFFILE[] GetSpannedFiles(string? filename, int folderIndex, bool ignoreP { // Try to get Next if it doesn't exist if (Next?.Header == null) - Next = OpenNext(filename); + Next = OpenNext(filename, false); // Debug ignored here since if it's enabled, this will get output earlier anyways // Get all files from Prev if (Next?.Header != null && Next.Folders != null) { var nextFolder = Next.Folders[0]; - nextFiles = Next.GetSpannedFiles(filename, 0, skipPrev: true) ?? []; + nextFiles = Next.GetSpannedFiles(filename, 0, includeDebug, skipPrev: true) ?? []; } } From 5dcc3222e3d01de99afef38c74c8b271e1ff8a24 Mon Sep 17 00:00:00 2001 From: HeroponRikiBestest Date: Fri, 16 Jan 2026 14:30:24 -0500 Subject: [PATCH 16/17] Remove TODOs to ready for PR comments --- .../Readers/MicrosoftCabinet.cs | 20 ----------- .../Wrappers/MicrosoftCabinet.Extraction.cs | 33 +++++-------------- 2 files changed, 9 insertions(+), 44 deletions(-) diff --git a/SabreTools.Serialization/Readers/MicrosoftCabinet.cs b/SabreTools.Serialization/Readers/MicrosoftCabinet.cs index f04cde40..a2fba52d 100644 --- a/SabreTools.Serialization/Readers/MicrosoftCabinet.cs +++ b/SabreTools.Serialization/Readers/MicrosoftCabinet.cs @@ -174,26 +174,6 @@ private static CFFOLDER ParseFolder(Stream data, CFHEADER header) return folder; } - - /// - /// Parse a Stream into a folder - /// - /// Stream to parse - /// Cabinet header to get flags and sizes from - /// Filled folder on success, null on error - private static CFFOLDER ParseFolderData(Stream data, CFHEADER header) - { - var folder = new CFFOLDER(); - - folder.CabStartOffset = data.ReadUInt32LittleEndian(); - folder.DataCount = data.ReadUInt16LittleEndian(); - folder.CompressionType = (CompressionType)data.ReadUInt16LittleEndian(); - - if (header.FolderReservedSize > 0) - folder.ReservedData = data.ReadBytes(header.FolderReservedSize); - - return folder; - } /// /// Parse a Stream into a file diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index 81ea30cf..0e89c62a 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -31,7 +31,7 @@ public partial class MicrosoftCabinet : IExtractable /// Open a cabinet set for 
reading, if possible /// /// Filename for one cabinet in the set - /// + /// True to include debug data, false otherwise /// Wrapper representing the set, null on error private static MicrosoftCabinet? OpenSet(string? filename, bool includeDebug) { @@ -91,7 +91,7 @@ public partial class MicrosoftCabinet : IExtractable /// Open the next archive, if possible /// /// Filename for one cabinet in the set - /// + /// True to include debug data, false otherwise private MicrosoftCabinet? OpenNext(string? filename, bool includeDebug) { // Ignore invalid archives @@ -131,7 +131,7 @@ public partial class MicrosoftCabinet : IExtractable /// Open the previous archive, if possible /// /// Filename for one cabinet in the set - /// + /// True to include debug data, false otherwise private MicrosoftCabinet? OpenPrevious(string? filename, bool includeDebug) { // Ignore invalid archives @@ -185,12 +185,10 @@ public bool Extract(string outputDirectory, bool includeDebug) ignorePrev = true; // TOOD: reenable after confirming rollback is good - if (cabinet == null) // TODO: handle better + if (cabinet == null) return false; // If we have anything but the first file, avoid extraction to avoid repeat extracts - // TODO: handle partial sets - // TODO: is there any way for this to not spam the logs on large sets? probably not, but idk // TODO: if/when full msi support is added, somehow this is going to have to take that into account, while also still handling partial sets if (this.Filename != cabinet.Filename) { @@ -212,7 +210,7 @@ public bool Extract(string outputDirectory, bool includeDebug) tempCabinet = tempCabinet.Next; - if (tempCabinet == null) // TODO: handle better + if (tempCabinet == null) break; if (tempCabinet.Folders.Length == 0) @@ -250,7 +248,7 @@ public bool Extract(string outputDirectory, bool includeDebug) /// Filename for one cabinet in the set, if available /// Index of the folder in the cabinet /// True to ignore previous links, false otherwise - /// + /// True to include debug data, false otherwise /// Filtered array of files private CFFILE[] GetSpannedFilesArray(string? filename, int f, bool ignorePrev, bool includeDebug) { @@ -308,8 +306,6 @@ private FileStream GetFileStream(string filename, string outputDirectory) private CFDATA? ReadBlock(MicrosoftCabinet cabinet) { // Should only ever occur if it tries to read more than the file, but good to catch in general - // TODO: does putting this in a try-catch block slow things down? Thousands of readblocks will get called for any cab - // TODO: Since the file is certainly messed up in some way if this fails, should it be deleted in case it causes cascading issues for BOS? Probably not, but idk try { var db = new CFDATA(); @@ -335,7 +331,6 @@ private FileStream GetFileStream(string filename, string outputDirectory) } - // TODO: cab stepping, folder stepping (I think?), 0 size continued blocks, find something that triggers exact data size /// /// Extract the contents of a cabinet set /// @@ -372,7 +367,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore try { // Ensure folder contains data - // TODO: does this fail on spanned only folders or something? when would this happen if (folder.DataCount == 0) return false; @@ -383,11 +377,10 @@ private bool ExtractSet(string? 
cabFilename, string outputDirectory, bool ignore var fs = GetFileStream(file.Name, outputDirectory); - // TODO: what is this comment here for //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); if (folder.CabStartOffset <= 0) - return false; // TODO: why is a CabStartOffset of 0 not acceptable? header? + return false; var tempCabinet = cabinet; int j = 0; @@ -396,7 +389,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore // Has to be a while loop instead of a for loop due to cab spanning continue blocks while (j < folder.DataCount) { - // TODO: since i need lock state to be maintained the whole loop, do i need to cache and reset position to be safe? lock (tempCabinet._dataSourceLock) { var db = ReadBlock(tempCabinet); @@ -410,7 +402,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore byte[] blockData = db.CompressedData; // If the block is continued, append - // TODO: this is specifically if and only if it's jumping between cabs on a spanned folder, I think? bool continuedBlock = false; if (db.UncompressedSize == 0) { @@ -422,7 +413,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore folder = tempCabinet.Folders[0]; lock (tempCabinet._dataSourceLock) { - // TODO: make sure this spans? tempCabinet._dataSource.SeekIfPossible(folder.CabStartOffset, SeekOrigin.Begin); var nextBlock = ReadBlock(tempCabinet); if (nextBlock == null) @@ -432,7 +422,7 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore } byte[] nextData = nextBlock.CompressedData; - if (nextData.Length == 0) // TODO: null cant happen, is it meant to be if it's empty? + if (nextData.Length == 0) continue; continuedBlock = true; @@ -456,7 +446,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore _ => [], }; - // TODO: will 0 byte files mess things up if (bytesLeft > 0 && bytesLeft >= data.Length) { fs.Write(data); @@ -475,7 +464,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore file = files[++fileCounter]; bytesLeft = (int)file.FileSize; fs = GetFileStream(file.Name, outputDirectory); - // TODO: can I deduplicate this? probably not since I need breaks while (bytesLeft < data.Length - tempBytesLeft) { fs.Write(data, tempBytesLeft, bytesLeft); @@ -494,7 +482,7 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); bytesLeft -= (data.Length - tempBytesLeft); } - else // TODO: find something that can actually trigger this case + else { int tempBytesLeft = bytesLeft; fs.Close(); @@ -527,7 +515,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore // Top if block occurs on http://redump.org/disc/107833/ , middle on https://dbox.tools/titles/pc/57520FA0 , bottom still unobserved // While loop since this also handles 0 byte files. Example file seen in http://redump.org/disc/93312/ , cab Group17.cab, file TRACKSLOC6DYNTEX_BIN - // TODO: make sure that file is actually supposed to be 0 bytes. 7z also extracts it as 0 bytes, so it probably is, but it's good to make sure. while (bytesLeft == 0) { fs.Close(); @@ -541,7 +528,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool ignore fs = GetFileStream(file.Name, outputDirectory); } - // TODO: do i ever need to flush before the end of the file? if (continuedBlock) j = 0; @@ -563,7 +549,6 @@ private bool ExtractSet(string? 
cabFilename, string outputDirectory, bool ignore
                 currentCabFilename = cabinet.Filename;
 
-                // TODO: already-extracted data isn't being cleared from memory, at least not nearly enough.
                 if (cabinet.Folders.Length == 0)
                     break;
             }

From 68823b5008e14c29d67efdb56965b317bfe08e0b Mon Sep 17 00:00:00 2001
From: HeroponRikiBestest
Date: Fri, 16 Jan 2026 14:37:43 -0500
Subject: [PATCH 17/17] Missed one

---
 SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs
index 0e89c62a..84991aa2 100644
--- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs
+++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs
@@ -184,7 +184,6 @@ public bool Extract(string outputDirectory, bool includeDebug)
             cabinet = OpenSet(Filename, includeDebug);
             ignorePrev = true;
 
-            // TOOD: reenable after confirming rollback is good
             if (cabinet == null)
                 return false;
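Taken together, patches 08, 09, and 14 settle on one pattern for walking a folder's decompressed blocks: write into the current file's stream, and when the remaining byte count for that file hits zero, close it and advance to the next file; the advance has to be a loop because zero-byte entries complete without consuming any block data. The code below is a minimal standalone sketch of that pattern, not code from the repository: the CabFileEntry record, the OpenOutput helper, and the method names are invented for illustration, and the sketch ignores checksums, decompression, reserved data, and cabinet spanning that the real extraction handles.

using System;
using System.Collections.Generic;
using System.IO;

// Hypothetical stand-in for a CFFILE entry: only the fields the loop needs.
public sealed record CabFileEntry(string Name, long Size);

public static class FolderWriterSketch
{
    // Write a folder's decompressed blocks into per-file output streams.
    // Files are stored back to back, so one block may finish a file and start
    // the next, and zero-byte files finish without consuming any data at all.
    public static void WriteBlocks(IEnumerable<byte[]> blocks, IReadOnlyList<CabFileEntry> files, string outputDirectory)
    {
        if (files.Count == 0)
            return;

        int fileIndex = 0;
        long bytesLeft = files[0].Size;
        FileStream fs = OpenOutput(files[0].Name, outputDirectory);

        foreach (byte[] block in blocks)
        {
            int offset = 0;
            while (offset < block.Length)
            {
                int chunk = (int)Math.Min(bytesLeft, block.Length - offset);
                fs.Write(block, offset, chunk);
                offset += chunk;
                bytesLeft -= chunk;

                // While, not if: consecutive zero-byte files all complete here
                // before any more block data is written.
                while (bytesLeft == 0)
                {
                    fs.Close();

                    // Reached the end of the folder
                    if (++fileIndex >= files.Count)
                        return;

                    bytesLeft = files[fileIndex].Size;
                    fs = OpenOutput(files[fileIndex].Name, outputDirectory);
                }
            }
        }

        fs.Close();
    }

    private static FileStream OpenOutput(string name, string outputDirectory)
    {
        string path = Path.Combine(outputDirectory, name);
        string? parent = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(parent))
            Directory.CreateDirectory(parent);
        return File.Open(path, FileMode.Create, FileAccess.Write);
    }
}

The loop in the diff does more than this sketch: when a block's uncompressed size is zero it pulls the continuation block from the next cabinet, which is why it tracks a continuedBlock flag and resets its block counter after the jump.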
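The mask question removed in patch 10 ("what is this mask for?") comes from how the CFFOLDER typeCompress value is packed: per the MS-CAB layout, the low four bits select the codec, while higher bits carry the Quantum level and the Quantum/LZX window-size exponent, which is what the commented-out quantumWindowBits line was extracting. The decoder below is a sketch built from that documented layout rather than SabreTools code; the enum and method names are invented for illustration.

using System;

// Hypothetical names; the values mirror the typeCompress layout in the MS-CAB specification.
public enum CabCompression : ushort
{
    None = 0x0000,
    MsZip = 0x0001,
    Quantum = 0x0002,
    Lzx = 0x0003,
}

public static class TypeCompressSketch
{
    // typeCompress layout per MS-CAB:
    //   bits 0-3   compression type (the MASK_TYPE-style mask, 0x000F)
    //   bits 4-7   Quantum compression level
    //   bits 8-12  Quantum memory / LZX window size exponent
    //   bits 13-15 reserved
    public static void Describe(ushort typeCompress)
    {
        var type = (CabCompression)(typeCompress & 0x000F);
        Console.WriteLine($"Compression type: {type}");

        if (type == CabCompression.Lzx)
        {
            // Window is 2^n bytes, n = 15..21
            int windowBits = (typeCompress >> 8) & 0x1F;
            Console.WriteLine($"LZX window: 2^{windowBits} bytes");
        }
        else if (type == CabCompression.Quantum)
        {
            int level = (typeCompress >> 4) & 0x0F;      // 1..7
            int windowBits = (typeCompress >> 8) & 0x1F; // window is 2^n bytes, n = 10..21
            Console.WriteLine($"Quantum level {level}, window 2^{windowBits} bytes");
        }
    }
}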