Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@ dependencies = [
"jefferson>=0.4.5",
"lark>=1.1.8",
"lief>=0.16.1",
"lz4>=4.3.2,!=4.4.3", # 4.4.3 doesn't have aarch64 wheels https://github.com/python-lz4/python-lz4/pull/298
"lz4>=4.3.2,!=4.4.3", # 4.4.3 doesn't have aarch64 wheels https://github.com/python-lz4/python-lz4/pull/298
"plotext>=4.2.0,<6.0",
"pluggy>=1.3.0",
"pyfatfs>=1.0.5",
"pymdown-extensions>=10.15",
"pyperscan>=0.3.0",
"python-magic>=0.4.27",
"python-msi>=0.0.0a2",
"pyzstd",
"rarfile>=4.1",
"rich>=13.3.5",
Expand Down
2 changes: 2 additions & 0 deletions python/unblob/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
cab,
cpio,
dmg,
msi,
partclone,
rar,
sevenzip,
Expand Down Expand Up @@ -88,6 +89,7 @@
arc.ARCHandler,
arj.ARJHandler,
cab.CABHandler,
msi.MsiHandler,
tar.TarUstarHandler,
tar.TarUnixHandler,
cpio.PortableASCIIHandler,
Expand Down
65 changes: 65 additions & 0 deletions python/unblob/handlers/archive/msi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""MSI Handler

Extraction uses 7z for now. Could migrate to a fully Python-based implementation:

https://github.com/nightlark/pymsi
"""

from typing import Optional
import io

import pymsi
from structlog import get_logger

from unblob.extractors import Command

from ...models import (
File,
Handler,
HandlerDoc,
HandlerType,
HexString,
Reference,
ValidChunk,
)

logger = get_logger()


class MsiHandler(Handler):
    """Detect Microsoft Installer (MSI) packages and extract them with 7z.

    The chunk boundaries are computed by parsing the candidate with pymsi;
    the extraction itself is delegated to the external ``7z`` command.
    """

    NAME = "msi"

    # NOTE(review): this is the generic OLE2 / Compound File signature, so it
    # presumably also matches non-MSI compound documents; this handler relies
    # on pymsi raising for those -- confirm against pymsi's behaviour.
    PATTERNS = [HexString("D0 CF 11 E0 A1 B1 1A E1")]
    EXTRACTOR = Command("7z", "x", "-p", "-y", "{inpath}", "-o{outdir}")

    DOC = HandlerDoc(
        name="MSI",
        description="Microsoft Installer (MSI) files are used for the installation, maintenance, and removal of software.",
        handler_type=HandlerType.ARCHIVE,
        vendor="Microsoft",
        references=[
            Reference(
                title="MSI File Format Documentation",
                url="https://docs.microsoft.com/en-us/windows/win32/msi/overview-of-windows-installer",
            )
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Return the chunk covering the MSI package found at *start_offset*.

        Args:
            file: The file being scanned.
            start_offset: Offset in *file* where the signature matched.

        Returns:
            A ``ValidChunk`` spanning from *start_offset* to the end of the
            compound file, as computed from its sector count and sector size.

        Any exception raised by pymsi while parsing the candidate propagates
        to the caller.
        """
        file.seek(start_offset, io.SEEK_SET)

        package = pymsi.Package(file)
        msi = pymsi.Msi(package, False)

        ole = msi.package.ole
        # The sector count does not include the header, and the header always
        # occupies one whole sector: 512 bytes for 512-byte sectors, but 4096
        # bytes (zero padded) when the file uses 4096-byte sectors. Adding a
        # fixed 512 would truncate the chunk for the latter, so count the
        # header as one additional sector instead.
        msi_size = (ole.nb_sect + 1) * ole.sector_size

        return ValidChunk(
            start_offset=start_offset,
            end_offset=start_offset + msi_size,
        )
4 changes: 3 additions & 1 deletion python/unblob/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@
DEFAULT_PROCESS_NUM = multiprocessing.cpu_count()
DEFAULT_SKIP_MAGIC = (
"BFLT",
"Composite Document File V2 Document",
# TODO: Need to disable this for MSI but does it need to be enabled for
# other types of Composite Documents?
#"Composite Document File V2 Document",
"Erlang BEAM file",
"GIF",
"GNU message catalog",
Expand Down
23 changes: 23 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.