Skip to content

Commit b91c017

Browse files
committed
refactors .NET parsing
1 parent 31cbaf8 commit b91c017

File tree

15 files changed

+1478
-1710
lines changed

15 files changed

+1478
-1710
lines changed

refinery/lib/dotnet/deserialize.py

Lines changed: 384 additions & 323 deletions
Large diffs are not rendered by default.

refinery/lib/dotnet/header.py

Lines changed: 914 additions & 745 deletions
Large diffs are not rendered by default.

refinery/lib/dotnet/resources.py

Lines changed: 128 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -6,160 +6,140 @@
66
"""
77
from __future__ import annotations
88

9+
import datetime
10+
import enum
911
import re
1012

11-
from refinery.lib.dotnet.deserialize import BinaryFormatterParser
12-
from refinery.lib.dotnet.types import (
13-
Blob,
14-
Box,
15-
Byte,
16-
Char,
17-
DateTime,
18-
Double,
19-
EncodedInteger,
20-
Int16,
21-
Int32,
22-
Int64,
23-
LengthPrefixedString,
24-
Null,
25-
SByte,
26-
Single,
27-
StreamReader,
28-
StringPrimitive,
29-
Struct,
30-
TimeSpan,
31-
UInt16,
32-
UInt32,
33-
UInt64,
34-
unpack,
13+
from refinery.lib.dotnet.deserialize import (
14+
BinaryFormatterParser,
15+
DotNetRsrcReader,
16+
DotNetStruct,
3517
)
18+
from refinery.lib.dotnet.header import DotNetStructReader
19+
from refinery.lib.id import buffer_contains
3620

3721

3822
class NoManagedResource(AssertionError):
    """
    Raised by the manifest resource parser when the data does not start with the
    expected managed resource signature value.
    """
4024

4125

42-
class String(LengthPrefixedString):
43-
def __init__(self, reader):
44-
LengthPrefixedString.__init__(self, reader, codec='UTF-8')
45-
46-
47-
class Boolean(Byte):
48-
@property
49-
def Value(self):
50-
return bool(super().Value)
51-
52-
53-
class Decimal(Blob):
54-
def __init__(self, reader):
55-
Blob.__init__(self, reader, 16)
56-
57-
@property
58-
def Value(self):
59-
# TODO: Unknown whether this is correct
60-
return int.from_bytes(self._data, 'big')
61-
62-
63-
class ByteArray(Struct):
64-
def parse(self):
65-
self.Size = self.expect(UInt32)
66-
self.Value = self._reader.read(self.Size)
67-
68-
def __bytes__(self):
69-
return self.Value
70-
71-
72-
class NetManifestResource(Struct):
73-
USERTYPES = 0x40
74-
PRIMITIVE = {
75-
0x00: Null,
76-
0x01: String,
77-
0x02: Boolean,
78-
0x03: Char,
79-
0x04: Byte,
80-
0x05: SByte,
81-
0x06: Int16,
82-
0x07: UInt16,
83-
0x08: Int32,
84-
0x09: UInt32,
85-
0x0A: Int64,
86-
0x0B: UInt64,
87-
0x0C: Single,
88-
0x0D: Double,
89-
0x0E: Decimal,
90-
0x0F: DateTime,
91-
0x10: TimeSpan,
92-
0x20: ByteArray,
93-
0x21: ByteArray,
94-
}
95-
96-
def parse(self):
97-
self.Signature = self.expect(UInt32)
26+
def stream(reader: DotNetStructReader):
    """
    Read one length-prefixed buffer from the given reader and pass it to a
    `BinaryFormatterParser`; the parser object is returned. This is the decoder
    registered for the `Stream` resource type code.
    """
    return BinaryFormatterParser(reader.read_length_prefixed())
28+
29+
30+
class RsrcPrimitive(enum.IntEnum):
    """
    Type codes of the built-in resource value types. A type code read from a
    resource entry that is below the user type base is converted to a member of
    this enumeration; a code with no member here is reported as an unknown type.
    """
    Null = 0x00
    String = 0x01
    Boolean = 0x02
    Char = 0x03
    Byte = 0x04
    SByte = 0x05
    Int16 = 0x06
    UInt16 = 0x07
    Int32 = 0x08
    UInt32 = 0x09
    Int64 = 0x0A
    UInt64 = 0x0B
    Single = 0x0C
    Double = 0x0D
    Decimal = 0x0E
    DateTime = 0x0F
    TimeSpan = 0x10
    # The two buffer-like types are numbered separately from the scalar types.
    ByteArray = 0x20
    Stream = 0x21
50+
51+
52+
# Type codes at or above this value do not name a primitive; the parser subtracts
# this base from the code and uses the result as an index into the list of
# resource type names read from the manifest.
RsrcUserTypeBase = 0x40


# Maps every primitive type code to the callable that decodes a value of that
# type. Each callable receives a DotNetStructReader positioned at the start of
# the entry data and returns the decoded value.
RsrcPrimitiveDispatch = {
    RsrcPrimitive.Null      : DotNetStructReader.read_dn_null,
    RsrcPrimitive.Boolean   : DotNetStructReader.read_bool_byte,
    RsrcPrimitive.Byte      : DotNetStructReader.read_byte,
    RsrcPrimitive.Char      : DotNetStructReader.read_char,
    RsrcPrimitive.Decimal   : DotNetStructReader.read_dn_decimal,
    RsrcPrimitive.Single    : DotNetStructReader.f32,
    RsrcPrimitive.Double    : DotNetStructReader.f64,
    RsrcPrimitive.Int16     : DotNetStructReader.i16,
    RsrcPrimitive.Int32     : DotNetStructReader.i32,
    RsrcPrimitive.Int64     : DotNetStructReader.i64,
    RsrcPrimitive.SByte     : DotNetStructReader.i8,
    RsrcPrimitive.TimeSpan  : DotNetStructReader.read_dn_time_span,
    RsrcPrimitive.DateTime  : DotNetStructReader.read_dn_date_time,
    RsrcPrimitive.UInt16    : DotNetStructReader.u16,
    RsrcPrimitive.UInt32    : DotNetStructReader.u32,
    RsrcPrimitive.UInt64    : DotNetStructReader.u64,
    RsrcPrimitive.String    : DotNetStructReader.read_dn_length_prefixed_string,
    RsrcPrimitive.ByteArray : DotNetStructReader.read_length_prefixed,
    RsrcPrimitive.Stream    : stream,
}
76+
77+
78+
class NetResource(DotNetStruct):
    """
    A single named entry of a managed resource. The constructor only reads the
    entry name and its data offset; the remaining attributes are filled in later
    by the code that parses the manifest.
    """
    # Decoded value of the entry; set together with Data once the entry is read.
    Value: int | str | bool | list | memoryview | datetime.datetime | datetime.timedelta | None
    # Raw entry data as read from the resource.
    Data: memoryview
    # Name of the entry's type, assigned after the type code has been read.
    TypeName: str
    # Description of a decoding failure, or None.
    Error: str | None

    def __init__(self, reader: DotNetStructReader, base: int):
        """
        Read the UTF-16LE length-prefixed entry name followed by a 32-bit offset
        from `reader`. The offset that was read is relative; `base` is added to
        it to obtain the value stored in `Offset`.
        """
        self.Name = reader.read_dn_length_prefixed_string(codec='utf-16le')
        self.Offset = reader.u32() + base
        # The size is not known yet; it is assigned by the manifest parser
        # after all entries have been collected.
        self.Size = 0
        self.Error = None
89+
90+
91+
class NetManifestResource(DotNetStruct):
92+
93+
def __init__(self, reader: DotNetStructReader):
94+
self.Signature = reader.u32()
9895
if self.Signature != 0xBEEFCACE:
9996
raise NoManagedResource
97+
self.ReaderCount = reader.u32()
98+
self.ReaderTypeLength = reader.u32()
99+
tr = DotNetRsrcReader(reader.read_exactly(self.ReaderTypeLength))
100+
self.ReaderType = rt = tr.read_dn_string_primitive()
101+
self.ResourceSetType = tr.read_dn_string_primitive()
100102

101-
self.ReaderCount = self.expect(UInt32)
102-
self.ReaderTypeLength = self.expect(UInt32)
103-
104-
tr = StreamReader(self._reader.read(self.ReaderTypeLength))
105-
self.ReaderType = tr.expect(StringPrimitive)
106-
self.ResourceSetType = tr.expect(StringPrimitive)
107-
108-
if not re.match(r"^System\.Resources\.ResourceReader,\s*mscorlib", self.ReaderType):
103+
if not re.match(r"^System\.Resources\.ResourceReader,\s*mscorlib", rt):
109104
raise AssertionError('unknown resource reader')
110105

111-
self.Version = self.expect(UInt32)
112-
ResourceCount = self.expect(UInt32)
113-
RsrcTypeCount = self.expect(UInt32)
114-
115-
ResourceTypes = [
116-
self.expect(LengthPrefixedString)
117-
for _ in range(RsrcTypeCount)
118-
]
119-
120-
self._reader.align(8)
121-
self._reader.skip(4 * ResourceCount)
122-
123-
# Since we do not require the resource hashes, we skip over them.
124-
# The following would be the code to read in the hashes:
125-
#
126-
# ResourceHashes = [
127-
# self.expect(UInt32)
128-
# for _ in range(ResourceCount)
129-
# ]
106+
self.Version = reader.u32()
107+
ResourceCount = reader.u32()
108+
RsrcTypeCount = reader.u32()
130109

131-
ResourceNameOffsets = [
132-
self.expect(UInt32)
133-
for _ in range(ResourceCount)
134-
]
110+
ResourceTypes = [reader.read_dn_length_prefixed_string()
111+
for _ in range(RsrcTypeCount)]
135112

136-
self.DataSectionOffset = self.expect(UInt32)
137-
138-
self.Resources = []
113+
reader.byte_align(8)
114+
self.ResourceHashes = [reader.u32() for _ in range(ResourceCount)]
115+
ResourceNameOffsets = [reader.u32() for _ in range(ResourceCount)]
116+
self.DataSectionOffset = base = reader.u32()
117+
rsrc: list[NetResource] = []
118+
self.Resources = rsrc
139119

140120
for k in range(ResourceCount):
141-
with self._reader.checkpoint():
142-
self._reader.skip(ResourceNameOffsets[k])
143-
Name = self.expect(LengthPrefixedString, codec='UTF-16LE')
144-
Offset = self.expect(UInt32) + self.DataSectionOffset
145-
self.Resources.append(Box(Offset=Offset, Name=Name))
146-
147-
self.Resources.sort(key=lambda r: r.Offset)
148-
self.Resources.append(Box(Offset=len(self._reader)))
149-
self.Resources = [
150-
Box(Size=b.Offset - a.Offset - 1, **a)
151-
for a, b in zip(self.Resources, self.Resources[1:])
152-
]
153-
154-
for Index, Entry in enumerate(self.Resources):
155-
156-
self._reader.seek(Entry.Offset)
157-
TypeCode = self.expect(EncodedInteger)
121+
with reader.detour():
122+
reader.skip(ResourceNameOffsets[k])
123+
rsrc.append(NetResource(reader, base))
124+
125+
if rsrc:
126+
rsrc.sort(key=lambda r: r.Offset)
127+
it = iter(rsrc)
128+
next(it)
129+
ends = [r.Offset for r in it]
130+
ends.append(len(reader))
131+
for r, end in zip(rsrc, ends):
132+
r.Size = end - r.Offset - 1
133+
134+
for Entry in rsrc:
135+
136+
reader.seek(Entry.Offset)
137+
TypeCode = reader.read_dn_encoded_integer()
158138
Entry.Error = None
159-
Entry.Value = Entry.Data = self._reader.read(Entry.Size)
139+
Entry.Value = Entry.Data = reader.read_exactly(Entry.Size)
160140

161-
if TypeCode >= self.USERTYPES:
162-
Entry.TypeName = ResourceTypes[TypeCode - self.USERTYPES]
141+
if TypeCode >= RsrcUserTypeBase:
142+
Entry.TypeName = ResourceTypes[TypeCode - RsrcUserTypeBase]
163143
try:
164144
Deserialized = BinaryFormatterParser(
165145
Entry.Data,
@@ -168,28 +148,28 @@ def parse(self):
168148
keep_meta=False
169149
)
170150
except Exception as error:
171-
Entry.Error = f'failed to deserialize entry data: {error}'
151+
Entry.Error = F'Failed to deserialize entry data: {error}'
172152
continue
173153
try:
174154
_, _, _, Data = Deserialized
175155
except ValueError:
176-
Entry.Error = f'deserialized entry has {len(Deserialized)} records, 4 were expected.'
156+
Entry.Error = F'Deserialized entry has {len(Deserialized)} records, 4 were expected.'
177157
continue
178-
if Data not in Entry.Data:
179-
Entry.Error = 'the computed entry value is not a substring of the entry data.'
158+
if not buffer_contains(Entry.Data, Data):
159+
Entry.Error = 'The computed entry value is not a substring of the entry data.'
180160
Entry.Value = Entry.Data
181161
else:
182162
Entry.Value = Data
183-
184-
elif TypeCode in self.PRIMITIVE:
185-
Type = self.PRIMITIVE[TypeCode]
186-
Entry.TypeName = repr(Type)
187-
package = StreamReader(Entry.Data).expect_with_meta(Type)
188-
Entry.Value = unpack(package)
189163
else:
190-
Entry.TypeName = f'UNKNOWN TYPE 0x{TypeCode:X}'
164+
try:
165+
Type = RsrcPrimitive(TypeCode)
166+
except ValueError:
167+
Entry.TypeName = F'UnknownType[{TypeCode:#x}]'
168+
else:
169+
Entry.TypeName = Type.name
170+
package = DotNetStructReader(Entry.Value)
171+
Entry.Value = RsrcPrimitiveDispatch[Type](package)
191172

192173

193-
class NetStructuredResources(list):
194-
def __init__(self, data):
195-
list.__init__(self, NetManifestResource(StreamReader(data)).Resources)
174+
def NetStructuredResources(data):
    """
    Parse `data` as a managed resource manifest and return the list of resource
    entries found in it. The input is wrapped in a memoryview before it is
    handed to the reader.
    """
    return NetManifestResource(DotNetStructReader(memoryview(data))).Resources

0 commit comments

Comments
 (0)