Skip to content

Commit a695f43

Browse files
committed
Support for loading mxOPAQUE_CLASS objects in v7.3 HDF5 format
* MAT_HDF5.jl (matopen): Add a new endian-indicator argument; read and parse the subsystem on load.
* MAT_HDF5.jl (close): Write the endian header based on system endianness.
* MAT_HDF5.jl (m_read::HDF5.Dataset): Handle MATLAB_object_decode (mxOPAQUE_CLASS) types.
* MAT_HDF5.jl (m_read::HDF5.Group): Read subsystem data and function handles.
* MAT.jl (matopen): Update calls to MAT_HDF5.matopen to pass the endian indicator.

Subsystem Improvements
* MAT_subsys.jl: Add copyright notice.
* MAT_subsys.jl: Add support for handle class objects.
* MAT_subsys.jl (load_subsys!): Handle different FileWrapper versions.
* MAT_subsys.jl: Add support for decoding nested objects.

Tests
* test/read.jl: Update tests for "function_handles.mat" and "struct_table_datetime.mat".
1 parent 89572a7 commit a695f43

File tree

4 files changed

+162
-47
lines changed

4 files changed

+162
-47
lines changed

src/MAT.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Boo
4141
# When creating new files, create as HDF5 by default
4242
fs = filesize(filename)
4343
if cr && (tr || fs == 0)
44-
return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress)
44+
return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress, Base.ENDIAN_BOM == 0x04030201)
4545
elseif fs == 0
4646
error("File \"$filename\" does not exist and create was not specified")
4747
end
@@ -77,7 +77,7 @@ function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Boo
7777
seek(rawfid, offset)
7878
if read!(rawfid, Vector{UInt8}(undef, 8)) == HDF5_HEADER
7979
close(rawfid)
80-
return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress)
80+
return MAT_HDF5.matopen(filename, rd, wr, cr, tr, ff, compress, endian_indicator == 0x494D)
8181
end
8282
end
8383

src/MAT_HDF5.jl

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
module MAT_HDF5
3030

3131
using HDF5, SparseArrays
32+
using ..MAT_subsys
3233

3334
import Base: names, read, write, close
3435
import HDF5: Reference
@@ -69,8 +70,13 @@ function close(f::MatlabHDF5File)
6970
unsafe_copyto!(magicptr, idptr, length(identifier))
7071
end
7172
magic[126] = 0x02
72-
magic[127] = 0x49
73-
magic[128] = 0x4d
73+
if Base.ENDIAN_BOM == 0x04030201
74+
magic[127] = 0x49
75+
magic[128] = 0x4d
76+
else
77+
magic[127] = 0x4d
78+
magic[128] = 0x49
79+
end
7480
rawfid = open(f.plain.filename, "r+")
7581
write(rawfid, magic)
7682
close(rawfid)
@@ -80,7 +86,7 @@ function close(f::MatlabHDF5File)
8086
nothing
8187
end
8288

83-
function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool, compress::Bool)
89+
function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool, compress::Bool, endian_indicator::Bool)
8490
local f
8591
if ff && !wr
8692
error("Cannot append to a read-only file")
@@ -109,6 +115,11 @@ function matopen(filename::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Boo
109115
fid.refcounter = length(g)-1
110116
close(g)
111117
end
118+
subsys_refs = "#subsystem#"
119+
if haskey(fid.plain, subsys_refs)
120+
subsys_data = m_read(fid.plain[subsys_refs])
121+
MAT_subsys.load_subsys!(subsys_data, endian_indicator)
122+
end
112123
fid
113124
end
114125

@@ -118,6 +129,7 @@ const name_type_attr_matlab = "MATLAB_class"
118129
const empty_attr_matlab = "MATLAB_empty"
119130
const sparse_attr_matlab = "MATLAB_sparse"
120131
const int_decode_attr_matlab = "MATLAB_int_decode"
132+
const object_type_attr_matlab = "MATLAB_object_decode"
121133

122134
### Reading
123135
function read_complex(dtype::HDF5.Datatype, dset::HDF5.Dataset, ::Type{T}) where T
@@ -128,6 +140,21 @@ function read_complex(dtype::HDF5.Datatype, dset::HDF5.Dataset, ::Type{T}) where
128140
return read(dset, Complex{T})
129141
end
130142

143+
"""
    read_cell(dset::HDF5.Dataset)

Read `dset` as an array of HDF5 object references and return an `Array{Any}`
of the same size whose elements are the result of calling `m_read` on each
referenced object.

Each referenced object is opened from the file that owns `dset` and is closed
again once it has been read, even if `m_read` throws.
"""
function read_cell(dset::HDF5.Dataset)
    refs = read(dset, Reference)
    out = Array{Any}(undef, size(refs))
    f = HDF5.file(dset)
    for i in eachindex(refs)
        # Use a separate local instead of rebinding the `dset` argument: the
        # dereferenced object is not guaranteed to be a dataset.
        obj = f[refs[i]]
        try
            out[i] = m_read(obj)
        finally
            close(obj)
        end
    end
    return out
end
157+
131158
function m_read(dset::HDF5.Dataset)
132159
if haskey(dset, empty_attr_matlab)
133160
# Empty arrays encode the dimensions as the dataset
@@ -150,36 +177,46 @@ function m_read(dset::HDF5.Dataset)
150177
end
151178

152179
mattype = haskey(dset, name_type_attr_matlab) ? read_attribute(dset, name_type_attr_matlab) : "cell"
180+
objecttype = haskey(dset, object_type_attr_matlab) ? read_attribute(dset, object_type_attr_matlab) : nothing
153181

154-
if mattype == "cell"
182+
if mattype == "cell" && objecttype === nothing
155183
# Cell arrays, represented as an array of refs
156-
refs = read(dset, Reference)
157-
out = Array{Any}(undef, size(refs))
158-
f = HDF5.file(dset)
159-
for i = 1:length(refs)
160-
dset = f[refs[i]]
161-
try
162-
out[i] = m_read(dset)
163-
finally
164-
close(dset)
165-
end
184+
return read_cell(dset)
185+
elseif objecttype !== nothing
186+
if objecttype != 3
187+
@warn "MATLAB Object Type $mattype is currently not supported."
188+
return missing
189+
end
190+
if mattype == "FileWrapper__"
191+
return read_cell(dset)
192+
end
193+
if haskey(dset, "MATLAB_fields")
194+
@warn "Enumeration Instances are not supported currently."
195+
return missing
166196
end
167-
return out
168197
elseif !haskey(str2type_matlab,mattype)
169-
@warn "MATLAB $mattype values are currently not supported"
198+
@warn "MATLAB $mattype values are currently not supported."
170199
return missing
171200
end
172201

173202
# Regular arrays of values
174203
# Convert to Julia type
175-
T = str2type_matlab[mattype]
204+
if objecttype === nothing
205+
T = str2type_matlab[mattype]
206+
else
207+
T = UInt32 # FIXME: Default for MATLAB objects?
208+
end
176209

177210
# Check for a COMPOUND data set, and if so handle complex numbers specially
178211
dtype = datatype(dset)
179212
try
180213
class_id = HDF5.API.h5t_get_class(dtype.id)
181214
d = class_id == HDF5.API.H5T_COMPOUND ? read_complex(dtype, dset, T) : read(dset, T)
182-
length(d) == 1 ? d[1] : d
215+
if objecttype !== nothing
216+
return MAT_subsys.load_mcos_object(d, "MCOS")
217+
else
218+
return length(d) == 1 ? d[1] : d
219+
end
183220
finally
184221
close(dtype)
185222
end
@@ -194,7 +231,11 @@ end
194231

195232
# reading a struct, struct array, or sparse matrix
196233
function m_read(g::HDF5.Group)
197-
mattype = read_attribute(g, name_type_attr_matlab)
234+
if HDF5.name(g) == "/#subsystem#"
235+
mattype = "#subsystem#"
236+
else
237+
mattype = read_attribute(g, name_type_attr_matlab)
238+
end
198239
if mattype != "struct"
199240
# Check if this is a sparse matrix.
200241
fn = keys(g)
@@ -226,10 +267,11 @@ function m_read(g::HDF5.Group)
226267
end
227268
return SparseMatrixCSC(convert(Int, read_attribute(g, sparse_attr_matlab)), length(jc)-1, jc, ir, data)
228269
elseif mattype == "function_handle"
229-
@warn "MATLAB $mattype values are currently not supported"
230-
return missing
270+
# Fall through
231271
else
232-
@warn "Unknown non-struct group of type $mattype detected; attempting to read as struct"
272+
if mattype != "#subsystem#"
273+
@warn "Unknown non-struct group of type $mattype detected; attempting to read as struct"
274+
end
233275
end
234276
end
235277
if haskey(g, "MATLAB_fields")

src/MAT_subsys.jl

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,27 @@
1+
# MAT_subsys.jl
2+
# Tools for processing MAT-file subsystem data in Julia
3+
#
4+
# Copyright (C) 2025 Nithin Lakshmisha
5+
#
6+
# Permission is hereby granted, free of charge, to any person obtaining
7+
# a copy of this software and associated documentation files (the
8+
# "Software"), to deal in the Software without restriction, including
9+
# without limitation the rights to use, copy, modify, merge, publish,
10+
# distribute, sublicense, and/or sell copies of the Software, and to
11+
# permit persons to whom the Software is furnished to do so, subject to
12+
# the following conditions:
13+
#
14+
# The above copyright notice and this permission notice shall be
15+
# included in all copies or substantial portions of the Software.
16+
#
17+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24+
125
module MAT_subsys
226

327
const FWRAP_VERSION = 4
@@ -39,27 +63,34 @@ mutable struct Subsys
3963
end
4064

4165
const subsys_cache = Ref{Union{Nothing,Subsys}}(nothing)
66+
const object_cache = Ref{Union{Nothing, Dict{UInt32, Dict{String,Any}}}}(nothing)
4267

4368
"""
    clear_subsys!()

Reset the module-level subsystem state by setting both `subsys_cache` and
`object_cache` to `nothing`.
"""
function clear_subsys!()
    for cache in (subsys_cache, object_cache)
        cache[] = nothing
    end
    return nothing
end
4672

4773
function load_subsys!(subsystem_data::Dict{String,Any}, swap_bytes::Bool)
4874
subsys_cache[] = Subsys()
75+
object_cache[] = Dict{UInt32, Dict{String,Any}}()
4976
subsys_cache[].handle_data = get(subsystem_data, "handle", nothing)
5077
subsys_cache[].java_data = get(subsystem_data, "java", nothing)
5178
mcos_data = get(subsystem_data, "MCOS", nothing)
5279
if mcos_data === nothing
5380
return
5481
end
5582

56-
fwrap_metadata = vec(mcos_data[2][1, 1])
83+
if mcos_data isa Tuple
84+
# Backward compatibility with MAT_v5
85+
mcos_data = mcos_data[2]
86+
end
87+
fwrap_metadata = vec(mcos_data[1, 1])
5788

5889
# FIXME: Is this the best way to read?
5990
# Integers are written as uint8 (with swap), interpret as uint32
6091
version = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[1:4]) : fwrap_metadata[1:4])[1]
61-
if version > FWRAP_VERSION
62-
error("Unsupported FileWrapper version: $version")
92+
if version <= 1 || version > FWRAP_VERSION
93+
error("Cannot read subsystem: Unsupported FileWrapper version: $version")
6394
end
6495

6596
subsys_cache[].num_names = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[5:8]) : fwrap_metadata[5:8])[1]
@@ -86,18 +117,26 @@ function load_subsys!(subsystem_data::Dict{String,Any}, swap_bytes::Bool)
86117
subsys_cache[].object_id_metadata = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[region_offsets[3]+1:region_offsets[4]]) : fwrap_metadata[region_offsets[3]+1:region_offsets[4]])
87118
subsys_cache[].obj_prop_metadata = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[region_offsets[4]+1:region_offsets[5]]) : fwrap_metadata[region_offsets[4]+1:region_offsets[5]])
88119
subsys_cache[].dynprop_metadata = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[region_offsets[5]+1:region_offsets[6]]) : fwrap_metadata[region_offsets[5]+1:region_offsets[6]])
89-
subsys_cache[]._u6_metadata = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[region_offsets[6]+1:region_offsets[7]]) : fwrap_metadata[region_offsets[6]+1:region_offsets[7]])
90-
subsys_cache[]._u7_metadata = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[region_offsets[7]+1:region_offsets[8]]) : fwrap_metadata[region_offsets[7]+1:region_offsets[8]])
91120

92-
if version < 4
93-
subsys_cache[].prop_vals_saved = mcos_data[2][3:end-2, 1]
121+
if region_offsets[6] != 0
122+
subsys_cache[]._u6_metadata = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[region_offsets[6]+1:region_offsets[7]]) : fwrap_metadata[region_offsets[6]+1:region_offsets[7]])
123+
end
124+
125+
if region_offsets[7] != 0
126+
subsys_cache[]._u7_metadata = reinterpret(UInt32, swap_bytes ? reverse(fwrap_metadata[region_offsets[7]+1:region_offsets[8]]) : fwrap_metadata[region_offsets[7]+1:region_offsets[8]])
127+
end
128+
129+
if version == 2
130+
subsys_cache[].prop_vals_saved = mcos_data[3:end-1, 1]
131+
elseif version == 3
132+
subsys_cache[].prop_vals_saved = mcos_data[3:end-2, 1]
133+
subsys_cache[]._c2 = mcos_data[end-1, 1]
94134
else
95-
subsys_cache[].prop_vals_saved = mcos_data[2][3:end-3, 1]
96-
subsys_cache[]._c3 = mcos_data[2][end-2, 1]
135+
subsys_cache[].prop_vals_saved = mcos_data[3:end-3, 1]
136+
subsys_cache[]._c3 = mcos_data[end-2, 1]
97137
end
98138

99-
subsys_cache[]._c2 = mcos_data[2][end-1, 1]
100-
subsys_cache[].prop_vals_defaults = mcos_data[2][end, 1]
139+
subsys_cache[].prop_vals_defaults = mcos_data[end, 1]
101140
end
102141

103142
function get_classname(class_id::UInt32)
@@ -107,10 +146,10 @@ function get_classname(class_id::UInt32)
107146
namespace = if namespace_idx == 0
108147
""
109148
else
110-
subsys_cache[].mcos_names[namespace_idx-1] * "."
149+
subsys_cache[].mcos_names[namespace_idx] * "."
111150
end
112151

113-
classname = namespace * subsys_cache[].mcos_names[classname_idx-1]
152+
classname = namespace * subsys_cache[].mcos_names[classname_idx]
114153
return classname
115154
end
116155

@@ -138,6 +177,33 @@ function get_property_idxs(obj_type_id::UInt32, saveobj_ret_type::Bool)
138177
return prop_field_idxs[offset:offset+nprops*nfields-1]
139178
end
140179

180+
"""
    find_nested_prop(prop_value)

Search `prop_value` for nested MCOS object references and decode them.

- A `Dict` (struct) has each of its values replaced, in place, by the result of
  a recursive call.
- A `Matrix{Any}` (cell) has each of its elements replaced, in place, by the
  result of a recursive call.
- A non-empty `Matrix{UInt32}` whose first element is `0xdd000000` is treated
  as object metadata and passed to `load_mcos_object`; the decoded object is
  returned.

Any other value is returned unchanged.
"""
function find_nested_prop(prop_value::Any)
    # Hacky way to find a nested object: nested objects are stored as a
    # uint32 matrix with a unique signature in the first element.
    if prop_value isa Dict
        # Handle nested objects in a dictionary (struct)
        for (key, value) in prop_value
            prop_value[key] = find_nested_prop(value)
        end
    end

    if prop_value isa Matrix{Any}
        # Handle nested objects in a cell
        for i in eachindex(prop_value)
            prop_value[i] = find_nested_prop(prop_value[i])
        end
    end

    # The emptiness check must come first: indexing [1, 1] on an empty
    # uint32 matrix (e.g. an empty property value) would throw a BoundsError.
    if prop_value isa Matrix{UInt32} && !isempty(prop_value) &&
       prop_value[1, 1] == 0xdd000000
        return load_mcos_object(prop_value, "MCOS")
    end

    return prop_value
end
206+
141207
function get_saved_properties(obj_type_id::UInt32, saveobj_ret_type::Bool)
142208
save_prop_map = Dict{String,Any}()
143209
prop_field_idxs = get_property_idxs(obj_type_id, saveobj_ret_type)
@@ -148,14 +214,13 @@ function get_saved_properties(obj_type_id::UInt32, saveobj_ret_type::Bool)
148214
if prop_type == 0
149215
prop_value = subsys_cache[].mcos_names[prop_field_idxs[i*3+3]]
150216
elseif prop_type == 1
151-
# FIXME: Search for nested objects
152217
prop_value = subsys_cache[].prop_vals_saved[prop_field_idxs[i*3+3]+1]
153218
elseif prop_type == 2
154219
prop_value = prop_field_idxs[i*3+3]
155220
else
156221
error("Unknown property type ID: $prop_type encountered during deserialization")
157222
end
158-
save_prop_map[prop_name] = prop_value
223+
save_prop_map[prop_name] = find_nested_prop(prop_value)
159224
end
160225
return save_prop_map
161226
end
@@ -181,8 +246,6 @@ function get_properties(object_id::UInt32)
181246
end
182247

183248
function load_mcos_object(metadata::Any, type_name::String)
184-
# TODO: Add support for handle class objects
185-
186249
if type_name != "MCOS"
187250
@warn "Loading Type:$type_name is not implemented. Returning metadata."
188251
return metadata
@@ -213,9 +276,17 @@ function load_mcos_object(metadata::Any, type_name::String)
213276
classname = get_classname(class_id)
214277

215278
object_arr = Array{Dict{String,Any}}(undef, convert(Vector{Int}, dims)...)
279+
216280
for i = 1:length(object_arr)
217-
prop_dict = get_properties(object_ids[i])
218-
prop_dict["__class__"] = classname
281+
oid = object_ids[i]
282+
if haskey(object_cache[], oid)
283+
prop_dict = object_cache[][oid]
284+
else
285+
prop_dict = Dict{String,Any}()
286+
object_cache[][oid] = prop_dict
287+
merge!(prop_dict, get_properties(oid))
288+
prop_dict["__class__"] = classname
289+
end
219290
object_arr[i] = prop_dict
220291
end
221292

test/read.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,16 +219,18 @@ end
219219
let objtestfile = "function_handles.mat"
220220
vars = matread(joinpath(dirname(@__FILE__), "v7.3", objtestfile))
221221
@test "sin" in keys(vars)
222-
@test ismissing(vars["sin"])
222+
@test typeof(vars["sin"]) == Dict{String, Any}
223+
@test Set(keys(vars["sin"])) == Set(["function_handle", "sentinel", "separator", "matlabroot"])
223224
@test "anonymous" in keys(vars)
224-
@test ismissing(vars["anonymous"])
225+
@test typeof(vars["anonymous"]) == Dict{String, Any}
226+
@test Set(keys(vars["anonymous"])) == Set(["function_handle", "sentinel", "separator", "matlabroot"])
225227
end
226228
let objtestfile = "struct_table_datetime.mat"
227229
vars = matread(joinpath(dirname(@__FILE__), "v7.3", objtestfile))["s"]
228230
@test "testTable" in keys(vars)
229-
@test ismissing(vars["testTable"])
231+
@test vars["testTable"][1, 1]["__class__"] == "table"
230232
@test "testDatetime" in keys(vars)
231-
@test ismissing(vars["testDatetime"])
233+
@test vars["testDatetime"][1, 1]["__class__"] == "datetime"
232234
end
233235

234236
# test reading of old-style Matlab object in v7.3 format

0 commit comments

Comments
 (0)