@@ -63,6 +63,12 @@ fn write_offset(buf: &mut Vec<u8>, value: usize, nbytes: u8) {
     buf.extend_from_slice(&bytes[..nbytes as usize]);
 }
 
+/// Write a little-endian integer into the buffer at a specific position
+fn write_offset_at_pos(buf: &mut [u8], start_pos: usize, value: usize, nbytes: u8) {
+    let bytes = value.to_le_bytes();
+    buf[start_pos..start_pos + nbytes as usize].copy_from_slice(&bytes[..nbytes as usize]);
+}
+
 /// Wrapper around a `Vec<u8>` that provides methods for appending
 /// primitive values, variant types, and metadata.
 ///
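The existing `write_offset` appends to the end of a `Vec<u8>`; the new `write_offset_at_pos` instead overwrites bytes that were already reserved in the buffer. A minimal standalone sketch of the truncating little-endian write (the helper is repeated from the hunk above; the buffer contents are made up for illustration):

```rust
/// Same helper as in the hunk above: write the low `nbytes` of `value` in little-endian order.
fn write_offset_at_pos(buf: &mut [u8], start_pos: usize, value: usize, nbytes: u8) {
    let bytes = value.to_le_bytes();
    buf[start_pos..start_pos + nbytes as usize].copy_from_slice(&bytes[..nbytes as usize]);
}

fn main() {
    // Two bytes were reserved at positions 1..3; write 0x0302 into them.
    let mut buf = vec![0xAA, 0x00, 0x00, 0xBB];
    write_offset_at_pos(&mut buf, 1, 0x0302, 2);
    // Little-endian: low byte first, and only `nbytes` bytes of the usize are kept.
    assert_eq!(buf, vec![0xAA, 0x02, 0x03, 0xBB]);
}
```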
@@ -342,6 +348,63 @@ impl ValueBuffer {
             write_offset(buf, data_size, nbytes);
         }
     }
+
+    /// Writes the header byte and `num_fields` for a variant object or list at the given
+    /// start position in the buffer, and returns the position after the write
+    fn append_header_start_from_buf_pos(
+        &mut self,
+        start_pos: usize, // the start position where the header will be inserted
+        header_byte: u8,
+        is_large: bool,
+        num_fields: usize,
+    ) -> usize {
+        let buffer = self.inner_mut();
+
+        // Write header at the original start position
+        let mut header_pos = start_pos;
+
+        // Write header byte
+        buffer[header_pos] = header_byte;
+        header_pos += 1;
+
+        // Write number of fields
+        if is_large {
+            buffer[header_pos..header_pos + 4].copy_from_slice(&(num_fields as u32).to_le_bytes());
+            header_pos += 4;
+        } else {
+            buffer[header_pos] = num_fields as u8;
+            header_pos += 1;
+        }
+
+        header_pos
+    }
+
+    /// Writes an array of offsets, optionally followed by the final offset (data size),
+    /// starting at the given position in the buffer, and returns the position after the write
+    fn append_offset_array_start_from_buf_pos(
+        &mut self,
+        start_pos: usize,
+        offsets: impl IntoIterator<Item = usize>,
+        data_size: Option<usize>,
+        nbytes: u8,
+    ) -> usize {
+        let buf = self.inner_mut();
+
+        let mut current_pos = start_pos;
+        for relative_offset in offsets {
+            write_offset_at_pos(buf, current_pos, relative_offset, nbytes);
+            current_pos += nbytes as usize;
+        }
+
+        // Write data_size
+        if let Some(data_size) = data_size {
+            // Write data_size at the end of the offsets
+            write_offset_at_pos(buf, current_pos, data_size, nbytes);
+            current_pos += nbytes as usize;
+        }
+
+        current_pos
+    }
 }
 
 /// Builder for constructing metadata for [`Variant`] values.
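Taken together, the two new `ValueBuffer` methods support a reserve-then-backfill pattern: the caller leaves a gap for the header, writes the field data, and only afterwards fills in the header byte, the field count, and the offset array. A rough standalone sketch of the byte layout this produces, using plain functions over a byte slice (the 1-byte offsets, the header value, and the function names here are illustrative, not the crate's API):

```rust
// Standalone mirror of the position-based writers added above, over a plain byte buffer.
fn write_header_at(buf: &mut [u8], mut pos: usize, header_byte: u8, is_large: bool, num_fields: usize) -> usize {
    buf[pos] = header_byte;
    pos += 1;
    if is_large {
        buf[pos..pos + 4].copy_from_slice(&(num_fields as u32).to_le_bytes());
        pos += 4;
    } else {
        buf[pos] = num_fields as u8;
        pos += 1;
    }
    pos
}

fn write_offsets_at(buf: &mut [u8], mut pos: usize, offsets: &[usize], data_size: Option<usize>, nbytes: u8) -> usize {
    // Write each relative offset, then the optional trailing data_size, truncated little-endian.
    for &off in offsets.iter().chain(data_size.iter()) {
        let bytes = off.to_le_bytes();
        buf[pos..pos + nbytes as usize].copy_from_slice(&bytes[..nbytes as usize]);
        pos += nbytes as usize;
    }
    pos
}

fn main() {
    // Pretend 3 bytes of field data already live at the end, and 5 bytes were reserved up front:
    // 1 header byte + 1 field count + 2 offsets + 1 data_size (all offsets 1 byte wide).
    let mut buf = vec![0u8; 5];
    buf.extend_from_slice(&[b'a', b'b', b'c']);
    let pos = write_header_at(&mut buf, 0, 0b0000_0010, false, 2); // header value is arbitrary here
    let end = write_offsets_at(&mut buf, pos, &[0, 1], Some(3), 1);
    assert_eq!(end, 5);
    assert_eq!(buf, vec![0b0000_0010, 2, 0, 1, 3, b'a', b'b', b'c']);
}
```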
@@ -506,7 +569,7 @@ enum ParentState<'a> {
         metadata_builder: &'a mut MetadataBuilder,
         fields: &'a mut IndexMap<u32, usize>,
         field_name: &'a str,
-        object_start_offset: usize,
+        parent_offset_base: usize,
     },
 }
 
@@ -545,7 +608,7 @@ impl ParentState<'_> {
                 metadata_builder,
                 fields,
                 field_name,
-                object_start_offset,
+                parent_offset_base: object_start_offset,
                 ..
             } => {
                 let field_id = metadata_builder.upsert_field_name(field_name);
@@ -576,7 +639,7 @@ impl ParentState<'_> {
         }
     }
 
-    // return the offset of the underlying buffer at the time of calling this method.
+    // Returns the offset of the underlying buffer at the time this method is called.
     fn buffer_current_offset(&self) -> usize {
         match self {
             ParentState::Variant { buffer, .. }
@@ -585,7 +648,7 @@ impl ParentState<'_> {
         }
     }
 
-    // return the current index of the undelying metadata buffer at the time of calling this method.
+    // Returns the current index of the underlying metadata buffer at the time this method is called.
    fn metadata_current_offset(&self) -> usize {
         match self {
             ParentState::Variant {
@@ -1048,8 +1111,6 @@ impl<'a> ListBuilder<'a> {
 
         // Get parent's buffer
         let parent_buffer = self.parent_state.buffer();
-        // as object builder has been reused the parent buffer,
-        // we need to shift the offset by the starting offset of the parent object
         let starting_offset = parent_buffer.offset();
 
         // Write header
@@ -1078,12 +1139,12 @@ impl Drop for ListBuilder<'_> {
 pub struct ObjectBuilder<'a> {
     parent_state: ParentState<'a>,
     fields: IndexMap<u32, usize>, // (field_id, offset)
-    /// the starting offset in the parent's buffer where this object starts
-    object_start_offset: usize,
-    /// the starting offset in the parent's metadata buffer where this object starts
+    /// The starting offset in the parent's buffer where this object starts
+    parent_offset_base: usize,
+    /// The starting offset in the parent's metadata buffer where this object starts,
     /// used to truncate the written fields in `drop` if the current object has not been finished
-    object_meta_start_offset: usize,
-    /// whether the object has been finished, the written content of the current object
+    parent_metadata_offset_base: usize,
+    /// Whether the object has been finished; the written content of the current object
     /// will be truncated in `drop` if `has_been_finished` is false
     has_been_finished: bool,
     validate_unique_fields: bool,
@@ -1093,14 +1154,14 @@ pub struct ObjectBuilder<'a> {
 
 impl<'a> ObjectBuilder<'a> {
     fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> Self {
-        let start_offset = parent_state.buffer_current_offset();
-        let meta_start_offset = parent_state.metadata_current_offset();
+        let offset_base = parent_state.buffer_current_offset();
+        let meta_offset_base = parent_state.metadata_current_offset();
         Self {
             parent_state,
             fields: IndexMap::new(),
-            object_start_offset: start_offset,
+            parent_offset_base: offset_base,
             has_been_finished: false,
-            object_meta_start_offset: meta_start_offset,
+            parent_metadata_offset_base: meta_offset_base,
             validate_unique_fields,
             duplicate_fields: HashSet::new(),
         }
@@ -1128,7 +1189,7 @@ impl<'a> ObjectBuilder<'a> {
         let (buffer, metadata_builder) = self.parent_state.buffer_and_metadata_builder();
 
         let field_id = metadata_builder.upsert_field_name(key);
-        let field_start = buffer.offset() - self.object_start_offset;
+        let field_start = buffer.offset() - self.parent_offset_base;
 
         if self.fields.insert(field_id, field_start).is_some() && self.validate_unique_fields {
             self.duplicate_fields.insert(field_id);
@@ -1158,7 +1219,7 @@ impl<'a> ObjectBuilder<'a> {
             metadata_builder,
             fields: &mut self.fields,
             field_name: key,
-            object_start_offset: self.object_start_offset,
+            parent_offset_base: self.parent_offset_base,
         };
         (state, validate_unique_fields)
     }
@@ -1207,14 +1268,14 @@ impl<'a> ObjectBuilder<'a> {
 
         // the length of the metadata's field names is a very cheap to compute the upper bound.
         // it will almost always be a tight upper bound as well -- it would take a pretty
-        // carefully crafted object to use only the early field ids of a large dictionary.
+        // carefully crafted object to use only the early field ids of a large dictionary.
         let max_id = metadata_builder.field_names.len();
         let id_size = int_size(max_id);
 
         let parent_buffer = self.parent_state.buffer();
         let current_offset = parent_buffer.offset();
-        // current object starts from `object_start_offset`
-        let data_size = current_offset - self.object_start_offset;
+        // Current object starts from `parent_offset_base`
+        let data_size = current_offset - self.parent_offset_base;
         let offset_size = int_size(data_size);
 
         let num_fields = self.fields.len();
@@ -1225,7 +1286,7 @@ impl<'a> ObjectBuilder<'a> {
             (num_fields * id_size as usize) + // field IDs
             ((num_fields + 1) * offset_size as usize); // field offsets + data_size
 
-        let starting_offset = self.object_start_offset;
+        let starting_offset = self.parent_offset_base;
 
         // Shift existing data to make room for the header
         let buffer = parent_buffer.inner_mut();
@@ -1239,42 +1300,33 @@ impl<'a> ObjectBuilder<'a> {
 
         // Write header byte
         let header = object_header(is_large, id_size, offset_size);
-        buffer[header_pos] = header;
-        header_pos += 1;
-
-        // Write number of fields
-        if is_large {
-            buffer[header_pos..header_pos + 4].copy_from_slice(&(num_fields as u32).to_le_bytes());
-            header_pos += 4;
-        } else {
-            buffer[header_pos] = num_fields as u8;
-            header_pos += 1;
-        }
-
-        // Write field IDs
-        for field_id in self.fields.keys() {
-            let id_bytes = field_id.to_le_bytes();
-            buffer[header_pos..header_pos + id_size as usize]
-                .copy_from_slice(&id_bytes[..id_size as usize]);
-            header_pos += id_size as usize;
-        }
-
-        // Write field offsets (adjusted for header)
-        for relative_offset in self.fields.values() {
-            let offset_bytes = relative_offset.to_le_bytes();
-            buffer[header_pos..header_pos + offset_size as usize]
-                .copy_from_slice(&offset_bytes[..offset_size as usize]);
-            header_pos += offset_size as usize;
-        }
-
-        // Write data_size
-        let data_size_bytes = data_size.to_le_bytes();
-        buffer[header_pos..header_pos + offset_size as usize]
-            .copy_from_slice(&data_size_bytes[..offset_size as usize]);
 
+        header_pos = self
+            .parent_state
+            .buffer()
+            .append_header_start_from_buf_pos(header_pos, header, is_large, num_fields);
+
+        header_pos = self
+            .parent_state
+            .buffer()
+            .append_offset_array_start_from_buf_pos(
+                header_pos,
+                self.fields.keys().copied().map(|id| id as usize),
+                None,
+                id_size,
+            );
+
+        self.parent_state
+            .buffer()
+            .append_offset_array_start_from_buf_pos(
+                header_pos,
+                self.fields.values().copied(),
+                Some(data_size),
+                offset_size,
+            );
 
         self.parent_state.finish(starting_offset);
 
-        // mark that this object has been finished
+        // Mark that this object has been finished
         self.has_been_finished = true;
 
         Ok(())
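The net effect in `finish` above is a shift-then-backfill: the already-written field data is moved right by the computed header size, and the gap is then filled through the position-based writers. A simplified standalone sketch of that flow on a plain `Vec<u8>` (the shift here uses `Vec::splice` and 1-byte fields for brevity; the crate's actual layout and shifting code may differ):

```rust
fn main() {
    // Field data was appended first, starting at `starting_offset`.
    let starting_offset = 0;
    let mut buf: Vec<u8> = vec![b'x', b'y']; // already-written field values
    let header_size = 3; // e.g. 1 header byte + 1 field count + 1 data_size for a tiny object

    // Shift the existing data right to open a gap for the header.
    buf.splice(starting_offset..starting_offset, std::iter::repeat(0u8).take(header_size));

    // Backfill the gap in place, as the position-based writers do.
    buf[starting_offset] = 0b0000_0010; // header byte (value is arbitrary here)
    buf[starting_offset + 1] = 1;       // num_fields
    buf[starting_offset + 2] = 2;       // data_size = length of the shifted field data

    assert_eq!(buf, vec![0b0000_0010, 1, 2, b'x', b'y']);
}
```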
@@ -1287,17 +1339,17 @@ impl<'a> ObjectBuilder<'a> {
 /// is finalized.
 impl Drop for ObjectBuilder<'_> {
     fn drop(&mut self) {
-        // truncate the buffer if the `finish` method has not been called.
+        // Truncate the buffer if the `finish` method has not been called.
         if !self.has_been_finished {
             self.parent_state
                 .buffer()
                 .inner_mut()
-                .truncate(self.object_start_offset);
+                .truncate(self.parent_offset_base);
 
             self.parent_state
                 .metadata_builder()
                 .field_names
-                .truncate(self.object_meta_start_offset);
+                .truncate(self.parent_metadata_offset_base);
         }
     }
 }
@@ -2078,7 +2130,7 @@ mod tests {
         assert_eq!(Variant::from(false), second_inner_list_g.get(1).unwrap());
     }
 
-    // this test wants to cover the logic for reuse parent buffer for list builder
+    // This test covers the logic for reusing the parent buffer in the list builder;
    // the builder looks like
    // [ "apple", "false", [{"a": "b", "b": "c"}, {"c":"d", "d":"e"}], [[1, true], ["tree", false]], 1]
    #[test]
@@ -2148,12 +2200,12 @@ mod tests {
 
         assert_eq!(5, outer_list.len());
 
-        // primitive value
+        // Primitive values
         assert_eq!(Variant::from("apple"), outer_list.get(0).unwrap());
         assert_eq!(Variant::from(false), outer_list.get(1).unwrap());
         assert_eq!(Variant::from(1), outer_list.get(4).unwrap());
 
-        // the first inner list [{"a": "b", "b": "c"}, {"c":"d", "d":"e"}]
+        // The first inner list [{"a": "b", "b": "c"}, {"c":"d", "d":"e"}]
         let list1_variant = outer_list.get(2).unwrap();
         let list1 = list1_variant.as_list().unwrap();
         assert_eq!(2, list1.len());
@@ -2166,19 +2218,19 @@ mod tests {
         assert_eq!("b", list1_obj1.field_name(1).unwrap());
         assert_eq!(Variant::from("c"), list1_obj1.field(1).unwrap());
 
-        // the second inner list [[1, true], ["tree", false]]
+        // The second inner list [[1, true], ["tree", false]]
         let list2_variant = outer_list.get(3).unwrap();
         let list2 = list2_variant.as_list().unwrap();
         assert_eq!(2, list2.len());
 
-        // the list [1, true]
+        // The list [1, true]
         let list2_list1_variant = list2.get(0).unwrap();
         let list2_list1 = list2_list1_variant.as_list().unwrap();
         assert_eq!(2, list2_list1.len());
         assert_eq!(Variant::from(1), list2_list1.get(0).unwrap());
         assert_eq!(Variant::from(true), list2_list1.get(1).unwrap());
 
-        // the list ["true", false]
+        // The list ["tree", false]
         let list2_list2_variant = list2.get(1).unwrap();
         let list2_list2 = list2_list2_variant.as_list().unwrap();
         assert_eq!(2, list2_list2.len());
@@ -2673,8 +2725,8 @@ mod tests {
         // Only the second attempt should appear in the final variant
         let (metadata, value) = builder.finish();
         let metadata = VariantMetadata::try_new(&metadata).unwrap();
-        assert_eq!(metadata.len(), 1); // rolled back
-        assert_eq!(&metadata[0], "name");
+        assert_eq!(metadata.len(), 1);
+        assert_eq!(&metadata[0], "name"); // not rolled back
 
         let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
         assert_eq!(variant, Variant::Int8(2));