@@ -217,6 +217,9 @@ pub struct RowGroupMetaData {
217
217
num_rows : i64 ,
218
218
total_byte_size : i64 ,
219
219
schema_descr : SchemaDescPtr ,
220
+ /// Cube: We'll roundtrip file_offset from thrift, but we always originate it as None. It was
221
+ /// not present in the older Parquet RowGroupMetaData definition.
222
+ file_offset : Option < i64 > ,
220
223
/// Ordinal position of this row group in file
221
224
ordinal : Option < i16 > ,
222
225
}
@@ -280,37 +283,25 @@ impl RowGroupMetaData {
280
283
let cc = ColumnChunkMetaData :: from_thrift ( d. clone ( ) , c) ?;
281
284
columns. push ( cc) ;
282
285
}
283
- // Notably, the function to_thrift, below, doesn't write these fields, and RowGroupMetadata doesn't have them.
284
- if rg. file_offset . is_some ( ) {
285
- return Err ( ParquetError :: NYI (
286
- "Parsing RowGroup file_offset fields is not yet implemented" . to_string ( ) ,
287
- ) ) ;
288
- }
289
- if rg. total_compressed_size . is_some ( ) {
290
- return Err ( ParquetError :: NYI (
291
- "Parsing RowGroup total_compressed_size fields is not yet implemented"
292
- . to_string ( ) ,
293
- ) ) ;
294
- }
295
286
Ok ( RowGroupMetaData {
296
287
columns,
297
288
num_rows,
298
289
total_byte_size,
290
+ file_offset : rg. file_offset ,
299
291
schema_descr,
300
292
ordinal : rg. ordinal ,
301
293
} )
302
294
}
303
295
304
296
/// Method to convert to Thrift.
305
297
pub fn to_thrift ( & self ) -> RowGroup {
306
- // TODO: Understand file_offset and total_compressed_size fields.
307
298
RowGroup {
308
299
columns : self . columns ( ) . iter ( ) . map ( |v| v. to_thrift ( ) ) . collect ( ) ,
309
300
total_byte_size : self . total_byte_size ,
310
301
num_rows : self . num_rows ,
311
302
sorting_columns : None ,
312
- file_offset : None ,
313
- total_compressed_size : None ,
303
+ file_offset : self . file_offset ,
304
+ total_compressed_size : Some ( self . compressed_size ( ) ) ,
314
305
ordinal : self . ordinal ,
315
306
}
316
307
}
@@ -375,6 +366,8 @@ impl RowGroupMetaDataBuilder {
375
366
columns : self . columns ,
376
367
num_rows : self . num_rows ,
377
368
total_byte_size : self . total_byte_size ,
369
+ // Cube: This is where the optional file_offset field originates as None; the builder never sets it.
370
+ file_offset : None ,
378
371
schema_descr : self . schema_descr ,
379
372
ordinal : self . ordinal ,
380
373
} )
0 commit comments