@@ -243,6 +243,7 @@ void ColumnReader::InitializeRead(idx_t row_group_idx_p, const vector<ColumnChun
243243void ColumnReader::PrepareRead (parquet_filter_t &filter) {
244244 dict_decoder.reset ();
245245 defined_decoder.reset ();
246+ bss_decoder.reset ();
246247 block.reset ();
247248 PageHeader page_hdr;
248249 page_hdr.read (protocol);
@@ -443,6 +444,13 @@ void ColumnReader::PrepareDataPage(PageHeader &page_hdr) {
443444 PrepareDeltaByteArray (*block);
444445 break ;
445446 }
447+ case Encoding::BYTE_STREAM_SPLIT: {
448+ // Subtract 1 from length as the block is allocated with 1 extra byte,
449+ // but the byte stream split decoder needs to know the correct data size.
450+ bss_decoder = make_uniq<BssDecoder>(block->ptr , block->len - 1 );
451+ block->inc (block->len );
452+ break ;
453+ }
446454 case Encoding::PLAIN:
447455 // nothing to do here, will be read directly below
448456 break ;
@@ -488,7 +496,7 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
488496
489497 idx_t null_count = 0 ;
490498
491- if ((dict_decoder || dbp_decoder || rle_decoder) && HasDefines ()) {
499+ if ((dict_decoder || dbp_decoder || rle_decoder || bss_decoder ) && HasDefines ()) {
492500 // we need the null count because the dictionary offsets have no entries for nulls
493501 for (idx_t i = 0 ; i < read_now; i++) {
494502 if (define_out[i + result_offset] != max_define) {
@@ -534,6 +542,23 @@ idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr
534542 } else if (byte_array_data) {
535543 // DELTA_BYTE_ARRAY or DELTA_LENGTH_BYTE_ARRAY
536544 DeltaByteArray (define_out, read_now, filter, result_offset, result);
545+ } else if (bss_decoder) {
546+ auto read_buf = make_shared<ResizeableBuffer>();
547+
548+ switch (schema.type ) {
549+ case duckdb_parquet::format::Type::FLOAT:
550+ read_buf->resize (reader.allocator , sizeof (float ) * (read_now - null_count));
551+ bss_decoder->GetBatch <float >(read_buf->ptr , read_now - null_count);
552+ break ;
553+ case duckdb_parquet::format::Type::DOUBLE:
554+ read_buf->resize (reader.allocator , sizeof (double ) * (read_now - null_count));
555+ bss_decoder->GetBatch <double >(read_buf->ptr , read_now - null_count);
556+ break ;
557+ default :
558+ throw std::runtime_error (" BYTE_STREAM_SPLIT encoding is only supported for FLOAT or DOUBLE data" );
559+ }
560+
561+ Plain (read_buf, define_out, read_now, filter, result_offset, result);
537562 } else {
538563 PlainReference (block, result);
539564 Plain (block, define_out, read_now, filter, result_offset, result);
0 commit comments