Skip to content

Commit 820d74d

Browse files
committed
vcdecoder: add memory-based output interface
Currently, the VCDiffStreamingDecoder class expects applications to call ::StartDecoding() with a dictionary, and then maintains an internal buffer (decoded_target_) to accumulate pending output bytes. These bytes are then written out through ::AppendNewOutputText() and ::FlushDecodedTarget(). The problem with this implementation is that the allocation of this intermediate buffer may fail, which causes the application to abort with a trap. This patch adds an alternative interface that does not require any internal allocation. StartDecoding() may now optionally be called with an output buffer and a maximum length, and DecodeChunk() may be called without an OutputString parameter. When operated this way, the decoder writes directly to the provided memory buffer, which can, for instance, be mmap()ed on a block device. The old interface is kept for full compatibility.
1 parent 7162d8e commit 820d74d

File tree

2 files changed

+94
-25
lines changed

2 files changed

+94
-25
lines changed

src/google/vcdecoder.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ class VCDiffStreamingDecoder {
6060
// contents are not copied, and the client is responsible for ensuring that
6161
// dictionary_ptr is valid until FinishDecoding is called.
6262
//
63-
void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
63+
void StartDecoding(const char* dictionary_ptr, size_t dictionary_size,
64+
char *output_ptr = NULL, size_t output_size = 0);
6465

6566
// Accepts "data[0,len-1]" as additional data received in the
6667
// compressed stream. If any chunks of data can be fully decoded,
@@ -78,13 +79,17 @@ class VCDiffStreamingDecoder {
7879
// of the number of calls to DecodeChunk().
7980
//
8081
template<class OutputType>
81-
bool DecodeChunk(const char* data, size_t len, OutputType* output) {
82-
OutputString<OutputType> output_string(output);
83-
return DecodeChunkToInterface(data, len, &output_string);
82+
bool DecodeChunk(const char* data, size_t len, OutputType* output = NULL) {
83+
if (output) {
84+
OutputString<OutputType> output_string(output);
85+
return DecodeChunkToInterface(data, len, &output_string);
86+
} else {
87+
return DecodeChunkToInterface(data, len);
88+
}
8489
}
8590

8691
bool DecodeChunkToInterface(const char* data, size_t len,
87-
OutputStringInterface* output_string);
92+
OutputStringInterface* output_string = NULL);
8893

8994
// Finishes decoding after all data has been received. Returns true
9095
// if decoding of the entire stream was successful. FinishDecoding()

src/vcdecoder.cc

Lines changed: 84 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,8 @@ class VCDiffStreamingDecoderImpl {
344344
// These functions are identical to their counterparts
345345
// in VCDiffStreamingDecoder.
346346
//
347-
void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
347+
void StartDecoding(const char* dictionary_ptr, size_t dictionary_size,
348+
char *output_ptr = NULL, size_t output_size = 0);
348349

349350
bool DecodeChunk(const char* data,
350351
size_t len,
@@ -474,8 +475,6 @@ class VCDiffStreamingDecoderImpl {
474475

475476
VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
476477

477-
string* decoded_target() { return &decoded_target_; }
478-
479478
bool allow_vcd_target() const { return allow_vcd_target_; }
480479

481480
void SetAllowVcdTarget(bool allow_vcd_target) {
@@ -487,6 +486,59 @@ class VCDiffStreamingDecoderImpl {
487486
allow_vcd_target_ = allow_vcd_target;
488487
}
489488

489+
const char* decoded_target_ptr() const {
490+
return output_ptr_ ?: decoded_target_.data();
491+
}
492+
493+
void decoded_target_copy_bytes(const char* data, size_t size) {
494+
if (output_ptr_) {
495+
if (decoded_target_output_position_ + size > output_size_ ||
496+
decoded_target_output_position_ + size < decoded_target_output_position_) {
497+
VCD_DFATAL << "Buffer overflow in decoded_target_copy_bytes()"
498+
<< VCD_ENDL;
499+
return;
500+
}
501+
memcpy(output_ptr_ + decoded_target_output_position_, data, size);
502+
decoded_target_output_position_ += size;
503+
} else {
504+
decoded_target_.append(data, size);
505+
}
506+
}
507+
508+
void decoded_target_run_byte(unsigned char byte, size_t size) {
509+
if (output_ptr_) {
510+
if (decoded_target_output_position_ + size > output_size_ ||
511+
decoded_target_output_position_ + size < decoded_target_output_position_) {
512+
VCD_DFATAL << "Buffer overflow in decoded_target_run_byte()"
513+
<< VCD_ENDL;
514+
return;
515+
}
516+
memset(output_ptr_ + decoded_target_output_position_, byte, size);
517+
decoded_target_output_position_ += size;
518+
} else {
519+
decoded_target_.append(size, byte);
520+
}
521+
}
522+
523+
size_t decoded_target_write_position() {
524+
if (output_ptr_) {
525+
return decoded_target_output_position_;
526+
} else {
527+
return decoded_target_.size();
528+
}
529+
}
530+
531+
bool ensure_output_capacity(size_t wanted_capacity) {
532+
if (output_ptr_) {
533+
return (wanted_capacity <= output_size_);
534+
} else {
535+
if (decoded_target_.capacity() < wanted_capacity) {
536+
decoded_target_.reserve(wanted_capacity);
537+
return true;
538+
}
539+
}
540+
}
541+
490542
private:
491543
// Reads the VCDiff delta file header section as described in RFC section 4.1,
492544
// except the custom code table data. Returns RESULT_ERROR if an error
@@ -540,6 +592,10 @@ class VCDiffStreamingDecoderImpl {
540592
const char* dictionary_ptr_;
541593
size_t dictionary_size_;
542594

595+
// buffer-based output
596+
char* output_ptr_;
597+
size_t output_size_;
598+
543599
// This string will be used to store any unparsed bytes left over when
544600
// DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
545601
// It will also be used to concatenate those unparsed bytes with the data
@@ -638,6 +694,8 @@ void VCDiffStreamingDecoderImpl::Reset() {
638694
start_decoding_was_called_ = false;
639695
dictionary_ptr_ = NULL;
640696
dictionary_size_ = 0;
697+
output_ptr_ = NULL;
698+
output_size_ = 0;
641699
vcdiff_version_code_ = '\0';
642700
planned_target_file_size_ = kUnlimitedBytes;
643701
total_of_target_window_sizes_ = 0;
@@ -649,7 +707,9 @@ void VCDiffStreamingDecoderImpl::Reset() {
649707
}
650708

651709
void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
652-
size_t dictionary_size) {
710+
size_t dictionary_size,
711+
char* output_ptr,
712+
size_t output_size) {
653713
if (start_decoding_was_called_) {
654714
VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()"
655715
<< VCD_ENDL;
@@ -660,6 +720,8 @@ void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
660720
Reset();
661721
dictionary_ptr_ = dictionary_ptr;
662722
dictionary_size_ = dictionary_size;
723+
output_ptr_ = output_ptr;
724+
output_size_ = output_size;
663725
start_decoding_was_called_ = true;
664726
}
665727

@@ -906,7 +968,7 @@ bool VCDiffStreamingDecoderImpl::DecodeChunk(
906968
// Found exactly the length we expected. Stop decoding.
907969
break;
908970
}
909-
if (!allow_vcd_target()) {
971+
if (!allow_vcd_target() && output_string) {
910972
// VCD_TARGET will never be used to reference target data before the
911973
// start of the current window, so flush and clear the contents of
912974
// decoded_target_.
@@ -920,7 +982,9 @@ bool VCDiffStreamingDecoderImpl::DecodeChunk(
920982
}
921983
unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
922984
parseable_chunk.UnparsedSize());
923-
AppendNewOutputText(output_string);
985+
if (output_string) {
986+
AppendNewOutputText(output_string);
987+
}
924988
return true;
925989
}
926990

@@ -989,7 +1053,7 @@ void VCDiffDeltaFileWindow::Reset() {
9891053
found_header_ = false;
9901054

9911055
// Mark the start of the current target window.
992-
target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
1056+
target_window_start_pos_ = parent_ ? parent_->decoded_target_write_position() : 0U;
9931057
target_window_length_ = 0;
9941058

9951059
source_segment_ptr_ = NULL;
@@ -1072,14 +1136,13 @@ VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
10721136
//
10731137
VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
10741138
ParseableChunk* parseable_chunk) {
1075-
std::string* decoded_target = parent_->decoded_target();
10761139
VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
10771140
parseable_chunk->End());
10781141
size_t source_segment_position = 0;
10791142
unsigned char win_indicator = 0;
10801143
if (!header_parser.ParseWinIndicatorAndSourceSegment(
10811144
parent_->dictionary_size(),
1082-
decoded_target->size(),
1145+
parent_->decoded_target_write_position(),
10831146
parent_->allow_vcd_target(),
10841147
&win_indicator,
10851148
&source_segment_length_,
@@ -1102,17 +1165,17 @@ VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
11021165
// Reserve enough space in the output string for the current target window.
11031166
const size_t wanted_capacity =
11041167
target_window_start_pos_ + target_window_length_;
1105-
if (decoded_target->capacity() < wanted_capacity) {
1106-
decoded_target->reserve(wanted_capacity);
1168+
if (!parent_->ensure_output_capacity(wanted_capacity)) {
1169+
return RESULT_ERROR;
11071170
}
11081171
// Get a pointer to the start of the source segment.
11091172
if (win_indicator & VCD_SOURCE) {
11101173
source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
11111174
} else if (win_indicator & VCD_TARGET) {
11121175
// This assignment must happen after the reserve().
1113-
// decoded_target should not be resized again while processing this window,
1176+
// the output buffer should not be resized again while processing this window,
11141177
// so source_segment_ptr_ should remain valid.
1115-
source_segment_ptr_ = decoded_target->data() + source_segment_position;
1178+
source_segment_ptr_ = parent_->decoded_target_ptr() + source_segment_position;
11161179
}
11171180
// The whole window header was found and parsed successfully.
11181181
found_header_ = true;
@@ -1132,7 +1195,7 @@ void VCDiffDeltaFileWindow::UpdateInstructionPointer(
11321195
}
11331196

11341197
inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
1135-
return parent_->decoded_target()->size() - target_window_start_pos_;
1198+
return parent_->decoded_target_write_position() - target_window_start_pos_;
11361199
}
11371200

11381201
size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
@@ -1145,11 +1208,11 @@ size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
11451208
}
11461209

11471210
inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
1148-
parent_->decoded_target()->append(data, size);
1211+
parent_->decoded_target_copy_bytes(data, size);
11491212
}
11501213

11511214
inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
1152-
parent_->decoded_target()->append(size, byte);
1215+
parent_->decoded_target_run_byte(byte, size);
11531216
}
11541217

11551218
VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
@@ -1216,7 +1279,7 @@ VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
12161279
}
12171280
address -= source_segment_length_;
12181281
// address is now based at start of target window
1219-
const char* const target_segment_ptr = parent_->decoded_target()->data() +
1282+
const char* const target_segment_ptr = parent_->decoded_target_ptr() +
12201283
target_window_start_pos_;
12211284
while (size > (target_bytes_decoded - address)) {
12221285
// Recursive copy that extends into the yet-to-be-copied target data
@@ -1300,7 +1363,7 @@ int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
13001363
return RESULT_ERROR;
13011364
}
13021365
const char* const target_window_start =
1303-
parent_->decoded_target()->data() + target_window_start_pos_;
1366+
parent_->decoded_target_ptr() + target_window_start_pos_;
13041367
if (has_checksum_ &&
13051368
(ComputeAdler32(target_window_start, target_window_length_)
13061369
!= expected_checksum_)) {
@@ -1396,8 +1459,9 @@ VCDiffStreamingDecoder::VCDiffStreamingDecoder()
13961459

13971460
VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
13981461

1399-
void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1400-
impl_->StartDecoding(source, len);
1462+
void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len,
1463+
char *output_ptr, size_t output_size) {
1464+
impl_->StartDecoding(source, len, output_ptr, output_size);
14011465
}
14021466

14031467
bool VCDiffStreamingDecoder::DecodeChunkToInterface(

0 commit comments

Comments
 (0)