Skip to content

Commit 8f613ee

Browse files
authored
zlib: add support for brotli compression dictionary
This change adds JS API support for custom compression dictionaries with Brotli in the zlib library. The underlying Brotli dependency already supports this, and zstd exposes something similar. This follows the zstd approach for using a custom dictionary, but for Brotli.

Fixes: #52250
PR-URL: #61763
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Gürgün Dayıoğlu <hey@gurgun.day>
1 parent 3d30c30 commit 8f613ee

File tree

3 files changed

+229
-11
lines changed

3 files changed

+229
-11
lines changed

lib/zlib.js

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -830,11 +830,29 @@ function Brotli(opts, mode) {
830830
});
831831
}
832832

833+
let dictionary = opts?.dictionary;
834+
if (dictionary !== undefined && !isArrayBufferView(dictionary)) {
835+
if (isAnyArrayBuffer(dictionary)) {
836+
dictionary = Buffer.from(dictionary);
837+
} else {
838+
throw new ERR_INVALID_ARG_TYPE(
839+
'options.dictionary',
840+
['Buffer', 'TypedArray', 'DataView', 'ArrayBuffer'],
841+
dictionary,
842+
);
843+
}
844+
}
845+
833846
const handle = mode === BROTLI_DECODE ?
834847
new binding.BrotliDecoder(mode) : new binding.BrotliEncoder(mode);
835848

836849
this._writeState = new Uint32Array(2);
837-
handle.init(brotliInitParamsArray, this._writeState, processCallback);
850+
handle.init(
851+
brotliInitParamsArray,
852+
this._writeState,
853+
processCallback,
854+
dictionary,
855+
);
838856

839857
ZlibBase.call(this, opts, mode, handle, brotliDefaultOpts);
840858
}

src/node_zlib.cc

Lines changed: 84 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
#include "brotli/decode.h"
4242
#include "brotli/encode.h"
43+
#include "brotli/shared_dictionary.h"
4344
#include "zlib.h"
4445
#include "zstd.h"
4546
#include "zstd_errors.h"
@@ -256,7 +257,7 @@ class BrotliEncoderContext final : public BrotliContext {
256257
public:
257258
void Close();
258259
void DoThreadPoolWork();
259-
CompressionError Init();
260+
CompressionError Init(std::vector<uint8_t>&& dictionary = {});
260261
CompressionError ResetStream();
261262
CompressionError SetParams(int key, uint32_t value);
262263
CompressionError GetErrorInfo() const;
@@ -268,13 +269,18 @@ class BrotliEncoderContext final : public BrotliContext {
268269
private:
269270
bool last_result_ = false;
270271
DeleteFnPtr<BrotliEncoderState, BrotliEncoderDestroyInstance> state_;
272+
DeleteFnPtr<BrotliEncoderPreparedDictionary,
273+
BrotliEncoderDestroyPreparedDictionary>
274+
prepared_dictionary_;
275+
// Dictionary data must remain valid while the prepared dictionary is alive.
276+
std::vector<uint8_t> dictionary_;
271277
};
272278

273279
class BrotliDecoderContext final : public BrotliContext {
274280
public:
275281
void Close();
276282
void DoThreadPoolWork();
277-
CompressionError Init();
283+
CompressionError Init(std::vector<uint8_t>&& dictionary = {});
278284
CompressionError ResetStream();
279285
CompressionError SetParams(int key, uint32_t value);
280286
CompressionError GetErrorInfo() const;
@@ -288,6 +294,8 @@ class BrotliDecoderContext final : public BrotliContext {
288294
BrotliDecoderErrorCode error_ = BROTLI_DECODER_NO_ERROR;
289295
std::string error_string_;
290296
DeleteFnPtr<BrotliDecoderState, BrotliDecoderDestroyInstance> state_;
297+
// Dictionary data must remain valid for the lifetime of the decoder.
298+
std::vector<uint8_t> dictionary_;
291299
};
292300

293301
class ZstdContext : public MemoryRetainer {
@@ -830,7 +838,8 @@ class BrotliCompressionStream final :
830838
static void Init(const FunctionCallbackInfo<Value>& args) {
831839
BrotliCompressionStream* wrap;
832840
ASSIGN_OR_RETURN_UNWRAP(&wrap, args.This());
833-
CHECK(args.Length() == 3 && "init(params, writeResult, writeCallback)");
841+
CHECK((args.Length() == 3 || args.Length() == 4) &&
842+
"init(params, writeResult, writeCallback[, dictionary])");
834843

835844
CHECK(args[1]->IsUint32Array());
836845
CHECK_GE(args[1].As<Uint32Array>()->Length(), 2);
@@ -841,7 +850,18 @@ class BrotliCompressionStream final :
841850
wrap->InitStream(write_result, write_js_callback);
842851

843852
AllocScope alloc_scope(wrap);
844-
CompressionError err = wrap->context()->Init();
853+
std::vector<uint8_t> dictionary;
854+
if (args.Length() == 4 && !args[3]->IsUndefined()) {
855+
if (!args[3]->IsArrayBufferView()) {
856+
THROW_ERR_INVALID_ARG_TYPE(
857+
wrap->env(), "dictionary must be an ArrayBufferView if provided");
858+
return;
859+
}
860+
ArrayBufferViewContents<uint8_t> contents(args[3]);
861+
dictionary.assign(contents.data(), contents.data() + contents.length());
862+
}
863+
864+
CompressionError err = wrap->context()->Init(std::move(dictionary));
845865
if (err.IsError()) {
846866
wrap->EmitError(err);
847867
// TODO(addaleax): Sometimes we generate better error codes in C++ land,
@@ -1387,23 +1407,57 @@ void BrotliEncoderContext::DoThreadPoolWork() {
13871407

13881408
// Tears down the encoder context: releases the Brotli encoder state first,
// then the prepared dictionary and the raw dictionary bytes it was built
// from, and finally resets mode_ to NONE.
void BrotliEncoderContext::Close() {
13891409
state_.reset();
1410+
prepared_dictionary_.reset();
1411+
dictionary_.clear();
13901412
mode_ = NONE;
13911413
}
13921414

1393-
CompressionError BrotliEncoderContext::Init() {
1415+
// Creates a fresh Brotli encoder state using the stream's allocator hooks
// and, when `dictionary` is non-empty, takes ownership of its bytes, prepares
// them as a raw shared dictionary and attaches that dictionary to the encoder.
//
// Returns a default-constructed CompressionError on success. On failure the
// returned error carries code ERR_ZLIB_INITIALIZATION_FAILED (the encoder
// state could not be created) or ERR_ZLIB_DICTIONARY_LOAD_FAILED (preparing
// or attaching the dictionary failed).
CompressionError BrotliEncoderContext::Init(std::vector<uint8_t>&& dictionary) {
13941416
brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli;
13951417
brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib;
13961418
void* opaque =
13971419
CompressionStream<BrotliEncoderContext>::AllocatorOpaquePointerForContext(
13981420
this);
1421+
1422+
// Clean up any previous dictionary state before re-initializing.
// NOTE(review): Close() releases state_ before prepared_dictionary_, but here
// the dictionary is released first and the previous state_ is only replaced
// by the reset() below. Confirm a previously attached encoder never touches
// its dictionary while being destroyed, or reorder to match Close().
1423+
prepared_dictionary_.reset();
1424+
dictionary_.clear();
1425+
13991426
state_.reset(BrotliEncoderCreateInstance(alloc, free, opaque));
14001427
if (!state_) {
14011428
return CompressionError("Could not initialize Brotli instance",
14021429
"ERR_ZLIB_INITIALIZATION_FAILED",
14031430
-1);
1404-
} else {
1405-
return CompressionError {};
14061431
}
1432+
1433+
if (!dictionary.empty()) {
1434+
// The dictionary data must remain valid for the lifetime of the prepared
1435+
// dictionary, so take ownership via move.
1436+
dictionary_ = std::move(dictionary);
1437+
1438+
// NOTE(review): the dictionary is always prepared with BROTLI_MAX_QUALITY;
// this function has no access to a configured quality value. Confirm that
// is intended.
prepared_dictionary_.reset(
1439+
BrotliEncoderPrepareDictionary(BROTLI_SHARED_DICTIONARY_RAW,
1440+
dictionary_.size(),
1441+
dictionary_.data(),
1442+
BROTLI_MAX_QUALITY,
1443+
alloc,
1444+
free,
1445+
opaque));
1446+
if (!prepared_dictionary_) {
1447+
return CompressionError("Failed to prepare brotli dictionary",
1448+
"ERR_ZLIB_DICTIONARY_LOAD_FAILED",
1449+
-1);
1450+
}
1451+
1452+
if (!BrotliEncoderAttachPreparedDictionary(state_.get(),
1453+
prepared_dictionary_.get())) {
1454+
return CompressionError("Failed to attach brotli dictionary",
1455+
"ERR_ZLIB_DICTIONARY_LOAD_FAILED",
1456+
-1);
1457+
}
1458+
}
1459+
1460+
return CompressionError{};
14071461
}
14081462

14091463
CompressionError BrotliEncoderContext::ResetStream() {
@@ -1435,6 +1489,7 @@ CompressionError BrotliEncoderContext::GetErrorInfo() const {
14351489

14361490
// Tears down the decoder context: releases the Brotli decoder state, then
// drops the stored dictionary bytes, and resets mode_ to NONE.
void BrotliDecoderContext::Close() {
14371491
state_.reset();
1492+
dictionary_.clear();
14381493
mode_ = NONE;
14391494
}
14401495

@@ -1455,20 +1510,39 @@ void BrotliDecoderContext::DoThreadPoolWork() {
14551510
}
14561511
}
14571512

1458-
CompressionError BrotliDecoderContext::Init() {
1513+
// Creates a fresh Brotli decoder state using the stream's allocator hooks
// and, when `dictionary` is non-empty, takes ownership of its bytes and
// attaches them to the decoder as a raw shared dictionary.
//
// Returns a default-constructed CompressionError on success. On failure the
// returned error carries code ERR_ZLIB_INITIALIZATION_FAILED (the decoder
// state could not be created) or ERR_ZLIB_DICTIONARY_LOAD_FAILED (attaching
// the dictionary failed).
CompressionError BrotliDecoderContext::Init(std::vector<uint8_t>&& dictionary) {
14591514
brotli_alloc_func alloc = CompressionStreamMemoryOwner::AllocForBrotli;
14601515
brotli_free_func free = CompressionStreamMemoryOwner::FreeForZlib;
14611516
void* opaque =
14621517
CompressionStream<BrotliDecoderContext>::AllocatorOpaquePointerForContext(
14631518
this);
1519+
1520+
// Clean up any previous dictionary state before re-initializing.
// NOTE(review): Close() releases state_ before dictionary_, but here the
// bytes are cleared while the previous state_ (if any) is still alive; it is
// only replaced by the reset() below. Confirm the old decoder does not read
// the dictionary during destruction.
1521+
dictionary_.clear();
1522+
14641523
state_.reset(BrotliDecoderCreateInstance(alloc, free, opaque));
14651524
if (!state_) {
14661525
return CompressionError("Could not initialize Brotli instance",
14671526
"ERR_ZLIB_INITIALIZATION_FAILED",
14681527
-1);
1469-
} else {
1470-
return CompressionError {};
14711528
}
1529+
1530+
if (!dictionary.empty()) {
1531+
// The dictionary data must remain valid for the lifetime of the decoder,
1532+
// so take ownership via move.
1533+
dictionary_ = std::move(dictionary);
1534+
1535+
if (!BrotliDecoderAttachDictionary(state_.get(),
1536+
BROTLI_SHARED_DICTIONARY_RAW,
1537+
dictionary_.size(),
1538+
dictionary_.data())) {
1539+
return CompressionError("Failed to attach brotli dictionary",
1540+
"ERR_ZLIB_DICTIONARY_LOAD_FAILED",
1541+
-1);
1542+
}
1543+
}
1544+
1545+
return CompressionError{};
14721546
}
14731547

14741548
CompressionError BrotliDecoderContext::ResetStream() {
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
'use strict';
2+
3+
const common = require('../common');
4+
const assert = require('assert');
5+
const zlib = require('zlib');
6+
7+
const dictionary = Buffer.from(
8+
`Lorem ipsum dolor sit amet, consectetur adipiscing elit.
9+
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
10+
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`
11+
);
12+
13+
const input = Buffer.from(
14+
`Lorem ipsum dolor sit amet, consectetur adipiscing elit.
15+
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
16+
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
17+
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
18+
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.`
19+
);
20+
21+
// Test with convenience methods (async).
22+
zlib.brotliCompress(input, { dictionary }, common.mustSucceed((compressed) => {
23+
assert(compressed.length < input.length,
24+
'compressed data should be smaller with dictionary');
25+
zlib.brotliDecompress(compressed, { dictionary }, common.mustSucceed((decompressed) => {
26+
assert.strictEqual(decompressed.toString(), input.toString());
27+
}));
28+
}));
29+
30+
// Test with streaming API.
31+
{
32+
const encoder = zlib.createBrotliCompress({ dictionary });
33+
const decoder = zlib.createBrotliDecompress({ dictionary });
34+
35+
const chunks = [];
36+
decoder.on('data', (chunk) => chunks.push(chunk));
37+
decoder.on('end', common.mustCall(() => {
38+
const result = Buffer.concat(chunks);
39+
assert.strictEqual(result.toString(), input.toString());
40+
}));
41+
42+
encoder.pipe(decoder);
43+
encoder.end(input);
44+
}
45+
46+
// Test that using a dictionary does not make the compressed output larger.
47+
{
48+
const withDict = zlib.brotliCompressSync(input, { dictionary });
49+
const withoutDict = zlib.brotliCompressSync(input);
50+
51+
// Dictionary-based compression should be at least as good as without.
52+
assert(withDict.length <= withoutDict.length,
53+
`Dictionary compression (${withDict.length}) should not be ` +
54+
`larger than non-dictionary compression (${withoutDict.length})`);
55+
56+
// Verify decompression with dictionary works.
57+
const decompressed = zlib.brotliDecompressSync(withDict, { dictionary });
58+
assert.strictEqual(decompressed.toString(), input.toString());
59+
}
60+
61+
// Test that decompression without matching dictionary fails.
62+
{
63+
// Compress with the dictionary, then try to decompress with no dictionary.
const compressed = zlib.brotliCompressSync(input, { dictionary });
64+
assert.throws(() => {
65+
zlib.brotliDecompressSync(compressed);
66+
}, (err) => {
67+
// NOTE(review): this only requires err.code to contain "ERR_", so any coded
// error satisfies it; consider asserting the specific expected code.
assert.match(err.code, /ERR_/);
68+
return true;
69+
});
70+
}
71+
72+
// Test that decompression with wrong dictionary fails.
73+
{
73+
// Compress with the real dictionary, then decompress with unrelated bytes
// supplied as the dictionary.
const compressed = zlib.brotliCompressSync(input, { dictionary });
74+
const wrongDictionary = Buffer.from('this is the wrong dictionary');
75+
assert.throws(() => {
76+
zlib.brotliDecompressSync(compressed, { dictionary: wrongDictionary });
77+
}, (err) => {
78+
// NOTE(review): this only requires err.code to contain "ERR_", so any coded
// error satisfies it; consider asserting the specific expected code.
assert.match(err.code, /ERR_/);
79+
return true;
80+
});
81+
}
83+
84+
// Test that dictionary works with ArrayBuffer (converted to Buffer).
85+
{
86+
const arrayBufferDict = dictionary.buffer.slice(
87+
dictionary.byteOffset,
88+
dictionary.byteOffset + dictionary.byteLength,
89+
);
90+
const compressed = zlib.brotliCompressSync(input, { dictionary: arrayBufferDict });
91+
const decompressed = zlib.brotliDecompressSync(compressed, { dictionary: arrayBufferDict });
92+
assert.strictEqual(decompressed.toString(), input.toString());
93+
}
94+
95+
// Test that dictionary works with TypedArray (Uint8Array).
96+
{
97+
const uint8Dict = new Uint8Array(dictionary);
98+
const compressed = zlib.brotliCompressSync(input, { dictionary: uint8Dict });
99+
const decompressed = zlib.brotliDecompressSync(compressed, { dictionary: uint8Dict });
100+
assert.strictEqual(decompressed.toString(), input.toString());
101+
}
102+
103+
// Test that invalid dictionary type throws ERR_INVALID_ARG_TYPE.
104+
for (const invalidDict of ['string', 123, true, { object: true }, [1, 2, 3]]) {
105+
assert.throws(() => {
106+
zlib.createBrotliCompress({ dictionary: invalidDict });
107+
}, { code: 'ERR_INVALID_ARG_TYPE' });
108+
109+
assert.throws(() => {
110+
zlib.createBrotliDecompress({ dictionary: invalidDict });
111+
}, { code: 'ERR_INVALID_ARG_TYPE' });
112+
}
113+
114+
// Test with streaming API and wrong dictionary emits error event.
115+
{
116+
const compressed = zlib.brotliCompressSync(input, { dictionary });
117+
const wrongDict = Buffer.from('wrong dictionary data');
118+
const decoder = zlib.createBrotliDecompress({ dictionary: wrongDict });
119+
120+
decoder.on('error', common.mustCall((err) => {
121+
assert.match(err.code, /ERR_/);
122+
}));
123+
124+
decoder.write(compressed);
125+
decoder.end();
126+
}

0 commit comments

Comments
 (0)