Skip to content

Commit b3e0bcf

Browse files
authored
chore: further extend the compression analysis (#5065)
Allow export/import of Huffman tables via `DEBUG COMPRESSION EXPORT` or `DEBUG COMPRESSION IMPORT <bintable>`.

Signed-off-by: Roman Gershman <[email protected]>
1 parent 4d07d7d commit b3e0bcf

File tree

4 files changed

+92
-24
lines changed

4 files changed

+92
-24
lines changed

src/core/compact_object.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,13 +1524,13 @@ std::string_view ObjTypeToString(CompactObjType type) {
15241524
return "Invalid type"sv;
15251525
}
15261526

1527-
std::optional<CompactObjType> ObjTypeFromString(std::string_view sv) {
1527+
CompactObjType ObjTypeFromString(std::string_view sv) {
15281528
for (auto& p : kObjTypeToString) {
15291529
if (absl::EqualsIgnoreCase(sv, p.second)) {
15301530
return p.first;
15311531
}
15321532
}
1533-
return std::nullopt;
1533+
return kInvalidCompactObjType;
15341534
}
15351535

15361536
} // namespace dfly

src/core/compact_object.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,8 @@ inline bool CompactObj::operator==(std::string_view sv) const {
543543

544544
std::string_view ObjTypeToString(CompactObjType type);
545545

546-
std::optional<CompactObjType> ObjTypeFromString(std::string_view sv);
546+
// Returns kInvalidCompactObjType if sv is not a valid type.
547+
CompactObjType ObjTypeFromString(std::string_view sv);
547548

548549
namespace detail {
549550

src/server/common.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,8 @@ OpResult<ScanOpts> ScanOpts::TryFrom(CmdArgList args) {
299299
if (pattern != "*")
300300
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
301301
} else if (opt == "TYPE") {
302-
auto obj_type = ObjTypeFromString(ArgS(args, i + 1));
303-
if (!obj_type) {
302+
CompactObjType obj_type = ObjTypeFromString(ArgS(args, i + 1));
303+
if (obj_type == kInvalidCompactObjType) {
304304
return facade::OpStatus::SYNTAX_ERR;
305305
}
306306
scan_opts.type_filter = obj_type;

src/server/debugcmd.cc

Lines changed: 86 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ struct HufHist {
279279
}
280280
};
281281

282-
void DoComputeHist(optional<CompactObjType> type, EngineShard* shard, ConnectionContext* cntx,
282+
void DoComputeHist(CompactObjType type, EngineShard* shard, ConnectionContext* cntx,
283283
HufHist* dest) {
284284
auto& db_slice = cntx->ns->GetDbSlice(shard->shard_id());
285285
DbTable* dbt = db_slice.GetDBTable(cntx->db_index());
@@ -294,26 +294,26 @@ void DoComputeHist(optional<CompactObjType> type, EngineShard* shard, Connection
294294
do {
295295
cursor = table.Traverse(cursor, [&](PrimeIterator it) {
296296
scratch.clear();
297-
if (!type) {
297+
if (type == kInvalidCompactObjType) { // KEYSPACE
298298
it->first.GetString(&scratch);
299-
} else if (*type == OBJ_STRING && it->second.ObjType() == OBJ_STRING) {
299+
} else if (type == OBJ_STRING && it->second.ObjType() == OBJ_STRING) {
300300
it->second.GetString(&scratch);
301-
} else if (*type == OBJ_ZSET && it->second.ObjType() == OBJ_ZSET) {
301+
} else if (type == OBJ_ZSET && it->second.ObjType() == OBJ_ZSET) {
302302
container_utils::IterateSortedSet(
303303
it->second.GetRobjWrapper(), [&](container_utils::ContainerEntry entry, double) {
304304
if (entry.value) {
305305
HIST_add(dest->hist.data(), entry.value, entry.length);
306306
}
307307
return true;
308308
});
309-
} else if (*type == OBJ_LIST && it->second.ObjType() == OBJ_LIST) {
309+
} else if (type == OBJ_LIST && it->second.ObjType() == OBJ_LIST) {
310310
container_utils::IterateList(it->second, [&](container_utils::ContainerEntry entry) {
311311
if (entry.value) {
312312
HIST_add(dest->hist.data(), entry.value, entry.length);
313313
}
314314
return true;
315315
});
316-
} else if (*type == OBJ_HASH && it->second.ObjType() == OBJ_HASH) {
316+
} else if (type == OBJ_HASH && it->second.ObjType() == OBJ_HASH) {
317317
container_utils::IterateMap(it->second, [&](container_utils::ContainerEntry key,
318318
container_utils::ContainerEntry value) {
319319
if (key.value) {
@@ -596,9 +596,11 @@ void DebugCmd::Run(CmdArgList args, facade::SinkReplyBuilder* builder) {
596596
" traffic logging is stopped.",
597597
"RECVSIZE [<tid> | ENABLE | DISABLE]",
598598
" Prints the histogram of the received request sizes on the given thread",
599-
"COMPRESSION [type]"
599+
"COMPRESSION [IMPORT <bintable> | EXPORT] [type]",
600600
" Estimate the compressibility of values of the given type. if no type is given, ",
601-
" checks compressibility of keys",
601+
" checks compressibility of keys. If IMPORT is specified, then the provided ",
602+
" bintable is used to check compressibility. If EXPORT is specified, then ",
603+
" the serialized table is printed as well",
602604
"IOSTATS [PS]",
603605
" Prints IO stats per thread. If PS is specified, prints thread-level stats ",
604606
" per second.",
@@ -1281,14 +1283,29 @@ void DebugCmd::Keys(CmdArgList args, facade::SinkReplyBuilder* builder) {
12811283
}
12821284

12831285
void DebugCmd::Compression(CmdArgList args, facade::SinkReplyBuilder* builder) {
1284-
optional<CompactObjType> type;
1285-
if (args.size() > 0) {
1286-
string_view type_str = ArgS(args, 0);
1286+
CompactObjType type = kInvalidCompactObjType;
1287+
CmdArgParser parser(args);
1288+
string bintable;
1289+
bool print_bintable = false;
1290+
1291+
if (parser.Check("EXPORT")) {
1292+
print_bintable = true;
1293+
} else {
1294+
parser.Check("IMPORT", &bintable);
1295+
}
1296+
1297+
if (parser.HasNext()) {
1298+
string_view type_str = parser.Next();
12871299
type = ObjTypeFromString(type_str);
1288-
if (!type) {
1300+
if (type == kInvalidCompactObjType) {
12891301
return builder->SendError(kSyntaxErr);
12901302
}
12911303
}
1304+
1305+
if (parser.HasError()) {
1306+
return builder->SendError(parser.Error()->MakeReply());
1307+
}
1308+
12921309
auto* rb = static_cast<RedisReplyBuilder*>(builder);
12931310

12941311
fb2::Mutex mu;
@@ -1300,26 +1317,72 @@ void DebugCmd::Compression(CmdArgList args, facade::SinkReplyBuilder* builder) {
13001317
hist.Merge(local);
13011318
});
13021319

1303-
HUF_CREATE_STATIC_CTABLE(huf_ctable, HufHist::kMaxSymbol);
1304-
13051320
size_t num_bits = 0, compressed_size = 0, raw_size = 0;
1321+
unsigned table_max_symbol = 255;
13061322

13071323
if (hist.max_symbol) {
1324+
HUF_CREATE_STATIC_CTABLE(huf_ctable, HufHist::kMaxSymbol);
1325+
13081326
unique_ptr<uint32_t[]> wrkspace(new uint32_t[HUF_CTABLE_WORKSPACE_SIZE_U32]);
13091327
constexpr size_t kWspSize = HUF_CTABLE_WORKSPACE_SIZE;
1310-
num_bits = HUF_buildCTable_wksp(huf_ctable, hist.hist.data(), hist.max_symbol, 0,
1311-
wrkspace.get(), kWspSize);
13121328

1313-
compressed_size = HUF_estimateCompressedSize(huf_ctable, hist.hist.data(), hist.max_symbol);
1329+
if (bintable.empty()) {
1330+
table_max_symbol = hist.max_symbol;
1331+
num_bits = HUF_buildCTable_wksp(huf_ctable, hist.hist.data(), table_max_symbol, 0,
1332+
wrkspace.get(), kWspSize);
1333+
if (HUF_isError(num_bits)) {
1334+
return rb->SendError(StrCat("Internal error: ", HUF_getErrorName(num_bits)));
1335+
}
1336+
} else {
1337+
// Try to read the bintable and create a ctable from it.
1338+
unsigned has_zero_weights = 1;
1339+
1340+
size_t read_size = HUF_readCTable(huf_ctable, &table_max_symbol, bintable.data(),
1341+
bintable.size(), &has_zero_weights);
1342+
if (HUF_isError(read_size)) {
1343+
return rb->SendError(StrCat("Internal error: ", HUF_getErrorName(read_size)));
1344+
}
1345+
if (read_size != bintable.size()) {
1346+
return rb->SendError("Invalid bintable");
1347+
}
1348+
}
1349+
1350+
compressed_size = HUF_estimateCompressedSize(huf_ctable, hist.hist.data(), table_max_symbol);
1351+
for (unsigned i = table_max_symbol + 1; i <= hist.max_symbol; i++) {
1352+
compressed_size += hist.hist[i];
1353+
}
13141354
raw_size = 0;
1315-
for (unsigned i = 0; i < hist.max_symbol; i++) {
1355+
for (unsigned i = 0; i <= hist.max_symbol; i++) {
13161356
raw_size += hist.hist[i];
13171357
}
1358+
1359+
if (print_bintable) {
1360+
// Reverse engineered: (maxSymbolValue + 1) / 2 + 1.
1361+
constexpr unsigned kMaxTableSize = 130;
1362+
bintable.resize(kMaxTableSize);
1363+
1364+
// Seems we can reuse the same workspace, its capacity is enough.
1365+
size_t res = HUF_writeCTable_wksp(bintable.data(), kMaxTableSize, huf_ctable,
1366+
table_max_symbol, num_bits, wrkspace.get(), kWspSize);
1367+
if (HUF_isError(res)) {
1368+
return rb->SendError(StrCat("Internal error: ", HUF_getErrorName(res)));
1369+
}
1370+
bintable.resize(res);
1371+
} else {
1372+
bintable.clear();
1373+
}
13181374
}
13191375

1320-
rb->StartCollection(5, RedisReplyBuilder::CollectionType::MAP);
1376+
unsigned map_len = print_bintable ? 7 : 6;
1377+
1378+
rb->StartCollection(map_len, RedisReplyBuilder::CollectionType::MAP);
13211379
rb->SendSimpleString("max_symbol");
13221380
rb->SendLong(hist.max_symbol);
1381+
1382+
// in case we load a bintable, table_max_symbol may be different from max_symbol.
1383+
// if it's smaller, it means our table can not encode all symbols.
1384+
rb->SendSimpleString("table_max_symbol");
1385+
rb->SendLong(table_max_symbol);
13231386
rb->SendSimpleString("max_bits");
13241387
rb->SendLong(num_bits);
13251388
rb->SendSimpleString("raw_size");
@@ -1329,6 +1392,10 @@ void DebugCmd::Compression(CmdArgList args, facade::SinkReplyBuilder* builder) {
13291392
rb->SendSimpleString("ratio");
13301393
double ratio = raw_size > 0 ? static_cast<double>(compressed_size) / raw_size : 0;
13311394
rb->SendDouble(ratio);
1395+
if (print_bintable) {
1396+
rb->SendSimpleString("bintable");
1397+
rb->SendBulkString(bintable);
1398+
}
13321399
}
13331400

13341401
void DebugCmd::IOStats(CmdArgList args, facade::SinkReplyBuilder* builder) {

0 commit comments

Comments
 (0)