-
Notifications
You must be signed in to change notification settings - Fork 69
Libdeflate port #25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Libdeflate port #25
Changes from 7 commits
e08daa7
17e9ace
a2fcbdb
5c0b86f
3cb55a8
32f4c39
da1d85c
72ca105
16c669c
864905e
4b7ec6f
d9e64d1
abfdc49
bd480a6
487927c
4e284d0
15205f2
b4684ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
#include <gzip/config.hpp> | ||
|
||
// zlib | ||
#include <zlib.h> | ||
#include <libdeflate.h> | ||
|
||
// std | ||
#include <limits> | ||
|
@@ -14,13 +14,27 @@ class Compressor | |
{ | ||
std::size_t max_; | ||
int level_; | ||
struct libdeflate_compressor* compressor_ = nullptr; | ||
|
||
public: | ||
Compressor(int level = Z_DEFAULT_COMPRESSION, | ||
Compressor(int level = 6, | ||
std::size_t max_bytes = 2000000000) // by default refuse operation if uncompressed data is > 2GB | ||
: max_(max_bytes), | ||
level_(level) | ||
{ | ||
compressor_ = libdeflate_alloc_compressor(level_); | ||
if (!compressor_) | ||
{ | ||
throw std::runtime_error("libdeflate_alloc_compressor failed"); | ||
} | ||
} | ||
|
||
~Compressor() | ||
{ | ||
if (compressor_) | ||
{ | ||
libdeflate_free_compressor(compressor_); | ||
} | ||
} | ||
|
||
template <typename InputType> | ||
|
@@ -41,68 +55,29 @@ class Compressor | |
throw std::runtime_error("size may use more memory than intended when decompressing"); | ||
} | ||
|
||
z_stream deflate_s; | ||
deflate_s.zalloc = Z_NULL; | ||
deflate_s.zfree = Z_NULL; | ||
deflate_s.opaque = Z_NULL; | ||
deflate_s.avail_in = 0; | ||
deflate_s.next_in = Z_NULL; | ||
|
||
// The windowBits parameter is the base two logarithm of the window size (the size of the history buffer). | ||
// It should be in the range 8..15 for this version of the library. | ||
// Larger values of this parameter result in better compression at the expense of memory usage. | ||
// This range of values also changes the decoding type: | ||
// -8 to -15 for raw deflate | ||
// 8 to 15 for zlib | ||
// (8 to 15) + 16 for gzip | ||
// (8 to 15) + 32 to automatically detect gzip/zlib header (decompression/inflate only) | ||
constexpr int window_bits = 15 + 16; // gzip with windowbits of 15 | ||
|
||
constexpr int mem_level = 8; | ||
// The memory requirements for deflate are (in bytes): | ||
// (1 << (window_bits+2)) + (1 << (mem_level+9)) | ||
// with a default value of 8 for mem_level and our window_bits of 15 | ||
// this is 128Kb | ||
|
||
#pragma GCC diagnostic push | ||
#pragma GCC diagnostic ignored "-Wold-style-cast" | ||
if (deflateInit2(&deflate_s, level_, Z_DEFLATED, window_bits, mem_level, Z_DEFAULT_STRATEGY) != Z_OK) | ||
std::size_t max_compressed_size = libdeflate_gzip_compress_bound(compressor_, size); | ||
artemp marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// TODO: sanity check this before allocating | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason for this comment is fear/lack of knowledge on my part. What happens if |
||
if (max_compressed_size > output.size()) | ||
{ | ||
throw std::runtime_error("deflate init failed"); | ||
output.resize(max_compressed_size); | ||
} | ||
#pragma GCC diagnostic pop | ||
|
||
deflate_s.next_in = reinterpret_cast<z_const Bytef*>(data); | ||
deflate_s.avail_in = static_cast<unsigned int>(size); | ||
|
||
std::size_t size_compressed = 0; | ||
do | ||
std::size_t actual_compressed_size = libdeflate_gzip_compress(compressor_, | ||
data, | ||
size, | ||
const_cast<char*>(output.data()), | ||
max_compressed_size); | ||
if (actual_compressed_size == 0) | ||
{ | ||
size_t increase = size / 2 + 1024; | ||
if (output.size() < (size_compressed + increase)) | ||
{ | ||
output.resize(size_compressed + increase); | ||
} | ||
// There is no way we see that "increase" would not fit in an unsigned int, | ||
// hence we use static cast here to avoid -Wshorten-64-to-32 error | ||
deflate_s.avail_out = static_cast<unsigned int>(increase); | ||
deflate_s.next_out = reinterpret_cast<Bytef*>((&output[0] + size_compressed)); | ||
// From http://www.zlib.net/zlib_how.html | ||
// "deflate() has a return value that can indicate errors, yet we do not check it here. | ||
// Why not? Well, it turns out that deflate() can do no wrong here." | ||
// Basically only possible error is from deflateInit not working properly | ||
deflate(&deflate_s, Z_FINISH); | ||
size_compressed += (increase - deflate_s.avail_out); | ||
} while (deflate_s.avail_out == 0); | ||
|
||
deflateEnd(&deflate_s); | ||
output.resize(size_compressed); | ||
throw std::runtime_error("actual_compressed_size 0"); | ||
} | ||
output.resize(actual_compressed_size); | ||
} | ||
}; | ||
|
||
inline std::string compress(const char* data, | ||
std::size_t size, | ||
int level = Z_DEFAULT_COMPRESSION) | ||
int level = 6) | ||
artemp marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
Compressor comp(level); | ||
std::string output; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,7 +90,7 @@ TEST_CASE("round trip compression - gzip") | |
|
||
SECTION("no compression") | ||
{ | ||
int level = Z_NO_COMPRESSION; | ||
int level = 0; | ||
std::string compressed_data = gzip::compress(data.data(), data.size()); | ||
CHECK(gzip::is_compressed(compressed_data.data(), compressed_data.size())); | ||
std::string new_data = gzip::decompress(compressed_data.data(), compressed_data.size()); | ||
|
@@ -99,7 +99,7 @@ TEST_CASE("round trip compression - gzip") | |
|
||
SECTION("default compression level") | ||
{ | ||
int level = Z_DEFAULT_COMPRESSION; | ||
int level = 6; | ||
std::string compressed_data = gzip::compress(data.data(), data.size()); | ||
CHECK(gzip::is_compressed(compressed_data.data(), compressed_data.size())); | ||
std::string new_data = gzip::decompress(compressed_data.data(), compressed_data.size()); | ||
|
@@ -108,7 +108,7 @@ TEST_CASE("round trip compression - gzip") | |
|
||
SECTION("compression level -- min to max") | ||
{ | ||
for (int level = Z_BEST_SPEED; level <= Z_BEST_COMPRESSION; ++level) | ||
for (int level = 1; level <= 9; ++level) | ||
{ | ||
std::string compressed_data = gzip::compress(data.data(), data.size()); | ||
CHECK(gzip::is_compressed(compressed_data.data(), compressed_data.size())); | ||
|
@@ -130,7 +130,7 @@ TEST_CASE("test decompression size limit") | |
std::istreambuf_iterator<char>()); | ||
stream.close(); | ||
|
||
std::size_t limit = 20 * 1024 * 1024; // 20 Mb | ||
std::size_t limit = 500 * 1024 * 1024; // 500 Mb | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @springmeyer - I've changed the logic to validate the output buffer size rather than the input, which makes more sense in my opinion. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👌 |
||
// file should be about 500 mb uncompressed | ||
gzip::Decompressor decomp(limit); | ||
std::string output; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Noting that this approach (initializing the C struct pointer in the constructor and freeing the memory in the destructor) applies RAII (https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) to avoid a memory leak.