From e08daa7d4894b755969f7a0430088a1ac884492a Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 3 Apr 2018 15:41:40 -0700 Subject: [PATCH 01/18] replace zlib with libdeflate --- CMakeLists.txt | 8 ++-- include/gzip/compress.hpp | 86 ++++++++++++---------------------- include/gzip/decompress.hpp | 92 +++++++++++++++---------------------- test/test_io.cpp | 6 +-- 4 files changed, 74 insertions(+), 118 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9280ae1..4b5fc69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,8 +35,8 @@ include_directories(SYSTEM ${MASON_PACKAGE_catch_INCLUDE_DIRS}) mason_use(benchmark VERSION 1.3.0) include_directories(SYSTEM ${MASON_PACKAGE_benchmark_INCLUDE_DIRS}) -mason_use(zlib VERSION 1.2.8) -include_directories(SYSTEM ${MASON_PACKAGE_zlib_INCLUDE_DIRS}) +mason_use(libdeflate VERSION e9d1014) +include_directories(SYSTEM ${MASON_PACKAGE_libdeflate_INCLUDE_DIRS}) include_directories("${PROJECT_SOURCE_DIR}/include") @@ -49,5 +49,5 @@ file(GLOB BENCH_SOURCES bench/*.cpp) add_executable(bench-tests ${BENCH_SOURCES}) # link zlib static library to the unit-tests binary so the tests know where to find the zlib impl code -target_link_libraries(unit-tests ${MASON_PACKAGE_zlib_STATIC_LIBS}) -target_link_libraries(bench-tests ${MASON_PACKAGE_benchmark_STATIC_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${MASON_PACKAGE_zlib_STATIC_LIBS}) +target_link_libraries(unit-tests ${MASON_PACKAGE_libdeflate_STATIC_LIBS}) +target_link_libraries(bench-tests ${MASON_PACKAGE_benchmark_STATIC_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${MASON_PACKAGE_libdeflate_STATIC_LIBS}) diff --git a/include/gzip/compress.hpp b/include/gzip/compress.hpp index 2ec56c2..b02301d 100644 --- a/include/gzip/compress.hpp +++ b/include/gzip/compress.hpp @@ -1,7 +1,7 @@ #include // zlib -#include +#include // std #include @@ -14,13 +14,26 @@ class Compressor { std::size_t max_; int level_; + struct libdeflate_compressor * compressor_ = nullptr; public: - Compressor(int level = Z_DEFAULT_COMPRESSION, + Compressor(int level = 6, std::size_t max_bytes = 2000000000) // by default refuse operation if uncompressed data is > 2GB : max_(max_bytes), level_(level) { + compressor_ = libdeflate_alloc_compressor(level_); + if (!compressor_) { + throw std::runtime_error("libdeflate_alloc_compressor failed"); + } + } + + ~Compressor() + { + if (compressor_) + { + libdeflate_free_compressor(compressor_); + } } template @@ -41,68 +54,27 @@ class Compressor throw std::runtime_error("size may use more memory than intended when decompressing"); } - z_stream deflate_s; - deflate_s.zalloc = Z_NULL; - deflate_s.zfree = Z_NULL; - deflate_s.opaque = Z_NULL; - deflate_s.avail_in = 0; - deflate_s.next_in = Z_NULL; - - // The windowBits parameter is the base two logarithm of the window size (the size of the history buffer). - // It should be in the range 8..15 for this version of the library. - // Larger values of this parameter result in better compression at the expense of memory usage. - // This range of values also changes the decoding type: - // -8 to -15 for raw deflate - // 8 to 15 for zlib - // (8 to 15) + 16 for gzip - // (8 to 15) + 32 to automatically detect gzip/zlib header (decompression/inflate only) - constexpr int window_bits = 15 + 16; // gzip with windowbits of 15 - - constexpr int mem_level = 8; - // The memory requirements for deflate are (in bytes): - // (1 << (window_bits+2)) + (1 << (mem_level+9)) - // with a default value of 8 for mem_level and our window_bits of 15 - // this is 128Kb - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" - if (deflateInit2(&deflate_s, level_, Z_DEFLATED, window_bits, mem_level, Z_DEFAULT_STRATEGY) != Z_OK) - { - throw std::runtime_error("deflate init failed"); + std::size_t max_compressed_size = libdeflate_gzip_compress_bound(compressor_,size); + // TODO: sanity check this before allocating + if (max_compressed_size > output.size()) { + output.resize(max_compressed_size); } -#pragma GCC diagnostic pop - - deflate_s.next_in = reinterpret_cast(data); - deflate_s.avail_in = static_cast(size); - std::size_t size_compressed = 0; - do - { - size_t increase = size / 2 + 1024; - if (output.size() < (size_compressed + increase)) - { - output.resize(size_compressed + increase); - } - // There is no way we see that "increase" would not fit in an unsigned int, - // hence we use static cast here to avoid -Wshorten-64-to-32 error - deflate_s.avail_out = static_cast(increase); - deflate_s.next_out = reinterpret_cast((&output[0] + size_compressed)); - // From http://www.zlib.net/zlib_how.html - // "deflate() has a return value that can indicate errors, yet we do not check it here. - // Why not? Well, it turns out that deflate() can do no wrong here." - // Basically only possible error is from deflateInit not working properly - deflate(&deflate_s, Z_FINISH); - size_compressed += (increase - deflate_s.avail_out); - } while (deflate_s.avail_out == 0); - - deflateEnd(&deflate_s); - output.resize(size_compressed); + std::size_t actual_compressed_size = libdeflate_gzip_compress(compressor_, + data, + size, + reinterpret_cast(&output[0]), + max_compressed_size); + if (actual_compressed_size == 0) { + throw std::runtime_error("actual_compressed_size 0"); + } + output.resize(actual_compressed_size); } }; inline std::string compress(const char* data, std::size_t size, - int level = Z_DEFAULT_COMPRESSION) + int level = 6) { Compressor comp(level); std::string output; diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index b70670f..86735b6 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -1,7 +1,7 @@ #include // zlib -#include +#include // std #include @@ -13,84 +13,68 @@ namespace gzip { class Decompressor { std::size_t max_; + struct libdeflate_decompressor * decompressor_ = nullptr; public: Decompressor(std::size_t max_bytes = 1000000000) // by default refuse operation if compressed data is > 1GB : max_(max_bytes) - { + { + decompressor_ = libdeflate_alloc_decompressor(); + if (!decompressor_) { + throw std::runtime_error("libdeflate_alloc_decompressor failed"); + } } + ~Decompressor() + { + if (decompressor_) + { + libdeflate_free_decompressor(decompressor_); + } + } + template void decompress(OutputType& output, const char* data, std::size_t size) const { - z_stream inflate_s; - - inflate_s.zalloc = Z_NULL; - inflate_s.zfree = Z_NULL; - inflate_s.opaque = Z_NULL; - inflate_s.avail_in = 0; - inflate_s.next_in = Z_NULL; - - // The windowBits parameter is the base two logarithm of the window size (the size of the history buffer). - // It should be in the range 8..15 for this version of the library. - // Larger values of this parameter result in better compression at the expense of memory usage. - // This range of values also changes the decoding type: - // -8 to -15 for raw deflate - // 8 to 15 for zlib - // (8 to 15) + 16 for gzip - // (8 to 15) + 32 to automatically detect gzip/zlib header - constexpr int window_bits = 15 + 32; // auto with windowbits of 15 - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" - if (inflateInit2(&inflate_s, window_bits) != Z_OK) - { - throw std::runtime_error("inflate init failed"); - } -#pragma GCC diagnostic pop - inflate_s.next_in = reinterpret_cast(data); #ifdef DEBUG // Verify if size (long type) input will fit into unsigned int, type used for zlib's avail_in std::uint64_t size_64 = size * 2; if (size_64 > std::numeric_limits::max()) { - inflateEnd(&inflate_s); throw std::runtime_error("size arg is too large to fit into unsigned int type x2"); } #endif if (size > max_ || (size * 2) > max_) { - inflateEnd(&inflate_s); throw std::runtime_error("size may use more memory than intended when decompressing"); } - inflate_s.avail_in = static_cast(size); - std::size_t size_uncompressed = 0; - do - { - std::size_t resize_to = size_uncompressed + 2 * size; - if (resize_to > max_) - { - inflateEnd(&inflate_s); - throw std::runtime_error("size of output string will use more memory then intended when decompressing"); - } - output.resize(resize_to); - inflate_s.avail_out = static_cast(2 * size); - inflate_s.next_out = reinterpret_cast(&output[0] + size_uncompressed); - int ret = inflate(&inflate_s, Z_FINISH); - if (ret != Z_STREAM_END && ret != Z_OK && ret != Z_BUF_ERROR) - { - std::string error_msg = inflate_s.msg; - inflateEnd(&inflate_s); - throw std::runtime_error(error_msg); - } - size_uncompressed += (2 * size - inflate_s.avail_out); - } while (inflate_s.avail_out == 0); - inflateEnd(&inflate_s); - output.resize(size_uncompressed); + // https://github.com/kaorimatz/libdeflate-ruby/blob/0e33da96cdaad3162f03ec924b25b2f4f2847538/ext/libdeflate/libdeflate_ext.c#L340 + // https://github.com/ebiggers/libdeflate/commit/5a9d25a8922e2d74618fba96e56db4fe145510f4 + std::size_t actual_size; + std::size_t uncompressed_size_guess = size * 2; + output.resize(uncompressed_size_guess); + enum libdeflate_result result = libdeflate_gzip_decompress(decompressor_, + data, + size, + static_cast(&output[0]), + uncompressed_size_guess, &actual_size); + if (result == LIBDEFLATE_INSUFFICIENT_SPACE) { + throw std::runtime_error("no space: did not succeed"); + } + else if (result == LIBDEFLATE_SHORT_OUTPUT) { + throw std::runtime_error("short output: did not succeed"); + } + else if (result == LIBDEFLATE_BAD_DATA) { + throw std::runtime_error("bad data: did not succeed"); + } + else if (result != LIBDEFLATE_SUCCESS) { + throw std::runtime_error("did not succeed"); + } + output.resize(actual_size); } }; diff --git a/test/test_io.cpp b/test/test_io.cpp index a245a11..f960c9f 100644 --- a/test/test_io.cpp +++ b/test/test_io.cpp @@ -90,7 +90,7 @@ TEST_CASE("round trip compression - gzip") SECTION("no compression") { - int level = Z_NO_COMPRESSION; + int level = 0; std::string compressed_data = gzip::compress(data.data(), data.size()); CHECK(gzip::is_compressed(compressed_data.data(), compressed_data.size())); std::string new_data = gzip::decompress(compressed_data.data(), compressed_data.size()); @@ -99,7 +99,7 @@ TEST_CASE("round trip compression - gzip") SECTION("default compression level") { - int level = Z_DEFAULT_COMPRESSION; + int level = 6; std::string compressed_data = gzip::compress(data.data(), data.size()); CHECK(gzip::is_compressed(compressed_data.data(), compressed_data.size())); std::string new_data = gzip::decompress(compressed_data.data(), compressed_data.size()); @@ -108,7 +108,7 @@ TEST_CASE("round trip compression - gzip") SECTION("compression level -- min to max") { - for (int level = Z_BEST_SPEED; level <= Z_BEST_COMPRESSION; ++level) + for (int level = 1; level <= 9; ++level) { std::string compressed_data = gzip::compress(data.data(), data.size()); CHECK(gzip::is_compressed(compressed_data.data(), compressed_data.size())); From 17e9ace00211eac31c20f31e16c858822fbfbb8b Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Tue, 3 Apr 2018 15:42:16 -0700 Subject: [PATCH 02/18] make format --- include/gzip/compress.hpp | 21 ++++++++++++--------- include/gzip/decompress.hpp | 37 +++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/include/gzip/compress.hpp b/include/gzip/compress.hpp index b02301d..78b5177 100644 --- a/include/gzip/compress.hpp +++ b/include/gzip/compress.hpp @@ -14,7 +14,7 @@ class Compressor { std::size_t max_; int level_; - struct libdeflate_compressor * compressor_ = nullptr; + struct libdeflate_compressor* compressor_ = nullptr; public: Compressor(int level = 6, @@ -23,7 +23,8 @@ class Compressor level_(level) { compressor_ = libdeflate_alloc_compressor(level_); - if (!compressor_) { + if (!compressor_) + { throw std::runtime_error("libdeflate_alloc_compressor failed"); } } @@ -54,18 +55,20 @@ class Compressor throw std::runtime_error("size may use more memory than intended when decompressing"); } - std::size_t max_compressed_size = libdeflate_gzip_compress_bound(compressor_,size); + std::size_t max_compressed_size = libdeflate_gzip_compress_bound(compressor_, size); // TODO: sanity check this before allocating - if (max_compressed_size > output.size()) { + if (max_compressed_size > output.size()) + { output.resize(max_compressed_size); } std::size_t actual_compressed_size = libdeflate_gzip_compress(compressor_, - data, - size, - reinterpret_cast(&output[0]), - max_compressed_size); - if (actual_compressed_size == 0) { + data, + size, + reinterpret_cast(&output[0]), + max_compressed_size); + if (actual_compressed_size == 0) + { throw std::runtime_error("actual_compressed_size 0"); } output.resize(actual_compressed_size); diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index 86735b6..0a37367 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -13,25 +13,26 @@ namespace gzip { class Decompressor { std::size_t max_; - struct libdeflate_decompressor * decompressor_ = nullptr; + struct libdeflate_decompressor* decompressor_ = nullptr; public: Decompressor(std::size_t max_bytes = 1000000000) // by default refuse operation if compressed data is > 1GB : max_(max_bytes) - { + { decompressor_ = libdeflate_alloc_decompressor(); - if (!decompressor_) { + if (!decompressor_) + { throw std::runtime_error("libdeflate_alloc_decompressor failed"); } } ~Decompressor() { - if (decompressor_) - { - libdeflate_free_decompressor(decompressor_); - } - } + if (decompressor_) + { + libdeflate_free_decompressor(decompressor_); + } + } template void decompress(OutputType& output, @@ -58,20 +59,24 @@ class Decompressor std::size_t uncompressed_size_guess = size * 2; output.resize(uncompressed_size_guess); enum libdeflate_result result = libdeflate_gzip_decompress(decompressor_, - data, - size, - static_cast(&output[0]), - uncompressed_size_guess, &actual_size); - if (result == LIBDEFLATE_INSUFFICIENT_SPACE) { + data, + size, + static_cast(&output[0]), + uncompressed_size_guess, &actual_size); + if (result == LIBDEFLATE_INSUFFICIENT_SPACE) + { throw std::runtime_error("no space: did not succeed"); } - else if (result == LIBDEFLATE_SHORT_OUTPUT) { + else if (result == LIBDEFLATE_SHORT_OUTPUT) + { throw std::runtime_error("short output: did not succeed"); } - else if (result == LIBDEFLATE_BAD_DATA) { + else if (result == LIBDEFLATE_BAD_DATA) + { throw std::runtime_error("bad data: did not succeed"); } - else if (result != LIBDEFLATE_SUCCESS) { + else if (result != LIBDEFLATE_SUCCESS) + { throw std::runtime_error("did not succeed"); } output.resize(actual_size); From a2fcbdbfff2c2d80412dceb67e8d4ca76f7ae60c Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Fri, 27 Jul 2018 19:49:12 -0700 Subject: [PATCH 03/18] need more space for vector tiles --- include/gzip/decompress.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index 0a37367..2c72976 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -56,7 +56,7 @@ class Decompressor // https://github.com/kaorimatz/libdeflate-ruby/blob/0e33da96cdaad3162f03ec924b25b2f4f2847538/ext/libdeflate/libdeflate_ext.c#L340 // https://github.com/ebiggers/libdeflate/commit/5a9d25a8922e2d74618fba96e56db4fe145510f4 std::size_t actual_size; - std::size_t uncompressed_size_guess = size * 2; + std::size_t uncompressed_size_guess = size * 3; output.resize(uncompressed_size_guess); enum libdeflate_result result = libdeflate_gzip_decompress(decompressor_, data, From 5c0b86f578aff2a2cf5b129ddaee23dcf06bef21 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Fri, 27 Jul 2018 20:08:00 -0700 Subject: [PATCH 04/18] need more space to avoid 'no space: did not succeed' error with mapbox/vtcomposite#83 --- include/gzip/decompress.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index 2c72976..badd315 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -56,7 +56,7 @@ class Decompressor // https://github.com/kaorimatz/libdeflate-ruby/blob/0e33da96cdaad3162f03ec924b25b2f4f2847538/ext/libdeflate/libdeflate_ext.c#L340 // https://github.com/ebiggers/libdeflate/commit/5a9d25a8922e2d74618fba96e56db4fe145510f4 std::size_t actual_size; - std::size_t uncompressed_size_guess = size * 3; + std::size_t uncompressed_size_guess = size * 4; output.resize(uncompressed_size_guess); enum libdeflate_result result = libdeflate_gzip_decompress(decompressor_, data, From 3cb55a86ee26f968db4ec5c5d9d1df4ff8c9e000 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Thu, 27 Sep 2018 11:10:08 +0100 Subject: [PATCH 05/18] use `data()` to access pointer to the first element of a container (c++11) --- include/gzip/compress.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gzip/compress.hpp b/include/gzip/compress.hpp index 78b5177..5d22cae 100644 --- a/include/gzip/compress.hpp +++ b/include/gzip/compress.hpp @@ -65,7 +65,7 @@ class Compressor std::size_t actual_compressed_size = libdeflate_gzip_compress(compressor_, data, size, - reinterpret_cast(&output[0]), + const_cast(output.data()), max_compressed_size); if (actual_compressed_size == 0) { From 32f4c392b657d205b52c134b3a72757119873459 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Thu, 27 Sep 2018 11:13:35 +0100 Subject: [PATCH 06/18] Iteratively grow output buffer capacity if "guessed" size is not sufficient + use `data()` --- include/gzip/decompress.hpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index badd315..627e670 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -57,17 +57,23 @@ class Decompressor // https://github.com/ebiggers/libdeflate/commit/5a9d25a8922e2d74618fba96e56db4fe145510f4 std::size_t actual_size; std::size_t uncompressed_size_guess = size * 4; - output.resize(uncompressed_size_guess); - enum libdeflate_result result = libdeflate_gzip_decompress(decompressor_, - data, - size, - static_cast(&output[0]), - uncompressed_size_guess, &actual_size); - if (result == LIBDEFLATE_INSUFFICIENT_SPACE) + output.reserve(uncompressed_size_guess); + enum libdeflate_result result; + for (;;) { - throw std::runtime_error("no space: did not succeed"); + result = libdeflate_gzip_decompress(decompressor_, + data, + size, + const_cast(output.data()), + output.capacity(), &actual_size); + if (result != LIBDEFLATE_INSUFFICIENT_SPACE) + { + break; + } + output.reserve((output.capacity() << 1) - output.size()); } - else if (result == LIBDEFLATE_SHORT_OUTPUT) + + if (result == LIBDEFLATE_SHORT_OUTPUT) { throw std::runtime_error("short output: did not succeed"); } From da1d85cceaad9c2863d4027115f2fd42f7e706ea Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Thu, 27 Sep 2018 14:46:33 +0100 Subject: [PATCH 07/18] set file size limit to ~500Mb --- test/test_io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_io.cpp b/test/test_io.cpp index f960c9f..4a9bd9c 100644 --- a/test/test_io.cpp +++ b/test/test_io.cpp @@ -130,7 +130,7 @@ TEST_CASE("test decompression size limit") std::istreambuf_iterator()); stream.close(); - std::size_t limit = 20 * 1024 * 1024; // 20 Mb + std::size_t limit = 500 * 1024 * 1024; // 500 Mb // file should be about 500 mb uncompressed gzip::Decompressor decomp(limit); std::string output; From 72ca10530939de11fd7ad098eece90cbfa35a32f Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Thu, 27 Sep 2018 14:48:02 +0100 Subject: [PATCH 08/18] use `resize` to grow output buffer + remove bogus code and apply max size logic to output buffer + throw an exception if request to grow output buffer exceeds max_size (default 2GB). --- include/gzip/decompress.hpp | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index 627e670..f631719 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -12,11 +12,11 @@ namespace gzip { class Decompressor { - std::size_t max_; + std::size_t const max_; struct libdeflate_decompressor* decompressor_ = nullptr; public: - Decompressor(std::size_t max_bytes = 1000000000) // by default refuse operation if compressed data is > 1GB + Decompressor(std::size_t max_bytes = 2147483648u) // by default refuse operation if required uutput buffer is > 2GB : max_(max_bytes) { decompressor_ = libdeflate_alloc_decompressor(); @@ -36,28 +36,14 @@ class Decompressor template void decompress(OutputType& output, - const char* data, + char const* data, std::size_t size) const { - -#ifdef DEBUG - // Verify if size (long type) input will fit into unsigned int, type used for zlib's avail_in - std::uint64_t size_64 = size * 2; - if (size_64 > std::numeric_limits::max()) - { - throw std::runtime_error("size arg is too large to fit into unsigned int type x2"); - } -#endif - if (size > max_ || (size * 2) > max_) - { - throw std::runtime_error("size may use more memory than intended when decompressing"); - } - // https://github.com/kaorimatz/libdeflate-ruby/blob/0e33da96cdaad3162f03ec924b25b2f4f2847538/ext/libdeflate/libdeflate_ext.c#L340 // https://github.com/ebiggers/libdeflate/commit/5a9d25a8922e2d74618fba96e56db4fe145510f4 std::size_t actual_size; - std::size_t uncompressed_size_guess = size * 4; - output.reserve(uncompressed_size_guess); + std::size_t uncompressed_size_guess = std::min(size * 4, max_); + output.resize(uncompressed_size_guess); enum libdeflate_result result; for (;;) { @@ -65,12 +51,17 @@ class Decompressor data, size, const_cast(output.data()), - output.capacity(), &actual_size); + output.size(), &actual_size); if (result != LIBDEFLATE_INSUFFICIENT_SPACE) { break; } - output.reserve((output.capacity() << 1) - output.size()); + std::size_t new_size = (output.capacity() << 1) - output.size(); + if (new_size > max_) + { + throw std::runtime_error("request to resize output buffer exceeded maximum limit"); + } + output.resize(new_size); } if (result == LIBDEFLATE_SHORT_OUTPUT) From 16c669ce1c278fef1cdc4ec6709afa5f135b74a2 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Thu, 27 Sep 2018 15:12:02 +0100 Subject: [PATCH 09/18] improve output buffer growth logic by ensuring size never exceed max allowed. --- include/gzip/decompress.hpp | 6 +++--- test/test_io.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index f631719..5292e90 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -56,11 +56,11 @@ class Decompressor { break; } - std::size_t new_size = (output.capacity() << 1) - output.size(); - if (new_size > max_) + if (output.size() == max_) { - throw std::runtime_error("request to resize output buffer exceeded maximum limit"); + throw std::runtime_error("request to resize output buffer can't exceed maximum limit"); } + std::size_t new_size = std::min((output.capacity() << 1) - output.size(), max_); output.resize(new_size); } diff --git a/test/test_io.cpp b/test/test_io.cpp index 4a9bd9c..b322ad4 100644 --- a/test/test_io.cpp +++ b/test/test_io.cpp @@ -135,5 +135,5 @@ TEST_CASE("test decompression size limit") gzip::Decompressor decomp(limit); std::string output; CHECK_THROWS(decomp.decompress(output, str_compressed.data(), str_compressed.size())); - CHECK(output.size() < limit); + CHECK(output.size() <= limit); } From 864905e937fad5d444058d4a49595f5f373e2636 Mon Sep 17 00:00:00 2001 From: Dane Springmeyer Date: Fri, 28 Sep 2018 10:08:11 -0700 Subject: [PATCH 10/18] bump to see if travis now works again From 4b7ec6ff28a70cd0b476fb2b19ae0c9b0441adf8 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 3 Oct 2018 10:47:06 +0100 Subject: [PATCH 11/18] Update libdeflate to version 1.0 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b5fc69..41dcb42 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,7 @@ include_directories(SYSTEM ${MASON_PACKAGE_catch_INCLUDE_DIRS}) mason_use(benchmark VERSION 1.3.0) include_directories(SYSTEM ${MASON_PACKAGE_benchmark_INCLUDE_DIRS}) -mason_use(libdeflate VERSION e9d1014) +mason_use(libdeflate VERSION 1.0) include_directories(SYSTEM ${MASON_PACKAGE_libdeflate_INCLUDE_DIRS}) include_directories("${PROJECT_SOURCE_DIR}/include") From d9e64d130f6dc1045973c4e099367a5a793eea8c Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 3 Oct 2018 10:58:20 +0100 Subject: [PATCH 12/18] remove zlib specific test case --- test/test_io.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/test/test_io.cpp b/test/test_io.cpp index b322ad4..73efea1 100644 --- a/test/test_io.cpp +++ b/test/test_io.cpp @@ -49,23 +49,6 @@ TEST_CASE("successful decompress - pointer") REQUIRE(data == value); } -#ifdef DEBUG -TEST_CASE("fail decompress - pointer, debug throws int overflow") -{ - std::string data = "hello hello hello hello"; - const char* pointer = data.data(); - std::string compressed_data = gzip::compress(pointer, data.size()); - const char* compressed_pointer = compressed_data.data(); - - // numeric_limit useful for integer conversion - unsigned int i = std::numeric_limits::max(); - // turn int i into a long, so we can add to it safely without overflow - unsigned long l = static_cast(i) + 1; - - CHECK_THROWS_WITH(gzip::decompress(compressed_pointer, l), Catch::Contains("size arg is too large to fit into unsigned int type x2")); -} -#endif - TEST_CASE("invalid decompression") { std::string data("this is a string that should be compressed data"); From abfdc494c7bc242391831dea3e04274a7b04da12 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 3 Oct 2018 11:30:49 +0100 Subject: [PATCH 13/18] Use meaningful names for template parameters (#27) --- include/gzip/compress.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/gzip/compress.hpp b/include/gzip/compress.hpp index 5d22cae..caec978 100644 --- a/include/gzip/compress.hpp +++ b/include/gzip/compress.hpp @@ -37,9 +37,9 @@ class Compressor } } - template - void compress(InputType& output, - const char* data, + template + void compress(OutputType& output, + char const* data, std::size_t size) const { From bd480a6dfbddf5ec9b970f740078f947ad1dca55 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 3 Oct 2018 11:31:58 +0100 Subject: [PATCH 14/18] remove remaining gzip specific validation and test. --- include/gzip/compress.hpp | 8 -------- test/test_io.cpp | 14 -------------- 2 files changed, 22 deletions(-) diff --git a/include/gzip/compress.hpp b/include/gzip/compress.hpp index caec978..59b9e14 100644 --- a/include/gzip/compress.hpp +++ b/include/gzip/compress.hpp @@ -42,14 +42,6 @@ class Compressor char const* data, std::size_t size) const { - -#ifdef DEBUG - // Verify if size input will fit into unsigned int, type used for zlib's avail_in - if (size > std::numeric_limits::max()) - { - throw std::runtime_error("size arg is too large to fit into unsigned int type"); - } -#endif if (size > max_) { throw std::runtime_error("size may use more memory than intended when decompressing"); diff --git a/test/test_io.cpp b/test/test_io.cpp index 73efea1..a3989b9 100644 --- a/test/test_io.cpp +++ b/test/test_io.cpp @@ -25,20 +25,6 @@ TEST_CASE("fail compress - throws max size limit") CHECK_THROWS_WITH(gzip::compress(pointer, l), Catch::Contains("size may use more memory than intended when decompressing")); } -#ifdef DEBUG -TEST_CASE("fail compress - pointer, debug throws int overflow") -{ - std::string data = "hello hello hello hello"; - const char* pointer = data.data(); - // numeric_limit useful for integer conversion - unsigned int i = std::numeric_limits::max(); - // turn int i into a long, so we can add to it safely without overflow - unsigned long l = static_cast(i) + 1; - - CHECK_THROWS_WITH(gzip::compress(pointer, l), Catch::Contains("size arg is too large to fit into unsigned int type")); -} -#endif - TEST_CASE("successful decompress - pointer") { std::string data = "hello hello hello hello"; From 487927cff6be975919d36bd031b97cb28d0623fe Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 3 Oct 2018 11:34:07 +0100 Subject: [PATCH 15/18] remove useless SECTION (#27) --- test/test_io.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/test_io.cpp b/test/test_io.cpp index a3989b9..39d0181 100644 --- a/test/test_io.cpp +++ b/test/test_io.cpp @@ -6,13 +6,9 @@ TEST_CASE("successful compress") { std::string data = "hello hello hello hello"; - - SECTION("pointer") - { - const char* pointer = data.data(); - std::string value = gzip::compress(pointer, data.size()); - REQUIRE(!value.empty()); - } + const char* pointer = data.data(); + std::string value = gzip::compress(pointer, data.size()); + REQUIRE(!value.empty()); } TEST_CASE("fail compress - throws max size limit") From 4e284d0c17728889f72a56852f2ec820d75fe130 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 3 Oct 2018 11:38:19 +0100 Subject: [PATCH 16/18] make `is_compressed` noexcept (#27) --- include/gzip/utils.hpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/gzip/utils.hpp b/include/gzip/utils.hpp index db123d1..82cabae 100644 --- a/include/gzip/utils.hpp +++ b/include/gzip/utils.hpp @@ -5,18 +5,18 @@ namespace gzip { // These live in gzip.hpp because it doesnt need to use deps. // Otherwise, they would need to live in impl files if these methods used // zlib structures or functions like inflate/deflate) -inline bool is_compressed(const char* data, std::size_t size) +inline bool is_compressed(const char* data, std::size_t size) noexcept { return size > 2 && - ( - // zlib - ( - static_cast(data[0]) == 0x78 && - (static_cast(data[1]) == 0x9C || - static_cast(data[1]) == 0x01 || - static_cast(data[1]) == 0xDA || - static_cast(data[1]) == 0x5E)) || - // gzip - (static_cast(data[0]) == 0x1F && static_cast(data[1]) == 0x8B)); + ( + // zlib + ( + static_cast(data[0]) == 0x78 && + (static_cast(data[1]) == 0x9C || + static_cast(data[1]) == 0x01 || + static_cast(data[1]) == 0xDA || + static_cast(data[1]) == 0x5E)) || + // gzip + (static_cast(data[0]) == 0x1F && static_cast(data[1]) == 0x8B)); } } // namespace gzip From 15205f2c4b954135a89ec1653ff7cd29673b9b07 Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Wed, 3 Oct 2018 12:18:36 +0100 Subject: [PATCH 17/18] format --- include/gzip/utils.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/gzip/utils.hpp b/include/gzip/utils.hpp index 82cabae..affcd64 100644 --- a/include/gzip/utils.hpp +++ b/include/gzip/utils.hpp @@ -8,15 +8,15 @@ namespace gzip { inline bool is_compressed(const char* data, std::size_t size) noexcept { return size > 2 && - ( - // zlib - ( - static_cast(data[0]) == 0x78 && - (static_cast(data[1]) == 0x9C || - static_cast(data[1]) == 0x01 || - static_cast(data[1]) == 0xDA || - static_cast(data[1]) == 0x5E)) || - // gzip - (static_cast(data[0]) == 0x1F && static_cast(data[1]) == 0x8B)); + ( + // zlib + ( + static_cast(data[0]) == 0x78 && + (static_cast(data[1]) == 0x9C || + static_cast(data[1]) == 0x01 || + static_cast(data[1]) == 0xDA || + static_cast(data[1]) == 0x5E)) || + // gzip + (static_cast(data[0]) == 0x1F && static_cast(data[1]) == 0x8B)); } } // namespace gzip From b4684ca0458dee5d99b095ad2f22bc9f600ed32b Mon Sep 17 00:00:00 2001 From: Artem Pavlenko Date: Tue, 9 Oct 2018 10:10:33 +0100 Subject: [PATCH 18/18] Make Compressor/Decompressor noncopyable --- include/gzip/compress.hpp | 3 +++ include/gzip/decompress.hpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/gzip/compress.hpp b/include/gzip/compress.hpp index 59b9e14..1ff90dd 100644 --- a/include/gzip/compress.hpp +++ b/include/gzip/compress.hpp @@ -15,6 +15,9 @@ class Compressor std::size_t max_; int level_; struct libdeflate_compressor* compressor_ = nullptr; + // make noncopyable + Compressor(Compressor const&) = delete; + Compressor& operator=(Compressor const&) = delete; public: Compressor(int level = 6, diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp index 5292e90..2efc214 100644 --- a/include/gzip/decompress.hpp +++ b/include/gzip/decompress.hpp @@ -14,6 +14,9 @@ class Decompressor { std::size_t const max_; struct libdeflate_decompressor* decompressor_ = nullptr; + // make noncopyable + Decompressor(Decompressor const&) = delete; + Decompressor& operator=(Decompressor const&) = delete; public: Decompressor(std::size_t max_bytes = 2147483648u) // by default refuse operation if required uutput buffer is > 2GB