mapbox · springmeyer · Apr 3, 2018 · Apr 3, 2018 · Jul 28, 2018 · Jul 28, 2018
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -35,8 +35,8 @@ include_directories(SYSTEM ${MASON_PACKAGE_catch_INCLUDE_DIRS})
 mason_use(benchmark VERSION 1.3.0)
 include_directories(SYSTEM ${MASON_PACKAGE_benchmark_INCLUDE_DIRS})
 
-mason_use(zlib VERSION 1.2.8)
-include_directories(SYSTEM ${MASON_PACKAGE_zlib_INCLUDE_DIRS})
+mason_use(libdeflate VERSION 1.0)
+include_directories(SYSTEM ${MASON_PACKAGE_libdeflate_INCLUDE_DIRS})
 
 include_directories("${PROJECT_SOURCE_DIR}/include")
 
@@ -49,5 +49,5 @@ file(GLOB BENCH_SOURCES bench/*.cpp)
 add_executable(bench-tests ${BENCH_SOURCES})
 
 # link zlib static library to the unit-tests binary so the tests know where to find the zlib impl code
-target_link_libraries(unit-tests ${MASON_PACKAGE_zlib_STATIC_LIBS})
-target_link_libraries(bench-tests ${MASON_PACKAGE_benchmark_STATIC_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${MASON_PACKAGE_zlib_STATIC_LIBS})
+target_link_libraries(unit-tests ${MASON_PACKAGE_libdeflate_STATIC_LIBS})
+target_link_libraries(bench-tests ${MASON_PACKAGE_benchmark_STATIC_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${MASON_PACKAGE_libdeflate_STATIC_LIBS})
diff --git a/include/gzip/compress.hpp b/include/gzip/compress.hpp
@@ -1,7 +1,7 @@
 #include <gzip/config.hpp>
 
 // zlib
-#include <zlib.h>
+#include <libdeflate.h>
 
 // std
 #include <limits>
@@ -14,95 +14,65 @@ class Compressor
 {
     std::size_t max_;
     int level_;
+    struct libdeflate_compressor* compressor_ = nullptr;
+    // make noncopyable
+    Compressor(Compressor const&) = delete;
+    Compressor& operator=(Compressor const&) = delete;
 
   public:
-    Compressor(int level = Z_DEFAULT_COMPRESSION,
+    Compressor(int level = 6,
                std::size_t max_bytes = 2000000000) // by default refuse operation if uncompressed data is > 2GB
         : max_(max_bytes),
           level_(level)
     {
+        compressor_ = libdeflate_alloc_compressor(level_);
+        if (!compressor_)
+        {
+            throw std::runtime_error("libdeflate_alloc_compressor failed");
+        }
     }
 
-    template <typename InputType>
-    void compress(InputType& output,
-                  const char* data,
-                  std::size_t size) const
+    ~Compressor()
     {
-
-#ifdef DEBUG
-        // Verify if size input will fit into unsigned int, type used for zlib's avail_in
-        if (size > std::numeric_limits<unsigned int>::max())
+        if (compressor_)
         {
-            throw std::runtime_error("size arg is too large to fit into unsigned int type");
+            libdeflate_free_compressor(compressor_);
         }
-#endif
+    }
+
+    template <typename OutputType>
+    void compress(OutputType& output,
+                  char const* data,
+                  std::size_t size) const
+    {
         if (size > max_)
         {
             throw std::runtime_error("size may use more memory than intended when decompressing");
         }
 
-        z_stream deflate_s;
-        deflate_s.zalloc = Z_NULL;
-        deflate_s.zfree = Z_NULL;
-        deflate_s.opaque = Z_NULL;
-        deflate_s.avail_in = 0;
-        deflate_s.next_in = Z_NULL;
-
-        // The windowBits parameter is the base two logarithm of the window size (the size of the history buffer).
-        // It should be in the range 8..15 for this version of the library.
-        // Larger values of this parameter result in better compression at the expense of memory usage.
-        // This range of values also changes the decoding type:
-        //  -8 to -15 for raw deflate
-        //  8 to 15 for zlib
-        // (8 to 15) + 16 for gzip
-        // (8 to 15) + 32 to automatically detect gzip/zlib header (decompression/inflate only)
-        constexpr int window_bits = 15 + 16; // gzip with windowbits of 15
-
-        constexpr int mem_level = 8;
-        // The memory requirements for deflate are (in bytes):
-        // (1 << (window_bits+2)) +  (1 << (mem_level+9))
-        // with a default value of 8 for mem_level and our window_bits of 15
-        // this is 128Kb
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wold-style-cast"
-        if (deflateInit2(&deflate_s, level_, Z_DEFLATED, window_bits, mem_level, Z_DEFAULT_STRATEGY) != Z_OK)
+        std::size_t max_compressed_size = libdeflate_gzip_compress_bound(compressor_, size);
+        // TODO: sanity check this before allocating
+        if (max_compressed_size > output.size())
         {
-            throw std::runtime_error("deflate init failed");
+            output.resize(max_compressed_size);
         }
-#pragma GCC diagnostic pop
 
-        deflate_s.next_in = reinterpret_cast<z_const Bytef*>(data);
-        deflate_s.avail_in = static_cast<unsigned int>(size);
-
-        std::size_t size_compressed = 0;
-        do
+        std::size_t actual_compressed_size = libdeflate_gzip_compress(compressor_,
+                                                                      data,
+                                                                      size,
+                                                                      const_cast<char*>(output.data()),
+                                                                      max_compressed_size);
+        if (actual_compressed_size == 0)
         {
-            size_t increase = size / 2 + 1024;
-            if (output.size() < (size_compressed + increase))
-            {
-                output.resize(size_compressed + increase);
-            }
-            // There is no way we see that "increase" would not fit in an unsigned int,
-            // hence we use static cast here to avoid -Wshorten-64-to-32 error
-            deflate_s.avail_out = static_cast<unsigned int>(increase);
-            deflate_s.next_out = reinterpret_cast<Bytef*>((&output[0] + size_compressed));
-            // From http://www.zlib.net/zlib_how.html
-            // "deflate() has a return value that can indicate errors, yet we do not check it here.
-            // Why not? Well, it turns out that deflate() can do no wrong here."
-            // Basically only possible error is from deflateInit not working properly
-            deflate(&deflate_s, Z_FINISH);
-            size_compressed += (increase - deflate_s.avail_out);
-        } while (deflate_s.avail_out == 0);
-
-        deflateEnd(&deflate_s);
-        output.resize(size_compressed);
+            throw std::runtime_error("actual_compressed_size 0");
+        }
+        output.resize(actual_compressed_size);
     }
 };
 
 inline std::string compress(const char* data,
                             std::size_t size,
-                            int level = Z_DEFAULT_COMPRESSION)
+                            int level = 6)
 {
     Compressor comp(level);
     std::string output;

diff --git a/include/gzip/decompress.hpp b/include/gzip/decompress.hpp
@@ -1,7 +1,7 @@
 #include <gzip/config.hpp>
 
 // zlib
-#include <zlib.h>
+#include <libdeflate.h>
 
 // std
 #include <limits>
@@ -12,85 +12,74 @@ namespace gzip {
 
 class Decompressor
 {
-    std::size_t max_;
+    std::size_t const max_;
+    struct libdeflate_decompressor* decompressor_ = nullptr;
+    // make noncopyable
+    Decompressor(Decompressor const&) = delete;
+    Decompressor& operator=(Decompressor const&) = delete;
 
   public:
-    Decompressor(std::size_t max_bytes = 1000000000) // by default refuse operation if compressed data is > 1GB
+    Decompressor(std::size_t max_bytes = 2147483648u) // by default refuse operation if required output buffer is > 2GB
         : max_(max_bytes)
     {
+        decompressor_ = libdeflate_alloc_decompressor();
+        if (!decompressor_)
+        {
+            throw std::runtime_error("libdeflate_alloc_decompressor failed");
+        }
+    }
+
+    ~Decompressor()
+    {
+        if (decompressor_)
+        {
+            libdeflate_free_decompressor(decompressor_);
+        }
     }
 
     template <typename OutputType>
     void decompress(OutputType& output,
-                    const char* data,
+                    char const* data,
                     std::size_t size) const
     {
-        z_stream inflate_s;
-
-        inflate_s.zalloc = Z_NULL;
-        inflate_s.zfree = Z_NULL;
-        inflate_s.opaque = Z_NULL;
-        inflate_s.avail_in = 0;
-        inflate_s.next_in = Z_NULL;
-
-        // The windowBits parameter is the base two logarithm of the window size (the size of the history buffer).
-        // It should be in the range 8..15 for this version of the library.
-        // Larger values of this parameter result in better compression at the expense of memory usage.
-        // This range of values also changes the decoding type:
-        //  -8 to -15 for raw deflate
-        //  8 to 15 for zlib
-        // (8 to 15) + 16 for gzip
-        // (8 to 15) + 32 to automatically detect gzip/zlib header
-        constexpr int window_bits = 15 + 32; // auto with windowbits of 15
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wold-style-cast"
-        if (inflateInit2(&inflate_s, window_bits) != Z_OK)
+        // https://github.com/kaorimatz/libdeflate-ruby/blob/0e33da96cdaad3162f03ec924b25b2f4f2847538/ext/libdeflate/libdeflate_ext.c#L340
+        // https://github.com/ebiggers/libdeflate/commit/5a9d25a8922e2d74618fba96e56db4fe145510f4
+        std::size_t actual_size = 0; // written by libdeflate_gzip_decompress; only read after the result checks below
+        std::size_t uncompressed_size_guess = (size > max_ / 4) ? max_ : size * 4; // min(size * 4, max_) without overflow
+        output.resize(uncompressed_size_guess);
+        enum libdeflate_result result;
+        for (;;)
         {
-            throw std::runtime_error("inflate init failed");
+            result = libdeflate_gzip_decompress(decompressor_,
+                                                data,
+                                                size,
+                                                const_cast<char*>(output.data()),
+                                                output.size(), &actual_size);
+            if (result != LIBDEFLATE_INSUFFICIENT_SPACE)
+            {
+                break;
+            }
+            if (output.size() == max_)
+            {
+                throw std::runtime_error("request to resize output buffer can't exceed maximum limit");
+            }
+            std::size_t new_size = (output.size() >= max_ / 2) ? max_ : output.size() * 2 + 1; // grow from size(), not capacity(): always larger, never above max_
+            output.resize(new_size);
         }
-#pragma GCC diagnostic pop
-        inflate_s.next_in = reinterpret_cast<z_const Bytef*>(data);
 
-#ifdef DEBUG
-        // Verify if size (long type) input will fit into unsigned int, type used for zlib's avail_in
-        std::uint64_t size_64 = size * 2;
-        if (size_64 > std::numeric_limits<unsigned int>::max())
+        if (result == LIBDEFLATE_SHORT_OUTPUT)
         {
-            inflateEnd(&inflate_s);
-            throw std::runtime_error("size arg is too large to fit into unsigned int type x2");
+            throw std::runtime_error("short output: did not succeed");
         }
-#endif
-        if (size > max_ || (size * 2) > max_)
+        else if (result == LIBDEFLATE_BAD_DATA)
         {
-            inflateEnd(&inflate_s);
-            throw std::runtime_error("size may use more memory than intended when decompressing");
+            throw std::runtime_error("bad data: did not succeed");
         }
-        inflate_s.avail_in = static_cast<unsigned int>(size);
-        std::size_t size_uncompressed = 0;
-        do
+        else if (result != LIBDEFLATE_SUCCESS)
         {
-            std::size_t resize_to = size_uncompressed + 2 * size;
-            if (resize_to > max_)
-            {
-                inflateEnd(&inflate_s);
-                throw std::runtime_error("size of output string will use more memory then intended when decompressing");
-            }
-            output.resize(resize_to);
-            inflate_s.avail_out = static_cast<unsigned int>(2 * size);
-            inflate_s.next_out = reinterpret_cast<Bytef*>(&output[0] + size_uncompressed);
-            int ret = inflate(&inflate_s, Z_FINISH);
-            if (ret != Z_STREAM_END && ret != Z_OK && ret != Z_BUF_ERROR)
-            {
-                std::string error_msg = inflate_s.msg;
-                inflateEnd(&inflate_s);
-                throw std::runtime_error(error_msg);
-            }
-
-            size_uncompressed += (2 * size - inflate_s.avail_out);
-        } while (inflate_s.avail_out == 0);
-        inflateEnd(&inflate_s);
-        output.resize(size_uncompressed);
+            throw std::runtime_error("did not succeed");
+        }
+        output.resize(actual_size);
     }
 };
 

diff --git a/include/gzip/utils.hpp b/include/gzip/utils.hpp
@@ -5,7 +5,7 @@ namespace gzip {
 // These live in gzip.hpp because it doesnt need to use deps.
 // Otherwise, they would need to live in impl files if these methods used
 // zlib structures or functions like inflate/deflate)
-inline bool is_compressed(const char* data, std::size_t size)
+inline bool is_compressed(const char* data, std::size_t size) noexcept
 {
     return size > 2 &&
            (