diff --git a/src/ZipFile.jl b/src/ZipFile.jl
index eb1b5df..afd440a 100644
--- a/src/ZipFile.jl
+++ b/src/ZipFile.jl
@@ -516,13 +516,7 @@ function update_reader!(f::ReadableFile, data::Array{UInt8})
     f._zpos = position(f._io) - f._datapos
     datalen = length(data)
     f._pos += datalen
-    chunk_size = if Sys.WORD_SIZE > 32 2^31 else datalen end
-    start = 1
-    while datalen > 0
-        f._currentcrc32 = Zlib.crc32(view(data, start:start-1+min(datalen, chunk_size)), f._currentcrc32)
-        datalen -= chunk_size
-        start += chunk_size
-    end
+    f._currentcrc32 = Zlib.crc32(data, f._currentcrc32)
 
     if eof(f)
         if f.method == Deflate
@@ -669,9 +663,8 @@ Base.readavailable(io::ZipFile.ReadableFile) = read(io)
 
 # Write nb elements located at p into f.
 function unsafe_write(f::WritableFile, p::Ptr{UInt8}, nb::UInt)
-    # zlib doesn't like 0 length writes
     if nb == 0
-        return 0
+        return UInt(0)
     end
 
     n = unsafe_write(f._zio, p, nb)
@@ -679,7 +672,7 @@ function unsafe_write(f::WritableFile, p::Ptr{UInt8}, nb::UInt)
         error("short write")
     end
 
-    f.crc32 = Zlib.crc32(unsafe_wrap(Array, p, nb), f.crc32)
+    f.crc32 = Zlib.unsafe_crc32(p, nb, f.crc32)
     f.uncompressedsize += n
     n
 end
diff --git a/src/Zlib.jl b/src/Zlib.jl
index e7d1ca1..1174e61 100644
--- a/src/Zlib.jl
+++ b/src/Zlib.jl
@@ -122,15 +122,20 @@ end
 
 Writer(io::IO, raw::Bool=false) = Writer(io, 9, raw)
 
-function write(w::Writer, p::Ptr, nb::Integer)
+function Base.unsafe_write(w::Writer, p::Ptr{UInt8}, nb::UInt)::UInt
+    if nb == 0
+        return UInt(0)
+    end
+    max_chunk_size::UInt = UInt(typemax(Cuint))>>1
+    chunk_offset = UInt(0)
+    num_bytes_left = nb
+    chunk_size = min(max_chunk_size, num_bytes_left)
+    w.strm.avail_in = chunk_size
     w.strm.next_in = p
-    w.strm.avail_in = nb
     outbuf = Vector{UInt8}(undef, 1024)
-
     GC.@preserve outbuf while true
         w.strm.avail_out = length(outbuf)
         w.strm.next_out = pointer(outbuf)
-
         ret = ccall((:deflate, libz),
                     Int32, (Ptr{z_stream}, Int32),
                     Ref(w.strm), Z_NO_FLUSH)
@@ -139,10 +144,21 @@ function write(w::Writer, p::Ptr, nb::Integer)
         end
 
         n = length(outbuf) - w.strm.avail_out
-        if n > 0 && write(w.io, outbuf[1:n]) != n
+        if n > 0 && write(w.io, view(outbuf,1:n)) != n
             error("short write")
         end
-        if w.strm.avail_out != 0
+        # Update w.strm.avail_in if needed
+        if w.strm.avail_in == 0
+            # mark that previous chunk was written
+            chunk_offset += chunk_size
+            num_bytes_left -= chunk_size
+            # new chunk size, will be zero at the end.
+            chunk_size = min(max_chunk_size, num_bytes_left)
+            @assert chunk_offset + chunk_size ≤ nb
+            w.strm.next_in = p + chunk_offset
+            w.strm.avail_in = chunk_size
+        end
+        if (w.strm.avail_out != 0) && (w.strm.avail_in == 0)
             break
         end
     end
@@ -151,38 +167,8 @@ function write(w::Writer, p::Ptr, nb::Integer)
     nb
 end
 
-function write(w::Writer, a::Array{UInt8})
-    GC.@preserve a write(w, pointer(a), length(a))
-end
-
-# If this is not provided, Base.IO write methods will write
-# arrays one element at a time.
-function write(w::Writer, a::Array{T}) where T
-    if isbits(T)
-        GC.@preserve a write(w, pointer(a), length(a)*sizeof(T))
-    else
-        invoke(write, Tuple{IO,Array}, w, a)
-    end
-end
-
-# Copied from Julia base/io.jl
-function write(w::Writer, a::SubArray{T,N,A}) where {T,N,A<:Array}
-    if !isbits(T) || stride(a,1)!=1
-        return invoke(write, Tuple{Any,AbstractArray}, s, a)
-    end
-    colsz = size(a,1)*sizeof(T)
-    if N<=1
-        return GC.@preserve a write(s, pointer(a, 1), colsz)
-    else
-        for idx in CartesianRange(tuple(1, size(a)[2:end]...))
-            GC.@preserve a write(w, pointer(a, idx.I), colsz)
-        end
-        return colsz*Base.trailingsize(a,2)
-    end
-end
-
 function write(w::Writer, b::UInt8)
-    write(w, UInt8[b])
+    write(w, Ref(b))
 end
 
 function close(w::Writer)
@@ -365,10 +351,17 @@ function eof(r::Reader)
     bytesavailable(r.buf) == 0 && eof(r.io)
 end
 
-function crc32(data::AbstractArray{UInt8}, crc::Integer=0)
-    convert(UInt32, (ccall((:crc32, libz),
-                           Culong, (Culong, Ptr{UInt8}, Cuint),
-                           crc, data, length(data))))
+function unsafe_crc32(p::Ptr{UInt8}, nb::UInt, crc::UInt32)::UInt32
+    ccall((:crc32_z, libz),
+        Culong, (Culong, Ptr{UInt8}, Csize_t),
+        crc, p, nb,
+    )
+end
+
+function crc32(data::AbstractArray{UInt8}, crc::Integer=0)::UInt32
+    GC.@preserve data begin
+        unsafe_crc32(pointer(data), UInt(length(data)), UInt32(crc))
+    end
 end
 
 crc32(data::AbstractString, crc::Integer=0) = crc32(convert(AbstractArray{UInt8}, data), crc)
diff --git a/test/bigtests.jl b/test/bigtests.jl
new file mode 100644
index 0000000..9a4f68c
--- /dev/null
+++ b/test/bigtests.jl
@@ -0,0 +1,43 @@
+# These tests require over 8 GB of memory and a 64 bit Int
+
+using ZipFile
+using Test
+
+@testset "big array with Zlib" begin
+    big_array = collect(1:2^29+2^25)
+
+    io = IOBuffer()
+    w = ZipFile.Zlib.Writer(io, 1, true)
+    write(w, big_array)
+    close(w)
+    w = nothing
+    @info "done writing big_array"
+    seekstart(io)
+    r = ZipFile.Zlib.Reader(io, true)
+    buffer = zeros(Int, 2^22)
+    for bi in 1:(length(big_array)>>22)
+        read!(r, buffer)
+        @test ((bi-1)<<22+1):(bi<<22) == buffer
+    end
+    close(r)
+    r = nothing
+    close(io)
+    io = nothing
+    @info "done reading big_array"
+
+    # Check that crc32 works
+    crc32_big::UInt32 = ZipFile.Zlib.crc32(
+        reinterpret(UInt8, big_array)
+    )
+    crc32_parts::UInt32 = 0
+    for bi in 1:(length(big_array)>>22)
+        crc32_parts = ZipFile.Zlib.crc32(
+            reinterpret(UInt8, view(big_array,((bi-1)<<22+1):(bi<<22))),
+            crc32_parts
+        )
+    end
+    @test crc32_parts == crc32_big
+
+
+    big_array = nothing
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 5ce1221..5d83a8e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -182,4 +182,8 @@ if !Debug
     rm(tmp, recursive=true)
 end
 
+if "bigtests" in ARGS
+    include("bigtests.jl")
+end
+
 println("done")