From 1af3f0d475be12306bcc7cb540382724e8543973 Mon Sep 17 00:00:00 2001 From: nhz2 Date: Fri, 7 Mar 2025 10:12:22 -0500 Subject: [PATCH 1/3] Add ZstdCompressor --- Project.toml | 2 ++ docs/src/reference.md | 2 +- src/Compressors/Compressors.jl | 1 + src/Compressors/zstd.jl | 49 ++++++++++++++++++++++++++++++++++ test/python.jl | 6 +++-- 5 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 src/Compressors/zstd.jl diff --git a/Project.toml b/Project.toml index 8303d7b..54db8d2 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.9.4" [deps] AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" +ChunkCodecLibZstd = "55437552-ac27-4d47-9aa3-63184e8fd398" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" DateTimes64 = "b342263e-b350-472a-b1a9-8dfd21b51589" @@ -22,6 +23,7 @@ ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" [compat] AWSS3 = "0.10, 0.11" Blosc = "0.5, 0.6, 0.7" +ChunkCodecLibZstd = "0.1.1" CodecZlib = "0.6, 0.7" DataStructures = "0.17, 0.18" DateTimes64 = "1" diff --git a/docs/src/reference.md b/docs/src/reference.md index 4cf889a..7d0e31c 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -18,5 +18,5 @@ Pages = ["ZGroup.jl"] ```@autodocs Modules = [Zarr] -Pages = ["Compressors/Compressors.jl", "Compressors/blosc.jl", "Compressors/zlib.jl"] +Pages = ["Compressors/Compressors.jl", "Compressors/blosc.jl", "Compressors/zlib.jl", "Compressors/zstd.jl"] ``` diff --git a/src/Compressors/Compressors.jl b/src/Compressors/Compressors.jl index e676c95..1854128 100644 --- a/src/Compressors/Compressors.jl +++ b/src/Compressors/Compressors.jl @@ -48,6 +48,7 @@ const compressortypes = Dict{Union{String,Nothing}, Type{<: Compressor}}() # Include the compressor implementations include("blosc.jl") include("zlib.jl") +include("zstd.jl") # ## Fallback definitions for the compressor interface # Define fallbacks and generic methods for the compressor interface diff --git a/src/Compressors/zstd.jl b/src/Compressors/zstd.jl new file mode 100644 index 0000000..d852294 --- /dev/null +++ b/src/Compressors/zstd.jl @@ -0,0 +1,49 @@ +#= +# Zstd compression + +This file implements a Zstd compressor via ChunkCodecLibZstd.jl. + +=# + +using ChunkCodecLibZstd: ZstdEncodeOptions, encode, decode, ChunkCodecCore + + +""" + ZstdCompressor(;level=0, checksum=false) +Returns a `ZstdCompressor` struct that can serve as a Zarr array compressor. Keyword arguments are: +* `level=0`: the compression level, regular levels are 1 to 22, 0 is a special value for default, there are also even faster negative levels. +* `checksum=false`: flag to enable saving checksums. +""" +struct ZstdCompressor <: Compressor + config::ZstdEncodeOptions +end + +ZstdCompressor(;level=0, checksum::Bool=false) = ZstdCompressor(ZstdEncodeOptions(;compressionLevel=level, checksum)) + +function getCompressor(::Type{ZstdCompressor}, d::Dict) + ZstdCompressor(; + level=get(Returns(0), d, "level"), + checksum=Bool(get(Returns(false), d, "checksum")), + ) +end + +function zuncompress(a, ::ZstdCompressor, T) + result = decode(z.config.codec, a) + _reinterpret(Base.nonmissingtype(T),result) +end + +function zuncompress!(data::DenseArray, compressed, z::ZstdCompressor) + dst = reinterpret(UInt8, vec(data)) + n = length(dst) + n_decoded = something(ChunkCodecCore.try_decode!(z.config.codec, dst, compressed))::Int64 + n_decoded == n || error("expected to decode $n bytes, only got $n_decoded bytes") + data +end + +function zcompress(a, z::ZstdCompressor) + encode(z.config, reinterpret(UInt8, vec(a))) +end + +JSON.lower(z::ZstdCompressor) = Dict("id"=>"zstd", "level" => z.config.compressionLevel, "checksum" => z.config.checksum) + +Zarr.compressortypes["zstd"] = ZstdCompressor \ No newline at end of file diff --git a/test/python.jl b/test/python.jl index 9eb9f4e..8160d01 100644 --- a/test/python.jl +++ b/test/python.jl @@ -21,7 +21,7 @@ groupattrs = Dict("String attribute"=>"One", "Int attribute"=>5, "Float attribut g = zgroup(pjulia,attrs=groupattrs) # Test all supported data types and compressors -import Zarr: NoCompressor, BloscCompressor, ZlibCompressor, MaxLengthString, +import Zarr: NoCompressor, BloscCompressor, ZlibCompressor, ZstdCompressor, MaxLengthString, Fletcher32Filter, FixedScaleOffsetFilter, ShuffleFilter, QuantizeFilter, DeltaFilter using Random: randstring numeric_dtypes = (UInt8, UInt16, UInt32, UInt64, @@ -38,7 +38,9 @@ compressors = ( "blosc_autoshuffle"=>BloscCompressor(cname="zstd",shuffle=-1), "blosc_noshuffle"=>BloscCompressor(cname="zstd",shuffle=0), "blosc_bitshuffle"=>BloscCompressor(cname="zstd",shuffle=2), - "zlib"=>ZlibCompressor()) + "zlib"=>ZlibCompressor(), + "zstd"=>ZstdCompressor(), +) filters = ( "fletcher32"=>Fletcher32Filter(), "scale_offset"=>FixedScaleOffsetFilter(offset=1000, scale=10^6, T=Float64, Tenc=Int32), From fcd82ab629c5752a8c425769a823d18b8c093d6d Mon Sep 17 00:00:00 2001 From: nhz2 Date: Fri, 7 Mar 2025 15:29:43 -0500 Subject: [PATCH 2/3] fix typo --- src/Compressors/zstd.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compressors/zstd.jl b/src/Compressors/zstd.jl index d852294..0f28ec4 100644 --- a/src/Compressors/zstd.jl +++ b/src/Compressors/zstd.jl @@ -27,7 +27,7 @@ function getCompressor(::Type{ZstdCompressor}, d::Dict) ) end -function zuncompress(a, ::ZstdCompressor, T) +function zuncompress(a, z::ZstdCompressor, T) result = decode(z.config.codec, a) _reinterpret(Base.nonmissingtype(T),result) end From d5a86559883a154e6aecca5c674e3ec5baba991c Mon Sep 17 00:00:00 2001 From: nhz2 Date: Mon, 7 Apr 2025 15:06:03 -0400 Subject: [PATCH 3/3] Use decode! --- Project.toml | 6 ++++-- src/Compressors/zstd.jl | 10 +++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Project.toml b/Project.toml index 54db8d2..73bb5cf 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.9.4" [deps] AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" +ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1" ChunkCodecLibZstd = "55437552-ac27-4d47-9aa3-63184e8fd398" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -23,7 +24,8 @@ ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" [compat] AWSS3 = "0.10, 0.11" Blosc = "0.5, 0.6, 0.7" -ChunkCodecLibZstd = "0.1.1" +ChunkCodecCore = "0.4.2" +ChunkCodecLibZstd = "0.1.2" CodecZlib = "0.6, 0.7" DataStructures = "0.17, 0.18" DateTimes64 = "1" @@ -34,4 +36,4 @@ OffsetArrays = "0.11, 1.0" OpenSSL = "1" URIs = "1" ZipArchives = "2" -julia = "1.2" +julia = "1.10" diff --git a/src/Compressors/zstd.jl b/src/Compressors/zstd.jl index 0f28ec4..84aae01 100644 --- a/src/Compressors/zstd.jl +++ b/src/Compressors/zstd.jl @@ -4,9 +4,8 @@ This file implements a Zstd compressor via ChunkCodecLibZstd.jl. =# - -using ChunkCodecLibZstd: ZstdEncodeOptions, encode, decode, ChunkCodecCore - +using ChunkCodecLibZstd: ZstdEncodeOptions +using ChunkCodecCore: encode, decode, decode! """ ZstdCompressor(;level=0, checksum=false) @@ -33,10 +32,7 @@ function zuncompress(a, z::ZstdCompressor, T) end function zuncompress!(data::DenseArray, compressed, z::ZstdCompressor) - dst = reinterpret(UInt8, vec(data)) - n = length(dst) - n_decoded = something(ChunkCodecCore.try_decode!(z.config.codec, dst, compressed))::Int64 - n_decoded == n || error("expected to decode $n bytes, only got $n_decoded bytes") + decode!(z.config.codec, reinterpret(UInt8, vec(data)), compressed) data end