From 29416b88a9771986ebd706c8a8a11e8f8fa4458f Mon Sep 17 00:00:00 2001 From: crusaderky Date: Fri, 17 Oct 2025 16:08:03 +0100 Subject: [PATCH] Increase code sanity and test coverage --- CMakeLists.txt | 21 +++++- pixi.toml | 10 ++- src/blosc_filter.c | 99 +++++++++++++++++---------- src/example.c | 34 ++++++---- src/test_array.c | 113 ++++++++++++++++++++++++++++++ src/test_compound.c | 151 +++++++++++++++++++++++++++++++++++++++++ src/test_strings.c | 133 ++++++++++++++++++++++++++++++++++++ src/test_tiny_chunks.c | 103 ++++++++++++++++++++++++++++ 8 files changed, 608 insertions(+), 56 deletions(-) create mode 100644 src/test_array.c create mode 100644 src/test_compound.c create mode 100644 src/test_strings.c create mode 100644 src/test_tiny_chunks.c diff --git a/CMakeLists.txt b/CMakeLists.txt index ff78664..8801942 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.5.0) +cmake_minimum_required(VERSION 3.10.0) cmake_policy(SET CMP0074 NEW) project(blosc_hdf5) include(ExternalProject) @@ -94,6 +94,23 @@ if(BUILD_TESTS) find_package(Threads REQUIRED) set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) add_executable(example src/example.c) + add_executable(test_array src/test_array.c) + add_executable(test_compound src/test_compound.c) + add_executable(test_strings src/test_strings.c) + add_executable(test_tiny_chunks src/test_tiny_chunks.c) target_link_libraries(example blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS}) - add_test(test_hdf5_filter example) + target_link_libraries(test_array blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS}) + target_link_libraries(test_compound blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS}) + target_link_libraries(test_strings blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS}) + target_link_libraries(test_tiny_chunks blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS}) + add_test(NAME example[nelmts=0] COMMAND example) + add_test(NAME example[nelmts=4] COMMAND example 4) + add_test(NAME example[nelmts=5] COMMAND example 5) + add_test(NAME example[nelmts=6] COMMAND example 6) + add_test(NAME example[nelmts=7] COMMAND example 7) + add_test(NAME test_array COMMAND test_array) + add_test(NAME test_compound[le_BLOSC_MAX_TYPESIZE] COMMAND test_compound 255) + add_test(NAME test_compound[gt_BLOSC_MAX_TYPESIZE] COMMAND test_compound 256) + add_test(NAME test_strings COMMAND test_strings) + add_test(NAME test_tiny_chunks COMMAND test_tiny_chunks) endif(BUILD_TESTS) diff --git a/pixi.toml b/pixi.toml index 36389bf..08555b5 100644 --- a/pixi.toml +++ b/pixi.toml @@ -8,10 +8,16 @@ compilers = "*" cmake = "*" hdf5 = "*" +[activation.env] +# CFLAGS = "-O0 -g -DBLOSC_DEBUG" +# FIXME Debug hangs on Windows GitHub runners +# CMAKE_CONFIG = "Debug" +CMAKE_CONFIG = "Release" + [tasks] clean = "rm -rf build" mkdir-build = { cmd = "mkdir -p build" } build-thirdparty = { cwd = "build", cmd = "cmake .." } -build-hdf5-blosc = { cwd = "build", cmd = "cmake --build . --config Release" } +build-hdf5-blosc = { cwd = "build", cmd = "cmake --build . --config $CMAKE_CONFIG" } build = { depends-on = [ "mkdir-build", "build-thirdparty", "build-hdf5-blosc" ] } -test = { cwd = "build", cmd = "ctest --output-on-failure" } +test = { cwd = "build", cmd = "ctest --output-on-failure --build-config $CMAKE_CONFIG" } diff --git a/src/blosc_filter.c b/src/blosc_filter.c index a099e2d..325e930 100644 --- a/src/blosc_filter.c +++ b/src/blosc_filter.c @@ -11,6 +11,7 @@ */ +#include #include #include #include @@ -28,8 +29,6 @@ #define PUSH_ERR(func, minor, ...) H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__, H5E_ERR_CLS, H5E_PLINE, minor, __VA_ARGS__) #endif /* defined(__GNUC__) */ -#define GET_FILTER(a, b, c, d, e, f, g) H5Pget_filter_by_id(a,b,c,d,e,f,g,NULL) - size_t blosc_filter(unsigned flags, size_t cd_nelmts, const unsigned cd_values[], size_t nbytes, @@ -79,23 +78,34 @@ herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space) { int i; herr_t r; - unsigned int typesize, basetypesize; - unsigned int bufsize; + unsigned int typesize, chunksize, basetypesize; hsize_t chunkdims[32]; unsigned int flags; - size_t nelements = 8; - unsigned int values[] = {0, 0, 0, 0, 0, 0, 0, 0}; + size_t cd_nelmts = 8; + /* + * cd_values[0] = hdf5-blosc format version + * cd_values[1] = blosc format version + * cd_values[2] = typesize + * cd_values[3] = uncompressed chunk size (unused) + * cd_values[4] = compression level + * cd_values[5] = 0: shuffle not active, 1: shuffle active + * cd_values[6] = compressor, e.g. BLOSC_BLOSCLZ + * cd_values[7] = unused + */ + unsigned int cd_values[] = {0, 0, 0, 0, 0, 0, 0, 0}; hid_t super_type; H5T_class_t classt; - r = GET_FILTER(dcpl, FILTER_BLOSC, &flags, &nelements, values, 0, NULL); + r = H5Pget_filter_by_id( + dcpl, FILTER_BLOSC, &flags, &cd_nelmts, cd_values, 0, NULL, NULL + ); if (r < 0) return -1; - if (nelements < 4) nelements = 4; /* First 4 slots reserved. */ + if (cd_nelmts < 4) cd_nelmts = 4; /* First 4 slots reserved. */ /* Set Blosc info in first two slots */ - values[0] = FILTER_BLOSC_VERSION; - values[1] = BLOSC_VERSION_FORMAT; + cd_values[0] = FILTER_BLOSC_VERSION; + cd_values[1] = BLOSC_VERSION_FORMAT; ndims = H5Pget_chunk(dcpl, 32, chunkdims); if (ndims < 0) return -1; @@ -108,6 +118,7 @@ herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space) { if (typesize == 0) return -1; /* Get the size of the base type, even for ARRAY types */ classt = H5Tget_class(type); + if (classt == H5T_NO_CLASS) return -1; if (classt == H5T_ARRAY) { /* Get the array base component */ super_type = H5Tget_super(type); @@ -120,22 +131,25 @@ herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space) { /* Limit large typesizes (they are pretty expensive to shuffle and, in addition, Blosc does not handle typesizes larger than - 256 bytes). */ + 255 bytes). */ if (basetypesize > BLOSC_MAX_TYPESIZE) basetypesize = 1; - values[2] = basetypesize; + cd_values[2] = basetypesize; - /* Get the size of the chunk */ - bufsize = typesize; + /* Get the size of the chunk. This is unused by blosc_filter(). + It is retained for backward compatibility. + */ + chunksize = typesize; for (i = 0; i < ndims; i++) { - bufsize *= chunkdims[i]; + chunksize *= chunkdims[i]; } - values[3] = bufsize; + cd_values[3] = chunksize; #ifdef BLOSC_DEBUG - fprintf(stderr, "Blosc: Computed buffer size %d\n", bufsize); + fprintf(stderr, "Blosc: typesize=%d; chunksize=%d\n", + typesize, chunksize); #endif - r = H5Pmodify_filter(dcpl, FILTER_BLOSC, flags, nelements, values); + r = H5Pmodify_filter(dcpl, FILTER_BLOSC, flags, cd_nelmts, cd_values); if (r < 0) return -1; return 1; @@ -159,9 +173,15 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts, const char* complist; char errmsg[256]; + assert(cd_nelmts >= 4); + assert(cd_values[0] == FILTER_BLOSC_VERSION); + assert(cd_values[1] == BLOSC_VERSION_FORMAT); + assert(nbytes > 0); + assert(*buf_size >= nbytes); + /* Filter params that are always set */ typesize = cd_values[2]; /* The datatype size */ - outbuf_size = cd_values[3]; /* Precomputed buffer guess */ + assert(typesize > 0 && typesize <= BLOSC_MAX_TYPESIZE); /* Optional params */ if (cd_nelmts >= 5) { clevel = cd_values[4]; /* The compression level */ @@ -200,14 +220,14 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts, proceeds. */ - outbuf_size = (*buf_size); + outbuf_size = nbytes; #ifdef BLOSC_DEBUG - fprintf(stderr, "Blosc: Compress %zd chunk w/buffer %zd\n", - nbytes, outbuf_size); + fprintf(stderr, "Blosc: Compress %zd bytes chunk (typesize=%d)\n", + nbytes, typesize); #endif - outbuf = malloc(outbuf_size); + outbuf = malloc(nbytes); if (outbuf == NULL) { PUSH_ERR("blosc_filter", H5E_CALLBACK, @@ -218,29 +238,32 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts, blosc_set_compressor(compname); status = blosc_compress(clevel, doshuffle, typesize, nbytes, *buf, outbuf, nbytes); + if (status == 0) goto failed; /* compressed size > input size. This is OK. */ if (status < 0) { + /* Internal error */ PUSH_ERR("blosc_filter", H5E_CALLBACK, "Blosc compression error"); goto failed; } + assert((size_t)status <= nbytes); /* We're decompressing */ } else { /* declare dummy variables */ size_t cbytes, blocksize; - free(outbuf); - /* Extract the exact outbuf_size from the buffer header. * - * NOTE: the guess value got from "cd_values" corresponds to the - * uncompressed chunk size but it should not be used in a general - * cases since other filters in the pipeline can modify the buffere - * size. + * NOTE: cd_values[3] contains the uncompressed chunk size. + * It should not be used in general cases since other filters in the + * pipeline can modify the buffer size. */ blosc_cbuffer_sizes(*buf, &outbuf_size, &cbytes, &blocksize); + assert(cbytes == nbytes); #ifdef BLOSC_DEBUG - fprintf(stderr, "Blosc: Decompress %zd chunk w/buffer %zd\n", nbytes, outbuf_size); + fprintf(stderr, + "Blosc: Decompress %zd bytes compressed chunk into %zd bytes buffer\n", + nbytes, outbuf_size); #endif outbuf = malloc(outbuf_size); @@ -254,18 +277,20 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts, if (status <= 0) { /* decompression failed */ PUSH_ERR("blosc_filter", H5E_CALLBACK, "Blosc decompression error"); goto failed; - } /* if !status */ + } } /* compressing vs decompressing */ - if (status != 0) { - free(*buf); - *buf = outbuf; - *buf_size = outbuf_size; - return status; /* Size of compressed/decompressed data */ - } + assert(status > 0); + assert(status <= outbuf_size); + /* Compression successful */ + free(*buf); + *buf = outbuf; + *buf_size = outbuf_size; + return status; /* Size of compressed/decompressed data */ failed: + /* Note: we will reach this when compressed size > original size. */ free(outbuf); return 0; diff --git a/src/example.c b/src/example.c index 79ed5f0..efbf602 100644 --- a/src/example.c +++ b/src/example.c @@ -14,7 +14,7 @@ To run: $ ./example - Blosc version info: 1.3.0 ($Date:: 2014-01-11 #$) + Blosc version info: 1.21.7.dev ($Date:: 2024-06-24 #$) Success! $ h5ls -v example.h5 Opened "example.h5" with sec2 driver. @@ -22,8 +22,8 @@ Location: 1:800 Links: 1 Chunks: {1, 100, 100} 40000 bytes - Storage: 4000000 logical bytes, 126002 allocated bytes, 3174.55% utilization - Filter-0: blosc-32001 OPT {2, 2, 4, 40000, 4, 1, 2} + Storage: 4000000 logical bytes, 168312 allocated bytes, 2376.54% utilization + Filter-0: blosc-32001 OPT {2, 2, 4, 40000} Type: native float */ @@ -34,9 +34,10 @@ #define SIZE 100*100*100 #define SHAPE {100,100,100} +#define NDIM 3 #define CHUNKSHAPE {1,100,100} -int main(){ +int main(int argc, char **argv){ static float data[SIZE]; static float data_out[SIZE]; @@ -44,10 +45,11 @@ int main(){ const hsize_t chunkshape[] = CHUNKSHAPE; char *version, *date; int r, i; + size_t cd_nelmts; unsigned int cd_values[7]; int return_code = 1; - hid_t fid, sid, dset, plist = 0; + hid_t fid = 0, sid = 0, dset = 0, plist = 0; for(i=0; i +#include +#include +#include "hdf5.h" +#include "blosc_filter.h" + +#define SIZE 1000*1000 +#define SHAPE {1000} +#define NDIM 1 +#define TYPE_SHAPE {100, 10} +#define TYPE_NDIM 2 +#define CHUNKSHAPE {10, 100, 10} + +int main(){ + + static float data[SIZE]; + static float data_out[SIZE]; + const hsize_t shape[] = SHAPE; + const hsize_t chunkshape[] = CHUNKSHAPE; + const hsize_t type_shape[] = TYPE_SHAPE; + char *version, *date; + int r, i; + int return_code = 1; + + hid_t fid = 0, sid = 0, dset = 0, plist = 0, dtype = 0; + + for(i=0; i0) H5Tclose(dtype); + if(dset>0) H5Dclose(dset); + if(sid>0) H5Sclose(sid); + if(plist>0) H5Pclose(plist); + if(fid>0) H5Fclose(fid); + + return return_code; +} diff --git a/src/test_compound.c b/src/test_compound.c new file mode 100644 index 0000000..8b8f847 --- /dev/null +++ b/src/test_compound.c @@ -0,0 +1,151 @@ +/* + Copyright (C) 2025 Francesc Alted + http://blosc.org + License: MIT (see LICENSE.txt) + + Test for compound datatypes (H5T_COMPOUND). + This triggers a special case when the compound datatype is larger than + BLOSC_MAX_TYPESIZE (255 bytes). + + To compile this program: + + h5cc blosc_filter.c test_compound.c -o test_compound -lblosc -lpthread + + To run: + + $ ./test_compound 255 # <= BLOSC_MAX_TYPESIZE + Blosc version info: 1.21.7.dev ($Date:: 2024-06-24 #$) + Success! + $ h5ls -v test_compound.h5 + Opened "test_compound.h5" with sec2 driver. + dset Dataset {100000/100000} + Location: 1:800 + Links: 1 + Chunks: {1000} 255000 bytes + Storage: 25500000 logical bytes, 450900 allocated bytes, 5655.36% utilization + Filter-0: blosc-32001 OPT {2, 2, 255, 255000} + Type: struct { + "field_0" +0 native unsigned char + [...] + "field_254" +254 native unsigned char + } 255 bytes + + $ ./test_compound 256 # > BLOSC_MAX_TYPESIZE + Blosc version info: 1.21.7.dev ($Date:: 2024-06-24 #$) + Success! + $ h5ls -v test_compound.h5 + Opened "test_compound.h5" with sec2 driver. + dset Dataset {100000/100000} + Location: 1:800 + Links: 1 + Chunks: {1000} 256000 bytes + Storage: 25600000 logical bytes, 157400 allocated bytes, 16264.29% utilization + Filter-0: blosc-32001 OPT {2, 2, 1, 256000} + Type: struct { + "field_0" +0 native unsigned char + [...] + "field_255" +255 native unsigned char + } 256 bytes + +*/ + +#include +#include +#include +#include "hdf5.h" +#include "blosc_filter.h" + +#define SIZE 100000 +#define SHAPE {100000} +#define NDIM 1 +#define CHUNKSHAPE {1000} + +int main(int argc, char **argv){ + + static unsigned char *data = NULL; + static unsigned char *data_out = NULL; + int struct_size = 0; + const hsize_t shape[] = SHAPE; + const hsize_t chunkshape[] = CHUNKSHAPE; + char *version, *date; + int r, i; + int return_code = 1; + + hid_t fid = 0, sid = 0, dset = 0, plist = 0, dtype = 0; + + if (argc == 2) struct_size = atoi(argv[1]); + if (struct_size < 1) { + fprintf(stderr, "Usage: %s \n", argv[0]); + goto failed; + } + + data = malloc(SIZE * struct_size); + if(data == NULL) goto failed; + data_out = malloc(SIZE * struct_size); + if(data_out == NULL) goto failed; + for (i=0; i0) H5Tclose(dtype); + if(dset>0) H5Dclose(dset); + if(sid>0) H5Sclose(sid); + if(plist>0) H5Pclose(plist); + if(fid>0) H5Fclose(fid); + + return return_code; +} diff --git a/src/test_strings.c b/src/test_strings.c new file mode 100644 index 0000000..1fd47d5 --- /dev/null +++ b/src/test_strings.c @@ -0,0 +1,133 @@ +/* + Copyright (C) 2025 Francesc Alted + http://blosc.org + License: MIT (see LICENSE.txt) + + Test for variable-width strings and other VLEN types. + + To compile this program: + + h5cc blosc_filter.c test_strings.c -o test_strings -lblosc -lpthread + + To run: + + $ ./test_strings + Blosc version info: 1.21.7.dev ($Date:: 2024-06-24 #$) + Success! + $ h5ls -v test_strings.h5 + Opened "test_strings.h5" with sec2 driver. + dset Dataset {100000/100000} + Location: 1:800 + Links: 1 + Chunks: {1000} 8000 bytes + Storage: 800000 logical bytes, 505070 allocated bytes, 158.39% utilization + Filter-0: blosc-32001 OPT {2, 2, 1, 0} + Type: variable-length null-terminated UTF-8 string + +*/ + +#include +#include +#include +#include "hdf5.h" +#include "blosc_filter.h" + +#define SIZE 100000 +#define SHAPE {100000} +#define NDIM 1 +#define CHUNKSHAPE {1000} +#define MAX_STRING_LEN 14 + +int main(){ + + static char* data[SIZE]; + static char* data_out[SIZE]; + const hsize_t shape[] = SHAPE; + const hsize_t chunkshape[] = CHUNKSHAPE; + char *version, *date; + int r, i; + int return_code = 1; + + hid_t fid = 0, sid = 0, dset = 0, plist = 0, dtype = 0; + + /* Note: for this example we could call a single malloc and fill it back + to back with Hello 0\0World 0\0Hello 1\0World 1\0... + However we want to test behaviour when the strings are originally + non-contiguous in memory. */ + memset(data, 0, sizeof(char*) * SIZE); /* For safe cleanup */ + for(i=0; i0) H5Tclose(dtype); + if(dset>0) H5Dclose(dset); + if(sid>0) H5Sclose(sid); + if(plist>0) H5Pclose(plist); + if(fid>0) H5Fclose(fid); + + return return_code; +} diff --git a/src/test_tiny_chunks.c b/src/test_tiny_chunks.c new file mode 100644 index 0000000..c78b56d --- /dev/null +++ b/src/test_tiny_chunks.c @@ -0,0 +1,103 @@ +/* + Copyright (C) 2025 Francesc Alted + http://blosc.org + License: MIT (see LICENSE.txt) + + Test for uncompressible chunks, e.g. where the compressed size would be + larger than the original one. + + To compile this program: + + h5cc blosc_filter.c test_tiny_chunks.c -o test_tiny_chunks -lblosc -lpthread + + To run: + + $ ./test_tiny_chunks + Blosc version info: 1.21.7.dev ($Date:: 2024-06-24 #$) + Success! + $ h5ls -v test_tiny_chunks.h5 + Opened "test_tiny_chunks.h5" with sec2 driver. + dset Dataset {1000/1000} + Location: 1:800 + Links: 1 + Chunks: {1} 4 bytes + Storage: 4000 logical bytes, 4000 allocated bytes, 100.00% utilization + Filter-0: blosc-32001 OPT {2, 2, 4, 4} + Type: native float + +*/ + +#include +#include "hdf5.h" +#include "blosc_filter.h" + +#define SIZE 1000 +#define SHAPE {1000} +#define NDIM 1 +#define CHUNKSHAPE {1} + +int main(){ + + static float data[SIZE]; + static float data_out[SIZE]; + const hsize_t shape[] = SHAPE; + const hsize_t chunkshape[] = CHUNKSHAPE; + char *version, *date; + int r, i; + int return_code = 1; + + hid_t fid = 0, sid = 0, dset = 0, plist = 0; + + for(i=0; i0) H5Dclose(dset); + if(sid>0) H5Sclose(sid); + if(plist>0) H5Pclose(plist); + if(fid>0) H5Fclose(fid); + + return return_code; +}