Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.5.0)
cmake_minimum_required(VERSION 3.10.0)
cmake_policy(SET CMP0074 NEW)
project(blosc_hdf5)
include(ExternalProject)
Expand Down Expand Up @@ -94,6 +94,23 @@ if(BUILD_TESTS)
find_package(Threads REQUIRED)
set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT})
add_executable(example src/example.c)
add_executable(test_array src/test_array.c)
add_executable(test_compound src/test_compound.c)
add_executable(test_strings src/test_strings.c)
add_executable(test_tiny_chunks src/test_tiny_chunks.c)
target_link_libraries(example blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS})
add_test(test_hdf5_filter example)
target_link_libraries(test_array blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS})
target_link_libraries(test_compound blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS})
target_link_libraries(test_strings blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS})
target_link_libraries(test_tiny_chunks blosc_filter_shared ${HDF5_LIBRARIES} ${LIBS})
add_test(NAME example[nelmts=0] COMMAND example)
add_test(NAME example[nelmts=4] COMMAND example 4)
add_test(NAME example[nelmts=5] COMMAND example 5)
add_test(NAME example[nelmts=6] COMMAND example 6)
add_test(NAME example[nelmts=7] COMMAND example 7)
add_test(NAME test_array COMMAND test_array)
add_test(NAME test_compound[le_BLOSC_MAX_TYPESIZE] COMMAND test_compound 255)
add_test(NAME test_compound[gt_BLOSC_MAX_TYPESIZE] COMMAND test_compound 256)
add_test(NAME test_strings COMMAND test_strings)
add_test(NAME test_tiny_chunks COMMAND test_tiny_chunks)
endif(BUILD_TESTS)
10 changes: 8 additions & 2 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@ compilers = "*"
cmake = "*"
hdf5 = "*"

[activation.env]
# CFLAGS = "-O0 -g -DBLOSC_DEBUG"
# FIXME Debug hangs on Windows GitHub runners
CMAKE_CONFIG = "Debug"
# CMAKE_CONFIG = "Release"

[tasks]
clean = "rm -rf build"
mkdir-build = { cmd = "mkdir -p build" }
build-thirdparty = { cwd = "build", cmd = "cmake .." }
build-hdf5-blosc = { cwd = "build", cmd = "cmake --build . --config Release" }
build-hdf5-blosc = { cwd = "build", cmd = "cmake --build . --config $CMAKE_CONFIG" }
build = { depends-on = [ "mkdir-build", "build-thirdparty", "build-hdf5-blosc" ] }
test = { cwd = "build", cmd = "ctest --output-on-failure" }
test = { cwd = "build", cmd = "ctest --output-on-failure --build-config $CMAKE_CONFIG" }
99 changes: 62 additions & 37 deletions src/blosc_filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/


#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
Expand All @@ -28,8 +29,6 @@
#define PUSH_ERR(func, minor, ...) H5Epush(H5E_DEFAULT, __FILE__, func, __LINE__, H5E_ERR_CLS, H5E_PLINE, minor, __VA_ARGS__)
#endif /* defined(__GNUC__) */

#define GET_FILTER(a, b, c, d, e, f, g) H5Pget_filter_by_id(a,b,c,d,e,f,g,NULL)


size_t blosc_filter(unsigned flags, size_t cd_nelmts,
const unsigned cd_values[], size_t nbytes,
Expand Down Expand Up @@ -79,23 +78,34 @@ herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space) {
int i;
herr_t r;

unsigned int typesize, basetypesize;
unsigned int bufsize;
unsigned int typesize, chunksize, basetypesize;
hsize_t chunkdims[32];
unsigned int flags;
size_t nelements = 8;
unsigned int values[] = {0, 0, 0, 0, 0, 0, 0, 0};
size_t cd_nelmts = 8;
/*
* cd_values[0] = hdf5-blosc format version
* cd_values[1] = blosc format version
* cd_values[2] = typesize
* cd_values[3] = uncompressed chunk size (unused)
* cd_values[4] = compression level
* cd_values[5] = 0: shuffle not active, 1: shuffle active
* cd_values[6] = compressor, e.g. BLOSC_BLOSCLZ
* cd_values[7] = unused
*/
unsigned int cd_values[] = {0, 0, 0, 0, 0, 0, 0, 0};
hid_t super_type;
H5T_class_t classt;

r = GET_FILTER(dcpl, FILTER_BLOSC, &flags, &nelements, values, 0, NULL);
r = H5Pget_filter_by_id(
dcpl, FILTER_BLOSC, &flags, &cd_nelmts, cd_values, 0, NULL, NULL
);
if (r < 0) return -1;

if (nelements < 4) nelements = 4; /* First 4 slots reserved. */
if (cd_nelmts < 4) cd_nelmts = 4; /* First 4 slots reserved. */

/* Set Blosc info in first two slots */
values[0] = FILTER_BLOSC_VERSION;
values[1] = BLOSC_VERSION_FORMAT;
cd_values[0] = FILTER_BLOSC_VERSION;
cd_values[1] = BLOSC_VERSION_FORMAT;

ndims = H5Pget_chunk(dcpl, 32, chunkdims);
if (ndims < 0) return -1;
Expand All @@ -108,6 +118,7 @@ herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space) {
if (typesize == 0) return -1;
/* Get the size of the base type, even for ARRAY types */
classt = H5Tget_class(type);
if (classt == H5T_NO_CLASS) return -1;
if (classt == H5T_ARRAY) {
/* Get the array base component */
super_type = H5Tget_super(type);
Expand All @@ -120,22 +131,25 @@ herr_t blosc_set_local(hid_t dcpl, hid_t type, hid_t space) {

/* Limit large typesizes (they are pretty expensive to shuffle
and, in addition, Blosc does not handle typesizes larger than
256 bytes). */
255 bytes). */
if (basetypesize > BLOSC_MAX_TYPESIZE) basetypesize = 1;
values[2] = basetypesize;
cd_values[2] = basetypesize;

/* Get the size of the chunk */
bufsize = typesize;
/* Get the size of the chunk. This is unused by blosc_filter().
It is retained for backward compatibility.
*/
chunksize = typesize;
for (i = 0; i < ndims; i++) {
bufsize *= chunkdims[i];
chunksize *= chunkdims[i];
}
values[3] = bufsize;
cd_values[3] = chunksize;

#ifdef BLOSC_DEBUG
fprintf(stderr, "Blosc: Computed buffer size %d\n", bufsize);
fprintf(stderr, "Blosc: typesize=%d; chunksize=%d\n",
typesize, chunksize);
#endif

r = H5Pmodify_filter(dcpl, FILTER_BLOSC, flags, nelements, values);
r = H5Pmodify_filter(dcpl, FILTER_BLOSC, flags, cd_nelmts, cd_values);
if (r < 0) return -1;

return 1;
Expand All @@ -159,9 +173,15 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts,
const char* complist;
char errmsg[256];

assert(cd_nelmts >= 4);
Copy link
Contributor Author

@crusaderky crusaderky Oct 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_strings crashes here if you remove the workaround hack from it: blosc_set_local did not run, so cd_nelmts is 0 at this point.

assert(cd_values[0] == FILTER_BLOSC_VERSION);
assert(cd_values[1] == BLOSC_VERSION_FORMAT);
assert(nbytes > 0);
assert(*buf_size >= nbytes);

/* Filter params that are always set */
typesize = cd_values[2]; /* The datatype size */
outbuf_size = cd_values[3]; /* Precomputed buffer guess */
assert(typesize > 0 && typesize <= BLOSC_MAX_TYPESIZE);
/* Optional params */
if (cd_nelmts >= 5) {
clevel = cd_values[4]; /* The compression level */
Expand Down Expand Up @@ -200,14 +220,14 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts,
proceeds.
*/

outbuf_size = (*buf_size);
outbuf_size = nbytes;

#ifdef BLOSC_DEBUG
fprintf(stderr, "Blosc: Compress %zd chunk w/buffer %zd\n",
nbytes, outbuf_size);
fprintf(stderr, "Blosc: Compress %zd bytes chunk (typesize=%d)\n",
nbytes, typesize);
#endif

outbuf = malloc(outbuf_size);
outbuf = malloc(nbytes);

if (outbuf == NULL) {
PUSH_ERR("blosc_filter", H5E_CALLBACK,
Expand All @@ -218,29 +238,32 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts,
blosc_set_compressor(compname);
status = blosc_compress(clevel, doshuffle, typesize, nbytes,
*buf, outbuf, nbytes);
if (status == 0) goto failed; /* compressed size > input size. This is OK. */
if (status < 0) {
/* Internal error */
PUSH_ERR("blosc_filter", H5E_CALLBACK, "Blosc compression error");
goto failed;
}
assert((size_t)status <= nbytes);

/* We're decompressing */
} else {
/* declare dummy variables */
size_t cbytes, blocksize;

free(outbuf);

/* Extract the exact outbuf_size from the buffer header.
*
* NOTE: the guess value obtained from "cd_values" corresponds to the
* uncompressed chunk size, but it should not be used in the general
* case since other filters in the pipeline can modify the buffer
* size.
* NOTE: cd_values[3] contains the uncompressed chunk size.
* It should not be used in general cases since other filters in the
* pipeline can modify the buffer size.
*/
blosc_cbuffer_sizes(*buf, &outbuf_size, &cbytes, &blocksize);
assert(cbytes == nbytes);

#ifdef BLOSC_DEBUG
fprintf(stderr, "Blosc: Decompress %zd chunk w/buffer %zd\n", nbytes, outbuf_size);
fprintf(stderr,
"Blosc: Decompress %zd bytes compressed chunk into %zd bytes buffer\n",
nbytes, outbuf_size);
#endif

outbuf = malloc(outbuf_size);
Expand All @@ -254,18 +277,20 @@ size_t blosc_filter(unsigned flags, size_t cd_nelmts,
if (status <= 0) { /* decompression failed */
PUSH_ERR("blosc_filter", H5E_CALLBACK, "Blosc decompression error");
goto failed;
} /* if !status */
}

} /* compressing vs decompressing */

if (status != 0) {
free(*buf);
*buf = outbuf;
*buf_size = outbuf_size;
return status; /* Size of compressed/decompressed data */
}
assert(status > 0);
assert(status <= outbuf_size);
/* Compression or decompression successful */
free(*buf);
*buf = outbuf;
*buf_size = outbuf_size;
return status; /* Size of compressed/decompressed data */

failed:
/* Note: we will reach this when compressed size > original size. */
free(outbuf);
return 0;

Expand Down
34 changes: 19 additions & 15 deletions src/example.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
To run:

$ ./example
Blosc version info: 1.3.0 ($Date:: 2014-01-11 #$)
Blosc version info: 1.21.7.dev ($Date:: 2024-06-24 #$)
Success!
$ h5ls -v example.h5
Opened "example.h5" with sec2 driver.
dset Dataset {100/100, 100/100, 100/100}
Location: 1:800
Links: 1
Chunks: {1, 100, 100} 40000 bytes
Storage: 4000000 logical bytes, 126002 allocated bytes, 3174.55% utilization
Filter-0: blosc-32001 OPT {2, 2, 4, 40000, 4, 1, 2}
Storage: 4000000 logical bytes, 168312 allocated bytes, 2376.54% utilization
Filter-0: blosc-32001 OPT {2, 2, 4, 40000}
Type: native float

*/
Expand All @@ -34,34 +34,35 @@

#define SIZE 100*100*100
#define SHAPE {100,100,100}
#define NDIM 3
#define CHUNKSHAPE {1,100,100}

int main(){
int main(int argc, char **argv){

static float data[SIZE];
static float data_out[SIZE];
const hsize_t shape[] = SHAPE;
const hsize_t chunkshape[] = CHUNKSHAPE;
char *version, *date;
int r, i;
size_t cd_nelmts;
unsigned int cd_values[7];
int return_code = 1;

hid_t fid, sid, dset, plist = 0;
hid_t fid = 0, sid = 0, dset = 0, plist = 0;

for(i=0; i<SIZE; i++){
data[i] = i;
}

/* Register the filter with the library */
r = register_blosc(&version, &date);
if(r<0) goto failed;
printf("Blosc version info: %s (%s)\n", version, date);
free(version);
free(date);

if(r<0) goto failed;

sid = H5Screate_simple(3, shape, NULL);
sid = H5Screate_simple(NDIM, shape, NULL);
if(sid<0) goto failed;

fid = H5Fcreate("example.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
Expand All @@ -71,7 +72,7 @@ int main(){
if(plist<0) goto failed;

/* Chunked layout required for filters */
r = H5Pset_chunk(plist, 3, chunkshape);
r = H5Pset_chunk(plist, NDIM, chunkshape);
if(r<0) goto failed;

/* This is the easiest way to call Blosc with default values: 5
Expand All @@ -85,8 +86,15 @@ int main(){
cd_values[6] = BLOSC_BLOSCLZ; /* the actual compressor to use */

/* Set the filter with 7 params */
r = H5Pset_filter(plist, FILTER_BLOSC, H5Z_FLAG_OPTIONAL, 7, cd_values);

/* r = H5Pset_filter(plist, FILTER_BLOSC, H5Z_FLAG_OPTIONAL, 7, cd_values); */

/* Test under different configurations */
if (argc == 2) {
cd_nelmts = atoi(argv[1]);
r = H5Pset_filter(plist, FILTER_BLOSC, H5Z_FLAG_OPTIONAL, cd_nelmts, cd_values);
} else {
r = H5Pset_filter(plist, FILTER_BLOSC, H5Z_FLAG_OPTIONAL, 0, NULL);
}
if(r<0) goto failed;

/* Using the blosc filter in combination with other ones also works
Expand All @@ -96,11 +104,7 @@ int main(){
if(r<0) goto failed;
*/

#if H5_USE_16_API
dset = H5Dcreate(fid, "dset", H5T_NATIVE_FLOAT, sid, plist);
#else
dset = H5Dcreate(fid, "dset", H5T_NATIVE_FLOAT, sid, H5P_DEFAULT, plist, H5P_DEFAULT);
#endif
if(dset<0) goto failed;

r = H5Dwrite(dset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &data);
Expand Down
Loading
Loading