diff --git a/numcodecs/blosc.pyx b/numcodecs/blosc.pyx index 2c43dd04..6e6851db 100644 --- a/numcodecs/blosc.pyx +++ b/numcodecs/blosc.pyx @@ -8,12 +8,11 @@ import multiprocessing import os -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE +from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.memoryview cimport PyMemoryView_GET_BUFFER -from .compat_ext cimport Buffer -from .compat_ext import Buffer from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -146,17 +145,18 @@ def cbuffer_sizes(source): """ cdef: - Buffer buffer + memoryview source_mv + const Py_buffer* source_pb size_t nbytes, cbytes, blocksize - # obtain buffer - buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") # determine buffer size - blosc_cbuffer_sizes(buffer.ptr, &nbytes, &cbytes, &blocksize) - - # release buffers - buffer.release() + blosc_cbuffer_sizes(source_pb.buf, &nbytes, &cbytes, &blocksize) return nbytes, cbytes, blocksize @@ -164,16 +164,17 @@ def cbuffer_sizes(source): def cbuffer_complib(source): """Return the name of the compression library used to compress `source`.""" cdef: - Buffer buffer + memoryview source_mv + const Py_buffer* source_pb - # obtain buffer - buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") # determine buffer size - complib = blosc_cbuffer_complib(buffer.ptr) - - # release buffers - buffer.release() + complib = blosc_cbuffer_complib(source_pb.buf) complib = complib.decode('ascii') @@ -193,18 +194,19 @@ def 
cbuffer_metainfo(source): """ cdef: - Buffer buffer + memoryview source_mv + const Py_buffer* source_pb size_t typesize int flags - # obtain buffer - buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") # determine buffer size - blosc_cbuffer_metainfo(buffer.ptr, &typesize, &flags) - - # release buffers - buffer.release() + blosc_cbuffer_metainfo(source_pb.buf, &typesize, &flags) # decompose flags if flags & BLOSC_DOSHUFFLE: @@ -252,23 +254,29 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, """ cdef: - char *source_ptr - char *dest_ptr - Buffer source_buffer + memoryview source_mv + const Py_buffer* source_pb + const char* source_ptr size_t nbytes, itemsize int cbytes bytes dest + char* dest_ptr # check valid cname early cname_str = cname.decode('ascii') if cname_str not in list_compressors(): err_bad_cname(cname_str) - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - nbytes = source_buffer.nbytes - itemsize = source_buffer.itemsize + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + # extract metadata + source_ptr = source_pb.buf + nbytes = source_pb.len + itemsize = source_pb.itemsize # determine shuffle if shuffle == AUTOSHUFFLE: @@ -280,46 +288,40 @@ def compress(source, char* cname, int clevel, int shuffle=SHUFFLE, raise ValueError('invalid shuffle argument; expected -1, 0, 1 or 2, found %r' % shuffle) - try: - - # setup destination - dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD) - dest_ptr = PyBytes_AS_STRING(dest) - - # perform compression - if _get_use_threads(): - # 
allow blosc to use threads internally + # setup destination + dest = PyBytes_FromStringAndSize(NULL, nbytes + BLOSC_MAX_OVERHEAD) + dest_ptr = PyBytes_AS_STRING(dest) - # N.B., we are using blosc's global context, and so we need to use a lock - # to ensure no-one else can modify the global context while we're setting it - # up and using it. - with get_mutex(): + # perform compression + if _get_use_threads(): + # allow blosc to use threads internally - # set compressor - compressor_set = blosc_set_compressor(cname) - if compressor_set < 0: - # shouldn't happen if we checked against list of compressors - # already, but just in case - err_bad_cname(cname_str) + # N.B., we are using blosc's global context, and so we need to use a lock + # to ensure no-one else can modify the global context while we're setting it + # up and using it. + with get_mutex(): - # set blocksize - blosc_set_blocksize(blocksize) + # set compressor + compressor_set = blosc_set_compressor(cname) + if compressor_set < 0: + # shouldn't happen if we checked against list of compressors + # already, but just in case + err_bad_cname(cname_str) - # perform compression - with nogil: - cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr, - dest_ptr, nbytes + BLOSC_MAX_OVERHEAD) + # set blocksize + blosc_set_blocksize(blocksize) - else: + # perform compression with nogil: - cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr, - dest_ptr, nbytes + BLOSC_MAX_OVERHEAD, - cname, blocksize, 1) + cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, source_ptr, + dest_ptr, nbytes + BLOSC_MAX_OVERHEAD) - finally: + else: + with nogil: + cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, source_ptr, + dest_ptr, nbytes + BLOSC_MAX_OVERHEAD, + cname, blocksize, 1) - # release buffers - source_buffer.release() # check compression was successful if cbytes <= 0: @@ -350,15 +352,22 @@ def decompress(source, dest=None): """ cdef: int ret - char *source_ptr - char 
*dest_ptr - Buffer source_buffer - Buffer dest_buffer = None + memoryview source_mv + const Py_buffer* source_pb + const char* source_ptr + memoryview dest_mv + Py_buffer* dest_pb + char* dest_ptr size_t nbytes, cbytes, blocksize - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + # get source pointer + source_ptr = source_pb.buf # determine buffer size blosc_cbuffer_sizes(source_ptr, &nbytes, &cbytes, &blocksize) @@ -366,37 +375,29 @@ def decompress(source, dest=None): # setup destination buffer if dest is None: # allocate memory - dest = PyBytes_FromStringAndSize(NULL, nbytes) - dest_ptr = PyBytes_AS_STRING(dest) - dest_nbytes = nbytes + dest_1d = dest = PyBytes_FromStringAndSize(NULL, nbytes) else: - arr = ensure_contiguous_ndarray(dest) - dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) - dest_ptr = dest_buffer.ptr - dest_nbytes = dest_buffer.nbytes - - try: - - # guard condition - if dest_nbytes < nbytes: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (nbytes, dest_nbytes)) - - # perform decompression - if _get_use_threads(): - # allow blosc to use threads internally - with nogil: - ret = blosc_decompress(source_ptr, dest_ptr, nbytes) - else: - with nogil: - ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) - - finally: - - # release buffers - source_buffer.release() - if dest_buffer is not None: - dest_buffer.release() + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len + + # guard condition + if dest_nbytes < nbytes: + raise ValueError('destination buffer too 
small; expected at least %s, ' + 'got %s' % (nbytes, dest_nbytes)) + + # perform decompression + if _get_use_threads(): + # allow blosc to use threads internally + with nogil: + ret = blosc_decompress(source_ptr, dest_ptr, nbytes) + else: + with nogil: + ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) # handle errors if ret <= 0: @@ -433,14 +434,22 @@ def decompress_partial(source, start, nitems, dest=None): int encoding_size int nitems_bytes int start_bytes - char *source_ptr - char *dest_ptr - Buffer source_buffer - Buffer dest_buffer = None - - # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr + const char* source_ptr + memoryview source_mv + const Py_buffer* source_pb + memoryview dest_mv + Py_buffer* dest_pb + char* dest_ptr + size_t dest_nbytes + + # obtain source memoryview + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b"A"): + raise BufferError("`source` must contain contiguous memory") + + # setup source pointer + source_ptr = source_pb.buf # get encoding size from source buffer header encoding_size = source[3] @@ -451,26 +460,22 @@ def decompress_partial(source, start, nitems, dest=None): # setup destination buffer if dest is None: - dest = PyBytes_FromStringAndSize(NULL, nitems_bytes) - dest_ptr = PyBytes_AS_STRING(dest) - dest_nbytes = nitems_bytes + # allocate memory + dest_1d = dest = PyBytes_FromStringAndSize(NULL, nitems_bytes) else: - arr = ensure_contiguous_ndarray(dest) - dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) - dest_ptr = dest_buffer.ptr - dest_nbytes = dest_buffer.nbytes + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len # try decompression - try: - if dest_nbytes < nitems_bytes: - raise ValueError('destination buffer too 
small; expected at least %s, ' - 'got %s' % (nitems_bytes, dest_nbytes)) - ret = blosc_getitem(source_ptr, start, nitems, dest_ptr) - - finally: - source_buffer.release() - if dest_buffer is not None: - dest_buffer.release() + if dest_nbytes < nitems_bytes: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (nitems_bytes, dest_nbytes)) + ret = blosc_getitem(source_ptr, start, nitems, dest_ptr) # ret refers to the number of bytes returned from blosc_getitem. if ret <= 0: diff --git a/numcodecs/compat_ext.pxd b/numcodecs/compat_ext.pxd deleted file mode 100644 index dfcaee0f..00000000 --- a/numcodecs/compat_ext.pxd +++ /dev/null @@ -1,12 +0,0 @@ -# cython: language_level=3 - - -cdef class Buffer: - cdef: - char *ptr - Py_buffer buffer - size_t nbytes - size_t itemsize - bint acquired - - cpdef release(self) diff --git a/numcodecs/compat_ext.pyx b/numcodecs/compat_ext.pyx deleted file mode 100644 index f57e3cfd..00000000 --- a/numcodecs/compat_ext.pyx +++ /dev/null @@ -1,28 +0,0 @@ -# cython: embedsignature=True -# cython: profile=False -# cython: linetrace=False -# cython: binding=False -# cython: language_level=3 -from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release - - -from .compat import ensure_contiguous_ndarray - - -cdef class Buffer: - """Convenience class for buffer interface.""" - - def __cinit__(self, obj, int flags): - PyObject_GetBuffer(obj, &(self.buffer), flags) - self.acquired = True - self.ptr = self.buffer.buf - self.itemsize = self.buffer.itemsize - self.nbytes = self.buffer.len - - cpdef release(self): - if self.acquired: - PyBuffer_Release(&(self.buffer)) - self.acquired = False - - def __dealloc__(self): - self.release() diff --git a/numcodecs/lz4.pyx b/numcodecs/lz4.pyx index 87a7f6ca..a6b544fa 100644 --- a/numcodecs/lz4.pyx +++ b/numcodecs/lz4.pyx @@ -5,13 +5,11 @@ # cython: language_level=3 -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE +from cpython.buffer cimport 
PyBuffer_IsContiguous from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING +from cpython.memoryview cimport PyMemoryView_GET_BUFFER from libc.stdint cimport uint8_t, uint32_t - -from .compat_ext cimport Buffer -from .compat_ext import Buffer from ._utils cimport store_le32, load_le32 from .compat import ensure_contiguous_ndarray from .abc import Codec @@ -66,40 +64,38 @@ def compress(source, int acceleration=DEFAULT_ACCELERATION): """ cdef: - char *source_ptr - char *dest_ptr - char *dest_start - Buffer source_buffer - int source_size, dest_size, compressed_size + memoryview source_mv + const Py_buffer* source_pb + const char* source_ptr bytes dest + char* dest_ptr + char* dest_start + int source_size, dest_size, compressed_size # check level if acceleration <= 0: acceleration = DEFAULT_ACCELERATION # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - source_size = source_buffer.nbytes - - try: - - # setup destination - dest_size = LZ4_compressBound(source_size) - dest = PyBytes_FromStringAndSize(NULL, dest_size + sizeof(uint32_t)) - dest_ptr = PyBytes_AS_STRING(dest) - store_le32(dest_ptr, source_size) - dest_start = dest_ptr + sizeof(uint32_t) - - # perform compression - with nogil: - compressed_size = LZ4_compress_fast(source_ptr, dest_start, source_size, dest_size, - acceleration) - - finally: - - # release buffers - source_buffer.release() + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + source_ptr = source_pb.buf + source_size = source_pb.len + + # setup destination + dest_size = LZ4_compressBound(source_size) + dest = PyBytes_FromStringAndSize(NULL, dest_size + sizeof(uint32_t)) + dest_ptr = PyBytes_AS_STRING(dest) + store_le32(dest_ptr, source_size) + dest_start = dest_ptr + sizeof(uint32_t) + + # perform compression + with nogil: + 
compressed_size = LZ4_compress_fast(source_ptr, dest_start, source_size, dest_size, + acceleration) # check compression was successful if compressed_size <= 0: @@ -129,52 +125,54 @@ def decompress(source, dest=None): """ cdef: - char *source_ptr - char *source_start - char *dest_ptr - Buffer source_buffer - Buffer dest_buffer = None + memoryview source_mv + const Py_buffer* source_pb + const char* source_ptr + const char* source_start + memoryview dest_mv + Py_buffer* dest_pb + char* dest_ptr int source_size, dest_size, decompressed_size # setup source buffer - source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS) - source_ptr = source_buffer.ptr - source_size = source_buffer.nbytes - - try: - - # determine uncompressed size - if source_size < sizeof(uint32_t): - raise ValueError('bad input data') - dest_size = load_le32(source_ptr) - if dest_size <= 0: - raise RuntimeError('LZ4 decompression error: invalid input data') - source_start = source_ptr + sizeof(uint32_t) - source_size -= sizeof(uint32_t) - - # setup destination buffer - if dest is None: - # allocate memory - dest = PyBytes_FromStringAndSize(NULL, dest_size) - dest_ptr = PyBytes_AS_STRING(dest) - else: - arr = ensure_contiguous_ndarray(dest) - dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) - dest_ptr = dest_buffer.ptr - if dest_buffer.nbytes < dest_size: - raise ValueError('destination buffer too small; expected at least %s, ' - 'got %s' % (dest_size, dest_buffer.nbytes)) - - # perform decompression - with nogil: - decompressed_size = LZ4_decompress_safe(source_start, dest_ptr, source_size, dest_size) - - finally: - - # release buffers - source_buffer.release() - if dest_buffer is not None: - dest_buffer.release() + source_mv = memoryview(source) + source_pb = PyMemoryView_GET_BUFFER(source_mv) + if not PyBuffer_IsContiguous(source_pb, b'A'): + raise BufferError("`source` must contain contiguous memory") + + # extract source metadata + source_ptr = source_pb.buf + source_size = 
source_pb.len + + # determine uncompressed size + if source_size < sizeof(uint32_t): + raise ValueError('bad input data') + dest_size = load_le32(source_ptr) + if dest_size <= 0: + raise RuntimeError('LZ4 decompression error: invalid input data') + source_start = source_ptr + sizeof(uint32_t) + source_size -= sizeof(uint32_t) + + # setup destination buffer + if dest is None: + # allocate memory + dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size) + else: + dest_1d = ensure_contiguous_ndarray(dest) + + # obtain dest memoryview + dest_mv = memoryview(dest_1d) + dest_pb = PyMemoryView_GET_BUFFER(dest_mv) + dest_ptr = dest_pb.buf + dest_nbytes = dest_pb.len + + if dest_nbytes < dest_size: + raise ValueError('destination buffer too small; expected at least %s, ' + 'got %s' % (dest_size, dest_nbytes)) + + # perform decompression + with nogil: + decompressed_size = LZ4_decompress_safe(source_start, dest_ptr, source_size, dest_size) # check decompression was successful if decompressed_size <= 0: diff --git a/numcodecs/vlen.pyx b/numcodecs/vlen.pyx index e1e149ee..61efe347 100644 --- a/numcodecs/vlen.pyx +++ b/numcodecs/vlen.pyx @@ -10,12 +10,16 @@ cimport cython from numpy cimport ndarray import numpy as np from .abc import Codec -from .compat_ext cimport Buffer -from .compat_ext import Buffer from .compat import ensure_contiguous_ndarray -from cpython cimport (PyBytes_GET_SIZE, PyBytes_AS_STRING, PyBytes_Check, - PyBytes_FromStringAndSize, PyUnicode_AsUTF8String) -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS +from cpython.buffer cimport PyBuffer_IsContiguous +from cpython.bytes cimport ( + PyBytes_AS_STRING, + PyBytes_GET_SIZE, + PyBytes_Check, + PyBytes_FromStringAndSize, +) +from cpython.memoryview cimport PyMemoryView_GET_BUFFER +from cpython.unicode cimport PyUnicode_AsUTF8String from libc.stdint cimport uint8_t from libc.string cimport memcpy from ._utils cimport store_le32, load_le32 @@ -132,23 +136,26 @@ class VLenUTF8(Codec): 
@cython.boundscheck(False) def decode(self, buf, out=None): cdef: - Buffer input_buffer - char* data - char* data_end - Py_ssize_t i, l, n_items, data_length, input_length + memoryview buf_mv + const Py_buffer* buf_pb + const char* data + const char* data_end + Py_ssize_t i, l, n_items, data_length - # accept any buffer + # obtain memoryview buf = ensure_contiguous_ndarray(buf) - input_buffer = Buffer(buf, PyBUF_ANY_CONTIGUOUS) - input_length = input_buffer.nbytes + buf_mv = memoryview(buf) + buf_pb = PyMemoryView_GET_BUFFER(buf_mv) # sanity checks - if input_length < HEADER_LENGTH: + if not PyBuffer_IsContiguous(buf_pb, b'A'): + raise BufferError("`buf` must contain contiguous memory") + if buf_pb.len < HEADER_LENGTH: raise ValueError('corrupt buffer, missing or truncated header') # obtain input data pointer - data = input_buffer.ptr - data_end = data + input_length + data = buf_pb.buf + data_end = data + buf_pb.len # load number of items n_items = load_le32(data) @@ -260,23 +267,26 @@ class VLenBytes(Codec): @cython.boundscheck(False) def decode(self, buf, out=None): cdef: - Buffer input_buffer - char* data - char* data_end - Py_ssize_t i, l, n_items, data_length, input_length + memoryview buf_mv + const Py_buffer* buf_pb + const char* data + const char* data_end + Py_ssize_t i, l, n_items, data_length - # accept any buffer + # obtain memoryview buf = ensure_contiguous_ndarray(buf) - input_buffer = Buffer(buf, PyBUF_ANY_CONTIGUOUS) - input_length = input_buffer.nbytes + buf_mv = memoryview(buf) + buf_pb = PyMemoryView_GET_BUFFER(buf_mv) # sanity checks - if input_length < HEADER_LENGTH: + if not PyBuffer_IsContiguous(buf_pb, b'A'): + raise BufferError("`buf` must contain contiguous memory") + if buf_pb.len < HEADER_LENGTH: raise ValueError('corrupt buffer, missing or truncated header') # obtain input data pointer - data = input_buffer.ptr - data_end = data + input_length + data = buf_pb.buf + data_end = data + buf_pb.len # load number of items n_items = 
load_le32(data) @@ -352,11 +362,12 @@ class VLenArray(Codec): object[:] values object[:] normed_values int[:] lengths - char* encv + const char* encv bytes b bytearray out char* data - Buffer value_buffer + memoryview value_mv + const Py_buffer* value_pb object v # normalise input @@ -398,11 +409,13 @@ class VLenArray(Codec): l = lengths[i] store_le32(data, l) data += 4 - value_buffer = Buffer(normed_values[i], PyBUF_ANY_CONTIGUOUS) - encv = value_buffer.ptr + + value_mv = memoryview(normed_values[i]) + value_pb = PyMemoryView_GET_BUFFER(value_mv) + + encv = value_pb.buf memcpy(data, encv, l) data += l - value_buffer.release() return out @@ -410,23 +423,26 @@ class VLenArray(Codec): @cython.boundscheck(False) def decode(self, buf, out=None): cdef: - Buffer input_buffer - char* data - char* data_end - Py_ssize_t i, l, n_items, data_length, input_length + memoryview buf_mv + const Py_buffer* buf_pb + const char* data + const char* data_end + Py_ssize_t i, l, n_items, data_length - # accept any buffer + # obtain memoryview buf = ensure_contiguous_ndarray(buf) - input_buffer = Buffer(buf, PyBUF_ANY_CONTIGUOUS) - input_length = input_buffer.nbytes + buf_mv = memoryview(buf) + buf_pb = PyMemoryView_GET_BUFFER(buf_mv) # sanity checks - if input_length < HEADER_LENGTH: + if not PyBuffer_IsContiguous(buf_pb, b'A'): + raise BufferError("`buf` must contain contiguous memory") + if buf_pb.len < HEADER_LENGTH: raise ValueError('corrupt buffer, missing or truncated header') # obtain input data pointer - data = input_buffer.ptr - data_end = data + input_length + data = buf_pb.buf + data_end = data + buf_pb.len # load number of items n_items = load_le32(data) diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index efd12fa2..ced430dc 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -5,12 +5,10 @@ # cython: language_level=3 -from cpython.buffer cimport PyBUF_ANY_CONTIGUOUS, PyBUF_WRITEABLE +from cpython.buffer cimport PyBuffer_IsContiguous from cpython.bytes cimport 
PyBytes_FromStringAndSize, PyBytes_AS_STRING
+from cpython.memoryview cimport PyMemoryView_GET_BUFFER
 
-
-from .compat_ext cimport Buffer
-from .compat_ext import Buffer
 from .compat import ensure_contiguous_ndarray
 from .abc import Codec
 
@@ -92,20 +90,26 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False):
     """
 
     cdef:
-        char *source_ptr
-        char *dest_ptr
-        Buffer source_buffer
+        memoryview source_mv
+        const Py_buffer* source_pb
+        const char* source_ptr
         size_t source_size, dest_size, compressed_size
         bytes dest
+        char* dest_ptr
 
     # check level
     if level > MAX_CLEVEL:
         level = MAX_CLEVEL
 
+    # obtain source memoryview
+    source_mv = memoryview(source)
+    source_pb = PyMemoryView_GET_BUFFER(source_mv)
+    if not PyBuffer_IsContiguous(source_pb, b'A'):
+        raise BufferError("`source` must contain contiguous memory")
+
     # setup source buffer
-    source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
-    source_ptr = source_buffer.ptr
-    source_size = source_buffer.nbytes
+    source_ptr = source_pb.buf
+    source_size = source_pb.len
 
     cctx = ZSTD_createCCtx()
     param_set_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level)
@@ -132,10 +136,9 @@ def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False):
             compressed_size = ZSTD_compress2(cctx, dest_ptr, dest_size, source_ptr, source_size)
 
     finally:
+        # always free the zstd context, whether compression succeeded or raised
         if cctx:
             ZSTD_freeCCtx(cctx)
-        # release buffers
-        source_buffer.release()
 
     # check compression was successful
     if ZSTD_isError(compressed_size):
@@ -165,47 +168,50 @@ def decompress(source, dest=None):
     """
 
     cdef:
-        char *source_ptr
-        char *dest_ptr
-        Buffer source_buffer
-        Buffer dest_buffer = None
+        memoryview source_mv
+        const Py_buffer* source_pb
+        char* source_ptr
+        memoryview dest_mv
+        Py_buffer* dest_pb
+        char* dest_ptr
         size_t source_size, dest_size, decompressed_size
-
-    # setup source buffer
-    source_buffer = Buffer(source, PyBUF_ANY_CONTIGUOUS)
-    source_ptr = source_buffer.ptr
-    source_size = source_buffer.nbytes
-
-    try:
-
-        # determine uncompressed size
-        dest_size = ZSTD_getFrameContentSize(source_ptr, source_size)
-        if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR:
-            raise RuntimeError('Zstd decompression error: invalid input data')
-
-        # setup destination buffer
-        if dest is None:
-            # allocate memory
-            dest = PyBytes_FromStringAndSize(NULL, dest_size)
-            dest_ptr = PyBytes_AS_STRING(dest)
-        else:
-            arr = ensure_contiguous_ndarray(dest)
-            dest_buffer = Buffer(arr, PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
-            dest_ptr = dest_buffer.ptr
-            if dest_buffer.nbytes < dest_size:
-                raise ValueError('destination buffer too small; expected at least %s, '
-                                 'got %s' % (dest_size, dest_buffer.nbytes))
-
-        # perform decompression
-        with nogil:
-            decompressed_size = ZSTD_decompress(dest_ptr, dest_size, source_ptr, source_size)
-
-    finally:
-
-        # release buffers
-        source_buffer.release()
-        if dest_buffer is not None:
-            dest_buffer.release()
+
+    # obtain source memoryview
+    source_mv = memoryview(source)
+    source_pb = PyMemoryView_GET_BUFFER(source_mv)
+    if not PyBuffer_IsContiguous(source_pb, b'A'):
+        raise BufferError("`source` must contain contiguous memory")
+
+    # get source pointer
+    source_ptr = source_pb.buf
+    source_size = source_pb.len
+
+    # determine uncompressed size
+    dest_size = ZSTD_getFrameContentSize(source_ptr, source_size)
+    if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR:
+        raise RuntimeError('Zstd decompression error: invalid input data')
+
+    # setup destination buffer
+    if dest is None:
+        # allocate memory
+        dest_1d = dest = PyBytes_FromStringAndSize(NULL, dest_size)
+    else:
+        dest_1d = ensure_contiguous_ndarray(dest)
+
+    # obtain dest memoryview
+    dest_mv = memoryview(dest_1d)
+    dest_pb = PyMemoryView_GET_BUFFER(dest_mv)
+    dest_ptr = dest_pb.buf
+    dest_nbytes = dest_pb.len
+
+    # validate output buffer
+    if dest_nbytes < dest_size:
+        raise ValueError('destination buffer too small; expected at least %s, '
+                         'got %s' % (dest_size, dest_nbytes))
+
+    # perform decompression
+    with nogil:
+        decompressed_size = ZSTD_decompress(dest_ptr, dest_size, source_ptr, source_size)
 
     # check decompression was successful
     if ZSTD_isError(decompressed_size):
diff --git a/setup.py b/setup.py
index b6db0797..0f06795e 100644
--- a/setup.py
+++ b/setup.py
@@ -274,25 +274,6 @@ def jenkins_extension():
     return extensions
 
 
-def compat_extension():
-    info('setting up compat extension')
-
-    extra_compile_args = base_compile_args.copy()
-
-    sources = ['numcodecs/compat_ext.pyx']
-
-    # define extension module
-    extensions = [
-        Extension(
-            'numcodecs.compat_ext',
-            sources=sources,
-            extra_compile_args=extra_compile_args,
-        ),
-    ]
-
-    return extensions
-
-
 def shuffle_extension():
     info('setting up shuffle extension')
 
@@ -361,7 +342,6 @@ def run_setup(with_extensions):
             blosc_extension()
             + zstd_extension()
             + lz4_extension()
-            + compat_extension()
             + shuffle_extension()
             + vlen_extension()
             + fletcher_extension()