Skip to content

Commit

Permalink
Add canonical compressed data from FITS site
Browse files Browse the repository at this point in the history
  • Loading branch information
Cadair authored and astrofrog committed Jan 19, 2023
1 parent 643b814 commit 864eac5
Show file tree
Hide file tree
Showing 8 changed files with 2,438 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*.fits -text
astropy/io/fits/tiled_compression/tests/data/*fits binary
Binary file not shown.
366 changes: 366 additions & 0 deletions astropy/io/fits/tiled_compression/tests/data/m13_gzip.fits

Large diffs are not rendered by default.

191 changes: 191 additions & 0 deletions astropy/io/fits/tiled_compression/tests/data/m13_hcomp.fits

Large diffs are not rendered by default.

1,484 changes: 1,484 additions & 0 deletions astropy/io/fits/tiled_compression/tests/data/m13_plio.fits

Large diffs are not rendered by default.

323 changes: 323 additions & 0 deletions astropy/io/fits/tiled_compression/tests/data/m13_rice.fits

Large diffs are not rendered by default.

67 changes: 46 additions & 21 deletions astropy/io/fits/tiled_compression/tests/test_tiled_compression.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import numpy as np
import pytest
from numpy.testing import assert_equal
Expand All @@ -9,6 +11,10 @@
decompress_hdu,
decompress_tile,
)
from astropy.io.fits.tiled_compression.tiled_compression import (
_buffer_to_array,
_header_to_settings,
)

COMPRESSION_TYPES = [
"GZIP_1",
Expand All @@ -31,9 +37,6 @@
@pytest.mark.parametrize(("compression_type", "dtype"), parameters)
def test_basic(tmp_path, compression_type, dtype):

# In future can pass in settings as part of the parameterization
settings = {}

# Generate compressed file dynamically

original_data = np.arange(144).reshape((12, 12)).astype(dtype)
Expand All @@ -49,24 +52,7 @@ def test_basic(tmp_path, compression_type, dtype):
# Load in raw compressed data
hdulist = fits.open(tmp_path / "test.fits", disable_image_compression=True)

tile_shape = (hdulist[1].header["ZTILE2"], hdulist[1].header["ZTILE1"])

if compression_type == "GZIP_2":
settings["itemsize"] = original_data.dtype.itemsize
elif compression_type == "PLIO_1":
settings["tilesize"] = np.product(tile_shape)
elif compression_type == "RICE_1":
settings["blocksize"] = hdulist[1].header["ZVAL1"]
settings["bytepix"] = hdulist[1].header["ZVAL2"]
settings["tilesize"] = np.product(tile_shape)
elif compression_type == "HCOMPRESS_1":
# TODO: generalize bytepix, we need to pick 4 or 8 and then cast down
# later to smaller ints if needed.
settings["bytepix"] = 4
settings["scale"] = hdulist[1].header["ZVAL1"]
settings["smooth"] = hdulist[1].header["ZVAL2"]
settings["nx"] = hdulist[1].header["ZTILE2"]
settings["ny"] = hdulist[1].header["ZTILE1"]
settings = settings_from_hdu(hdulist[1], original_data)

# Test decompression of the first tile

Expand Down Expand Up @@ -214,3 +200,42 @@ def test_compress_hdu(tmp_path, compression_type, dtype):
heap_length_c, heap_data_c = compress_hdu_c(hdu)

_assert_heap_same(heap_data, heap_data_c, 9)


@pytest.fixture
def canonical_data_base_path():
return Path(__file__).parent / "data"


@pytest.fixture(
params=(Path(__file__).parent / "data").glob("m13_*.fits"), ids=lambda x: x.name
)
def canonical_int_hdus(request):
"""
This fixture provides 4 files downloaded from https://fits.gsfc.nasa.gov/registry/tilecompression.html
Which are used as canonical tests of data not compressed by Astropy.
"""
with fits.open(request.param, disable_image_compression=True) as hdul:
yield hdul[1]


@pytest.fixture
def original_int_hdu(canonical_data_base_path):
with fits.open(canonical_data_base_path / "m13.fits") as hdul:
yield hdul[0]


def test_canonical_data(original_int_hdu, canonical_int_hdus):
hdr = canonical_int_hdus.header
tile_size = (hdr["ZTILE2"], hdr["ZTILE1"])
compression_type = hdr["ZCMPTYPE"]
original_tile_1 = original_int_hdu.data[: tile_size[0], : tile_size[1]]
compressed_tile_bytes = canonical_int_hdus.data["COMPRESSED_DATA"][0].tobytes()

settings = _header_to_settings(canonical_int_hdus.header)
tile_data_buffer = decompress_tile(
compressed_tile_bytes, algorithm=compression_type, **settings
)
tile_data = _buffer_to_array(tile_data_buffer, hdr)
np.testing.assert_allclose(original_tile_1, tile_data)
43 changes: 27 additions & 16 deletions astropy/io/fits/tiled_compression/tiled_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,8 @@ def _header_to_settings(header):
elif header["ZCMPTYPE"] == "PLIO_1":
settings["tilesize"] = np.product(tile_shape)
elif header["ZCMPTYPE"] == "RICE_1":
settings["blocksize"] = header["ZVAL1"]
settings["bytepix"] = header["ZVAL2"]
settings["blocksize"] = header.get("ZVAL1", 32)
settings["bytepix"] = header.get("ZVAL2", 4)
settings["tilesize"] = np.product(tile_shape)
elif header["ZCMPTYPE"] == "HCOMPRESS_1":
settings["bytepix"] = 4
Expand All @@ -462,6 +462,30 @@ def _header_to_settings(header):
return settings


def _buffer_to_array(tile_buffer, header):
"""
Convert a buffer to an array using the header.
This is a helper function which takes a raw buffer (as output by .decode)
and using the FITS header translates it into a numpy array with the correct
dtype, endianess and shape.
"""
tile_shape = (header["ZTILE2"], header["ZTILE1"])

if header["ZCMPTYPE"].startswith("GZIP") and header["ZBITPIX"] > 8:
# TOOD: support float types
int_size = header["ZBITPIX"] // 8
tile_data = np.asarray(tile_buffer).view(f">i{int_size}").reshape(tile_shape)
else:
if tile_buffer.format == "b":
# NOTE: this feels like a Numpy bug - need to investigate
tile_data = np.asarray(tile_buffer, dtype=np.uint8).reshape(tile_shape)
else:
tile_data = np.asarray(tile_buffer).reshape(tile_shape)

return tile_data


def decompress_hdu(hdu):
"""
Drop-in replacement for decompress_hdu from compressionmodule.c
Expand All @@ -480,20 +504,7 @@ def decompress_hdu(hdu):
tile_buffer = decompress_tile(
cdata, algorithm=hdu._header["ZCMPTYPE"], **settings
)

if hdu._header["ZCMPTYPE"].startswith("GZIP") and hdu._header["ZBITPIX"] > 8:
# TOOD: support float types
int_size = hdu._header["ZBITPIX"] // 8
tile_data = (
np.asarray(tile_buffer).view(f">i{int_size}").reshape(tile_shape)
)
else:
if tile_buffer.format == "b":
# NOTE: this feels like a Numpy bug - need to investigate
tile_data = np.asarray(tile_buffer, dtype=np.uint8).reshape(tile_shape)
else:
tile_data = np.asarray(tile_buffer).reshape(tile_shape)

tile_data = _buffer_to_array(tile_buffer, hdu._header)
data[
istart : istart + tile_shape[0], jstart : jstart + tile_shape[1]
] = tile_data
Expand Down

0 comments on commit 864eac5

Please sign in to comment.