From 90dbb684f0568f313c0f3cfa50df1a467e0a55a4 Mon Sep 17 00:00:00 2001
From: Ryan Abernathey <ryan.abernathey@gmail.com>
Date: Mon, 19 Dec 2022 09:28:26 -0500
Subject: [PATCH 1/3] add parametrized tests for netcdf encoding options

---
 kerchunk/tests/test_hdf.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py
index 50f1dba7..2a0639ad 100644
--- a/kerchunk/tests/test_hdf.py
+++ b/kerchunk/tests/test_hdf.py
@@ -286,3 +286,40 @@ def test_compact():
     m = fsspec.get_mapper("reference://", fo=out)
     g = zarr.open(m)
     assert np.allclose(g.ancillary_data.atlas_sdp_gps_epoch[:], 1.19880002e09)
+
+
+@pytest.mark.parametrize("zlib", [True, False], ids=["zlib", "no_zlib"])
+@pytest.mark.parametrize("shuffle", [True, False], ids=["shuffle", "no_shuffle"])
+@pytest.mark.parametrize("fletcher32", [True, False], ids=["fletcher32", "no_fletcher32"])
+def test_encoding_options(zlib, shuffle, fletcher32, tmp_path):
+    fname = tmp_path / "test.nc"
+    
+    shape = (2, 10)
+    chunksizes = (1, 10)
+
+    encoding = {
+        'zlib': zlib,
+        'shuffle': shuffle,
+        'complevel': 2,
+        'fletcher32': fletcher32,
+        'contiguous': False,
+        'chunksizes': chunksizes
+    }
+
+    da = xr.DataArray(
+        data=np.random.rand(*shape),
+        dims=['y', 'x'],
+        name="foo",
+        attrs={"bar": "baz"}
+    )
+    da.encoding = encoding
+    ds = da.to_dataset()
+    ds.to_netcdf(fname, engine="netcdf4", mode="w")
+    # Scan the file just written and build kerchunk references for it.
+    with fsspec.open(fname) as fp:
+        h5chunks = kerchunk.hdf.SingleHdf5ToZarr(fp, fname, inline_threshold=0, spec=0)
+        refs = h5chunks.translate()
+    # Re-open through the reference filesystem; the result must match `ds`.
+    store = fsspec.get_mapper("reference://", fo=refs)
+    ds2 = xr.open_dataset(store, engine="zarr", chunks={})
+    xr.testing.assert_identical(ds, ds2)

From 5d8ae215f029c39fe8fe4f029871c7465c45a7b7 Mon Sep 17 00:00:00 2001
From: Ryan Abernathey <ryan.abernathey@gmail.com>
Date: Mon, 19 Dec 2022 09:32:17 -0500
Subject: [PATCH 2/3] pre-commit

---
 kerchunk/tests/test_hdf.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py
index 2a0639ad..1ed0d20c 100644
--- a/kerchunk/tests/test_hdf.py
+++ b/kerchunk/tests/test_hdf.py
@@ -290,27 +290,26 @@ def test_compact():
 
 @pytest.mark.parametrize("zlib", [True, False], ids=["zlib", "no_zlib"])
 @pytest.mark.parametrize("shuffle", [True, False], ids=["shuffle", "no_shuffle"])
-@pytest.mark.parametrize("fletcher32", [True, False], ids=["fletcher32", "no_fletcher32"])
+@pytest.mark.parametrize(
+    "fletcher32", [True, False], ids=["fletcher32", "no_fletcher32"]
+)
 def test_encoding_options(zlib, shuffle, fletcher32, tmp_path):
     fname = tmp_path / "test.nc"
-    
+
     shape = (2, 10)
     chunksizes = (1, 10)
 
     encoding = {
-        'zlib': zlib,
-        'shuffle': shuffle,
-        'complevel': 2,
-        'fletcher32': fletcher32,
-        'contiguous': False,
-        'chunksizes': chunksizes
+        "zlib": zlib,
+        "shuffle": shuffle,
+        "complevel": 2,
+        "fletcher32": fletcher32,
+        "contiguous": False,
+        "chunksizes": chunksizes,
     }
 
     da = xr.DataArray(
-        data=np.random.rand(*shape),
-        dims=['y', 'x'],
-        name="foo",
-        attrs={"bar": "baz"}
+        data=np.random.rand(*shape), dims=["y", "x"], name="foo", attrs={"bar": "baz"}
     )
     da.encoding = encoding
     ds = da.to_dataset()

From 61150102fc22c2566d0573becd600428f6c2a97d Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Mon, 19 Dec 2022 10:10:54 -0500
Subject: [PATCH 3/3] fix fletcher

---
 kerchunk/codecs.py | 26 ++++++++++++++++++++++++++
 kerchunk/hdf.py    |  7 +++----
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/kerchunk/codecs.py b/kerchunk/codecs.py
index 28f0fc15..3755b3f5 100644
--- a/kerchunk/codecs.py
+++ b/kerchunk/codecs.py
@@ -66,6 +66,32 @@ def decode(self, buf, out=None):
 numcodecs.register_codec(FillStringsCodec, "fill_hdf_strings")
 
 
+class FletcherDummyFilter(numcodecs.abc.Codec):
+    """Read-only codec that drops the last four bytes of every chunk.
+
+    Intended for HDF5 datasets stored with the fletcher32 filter: the
+    4-byte checksum trailing each chunk is discarded, not verified.
+    """
+
+    codec_id = "fletcher_null"
+
+    def decode(self, buff, out=None):
+        """Return ``buff`` without its last four items.
+
+        ``out`` is accepted for signature compatibility and is ignored.
+        """
+        return buff[:-4]
+
+    def encode(self, buf):
+        """Always raise: this codec only supports decoding."""
+        raise NotImplementedError(
+            "fletcher_null is a read-only codec and cannot encode data"
+        )
+
+
+numcodecs.register_codec(FletcherDummyFilter, "fletcher_null")
+
+
 class GRIBCodec(numcodecs.abc.Codec):
     """
     Read GRIB stream of bytes as a message using eccodes
diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py
index b54bd525..ebe461f1 100644
--- a/kerchunk/hdf.py
+++ b/kerchunk/hdf.py
@@ -7,7 +7,7 @@
 import zarr
 from zarr.meta import encode_fill_value
 import numcodecs
-from .codecs import FillStringsCodec
+from .codecs import FillStringsCodec, FletcherDummyFilter
 from .utils import _encode_for_JSON
 
 try:
@@ -376,6 +376,8 @@ def _translator(self, name: str, h5obj: Union[h5py.Dataset, h5py.Group]):
                         )
 
                 # Create a Zarr array equivalent to this HDF5 dataset...
+                if h5obj.fletcher32:
+                    filters.append(FletcherDummyFilter())
                 za = self._zroot.create_dataset(
                     h5obj.name,
                     shape=h5obj.shape,
@@ -399,9 +401,6 @@ def _translator(self, name: str, h5obj: Union[h5py.Dataset, h5py.Group]):
                 # Store chunk location metadata...
                 if cinfo:
                     for k, v in cinfo.items():
-                        if h5obj.fletcher32:
-                            logging.info("Discarding fletcher32 checksum")
-                            v["size"] -= 4
                         self.store[za._chunk_key(k)] = [
                             self._uri,
                             v["offset"],