ClimateBenchPress · treigerm · Apr 24, 2025 · Apr 17, 2025 · Apr 22, 2025 · Apr 22, 2025
diff --git a/src/climatebenchpress/compressor/compressors/abc.py b/src/climatebenchpress/compressor/compressors/abc.py
@@ -57,12 +57,24 @@ class Compressor(ABC):
 
     @staticmethod
     @abstractmethod
-    def abs_bound_codec(dtype: np.dtype, error_bound: float) -> Codec:
+    def abs_bound_codec(
+        error_bound: float,
+        *,
+        dtype: Optional[np.dtype] = None,
+        data_abs_min: Optional[float] = None,
+        data_abs_max: Optional[float] = None,
+    ) -> Codec:
         pass
 
     @staticmethod
     @abstractmethod
-    def rel_bound_codec(dtype: np.dtype, error_bound: float) -> Codec:
+    def rel_bound_codec(
+        error_bound: float,
+        *,
+        dtype: Optional[np.dtype] = None,
+        data_abs_min: Optional[float] = None,
+        data_abs_max: Optional[float] = None,
+    ) -> Codec:
         pass
 
     @classmethod
@@ -116,9 +128,19 @@ def build(
             new_codecs: dict[VariableName, Codec] = dict()
             for var, eb in eb_per_var.items():
                 if eb.abs_error is not None and cls.has_abs_error_impl:
-                    new_codecs[var] = cls.abs_bound_codec(dtypes[var], eb.abs_error)
+                    new_codecs[var] = cls.abs_bound_codec(
+                        eb.abs_error,
+                        dtype=dtypes[var],
+                        data_abs_min=data_abs_min[var],
+                        data_abs_max=data_abs_max[var],
+                    )
                 elif eb.rel_error is not None and cls.has_rel_error_impl:
-                    new_codecs[var] = cls.rel_bound_codec(dtypes[var], eb.rel_error)
+                    new_codecs[var] = cls.rel_bound_codec(
+                        eb.rel_error,
+                        dtype=dtypes[var],
+                        data_abs_min=data_abs_min[var],
+                        data_abs_max=data_abs_max[var],
+                    )
                 else:
                     # This should never happen as we have already transformed the error bounds.
                     # If this happens, it means there is a bug in the implementation.

diff --git a/src/climatebenchpress/compressor/compressors/bitround.py b/src/climatebenchpress/compressor/compressors/bitround.py
@@ -13,7 +13,9 @@ class BitRound(Compressor):
     description = "Bit Rounding"
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, *, dtype=None, **kwargs):
+        assert dtype is not None, "dtype must be provided"
+
         keepbits = compute_keepbits(dtype, error_bound)
         return CodecStack(
             numcodecs_wasm_bit_round.BitRound(keepbits=keepbits),

diff --git a/src/climatebenchpress/compressor/compressors/bitround_pco.py b/src/climatebenchpress/compressor/compressors/bitround_pco.py
@@ -14,7 +14,9 @@ class BitRoundPco(Compressor):
     description = "Bit Rounding + PCodec"
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, *, dtype=None, **kwargs):
+        assert dtype is not None, "dtype must be provided"
+
         keepbits = compute_keepbits(dtype, error_bound)
         return CodecStack(
             numcodecs_wasm_bit_round.BitRound(keepbits=keepbits),

diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py
@@ -16,29 +16,44 @@ class Jpeg2000(Compressor):
     description = "JPEG 2000"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
-        # Currently, the input is transformed into the range
-        # round(min_pixel_val/ error_bound) <= x <= round(max_pixel_val / error_bound)
-        # This means any values outside this range will incur a larger error.
-        precision = error_bound
+    def abs_bound_codec(
+        error_bound,
+        *,
+        data_abs_min=None,
+        data_abs_max=None,
+        **kwargs,
+    ):
+        assert data_abs_min is not None, "data_abs_min must be provided"
+        assert data_abs_max is not None, "data_abs_max must be provided"
+
         max_pixel_val = 2**25 - 1  # maximum pixel value for our integer encoding.
 
+        data_range = data_abs_max - data_abs_min
+
         # Here we use the formula for the PSNR (https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio)
         # to convert between the absolute error and the PSNR value.
         # The original PSNR formula uses the root mean square error (RMSE),
         # therefore JPEG does not guarantee pointwise error bounds but only
         # average error bounds.
-        psnr = 20 * (math.log10(max_pixel_val) - math.log10(error_bound))
+        psnr = 20 * (math.log10(data_range) - math.log10(error_bound))
 
         return CodecStack(
+            # increase precision for better rounding during linear quantization
+            numcodecs.astype.AsType(
+                encode_dtype="float64",
+                decode_dtype="float32",
+            ),
+            # remap from [min, max] to [0, max_pixel_val]
             numcodecs_wasm_fixed_offset_scale.FixedOffsetScale(
-                offset=0,
-                scale=precision,
+                offset=data_abs_min,
+                scale=data_range / max_pixel_val,
             ),
+            # round and truncate to integer values
             numcodecs_wasm_round.Round(precision=1),
             numcodecs.astype.AsType(
                 encode_dtype="int32",
-                decode_dtype="float32",
+                decode_dtype="float64",
             ),
+            # apply the PSNR error bound
             numcodecs_wasm_jpeg2000.Jpeg2000(mode="psnr", psnr=psnr),
         )
diff --git a/src/climatebenchpress/compressor/compressors/stochround.py b/src/climatebenchpress/compressor/compressors/stochround.py
@@ -13,7 +13,7 @@ class StochRound(Compressor):
     description = "Stochastic Rounding"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         precision = error_bound
         return CodecStack(
             numcodecs_wasm_uniform_noise.UniformNoise(scale=precision / 2, seed=42),

diff --git a/src/climatebenchpress/compressor/compressors/sz3.py b/src/climatebenchpress/compressor/compressors/sz3.py
@@ -10,11 +10,11 @@ class Sz3(Compressor):
     description = "SZ3"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_sz3.Sz3(eb_mode="abs", eb_abs=error_bound)
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, **kwargs):
         # SZ3 will not ensure that the relative error bound is strictly met.
         # Internally, SZ3 transforms the relative error bound to an absolute error bound
         # based on the range of the input data:

diff --git a/src/climatebenchpress/compressor/compressors/tthresh.py b/src/climatebenchpress/compressor/compressors/tthresh.py
@@ -10,9 +10,9 @@ class Tthresh(Compressor):
     description = "tthresh"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_tthresh.Tthresh(eb_mode="rmse", eb_rmse=error_bound)
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_tthresh.Tthresh(eb_mode="eps", eb_rmse=error_bound)
diff --git a/src/climatebenchpress/compressor/compressors/zfp.py b/src/climatebenchpress/compressor/compressors/zfp.py
@@ -18,5 +18,5 @@ class Zfp(Compressor):
     # See https://zfp.readthedocs.io/en/release1.0.1/faq.html#q-relerr for more details.
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_zfp.Zfp(mode="fixed-accuracy", tolerance=error_bound)