Skip to content

Commit 961980d

Browse files
authored
2138 hi l2 combine calibration products (IMAP-Science-Operations-Center#2227)
* Add code to correctly combine fluxes and uncertainties across calibration products * Add logging messages Temporary solution for uncertainty Fix hi_l2 tests * Add comments fix uncertainty fix l2 test * Fix error in improved_stat_unc. Square was already being calculated. * rename unc_sq to variance * Add test coverage for combining hi calibration products * Fix doc build: xarray.DataSet -> xarray.Dataset * Make sure comments consistently distinguish between uncertainty and variance Fix bug in improved variance with single calibration product
1 parent 4872dcd commit 961980d

File tree

2 files changed

+536
-32
lines changed

2 files changed

+536
-32
lines changed

imap_processing/hi/hi_l2.py

Lines changed: 163 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,8 @@ def generate_hi_map(
141141
output_map.data_1d[var] /= output_map.data_1d["exposure_factor"]
142142

143143
output_map.data_1d.update(calculate_ena_signal_rates(output_map.data_1d))
144-
output_map.data_1d.update(
145-
calculate_ena_intensity(
146-
output_map.data_1d, geometric_factors_path, esa_energies_path
147-
)
144+
output_map.data_1d = calculate_ena_intensity(
145+
output_map.data_1d, geometric_factors_path, esa_energies_path
148146
)
149147

150148
output_map.data_1d["obs_date"].data = output_map.data_1d["obs_date"].data.astype(
@@ -224,7 +222,7 @@ def calculate_ena_intensity(
224222
map_ds: xr.Dataset,
225223
geometric_factors_path: str | Path,
226224
esa_energies_path: str | Path,
227-
) -> dict[str, xr.DataArray]:
225+
) -> xr.Dataset:
228226
"""
229227
Calculate the ena intensities.
230228
@@ -239,8 +237,9 @@ def calculate_ena_intensity(
239237
240238
Returns
241239
-------
242-
intensity_vars : dict[str, xarray.DataArray]
243-
ENA Intensity with statistical and systematic uncertainties.
240+
map_ds : xarray.Dataset
241+
Map dataset with new variables: ena_intensity, ena_intensity_stat_unc,
242+
ena_intensity_sys_err.
244243
"""
245244
# read calibration product configuration file
246245
cal_prod_df = CalibrationProductConfig.from_csv(geometric_factors_path)
@@ -255,29 +254,167 @@ def calculate_ena_intensity(
255254

256255
# Convert ENA Signal Rate to Flux
257256
flux_conversion_divisor = geometric_factor * esa_energy
258-
intensity_vars = {
259-
"ena_intensity": map_ds["ena_signal_rates"] / flux_conversion_divisor,
260-
"ena_intensity_stat_unc": map_ds["ena_signal_rate_stat_unc"]
261-
/ flux_conversion_divisor,
262-
"ena_intensity_sys_err": map_ds["bg_rates_unc"] / flux_conversion_divisor,
263-
}
264-
265-
# TODO: Correctly implement combining of calibration products. For now, just sum
266-
# Hi groups direct events into distinct calibration products based on coincidence
267-
# type. (See L1B processing and Hi Algorithm Document section 6.1.2) When adding
268-
# together different calibration products, a different weighting must be used
269-
# than exposure time. (See Hi Algorithm Document Section 3.1.2)
270-
intensity_vars["ena_intensity"] = intensity_vars["ena_intensity"].sum(
271-
dim="calibration_prod"
257+
map_ds["ena_intensity"] = map_ds["ena_signal_rates"] / flux_conversion_divisor
258+
map_ds["ena_intensity_stat_unc"] = (
259+
map_ds["ena_signal_rate_stat_unc"] / flux_conversion_divisor
260+
)
261+
map_ds["ena_intensity_sys_err"] = map_ds["bg_rates_unc"] / flux_conversion_divisor
262+
263+
# Combine calibration products using proper weighted averaging
264+
# as described in Hi Algorithm Document Section 3.1.2
265+
map_ds = combine_calibration_products(
266+
map_ds,
267+
geometric_factor,
268+
esa_energy,
269+
)
270+
271+
return map_ds
272+
273+
274+
def combine_calibration_products(
275+
map_ds: xr.Dataset,
276+
geometric_factors: xr.DataArray,
277+
esa_energies: xr.DataArray,
278+
) -> xr.Dataset:
279+
"""
280+
Combine calibration products using weighted averaging.
281+
282+
Implements the algorithm described in Hi Algorithm Document Section 3.1.2
283+
for properly combining data from multiple calibration products.
284+
285+
Parameters
286+
----------
287+
map_ds : xarray.Dataset
288+
Map dataset that has preliminary intensity variables computed for each
289+
calibration product.
290+
geometric_factors : xarray.DataArray
291+
Geometric factors for each calibration product and energy step.
292+
esa_energies : xarray.DataArray
293+
Central energies for each energy step.
294+
295+
Returns
296+
-------
297+
map_ds : xarray.Dataset
298+
Map dataset with updated variables: ena_intensity, ena_intensity_stat_unc,
299+
ena_intensity_sys_err now combined across calibration products at each
300+
energy level.
301+
"""
302+
ena_flux = map_ds["ena_intensity"]
303+
sys_err = map_ds["ena_intensity_sys_err"]
304+
305+
# Calculate improved statistical variance estimates using geometric factor
306+
# ratios to reduce bias from Poisson uncertainty estimation
307+
improved_stat_variance = _calculate_improved_stat_variance(
308+
map_ds, geometric_factors, esa_energies
272309
)
273-
intensity_vars["ena_intensity_stat_unc"] = np.sqrt(
274-
(intensity_vars["ena_intensity_stat_unc"] ** 2).sum(dim="calibration_prod")
310+
311+
# Calculate total variance
312+
# Note that sys_err contains uncertainty, so it must be squared to get
313+
# the systematic variance needed in this equation.
314+
total_variance = improved_stat_variance + sys_err**2
315+
316+
# Perform inverse-variance weighted averaging
317+
# Handle divide by zero and invalid values
318+
with np.errstate(divide="ignore", invalid="ignore"):
319+
# Calculate weights for statistical variance combination using only
320+
# statistical variance
321+
stat_weights = 1.0 / improved_stat_variance
322+
323+
# Combined statistical uncertainty from inverse-variance formula
324+
combined_stat_unc = np.sqrt(1.0 / stat_weights.sum(dim="calibration_prod"))
325+
326+
# Use total variance weights for flux combination
327+
flux_weights = 1.0 / total_variance
328+
weighted_flux_sum = (ena_flux * flux_weights).sum(dim="calibration_prod")
329+
combined_flux = weighted_flux_sum / flux_weights.sum(dim="calibration_prod")
330+
331+
map_ds["ena_intensity"] = combined_flux
332+
map_ds["ena_intensity_stat_unc"] = combined_stat_unc
333+
# For systematic error, just do quadrature sum over the systematic error for
334+
# each calibration product.
335+
map_ds["ena_intensity_sys_err"] = np.sqrt((sys_err**2).sum(dim="calibration_prod"))
336+
337+
return map_ds
338+
339+
340+
def _calculate_improved_stat_variance(
341+
map_ds: xr.Dataset,
342+
geometric_factors: xr.DataArray,
343+
esa_energies: xr.DataArray,
344+
) -> xr.DataArray:
345+
"""
346+
Calculate improved statistical variances using geometric factor ratios.
347+
348+
This implements the algorithm from Hi Algorithm Document Section 3.1.2:
349+
For calibration product X, replace N_X in the uncertainty calculation with
350+
an improved estimate using geometric factor ratios from all calibration products.
351+
352+
The key insight is that we can vectorize this by first computing a geometric
353+
factor normalized signal rate, then scaling it back for each calibration product.
354+
355+
Parameters
356+
----------
357+
map_ds : xarray.Dataset
358+
Map dataset.
359+
geometric_factors : xr.DataArray
360+
Geometric factors for each calibration product.
361+
esa_energies : xarray.DataArray
362+
Central energies for each energy step.
363+
364+
Returns
365+
-------
366+
improved_variance : xr.DataArray
367+
Improved statistical variance estimates.
368+
"""
369+
n_calib_prods = map_ds["ena_intensity"].sizes.get("calibration_prod", 1)
370+
371+
if n_calib_prods <= 1:
372+
# No improvement possible with single calibration product
373+
return map_ds["ena_intensity_stat_unc"] ** 2
374+
375+
logger.debug("Computing geometric factor normalized signal rates")
376+
377+
# signal_rates = counts / exposure_factor - bg_rates
378+
# signal_rates shape is: (n_epoch, n_energy, n_cal_prod, n_spatial_pixels)
379+
signal_rates = map_ds["ena_signal_rates"]
380+
381+
# Compute geometric factor normalized signal rate (vectorized approach)
382+
# This represents the weighted average signal rate per unit geometric factor
383+
# geometric_factor_norm_signal_rates shape is: (n_epoch, n_energy, n_spatial_pixels)
384+
geometric_factor_norm_signal_rates = signal_rates.sum(
385+
dim="calibration_prod"
386+
) / geometric_factors.sum(dim="calibration_prod")
387+
388+
# For each calibration product, the averaged signal rate estimate is:
389+
# averaged_signal_rate_i = geometric_factor_norm_signal_rates * geometric_factor_i
390+
# averaged_signal_rates shape is: (n_epoch, n_energy, n_cal_prod, n_spatial_pixels)
391+
averaged_signal_rates = geometric_factor_norm_signal_rates * geometric_factors
392+
393+
logger.debug("Including background rates in uncertainty calculation")
394+
# Convert averaged signal rates back to flux uncertainties
395+
# Total count rates for Poisson uncertainty calculation
396+
total_count_rates_for_uncertainty = map_ds["bg_rates"] + averaged_signal_rates
397+
398+
# Ensure non-negative values for sqrt and minimum of 1 for uncertainty calculation
399+
total_count_rates_for_uncertainty = xr.where(
400+
total_count_rates_for_uncertainty < 1, 1, total_count_rates_for_uncertainty
275401
)
276-
intensity_vars["ena_intensity_sys_err"] = np.sqrt(
277-
(intensity_vars["ena_intensity_sys_err"] ** 2).sum(dim="calibration_prod")
402+
403+
logger.debug("Computing improved flux uncertainties")
404+
# Statistical variance:
405+
with np.errstate(divide="ignore", invalid="ignore"):
406+
improved_variance = total_count_rates_for_uncertainty / (
407+
map_ds["exposure_factor"] * (geometric_factors * esa_energies)
408+
)
409+
410+
# Handle invalid cases by falling back to original uncertainties
411+
improved_variance = xr.where(
412+
~np.isfinite(improved_variance) | (geometric_factors == 0),
413+
map_ds["ena_intensity_stat_unc"],
414+
improved_variance,
278415
)
279416

280-
return intensity_vars
417+
return improved_variance
281418

282419

283420
def esa_energy_df(

0 commit comments

Comments
 (0)