-
Notifications
You must be signed in to change notification settings - Fork 33
2935 hi l1c backgrounds utilities #2936
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
subagonsouth
merged 9 commits into
IMAP-Science-Operations-Center:dev
from
subagonsouth:2935-hi-l1c-backgrounds---utilities
Apr 9, 2026
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
57627b7
Add background config to hi utils
subagonsouth 1325ea5
Add fixture to conftest
subagonsouth 0802773
Remove unused accessor functions
subagonsouth 2991c39
Add background config test file
subagonsouth e565f67
Copilot feedback changes
subagonsouth becbf1b
Fix documentation build
subagonsouth 49db01a
Fix typo in csv comment section
subagonsouth 4b42c01
Add proper unit tests for utils iter functions
subagonsouth 77fbaf3
Clean up Config definitions
subagonsouth File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,7 +7,7 @@ | |
| from dataclasses import dataclass | ||
| from enum import IntEnum | ||
| from pathlib import Path | ||
| from typing import Any | ||
| from typing import IO, Any | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
|
|
@@ -313,7 +313,12 @@ class EsaEnergyStepLookupTable: | |
|
|
||
| def __init__(self) -> None: | ||
| self.df = pd.DataFrame( | ||
| columns=["start_met", "end_met", "esa_step", "esa_energy_step"] | ||
| { | ||
| "start_met": pd.Series(dtype="float64"), | ||
| "end_met": pd.Series(dtype="float64"), | ||
| "esa_step": pd.Series(dtype="int64"), | ||
| "esa_energy_step": pd.Series(dtype="int64"), | ||
| } | ||
| ) | ||
| self._indexed = False | ||
|
|
||
|
|
@@ -451,30 +456,23 @@ def query( | |
| return results.astype(self._esa_energy_step_dtype) | ||
|
|
||
|
|
||
| @pd.api.extensions.register_dataframe_accessor("cal_prod_config") | ||
| class CalibrationProductConfig: | ||
| class _BaseConfigAccessor: | ||
| """ | ||
| Register custom accessor for calibration product configuration DataFrames. | ||
| Base class for configuration DataFrame accessors. | ||
|
|
||
| Provides common functionality for validating and processing configuration | ||
| DataFrames with coincidence types and TOF windows. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| pandas_obj : pandas.DataFrame | ||
| Object to run validation and use accessor functions on. | ||
| """ | ||
|
|
||
| index_columns = ( | ||
| "calibration_prod", | ||
| "esa_energy_step", | ||
| ) | ||
| # Subclasses must define these | ||
| index_columns: tuple[str, ...] | ||
| required_columns: tuple[str, ...] | ||
| tof_detector_pairs = ("ab", "ac1", "bc1", "c1c2") | ||
| required_columns = ( | ||
| "coincidence_type_list", | ||
| *[ | ||
| f"tof_{det_pair}_{limit}" | ||
| for det_pair in tof_detector_pairs | ||
| for limit in ["low", "high"] | ||
| ], | ||
| ) | ||
|
|
||
| def __init__(self, pandas_obj: pd.DataFrame) -> None: | ||
| self._validate(pandas_obj) | ||
|
|
@@ -495,16 +493,14 @@ def _validate(self, df: pd.DataFrame) -> None: | |
| AttributeError : If the dataframe does not pass validation. | ||
| """ | ||
| for index_name in self.index_columns: | ||
| if index_name in df.index: | ||
| if index_name not in df.index.names: | ||
| raise AttributeError( | ||
| f"Required index {index_name} not present in dataframe." | ||
| ) | ||
| # Verify that the Dataframe has all the required columns | ||
| for col in self.required_columns: | ||
| if col not in df.columns: | ||
| raise AttributeError(f"Required column {col} not present in dataframe.") | ||
| # TODO: Verify that the same ESA energy steps exist in all unique calibration | ||
| # product numbers | ||
|
|
||
| def _add_coincidence_values_column(self) -> None: | ||
| """Generate and add the coincidence_type_values column to the dataframe.""" | ||
|
|
@@ -518,28 +514,65 @@ def _add_coincidence_values_column(self) -> None: | |
| axis=1, | ||
| ) | ||
|
|
||
| @property | ||
| def calibration_product_numbers(self) -> npt.NDArray[np.int_]: | ||
| """ | ||
| Get the calibration product numbers from the current configuration. | ||
|
|
||
| Returns | ||
| ------- | ||
| cal_prod_numbers : numpy.ndarray | ||
| Array of calibration product numbers from the configuration. | ||
| These are sorted in ascending order and can be arbitrary integers. | ||
| """ | ||
| return ( | ||
| self._obj.index.get_level_values("calibration_prod") | ||
| .unique() | ||
| .sort_values() | ||
| .values | ||
| ) | ||
|
|
||
|
|
||
| @pd.api.extensions.register_dataframe_accessor("cal_prod_config") | ||
| class CalibrationProductConfig(_BaseConfigAccessor): | ||
| """Register custom accessor for calibration product configuration DataFrames.""" | ||
|
|
||
| index_columns = ( | ||
| "calibration_prod", | ||
| "esa_energy_step", | ||
| ) | ||
| required_columns = ( | ||
| "coincidence_type_list", | ||
| *[ | ||
| f"tof_{det_pair}_{limit}" | ||
| for det_pair in _BaseConfigAccessor.tof_detector_pairs | ||
| for limit in ["low", "high"] | ||
| ], | ||
| ) | ||
|
|
||
| @classmethod | ||
| def from_csv(cls, path: str | Path) -> pd.DataFrame: | ||
| def from_csv(cls, path: str | Path | IO[str]) -> pd.DataFrame: | ||
| """ | ||
| Read configuration CSV file into a pandas.DataFrame. | ||
| Read calibration product configuration CSV file into a pandas.DataFrame. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| path : str or pathlib.Path | ||
| Location of the Calibration Product configuration CSV file. | ||
| path : str or pathlib.Path or file-like object | ||
| Location of the calibration product configuration CSV file. | ||
|
|
||
| Returns | ||
| ------- | ||
| dataframe : pandas.DataFrame | ||
| Validated calibration product configuration data frame. | ||
| Validated calibration product configuration DataFrame with | ||
| coincidence_type_values column added. | ||
| """ | ||
| df = pd.read_csv( | ||
| path, | ||
| index_col=cls.index_columns, | ||
| converters={"coincidence_type_list": lambda s: tuple(s.split("|"))}, | ||
| comment="#", | ||
| ) | ||
| # Force the _init_ method to run by using the namespace | ||
| # Trigger the accessor to run validation and add coincidence_type_values | ||
| _ = df.cal_prod_config.number_of_products | ||
| return df | ||
|
|
||
|
|
@@ -556,23 +589,51 @@ def number_of_products(self) -> int: | |
| """ | ||
| return len(self._obj.index.unique(level="calibration_prod")) | ||
|
|
||
| @property | ||
| def calibration_product_numbers(self) -> npt.NDArray[np.int_]: | ||
|
|
||
| @pd.api.extensions.register_dataframe_accessor("background_config") | ||
| class BackgroundConfig(_BaseConfigAccessor): | ||
| """Register custom accessor for background configuration DataFrames.""" | ||
|
|
||
| index_columns = ( | ||
| "calibration_prod", | ||
| "background_index", | ||
| ) | ||
| required_columns = ( | ||
| "coincidence_type_list", | ||
| *[ | ||
| f"tof_{det_pair}_{limit}" | ||
| for det_pair in _BaseConfigAccessor.tof_detector_pairs | ||
| for limit in ["low", "high"] | ||
| ], | ||
| "scaling_factor", | ||
| "uncertainty", | ||
| ) | ||
|
|
||
| @classmethod | ||
| def from_csv(cls, path: str | Path | IO[str]) -> pd.DataFrame: | ||
| """ | ||
| Get the calibration product numbers from the current configuration. | ||
| Read background configuration CSV file into a pandas.DataFrame. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| path : str or pathlib.Path or file-like object | ||
| Location of the background configuration CSV file. | ||
|
|
||
|
Comment on lines
+612
to
621
|
||
| Returns | ||
| ------- | ||
| cal_prod_numbers : numpy.ndarray | ||
| Array of calibration product numbers from the configuration. | ||
| These are sorted in ascending order and can be arbitrary integers. | ||
| dataframe : pandas.DataFrame | ||
| Validated background configuration DataFrame with | ||
| coincidence_type_values column added. | ||
| """ | ||
| return ( | ||
| self._obj.index.get_level_values("calibration_prod") | ||
| .unique() | ||
| .sort_values() | ||
| .values | ||
| df = pd.read_csv( | ||
| path, | ||
| index_col=cls.index_columns, | ||
| converters={"coincidence_type_list": lambda s: tuple(s.split("|"))}, | ||
| comment="#", | ||
| ) | ||
| # Trigger the accessor to run validation and add coincidence_type_values | ||
| _ = df.background_config.calibration_product_numbers | ||
| return df | ||
|
|
||
subagonsouth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| def get_tof_window_mask( | ||
|
|
@@ -753,24 +814,104 @@ def iter_qualified_events_by_config( | |
| yield esa_energy, config_row, np.zeros(n_events, dtype=bool) | ||
| continue | ||
|
|
||
| # Check coincidence type | ||
| coin_mask = filter_events_by_coincidence( | ||
| de_ds, config_row.coincidence_type_values | ||
| ) | ||
| # Apply common filtering logic | ||
| filter_mask = _filter_events_by_config_row(de_ds, config_row, tof_fill_vals) | ||
|
|
||
| yield esa_energy, config_row, esa_mask & filter_mask | ||
|
|
||
| # Build TOF windows dict from config row | ||
| tof_windows = { | ||
| f"tof_{pair}": ( | ||
| getattr(config_row, f"tof_{pair}_low"), | ||
| getattr(config_row, f"tof_{pair}_high"), | ||
| ) | ||
| for pair in CalibrationProductConfig.tof_detector_pairs | ||
| } | ||
|
|
||
| # Check TOF windows | ||
| tof_mask = get_tof_window_mask(de_ds, tof_windows, tof_fill_vals) | ||
| def _filter_events_by_config_row( | ||
| de_ds: xr.Dataset, | ||
| config_row: Any, | ||
| tof_fill_vals: dict[str, float], | ||
| ) -> NDArray[np.bool_]: | ||
| """ | ||
| Filter events by coincidence type and TOF windows for a single config row. | ||
|
|
||
| yield esa_energy, config_row, esa_mask & coin_mask & tof_mask | ||
| Helper function to apply common filtering logic used by both | ||
| iter_qualified_events_by_config and iter_background_events_by_config. | ||
subagonsouth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| Parameters | ||
| ---------- | ||
| de_ds : xarray.Dataset | ||
| Direct Event dataset with coincidence_type and TOF variables. | ||
| config_row : namedtuple | ||
| Config row from DataFrame.itertuples() containing: | ||
| - coincidence_type_values: tuple of int coincidence types | ||
| - tof_<pair>_low, tof_<pair>_high: TOF window bounds | ||
| tof_fill_vals : dict[str, float] | ||
| Dictionary mapping TOF variable names to their fill values. | ||
|
|
||
| Returns | ||
| ------- | ||
| filter_mask : numpy.ndarray | ||
| Boolean mask where True = event matches the filter criteria. | ||
| """ | ||
| # Check coincidence type | ||
| coin_mask = filter_events_by_coincidence(de_ds, config_row.coincidence_type_values) | ||
|
|
||
| # Build TOF windows dict from config row | ||
| tof_windows = { | ||
| f"tof_{pair}": ( | ||
| getattr(config_row, f"tof_{pair}_low"), | ||
| getattr(config_row, f"tof_{pair}_high"), | ||
| ) | ||
| for pair in CalibrationProductConfig.tof_detector_pairs | ||
| } | ||
|
|
||
| # Check TOF windows | ||
| tof_mask = get_tof_window_mask(de_ds, tof_windows, tof_fill_vals) | ||
|
|
||
| return coin_mask & tof_mask | ||
|
|
||
|
|
||
| def iter_background_events_by_config( | ||
| de_ds: xr.Dataset, | ||
| background_config: pd.DataFrame, | ||
| ) -> Generator[tuple[Any, xr.Dataset], None, None]: | ||
| """ | ||
| Iterate over background config, yielding filtered event datasets. | ||
|
|
||
| For each (calibration_prod, background_index) combination in the config, | ||
| yields the filtered dataset containing only events that match BOTH | ||
| coincidence_type AND TOF window checks. | ||
|
|
||
| Unlike iter_qualified_events_by_config, this does NOT filter by ESA energy | ||
| step, as background counts are accumulated across all ESA steps. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| de_ds : xarray.Dataset | ||
| Direct Event dataset with coincidence_type and TOF variables. | ||
| TOF variables must have FILLVAL attribute for fill value handling. | ||
| background_config : pandas.DataFrame | ||
| Config DataFrame with multi-index (calibration_prod, background_index). | ||
| Must have coincidence_type_values column and TOF window columns. | ||
|
|
||
| Yields | ||
| ------ | ||
| config_row : namedtuple | ||
| The config row from itertuples() containing background settings. | ||
| filtered_ds : xarray.Dataset | ||
| Filtered dataset containing only events matching the criteria. | ||
| """ | ||
| n_events = len(de_ds["event_met"]) | ||
|
|
||
| # Build TOF fill values from dataset attributes | ||
| tof_fill_vals = _build_tof_fill_vals(de_ds) | ||
subagonsouth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| for config_row in background_config.itertuples(): | ||
| if n_events == 0: | ||
| # Return empty dataset | ||
| yield config_row, de_ds.isel(event_met=slice(0, 0)) | ||
| continue | ||
|
|
||
| # Apply common filtering logic | ||
| filter_mask = _filter_events_by_config_row(de_ds, config_row, tof_fill_vals) | ||
|
|
||
| # Return filtered dataset (no ESA energy filtering) | ||
| filtered_ds = de_ds.isel(event_met=filter_mask) | ||
| yield config_row, filtered_ds | ||
|
|
||
|
|
||
| def compute_qualified_event_mask( | ||
|
|
@@ -801,7 +942,7 @@ def compute_qualified_event_mask( | |
| qualified_mask : np.ndarray | ||
| Boolean mask - True if event qualifies for at least one cal product. | ||
| """ | ||
| n_events = len(de_ds["event_met"]) if "event_met" in de_ds.dims else 0 | ||
| n_events = len(de_ds["event_met"]) | ||
subagonsouth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if n_events == 0: | ||
| return np.array([], dtype=bool) | ||
|
|
||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
imap_processing/tests/hi/data/l1/imap_hi_90sensor-backgrounds_20240101_v001.csv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| # THIS IS A TEST FILE AND SHOULD NOT BE USED IN PRODUCTION PROCESSING. | ||
| # | ||
| # Backgrounds Determination Configuration File | ||
| # Valid start date: 2024-01-01 (from filename) | ||
| # This file will be used in processing until a new calibration file is uploaded | ||
| # to the SDC with either a higher version number or new date in the filename. | ||
| # For details on how the SDC selects ancillary files for processing, see: | ||
| # https://imap-processing.readthedocs.io/en/latest/data-access/calibration-files.html | ||
| # | ||
| # When creating PSET products, the following table will be used to determine per | ||
| # calibration product backgrounds. The backgrounds for each calibration product are | ||
| # computed individually and the sum is subtracted from the calibration product. Background | ||
| # uncertainties are summed in quadrature and reported in the background uncertainty output. | ||
| # | ||
| calibration_prod,background_index,coincidence_type_list,tof_ab_low,tof_ab_high,tof_ac1_low,tof_ac1_high,tof_bc1_low,tof_bc1_high,tof_c1c2_low,tof_c1c2_high,scaling_factor,uncertainty | ||
| 0,0,ABC1C2|ABC1,-20,16,-46,-15,-511,511,0,1023,0.00306,0.00030 | ||
| 0,1,AB,-20,16,-46,-15,-511,511,0,1023,0.0189,0.0020 | ||
| 1,0,BC1C2|AC1,-20,16,-46,-15,-511,511,0,1023,0.00306,0.00030 | ||
| 1,1,AB,-20,16,-46,-15,-511,511,0,1023,0.0189,0.0020 |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Type hint for
`from_csv` is `path: str | Path`, but callers/tests pass file-like objects (e.g., `io.StringIO`), which `pd.read_csv` supports. Consider widening the annotation (and docstring wording) to include file-like inputs (e.g., `str | Path | IO[str]`) to reflect the actual supported API.