diff --git a/imap_processing/ancillary/ancillary_dataset_combiner.py b/imap_processing/ancillary/ancillary_dataset_combiner.py index 8d97edb7e..104255f87 100644 --- a/imap_processing/ancillary/ancillary_dataset_combiner.py +++ b/imap_processing/ancillary/ancillary_dataset_combiner.py @@ -431,17 +431,13 @@ def convert_file_to_dataset(self, filepath: str | Path) -> xr.Dataset: # noqa: lines = [line.strip() for line in f if not line.startswith("#")] identifiers = [line.split(" ", 1)[0] for line in lines] values = [float(line.split(" ", 1)[1]) for line in lines] - ds = xr.Dataset( + return xr.Dataset( { - "cps_per_r": (["start_time_utc"], values), # floats - }, - coords={ - "start_time_utc": np.array(identifiers, dtype="datetime64[s]") - }, # (e.g. '2025-07-01T00:00:00') + "start_time_utc": (["time_block"], identifiers), + "cps_per_r": (["time_block"], values), + } ) - return ds.sortby("start_time_utc") - elif filename.endswith(".json"): # Handle pipeline settings JSON file using the generic read_json method return self.convert_json_to_dataset(filepath) diff --git a/imap_processing/glows/l2/glows_l2_data.py b/imap_processing/glows/l2/glows_l2_data.py index c91b797f9..06dff9f7d 100644 --- a/imap_processing/glows/l2/glows_l2_data.py +++ b/imap_processing/glows/l2/glows_l2_data.py @@ -574,16 +574,39 @@ def get_calibration_factor( epoch_values : np.ndarray Array of epoch values from the L1B dataset, in TT J2000 nanoseconds. calibration_dataset : xr.Dataset - Dataset containing calibration data. + Dataset containing calibration data with the following structure: + Coords: epoch (datetime64[s]) + Dims: epoch, cps_per_r_dim_0, start_time_utc_dim_0 + Data vars: "cps_per_r" and "start_time_utc" are 2D (epoch, *_dim_0) + + Note: epoch and start_time_utc do not necessarily match in size or + values + - epoch contains timestamps in the calibration data up to a defined + day buffer and start_time_utc are the timestamps for all the + calibration data entries. 
+ - epoch is used for selecting the time block, and start_time_utc is + used for selecting the calibration value within that block. Returns ------- float The calibration factor needed to compute flux in Rayleigh units. """ - # Use the midpoint epoch for the day + # Use the midpoint epoch for the observation day mid_idx = len(epoch_values) // 2 mid_epoch_utc = et_to_datetime64(ttj2000ns_to_et(epoch_values[mid_idx].item())) - return calibration_dataset.sel(start_time_utc=mid_epoch_utc, method="pad")[ - "cps_per_r" - ].data.item() + + # Select calibration data before or equal to mid_epoch_utc using "pad" to find + # the nearest preceding entry in the calibration dataset's epoch + # coordinate, which is in UTC datetime64 format. + cal_at_epoch = calibration_dataset.sel(epoch=mid_epoch_utc, method="pad") + + # start_time_utc is a data variable with its own index dimension. + # Use searchsorted to find the last entry whose start_time_utc <= mid_epoch_utc. + start_times = np.array( + cal_at_epoch["start_time_utc"].values, dtype="datetime64[ns]" + ) + nearest_idx = np.searchsorted(start_times, mid_epoch_utc, side="right") - 1 + + # Select that entry. NOTE(review): with no earlier entry, idx -1 wraps to the last.
+ return float(cal_at_epoch["cps_per_r"].isel(cps_per_r_dim_0=nearest_idx)) diff --git a/imap_processing/tests/ancillary/test_ancillary_dataset_combiner.py b/imap_processing/tests/ancillary/test_ancillary_dataset_combiner.py index be784f0eb..36b9b1343 100644 --- a/imap_processing/tests/ancillary/test_ancillary_dataset_combiner.py +++ b/imap_processing/tests/ancillary/test_ancillary_dataset_combiner.py @@ -324,7 +324,7 @@ def test_glows_l2_calibration_combiner(tmp_path): combiner = GlowsAncillaryCombiner([], "20251115") dataset = combiner.convert_file_to_dataset(file_path) - assert "start_time_utc" in dataset.coords + assert "start_time_utc" in dataset.data_vars assert ( np.diff(dataset.start_time_utc.values.astype("datetime64")) >= np.timedelta64(0) ).all() diff --git a/imap_processing/tests/glows/conftest.py b/imap_processing/tests/glows/conftest.py index 8dc1118b5..fcf055e00 100644 --- a/imap_processing/tests/glows/conftest.py +++ b/imap_processing/tests/glows/conftest.py @@ -239,11 +239,35 @@ def mock_conversion_table_dict(): @pytest.fixture def mock_calibration_dataset(): """Create a mock CalibrationDataset object for testing.""" + + # Both cps_per_r and start_time_utc are 2D: (epoch, *_dim_0). 
return xr.Dataset( - {"cps_per_r": xr.DataArray([0.849, 1.020], dims=["start_time_utc"])}, + { + "cps_per_r": xr.DataArray( + [[0.849, 1.020, 1.500], [0.849, 1.020, 1.500]], + dims=["epoch", "cps_per_r_dim_0"], + ), + "start_time_utc": xr.DataArray( + np.array( + [ + [ + "2011-09-19T09:58:04", + "2011-09-20T18:12:48", + "2011-09-21T18:15:50", + ], + [ + "2011-09-19T09:58:04", + "2011-09-20T18:12:48", + "2011-09-21T18:15:50", + ], + ], + ), + dims=["epoch", "start_time_utc_dim_0"], + ), + }, coords={ - "start_time_utc": np.array( - ["2011-09-19T09:58:04", "2011-09-20T18:12:48"], dtype="datetime64[s]" + "epoch": np.array( + ["2011-09-19T00:00:00", "2011-09-20T00:00:00"], dtype="datetime64[s]" ) }, ) diff --git a/imap_processing/tests/glows/test_glows_l2_data.py b/imap_processing/tests/glows/test_glows_l2_data.py index cb7a71ac6..1a9b27656 100644 --- a/imap_processing/tests/glows/test_glows_l2_data.py +++ b/imap_processing/tests/glows/test_glows_l2_data.py @@ -92,38 +92,42 @@ def l1b_dataset(): def test_get_calibration_factor(mock_calibration_dataset): - """Test selecting correct calibration factor.""" + """Test selecting correct calibration factor. - # Mock calibration data: - # timestamps: ["2011-09-19T09:58:04", "2011-09-20T18:12:48"] - # values: [0.849, 1.020] + Mock calibration data: + start_time_utc (dims epoch × start_time_utc_dim_0, same per epoch): + ["2011-09-19T09:58:04", "2011-09-20T18:12:48", "2011-09-21T18:15:50"] + cps_per_r (dims epoch × cps_per_r_dim_0, same per epoch): + index 0 → 0.849, index 1 → 1.020, index 2 → 1.500 + """ + # Case 1: The mid-epoch ('2011-09-22T10:30:55.015') falls after the + # start_time_utc entries, so the last entry (index 2) is selected → 1.500. 
+ + # ["2011-09-22T07:45:55.015", "2011-09-22T10:30:55.015", "2011-09-22T13:15:55.015"] + later_epoch = np.array([369949621199000000, 369959521199000000, 369969421199000000]) + assert HistogramL2.get_calibration_factor( + later_epoch, mock_calibration_dataset + ) == pytest.approx(1.500) - # Case 1: The mid-epoch is after calibration timestamps, - # so the last value is selected (1.020). + # Case 2: The mid-epoch ('2011-09-21T00:52:15.000') falls between the 2nd and + # 3rd start_time_utc entries, so the 2nd entry (index 1) is selected → 1.020. # ['2011-09-21T00:50:15.000', '2011-09-21T00:52:15.000', '2011-09-21T00:54:15.000'] - later_epoch = np.array([369838281184000000, 369838401184000000, 369838521184000000]) + between_epoch = np.array( + [369838281184000000, 369838401184000000, 369838521184000000] + ) assert HistogramL2.get_calibration_factor( - later_epoch, mock_calibration_dataset + between_epoch, mock_calibration_dataset ) == pytest.approx(1.020) - # Case 2: The mid-epoch is before all calibration timestamps, - # so a KeyError is raised with the "pad" filter method. + # Case 3: The mid-epoch is before every value of the epoch coordinate, + # so xarray's "pad" selection on epoch raises a KeyError. # ['2011-09-18T19:59:08.816', '2011-09-18T20:01:08.816', '2011-09-18T20:03:08.816'] early_epoch = np.array([369648015000000000, 369648135000000000, 369648255000000000]) with pytest.raises(KeyError): HistogramL2.get_calibration_factor(early_epoch, mock_calibration_dataset) - # Case 3: The mid-epoch is between the calibration times, - # so the first value is selected (0.849). - - # '2011-09-20T16:30:15.000' - between_epoch = np.array([369808281184000000]) - assert HistogramL2.get_calibration_factor( - between_epoch, mock_calibration_dataset - ) == pytest.approx(0.849) - @pytest.mark.external_kernel def test_ecliptic_coords_computation(furnish_kernels):