diff --git a/imap_processing/tests/external_test_data_config.py b/imap_processing/tests/external_test_data_config.py index 6895b43da..736e506d4 100644 --- a/imap_processing/tests/external_test_data_config.py +++ b/imap_processing/tests/external_test_data_config.py @@ -162,6 +162,7 @@ ("validate_high_energy_culling_results_repoint00047_v2.csv", "ultra/data/l1/"), ("validate_stat_culling_results_repoint00047_v2.csv", "ultra/data/l1/"), ("validate_upstream_ion_1_culling_results_repoint00047_v1.csv", "ultra/data/l1/"), + ("validate_spectral_culling_results_repoint00047_v1.csv", "ultra/data/l1/"), ("de_test_data_repoint00047.csv", "ultra/data/l1/"), ("FM45_UltraFM45Extra_TV_Tests_2024-01-22T0930_20240122T093008.CCSDS", "ultra/data/l0/"), ("ultra45_raw_sc_rawnrgevnt_19840122_00.csv", "ultra/data/l0/"), diff --git a/imap_processing/tests/ultra/unit/conftest.py b/imap_processing/tests/ultra/unit/conftest.py index 3e457483e..f9205f0ef 100644 --- a/imap_processing/tests/ultra/unit/conftest.py +++ b/imap_processing/tests/ultra/unit/conftest.py @@ -691,5 +691,9 @@ def mock_goodtimes_dataset(): "spin_number", np.zeros(nspins, dtype=np.uint16), ), + "quality_spectral": ( + "spin_number", + np.zeros(nspins, dtype=np.uint16), + ), } ) diff --git a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py index c2f4181fe..1df484572 100644 --- a/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py +++ b/imap_processing/tests/ultra/unit/test_ultra_l1b_culling.py @@ -27,6 +27,7 @@ flag_low_voltage, flag_rates, flag_scattering, + flag_spectral_events, flag_statistical_outliers, flag_upstream_ion, get_binned_energy_ranges, @@ -459,6 +460,7 @@ def test_get_energy_and_spin_dependent_rejection_mask(): "energy_range_edges": energy_range_edges, "quality_upstream_ion_1": np.full(n_spins, 0), "quality_upstream_ion_2": np.full(n_spins, 0), + "quality_spectral": np.full(n_spins, 0), } ) # update quality flags to test that events get rejected @@ -967,3 +969,28 @@ def test_upstream_ion_cull_invalid_channels(setup_repoint_47_data): [5, 6, 7], # Invalid channels that are out of bounds 90, ) + + +@pytest.mark.external_test_data +def test_validate_spectral_cull(setup_repoint_47_data): + """Validate that spectral flags match expected results.""" + # read test data from csv files + expected_results = pd.read_csv( + TEST_PATH / "validate_spectral_culling_results_repoint00047_v1.csv" + ).to_numpy() + de_ds, _, spin_tbin_edges = setup_repoint_47_data + intervals, _, _ = build_energy_bins() + energy_ranges = get_binned_energy_ranges(intervals) + mask = np.zeros((len(energy_ranges) - 1, len(spin_tbin_edges) - 1), dtype=bool) + mask[0:2, 0:2] = ( + True # This will mark the first 2 energy bins and first 2 spin bins as flagged + ) + flags = flag_spectral_events( + de_ds, + spin_tbin_edges, + energy_ranges, + UltraConstants.SPECTRAL_ENERGY_CHANNELS, + 90, + ) + results = flags | mask + np.testing.assert_array_equal(results, ~expected_results.astype(bool)) diff --git a/imap_processing/ultra/constants.py b/imap_processing/ultra/constants.py index ede9302a6..c18d0a23e 100644 --- a/imap_processing/ultra/constants.py +++ b/imap_processing/ultra/constants.py @@ -230,6 +230,9 @@ class UltraConstants: UPSTREAM_ION_ENERGY_CHANNELS_1: ClassVar[list] = [0, 1, 2] UPSTREAM_ION_ENERGY_CHANNELS_2: ClassVar[list] = [2, 3, 4] UPSTREAM_SIG_THRESHOLD = 2.5 + # Spectral culling parameters + SPECTRAL_ENERGY_CHANNELS: ClassVar[list] = [0, 1, 2, 3] + SPECTRAL_SIG_THRESHOLD = 1 # Set dimensions for extended spin/goodtime support variables # ISTP requires fixed dimensions, so we set these to the maximum we expect to need # and pad with fill values if we use fewer bins. diff --git a/imap_processing/ultra/l1b/extendedspin.py b/imap_processing/ultra/l1b/extendedspin.py index 5e4e6c80d..a2fe50b4f 100644 --- a/imap_processing/ultra/l1b/extendedspin.py +++ b/imap_processing/ultra/l1b/extendedspin.py @@ -14,6 +14,7 @@ flag_imap_instruments, flag_low_voltage, flag_rates, + flag_spectral_events, flag_statistical_outliers, flag_upstream_ion, get_binned_energy_ranges, @@ -75,7 +76,6 @@ def calculate_extendedspin( spin_tbin_edges = get_binned_spins_edges( spin, spin_period, spin_starttime, spin_bin_size ) - # Calculate goodtime quality flags. # The culling algorithms should be called in the following order # 1. Low voltage @@ -123,8 +123,16 @@ def calculate_extendedspin( UltraConstants.UPSTREAM_ION_ENERGY_CHANNELS_2, instrument_id, ) - # Update mask to include upstream ion flags #2 - mask = mask | upstream_ion_qf_2 + spectral_qf = flag_spectral_events( + de_dataset, + spin_tbin_edges, + energy_ranges, + UltraConstants.SPECTRAL_ENERGY_CHANNELS, + instrument_id, + ) + # Update mask to include upstream ion flags #2 and spectral flags before flagging + # statistical outliers + mask = mask | upstream_ion_qf_2 | spectral_qf stat_outliers_qf, _, _, _ = flag_statistical_outliers( de_dataset, spin_tbin_edges, @@ -132,7 +140,6 @@ def calculate_extendedspin( mask, instrument_id, ) - # Get the number of pulses per spin. pulses = get_pulses_per_spin(aux_dataset, rates_dataset) @@ -186,9 +193,11 @@ def calculate_extendedspin( voltage_qf = voltage_qf * combined_flags upstream_ion_qf_1 = upstream_ion_qf_1 * combined_flags upstream_ion_qf_2 = upstream_ion_qf_2 * combined_flags + spectral_qf = spectral_qf * combined_flags # Expand binned quality flags to individual spins. high_energy_qf = expand_bin_flags_to_spins(len(spin), high_energy_qf, spin_bin_size) voltage_qf = expand_bin_flags_to_spins(len(spin), voltage_qf, spin_bin_size) + spectral_qf = expand_bin_flags_to_spins(len(spin), spectral_qf, spin_bin_size) upstream_ion_qf_1 = expand_bin_flags_to_spins( len(spin), upstream_ion_qf_1, spin_bin_size ) @@ -210,6 +219,7 @@ def calculate_extendedspin( extendedspin_dict["quality_low_voltage"] = voltage_qf # shape (nspin,) extendedspin_dict["quality_upstream_ion_1"] = upstream_ion_qf_1 # shape (nspin,) extendedspin_dict["quality_upstream_ion_2"] = upstream_ion_qf_2 # shape (nspin,) + extendedspin_dict["quality_spectral"] = spectral_qf # shape (nspin,) extendedspin_dict["quality_statistics"] = stat_outliers_qf # shape (nspin,) extendedspin_dict["quality_high_energy"] = high_energy_qf # shape (nspin,) # ISTP requires stable dimension sizes, so this field must always remain size 16. diff --git a/imap_processing/ultra/l1b/goodtimes.py b/imap_processing/ultra/l1b/goodtimes.py index 266a2a2c5..5a57191e5 100644 --- a/imap_processing/ultra/l1b/goodtimes.py +++ b/imap_processing/ultra/l1b/goodtimes.py @@ -98,6 +98,9 @@ def calculate_goodtimes(extendedspin_dataset: xr.Dataset, name: str) -> xr.Datas goodtimes_dataset["quality_upstream_ion_2"] = xr.DataArray( np.array([FILLVAL_UINT16], dtype="uint16"), dims=["spin_number"] ) + goodtimes_dataset["quality_spectral"] = xr.DataArray( + np.array([FILLVAL_UINT16], dtype="uint16"), dims=["spin_number"] + ) goodtimes_dataset["quality_statistics"] = xr.DataArray( np.array([FILLVAL_UINT16], dtype="uint16"), dims=["spin_number"] ) diff --git a/imap_processing/ultra/l1b/quality_flag_filters.py b/imap_processing/ultra/l1b/quality_flag_filters.py index 10b621c3f..faa3921a5 100644 --- a/imap_processing/ultra/l1b/quality_flag_filters.py +++ b/imap_processing/ultra/l1b/quality_flag_filters.py @@ -23,6 +23,7 @@ "quality_low_voltage", "quality_upstream_ion_1", "quality_upstream_ion_2", + "quality_spectral", "quality_high_energy", "quality_statistics", ] diff --git a/imap_processing/ultra/l1b/ultra_l1b_culling.py b/imap_processing/ultra/l1b/ultra_l1b_culling.py index 882e829f2..edd428845 100644 --- a/imap_processing/ultra/l1b/ultra_l1b_culling.py +++ b/imap_processing/ultra/l1b/ultra_l1b_culling.py @@ -1018,6 +1018,67 @@ def flag_upstream_ion( return flagged +def flag_spectral_events( + de_dataset: xr.Dataset, + spin_tbin_edges: NDArray, + energy_ranges: NDArray, + channels: list, + sensor_id: int = 90, +) -> NDArray: + """ + Flag spectral events. + + Parameters + ---------- + de_dataset : xr.Dataset + Direct event dataset. + spin_tbin_edges : NDArray + Edges of the spin time bins. + energy_ranges : NDArray + Array of energy range edges. + channels : list + List of energy channel indices to use for spectral flagging. + sensor_id : int + Sensor ID (e.g., 45 or 90). + + Returns + ------- + flagged : NDArray + Boolean array of shape (n_spin_bins,) where True indicates spin bins flagged for + spectral anomalies. These flags are energy independent and should be applied + across all energy channels. + """ + # validate that the channels provided are within the bounds of the energy ranges + if not np.all([ch in range(len(energy_ranges) - 1) for ch in channels]): + raise ValueError( + f"Channels provided for spectral flagging must be within the bounds" + f" of the energy ranges. Provided channels: {channels}, number of energy" + f" ranges: {len(energy_ranges) - 1}." + ) + counts_sum = get_valid_de_count_summary( + de_dataset, energy_ranges, spin_tbin_edges, sensor_id=sensor_id + )[channels, :] # shape (num_channels, n_spin_bins) + # Flag spin bins where the signed count difference between adjacent selected + # energy channels exceeds a Poisson-based threshold. For each pair of + # adjacent channels, compute np.diff(counts_sum, axis=0) and compare that + # signed difference to a threshold scaled by the combined Poisson + # uncertainty (sqrt(N1 + N2)) of those two channels for each spin bin. + # If any adjacent channel pair exceeds the threshold for a spin bin, that + # spin bin is flagged across all energy ranges. + diff = np.diff(counts_sum, axis=0) - UltraConstants.SPECTRAL_SIG_THRESHOLD * ( + np.sqrt(counts_sum[:-1] + counts_sum[1:]) + ) # shape (num_channels - 1, n_spin_bins) + flagged = np.any(diff > 0, axis=0) # shape (n_spin_bins,) + num_culled: int = np.sum(flagged) + logger.info( + f"Spectral culling removed {num_culled} spin bins using channels" + f" {channels} and threshold {UltraConstants.SPECTRAL_SIG_THRESHOLD}." + f" These are energy independent flags and will be applied across all" + f" energy channels." + ) + return flagged + + def get_valid_de_count_summary( de_dataset: xr.Dataset, energy_ranges: NDArray,