Skip to content

Commit 08892de

Browse files
authored
HIT L1B - Handle data gaps in sectored counts (#1827)
* Fix an issue where data gaps weren't excluded when subsetting sectored counts data: filtering by slicing included partial major frames in the slice range. The code now filters for complete major frames only. * Update the test that filters sectored counts for complete sets of data so it checks the different types of data gaps that could occur. Also update the function's docstring to clarify that only data for which livetime values from the previous 10 minutes are available is included in the output, since those values are needed to calculate rates. * Raise an error if no valid start indices are found in the dataset, and update the unit test for this case. * Address PR comments by adding comments that explain the data filtering steps.
1 parent f9edeaa commit 08892de

File tree

2 files changed

+132
-27
lines changed

2 files changed

+132
-27
lines changed

imap_processing/hit/l1b/hit_l1b.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ def subset_data_for_sectored_counts(
366366
A set of sectored data starts with hydrogen and ends with iron and corresponds to
367367
the mod 10 values 0-9. The livetime values from the previous 10 minutes are used
368368
to calculate the rates for each set since those counts are transmitted 10 minutes
369-
after they were collected.
369+
after they were collected. Therefore, only complete sets of sectored counts where
370+
livetime from the previous 10 minutes is available are included in the output.
370371
371372
Parameters
372373
----------
@@ -378,7 +379,7 @@ def subset_data_for_sectored_counts(
378379
Returns
379380
-------
380381
tuple[xr.Dataset, xr.DataArray]
381-
Subsetted L1A counts dataset and corresponding livetime values.
382+
Dataset of complete sectored counts and corresponding livetime values.
382383
"""
383384
# Identify 10-minute intervals of complete sectored counts.
384385
bin_size = 10
@@ -392,16 +393,34 @@ def subset_data_for_sectored_counts(
392393
start_indices = np.where(matches)[0]
393394

394395
# Filter out start indices that are less than the bin size
395-
# since the previous 10 minutes are needed
396-
start_indices = start_indices[start_indices > bin_size]
397-
data_slice = slice(start_indices[0], start_indices[-1] + bin_size)
398-
399-
# Subset data to include only complete sets of sectored counts
400-
l1b_sectored_rates_dataset = l1a_counts_dataset.isel(epoch=data_slice)
396+
# since the previous 10 minutes are needed for calculating rates
397+
if start_indices.size == 0:
398+
logger.error(
399+
"No data to process - valid start indices not found for "
400+
"complete sectored counts."
401+
)
402+
raise ValueError("No valid start indices found for complete sectored counts.")
403+
else:
404+
start_indices = start_indices[start_indices >= bin_size]
405+
406+
# Subset data for complete sets of sectored counts.
407+
# Each set of sectored counts is 10 minutes long, so we take the indices
408+
# starting from the start indices and extend to the bin size of 10.
409+
# This creates a 1D array of indices that correspond to the complete
410+
# sets of sectored counts which is used to filter the L1A dataset and
411+
# create the L1B sectored rates dataset.
412+
data_indices = np.concatenate(
413+
[np.arange(idx, idx + bin_size) for idx in start_indices]
414+
)
415+
l1b_sectored_rates_dataset = l1a_counts_dataset.isel(epoch=data_indices)
401416

402-
# Subset livetime staggered from sectored counts by 10 minutes
403-
livetime_slice = slice(start_indices[0] - bin_size, start_indices[-1])
404-
livetime = livetime[livetime_slice]
417+
# Subset livetime values corresponding to the previous 10 minutes
418+
# for each start index. This ensures the livetime data aligns correctly
419+
# with the sectored counts for rate calculations.
420+
livetime_indices = np.concatenate(
421+
[np.arange(idx - bin_size, idx) for idx in start_indices]
422+
)
423+
livetime = livetime.isel(epoch=livetime_indices)
405424

406425
return l1b_sectored_rates_dataset, livetime
407426

imap_processing/tests/hit/test_hit_l1b.py

Lines changed: 102 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -150,27 +150,113 @@ def test_sum_livetime_10min():
150150

151151
def test_subset_data_for_sectored_counts():
152152
"""Test the subset_data_for_sectored_counts function."""
153-
# Create a sample L1A counts dataset
154-
l1a_counts_dataset = xr.Dataset(
155-
{
156-
"hdr_minute_cnt": ("epoch", np.arange(105, 135)),
157-
"h_sectored_counts": ("epoch", np.arange(0, 30)),
158-
"he4_sectored_counts": ("epoch", np.arange(0, 30)),
159-
},
160-
)
153+
154+
def create_l1a_counts_dataset(hdr_minute_cnt_values):
155+
"""Helper to create L1A counts dataset."""
156+
return xr.Dataset(
157+
{
158+
"hdr_minute_cnt": ("epoch", hdr_minute_cnt_values),
159+
"h_sectored_counts": ("epoch", np.arange(len(hdr_minute_cnt_values))),
160+
"he4_sectored_counts": ("epoch", np.arange(len(hdr_minute_cnt_values))),
161+
},
162+
)
163+
164+
def validate_subset(l1a_counts_dataset, livetime):
165+
"""Helper to validate the subset results."""
166+
subset_dataset, subset_livetime = subset_data_for_sectored_counts(
167+
l1a_counts_dataset, livetime
168+
)
169+
assert subset_dataset.sizes["epoch"] == 10
170+
assert len(subset_livetime["epoch"]) == 10
171+
assert np.all(subset_dataset["hdr_minute_cnt"].values % 10 == np.arange(10))
161172

162173
# Create a sample livetime data array
163174
livetime = xr.DataArray(np.arange(1.0, 31.0, dtype=np.float32), dims=["epoch"])
164175

165-
# Call the function
166-
subset_dataset, subset_livetime = subset_data_for_sectored_counts(
167-
l1a_counts_dataset, livetime
176+
# Test with partial data at the start and end of the dataset
177+
l1a_counts_dataset = create_l1a_counts_dataset(np.arange(105, 135))
178+
validate_subset(l1a_counts_dataset, livetime)
179+
180+
# Test with partial data in the middle of the dataset
181+
l1a_counts_dataset = create_l1a_counts_dataset(
182+
[
183+
100,
184+
101,
185+
102,
186+
103,
187+
104,
188+
105,
189+
106,
190+
107,
191+
108,
192+
109,
193+
110,
194+
111,
195+
112,
196+
113,
197+
114,
198+
120,
199+
121,
200+
122,
201+
123,
202+
124,
203+
130,
204+
131,
205+
132,
206+
133,
207+
134,
208+
135,
209+
136,
210+
137,
211+
138,
212+
139,
213+
]
168214
)
169-
170-
# Check the results
171-
assert subset_dataset.sizes["epoch"] == 10
172-
assert len(subset_livetime["epoch"]) == 10
173-
assert np.all(subset_dataset["hdr_minute_cnt"].values % 10 == np.arange(10))
215+
validate_subset(l1a_counts_dataset, livetime)
216+
217+
# Test with partial data at the start, middle, and end of the dataset
218+
l1a_counts_dataset = create_l1a_counts_dataset(
219+
[
220+
105,
221+
106,
222+
107,
223+
108,
224+
109,
225+
110,
226+
111,
227+
112,
228+
113,
229+
114,
230+
115,
231+
116,
232+
117,
233+
118,
234+
119,
235+
120,
236+
121,
237+
122,
238+
130,
239+
131,
240+
132,
241+
133,
242+
134,
243+
135,
244+
136,
245+
137,
246+
138,
247+
139,
248+
140,
249+
141,
250+
]
251+
)
252+
validate_subset(l1a_counts_dataset, livetime)
253+
254+
# Test with only partial data in the dataset
255+
l1a_counts_dataset = create_l1a_counts_dataset(np.arange(100, 160, 2))
256+
with pytest.raises(
257+
ValueError, match="No valid start indices found for complete sectored counts."
258+
):
259+
subset_data_for_sectored_counts(l1a_counts_dataset, livetime)
174260

175261

176262
def test_process_summed_rates_data(l1a_counts_dataset, livetime):

0 commit comments

Comments
 (0)