Skip to content

Commit 3384651

Browse files
authored
Mag l1c pre launch updates (IMAP-Science-Operations-Center#2224)
* WIP work on L1C
* adding tests
* WIP: add gaps at beginning/end of day
* Updates to properly design vecsec in tests
* Add gap detection at the start and end of the day
* Ruff updates
* removing unneeded prints
1 parent 5cd6fe1 commit 3384651

File tree

5 files changed

+622
-68
lines changed

5 files changed

+622
-68
lines changed

imap_processing/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,9 +1124,9 @@ def do_processing( # noqa: PLR0912
11241124
input_data = [load_cdf(dep) for dep in science_files]
11251125
# Input datasets can be in any order, and are validated within mag_l1c
11261126
if len(input_data) == 1:
1127-
datasets = [mag_l1c(input_data[0])]
1127+
datasets = [mag_l1c(input_data[0], current_day)]
11281128
elif len(input_data) == 2:
1129-
datasets = [mag_l1c(input_data[0], input_data[1])]
1129+
datasets = [mag_l1c(input_data[0], current_day, input_data[1])]
11301130
else:
11311131
raise ValueError(
11321132
f"Invalid dependencies found for MAG L1C:"

imap_processing/mag/l1c/interpolation_methods.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,11 +229,13 @@ def cic_filter(
229229
cic1 = cic1 / decimation_factor
230230
cic2 = np.convolve(cic1, cic1)
231231
delay = (len(cic2) - 1) // 2
232+
232233
input_filtered = input_timestamps
234+
vectors_filtered = lfilter(cic2, 1, input_vectors, axis=0)
233235
if delay != 0:
234236
input_filtered = input_timestamps[:-delay]
237+
vectors_filtered = vectors_filtered[delay:]
235238

236-
vectors_filtered = lfilter(cic2, 1, input_vectors, axis=0)[delay:]
237239
return input_filtered, vectors_filtered
238240

239241

@@ -270,6 +272,12 @@ def linear_filtered(
270272
Interpolated vectors of shape (m, 3) where m is equal to the number of output
271273
timestamps. Contains x, y, z components of the vector.
272274
"""
275+
if input_vectors.shape[0] != input_timestamps.shape[0]:
276+
raise ValueError(
277+
"Input vectors and input timestamps must have the same length. "
278+
f"Got {input_vectors.shape[0]} and {input_timestamps.shape[0]}"
279+
)
280+
273281
input_filtered, vectors_filtered = cic_filter(
274282
input_vectors, input_timestamps, output_timestamps, input_rate, output_rate
275283
)

imap_processing/mag/l1c/mag_l1c.py

Lines changed: 99 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
from imap_processing.mag import imap_mag_sdc_configuration_v001 as configuration
1010
from imap_processing.mag.constants import ModeFlags, VecSec
1111
from imap_processing.mag.l1c.interpolation_methods import InterpolationFunction
12+
from imap_processing.spice.time import et_to_ttj2000ns, str_to_et
1213

1314
logger = logging.getLogger(__name__)
1415

1516

1617
def mag_l1c(
1718
first_input_dataset: xr.Dataset,
19+
day_to_process: np.datetime64,
1820
second_input_dataset: xr.Dataset = None,
1921
) -> xr.Dataset:
2022
"""
@@ -27,6 +29,9 @@ def mag_l1c(
2729
first_input_dataset : xr.Dataset
2830
The first input dataset to process. This can be either burst or norm data, for
2931
mago or magi.
32+
day_to_process : np.datetime64
33+
The day to process, in np.datetime64[D] format. This is used to fill gaps at
34+
the beginning or end of the day if needed.
3035
second_input_dataset : xr.Dataset, optional
3136
The second input dataset to process. This should be burst if first_input_dataset
3237
was norm, or norm if first_input_dataset was burst. It should match the
@@ -263,13 +268,15 @@ def process_mag_l1c(
263268
normal_mode_dataset: xr.Dataset,
264269
burst_mode_dataset: xr.Dataset,
265270
interpolation_function: InterpolationFunction,
271+
day_to_process: np.datetime64 | None = None,
266272
) -> np.ndarray:
267273
"""
268274
Create MAG L1C data from L1B datasets.
269275
270276
This function starts from the normal mode dataset and completes the following steps:
271277
1. find all the gaps in the dataset
272-
2. generate a new timeline with the gaps filled
278+
2. generate a new timeline with the gaps filled, including new timestamps to fill
279+
out the rest of the day to +/- 15 minutes on either side
273280
3. fill the timeline with normal mode data (so, all the non-gap timestamps)
274281
4. interpolate the gaps using the burst mode data and the method specified in
275282
interpolation_function.
@@ -288,6 +295,10 @@ def process_mag_l1c(
288295
The burst mode dataset, which is used to fill in the gaps in the normal mode.
289296
interpolation_function : InterpolationFunction
290297
The interpolation function to use to fill in the gaps.
298+
day_to_process : np.datetime64, optional
299+
The day to process, in np.datetime64[D] format. This is used to fill
300+
gaps at the beginning or end of the day if needed. If not included, these
301+
gaps will not be filled.
291302
292303
Returns
293304
-------
@@ -306,8 +317,23 @@ def process_mag_l1c(
306317
output_dataset["sample_interpolated"] = xr.DataArray(
307318
np.zeros(len(normal_mode_dataset))
308319
)
320+
day_start_ns = None
321+
day_end_ns = None
309322

310-
gaps = find_all_gaps(norm_epoch, normal_vecsec_dict)
323+
if day_to_process is not None:
324+
day_start = day_to_process.astype("datetime64[s]") - np.timedelta64(15, "m")
325+
326+
# get the end of the day plus 15 minutes
327+
day_end = (
328+
day_to_process.astype("datetime64[s]")
329+
+ np.timedelta64(1, "D")
330+
+ np.timedelta64(15, "m")
331+
)
332+
333+
day_start_ns = et_to_ttj2000ns(str_to_et(str(day_start)))
334+
day_end_ns = et_to_ttj2000ns(str_to_et(str(day_end)))
335+
336+
gaps = find_all_gaps(norm_epoch, normal_vecsec_dict, day_start_ns, day_end_ns)
311337

312338
new_timeline = generate_timeline(norm_epoch, gaps)
313339
norm_filled = fill_normal_data(normal_mode_dataset, new_timeline)
@@ -319,7 +345,9 @@ def process_mag_l1c(
319345

320346

321347
def fill_normal_data(
322-
normal_dataset: xr.Dataset, new_timeline: np.ndarray
348+
normal_dataset: xr.Dataset,
349+
new_timeline: np.ndarray,
350+
day_to_process: np.datetime64 | None = None,
323351
) -> np.ndarray:
324352
"""
325353
Fill the new timeline with the normal mode data.
@@ -332,6 +360,10 @@ def fill_normal_data(
332360
The normal mode dataset.
333361
new_timeline : np.ndarray
334362
A 1D array of timestamps to fill.
363+
day_to_process : np.datetime64, optional
364+
The day to process, in np.datetime64[D] format. This is used to fill
365+
gaps at the beginning or end of the day if needed. If not included, these
366+
gaps will not be filled.
335367
336368
Returns
337369
-------
@@ -341,12 +373,11 @@ def fill_normal_data(
341373
Indices: 0 - epoch, 1-4 - vector x, y, z, and range, 5 - generated flag,
342374
6-7 - compression flags.
343375
"""
344-
# TODO: fill with FILLVAL?
376+
# TODO: fill with FILLVAL
345377
filled_timeline: np.ndarray = np.zeros((len(new_timeline), 8))
346378
filled_timeline[:, 0] = new_timeline
347379
# Flags, will also indicate any missed timestamps
348380
filled_timeline[:, 5] = ModeFlags.MISSING.value
349-
350381
for index, timestamp in enumerate(normal_dataset["epoch"].data):
351382
timeline_index = np.searchsorted(new_timeline, timestamp)
352383
filled_timeline[timeline_index, 1:5] = normal_dataset["vectors"].data[index]
@@ -399,9 +430,11 @@ def interpolate_gaps(
399430
)
400431

401432
for gap in gaps:
402-
# TODO: we might need a few inputs before or after start/end
433+
# TODO: we need extra data at the beginning and end of the gap
403434
burst_gap_start = (np.abs(burst_epochs - gap[0])).argmin()
404435
burst_gap_end = (np.abs(burst_epochs - gap[1])).argmin()
436+
# if this gap is too big, we may be missing burst data at the start or end of
437+
# the day and shouldn't use it here.
405438

406439
# for the CIC filter, we need 2x normal mode cadence seconds
407440

@@ -428,10 +461,6 @@ def interpolate_gaps(
428461
gap_timeline = filled_norm_timeline[
429462
(filled_norm_timeline > gap[0]) & (filled_norm_timeline < gap[1])
430463
]
431-
logger.info(
432-
f"difference between gap start and burst start: "
433-
f"{gap_timeline[0] - burst_epochs[burst_start]}"
434-
)
435464

436465
short = (gap_timeline >= burst_epochs[burst_start]) & (
437466
gap_timeline <= burst_epochs[burst_gap_end]
@@ -487,40 +516,46 @@ def generate_timeline(epoch_data: np.ndarray, gaps: np.ndarray) -> np.ndarray:
487516
The existing timeline data, in the shape (n,).
488517
gaps : numpy.ndarray
489518
An array of gaps to fill, with shape (n, 2) where n is the number of gaps.
490-
The gap is specified as (start, end) where start and end both exist in the
491-
timeline already.
519+
The gap is specified as (start, end).
492520
493521
Returns
494522
-------
495523
numpy.ndarray
496524
The new timeline, filled with the existing data and the generated gaps.
497525
"""
498-
full_timeline: np.ndarray = np.zeros(0)
499-
500-
# When we have our gaps, generate the full timeline
501-
last_gap = 0
526+
full_timeline: np.ndarray = np.array([])
527+
last_index = 0
502528
for gap in gaps:
503-
gap_start_index = np.where(epoch_data == gap[0])[0]
504-
gap_end_index = np.where(epoch_data == gap[1])[0]
505-
if gap_start_index.size != 1 or gap_end_index.size != 1:
506-
raise ValueError("Gap start or end not found in input timeline")
507-
529+
epoch_start_index = np.searchsorted(epoch_data, gap[0], side="left")
508530
full_timeline = np.concatenate(
509-
(
510-
full_timeline,
511-
epoch_data[last_gap : gap_start_index[0]],
512-
generate_missing_timestamps(gap),
513-
)
531+
(full_timeline, epoch_data[last_index:epoch_start_index])
514532
)
515-
last_gap = gap_end_index[0]
533+
generated_timestamps = generate_missing_timestamps(gap)
534+
if generated_timestamps.size == 0:
535+
continue
536+
537+
# Remove any generated timestamps that are already in the timeline
538+
# Use np.isin to check for exact matches
539+
mask = ~np.isin(generated_timestamps, full_timeline)
540+
generated_timestamps = generated_timestamps[mask]
541+
542+
if generated_timestamps.size == 0:
543+
print("All generated timestamps already exist in timeline")
544+
continue
516545

517-
full_timeline = np.concatenate((full_timeline, epoch_data[last_gap:]))
546+
full_timeline = np.concatenate((full_timeline, generated_timestamps))
547+
last_index = int(np.searchsorted(epoch_data, gap[1], side="left"))
548+
549+
full_timeline = np.concatenate((full_timeline, epoch_data[last_index:]))
518550

519551
return full_timeline
520552

521553

522554
def find_all_gaps(
523-
epoch_data: np.ndarray, vecsec_dict: dict | None = None
555+
epoch_data: np.ndarray,
556+
vecsec_dict: dict | None = None,
557+
start_of_day_ns: float | None = None,
558+
end_of_day_ns: float | None = None,
524559
) -> np.ndarray:
525560
"""
526561
Find all the gaps in the epoch data.
@@ -529,6 +564,9 @@ def find_all_gaps(
529564
it will assume a nominal 1/2 second gap. A gap is defined as missing data from the
530565
expected sequence as defined by vectors_per_second_attr.
531566
567+
If start_of_day_ns and end_of_day_ns are provided, gaps at the beginning and end of
568+
the day will be added if the epoch_data does not cover the full day.
569+
532570
Parameters
533571
----------
534572
epoch_data : numpy.ndarray
@@ -537,6 +575,12 @@ def find_all_gaps(
537575
A dictionary of the form {start: vecsec, start: vecsec} where start is the time
538576
in nanoseconds and vecsec is the number of vectors per second. This will be
539577
used to find the gaps. If not provided, a 1/2 second gap is assumed.
578+
start_of_day_ns : float, optional
579+
The start of the day in nanoseconds since TTJ2000. If provided, a gap will be
580+
added from this time to the first epoch if they don't match.
581+
end_of_day_ns : float, optional
582+
The end of the day in nanoseconds since TTJ2000. If provided, a gap will be
583+
added from the last epoch to this time if they don't match.
540584
541585
Returns
542586
-------
@@ -546,15 +590,23 @@ def find_all_gaps(
546590
timeline.
547591
"""
548592
gaps: np.ndarray = np.zeros((0, 3))
549-
if vecsec_dict is None:
550-
# TODO: when we go back to the previous file, also retrieve expected
551-
# vectors per second
552-
# If no vecsec is provided, assume 2 vectors per second
553-
vecsec_dict = {0: VecSec.TWO_VECS_PER_S.value}
593+
594+
# TODO: when we go back to the previous file, also retrieve expected
595+
# vectors per second
596+
597+
vecsec_dict = {0: VecSec.TWO_VECS_PER_S.value} | (vecsec_dict or {})
554598

555599
end_index = epoch_data.shape[0]
600+
601+
if start_of_day_ns is not None and epoch_data[0] > start_of_day_ns:
602+
# Add a gap from the start of the day to the first timestamp
603+
gaps = np.concatenate(
604+
(gaps, np.array([[start_of_day_ns, epoch_data[0], vecsec_dict[0]]]))
605+
)
606+
556607
for start_time in reversed(sorted(vecsec_dict.keys())):
557-
start_index = np.where(start_time == epoch_data)[0][0]
608+
# Find the start index that is equal to or immediately after start_time
609+
start_index = np.searchsorted(epoch_data, start_time, side="left")
558610
gaps = np.concatenate(
559611
(
560612
find_gaps(
@@ -565,6 +617,11 @@ def find_all_gaps(
565617
)
566618
end_index = start_index
567619

620+
if end_of_day_ns is not None and epoch_data[-1] < end_of_day_ns:
621+
gaps = np.concatenate(
622+
(gaps, np.array([[epoch_data[-1], end_of_day_ns, vecsec_dict[start_time]]]))
623+
)
624+
568625
return gaps
569626

570627

@@ -592,11 +649,9 @@ def find_gaps(timeline_data: np.ndarray, vectors_per_second: int) -> np.ndarray:
592649
# Expected difference between timestamps in nanoseconds.
593650
expected_gap = 1 / vectors_per_second * 1e9
594651

595-
# TODO: timestamps can vary by a few ms. Per Alastair, this can be around 7.5% of
596-
# cadence without counting as a "gap".
597652
diffs = abs(np.diff(timeline_data))
598-
# 3.5e7 == 7.5% of 0.5s in nanoseconds, a common gap. In the future, this number
599-
# will be calculated from the expected gap.
653+
654+
# Gap can be up to 7.5% larger than expected vectors per second due to clock drift
600655
gap_index = np.asarray(diffs - expected_gap > expected_gap * 0.075).nonzero()[0]
601656
output: np.ndarray = np.zeros((len(gap_index), 3))
602657

@@ -607,7 +662,6 @@ def find_gaps(timeline_data: np.ndarray, vectors_per_second: int) -> np.ndarray:
607662
vectors_per_second,
608663
]
609664

610-
# TODO: How should I handle/find gaps at the end?
611665
return output
612666

613667

@@ -622,17 +676,16 @@ def generate_missing_timestamps(gap: np.ndarray) -> np.ndarray:
622676
----------
623677
gap : numpy.ndarray
624678
Array of timestamps of shape (2,) containing n gaps with start_gap and
625-
end_gap. Start_gap and end_gap both correspond to points in timeline_data.
679+
end_gap. Start_gap and end_gap both correspond to points in timeline_data and
680+
are included in the output timespan.
626681
627682
Returns
628683
-------
629684
full_timeline: numpy.ndarray
630685
Completed timeline.
631686
"""
632687
# Generated timestamps should always be 0.5 seconds apart
633-
# TODO: is this in the configuration file?
634688
difference_ns = 0.5 * 1e9
635-
636689
output: np.ndarray = np.arange(gap[0], gap[1], difference_ns)
637690
return output
638691

@@ -657,8 +710,9 @@ def vectors_per_second_from_string(vecsec_string: str) -> dict:
657710
vecsec_dict = {}
658711
vecsec_segments = vecsec_string.split(",")
659712
for vecsec_segment in vecsec_segments:
660-
start_time, vecsec = vecsec_segment.split(":")
661-
vecsec_dict[int(start_time)] = int(vecsec)
713+
if vecsec_segment:
714+
start_time, vecsec = vecsec_segment.split(":")
715+
vecsec_dict[int(start_time)] = int(vecsec)
662716

663717
return vecsec_dict
664718

0 commit comments

Comments (0)