99from imap_processing .mag import imap_mag_sdc_configuration_v001 as configuration
1010from imap_processing .mag .constants import ModeFlags , VecSec
1111from imap_processing .mag .l1c .interpolation_methods import InterpolationFunction
12+ from imap_processing .spice .time import et_to_ttj2000ns , str_to_et
1213
1314logger = logging .getLogger (__name__ )
1415
1516
1617def mag_l1c (
1718 first_input_dataset : xr .Dataset ,
19+ day_to_process : np .datetime64 ,
1820 second_input_dataset : xr .Dataset = None ,
1921) -> xr .Dataset :
2022 """
@@ -27,6 +29,9 @@ def mag_l1c(
2729 first_input_dataset : xr.Dataset
2830 The first input dataset to process. This can be either burst or norm data, for
2931 mago or magi.
32+ day_to_process : np.datetime64
33+ The day to process, in np.datetime64[D] format. This is used to fill gaps at
34+ the beginning or end of the day if needed.
3035 second_input_dataset : xr.Dataset, optional
3136 The second input dataset to process. This should be burst if first_input_dataset
3237 was norm, or norm if first_input_dataset was burst. It should match the
@@ -263,13 +268,15 @@ def process_mag_l1c(
263268 normal_mode_dataset : xr .Dataset ,
264269 burst_mode_dataset : xr .Dataset ,
265270 interpolation_function : InterpolationFunction ,
271+ day_to_process : np .datetime64 | None = None ,
266272) -> np .ndarray :
267273 """
268274 Create MAG L1C data from L1B datasets.
269275
270276 This function starts from the normal mode dataset and completes the following steps:
271277 1. find all the gaps in the dataset
272- 2. generate a new timeline with the gaps filled
278+ 2. generate a new timeline with the gaps filled, including new timestamps to fill
279+ out the rest of the day to +/- 15 minutes on either side
273280 3. fill the timeline with normal mode data (so, all the non-gap timestamps)
274281 4. interpolate the gaps using the burst mode data and the method specified in
275282 interpolation_function.
@@ -288,6 +295,10 @@ def process_mag_l1c(
288295 The burst mode dataset, which is used to fill in the gaps in the normal mode.
289296 interpolation_function : InterpolationFunction
290297 The interpolation function to use to fill in the gaps.
298+ day_to_process : np.datetime64, optional
299+ The day to process, in np.datetime64[D] format. This is used to fill
300+ gaps at the beginning or end of the day if needed. If not included, these
301+ gaps will not be filled.
291302
292303 Returns
293304 -------
@@ -306,8 +317,23 @@ def process_mag_l1c(
306317 output_dataset ["sample_interpolated" ] = xr .DataArray (
307318 np .zeros (len (normal_mode_dataset ))
308319 )
320+ day_start_ns = None
321+ day_end_ns = None
309322
310- gaps = find_all_gaps (norm_epoch , normal_vecsec_dict )
323+ if day_to_process is not None :
324+ day_start = day_to_process .astype ("datetime64[s]" ) - np .timedelta64 (15 , "m" )
325+
326+ # get the end of the day plus 15 minutes
327+ day_end = (
328+ day_to_process .astype ("datetime64[s]" )
329+ + np .timedelta64 (1 , "D" )
330+ + np .timedelta64 (15 , "m" )
331+ )
332+
333+ day_start_ns = et_to_ttj2000ns (str_to_et (str (day_start )))
334+ day_end_ns = et_to_ttj2000ns (str_to_et (str (day_end )))
335+
336+ gaps = find_all_gaps (norm_epoch , normal_vecsec_dict , day_start_ns , day_end_ns )
311337
312338 new_timeline = generate_timeline (norm_epoch , gaps )
313339 norm_filled = fill_normal_data (normal_mode_dataset , new_timeline )
@@ -319,7 +345,9 @@ def process_mag_l1c(
319345
320346
321347def fill_normal_data (
322- normal_dataset : xr .Dataset , new_timeline : np .ndarray
348+ normal_dataset : xr .Dataset ,
349+ new_timeline : np .ndarray ,
350+ day_to_process : np .datetime64 | None = None ,
323351) -> np .ndarray :
324352 """
325353 Fill the new timeline with the normal mode data.
@@ -332,6 +360,10 @@ def fill_normal_data(
332360 The normal mode dataset.
333361 new_timeline : np.ndarray
334362 A 1D array of timestamps to fill.
363+ day_to_process : np.datetime64, optional
364+ The day to process, in np.datetime64[D] format. This is used to fill
365+ gaps at the beginning or end of the day if needed. If not included, these
366+ gaps will not be filled.
335367
336368 Returns
337369 -------
@@ -341,12 +373,11 @@ def fill_normal_data(
341373 Indices: 0 - epoch, 1-4 - vector x, y, z, and range, 5 - generated flag,
342374 6-7 - compression flags.
343375 """
344- # TODO: fill with FILLVAL?
376+ # TODO: fill with FILLVAL
345377 filled_timeline : np .ndarray = np .zeros ((len (new_timeline ), 8 ))
346378 filled_timeline [:, 0 ] = new_timeline
347379 # Flags, will also indicate any missed timestamps
348380 filled_timeline [:, 5 ] = ModeFlags .MISSING .value
349-
350381 for index , timestamp in enumerate (normal_dataset ["epoch" ].data ):
351382 timeline_index = np .searchsorted (new_timeline , timestamp )
352383 filled_timeline [timeline_index , 1 :5 ] = normal_dataset ["vectors" ].data [index ]
@@ -399,9 +430,11 @@ def interpolate_gaps(
399430 )
400431
401432 for gap in gaps :
402- # TODO: we might need a few inputs before or after start/ end
433+ # TODO: we need extra data at the beginning and end of the gap
403434 burst_gap_start = (np .abs (burst_epochs - gap [0 ])).argmin ()
404435 burst_gap_end = (np .abs (burst_epochs - gap [1 ])).argmin ()
436+ # if this gap is too big, we may be missing burst data at the start or end of
437+ # the day and shouldn't use it here.
405438
406439 # for the CIC filter, we need 2x normal mode cadence seconds
407440
@@ -428,10 +461,6 @@ def interpolate_gaps(
428461 gap_timeline = filled_norm_timeline [
429462 (filled_norm_timeline > gap [0 ]) & (filled_norm_timeline < gap [1 ])
430463 ]
431- logger .info (
432- f"difference between gap start and burst start: "
433- f"{ gap_timeline [0 ] - burst_epochs [burst_start ]} "
434- )
435464
436465 short = (gap_timeline >= burst_epochs [burst_start ]) & (
437466 gap_timeline <= burst_epochs [burst_gap_end ]
@@ -487,40 +516,46 @@ def generate_timeline(epoch_data: np.ndarray, gaps: np.ndarray) -> np.ndarray:
487516 The existing timeline data, in the shape (n,).
488517 gaps : numpy.ndarray
489518 An array of gaps to fill, with shape (n, 2) where n is the number of gaps.
490- The gap is specified as (start, end) where start and end both exist in the
491- timeline already.
519+ The gap is specified as (start, end).
492520
493521 Returns
494522 -------
495523 numpy.ndarray
496524 The new timeline, filled with the existing data and the generated gaps.
497525 """
498- full_timeline : np .ndarray = np .zeros (0 )
499-
500- # When we have our gaps, generate the full timeline
501- last_gap = 0
526+ full_timeline : np .ndarray = np .array ([])
527+ last_index = 0
502528 for gap in gaps :
503- gap_start_index = np .where (epoch_data == gap [0 ])[0 ]
504- gap_end_index = np .where (epoch_data == gap [1 ])[0 ]
505- if gap_start_index .size != 1 or gap_end_index .size != 1 :
506- raise ValueError ("Gap start or end not found in input timeline" )
507-
529+ epoch_start_index = np .searchsorted (epoch_data , gap [0 ], side = "left" )
508530 full_timeline = np .concatenate (
509- (
510- full_timeline ,
511- epoch_data [last_gap : gap_start_index [0 ]],
512- generate_missing_timestamps (gap ),
513- )
531+ (full_timeline , epoch_data [last_index :epoch_start_index ])
514532 )
515- last_gap = gap_end_index [0 ]
533+ generated_timestamps = generate_missing_timestamps (gap )
534+ if generated_timestamps .size == 0 :
535+ continue
536+
537+ # Remove any generated timestamps that are already in the timeline
538+ # Use np.isin to check for exact matches
539+ mask = ~ np .isin (generated_timestamps , full_timeline )
540+ generated_timestamps = generated_timestamps [mask ]
541+
542+ if generated_timestamps .size == 0 :
543+ print ("All generated timestamps already exist in timeline" )
544+ continue
516545
517- full_timeline = np .concatenate ((full_timeline , epoch_data [last_gap :]))
546+ full_timeline = np .concatenate ((full_timeline , generated_timestamps ))
547+ last_index = int (np .searchsorted (epoch_data , gap [1 ], side = "left" ))
548+
549+ full_timeline = np .concatenate ((full_timeline , epoch_data [last_index :]))
518550
519551 return full_timeline
520552
521553
522554def find_all_gaps (
523- epoch_data : np .ndarray , vecsec_dict : dict | None = None
555+ epoch_data : np .ndarray ,
556+ vecsec_dict : dict | None = None ,
557+ start_of_day_ns : float | None = None ,
558+ end_of_day_ns : float | None = None ,
524559) -> np .ndarray :
525560 """
526561 Find all the gaps in the epoch data.
@@ -529,6 +564,9 @@ def find_all_gaps(
529564 it will assume a nominal 1/2 second gap. A gap is defined as missing data from the
530565 expected sequence as defined by vectors_per_second_attr.
531566
567+ If start_of_day_ns and end_of_day_ns are provided, gaps at the beginning and end of
568+ the day will be added if the epoch_data does not cover the full day.
569+
532570 Parameters
533571 ----------
534572 epoch_data : numpy.ndarray
@@ -537,6 +575,12 @@ def find_all_gaps(
537575 A dictionary of the form {start: vecsec, start: vecsec} where start is the time
538576 in nanoseconds and vecsec is the number of vectors per second. This will be
539577 used to find the gaps. If not provided, a 1/2 second gap is assumed.
578+ start_of_day_ns : float, optional
579+ The start of the day in nanoseconds since TTJ2000. If provided, a gap will be
580+ added from this time to the first epoch if they don't match.
581+ end_of_day_ns : float, optional
582+ The end of the day in nanoseconds since TTJ2000. If provided, a gap will be
583+ added from the last epoch to this time if they don't match.
540584
541585 Returns
542586 -------
@@ -546,15 +590,23 @@ def find_all_gaps(
546590 timeline.
547591 """
548592 gaps : np .ndarray = np .zeros ((0 , 3 ))
549- if vecsec_dict is None :
550- # TODO: when we go back to the previous file, also retrieve expected
551- # vectors per second
552- # If no vecsec is provided, assume 2 vectors per second
553- vecsec_dict = {0 : VecSec .TWO_VECS_PER_S .value }
593+
594+ # TODO: when we go back to the previous file, also retrieve expected
595+ # vectors per second
596+
597+ vecsec_dict = {0 : VecSec .TWO_VECS_PER_S .value } | ( vecsec_dict or {})
554598
555599 end_index = epoch_data .shape [0 ]
600+
601+ if start_of_day_ns is not None and epoch_data [0 ] > start_of_day_ns :
602+ # Add a gap from the start of the day to the first timestamp
603+ gaps = np .concatenate (
604+ (gaps , np .array ([[start_of_day_ns , epoch_data [0 ], vecsec_dict [0 ]]]))
605+ )
606+
556607 for start_time in reversed (sorted (vecsec_dict .keys ())):
557- start_index = np .where (start_time == epoch_data )[0 ][0 ]
608+ # Find the start index that is equal to or immediately after start_time
609+ start_index = np .searchsorted (epoch_data , start_time , side = "left" )
558610 gaps = np .concatenate (
559611 (
560612 find_gaps (
@@ -565,6 +617,11 @@ def find_all_gaps(
565617 )
566618 end_index = start_index
567619
620+ if end_of_day_ns is not None and epoch_data [- 1 ] < end_of_day_ns :
621+ gaps = np .concatenate (
622+ (gaps , np .array ([[epoch_data [- 1 ], end_of_day_ns , vecsec_dict [start_time ]]]))
623+ )
624+
568625 return gaps
569626
570627
@@ -592,11 +649,9 @@ def find_gaps(timeline_data: np.ndarray, vectors_per_second: int) -> np.ndarray:
592649 # Expected difference between timestamps in nanoseconds.
593650 expected_gap = 1 / vectors_per_second * 1e9
594651
595- # TODO: timestamps can vary by a few ms. Per Alastair, this can be around 7.5% of
596- # cadence without counting as a "gap".
597652 diffs = abs (np .diff (timeline_data ))
598- # 3.5e7 == 7.5% of 0.5s in nanoseconds, a common gap. In the future, this number
599- # will be calculated from the expected gap.
653+
654+ # Gap can be up to 7.5% larger than expected vectors per second due to clock drift
600655 gap_index = np .asarray (diffs - expected_gap > expected_gap * 0.075 ).nonzero ()[0 ]
601656 output : np .ndarray = np .zeros ((len (gap_index ), 3 ))
602657
@@ -607,7 +662,6 @@ def find_gaps(timeline_data: np.ndarray, vectors_per_second: int) -> np.ndarray:
607662 vectors_per_second ,
608663 ]
609664
610- # TODO: How should I handle/find gaps at the end?
611665 return output
612666
613667
@@ -622,17 +676,16 @@ def generate_missing_timestamps(gap: np.ndarray) -> np.ndarray:
622676 ----------
623677 gap : numpy.ndarray
624678 Array of timestamps of shape (2,) containing n gaps with start_gap and
625- end_gap. Start_gap and end_gap both correspond to points in timeline_data.
679+ end_gap. Start_gap and end_gap both correspond to points in timeline_data and
680+ are included in the output timespan.
626681
627682 Returns
628683 -------
629684 full_timeline: numpy.ndarray
630685 Completed timeline.
631686 """
632687 # Generated timestamps should always be 0.5 seconds apart
633- # TODO: is this in the configuration file?
634688 difference_ns = 0.5 * 1e9
635-
636689 output : np .ndarray = np .arange (gap [0 ], gap [1 ], difference_ns )
637690 return output
638691
@@ -657,8 +710,9 @@ def vectors_per_second_from_string(vecsec_string: str) -> dict:
657710 vecsec_dict = {}
658711 vecsec_segments = vecsec_string .split ("," )
659712 for vecsec_segment in vecsec_segments :
660- start_time , vecsec = vecsec_segment .split (":" )
661- vecsec_dict [int (start_time )] = int (vecsec )
713+ if vecsec_segment :
714+ start_time , vecsec = vecsec_segment .split (":" )
715+ vecsec_dict [int (start_time )] = int (vecsec )
662716
663717 return vecsec_dict
664718
0 commit comments