tmplummer
diff --git a/imap_processing/cli.py b/imap_processing/cli.py
Lines changed: 2 additions & 2 deletions
diff --git a/imap_processing/mag/l1c/interpolation_methods.py b/imap_processing/mag/l1c/interpolation_methods.py
Lines changed: 9 additions & 1 deletion
diff --git a/imap_processing/mag/l1c/mag_l1c.py b/imap_processing/mag/l1c/mag_l1c.py
Lines changed: 99 additions & 45 deletions
@@ -1124,9 +1124,9 @@ def do_processing(  # noqa: PLR0912
             input_data = [load_cdf(dep) for dep in science_files]
             # Input datasets can be in any order, and are validated within mag_l1c
             if len(input_data) == 1:
-                datasets = [mag_l1c(input_data[0])]
+                datasets = [mag_l1c(input_data[0], current_day)]
             elif len(input_data) == 2:
-                datasets = [mag_l1c(input_data[0], input_data[1])]
+                datasets = [mag_l1c(input_data[0], current_day, input_data[1])]
             else:
                 raise ValueError(
                     f"Invalid dependencies found for MAG L1C:"
 
@@ -229,11 +229,13 @@ def cic_filter(
     cic1 = cic1 / decimation_factor
     cic2 = np.convolve(cic1, cic1)
     delay = (len(cic2) - 1) // 2
+
     input_filtered = input_timestamps
+    vectors_filtered = lfilter(cic2, 1, input_vectors, axis=0)
     if delay != 0:
         input_filtered = input_timestamps[:-delay]
+        vectors_filtered = vectors_filtered[delay:]
 
-    vectors_filtered = lfilter(cic2, 1, input_vectors, axis=0)[delay:]
     return input_filtered, vectors_filtered
 
 
@@ -270,6 +272,12 @@ def linear_filtered(
         Interpolated vectors of shape (m, 3) where m is equal to the number of output
         timestamps. Contains x, y, z components of the vector.
     """
+    if input_vectors.shape[0] != input_timestamps.shape[0]:
+        raise ValueError(
+            "Input vectors and input timestamps must have the same length. "
+            f"Got {input_vectors.shape[0]} and {input_timestamps.shape[0]}"
+        )
+
     input_filtered, vectors_filtered = cic_filter(
         input_vectors, input_timestamps, output_timestamps, input_rate, output_rate
     )
 
@@ -9,12 +9,14 @@
 from imap_processing.mag import imap_mag_sdc_configuration_v001 as configuration
 from imap_processing.mag.constants import ModeFlags, VecSec
 from imap_processing.mag.l1c.interpolation_methods import InterpolationFunction
+from imap_processing.spice.time import et_to_ttj2000ns, str_to_et
 
 logger = logging.getLogger(__name__)
 
 
 def mag_l1c(
     first_input_dataset: xr.Dataset,
+    day_to_process: np.datetime64,
     second_input_dataset: xr.Dataset = None,
 ) -> xr.Dataset:
     """
@@ -27,6 +29,9 @@ def mag_l1c(
     first_input_dataset : xr.Dataset
         The first input dataset to process. This can be either burst or norm data, for
         mago or magi.
+    day_to_process : np.datetime64
+        The day to process, in np.datetime64[D] format. This is used to fill gaps at
+        the beginning or end of the day if needed.
     second_input_dataset : xr.Dataset, optional
         The second input dataset to process. This should be burst if first_input_dataset
         was norm, or norm if first_input_dataset was burst. It should match the
@@ -263,13 +268,15 @@ def process_mag_l1c(
     normal_mode_dataset: xr.Dataset,
     burst_mode_dataset: xr.Dataset,
     interpolation_function: InterpolationFunction,
+    day_to_process: np.datetime64 | None = None,
 ) -> np.ndarray:
     """
     Create MAG L1C data from L1B datasets.
 
     This function starts from the normal mode dataset and completes the following steps:
     1. find all the gaps in the dataset
-    2. generate a new timeline with the gaps filled
+    2. generate a new timeline with the gaps filled, including new timestamps that
+    extend coverage to 15 minutes before the start and after the end of the day
     3. fill the timeline with normal mode data (so, all the non-gap timestamps)
     4. interpolate the gaps using the burst mode data and the method specified in
         interpolation_function.
@@ -288,6 +295,10 @@ def process_mag_l1c(
         The burst mode dataset, which is used to fill in the gaps in the normal mode.
     interpolation_function : InterpolationFunction
         The interpolation function to use to fill in the gaps.
+    day_to_process : np.datetime64, optional
+        The day to process, in np.datetime64[D] format. This is used to fill
+        gaps at the beginning or end of the day if needed. If not included, these
+        gaps will not be filled.
 
     Returns
     -------
@@ -306,8 +317,23 @@ def process_mag_l1c(
     output_dataset["sample_interpolated"] = xr.DataArray(
         np.zeros(len(normal_mode_dataset))
     )
+    day_start_ns = None
+    day_end_ns = None
 
-    gaps = find_all_gaps(norm_epoch, normal_vecsec_dict)
+    if day_to_process is not None:
+        day_start = day_to_process.astype("datetime64[s]") - np.timedelta64(15, "m")
+
+        # get the end of the day plus 15 minutes
+        day_end = (
+            day_to_process.astype("datetime64[s]")
+            + np.timedelta64(1, "D")
+            + np.timedelta64(15, "m")
+        )
+
+        day_start_ns = et_to_ttj2000ns(str_to_et(str(day_start)))
+        day_end_ns = et_to_ttj2000ns(str_to_et(str(day_end)))
+
+    gaps = find_all_gaps(norm_epoch, normal_vecsec_dict, day_start_ns, day_end_ns)
 
     new_timeline = generate_timeline(norm_epoch, gaps)
     norm_filled = fill_normal_data(normal_mode_dataset, new_timeline)
@@ -319,7 +345,9 @@ def process_mag_l1c(
 
 
 def fill_normal_data(
-    normal_dataset: xr.Dataset, new_timeline: np.ndarray
+    normal_dataset: xr.Dataset,
+    new_timeline: np.ndarray,
+    day_to_process: np.datetime64 | None = None,
 ) -> np.ndarray:
     """
     Fill the new timeline with the normal mode data.
@@ -332,6 +360,10 @@ def fill_normal_data(
         The normal mode dataset.
     new_timeline : np.ndarray
         A 1D array of timestamps to fill.
+    day_to_process : np.datetime64, optional
+        The day to process, in np.datetime64[D] format. This is used to fill
+        gaps at the beginning or end of the day if needed. If not included, these
+        gaps will not be filled.
 
     Returns
     -------
@@ -341,12 +373,11 @@ def fill_normal_data(
         Indices: 0 - epoch, 1-4 - vector x, y, z, and range, 5 - generated flag,
         6-7 - compression flags.
     """
-    # TODO: fill with FILLVAL?
+    # TODO: fill with FILLVAL
     filled_timeline: np.ndarray = np.zeros((len(new_timeline), 8))
     filled_timeline[:, 0] = new_timeline
     # Flags, will also indicate any missed timestamps
     filled_timeline[:, 5] = ModeFlags.MISSING.value
-
     for index, timestamp in enumerate(normal_dataset["epoch"].data):
         timeline_index = np.searchsorted(new_timeline, timestamp)
         filled_timeline[timeline_index, 1:5] = normal_dataset["vectors"].data[index]
@@ -399,9 +430,11 @@ def interpolate_gaps(
         )
 
     for gap in gaps:
-        # TODO: we might need a few inputs before or after start/end
+        # TODO: we need extra data at the beginning and end of the gap
         burst_gap_start = (np.abs(burst_epochs - gap[0])).argmin()
         burst_gap_end = (np.abs(burst_epochs - gap[1])).argmin()
+        # if this gap is too big, we may be missing burst data at the start or end of
+        # the day and shouldn't use it here.
 
         # for the CIC filter, we need 2x normal mode cadence seconds
 
@@ -428,10 +461,6 @@ def interpolate_gaps(
         gap_timeline = filled_norm_timeline[
             (filled_norm_timeline > gap[0]) & (filled_norm_timeline < gap[1])
         ]
-        logger.info(
-            f"difference between gap start and burst start: "
-            f"{gap_timeline[0] - burst_epochs[burst_start]}"
-        )
 
         short = (gap_timeline >= burst_epochs[burst_start]) & (
             gap_timeline <= burst_epochs[burst_gap_end]
@@ -487,40 +516,46 @@ def generate_timeline(epoch_data: np.ndarray, gaps: np.ndarray) -> np.ndarray:
         The existing timeline data, in the shape (n,).
     gaps : numpy.ndarray
         An array of gaps to fill, with shape (n, 2) where n is the number of gaps.
-        The gap is specified as (start, end) where start and end both exist in the
-        timeline already.
+        The gap is specified as (start, end).
 
     Returns
     -------
     numpy.ndarray
         The new timeline, filled with the existing data and the generated gaps.
     """
-    full_timeline: np.ndarray = np.zeros(0)
-
-    # When we have our gaps, generate the full timeline
-    last_gap = 0
+    full_timeline: np.ndarray = np.array([])
+    last_index = 0
     for gap in gaps:
-        gap_start_index = np.where(epoch_data == gap[0])[0]
-        gap_end_index = np.where(epoch_data == gap[1])[0]
-        if gap_start_index.size != 1 or gap_end_index.size != 1:
-            raise ValueError("Gap start or end not found in input timeline")
-
+        epoch_start_index = np.searchsorted(epoch_data, gap[0], side="left")
         full_timeline = np.concatenate(
-            (
-                full_timeline,
-                epoch_data[last_gap : gap_start_index[0]],
-                generate_missing_timestamps(gap),
-            )
+            (full_timeline, epoch_data[last_index:epoch_start_index])
         )
-        last_gap = gap_end_index[0]
+        generated_timestamps = generate_missing_timestamps(gap)
+        if generated_timestamps.size == 0:
+            continue
+
+        # Remove any generated timestamps that are already in the timeline
+        # Use np.isin to check for exact matches
+        mask = ~np.isin(generated_timestamps, full_timeline)
+        generated_timestamps = generated_timestamps[mask]
+
+        if generated_timestamps.size == 0:
+            print("All generated timestamps already exist in timeline")
+            continue
 
-    full_timeline = np.concatenate((full_timeline, epoch_data[last_gap:]))
+        full_timeline = np.concatenate((full_timeline, generated_timestamps))
+        last_index = int(np.searchsorted(epoch_data, gap[1], side="left"))
+
+    full_timeline = np.concatenate((full_timeline, epoch_data[last_index:]))
 
     return full_timeline
 
 
 def find_all_gaps(
-    epoch_data: np.ndarray, vecsec_dict: dict | None = None
+    epoch_data: np.ndarray,
+    vecsec_dict: dict | None = None,
+    start_of_day_ns: float | None = None,
+    end_of_day_ns: float | None = None,
 ) -> np.ndarray:
     """
     Find all the gaps in the epoch data.
@@ -529,6 +564,9 @@ def find_all_gaps(
     it will assume a nominal 1/2 second gap. A gap is defined as missing data from the
     expected sequence as defined by vectors_per_second_attr.
 
+    If start_of_day_ns and end_of_day_ns are provided, gaps at the beginning and end of
+    the day will be added if the epoch_data does not cover the full day.
+
     Parameters
     ----------
     epoch_data : numpy.ndarray
@@ -537,6 +575,12 @@ def find_all_gaps(
         A dictionary of the form {start: vecsec, start: vecsec} where start is the time
         in nanoseconds and vecsec is the number of vectors per second. This will be
         used to find the gaps. If not provided, a 1/2 second gap is assumed.
+    start_of_day_ns : float, optional
+        The start of the day in nanoseconds since TTJ2000. If provided, a gap will be
+        added from this time to the first epoch if the first epoch is later.
+    end_of_day_ns : float, optional
+        The end of the day in nanoseconds since TTJ2000. If provided, a gap will be
+        added from the last epoch to this time if the last epoch is earlier.
 
     Returns
     -------
@@ -546,15 +590,23 @@ def find_all_gaps(
         timeline.
     """
     gaps: np.ndarray = np.zeros((0, 3))
-    if vecsec_dict is None:
-        # TODO: when we go back to the previous file, also retrieve expected
-        #  vectors per second
-        # If no vecsec is provided, assume 2 vectors per second
-        vecsec_dict = {0: VecSec.TWO_VECS_PER_S.value}
+
+    # TODO: when we go back to the previous file, also retrieve expected
+    #  vectors per second
+
+    vecsec_dict = {0: VecSec.TWO_VECS_PER_S.value} | (vecsec_dict or {})
 
     end_index = epoch_data.shape[0]
+
+    if start_of_day_ns is not None and epoch_data[0] > start_of_day_ns:
+        # Add a gap from the start of the day to the first timestamp
+        gaps = np.concatenate(
+            (gaps, np.array([[start_of_day_ns, epoch_data[0], vecsec_dict[0]]]))
+        )
+
     for start_time in reversed(sorted(vecsec_dict.keys())):
-        start_index = np.where(start_time == epoch_data)[0][0]
+        # Find the start index that is equal to or immediately after start_time
+        start_index = np.searchsorted(epoch_data, start_time, side="left")
         gaps = np.concatenate(
             (
                 find_gaps(
@@ -565,6 +617,11 @@ def find_all_gaps(
         )
         end_index = start_index
 
+    if end_of_day_ns is not None and epoch_data[-1] < end_of_day_ns:
+        gaps = np.concatenate(
+            (gaps, np.array([[epoch_data[-1], end_of_day_ns, vecsec_dict[start_time]]]))
+        )
+
     return gaps
 
 
@@ -592,11 +649,9 @@ def find_gaps(timeline_data: np.ndarray, vectors_per_second: int) -> np.ndarray:
     # Expected difference between timestamps in nanoseconds.
     expected_gap = 1 / vectors_per_second * 1e9
 
-    # TODO: timestamps can vary by a few ms. Per Alastair, this can be around 7.5% of
-    #  cadence without counting as a "gap".
     diffs = abs(np.diff(timeline_data))
-    # 3.5e7 == 7.5% of 0.5s in nanoseconds, a common gap. In the future, this number
-    # will be calculated from the expected gap.
+
+    # Spacing up to 7.5% over the expected cadence (clock drift) is not a gap
     gap_index = np.asarray(diffs - expected_gap > expected_gap * 0.075).nonzero()[0]
     output: np.ndarray = np.zeros((len(gap_index), 3))
 
@@ -607,7 +662,6 @@ def find_gaps(timeline_data: np.ndarray, vectors_per_second: int) -> np.ndarray:
             vectors_per_second,
         ]
 
-    # TODO: How should I handle/find gaps at the end?
     return output
 
 
@@ -622,17 +676,16 @@ def generate_missing_timestamps(gap: np.ndarray) -> np.ndarray:
     ----------
     gap : numpy.ndarray
         Array of timestamps of shape (2,) containing n gaps with start_gap and
-        end_gap. Start_gap and end_gap both correspond to points in timeline_data.
+        end_gap. The generated timestamps start at start_gap and stop before end_gap
+        (np.arange excludes the stop value).
 
     Returns
     -------
     full_timeline: numpy.ndarray
         Completed timeline.
     """
     # Generated timestamps should always be 0.5 seconds apart
-    # TODO: is this in the configuration file?
     difference_ns = 0.5 * 1e9
-
     output: np.ndarray = np.arange(gap[0], gap[1], difference_ns)
     return output
 
@@ -657,8 +710,9 @@ def vectors_per_second_from_string(vecsec_string: str) -> dict:
     vecsec_dict = {}
     vecsec_segments = vecsec_string.split(",")
     for vecsec_segment in vecsec_segments:
-        start_time, vecsec = vecsec_segment.split(":")
-        vecsec_dict[int(start_time)] = int(vecsec)
+        if vecsec_segment:
+            start_time, vecsec = vecsec_segment.split(":")
+            vecsec_dict[int(start_time)] = int(vecsec)
 
     return vecsec_dict