diff --git a/aeon/forecasting/stats/_arima.py b/aeon/forecasting/stats/_arima.py index 357bc4bbe9..c7bf349865 100644 --- a/aeon/forecasting/stats/_arima.py +++ b/aeon/forecasting/stats/_arima.py @@ -12,6 +12,7 @@ from aeon.forecasting.base import BaseForecaster from aeon.forecasting.utils._extract_paras import _extract_arma_params from aeon.forecasting.utils._nelder_mead import nelder_mead +from aeon.transformations.series._diff import _undifference class ARIMA(BaseForecaster): @@ -126,18 +127,11 @@ def _fit(self, y, exog=None): differenced_forecast = self.fitted_values_[-1] if self.d == 0: - forecast_value = differenced_forecast - elif self.d == 1: - forecast_value = differenced_forecast + self._series[-1] - else: # for d > 1, iteratively undifference - forecast_value = differenced_forecast - last_vals = self._series[-self.d :] - for _ in range(self.d): - forecast_value += last_vals[-1] - last_vals[-2] - # Shift values to avoid appending to list (efficient) - last_vals = np.roll(last_vals, -1) - last_vals[-1] = forecast_value # Extract the parameter values - self.forecast_ = forecast_value + self.forecast_ = differenced_forecast + else: + self.forecast_ = _undifference( + np.array([differenced_forecast]), self._series[-self.d :] + )[self.d] if self.use_constant: self.c_ = formatted_params[0][0] self.phi_ = formatted_params[1][: self.p] @@ -198,12 +192,12 @@ def _predict(self, y, exog=None): forecast_diff = c + ar_forecast + ma_forecast # Undifference the forecast - if d == 0: + if self.d == 0: return forecast_diff - elif d == 1: - return forecast_diff + y[-1] else: - return forecast_diff + np.sum(y[-d:]) + return _undifference(np.array([forecast_diff]), self._series[-self.d :])[ + self.d + ] def _forecast(self, y, exog=None): """Forecast one ahead for time series y.""" @@ -242,17 +236,10 @@ def iterative_forecast(self, y, prediction_horizon): # Correct differencing using forecast values y_forecast_diff = forecast_series[n : n + h] - d = self.d - if d == 0: + if self.d == 0: return y_forecast_diff - else: # Correct undifferencing - # Start with last d values from original y - undiff = list(self._series[-d:]) - for i in range(h): - # Take the last d values and sum them - reconstructed = y_forecast_diff[i] + sum(undiff[-d:]) - undiff.append(reconstructed) - return np.array(undiff[d:]) + else: + return _undifference(y_forecast_diff, self._series[-self.d :])[self.d :] @njit(cache=True, fastmath=True) diff --git a/aeon/transformations/series/_diff.py b/aeon/transformations/series/_diff.py index 221987b7bd..6519a1a960 100644 --- a/aeon/transformations/series/_diff.py +++ b/aeon/transformations/series/_diff.py @@ -1,8 +1,11 @@ +"""Difference Transformer.""" + import numpy as np +from numba import njit from aeon.transformations.series.base import BaseSeriesTransformer -__maintainer__ = ["TinaJin0228"] +__maintainer__ = ["TinaJin0228", "alexbanwell1"] __all__ = ["DifferenceTransformer"] @@ -59,6 +62,7 @@ class DifferenceTransformer(BaseSeriesTransformer): _tags = { "capability:multivariate": True, + "capability:inverse_transform": True, "X_inner_type": "np.ndarray", "fit_is_empty": True, } @@ -66,6 +70,7 @@ class DifferenceTransformer(BaseSeriesTransformer): def __init__(self, order=1): self.order = order super().__init__(axis=1) + self.x_ = None def _transform(self, X, y=None): """ @@ -84,9 +89,110 @@ def _transform(self, X, y=None): raise ValueError( f"`order` must be a positive integer, but got {self.order}" ) + self.x_ = X diff_X = np.diff(X, n=self.order, axis=1) Xt = diff_X return Xt + + def _inverse_transform(self, X, y=None): + """ + Inverse transform to reconstruct the original time series. + + Parameters + ---------- + X : Time series to inverse transform. With shape (n_channels, n_timepoints). + y : If provided, should contain the first `order` values of the original series. + If None, the first `order` values will be taken from values + stored in _transform. + + Returns + ------- + Xt : np.ndarray + Reconstructed original time series. + """ + if y is None: + y = self.x_ + if y is None or y.shape[1] < self.order: + raise ValueError( + f"Inverse transform requires first {self.order} original \ + data values. If y is supplied, then those values will be used. \ + If not if .transform() has been called, the X series from that \ + shall be used. But inverse_transform called with y=None and \ + no previous .transform() call." + ) + if y.shape[0] != X.shape[0]: + raise ValueError( + f"y must have the same number of channels as X. " + f"Got X.shape[0]={X.shape[0]}, y.shape[0]={y.shape[0]}" + ) + X = np.array(X, dtype=np.float64) + y = np.array(y, dtype=np.float64) + initial_values = y[:, : self.order] + + return np.array( + [ + _undifference(diff_X, initial_value) + for diff_X, initial_value in zip(X, initial_values) + ] + ) + + +@njit(cache=True, fastmath=True) +def _comb(n, k): + """ + Calculate the binomial coefficient C(n, k) = n! / (k! * (n - k)!). + + Parameters + ---------- + n : int + The total number of items. + k : int + The number of items to choose. + + Returns + ------- + int + The binomial coefficient C(n, k). + """ + if k < 0 or k > n: + return 0 + if k > n - k: + k = n - k # Take advantage of symmetry + c = 1 + for i in range(k): + c = c * (n - i) // (i + 1) + return c + + +@njit(cache=True, fastmath=True) +def _undifference(diff, initial_values): + """ + Reconstruct original time series from an n-th order differenced series. + + Parameters + ---------- + diff : array-like + n-th order differenced series of length N - n + initial_values : array-like + The first n values of the original series before differencing (length n) + + Returns + ------- + original : np.ndarray + Reconstructed original series of length N + """ + n = len(initial_values) + kernel = np.array( + [(-1) ** (k + 1) * _comb(n, k) for k in range(1, n + 1)], + dtype=initial_values.dtype, + ) + original = np.empty((n + len(diff)), dtype=initial_values.dtype) + original[:n] = initial_values + + for i, d in enumerate(diff): + original[n + i] = np.dot(kernel, original[i : n + i][::-1]) + d + + return original diff --git a/aeon/transformations/series/tests/test_diff.py b/aeon/transformations/series/tests/test_diff.py index 9f54fccf7d..5ac4b39be9 100644 --- a/aeon/transformations/series/tests/test_diff.py +++ b/aeon/transformations/series/tests/test_diff.py @@ -12,26 +12,41 @@ def test_diff(): dt1 = DifferenceTransformer(order=1) Xt1 = dt1.fit_transform(X) expected1 = np.array([[3.0, 5.0, 7.0, 9.0, 11.0]]) + X_hat1 = dt1.inverse_transform(Xt1, X) np.testing.assert_allclose( Xt1, expected1, equal_nan=True, err_msg="Value mismatch for order 1" ) + np.testing.assert_allclose( + X_hat1, X, equal_nan=True, err_msg="Inverse transform failed for order 1" + ) dt2 = DifferenceTransformer(order=2) Xt2 = dt2.fit_transform(X) expected2 = np.array([[2.0, 2.0, 2.0, 2.0]]) + X_hat2 = dt2.inverse_transform(Xt2, X) np.testing.assert_allclose( Xt2, expected2, equal_nan=True, err_msg="Value mismatch for order 2" ) + np.testing.assert_allclose( + X_hat2, X, equal_nan=True, err_msg="Inverse transform failed for order 2" + ) Y = np.array([[1, 2, 3, 4], [5, 3, 1, 8]]) Yt1 = dt1.fit_transform(Y) expected3 = np.array([[1, 1, 1], [-2, -2, 7]]) + Y_hat1 = dt1.inverse_transform(Yt1, Y) np.testing.assert_allclose( Yt1, expected3, equal_nan=True, err_msg="Value mismatch for order 1,multivariate", ) + np.testing.assert_allclose( + Y_hat1, + Y, + equal_nan=True, + err_msg="Inverse transform failed for order 1,multivariate", + )