From 775e1e88d3158cd248accbf19781aed1fc12dafb Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 16:12:31 +0000 Subject: [PATCH 01/16] mockup --- .../whole_series/__init__.py | 1 + .../whole_series/_outlier_detection.py | 34 +++++++ aeon/anomaly_detection/whole_series/base.py | 92 +++++++++++++++++++ ...eld_collection_anomaly_detection_checks.py | 9 ++ .../_yield_estimator_checks.py | 9 ++ aeon/testing/testing_data.py | 2 + aeon/utils/base/_register.py | 2 + 7 files changed, 149 insertions(+) create mode 100644 aeon/anomaly_detection/whole_series/__init__.py create mode 100644 aeon/anomaly_detection/whole_series/_outlier_detection.py create mode 100644 aeon/anomaly_detection/whole_series/base.py create mode 100644 aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py new file mode 100644 index 0000000000..68f149dc2e --- /dev/null +++ b/aeon/anomaly_detection/whole_series/__init__.py @@ -0,0 +1 @@ +"""Whole-series anomaly detection methods.""" diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py new file mode 100644 index 0000000000..a228b6b644 --- /dev/null +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -0,0 +1,34 @@ +"""Basic outlier detection classifier.""" + +from aeon.anomaly_detection import IsolationForest +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.base._base import _clone_estimator + + +class OutlierDetectionClassifier(BaseCollectionAnomalyDetector): + """Basic outlier detection classifier.""" + + _tags = { + "X_inner_type": "numpy2D", + } + + def __init__(self, estimator, random_state=None): + self.estimator = estimator + self.random_state = random_state + + super().__init__() + + def _fit(self, X, y=None): + self.estimator_ = _clone_estimator( + self.estimator, random_state=self.random_state + ) + self.estimator_.fit(X, y) + return self + + def _predict(self, X): + pred = self.estimator_.predict(X) + pred[pred == -1] = 0 + return pred + + def _get_test_params(cls, parameter_set="default"): + return {"estimator": IsolationForest()} diff --git a/aeon/anomaly_detection/whole_series/base.py b/aeon/anomaly_detection/whole_series/base.py new file mode 100644 index 0000000000..305053795c --- /dev/null +++ b/aeon/anomaly_detection/whole_series/base.py @@ -0,0 +1,92 @@ +"""Abstract base class for whole-series/collection anomaly detectors.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["BaseCollectionAnomalyDetector"] + +from abc import abstractmethod +from typing import final + +import numpy as np +import pandas as pd + +from aeon.base import BaseCollectionEstimator + + +class BaseCollectionAnomalyDetector(BaseCollectionEstimator): + """Collection anomaly detector base class.""" + + _tags = { + "fit_is_empty": False, + "requires_y": False, + } + + def __init__(self): + super().__init__() + + @final + def fit(self, X, y=None): + """Fit.""" + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self + + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_collection(X) + if y is not None: + y = self._check_y(y, self.metadata_["n_cases"]) + + self._fit(X, y) + + # this should happen last + self.is_fitted = True + return 
self + + @final + def predict(self, X): + """Predict.""" + fit_empty = self.get_tag("fit_is_empty") + if not fit_empty: + self._check_is_fitted() + + X = self._preprocess_collection(X, store_metadata=False) + # Check if X has the correct shape seen during fitting + self._check_shape(X) + + return self._predict(X) + + @abstractmethod + def _fit(self, X, y=None): ... + + @abstractmethod + def _predict(self, X): ... + + def _check_y(self, y, n_cases): + if not isinstance(y, (pd.Series, np.ndarray)): + raise TypeError( + f"y must be a np.array or a pd.Series, but found type: {type(y)}" + ) + if isinstance(y, np.ndarray) and y.ndim > 1: + raise TypeError(f"y must be 1-dimensional, found {y.ndim} dimensions") + + if not all([x == 0 or x == 1 for x in y]): + raise ValueError( + "y input must only contain 0 (not anomalous) or 1 (anomalous) values." + ) + + # Check matching number of labels + n_labels = y.shape[0] + if n_cases != n_labels: + raise ValueError( + f"Mismatch in number of cases. Found X = {n_cases} and y = {n_labels}" + ) + + if isinstance(y, pd.Series): + y = pd.Series.to_numpy(y) + + return y diff --git a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py new file mode 100644 index 0000000000..40f7fe7aaf --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py @@ -0,0 +1,9 @@ +"""Tests for all collection anomaly detectors.""" + + +def _yield_collection_anomaly_detection_checks( + estimator_class, estimator_instances, datatypes +): + """Yield all collection anomaly detection checks for an aeon estimator.""" + # nothing currently! + return [] diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index 20664bea73..0a1aecd9f8 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -14,6 +14,7 @@ from sklearn.utils.estimator_checks import check_get_params_invariance from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.base._base import _clone_estimator from aeon.classification import BaseClassifier @@ -34,6 +35,9 @@ from aeon.testing.estimator_checking._yield_clustering_checks import ( _yield_clustering_checks, ) +from aeon.testing.estimator_checking._yield_collection_anomaly_detection_checks import ( + _yield_collection_anomaly_detection_checks, +) from aeon.testing.estimator_checking._yield_collection_transformation_checks import ( _yield_collection_transformation_checks, ) @@ -152,6 +156,11 @@ def _yield_all_aeon_checks( estimator_class, estimator_instances, datatypes ) + if issubclass(estimator_class, BaseCollectionAnomalyDetector): + yield from _yield_collection_anomaly_detection_checks( + estimator_class, estimator_instances, datatypes + ) + if issubclass(estimator_class, BaseSimilaritySearch): yield from _yield_similarity_search_checks( estimator_class, estimator_instances, datatypes diff --git a/aeon/testing/testing_data.py b/aeon/testing/testing_data.py index 55b9092443..70bc12908a 100644 --- a/aeon/testing/testing_data.py +++ b/aeon/testing/testing_data.py @@ -3,6 +3,7 @@ import numpy as np from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector 
from aeon.base import BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -821,6 +822,7 @@ def _get_label_type_for_estimator(estimator): or isinstance(estimator, BaseClusterer) or isinstance(estimator, BaseCollectionTransformer) or isinstance(estimator, BaseSimilaritySearch) + or isinstance(estimator, BaseCollectionAnomalyDetector) ): label_type = "Classification" elif isinstance(estimator, BaseRegressor): diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index 1327d626ef..fce96c6ca2 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -17,6 +17,7 @@ from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseAeonEstimator, BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification.base import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -37,6 +38,7 @@ "transformer": BaseTransformer, # estimator types "anomaly-detector": BaseAnomalyDetector, + "collection-anomaly-detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, From ba1c55e3fc65936aee4b3c84647cf8e100b9dcaf Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 16:19:25 +0000 Subject: [PATCH 02/16] init --- aeon/anomaly_detection/whole_series/__init__.py | 10 ++++++++++ aeon/utils/base/_register.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py index 68f149dc2e..14d0e5c466 100644 --- a/aeon/anomaly_detection/whole_series/__init__.py +++ b/aeon/anomaly_detection/whole_series/__init__.py @@ -1 +1,11 @@ """Whole-series anomaly detection methods.""" + +__all__ = [ + "BaseCollectionAnomalyDetector", + "OutlierDetectionClassifier", +] + +from aeon.anomaly_detection.whole_series._outlier_detection import ( + OutlierDetectionClassifier, +) +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index fce96c6ca2..1b986b1bf9 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -38,7 +38,7 @@ "transformer": BaseTransformer, # estimator types "anomaly-detector": BaseAnomalyDetector, - "collection-anomaly-detector": BaseCollectionAnomalyDetector, + "collection_anomaly_detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, From 8eb67c883ebb00f19d23743dee6ec6c6b4aadab7 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 16:33:13 +0000 Subject: [PATCH 03/16] decorator --- aeon/anomaly_detection/whole_series/_outlier_detection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index a228b6b644..7c415587ff 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -30,5 +30,6 @@ def _predict(self, X): pred[pred == -1] = 0 return pred + @classmethod def _get_test_params(cls, parameter_set="default"): return {"estimator": IsolationForest()} From 
a6d33abbe25089f6c01f68f7c628f613abe5be30 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 8 Nov 2024 17:10:27 +0000 Subject: [PATCH 04/16] correct import and tag --- aeon/anomaly_detection/whole_series/_outlier_detection.py | 5 +++-- aeon/utils/tags/_tags.py | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index 7c415587ff..53ac4bea29 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -1,6 +1,7 @@ """Basic outlier detection classifier.""" -from aeon.anomaly_detection import IsolationForest +from sklearn.ensemble import IsolationForest + from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator @@ -32,4 +33,4 @@ def _predict(self, X): @classmethod def _get_test_params(cls, parameter_set="default"): - return {"estimator": IsolationForest()} + return {"estimator": IsolationForest(n_estimators=3)} diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index 650a7c4dc4..75fc77223b 100644 --- a/aeon/utils/tags/_tags.py +++ b/aeon/utils/tags/_tags.py @@ -130,7 +130,12 @@ class : identifier for the base class of objects this tag applies to "point belongs to.", }, "requires_y": { - "class": ["transformer", "anomaly-detector", "segmenter"], + "class": [ + "transformer", + "anomaly-detector", + "collection_anomaly_detector", + "segmenter", + ], "type": "bool", "description": "Does this estimator require y to be passed in its methods?", }, From 2db96f26237c9d311acebb6702e87459ba464687 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Mon, 14 Apr 2025 14:29:22 +0100 Subject: [PATCH 05/16] base docs --- aeon/anomaly_detection/whole_series/base.py | 115 +++++++++++++++++++- aeon/classification/base.py | 2 - 2 files changed, 111 insertions(+), 6 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/base.py b/aeon/anomaly_detection/whole_series/base.py index 305053795c..9194ae557b 100644 --- a/aeon/anomaly_detection/whole_series/base.py +++ b/aeon/anomaly_detection/whole_series/base.py @@ -1,4 +1,27 @@ -"""Abstract base class for whole-series/collection anomaly detectors.""" +""" +Abstract base class for whole-series/collection anomaly detectors. + + class name: BaseCollectionAnomalyDetector + +Defining methods: + fitting - fit(self, X, y) + predicting - predict(self, X) + +Data validation: + data processing - _preprocess_collection(self, X, store_metadata=True) + shape verification - _check_shape(self, X) + +State: + fitted model/strategy - by convention, any attributes ending in "_" + fitted state flag - is_fitted + train input metadata - metadata_ + resetting state - reset(self) + +Tags: + default estimator tags - _tags + tag retrieval - get_tag(self, tag_name) + tag setting - set_tag(self, tag_name, value) +""" __maintainer__ = ["MatthewMiddlehurst"] __all__ = ["BaseCollectionAnomalyDetector"] @@ -13,7 +36,23 @@ class BaseCollectionAnomalyDetector(BaseCollectionEstimator): - """Collection anomaly detector base class.""" + """ + Abstract base class for collection anomaly detectors. + + The base detector specifies the methods and method signatures that all + collection anomaly detectors have to implement. Attributes with an underscore + suffix are set in the method fit. + + Attributes + ---------- + is_fitted : bool + True if the estimator has been fitted, False otherwise. 
+ Unused if ``"fit_is_empty"`` tag is set to True. + metadata_ : dict + Dictionary containing metadata about the `fit` input data. + _tags_dynamic : dict + Dictionary containing dynamic tag values which have been set at runtime. + """ _tags = { "fit_is_empty": False, @@ -25,7 +64,42 @@ def __init__(self): @final def fit(self, X, y=None): - """Fit.""" + """Fit collection anomaly detector to training data. + + Parameters + ---------- + X : np.ndarray or list + Input data, any number of channels, equal length series of shape ``( + n_cases, n_channels, n_timepoints)`` + or 2D np.array (univariate, equal length series) of shape + ``(n_cases, n_timepoints)`` + or list of numpy arrays (any number of channels, unequal length series) + of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, + where ``n_timepoints_i`` is length of series ``i``. Other types are + allowed and converted into one of the above. + + Different estimators have different capabilities to handle different + types of input. If ``self.get_tag("capability:multivariate")`` is False, + they cannot handle multivariate series, so either ``n_channels == 1`` is + true or X is 2D of shape ``(n_cases, n_timepoints)``. If ``self.get_tag( + "capability:unequal_length")`` is False, they cannot handle unequal + length input. In both situations, a ``ValueError`` is raised if X has a + characteristic that the estimator does not have the capability for is + passed. + y : np.ndarray + 1D np.array of int, of shape ``(n_cases)`` - anomaly labels + (ground truth) for fitting indices corresponding to instance indices in X. + + Returns + ------- + self : BaseCollectionAnomalyDetector + Reference to self. + + Notes + ----- + Changes state by creating a fitted model that updates attributes + ending in "_" and sets is_fitted flag to True. + """ if self.get_tag("fit_is_empty"): self.is_fitted = True return self @@ -49,7 +123,35 @@ def fit(self, X, y=None): @final def predict(self, X): - """Predict.""" + """Predicts anomalies for time series in X. + + Parameters + ---------- + X : np.ndarray or list + Input data, any number of channels, equal length series of shape ``( + n_cases, n_channels, n_timepoints)`` + or 2D np.array (univariate, equal length series) of shape + ``(n_cases, n_timepoints)`` + or list of numpy arrays (any number of channels, unequal length series) + of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, + where ``n_timepoints_i`` is length of series ``i`` + other types are allowed and converted into one of the above. + + Different estimators have different capabilities to handle different + types of input. If ``self.get_tag("capability:multivariate")`` is False, + they cannot handle multivariate series, so either ``n_channels == 1`` is + true or X is 2D of shape ``(n_cases, n_timepoints)``. If ``self.get_tag( + "capability:unequal_length")`` is False, they cannot handle unequal + length input. In both situations, a ``ValueError`` is raised if X has a + characteristic that the estimator does not have the capability for is + passed. + + Returns + ------- + predictions : np.ndarray + 1D np.array of float, of shape (n_cases) - predicted anomalies + indices correspond to instance indices in X + """ fit_empty = self.get_tag("fit_is_empty") if not fit_empty: self._check_is_fitted() @@ -67,6 +169,11 @@ def _fit(self, X, y=None): ... def _predict(self, X): ... def _check_y(self, y, n_cases): + """Check y input is valid. + + Must be 1-dimensional and contain only 0s (no anomaly) and 1s (anomaly). 
+ Must match the number of cases in X. + """ if not isinstance(y, (pd.Series, np.ndarray)): raise TypeError( f"y must be a np.array or a pd.Series, but found type: {type(y)}" diff --git a/aeon/classification/base.py b/aeon/classification/base.py index 92d3b304a8..fa7ed89cb6 100644 --- a/aeon/classification/base.py +++ b/aeon/classification/base.py @@ -52,8 +52,6 @@ class BaseClassifier(ClassifierMixin, BaseCollectionEstimator): Number of classes (length of ``classes_``). _class_dictionary : dict Mapping of classes_ onto integers ``0 ... n_classes_-1``. - _estimator_type : string - The type of estimator. Required by some ``sklearn`` tools, set to "classifier". """ _tags = { From 1920ee49131f4bb0a8e5a4b261466cc3e32c137e Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Mon, 14 Apr 2025 15:18:11 +0100 Subject: [PATCH 06/16] wrappers --- .../whole_series/_classification.py | 65 +++++++++++++++++++ .../whole_series/_outlier_detection.py | 42 +++++++++--- 2 files changed, 97 insertions(+), 10 deletions(-) create mode 100644 aeon/anomaly_detection/whole_series/_classification.py diff --git a/aeon/anomaly_detection/whole_series/_classification.py b/aeon/anomaly_detection/whole_series/_classification.py new file mode 100644 index 0000000000..dbe50a127c --- /dev/null +++ b/aeon/anomaly_detection/whole_series/_classification.py @@ -0,0 +1,65 @@ +"""Adapter to use classification algorithms for collection anomaly detection.""" + +__maintainer__ = [] + + +from sklearn.base import ClassifierMixin +from sklearn.ensemble import RandomForestClassifier + +from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.base._base import _clone_estimator +from aeon.classification.feature_based import SummaryClassifier + + +class ClassificationAdapter(BaseCollectionAnomalyDetector): + """ + Basic classifier adapter for collection anomaly detection. + + This class wraps a classification algorithm to be used as an anomaly detector. + Anomaly labels are required for training. + + Parameters + ---------- + classifier : aeon classifier or ClassifierMixin + The classification algorithm to be adapted. + random_state : int, RandomState instance or None, default=None + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. + """ + + _tags = { + "X_inner_type": "numpy2D", + "requires_y": True, + } + + def __init__(self, classifier, random_state=None): + self.classifier = classifier + self.random_state = random_state + + super().__init__() + + def _fit(self, X, y=None): + if not isinstance(self.classifier, ClassifierMixin): + raise ValueError( + "The estimator must be an aeon classification algorithm " + "or class that implements the ClassifierMixin interface." 
+ ) + + self.classifier_ = _clone_estimator( + self.classifier, random_state=self.random_state + ) + self.classifier_.fit(X, y) + return self + + def _predict(self, X): + return self.classifier_.predict(X) + + @classmethod + def _get_test_params(cls, parameter_set="default"): + return { + "estimator": SummaryClassifier( + estimator=RandomForestClassifier(n_estimators=5) + ) + } diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index 53ac4bea29..95a7aade88 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -1,33 +1,55 @@ -"""Basic outlier detection classifier.""" +"""Adapter to use outlier detection algorithms for collection anomaly detection.""" +__maintainer__ = [] + +from sklearn.base import OutlierMixin from sklearn.ensemble import IsolationForest from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator -class OutlierDetectionClassifier(BaseCollectionAnomalyDetector): - """Basic outlier detection classifier.""" +class OutlierDetectionAdapter(BaseCollectionAnomalyDetector): + """ + Basic outlier detection adapter for collection anomaly detection. + + This class wraps an sklearn outlier detection algorithm to be used as an anomaly + detector. + + Parameters + ---------- + detector : OutlierMixin + The outlier detection algorithm to be adapted. + random_state : int, RandomState instance or None, default=None + If `int`, random_state is the seed used by the random number generator; + If `RandomState` instance, random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. + """ _tags = { "X_inner_type": "numpy2D", } - def __init__(self, estimator, random_state=None): - self.estimator = estimator + def __init__(self, detector, random_state=None): + self.detector = detector self.random_state = random_state super().__init__() def _fit(self, X, y=None): - self.estimator_ = _clone_estimator( - self.estimator, random_state=self.random_state - ) - self.estimator_.fit(X, y) + if not isinstance(self.detector, OutlierMixin): + raise ValueError( + "The estimator must be an outlier detection algorithm " + "that implements the OutlierMixin interface." 
+ ) + + self.detector_ = _clone_estimator(self.detector, random_state=self.random_state) + self.detector_.fit(X, y) return self def _predict(self, X): - pred = self.estimator_.predict(X) + pred = self.detector_.predict(X) pred[pred == -1] = 0 return pred From debc38b6f78dcb534b2b4eab72f07bf4cb06f275 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 17 Apr 2025 22:02:49 +0100 Subject: [PATCH 07/16] tests --- ...eld_collection_anomaly_detection_checks.py | 64 ++++++++++++++++++- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py index 40f7fe7aaf..76c91e312e 100644 --- a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py +++ b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py @@ -1,9 +1,69 @@ """Tests for all collection anomaly detectors.""" +from functools import partial + +from aeon.base._base import _clone_estimator +from aeon.testing.testing_data import FULL_TEST_DATA_DICT +from aeon.testing.utils.estimator_checks import _assert_predict_labels +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES + def _yield_collection_anomaly_detection_checks( estimator_class, estimator_instances, datatypes ): """Yield all collection anomaly detection checks for an aeon estimator.""" - # nothing currently! - return [] + # only class required + yield partial( + check_collection_detector_overrides_and_tags, estimator_class=estimator_class + ) + + # test class instances + for i, estimator in enumerate(estimator_instances): + # test all data types + for datatype in datatypes[i]: + yield partial( + check_collection_detector_output, estimator=estimator, datatype=datatype + ) + + +def check_collection_detector_overrides_and_tags(estimator_class): + """Test compliance with the detector base class contract.""" + # Test they don't override final methods, because Python does not enforce this + final_methods = [ + "fit", + "predict", + ] + for method in final_methods: + if method in estimator_class.__dict__: + raise ValueError( + f"Collection anomaly detector {estimator_class} overrides the " + f"method {method}. Override _{method} instead." 
+ ) + + # Test valid tag for X_inner_type + X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") + if isinstance(X_inner_type, str): + assert X_inner_type in COLLECTIONS_DATA_TYPES + else: # must be a list + assert all([t in COLLECTIONS_DATA_TYPES for t in X_inner_type]) + + # one of X_inner_types must be capable of storing unequal length + if estimator_class.get_class_tag("capability:unequal_length"): + valid_unequal_types = ["np-list", "df-list", "pd-multiindex"] + if isinstance(X_inner_type, str): + assert X_inner_type in valid_unequal_types + else: # must be a list + assert any([t in valid_unequal_types for t in X_inner_type]) + + +def check_collection_detector_output(estimator, datatype): + """Test detector outputs the correct data types and values.""" + estimator = _clone_estimator(estimator) + + # run fit and predict + estimator.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + _assert_predict_labels(y_pred, datatype, unique_labels=[0, 1]) From 7179b2602f4af0da22ee57d6ac6dc47fcb18ed7d Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 18 Apr 2025 13:55:47 +0100 Subject: [PATCH 08/16] docs and imports --- aeon/anomaly_detection/whole_series/__init__.py | 6 ++++-- docs/api_reference/anomaly_detection.rst | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py index 14d0e5c466..7084ff40a9 100644 --- a/aeon/anomaly_detection/whole_series/__init__.py +++ b/aeon/anomaly_detection/whole_series/__init__.py @@ -2,10 +2,12 @@ __all__ = [ "BaseCollectionAnomalyDetector", - "OutlierDetectionClassifier", + "ClassificationAdapter", + "OutlierDetectionAdapter", ] +from aeon.anomaly_detection.whole_series._classification import ClassificationAdapter from aeon.anomaly_detection.whole_series._outlier_detection import ( - OutlierDetectionClassifier, + OutlierDetectionAdapter, ) from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst index 082c082fc4..baa8ee0532 100644 --- a/docs/api_reference/anomaly_detection.rst +++ b/docs/api_reference/anomaly_detection.rst @@ -36,6 +36,19 @@ Detectors STOMP STRAY +Whole-series +------------ + +.. currentmodule:: aeon.anomaly_detection.whole_series + +.. 
autosummary:: + :toctree: auto_generated/ + :template: class.rst + + BaseCollectionAnomalyDetector + ClassificationAdapter + OutlierDetectionAdapter + Base ---- From 3d37a81793b321c35258d6399225dada4bc58b22 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 18 Apr 2025 13:59:37 +0100 Subject: [PATCH 09/16] test params --- aeon/anomaly_detection/whole_series/_classification.py | 2 +- aeon/anomaly_detection/whole_series/_outlier_detection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/anomaly_detection/whole_series/_classification.py b/aeon/anomaly_detection/whole_series/_classification.py index dbe50a127c..849312a9a2 100644 --- a/aeon/anomaly_detection/whole_series/_classification.py +++ b/aeon/anomaly_detection/whole_series/_classification.py @@ -59,7 +59,7 @@ def _predict(self, X): @classmethod def _get_test_params(cls, parameter_set="default"): return { - "estimator": SummaryClassifier( + "classifier": SummaryClassifier( estimator=RandomForestClassifier(n_estimators=5) ) } diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/whole_series/_outlier_detection.py index 95a7aade88..ac702c7297 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/whole_series/_outlier_detection.py @@ -55,4 +55,4 @@ def _predict(self, X): @classmethod def _get_test_params(cls, parameter_set="default"): - return {"estimator": IsolationForest(n_estimators=3)} + return {"detector": IsolationForest(n_estimators=3)} From 4ba76925647bff4804b288219fcaf3649560dec1 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Fri, 18 Apr 2025 14:16:38 +0100 Subject: [PATCH 10/16] register --- aeon/utils/base/_register.py | 2 +- aeon/utils/tags/_tags.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index 78372b04cb..a4d8b2d303 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -39,7 +39,7 @@ "transformer": BaseTransformer, # estimator types "anomaly-detector": BaseAnomalyDetector, - "collection_anomaly_detector": BaseCollectionAnomalyDetector, + "collection-anomaly-detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index ba7456c617..1e973b5a4f 100644 --- a/aeon/utils/tags/_tags.py +++ b/aeon/utils/tags/_tags.py @@ -141,7 +141,7 @@ class : identifier for the base class of objects this tag applies to "class": [ "transformer", "anomaly-detector", - "collection_anomaly_detector", + "collection-anomaly-detector", "segmenter", ], "type": "bool", From d47b53720acdd48d326affc15b64cc1337c293b9 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Mon, 5 May 2025 21:49:58 +0100 Subject: [PATCH 11/16] big refactor --- aeon/anomaly_detection/__init__.py | 2 - aeon/anomaly_detection/base.py | 209 +------------ aeon/anomaly_detection/collection/__init__.py | 11 + .../_classification.py | 2 +- .../_outlier_detection.py | 2 +- .../{whole_series => collection}/base.py | 0 .../distance_based/__init__.py | 19 -- .../distribution_based/__init__.py | 9 - .../outlier_detection/__init__.py | 11 - aeon/anomaly_detection/series/__init__.py | 9 + .../_pyodadapter.py | 4 +- aeon/anomaly_detection/series/base.py | 293 ++++++++++++++++++ .../series/distance_based/__init__.py | 21 ++ .../{ => series}/distance_based/_cblof.py | 2 +- .../{ => 
series}/distance_based/_kmeans.py | 4 +- .../distance_based/_left_stampi.py | 4 +- .../{ => series}/distance_based/_lof.py | 2 +- .../{ => series}/distance_based/_merlin.py | 4 +- .../distance_based}/_rockad.py | 4 +- .../{ => series}/distance_based/_stomp.py | 4 +- .../distance_based/tests/__init__.py | 0 .../distance_based/tests/test_cblof.py | 2 +- .../distance_based/tests/test_kmeans.py | 2 +- .../distance_based/tests/test_left_stampi.py | 2 +- .../distance_based/tests/test_lof.py | 2 +- .../distance_based/tests/test_merlin.py | 2 +- .../distance_based}/tests/test_rockad.py | 2 +- .../distance_based/tests/test_stomp.py | 2 +- .../series/distribution_based/__init__.py | 9 + .../{ => series}/distribution_based/_copod.py | 2 +- .../distribution_based/_dwt_mlead.py | 4 +- .../distribution_based/tests/__init__.py | 0 .../distribution_based/tests/test_copod.py | 2 +- .../tests/test_dwt_mlead.py | 2 +- .../series/outlier_detection/__init__.py | 9 + .../outlier_detection/_iforest.py | 2 +- .../outlier_detection}/_one_class_svm.py | 4 +- .../{ => series}/outlier_detection/_stray.py | 4 +- .../outlier_detection/tests/__init__.py | 0 .../outlier_detection/tests/test_iforest.py | 2 +- .../tests/test_one_class_svm.py | 2 +- .../outlier_detection/tests/test_stray.py | 2 +- .../{ => series}/tests/__init__.py | 0 .../{ => series}/tests/test_base.py | 0 .../tests/test_pyod_adapter.py | 2 +- .../whole_series/__init__.py | 13 - .../whole_series/tests/__init__.py | 1 - .../_yield_estimator_checks.py | 4 +- .../_mock_anomaly_detectors.py | 4 +- aeon/testing/testing_data.py | 6 +- aeon/utils/base/_identifier.py | 2 + aeon/utils/base/_register.py | 15 +- aeon/utils/base/tests/test_identifier.py | 2 +- aeon/utils/tags/_tags.py | 1 - aeon/utils/tags/tests/test_discovery.py | 6 +- aeon/utils/tests/test_discovery.py | 4 +- docs/developer_guide/adding_typehints.md | 12 +- .../anomaly_detection/anomaly_detection.ipynb | 12 +- 58 files changed, 442 insertions(+), 316 deletions(-) create mode 100644 aeon/anomaly_detection/collection/__init__.py rename aeon/anomaly_detection/{whole_series => collection}/_classification.py (96%) rename aeon/anomaly_detection/{whole_series => collection}/_outlier_detection.py (95%) rename aeon/anomaly_detection/{whole_series => collection}/base.py (100%) delete mode 100644 aeon/anomaly_detection/distance_based/__init__.py delete mode 100644 aeon/anomaly_detection/distribution_based/__init__.py delete mode 100644 aeon/anomaly_detection/outlier_detection/__init__.py create mode 100644 aeon/anomaly_detection/series/__init__.py rename aeon/anomaly_detection/{outlier_detection => series}/_pyodadapter.py (98%) create mode 100644 aeon/anomaly_detection/series/base.py create mode 100644 aeon/anomaly_detection/series/distance_based/__init__.py rename aeon/anomaly_detection/{ => series}/distance_based/_cblof.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_kmeans.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_left_stampi.py (97%) rename aeon/anomaly_detection/{ => series}/distance_based/_lof.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_merlin.py (98%) rename aeon/anomaly_detection/{whole_series => series/distance_based}/_rockad.py (98%) rename aeon/anomaly_detection/{ => series}/distance_based/_stomp.py (97%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_cblof.py (97%) rename aeon/anomaly_detection/{ => 
series}/distance_based/tests/test_kmeans.py (95%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_left_stampi.py (99%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_lof.py (99%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_merlin.py (96%) rename aeon/anomaly_detection/{whole_series => series/distance_based}/tests/test_rockad.py (96%) rename aeon/anomaly_detection/{ => series}/distance_based/tests/test_stomp.py (95%) create mode 100644 aeon/anomaly_detection/series/distribution_based/__init__.py rename aeon/anomaly_detection/{ => series}/distribution_based/_copod.py (97%) rename aeon/anomaly_detection/{ => series}/distribution_based/_dwt_mlead.py (98%) rename aeon/anomaly_detection/{ => series}/distribution_based/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/distribution_based/tests/test_copod.py (96%) rename aeon/anomaly_detection/{ => series}/distribution_based/tests/test_dwt_mlead.py (95%) create mode 100644 aeon/anomaly_detection/series/outlier_detection/__init__.py rename aeon/anomaly_detection/{ => series}/outlier_detection/_iforest.py (98%) rename aeon/anomaly_detection/{distance_based => series/outlier_detection}/_one_class_svm.py (98%) rename aeon/anomaly_detection/{ => series}/outlier_detection/_stray.py (98%) rename aeon/anomaly_detection/{ => series}/outlier_detection/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/outlier_detection/tests/test_iforest.py (98%) rename aeon/anomaly_detection/{distance_based => series/outlier_detection}/tests/test_one_class_svm.py (95%) rename aeon/anomaly_detection/{ => series}/outlier_detection/tests/test_stray.py (98%) rename aeon/anomaly_detection/{ => series}/tests/__init__.py (100%) rename aeon/anomaly_detection/{ => series}/tests/test_base.py (100%) rename aeon/anomaly_detection/{outlier_detection => series}/tests/test_pyod_adapter.py (98%) delete mode 100644 aeon/anomaly_detection/whole_series/__init__.py delete mode 100644 aeon/anomaly_detection/whole_series/tests/__init__.py diff --git a/aeon/anomaly_detection/__init__.py b/aeon/anomaly_detection/__init__.py index 65343cd774..878e29fd32 100644 --- a/aeon/anomaly_detection/__init__.py +++ b/aeon/anomaly_detection/__init__.py @@ -3,5 +3,3 @@ __all__ = [ "BaseAnomalyDetector", ] - -from aeon.anomaly_detection.base import BaseAnomalyDetector diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index 2e333cf755..60d35f0e2e 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -4,87 +4,23 @@ __all__ = ["BaseAnomalyDetector"] from abc import abstractmethod -from typing import final import numpy as np -import pandas as pd -from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_SERIES_INPUT_TYPES +from aeon.base import BaseAeonEstimator -class BaseAnomalyDetector(BaseSeriesEstimator): - """Base class for anomaly detection algorithms. - - Anomaly detection algorithms are used to identify anomalous subsequences in time - series data. These algorithms take a series of length m and return a boolean, int or - float array of length m, where each element indicates whether the corresponding - subsequence is anomalous or its anomaly score. - - Input and internal data format (where m is the number of time points and d is the - number of channels): - Univariate series (default): - np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis. - This is converted to a 2D np.ndarray internally. 
- pd.DataFrame, shape ``(m, 1)`` or ``(1, m)`` depending on axis. - pd.Series, shape ``(m,)``. - Multivariate series: - np.ndarray array, shape ``(m, d)`` or ``(d, m)`` depending on axis. - pd.DataFrame ``(m, d)`` or ``(d, m)`` depending on axis. - - Output data format (one of the following): - Anomaly scores (default): - np.ndarray, shape ``(m,)`` of type float. For each point of the input time - series, the anomaly score is a float value indicating the degree of - anomalousness. The higher the score, the more anomalous the point. - Binary classification: - np.ndarray, shape ``(m,)`` of type bool or int. For each point of the input - time series, the output is a boolean or integer value indicating whether the - point is anomalous (``True``/``1``) or not (``False``/``0``). - - Detector learning types: - Unsupervised (default): - Unsupervised detectors do not require any training data and can directly be - used on the target time series. Their tags are set to ``fit_is_empty=True`` - and ``requires_y=False``. You would usually call the ``fit_predict`` method - on these detectors. - Semi-supervised: - Semi-supervised detectors require a training step on a time series without - anomalies (normal behaving time series). The target value ``y`` would - consist of only zeros. Thus, these algorithms have logic in the ``fit`` - method, but do not require the target values. Their tags are set to - ``fit_is_empty=False`` and ``requires_y=False``. You would usually first - call the ``fit`` method on the training data and then the ``predict`` - method for your target time series. - Supervised: - Supervised detectors require a training step on a time series with known - anomalies (anomalies should be present and must be annotated). The detector - implements the ``fit`` method, and the target value ``y`` consists of zeros - and ones. Their tags are, thus, set to ``fit_is_empty=False`` and - ``requires_y=True``. You would usually first call the ``fit`` method on the - training data and then the ``predict`` method for your target time series. - - Parameters - ---------- - axis : int - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - Setting this class variable will convert the input data to the chosen axis. - """ +class BaseAnomalyDetector(BaseAeonEstimator): + """todo base class docs.""" _tags = { - "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES - "fit_is_empty": True, - "requires_y": False, + # todo } def __init__(self, axis): - super().__init__(axis=axis) + super().__init__() - @final + @abstractmethod def fit(self, X, y=None, axis=1): """Fit time series anomaly detector to X. @@ -112,31 +48,12 @@ def fit(self, X, y=None, axis=1): Returns ------- - BaseAnomalyDetector + BaseSeriesAnomalyDetector The fitted estimator, reference to self. """ - if self.get_tag("fit_is_empty"): - self.is_fitted = True - return self - - if self.get_tag("requires_y"): - if y is None: - raise ValueError("Tag requires_y is true, but fit called with y=None") - - # reset estimator at the start of fit - self.reset() - - X = self._preprocess_series(X, axis, True) - if y is not None: - y = self._check_y(y) + ... 
- self._fit(X=X, y=y) - - # this should happen last - self.is_fitted = True - return self - - @final + @abstractmethod def predict(self, X, axis=1) -> np.ndarray: """Find anomalies in X. @@ -159,15 +76,9 @@ def predict(self, X, axis=1) -> np.ndarray: A boolean, int or float array of length len(X), where each element indicates whether the corresponding subsequence is anomalous or its anomaly score. """ - fit_empty = self.get_tag("fit_is_empty") - if not fit_empty: - self._check_is_fitted() - - X = self._preprocess_series(X, axis, False) + ... - return self._predict(X) - - @final + @abstractmethod def fit_predict(self, X, y=None, axis=1) -> np.ndarray: """Fit time series anomaly detector and find anomalies for X. @@ -194,100 +105,4 @@ def fit_predict(self, X, y=None, axis=1) -> np.ndarray: A boolean, int or float array of length len(X), where each element indicates whether the corresponding subsequence is anomalous or its anomaly score. """ - if self.get_tag("requires_y"): - if y is None: - raise ValueError("Tag requires_y is true, but fit called with y=None") - - # reset estimator at the start of fit - self.reset() - - X = self._preprocess_series(X, axis, True) - - if self.get_tag("fit_is_empty"): - self.is_fitted = True - return self._predict(X) - - if y is not None: - y = self._check_y(y) - - pred = self._fit_predict(X, y) - - # this should happen last - self.is_fitted = True - return pred - - def _fit(self, X, y): - return self - - @abstractmethod - def _predict(self, X) -> np.ndarray: ... - - def _fit_predict(self, X, y): - self._fit(X, y) - return self._predict(X) - - def _check_y(self, y: VALID_SERIES_INPUT_TYPES) -> np.ndarray: - # Remind user if y is not required for this estimator on failure - req_msg = ( - f"{self.__class__.__name__} does not require a y input." - if self.get_tag("requires_y") - else "" - ) - new_y = y - - # must be a valid input type, see VALID_SERIES_INPUT_TYPES in - # BaseSeriesEstimator - if isinstance(y, np.ndarray): - # check valid shape - if y.ndim > 1: - raise ValueError( - "Error in input type for y: y input as np.ndarray should be 1D." - + req_msg - ) - - # check valid dtype - fail = False - if issubclass(y.dtype.type, np.integer): - new_y = y.astype(bool) - fail = not np.array_equal(y, new_y) - elif not issubclass(y.dtype.type, np.bool_): - fail = True - - if fail: - raise ValueError( - "Error in input type for y: y input type must be an integer array " - "containing 0 and 1 or a boolean array." + req_msg - ) - elif isinstance(y, pd.Series): - # check series is of boolean dtype - if not pd.api.types.is_bool_dtype(y): - raise ValueError( - "Error in input type for y: y input as pd.Series must have a " - "boolean dtype." + req_msg - ) - - new_y = y.values - elif isinstance(y, pd.DataFrame): - # only accept size 1 dataframe - if y.shape[1] > 1: - raise ValueError( - "Error in input type for y: y input as pd.DataFrame should have a " - "single column series." - ) - - # check column is of boolean dtype - if not all(pd.api.types.is_bool_dtype(y[col]) for col in y.columns): - raise ValueError( - "Error in input type for y: y input as pd.DataFrame must have a " - "boolean dtype." + req_msg - ) - - new_y = y.squeeze().values - else: - raise ValueError( - f"Error in input type for y: it should be one of " - f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" - ) - - new_y = new_y.astype(bool) - return new_y + ... 
diff --git a/aeon/anomaly_detection/collection/__init__.py b/aeon/anomaly_detection/collection/__init__.py new file mode 100644 index 0000000000..4fc14ffd1f --- /dev/null +++ b/aeon/anomaly_detection/collection/__init__.py @@ -0,0 +1,11 @@ +"""Whole-series anomaly detection methods.""" + +__all__ = [ + "BaseCollectionAnomalyDetector", + "ClassificationAdapter", + "OutlierDetectionAdapter", +] + +from aeon.anomaly_detection.collection._classification import ClassificationAdapter +from aeon.anomaly_detection.collection._outlier_detection import OutlierDetectionAdapter +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector diff --git a/aeon/anomaly_detection/whole_series/_classification.py b/aeon/anomaly_detection/collection/_classification.py similarity index 96% rename from aeon/anomaly_detection/whole_series/_classification.py rename to aeon/anomaly_detection/collection/_classification.py index 849312a9a2..d8aa34ebea 100644 --- a/aeon/anomaly_detection/whole_series/_classification.py +++ b/aeon/anomaly_detection/collection/_classification.py @@ -6,7 +6,7 @@ from sklearn.base import ClassifierMixin from sklearn.ensemble import RandomForestClassifier -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator from aeon.classification.feature_based import SummaryClassifier diff --git a/aeon/anomaly_detection/whole_series/_outlier_detection.py b/aeon/anomaly_detection/collection/_outlier_detection.py similarity index 95% rename from aeon/anomaly_detection/whole_series/_outlier_detection.py rename to aeon/anomaly_detection/collection/_outlier_detection.py index ac702c7297..db40a8e749 100644 --- a/aeon/anomaly_detection/whole_series/_outlier_detection.py +++ b/aeon/anomaly_detection/collection/_outlier_detection.py @@ -5,7 +5,7 @@ from sklearn.base import OutlierMixin from sklearn.ensemble import IsolationForest -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector from aeon.base._base import _clone_estimator diff --git a/aeon/anomaly_detection/whole_series/base.py b/aeon/anomaly_detection/collection/base.py similarity index 100% rename from aeon/anomaly_detection/whole_series/base.py rename to aeon/anomaly_detection/collection/base.py diff --git a/aeon/anomaly_detection/distance_based/__init__.py b/aeon/anomaly_detection/distance_based/__init__.py deleted file mode 100644 index 5eb342b780..0000000000 --- a/aeon/anomaly_detection/distance_based/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Distance basedTime Series Anomaly Detection.""" - -__all__ = [ - "CBLOF", - "KMeansAD", - "LeftSTAMPi", - "LOF", - "MERLIN", - "OneClassSVM", - "STOMP", -] - -from aeon.anomaly_detection.distance_based._cblof import CBLOF -from aeon.anomaly_detection.distance_based._kmeans import KMeansAD -from aeon.anomaly_detection.distance_based._left_stampi import LeftSTAMPi -from aeon.anomaly_detection.distance_based._lof import LOF -from aeon.anomaly_detection.distance_based._merlin import MERLIN -from aeon.anomaly_detection.distance_based._one_class_svm import OneClassSVM -from aeon.anomaly_detection.distance_based._stomp import STOMP diff --git a/aeon/anomaly_detection/distribution_based/__init__.py b/aeon/anomaly_detection/distribution_based/__init__.py deleted file mode 100644 index e52a7512ba..0000000000 --- 
a/aeon/anomaly_detection/distribution_based/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Distribution based Time Series Anomaly Detection.""" - -__all__ = [ - "COPOD", - "DWT_MLEAD", -] - -from aeon.anomaly_detection.distribution_based._copod import COPOD -from aeon.anomaly_detection.distribution_based._dwt_mlead import DWT_MLEAD diff --git a/aeon/anomaly_detection/outlier_detection/__init__.py b/aeon/anomaly_detection/outlier_detection/__init__.py deleted file mode 100644 index ad9b7868e5..0000000000 --- a/aeon/anomaly_detection/outlier_detection/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Time Series Outlier Detection.""" - -__all__ = [ - "IsolationForest", - "PyODAdapter", - "STRAY", -] - -from aeon.anomaly_detection.outlier_detection._iforest import IsolationForest -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter -from aeon.anomaly_detection.outlier_detection._stray import STRAY diff --git a/aeon/anomaly_detection/series/__init__.py b/aeon/anomaly_detection/series/__init__.py new file mode 100644 index 0000000000..a4d2052d1c --- /dev/null +++ b/aeon/anomaly_detection/series/__init__.py @@ -0,0 +1,9 @@ +"""Single series Time Series Anomaly Detection.""" + +__all__ = [ + "BaseSeriesAnomalyDetector", + "PyODAdapter", +] + +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector diff --git a/aeon/anomaly_detection/outlier_detection/_pyodadapter.py b/aeon/anomaly_detection/series/_pyodadapter.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/_pyodadapter.py rename to aeon/anomaly_detection/series/_pyodadapter.py index 5a068857c6..aa4e1f9779 100644 --- a/aeon/anomaly_detection/outlier_detection/_pyodadapter.py +++ b/aeon/anomaly_detection/series/_pyodadapter.py @@ -10,7 +10,7 @@ import numpy as np from sklearn import clone -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.validation._dependencies import _check_soft_dependencies from aeon.utils.windowing import reverse_windowing, sliding_windows @@ -18,7 +18,7 @@ from pyod.models.base import BaseDetector -class PyODAdapter(BaseAnomalyDetector): +class PyODAdapter(BaseSeriesAnomalyDetector): """Adapter for PyOD anomaly detection models to be used in the Aeon framework. This adapter allows the use of PyOD models in the Aeon framework. The adapter diff --git a/aeon/anomaly_detection/series/base.py b/aeon/anomaly_detection/series/base.py new file mode 100644 index 0000000000..57863d9f3d --- /dev/null +++ b/aeon/anomaly_detection/series/base.py @@ -0,0 +1,293 @@ +"""Abstract base class for series time series anomaly detectors.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["BaseSeriesAnomalyDetector"] + +from abc import abstractmethod +from typing import final + +import numpy as np +import pandas as pd + +from aeon.base import BaseSeriesEstimator +from aeon.base._base_series import VALID_SERIES_INPUT_TYPES + + +class BaseSeriesAnomalyDetector(BaseSeriesEstimator): + """Base class for series anomaly detection algorithms. + + Anomaly detection algorithms are used to identify anomalous subsequences in time + series data. These algorithms take a series of length m and return a boolean, int or + float array of length m, where each element indicates whether the corresponding + subsequence is anomalous or its anomaly score. 
+ + Input and internal data format (where m is the number of time points and d is the + number of channels): + Univariate series (default): + np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis. + This is converted to a 2D np.ndarray internally. + pd.DataFrame, shape ``(m, 1)`` or ``(1, m)`` depending on axis. + pd.Series, shape ``(m,)``. + Multivariate series: + np.ndarray array, shape ``(m, d)`` or ``(d, m)`` depending on axis. + pd.DataFrame ``(m, d)`` or ``(d, m)`` depending on axis. + + Output data format (one of the following): + Anomaly scores (default): + np.ndarray, shape ``(m,)`` of type float. For each point of the input time + series, the anomaly score is a float value indicating the degree of + anomalousness. The higher the score, the more anomalous the point. + Binary classification: + np.ndarray, shape ``(m,)`` of type bool or int. For each point of the input + time series, the output is a boolean or integer value indicating whether the + point is anomalous (``True``/``1``) or not (``False``/``0``). + + Detector learning types: + Unsupervised (default): + Unsupervised detectors do not require any training data and can directly be + used on the target time series. Their tags are set to ``fit_is_empty=True`` + and ``requires_y=False``. You would usually call the ``fit_predict`` method + on these detectors. + Semi-supervised: + Semi-supervised detectors require a training step on a time series without + anomalies (normal behaving time series). The target value ``y`` would + consist of only zeros. Thus, these algorithms have logic in the ``fit`` + method, but do not require the target values. Their tags are set to + ``fit_is_empty=False`` and ``requires_y=False``. You would usually first + call the ``fit`` method on the training data and then the ``predict`` + method for your target time series. + Supervised: + Supervised detectors require a training step on a time series with known + anomalies (anomalies should be present and must be annotated). The detector + implements the ``fit`` method, and the target value ``y`` consists of zeros + and ones. Their tags are, thus, set to ``fit_is_empty=False`` and + ``requires_y=True``. You would usually first call the ``fit`` method on the + training data and then the ``predict`` method for your target time series. + + Parameters + ---------- + axis : int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + Setting this class variable will convert the input data to the chosen axis. + """ + + _tags = { + "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES + "fit_is_empty": True, + "requires_y": False, + } + + def __init__(self, axis): + super().__init__(axis=axis) + + @final + def fit(self, X, y=None, axis=1): + """Fit time series anomaly detector to X. + + If the tag ``fit_is_empty`` is true, this just sets the ``is_fitted`` tag to + true. Otherwise, it checks ``self`` can handle ``X``, formats ``X`` into + the structure required by ``self`` then passes ``X`` (and possibly ``y``) to + ``_fit``. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + The time series to fit the model to. + A valid aeon time series data structure. 
See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None + The target values for the time series. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + + Returns + ------- + BaseSeriesAnomalyDetector + The fitted estimator, reference to self. + """ + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self + + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_series(X, axis, True) + if y is not None: + y = self._check_y(y) + + self._fit(X=X, y=y) + + # this should happen last + self.is_fitted = True + return self + + @final + def predict(self, X, axis=1) -> np.ndarray: + """Find anomalies in X. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + The time series to fit the model to. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int, default=1 + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + + Returns + ------- + np.ndarray + A boolean, int or float array of length len(X), where each element indicates + whether the corresponding subsequence is anomalous or its anomaly score. + """ + fit_empty = self.get_tag("fit_is_empty") + if not fit_empty: + self._check_is_fitted() + + X = self._preprocess_series(X, axis, False) + + return self._predict(X) + + @final + def fit_predict(self, X, y=None, axis=1) -> np.ndarray: + """Fit time series anomaly detector and find anomalies for X. + + Parameters + ---------- + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + The time series to fit the model to. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None + The target values for the time series. + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis : int, default=1 + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + + Returns + ------- + np.ndarray + A boolean, int or float array of length len(X), where each element indicates + whether the corresponding subsequence is anomalous or its anomaly score. 
+ """ + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_series(X, axis, True) + + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self._predict(X) + + if y is not None: + y = self._check_y(y) + + pred = self._fit_predict(X, y) + + # this should happen last + self.is_fitted = True + return pred + + def _fit(self, X, y): + return self + + @abstractmethod + def _predict(self, X) -> np.ndarray: ... + + def _fit_predict(self, X, y): + self._fit(X, y) + return self._predict(X) + + def _check_y(self, y: VALID_SERIES_INPUT_TYPES) -> np.ndarray: + # Remind user if y is not required for this estimator on failure + req_msg = ( + f"{self.__class__.__name__} does not require a y input." + if self.get_tag("requires_y") + else "" + ) + new_y = y + + # must be a valid input type, see VALID_SERIES_INPUT_TYPES in + # BaseSeriesEstimator + if isinstance(y, np.ndarray): + # check valid shape + if y.ndim > 1: + raise ValueError( + "Error in input type for y: y input as np.ndarray should be 1D." + + req_msg + ) + + # check valid dtype + fail = False + if issubclass(y.dtype.type, np.integer): + new_y = y.astype(bool) + fail = not np.array_equal(y, new_y) + elif not issubclass(y.dtype.type, np.bool_): + fail = True + + if fail: + raise ValueError( + "Error in input type for y: y input type must be an integer array " + "containing 0 and 1 or a boolean array." + req_msg + ) + elif isinstance(y, pd.Series): + # check series is of boolean dtype + if not pd.api.types.is_bool_dtype(y): + raise ValueError( + "Error in input type for y: y input as pd.Series must have a " + "boolean dtype." + req_msg + ) + + new_y = y.values + elif isinstance(y, pd.DataFrame): + # only accept size 1 dataframe + if y.shape[1] > 1: + raise ValueError( + "Error in input type for y: y input as pd.DataFrame should have a " + "single column series." + ) + + # check column is of boolean dtype + if not all(pd.api.types.is_bool_dtype(y[col]) for col in y.columns): + raise ValueError( + "Error in input type for y: y input as pd.DataFrame must have a " + "boolean dtype." 
+ req_msg + ) + + new_y = y.squeeze().values + else: + raise ValueError( + f"Error in input type for y: it should be one of " + f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" + ) + + new_y = new_y.astype(bool) + return new_y diff --git a/aeon/anomaly_detection/series/distance_based/__init__.py b/aeon/anomaly_detection/series/distance_based/__init__.py new file mode 100644 index 0000000000..df02c8cd92 --- /dev/null +++ b/aeon/anomaly_detection/series/distance_based/__init__.py @@ -0,0 +1,21 @@ +"""Distance-based Time Series Anomaly Detection.""" + +__all__ = [ + "CBLOF", + "KMeansAD", + "LeftSTAMPi", + "LOF", + "MERLIN", + "OneClassSVM", + "STOMP", + "ROCKAD", +] + +from aeon.anomaly_detection.series.distance_based._cblof import CBLOF +from aeon.anomaly_detection.series.distance_based._kmeans import KMeansAD +from aeon.anomaly_detection.series.distance_based._left_stampi import LeftSTAMPi +from aeon.anomaly_detection.series.distance_based._lof import LOF +from aeon.anomaly_detection.series.distance_based._merlin import MERLIN +from aeon.anomaly_detection.series.distance_based._rockad import ROCKAD +from aeon.anomaly_detection.series.distance_based._stomp import STOMP +from aeon.anomaly_detection.series.outlier_detection._one_class_svm import OneClassSVM diff --git a/aeon/anomaly_detection/distance_based/_cblof.py b/aeon/anomaly_detection/series/distance_based/_cblof.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_cblof.py rename to aeon/anomaly_detection/series/distance_based/_cblof.py index 18bb044c14..016a9c18ba 100644 --- a/aeon/anomaly_detection/distance_based/_cblof.py +++ b/aeon/anomaly_detection/series/distance_based/_cblof.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/_kmeans.py b/aeon/anomaly_detection/series/distance_based/_kmeans.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_kmeans.py rename to aeon/anomaly_detection/series/distance_based/_kmeans.py index bb8f188a1d..aea82ee21a 100644 --- a/aeon/anomaly_detection/distance_based/_kmeans.py +++ b/aeon/anomaly_detection/series/distance_based/_kmeans.py @@ -8,11 +8,11 @@ import numpy as np from sklearn.cluster import KMeans -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing, sliding_windows -class KMeansAD(BaseAnomalyDetector): +class KMeansAD(BaseSeriesAnomalyDetector): """KMeans anomaly detector. 
The k-Means anomaly detector uses k-Means clustering to detect anomalies in time diff --git a/aeon/anomaly_detection/distance_based/_left_stampi.py b/aeon/anomaly_detection/series/distance_based/_left_stampi.py similarity index 97% rename from aeon/anomaly_detection/distance_based/_left_stampi.py rename to aeon/anomaly_detection/series/distance_based/_left_stampi.py index 43078ce021..cbeba2c5a5 100644 --- a/aeon/anomaly_detection/distance_based/_left_stampi.py +++ b/aeon/anomaly_detection/series/distance_based/_left_stampi.py @@ -6,11 +6,11 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing -class LeftSTAMPi(BaseAnomalyDetector): +class LeftSTAMPi(BaseSeriesAnomalyDetector): """LeftSTAMPi anomaly detector. LeftSTAMPi [1]_ calculates the left matrix profile of a time series, diff --git a/aeon/anomaly_detection/distance_based/_lof.py b/aeon/anomaly_detection/series/distance_based/_lof.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_lof.py rename to aeon/anomaly_detection/series/distance_based/_lof.py index 2c3615d906..1a914583a9 100644 --- a/aeon/anomaly_detection/distance_based/_lof.py +++ b/aeon/anomaly_detection/series/distance_based/_lof.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/_merlin.py b/aeon/anomaly_detection/series/distance_based/_merlin.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_merlin.py rename to aeon/anomaly_detection/series/distance_based/_merlin.py index b63224acd5..be0d2a9ead 100644 --- a/aeon/anomaly_detection/distance_based/_merlin.py +++ b/aeon/anomaly_detection/series/distance_based/_merlin.py @@ -8,13 +8,13 @@ import numpy as np from numba import njit -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.distances import squared_distance from aeon.utils.numba.general import AEON_NUMBA_STD_THRESHOLD from aeon.utils.numba.stats import mean, std -class MERLIN(BaseAnomalyDetector): +class MERLIN(BaseSeriesAnomalyDetector): """MERLIN anomaly detector. MERLIN is a discord discovery algorithm that uses a sliding window to find the diff --git a/aeon/anomaly_detection/whole_series/_rockad.py b/aeon/anomaly_detection/series/distance_based/_rockad.py similarity index 98% rename from aeon/anomaly_detection/whole_series/_rockad.py rename to aeon/anomaly_detection/series/distance_based/_rockad.py index 603a8732a3..15e2016e18 100644 --- a/aeon/anomaly_detection/whole_series/_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/_rockad.py @@ -10,12 +10,12 @@ from sklearn.preprocessing import PowerTransformer from sklearn.utils import resample -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.transformations.collection.convolution_based import Rocket from aeon.utils.windowing import reverse_windowing, sliding_windows -class ROCKAD(BaseAnomalyDetector): +class ROCKAD(BaseSeriesAnomalyDetector): """ ROCKET-based Anomaly Detector (ROCKAD). 
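The renames above move the distance-based detectors into `aeon.anomaly_detection.series.distance_based` and rebase them on `BaseSeriesAnomalyDetector`; the public behaviour is unchanged. A minimal usage sketch under the new layout, using a synthetic series and an arbitrary window size (neither taken from the patch):

```python
import numpy as np

from aeon.anomaly_detection.series.distance_based import STOMP

rng = np.random.default_rng(0)
X = rng.normal(size=500)           # synthetic univariate series
X[250:260] += 5.0                  # injected spike, purely for illustration

detector = STOMP(window_size=50)   # matrix profile window, arbitrary choice
scores = detector.fit_predict(X)   # one anomaly score per time point
print(scores.shape)                # expected: (500,)
```

Only the import path and base class differ from the pre-refactor code.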
diff --git a/aeon/anomaly_detection/distance_based/_stomp.py b/aeon/anomaly_detection/series/distance_based/_stomp.py similarity index 97% rename from aeon/anomaly_detection/distance_based/_stomp.py rename to aeon/anomaly_detection/series/distance_based/_stomp.py index 3f8be36432..7298b438c9 100644 --- a/aeon/anomaly_detection/distance_based/_stomp.py +++ b/aeon/anomaly_detection/series/distance_based/_stomp.py @@ -7,11 +7,11 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing -class STOMP(BaseAnomalyDetector): +class STOMP(BaseSeriesAnomalyDetector): """STOMP anomaly detector. STOMP calculates the matrix profile of a time series which is the distance to the diff --git a/aeon/anomaly_detection/distance_based/tests/__init__.py b/aeon/anomaly_detection/series/distance_based/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/distance_based/tests/__init__.py rename to aeon/anomaly_detection/series/distance_based/tests/__init__.py diff --git a/aeon/anomaly_detection/distance_based/tests/test_cblof.py b/aeon/anomaly_detection/series/distance_based/tests/test_cblof.py similarity index 97% rename from aeon/anomaly_detection/distance_based/tests/test_cblof.py rename to aeon/anomaly_detection/series/distance_based/tests/test_cblof.py index d1472af6a2..ce7579734f 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_cblof.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_cblof.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distance_based import CBLOF +from aeon.anomaly_detection.series.distance_based import CBLOF from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_kmeans.py b/aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py similarity index 95% rename from aeon/anomaly_detection/distance_based/tests/test_kmeans.py rename to aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py index 2647411b88..bc966929b0 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_kmeans.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distance_based import KMeansAD +from aeon.anomaly_detection.series.distance_based import KMeansAD def test_kmeansad_univariate(): diff --git a/aeon/anomaly_detection/distance_based/tests/test_left_stampi.py b/aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py similarity index 99% rename from aeon/anomaly_detection/distance_based/tests/test_left_stampi.py rename to aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py index 6444bccdfe..2e14928625 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_left_stampi.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py @@ -8,7 +8,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distance_based._left_stampi import LeftSTAMPi +from aeon.anomaly_detection.series.distance_based._left_stampi import LeftSTAMPi from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_lof.py 
b/aeon/anomaly_detection/series/distance_based/tests/test_lof.py similarity index 99% rename from aeon/anomaly_detection/distance_based/tests/test_lof.py rename to aeon/anomaly_detection/series/distance_based/tests/test_lof.py index 033d11295b..a9107705fa 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_lof.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_lof.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distance_based import LOF +from aeon.anomaly_detection.series.distance_based import LOF from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_merlin.py b/aeon/anomaly_detection/series/distance_based/tests/test_merlin.py similarity index 96% rename from aeon/anomaly_detection/distance_based/tests/test_merlin.py rename to aeon/anomaly_detection/series/distance_based/tests/test_merlin.py index ccf7e3300d..0ef5aafb8a 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_merlin.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_merlin.py @@ -4,7 +4,7 @@ import numpy as np -from aeon.anomaly_detection.distance_based import MERLIN +from aeon.anomaly_detection.series.distance_based import MERLIN TEST_DATA = np.array( [ diff --git a/aeon/anomaly_detection/whole_series/tests/test_rockad.py b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py similarity index 96% rename from aeon/anomaly_detection/whole_series/tests/test_rockad.py rename to aeon/anomaly_detection/series/distance_based/tests/test_rockad.py index 7d3694b2c8..51d2425505 100644 --- a/aeon/anomaly_detection/whole_series/tests/test_rockad.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.whole_series import ROCKAD +from aeon.anomaly_detection.series.distance_based import ROCKAD def test_rockad_univariate(): diff --git a/aeon/anomaly_detection/distance_based/tests/test_stomp.py b/aeon/anomaly_detection/series/distance_based/tests/test_stomp.py similarity index 95% rename from aeon/anomaly_detection/distance_based/tests/test_stomp.py rename to aeon/anomaly_detection/series/distance_based/tests/test_stomp.py index b506c89ea0..f8225d3c7b 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_stomp.py +++ b/aeon/anomaly_detection/series/distance_based/tests/test_stomp.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distance_based import STOMP +from aeon.anomaly_detection.series.distance_based import STOMP from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distribution_based/__init__.py b/aeon/anomaly_detection/series/distribution_based/__init__.py new file mode 100644 index 0000000000..7338cb740d --- /dev/null +++ b/aeon/anomaly_detection/series/distribution_based/__init__.py @@ -0,0 +1,9 @@ +"""Distribution-based Time Series Anomaly Detection.""" + +__all__ = [ + "COPOD", + "DWT_MLEAD", +] + +from aeon.anomaly_detection.series.distribution_based._copod import COPOD +from aeon.anomaly_detection.series.distribution_based._dwt_mlead import DWT_MLEAD diff --git a/aeon/anomaly_detection/distribution_based/_copod.py b/aeon/anomaly_detection/series/distribution_based/_copod.py similarity index 97% rename from 
aeon/anomaly_detection/distribution_based/_copod.py rename to aeon/anomaly_detection/series/distribution_based/_copod.py index bd2af0e084..3e72614f17 100644 --- a/aeon/anomaly_detection/distribution_based/_copod.py +++ b/aeon/anomaly_detection/series/distribution_based/_copod.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distribution_based/_dwt_mlead.py b/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py similarity index 98% rename from aeon/anomaly_detection/distribution_based/_dwt_mlead.py rename to aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py index cb0de0c015..2154abee22 100644 --- a/aeon/anomaly_detection/distribution_based/_dwt_mlead.py +++ b/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py @@ -11,7 +11,7 @@ from numpy.lib.stride_tricks import sliding_window_view from sklearn.covariance import EmpiricalCovariance -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.numba.wavelets import multilevel_haar_transform @@ -30,7 +30,7 @@ def _combine_alternating(xs: list[Any], ys: list[Any]) -> Iterable[Any]: yield y -class DWT_MLEAD(BaseAnomalyDetector): +class DWT_MLEAD(BaseSeriesAnomalyDetector): """DWT-MLEAD anomaly detector. DWT-MLEAD is an anomaly detection algorithm that uses the Discrete Wavelet Transform diff --git a/aeon/anomaly_detection/distribution_based/tests/__init__.py b/aeon/anomaly_detection/series/distribution_based/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/distribution_based/tests/__init__.py rename to aeon/anomaly_detection/series/distribution_based/tests/__init__.py diff --git a/aeon/anomaly_detection/distribution_based/tests/test_copod.py b/aeon/anomaly_detection/series/distribution_based/tests/test_copod.py similarity index 96% rename from aeon/anomaly_detection/distribution_based/tests/test_copod.py rename to aeon/anomaly_detection/series/distribution_based/tests/test_copod.py index 40969da0e7..8732b33c96 100644 --- a/aeon/anomaly_detection/distribution_based/tests/test_copod.py +++ b/aeon/anomaly_detection/series/distribution_based/tests/test_copod.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.distribution_based import COPOD +from aeon.anomaly_detection.series.distribution_based import COPOD from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py b/aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py similarity index 95% rename from aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py rename to aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py index 664d715122..3a714570b3 100644 --- a/aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py +++ b/aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distribution_based import DWT_MLEAD +from aeon.anomaly_detection.series.distribution_based import DWT_MLEAD def test_dwt_mlead_output(): diff --git 
a/aeon/anomaly_detection/series/outlier_detection/__init__.py b/aeon/anomaly_detection/series/outlier_detection/__init__.py new file mode 100644 index 0000000000..33a41b84c7 --- /dev/null +++ b/aeon/anomaly_detection/series/outlier_detection/__init__.py @@ -0,0 +1,9 @@ +"""Time Series Outlier Detection.""" + +__all__ = [ + "IsolationForest", + "STRAY", +] + +from aeon.anomaly_detection.series.outlier_detection._iforest import IsolationForest +from aeon.anomaly_detection.series.outlier_detection._stray import STRAY diff --git a/aeon/anomaly_detection/outlier_detection/_iforest.py b/aeon/anomaly_detection/series/outlier_detection/_iforest.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/_iforest.py rename to aeon/anomaly_detection/series/outlier_detection/_iforest.py index f13152d0e7..a0d62261e6 100644 --- a/aeon/anomaly_detection/outlier_detection/_iforest.py +++ b/aeon/anomaly_detection/series/outlier_detection/_iforest.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.series._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/_one_class_svm.py b/aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py similarity index 98% rename from aeon/anomaly_detection/distance_based/_one_class_svm.py rename to aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py index 9e654ee326..667ec32f9a 100644 --- a/aeon/anomaly_detection/distance_based/_one_class_svm.py +++ b/aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py @@ -7,11 +7,11 @@ import numpy as np from sklearn.svm import OneClassSVM as OCSVM -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.utils.windowing import reverse_windowing, sliding_windows -class OneClassSVM(BaseAnomalyDetector): +class OneClassSVM(BaseSeriesAnomalyDetector): """OneClassSVM for anomaly detection. This class implements the OneClassSVM algorithm for anomaly detection diff --git a/aeon/anomaly_detection/outlier_detection/_stray.py b/aeon/anomaly_detection/series/outlier_detection/_stray.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/_stray.py rename to aeon/anomaly_detection/series/outlier_detection/_stray.py index e7512e2d24..3d78be8643 100644 --- a/aeon/anomaly_detection/outlier_detection/_stray.py +++ b/aeon/anomaly_detection/series/outlier_detection/_stray.py @@ -8,10 +8,10 @@ import numpy.typing as npt from sklearn.neighbors import NearestNeighbors -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector -class STRAY(BaseAnomalyDetector): +class STRAY(BaseSeriesAnomalyDetector): """STRAY: robust anomaly detection in data streams with concept drift. 
This is based on STRAY (Search TRace AnomalY) [1]_, which is a modification diff --git a/aeon/anomaly_detection/outlier_detection/tests/__init__.py b/aeon/anomaly_detection/series/outlier_detection/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/outlier_detection/tests/__init__.py rename to aeon/anomaly_detection/series/outlier_detection/tests/__init__.py diff --git a/aeon/anomaly_detection/outlier_detection/tests/test_iforest.py b/aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/tests/test_iforest.py rename to aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py index a66d1003fb..07e0a085c3 100644 --- a/aeon/anomaly_detection/outlier_detection/tests/test_iforest.py +++ b/aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.outlier_detection import IsolationForest +from aeon.anomaly_detection.series.outlier_detection import IsolationForest from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py b/aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py similarity index 95% rename from aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py rename to aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py index 7a3aca2042..6395291617 100644 --- a/aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py +++ b/aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.distance_based import OneClassSVM +from aeon.anomaly_detection.series.distance_based import OneClassSVM def test_one_class_svm_univariate(): diff --git a/aeon/anomaly_detection/outlier_detection/tests/test_stray.py b/aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/tests/test_stray.py rename to aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py index 8429a8a3c5..76ef9ef915 100644 --- a/aeon/anomaly_detection/outlier_detection/tests/test_stray.py +++ b/aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py @@ -5,7 +5,7 @@ import numpy as np from sklearn.preprocessing import MinMaxScaler -from aeon.anomaly_detection.outlier_detection import STRAY +from aeon.anomaly_detection.series.outlier_detection import STRAY def test_default_1D(): diff --git a/aeon/anomaly_detection/tests/__init__.py b/aeon/anomaly_detection/series/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/tests/__init__.py rename to aeon/anomaly_detection/series/tests/__init__.py diff --git a/aeon/anomaly_detection/tests/test_base.py b/aeon/anomaly_detection/series/tests/test_base.py similarity index 100% rename from aeon/anomaly_detection/tests/test_base.py rename to aeon/anomaly_detection/series/tests/test_base.py diff --git a/aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py b/aeon/anomaly_detection/series/tests/test_pyod_adapter.py similarity index 98% rename from aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py rename to aeon/anomaly_detection/series/tests/test_pyod_adapter.py index ee75078133..84906c245d 100644 --- 
a/aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py +++ b/aeon/anomaly_detection/series/tests/test_pyod_adapter.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.outlier_detection import PyODAdapter +from aeon.anomaly_detection.series.outlier_detection import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py deleted file mode 100644 index 7084ff40a9..0000000000 --- a/aeon/anomaly_detection/whole_series/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Whole-series anomaly detection methods.""" - -__all__ = [ - "BaseCollectionAnomalyDetector", - "ClassificationAdapter", - "OutlierDetectionAdapter", -] - -from aeon.anomaly_detection.whole_series._classification import ClassificationAdapter -from aeon.anomaly_detection.whole_series._outlier_detection import ( - OutlierDetectionAdapter, -) -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector diff --git a/aeon/anomaly_detection/whole_series/tests/__init__.py b/aeon/anomaly_detection/whole_series/tests/__init__.py deleted file mode 100644 index 9292e8d9bd..0000000000 --- a/aeon/anomaly_detection/whole_series/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Whole series anomaly detection tests.""" diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index 363a0ce24d..643118793d 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -11,7 +11,7 @@ import numpy as np from sklearn.exceptions import NotFittedError -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.base._base import _clone_estimator @@ -138,7 +138,7 @@ def _yield_all_aeon_checks( estimator_class, estimator_instances, datatypes ) - if issubclass(estimator_class, BaseAnomalyDetector): + if issubclass(estimator_class, BaseSeriesAnomalyDetector): yield from _yield_anomaly_detection_checks( estimator_class, estimator_instances, datatypes ) diff --git a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py index 4ec14d35fa..d48ebfcfb5 100644 --- a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py +++ b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py @@ -10,10 +10,10 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector -class MockAnomalyDetector(BaseAnomalyDetector): +class MockAnomalyDetector(BaseSeriesAnomalyDetector): """Mock anomaly detector.""" _tags = { diff --git a/aeon/testing/testing_data.py b/aeon/testing/testing_data.py index bdb5cea37d..1458d4a0d9 100644 --- a/aeon/testing/testing_data.py +++ b/aeon/testing/testing_data.py @@ -2,8 +2,8 @@ import numpy as np -from aeon.anomaly_detection.base import BaseAnomalyDetector -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.base import 
BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -1023,7 +1023,7 @@ def _get_task_for_estimator(estimator): data_label = "SimilaritySearch" # series data with no secondary input elif ( - isinstance(estimator, BaseAnomalyDetector) + isinstance(estimator, BaseSeriesAnomalyDetector) or isinstance(estimator, BaseSegmenter) or isinstance(estimator, BaseSeriesTransformer) or isinstance(estimator, BaseForecaster) diff --git a/aeon/utils/base/_identifier.py b/aeon/utils/base/_identifier.py index cf2722cfcb..2857b45bd1 100644 --- a/aeon/utils/base/_identifier.py +++ b/aeon/utils/base/_identifier.py @@ -47,6 +47,8 @@ def get_identifier(estimator): if len(identifiers) == 0: raise TypeError("Error, no identifiers could be determined for estimator") + if len(identifiers) > 1 and "anomaly-detector" in identifiers: + identifiers.remove("anomaly-detector") if len(identifiers) > 1 and "estimator" in identifiers: identifiers.remove("estimator") if len(identifiers) > 1 and "series-estimator" in identifiers: diff --git a/aeon/utils/base/_register.py b/aeon/utils/base/_register.py index a4d8b2d303..749c005e5f 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -15,9 +15,9 @@ "VALID_ESTIMATOR_BASES", ] - from aeon.anomaly_detection.base import BaseAnomalyDetector -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.base import BaseAeonEstimator, BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification.base import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -33,12 +33,12 @@ # all base classes BASE_CLASS_REGISTER = { # abstract - no estimator directly inherits from these + "anomaly-detector": BaseAnomalyDetector, "collection-estimator": BaseCollectionEstimator, "estimator": BaseAeonEstimator, "series-estimator": BaseSeriesEstimator, "transformer": BaseTransformer, # estimator types - "anomaly-detector": BaseAnomalyDetector, "collection-anomaly-detector": BaseCollectionAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, @@ -47,6 +47,7 @@ "regressor": BaseRegressor, "segmenter": BaseSegmenter, "similarity_searcher": BaseSimilaritySearch, + "series-anomaly-detector": BaseSeriesAnomalyDetector, "series-transformer": BaseSeriesTransformer, "forecaster": BaseForecaster, } @@ -55,5 +56,11 @@ VALID_ESTIMATOR_BASES = { k: BASE_CLASS_REGISTER[k] for k in BASE_CLASS_REGISTER.keys() - - {"estimator", "collection-estimator", "series-estimator", "transformer"} + - { + "anomaly-detector", + "estimator", + "collection-estimator", + "series-estimator", + "transformer", + } } diff --git a/aeon/utils/base/tests/test_identifier.py b/aeon/utils/base/tests/test_identifier.py index 8084492599..99eed77dad 100644 --- a/aeon/utils/base/tests/test_identifier.py +++ b/aeon/utils/base/tests/test_identifier.py @@ -34,7 +34,7 @@ def test_get_identifier(): assert ( get_identifier(MockAnomalyDetector) == get_identifier(MockAnomalyDetector()) - == "anomaly-detector" + == "series-anomaly-detector" ) assert ( get_identifier(MockSeriesTransformer) diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index 1e973b5a4f..7edb50f132 100644 --- a/aeon/utils/tags/_tags.py +++ 
b/aeon/utils/tags/_tags.py @@ -141,7 +141,6 @@ class : identifier for the base class of objects this tag applies to "class": [ "transformer", "anomaly-detector", - "collection-anomaly-detector", "segmenter", ], "type": "bool", diff --git a/aeon/utils/tags/tests/test_discovery.py b/aeon/utils/tags/tests/test_discovery.py index fd3d57fa16..cc9dca0e36 100644 --- a/aeon/utils/tags/tests/test_discovery.py +++ b/aeon/utils/tags/tests/test_discovery.py @@ -2,7 +2,7 @@ import pytest -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.classification import BaseClassifier from aeon.testing.mock_estimators import MockClassifier from aeon.testing.mock_estimators._mock_anomaly_detectors import MockAnomalyDetector @@ -42,8 +42,8 @@ def test_all_tags_for_estimator_anomaly_detection(): assert "capability:contractable" not in tags assert tags == all_tags_for_estimator(MockAnomalyDetector) - assert tags == all_tags_for_estimator(BaseAnomalyDetector) - assert tags == all_tags_for_estimator("anomaly-detector") + assert tags == all_tags_for_estimator(BaseSeriesAnomalyDetector) + assert tags == all_tags_for_estimator("series-anomaly-detector") tag_names = all_tags_for_estimator(MockAnomalyDetector(), names_only=True) assert isinstance(tag_names, list) diff --git a/aeon/utils/tests/test_discovery.py b/aeon/utils/tests/test_discovery.py index 9a0fcd6ee7..f71727320c 100644 --- a/aeon/utils/tests/test_discovery.py +++ b/aeon/utils/tests/test_discovery.py @@ -3,7 +3,7 @@ import pytest from sklearn.base import BaseEstimator -from aeon.anomaly_detection.base import BaseAnomalyDetector +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.classification import BaseClassifier, DummyClassifier from aeon.clustering import BaseClusterer @@ -62,7 +62,7 @@ def test_all_estimators_by_type(item): [ [BaseTransformer, BaseClassifier], [BaseClassifier, "segmenter"], - [BaseClassifier, BaseAnomalyDetector, BaseClusterer], + [BaseClassifier, BaseSeriesAnomalyDetector, BaseClusterer], ], ) def test_all_estimators_by_multiple_types(input): diff --git a/docs/developer_guide/adding_typehints.md b/docs/developer_guide/adding_typehints.md index 5f77ce119b..ad0a9d5fa9 100644 --- a/docs/developer_guide/adding_typehints.md +++ b/docs/developer_guide/adding_typehints.md @@ -34,17 +34,23 @@ information. The `pyod` `BaseDetector` class can now be used in type hints with these additions. 
```python +"""Adapter for PyOD models""" + from __future__ import annotations -from aeon.anomaly_detection.base import BaseAnomalyDetector +__maintainer__ = [] +__all__ = ["PyODAdapter"] + +from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from typing import TYPE_CHECKING if TYPE_CHECKING: from pyod.models.base import BaseDetector -class PyODAdapter(BaseAnomalyDetector): + +class PyODAdapter(BaseSeriesAnomalyDetector): def __init__( - self, pyod_model: BaseDetector, window_size: int = 10, stride: int = 1 + self, pyod_model: BaseDetector, window_size: int = 10, stride: int = 1 ): self.pyod_model = pyod_model self.window_size = window_size diff --git a/examples/anomaly_detection/anomaly_detection.ipynb b/examples/anomaly_detection/anomaly_detection.ipynb index 7afd00aff8..c9011d3871 100644 --- a/examples/anomaly_detection/anomaly_detection.ipynb +++ b/examples/anomaly_detection/anomaly_detection.ipynb @@ -185,7 +185,7 @@ "metadata": {}, "outputs": [], "source": [ - "from aeon.anomaly_detection.distance_based import STOMP\n", + "from aeon.anomaly_detection.series.distance_based import STOMP\n", "from aeon.benchmarking.metrics.anomaly_detection import range_roc_auc_score\n", "\n", "detector = STOMP(window_size=200)\n", @@ -203,21 +203,21 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "743fbbaa-a7d0-4f56-993a-07453f6a9442", "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "from pyod.models.ocsvm import OCSVM\n", "\n", - "from aeon.anomaly_detection.outlier_detection import PyODAdapter\n", + "from aeon.anomaly_detection.series import PyODAdapter\n", "from aeon.benchmarking.metrics.anomaly_detection import range_roc_auc_score\n", "\n", "detector = PyODAdapter(OCSVM(), window_size=3)\n", "y_scores = detector.fit_predict(X, axis=0)\n", "range_roc_auc_score(y, y_scores)" - ] + ], + "id": "8c89d43f2e5476e6" }, { "cell_type": "markdown", From 57f3950f3001e8bce120978cf20db409d3b7e7e5 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 10:07:19 +0100 Subject: [PATCH 12/16] smoothing refactor --- aeon/transformations/series/_dft.py | 59 +++---------- aeon/transformations/series/_exp_smoothing.py | 55 +++--------- aeon/transformations/series/_gauss.py | 43 +++------ .../transformations/series/_moving_average.py | 51 +++-------- aeon/transformations/series/_sg.py | 43 +++------ aeon/transformations/series/_siv.py | 53 +++-------- .../series/smoothing/__init__.py | 17 ++++ aeon/transformations/series/smoothing/_dfa.py | 84 ++++++++++++++++++ .../series/smoothing/_exp_smoothing.py | 84 ++++++++++++++++++ .../series/smoothing/_gauss.py | 67 ++++++++++++++ .../series/smoothing/_moving_average.py | 67 ++++++++++++++ aeon/transformations/series/smoothing/_rms.py | 87 +++++++++++++++++++ aeon/transformations/series/smoothing/_sg.py | 68 +++++++++++++++ .../series/smoothing/tests/__init__.py | 1 + .../series/{ => smoothing}/tests/test_dft.py | 12 +-- .../tests/test_exp_smoothing.py | 20 ++--- .../{ => smoothing}/tests/test_gauss.py | 22 +---- .../tests/test_moving_average.py | 21 ++--- .../tests/test_rms.py} | 20 +---- .../series/{ => smoothing}/tests/test_sg.py | 20 +---- 20 files changed, 582 insertions(+), 312 deletions(-) create mode 100644 aeon/transformations/series/smoothing/__init__.py create mode 100644 aeon/transformations/series/smoothing/_dfa.py create mode 100644 aeon/transformations/series/smoothing/_exp_smoothing.py create mode 100644 aeon/transformations/series/smoothing/_gauss.py create mode 
100644 aeon/transformations/series/smoothing/_moving_average.py create mode 100644 aeon/transformations/series/smoothing/_rms.py create mode 100644 aeon/transformations/series/smoothing/_sg.py create mode 100644 aeon/transformations/series/smoothing/tests/__init__.py rename aeon/transformations/series/{ => smoothing}/tests/test_dft.py (73%) rename aeon/transformations/series/{ => smoothing}/tests/test_exp_smoothing.py (72%) rename aeon/transformations/series/{ => smoothing}/tests/test_gauss.py (60%) rename aeon/transformations/series/{ => smoothing}/tests/test_moving_average.py (54%) rename aeon/transformations/series/{tests/test_siv.py => smoothing/tests/test_rms.py} (64%) rename aeon/transformations/series/{ => smoothing}/tests/test_sg.py (64%) diff --git a/aeon/transformations/series/_dft.py b/aeon/transformations/series/_dft.py index 1f603c39af..24e0b27ab7 100644 --- a/aeon/transformations/series/_dft.py +++ b/aeon/transformations/series/_dft.py @@ -4,12 +4,20 @@ __all__ = ["DFTSeriesTransformer"] -import numpy as np +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import DiscreteFourierApproximation -class DFTSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="DFTSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use DiscreteFourierApproximation from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class DFTSeriesTransformer(DiscreteFourierApproximation): """Filter a times series using Discrete Fourier Approximation (DFT). Parameters @@ -42,47 +50,4 @@ class DFTSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, r=0.5, sort=False): - self.r = r - self.sort = sort - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - # Compute DFT - dft = np.fft.fft(X) - - # Mask array of terms to keep and number of terms to keep - mask = np.zeros_like(dft, dtype=bool) - keep = max(int(self.r * dft.shape[1]), 1) - - # If sort is set, sort the indices by the decreasing dft amplitude - if self.sort: - sorted_indices = np.argsort(np.abs(dft))[:, ::-1] - for i in range(dft.shape[0]): - mask[i, sorted_indices[i, 0:keep]] = True - # Else, keep the first terms - else: - mask[:, 0:keep] = True - - # Invert DFT with masked terms - X_ = np.fft.ifft(dft * mask).real - - return X_ + ... 
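The shim above keeps `DFTSeriesTransformer` importable until v1.3.0 but forwards everything to the new class. A hedged migration sketch (synthetic data; the parameter values are illustrative, not from the patch):

```python
import numpy as np

from aeon.transformations.series.smoothing import DiscreteFourierApproximation

X = np.random.random((2, 100))                          # 2 channels, 100 time points
dft = DiscreteFourierApproximation(r=0.25, sort=True)   # keep the largest 25% of Fourier terms
X_smoothed = dft.fit_transform(X)
print(X_smoothed.shape)                                 # (2, 100)
```

Existing code that imports `DFTSeriesTransformer` from `aeon.transformations.series` keeps working for now but should emit a `FutureWarning` on construction.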
diff --git a/aeon/transformations/series/_exp_smoothing.py b/aeon/transformations/series/_exp_smoothing.py index 5566769463..08217a9d05 100644 --- a/aeon/transformations/series/_exp_smoothing.py +++ b/aeon/transformations/series/_exp_smoothing.py @@ -3,14 +3,21 @@ __maintainer__ = ["Datadote"] __all__ = ["ExpSmoothingSeriesTransformer"] -from typing import Union -import numpy as np +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import ExponentialSmoothing -class ExpSmoothingSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="ExpSmoothingSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use ExponentialSmoothing from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class ExpSmoothingSeriesTransformer(ExponentialSmoothing): """Filter a time series using exponential smoothing. - Exponential smoothing (EXP) is a generalisaton of moving average smoothing that @@ -54,42 +61,4 @@ class ExpSmoothingSeriesTransformer(BaseSeriesTransformer): [10. 9.5 8.75 7.875]] """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__( - self, alpha: float = 0.2, window_size: Union[int, float, None] = None - ) -> None: - if not 0 <= alpha <= 1: - raise ValueError(f"alpha must be in range [0, 1], got {alpha}") - if window_size is not None and window_size <= 0: - raise ValueError(f"window_size must be > 0, got {window_size}") - super().__init__(axis=1) - self.alpha = alpha if window_size is None else 2.0 / (window_size + 1) - self.window_size = window_size - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - private _transform containing core logic, called from transform - - Parameters - ---------- - X : np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt: 2D np.ndarray - transformed version of X - """ - Xt = np.zeros_like(X, dtype="float") - Xt[:, 0] = X[:, 0] - for i in range(1, Xt.shape[1]): - Xt[:, i] = self.alpha * X[:, i] + (1 - self.alpha) * Xt[:, i - 1] - return Xt + ... diff --git a/aeon/transformations/series/_gauss.py b/aeon/transformations/series/_gauss.py index 863d8cf6b9..959cddac6e 100644 --- a/aeon/transformations/series/_gauss.py +++ b/aeon/transformations/series/_gauss.py @@ -4,12 +4,20 @@ __all__ = ["GaussSeriesTransformer"] -from scipy.ndimage import gaussian_filter1d +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import GaussianFilter -class GaussSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="GaussSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use GaussianFilter from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class GaussSeriesTransformer(GaussianFilter): """Filter a times series using Gaussian filter. 
Parameters @@ -45,31 +53,4 @@ class GaussSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, sigma=1, order=0): - self.sigma = sigma - self.order = order - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - # Compute Gaussian filter - X_ = gaussian_filter1d(X, self.sigma, self.axis, self.order) - - return X_ + ... diff --git a/aeon/transformations/series/_moving_average.py b/aeon/transformations/series/_moving_average.py index 7e9993946e..61fb15e834 100644 --- a/aeon/transformations/series/_moving_average.py +++ b/aeon/transformations/series/_moving_average.py @@ -3,12 +3,21 @@ __maintainer__ = ["Datadote"] __all__ = ["MovingAverageSeriesTransformer"] -import numpy as np -from aeon.transformations.series.base import BaseSeriesTransformer +from deprecated.sphinx import deprecated +from aeon.transformations.series.smoothing import MovingAverage -class MovingAverageSeriesTransformer(BaseSeriesTransformer): + +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="MovingAverageSeriesTransformer is deprecated and will be removed in " + "v1.3.0. Please use MovingAverage from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class MovingAverageSeriesTransformer(MovingAverage): """Calculate the moving average of an array of numbers. Slides a window across the input array, and returns the averages for each window. @@ -41,38 +50,4 @@ class MovingAverageSeriesTransformer(BaseSeriesTransformer): [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_size: int = 5) -> None: - super().__init__(axis=0) - if window_size <= 0: - raise ValueError(f"window_size must be > 0, got {window_size}") - self.window_size = window_size - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - private _transform containing core logic, called from transform - - Parameters - ---------- - X : np.ndarray - Data to be transformed - y : ignored argument for interface compatibility - Additional data, e.g., labels for transformation - - Returns - ------- - Xt: 2D np.ndarray - transformed version of X - """ - csum = np.cumsum(X, axis=0) - csum[self.window_size :, :] = ( - csum[self.window_size :, :] - csum[: -self.window_size, :] - ) - Xt = csum[self.window_size - 1 :, :] / self.window_size - return Xt + ... diff --git a/aeon/transformations/series/_sg.py b/aeon/transformations/series/_sg.py index 19000cc0e8..0fc1de3984 100644 --- a/aeon/transformations/series/_sg.py +++ b/aeon/transformations/series/_sg.py @@ -4,12 +4,20 @@ __all__ = ["SGSeriesTransformer"] -from scipy.signal import savgol_filter +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import SavitzkyGolayFilter -class SGSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="SGSeriesTransformer is deprecated and will be removed in v1.3.0. 
" + "Please use SavitzkyGolayFilter from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class SGSeriesTransformer(SavitzkyGolayFilter): """Filter a times series using Savitzky-Golay (SG). Parameters @@ -45,31 +53,4 @@ class SGSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_length=5, polyorder=2): - self.window_length = window_length - self.polyorder = polyorder - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - # Compute SG - X_ = savgol_filter(X, self.window_length, self.polyorder) - - return X_ + ... diff --git a/aeon/transformations/series/_siv.py b/aeon/transformations/series/_siv.py index 1bb2ad3e0a..b6eeb5e591 100644 --- a/aeon/transformations/series/_siv.py +++ b/aeon/transformations/series/_siv.py @@ -4,13 +4,20 @@ __all__ = ["SIVSeriesTransformer"] -import numpy as np -from scipy.ndimage import median_filter +from deprecated.sphinx import deprecated -from aeon.transformations.series.base import BaseSeriesTransformer +from aeon.transformations.series.smoothing import RecursiveMedianSieve -class SIVSeriesTransformer(BaseSeriesTransformer): +# TODO: Remove in v1.3.0 +@deprecated( + version="1.2.0", + reason="SIVSeriesTransformer is deprecated and will be removed in v1.3.0. " + "Please use RecursiveMedianSieve from " + "transformations.series.smoothing instead.", + category=FutureWarning, +) +class SIVSeriesTransformer(RecursiveMedianSieve): """Filter a times series using Recursive Median Sieve (SIV). Parameters @@ -48,40 +55,4 @@ class SIVSeriesTransformer(BaseSeriesTransformer): (2, 100) """ - _tags = { - "capability:multivariate": True, - "X_inner_type": "np.ndarray", - "fit_is_empty": True, - } - - def __init__(self, window_length=None): - self.window_length = window_length - super().__init__(axis=1) - - def _transform(self, X, y=None): - """Transform X and return a transformed version. - - Parameters - ---------- - X : np.ndarray - time series in shape (n_channels, n_timepoints) - y : ignored argument for interface compatibility - - Returns - ------- - transformed version of X - """ - window_length = self.window_length - if window_length is None: - window_length = [3, 5, 7] - if not isinstance(window_length, list): - window_length = [window_length] - - # Compute SIV - X_ = X - - for w in window_length: - footprint = np.ones((1, w)) - X_ = median_filter(X_, footprint=footprint) - - return X_ + ... 
diff --git a/aeon/transformations/series/smoothing/__init__.py b/aeon/transformations/series/smoothing/__init__.py new file mode 100644 index 0000000000..52ebcc3c8e --- /dev/null +++ b/aeon/transformations/series/smoothing/__init__.py @@ -0,0 +1,17 @@ +"""Series smoothing transformers.""" + +__all__ = [ + "DiscreteFourierApproximation", + "ExponentialSmoothing", + "GaussianFilter", + "MovingAverage", + "SavitzkyGolayFilter", + "RecursiveMedianSieve", +] + +from aeon.transformations.series.smoothing._dfa import DiscreteFourierApproximation +from aeon.transformations.series.smoothing._exp_smoothing import ExponentialSmoothing +from aeon.transformations.series.smoothing._gauss import GaussianFilter +from aeon.transformations.series.smoothing._moving_average import MovingAverage +from aeon.transformations.series.smoothing._rms import RecursiveMedianSieve +from aeon.transformations.series.smoothing._sg import SavitzkyGolayFilter diff --git a/aeon/transformations/series/smoothing/_dfa.py b/aeon/transformations/series/smoothing/_dfa.py new file mode 100644 index 0000000000..a5e017ac66 --- /dev/null +++ b/aeon/transformations/series/smoothing/_dfa.py @@ -0,0 +1,84 @@ +"""Discrete Fourier Approximation filter transformation for smoothing.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["DiscreteFourierApproximation"] + + +import numpy as np + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class DiscreteFourierApproximation(BaseSeriesTransformer): + """Filter a times series using a Discrete Fourier Approximation. + + Smooths the series by first transforming into the frequency domain, discarding + the high frequency terms, then transforming back to the time domain. + + Parameters + ---------- + r : float, default=0.5 + Proportion of Fourier terms to retain [0, 1] + sort : bool, default=False + Sort the Fourier terms by amplitude to keep most important terms + + References + ---------- + .. [1] Cooley, J., Lewis, P., Welch, P.: The fast fourier transform and its + applications. IEEE Trans. Educ. 12(1), 27–34 (1969) + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import DiscreteFourierApproximation + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> dft = DiscreteFourierApproximation() + >>> X_ = dft.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, r=0.5, sort=False): + self.r = r + self.sort = sort + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. 
+ + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + # Compute DFT + dft = np.fft.fft(X) + + # Mask array of terms to keep and number of terms to keep + mask = np.zeros_like(dft, dtype=bool) + keep = max(int(self.r * dft.shape[1]), 1) + + # If sort is set, sort the indices by the decreasing dft amplitude + if self.sort: + sorted_indices = np.argsort(np.abs(dft))[:, ::-1] + for i in range(dft.shape[0]): + mask[i, sorted_indices[i, 0:keep]] = True + # Else, keep the first terms + else: + mask[:, 0:keep] = True + + # Invert DFT with masked terms + X_ = np.fft.ifft(dft * mask).real + + return X_ diff --git a/aeon/transformations/series/smoothing/_exp_smoothing.py b/aeon/transformations/series/smoothing/_exp_smoothing.py new file mode 100644 index 0000000000..cd70138fa8 --- /dev/null +++ b/aeon/transformations/series/smoothing/_exp_smoothing.py @@ -0,0 +1,84 @@ +"""Exponential smoothing transformation.""" + +__maintainer__ = ["Datadote"] +__all__ = ["ExponentialSmoothing"] + +from typing import Union + +import numpy as np + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class ExponentialSmoothing(BaseSeriesTransformer): + """Filter a time series using exponential smoothing. + + - Exponential smoothing (EXP) is a generalisaton of moving average smoothing that + assigns a decaying weight to each element rather than averaging over a window. + - Assume time series T = [t_0, ..., t_j], and smoothed values S = [s_0, ..., s_j] + - Then, s_0 = t_0 and s_j = alpha * t_j + (1 - alpha) * s_j-1 + where 0 ≤ alpha ≤ 1. If window_size is given, alpha is overwritten, and set as + alpha = 2. / (window_size + 1) + + Parameters + ---------- + alpha: float, default=0.2 + decaying weight. Range [0, 1]. Overwritten by window_size if window_size exists + window_size: int or float or None, default=None + If window_size is specified, alpha is set to 2. / (window_size + 1) + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import ExponentialSmoothing + >>> X = np.array([-2, -1, 0, 1, 2]) + >>> transformer = ExponentialSmoothing(0.5) + >>> transformer.fit_transform(X) + [[-2. -1.5 -0.75 0.125 1.0625]] + >>> X = np.array([[1, 2, 3, 4], [10, 9, 8, 7]]) + >>> transformer.fit_transform(X) + [[ 1. 1.5 2.25 3.125] + [10. 9.5 8.75 7.875]] + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__( + self, alpha: float = 0.2, window_size: Union[int, float, None] = None + ) -> None: + self.alpha = alpha if window_size is None else 2.0 / (window_size + 1) + self.window_size = window_size + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. 
+ + private _transform containing core logic, called from transform + + Parameters + ---------- + X : np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt: 2D np.ndarray + transformed version of X + """ + if not 0 <= self.alpha <= 1: + raise ValueError(f"alpha must be in range [0, 1], got {self.alpha}") + if self.window_size is not None and self.window_size <= 0: + raise ValueError(f"window_size must be > 0, got {self.window_size}") + + Xt = np.zeros_like(X, dtype="float") + Xt[:, 0] = X[:, 0] + for i in range(1, Xt.shape[1]): + Xt[:, i] = self.alpha * X[:, i] + (1 - self.alpha) * Xt[:, i - 1] + return Xt diff --git a/aeon/transformations/series/smoothing/_gauss.py b/aeon/transformations/series/smoothing/_gauss.py new file mode 100644 index 0000000000..7946f8d634 --- /dev/null +++ b/aeon/transformations/series/smoothing/_gauss.py @@ -0,0 +1,67 @@ +"""Gaussian filter transformation.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["GaussianFilter"] + + +from scipy.ndimage import gaussian_filter1d + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class GaussianFilter(BaseSeriesTransformer): + """Filter a time series using Gaussian filter. + + Wrapper for the SciPy ``gaussian_filter1d`` function. + + Parameters + ---------- + sigma : float, default=1 + Standard deviation for the Gaussian kernel. + order : int, default=0 + An order of 0 corresponds to convolution with a Gaussian kernel. + A positive order corresponds to convolution with that derivative of a + Gaussian. + + References + ---------- + .. [1] Chou, Y. L. "Statistical Analysis, Section 17.9." New York: Holt + International (1975). + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import GaussianFilter + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> gauss = GaussianFilter(sigma=5) + >>> X_ = gauss.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, sigma=1, order=0): + self.sigma = sigma + self.order = order + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + return gaussian_filter1d(X, self.sigma, axis=self.axis, order=self.order) diff --git a/aeon/transformations/series/smoothing/_moving_average.py b/aeon/transformations/series/smoothing/_moving_average.py new file mode 100644 index 0000000000..c4b806e6c7 --- /dev/null +++ b/aeon/transformations/series/smoothing/_moving_average.py @@ -0,0 +1,67 @@ +"""Moving average transformation.""" + +__maintainer__ = ["Datadote"] +__all__ = ["MovingAverage"] + +import numpy as np + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class MovingAverage(BaseSeriesTransformer): + """Calculate the moving average for a time series. + + Slides a window across the input array, and returns the averages for each window. + + Parameters + ---------- + window_size: int, default=5 + Number of values to average for each window. 
+ + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import MovingAverage + >>> X = np.array([-3, -2, -1, 0, 1, 2, 3]) + >>> transformer = MovingAverage(2) + >>> transformer.fit_transform(X) + [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_size: int = 5) -> None: + self.window_size = window_size + + super().__init__(axis=0) + + def _transform(self, X, y=None): + """Transform X and return a transformed version. + + private _transform containing core logic, called from transform + + Parameters + ---------- + X : np.ndarray + Data to be transformed + y : ignored argument for interface compatibility + Additional data, e.g., labels for transformation + + Returns + ------- + Xt: 2D np.ndarray + transformed version of X + """ + if self.window_size <= 0: + raise ValueError(f"window_size must be > 0, got {self.window_size}") + + csum = np.cumsum(X, axis=0) + csum[self.window_size :, :] = ( + csum[self.window_size :, :] - csum[: -self.window_size, :] + ) + Xt = csum[self.window_size - 1 :, :] / self.window_size + return Xt diff --git a/aeon/transformations/series/smoothing/_rms.py b/aeon/transformations/series/smoothing/_rms.py new file mode 100644 index 0000000000..9b9b45becb --- /dev/null +++ b/aeon/transformations/series/smoothing/_rms.py @@ -0,0 +1,87 @@ +"""Recursive Median Sieve filter transformation.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["RecursiveMedianSieve"] + + +import numpy as np +from scipy.ndimage import median_filter + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class RecursiveMedianSieve(BaseSeriesTransformer): + """Filter a time series using a Recursive Median Sieve. + + Parameters + ---------- + window_length : list of int or int, default=None + The filter window lengths (increasing values are recommended). + If None, defaults to [3, 5, 7]. + + Notes + ----- + Use scipy.ndimage.median_filter instead of scipy.signal.medfilt: + The more general function scipy.ndimage.median_filter has a more efficient + implementation of a median filter and therefore runs much faster. + https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.medfilt.html + + References + ---------- + .. [1] Bangham J. A. (1988). + Data-sieving hydrophobicity plots. + Analytical biochemistry, 174(1), 142–145. + https://doi.org/10.1016/0003-2697(88)90528-3 + .. [2] Yli-Harja, O., Koivisto, P., Bangham, J. A., Cawley, G., + Harvey, R., & Shmulevich, I. (2001). + Simplified implementation of the recursive median sieve. + Signal Process., 81(7), 1565–1570. + https://doi.org/10.1016/S0165-1684(01)00054-8 + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import RecursiveMedianSieve + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> siv = RecursiveMedianSieve() + >>> X_ = siv.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_length=None): + self.window_length = window_length + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version.
+ + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + window_length = self.window_length + if window_length is None: + window_length = [3, 5, 7] + if not isinstance(window_length, list): + window_length = [window_length] + + X_ = X + for w in window_length: + footprint = np.ones((1, w)) + X_ = median_filter(X_, footprint=footprint) + + return X_ diff --git a/aeon/transformations/series/smoothing/_sg.py b/aeon/transformations/series/smoothing/_sg.py new file mode 100644 index 0000000000..750f5c5701 --- /dev/null +++ b/aeon/transformations/series/smoothing/_sg.py @@ -0,0 +1,68 @@ +"""Savitzky-Golay filter transformation.""" + +__maintainer__ = ["Cyril-Meyer"] +__all__ = ["SavitzkyGolayFilter"] + + +from scipy.signal import savgol_filter + +from aeon.transformations.series.base import BaseSeriesTransformer + + +class SavitzkyGolayFilter(BaseSeriesTransformer): + """Filter a time series using Savitzky-Golay (SG). + + Wrapper for the SciPy ``savgol_filter`` function. + + Parameters + ---------- + window_length : int, default=5 + The length of the filter window (i.e., the number of coefficients). + window_length must be less than or equal to the size of the input. + polyorder : int, default=2 + The order of the polynomial used to fit the samples. + polyorder must be less than window_length. + + References + ---------- + .. [1] Savitzky, A., & Golay, M. J. (1964). + Smoothing and differentiation of data by simplified least squares procedures. + Analytical chemistry, 36(8), 1627-1639. + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series.smoothing import SavitzkyGolayFilter + >>> X = np.random.random((2, 100)) # Random series length 100 + >>> sg = SavitzkyGolayFilter() + >>> X_ = sg.fit_transform(X) + >>> X_.shape + (2, 100) + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, window_length=5, polyorder=2): + self.window_length = window_length + self.polyorder = polyorder + + super().__init__(axis=1) + + def _transform(self, X, y=None): + """Transform X and return a transformed version.
+ + Parameters + ---------- + X : np.ndarray + time series in shape (n_channels, n_timepoints) + y : ignored argument for interface compatibility + + Returns + ------- + transformed version of X + """ + return savgol_filter(X, self.window_length, self.polyorder) diff --git a/aeon/transformations/series/smoothing/tests/__init__.py b/aeon/transformations/series/smoothing/tests/__init__.py new file mode 100644 index 0000000000..5882e7a73b --- /dev/null +++ b/aeon/transformations/series/smoothing/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for series smoothing transformations.""" diff --git a/aeon/transformations/series/tests/test_dft.py b/aeon/transformations/series/smoothing/tests/test_dft.py similarity index 73% rename from aeon/transformations/series/tests/test_dft.py rename to aeon/transformations/series/smoothing/tests/test_dft.py index 6ffe04b78c..62cd7147a4 100644 --- a/aeon/transformations/series/tests/test_dft.py +++ b/aeon/transformations/series/smoothing/tests/test_dft.py @@ -1,12 +1,12 @@ """Tests for DFT transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing._dfa import DiscreteFourierApproximation + -@pytest.mark.parametrize("r", [0.00, 0.25, 0.50, 0.75, 1.00]) +@pytest.mark.parametrize("r", [0.00, 0.50, 1.00]) @pytest.mark.parametrize("sort", [True, False]) def test_dft(r, sort): """Test the functionality of DFT transformation.""" @@ -23,15 +23,11 @@ def test_dft(r, sort): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - - from aeon.transformations.series._dft import DFTSeriesTransformer - dft = DFTSeriesTransformer(r=r, sort=sort) + dft = DiscreteFourierApproximation(r=r, sort=sort) x_1 = dft.fit_transform(x1) x_2 = dft.fit_transform(x2) x_12 = dft.fit_transform(x12) - dft.fit_transform(x12r) np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) diff --git a/aeon/transformations/series/tests/test_exp_smoothing.py b/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py similarity index 72% rename from aeon/transformations/series/tests/test_exp_smoothing.py rename to aeon/transformations/series/smoothing/tests/test_exp_smoothing.py index cb83a4e7da..c441104e8c 100644 --- a/aeon/transformations/series/tests/test_exp_smoothing.py +++ b/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py @@ -1,11 +1,9 @@ -"""Tests for ExpSmoothingSeriesTransformer.""" - -__maintainer__ = ["Datadote"] +"""Tests for ExponentialSmoothing.""" import numpy as np import pytest -from aeon.transformations.series._exp_smoothing import ExpSmoothingSeriesTransformer +from aeon.transformations.series.smoothing import ExponentialSmoothing TEST_DATA = [np.array([-2, -1, 0, 1, 2]), np.array([[1, 2, 3, 4], [10, 9, 8, 7]])] EXPECTED_RESULTS = [ @@ -16,7 +14,7 @@ def test_input_1d_array(): """Test inputs of dimension 1.""" - transformer = ExpSmoothingSeriesTransformer(0.5) + transformer = ExponentialSmoothing(0.5) idx_data = 0 Xt = transformer.fit_transform(TEST_DATA[idx_data]) np.testing.assert_almost_equal(Xt, EXPECTED_RESULTS[idx_data], decimal=5) @@ -24,7 +22,7 @@ def test_input_1d_array(): def test_input_2d_array(): """Test inputs of dimension 2.""" - transformer = ExpSmoothingSeriesTransformer(0.5) + transformer = ExponentialSmoothing(0.5) idx_data = 1 Xt = transformer.fit_transform(TEST_DATA[idx_data]) np.testing.assert_almost_equal(Xt, EXPECTED_RESULTS[idx_data], decimal=5) @@ -34,8 +32,8 @@ 
def test_input_2d_array(): def test_window_size_matches_alpha(alpha_window): """Check same output results using equivalent alpha and window_size.""" alpha, window_size = alpha_window - transformer1 = ExpSmoothingSeriesTransformer(alpha=alpha) - transformer2 = ExpSmoothingSeriesTransformer(window_size=window_size) + transformer1 = ExponentialSmoothing(alpha=alpha) + transformer2 = ExponentialSmoothing(window_size=window_size) for i in range(len(TEST_DATA)): Xt1 = transformer1.fit_transform(TEST_DATA[i]) Xt2 = transformer2.fit_transform(TEST_DATA[i]) @@ -45,16 +43,16 @@ def test_window_size_matches_alpha(alpha_window): def test_alpha_less_than_zero(): """Test alpha less than zero.""" with pytest.raises(ValueError): - ExpSmoothingSeriesTransformer(-0.5) + ExponentialSmoothing(-0.5) def test_alpha_greater_than_one(): """Test alpha greater than one.""" with pytest.raises(ValueError): - ExpSmoothingSeriesTransformer(2.0) + ExponentialSmoothing(2.0) def test_window_size_than_one(): """Test window_size < 0.""" with pytest.raises(ValueError): - ExpSmoothingSeriesTransformer(window_size=0) + ExponentialSmoothing(window_size=0) diff --git a/aeon/transformations/series/tests/test_gauss.py b/aeon/transformations/series/smoothing/tests/test_gauss.py similarity index 60% rename from aeon/transformations/series/tests/test_gauss.py rename to aeon/transformations/series/smoothing/tests/test_gauss.py index 6ab65ac107..52a8ae3ee8 100644 --- a/aeon/transformations/series/tests/test_gauss.py +++ b/aeon/transformations/series/smoothing/tests/test_gauss.py @@ -1,12 +1,12 @@ """Tests for Gauss transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing import GaussianFilter + -@pytest.mark.parametrize("sigma", [0.1, 0.5, 1, 2, 5, 10]) +@pytest.mark.parametrize("sigma", [0.1, 1, 10]) @pytest.mark.parametrize("order", [0, 1, 2]) def test_gauss(sigma, order): """Test the functionality of Gauss transformation.""" @@ -23,25 +23,11 @@ def test_gauss(sigma, order): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - from aeon.transformations.series._gauss import GaussSeriesTransformer - - sg = GaussSeriesTransformer(sigma=sigma, order=order) + sg = GaussianFilter(sigma=sigma, order=order) x_1 = sg.fit_transform(x1) x_2 = sg.fit_transform(x2) x_12 = sg.fit_transform(x12) - x_12_r = sg.fit_transform(x12r) - - """ - # Visualize smoothing - import matplotlib.pyplot as plt - plt.plot(x12r[0]) - plt.plot(x_12_r[0]) - plt.savefig(fname=f'Gauss_{sigma}_{order}.png') - plt.clf() - """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) - assert x_12.shape == x_12_r.shape diff --git a/aeon/transformations/series/tests/test_moving_average.py b/aeon/transformations/series/smoothing/tests/test_moving_average.py similarity index 54% rename from aeon/transformations/series/tests/test_moving_average.py rename to aeon/transformations/series/smoothing/tests/test_moving_average.py index bfbf3a71bd..6fa1e55a74 100644 --- a/aeon/transformations/series/tests/test_moving_average.py +++ b/aeon/transformations/series/smoothing/tests/test_moving_average.py @@ -1,26 +1,27 @@ """Tests for MovingAverageTransformer.""" -__maintainer__ = ["Datadote"] - import numpy as np import pytest -from aeon.transformations.series._moving_average import MovingAverageSeriesTransformer +from aeon.transformations.series.smoothing import MovingAverage -TEST_DATA = [np.array([-3, 
-2, -1, 0, 1, 2, 3]), np.array([[-3, -2, -1, 0, 1, 2, 3]])] +TEST_DATA = [ + np.array([-3, -2, -1, 0, 1, 2, 3]), + np.array([[-3, -2, -1, 0, 1, 2, 3], [3, 2, 1, 0, -1, -2, -3]]), +] EXPECTED_RESULTS = [ np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]]), - np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]]), + np.array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5], [2.5, 1.5, 0.5, -0.5, -1.5, -2.5]]), ] def test_window_size_greater_than_zero(): """Test window sizes > 0.""" - ma = MovingAverageSeriesTransformer(window_size=1) + ma = MovingAverage(window_size=1) xt = ma.fit_transform(TEST_DATA[0]) - np.testing.assert_array_almost_equal(xt, xt, decimal=2) + np.testing.assert_array_almost_equal(xt, TEST_DATA[0], decimal=2) - ma = MovingAverageSeriesTransformer(window_size=2) + ma = MovingAverage(window_size=2) for i in range(len(TEST_DATA)): xt = ma.fit_transform(TEST_DATA[i]) np.testing.assert_array_almost_equal(xt, EXPECTED_RESULTS[i], decimal=2) @@ -29,10 +30,10 @@ def test_window_size_greater_than_zero(): def test_window_size_equal_zero(): """Test window size == 0.""" with pytest.raises(ValueError): - MovingAverageSeriesTransformer(window_size=0) + MovingAverage(window_size=0) def test_window_size_less_than_zero(): """Test window sizes < 0.""" with pytest.raises(ValueError): - MovingAverageSeriesTransformer(window_size=-1) + MovingAverage(window_size=-1) diff --git a/aeon/transformations/series/tests/test_siv.py b/aeon/transformations/series/smoothing/tests/test_rms.py similarity index 64% rename from aeon/transformations/series/tests/test_siv.py rename to aeon/transformations/series/smoothing/tests/test_rms.py index c8042e9c5b..a99a02981e 100644 --- a/aeon/transformations/series/tests/test_siv.py +++ b/aeon/transformations/series/smoothing/tests/test_rms.py @@ -1,10 +1,10 @@ """Tests for SIV transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing import RecursiveMedianSieve + @pytest.mark.parametrize( "window_length", [1, 2, 3, 5, 7, 10, 11, [2, 3], [3, 5], [3, 5, 7], [3, 5, 7, 11]] @@ -24,25 +24,11 @@ def test_siv(window_length): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - from aeon.transformations.series._siv import SIVSeriesTransformer - - siv = SIVSeriesTransformer(window_length=window_length) + siv = RecursiveMedianSieve(window_length=window_length) x_1 = siv.fit_transform(x1) x_2 = siv.fit_transform(x2) x_12 = siv.fit_transform(x12) - x_12_r = siv.fit_transform(x12r) - - """ - # Visualize smoothing - import matplotlib.pyplot as plt - plt.plot(x12r[0]) - plt.plot(x_12_r[0]) - plt.savefig(fname=f'SIV_{window_length}.png') - plt.clf() - """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) - assert x_12.shape == x_12_r.shape diff --git a/aeon/transformations/series/tests/test_sg.py b/aeon/transformations/series/smoothing/tests/test_sg.py similarity index 64% rename from aeon/transformations/series/tests/test_sg.py rename to aeon/transformations/series/smoothing/tests/test_sg.py index 7df2970086..75604fb196 100644 --- a/aeon/transformations/series/tests/test_sg.py +++ b/aeon/transformations/series/smoothing/tests/test_sg.py @@ -1,10 +1,10 @@ """Tests for SG transformation.""" -__maintainer__ = [] - import numpy as np import pytest +from aeon.transformations.series.smoothing import SavitzkyGolayFilter + @pytest.mark.parametrize("window_length", [5, 9, 17]) @pytest.mark.parametrize("polyorder", [2, 3, 4]) @@ 
-23,25 +23,11 @@ def test_sg(window_length, polyorder): + 0.1 * np.sin(2 * np.pi * 8 * t) ) x12 = np.array([x1, x2]) - x12r = x12 + np.random.random((2, n_samples)) * 0.25 - from aeon.transformations.series._sg import SGSeriesTransformer - - sg = SGSeriesTransformer(window_length=window_length, polyorder=polyorder) + sg = SavitzkyGolayFilter(window_length=window_length, polyorder=polyorder) x_1 = sg.fit_transform(x1) x_2 = sg.fit_transform(x2) x_12 = sg.fit_transform(x12) - x_12_r = sg.fit_transform(x12r) - - """ - # Visualize smoothing - import matplotlib.pyplot as plt - plt.plot(x12r[0]) - plt.plot(x_12_r[0]) - plt.savefig(fname=f'SG_{window_length}_{polyorder}.png') - plt.clf() - """ np.testing.assert_almost_equal(x_1[0], x_12[0], decimal=4) np.testing.assert_almost_equal(x_2[0], x_12[1], decimal=4) - assert x_12.shape == x_12_r.shape From d20fd027a9945f0a7212f257dfd07ba08e1b9f5a Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 10:37:25 +0100 Subject: [PATCH 13/16] ad stuff --- aeon/anomaly_detection/__init__.py | 2 + aeon/anomaly_detection/base.py | 209 ++++++++++++- aeon/anomaly_detection/collection/__init__.py | 11 - .../collection/_classification.py | 65 ---- .../collection/_outlier_detection.py | 58 ---- aeon/anomaly_detection/collection/base.py | 199 ------------ .../distance_based/__init__.py | 19 ++ .../{series => }/distance_based/_cblof.py | 2 +- .../{series => }/distance_based/_kmeans.py | 4 +- .../distance_based/_left_stampi.py | 4 +- .../{series => }/distance_based/_lof.py | 2 +- .../{series => }/distance_based/_merlin.py | 4 +- .../_one_class_svm.py | 4 +- .../{series => }/distance_based/_stomp.py | 4 +- .../distance_based/tests/__init__.py | 0 .../distance_based/tests/test_cblof.py | 2 +- .../distance_based/tests/test_kmeans.py | 2 +- .../distance_based/tests/test_left_stampi.py | 2 +- .../distance_based/tests/test_lof.py | 2 +- .../distance_based/tests/test_merlin.py | 2 +- .../tests/test_one_class_svm.py | 2 +- .../distance_based/tests/test_stomp.py | 2 +- .../distribution_based/__init__.py | 9 + .../{series => }/distribution_based/_copod.py | 2 +- .../distribution_based/_dwt_mlead.py | 4 +- .../distribution_based/tests/__init__.py | 0 .../distribution_based/tests/test_copod.py | 2 +- .../tests/test_dwt_mlead.py | 2 +- .../outlier_detection/__init__.py | 11 + .../outlier_detection/_iforest.py | 2 +- .../_pyodadapter.py | 4 +- .../{series => }/outlier_detection/_stray.py | 4 +- .../outlier_detection/tests/__init__.py | 0 .../outlier_detection/tests/test_iforest.py | 2 +- .../tests/test_pyod_adapter.py | 2 +- .../outlier_detection/tests/test_stray.py | 2 +- aeon/anomaly_detection/series/__init__.py | 9 - aeon/anomaly_detection/series/base.py | 293 ------------------ .../series/distance_based/__init__.py | 21 -- .../series/distribution_based/__init__.py | 9 - .../series/outlier_detection/__init__.py | 9 - .../{series => }/tests/__init__.py | 0 .../{series => }/tests/test_base.py | 0 .../whole_series/__init__.py | 7 + .../_rockad.py | 4 +- .../whole_series/tests/__init__.py | 1 + .../tests/test_rockad.py | 2 +- aeon/classification/base.py | 2 + ...eld_collection_anomaly_detection_checks.py | 69 ----- .../_yield_estimator_checks.py | 13 +- .../_mock_anomaly_detectors.py | 4 +- aeon/testing/testing_data.py | 6 +- aeon/utils/base/_identifier.py | 2 - aeon/utils/base/_register.py | 15 +- aeon/utils/base/tests/test_identifier.py | 2 +- aeon/utils/tags/_tags.py | 6 +- aeon/utils/tags/tests/test_discovery.py | 6 +- aeon/utils/tests/test_discovery.py | 4 +- 
docs/api_reference/anomaly_detection.rst | 8 +- docs/developer_guide/adding_typehints.md | 12 +- .../anomaly_detection/anomaly_detection.ipynb | 12 +- 61 files changed, 310 insertions(+), 853 deletions(-) delete mode 100644 aeon/anomaly_detection/collection/__init__.py delete mode 100644 aeon/anomaly_detection/collection/_classification.py delete mode 100644 aeon/anomaly_detection/collection/_outlier_detection.py delete mode 100644 aeon/anomaly_detection/collection/base.py create mode 100644 aeon/anomaly_detection/distance_based/__init__.py rename aeon/anomaly_detection/{series => }/distance_based/_cblof.py (98%) rename aeon/anomaly_detection/{series => }/distance_based/_kmeans.py (98%) rename aeon/anomaly_detection/{series => }/distance_based/_left_stampi.py (97%) rename aeon/anomaly_detection/{series => }/distance_based/_lof.py (98%) rename aeon/anomaly_detection/{series => }/distance_based/_merlin.py (98%) rename aeon/anomaly_detection/{series/outlier_detection => distance_based}/_one_class_svm.py (98%) rename aeon/anomaly_detection/{series => }/distance_based/_stomp.py (97%) rename aeon/anomaly_detection/{series => }/distance_based/tests/__init__.py (100%) rename aeon/anomaly_detection/{series => }/distance_based/tests/test_cblof.py (97%) rename aeon/anomaly_detection/{series => }/distance_based/tests/test_kmeans.py (95%) rename aeon/anomaly_detection/{series => }/distance_based/tests/test_left_stampi.py (99%) rename aeon/anomaly_detection/{series => }/distance_based/tests/test_lof.py (99%) rename aeon/anomaly_detection/{series => }/distance_based/tests/test_merlin.py (96%) rename aeon/anomaly_detection/{series/outlier_detection => distance_based}/tests/test_one_class_svm.py (95%) rename aeon/anomaly_detection/{series => }/distance_based/tests/test_stomp.py (95%) create mode 100644 aeon/anomaly_detection/distribution_based/__init__.py rename aeon/anomaly_detection/{series => }/distribution_based/_copod.py (97%) rename aeon/anomaly_detection/{series => }/distribution_based/_dwt_mlead.py (98%) rename aeon/anomaly_detection/{series => }/distribution_based/tests/__init__.py (100%) rename aeon/anomaly_detection/{series => }/distribution_based/tests/test_copod.py (96%) rename aeon/anomaly_detection/{series => }/distribution_based/tests/test_dwt_mlead.py (95%) create mode 100644 aeon/anomaly_detection/outlier_detection/__init__.py rename aeon/anomaly_detection/{series => }/outlier_detection/_iforest.py (98%) rename aeon/anomaly_detection/{series => outlier_detection}/_pyodadapter.py (98%) rename aeon/anomaly_detection/{series => }/outlier_detection/_stray.py (98%) rename aeon/anomaly_detection/{series => }/outlier_detection/tests/__init__.py (100%) rename aeon/anomaly_detection/{series => }/outlier_detection/tests/test_iforest.py (98%) rename aeon/anomaly_detection/{series => outlier_detection}/tests/test_pyod_adapter.py (98%) rename aeon/anomaly_detection/{series => }/outlier_detection/tests/test_stray.py (98%) delete mode 100644 aeon/anomaly_detection/series/__init__.py delete mode 100644 aeon/anomaly_detection/series/base.py delete mode 100644 aeon/anomaly_detection/series/distance_based/__init__.py delete mode 100644 aeon/anomaly_detection/series/distribution_based/__init__.py delete mode 100644 aeon/anomaly_detection/series/outlier_detection/__init__.py rename aeon/anomaly_detection/{series => }/tests/__init__.py (100%) rename aeon/anomaly_detection/{series => }/tests/test_base.py (100%) create mode 100644 aeon/anomaly_detection/whole_series/__init__.py rename 
aeon/anomaly_detection/{series/distance_based => whole_series}/_rockad.py (98%) create mode 100644 aeon/anomaly_detection/whole_series/tests/__init__.py rename aeon/anomaly_detection/{series/distance_based => whole_series}/tests/test_rockad.py (96%) delete mode 100644 aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py diff --git a/aeon/anomaly_detection/__init__.py b/aeon/anomaly_detection/__init__.py index 878e29fd32..65343cd774 100644 --- a/aeon/anomaly_detection/__init__.py +++ b/aeon/anomaly_detection/__init__.py @@ -3,3 +3,5 @@ __all__ = [ "BaseAnomalyDetector", ] + +from aeon.anomaly_detection.base import BaseAnomalyDetector diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index 60d35f0e2e..2e333cf755 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -4,23 +4,87 @@ __all__ = ["BaseAnomalyDetector"] from abc import abstractmethod +from typing import final import numpy as np +import pandas as pd -from aeon.base import BaseAeonEstimator +from aeon.base import BaseSeriesEstimator +from aeon.base._base_series import VALID_SERIES_INPUT_TYPES -class BaseAnomalyDetector(BaseAeonEstimator): - """todo base class docs.""" +class BaseAnomalyDetector(BaseSeriesEstimator): + """Base class for anomaly detection algorithms. + + Anomaly detection algorithms are used to identify anomalous subsequences in time + series data. These algorithms take a series of length m and return a boolean, int or + float array of length m, where each element indicates whether the corresponding + subsequence is anomalous or its anomaly score. + + Input and internal data format (where m is the number of time points and d is the + number of channels): + Univariate series (default): + np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis. + This is converted to a 2D np.ndarray internally. + pd.DataFrame, shape ``(m, 1)`` or ``(1, m)`` depending on axis. + pd.Series, shape ``(m,)``. + Multivariate series: + np.ndarray array, shape ``(m, d)`` or ``(d, m)`` depending on axis. + pd.DataFrame ``(m, d)`` or ``(d, m)`` depending on axis. + + Output data format (one of the following): + Anomaly scores (default): + np.ndarray, shape ``(m,)`` of type float. For each point of the input time + series, the anomaly score is a float value indicating the degree of + anomalousness. The higher the score, the more anomalous the point. + Binary classification: + np.ndarray, shape ``(m,)`` of type bool or int. For each point of the input + time series, the output is a boolean or integer value indicating whether the + point is anomalous (``True``/``1``) or not (``False``/``0``). + + Detector learning types: + Unsupervised (default): + Unsupervised detectors do not require any training data and can directly be + used on the target time series. Their tags are set to ``fit_is_empty=True`` + and ``requires_y=False``. You would usually call the ``fit_predict`` method + on these detectors. + Semi-supervised: + Semi-supervised detectors require a training step on a time series without + anomalies (normal behaving time series). The target value ``y`` would + consist of only zeros. Thus, these algorithms have logic in the ``fit`` + method, but do not require the target values. Their tags are set to + ``fit_is_empty=False`` and ``requires_y=False``. You would usually first + call the ``fit`` method on the training data and then the ``predict`` + method for your target time series. 
+ Supervised: + Supervised detectors require a training step on a time series with known + anomalies (anomalies should be present and must be annotated). The detector + implements the ``fit`` method, and the target value ``y`` consists of zeros + and ones. Their tags are, thus, set to ``fit_is_empty=False`` and + ``requires_y=True``. You would usually first call the ``fit`` method on the + training data and then the ``predict`` method for your target time series. + + Parameters + ---------- + axis : int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + Setting this class variable will convert the input data to the chosen axis. + """ _tags = { - # todo + "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES + "fit_is_empty": True, + "requires_y": False, } def __init__(self, axis): - super().__init__() + super().__init__(axis=axis) - @abstractmethod + @final def fit(self, X, y=None, axis=1): """Fit time series anomaly detector to X. @@ -48,12 +112,31 @@ def fit(self, X, y=None, axis=1): Returns ------- - BaseSeriesAnomalyDetector + BaseAnomalyDetector The fitted estimator, reference to self. """ - ... + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self - @abstractmethod + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_series(X, axis, True) + if y is not None: + y = self._check_y(y) + + self._fit(X=X, y=y) + + # this should happen last + self.is_fitted = True + return self + + @final def predict(self, X, axis=1) -> np.ndarray: """Find anomalies in X. @@ -76,9 +159,15 @@ def predict(self, X, axis=1) -> np.ndarray: A boolean, int or float array of length len(X), where each element indicates whether the corresponding subsequence is anomalous or its anomaly score. """ - ... + fit_empty = self.get_tag("fit_is_empty") + if not fit_empty: + self._check_is_fitted() - @abstractmethod + X = self._preprocess_series(X, axis, False) + + return self._predict(X) + + @final def fit_predict(self, X, y=None, axis=1) -> np.ndarray: """Fit time series anomaly detector and find anomalies for X. @@ -105,4 +194,100 @@ def fit_predict(self, X, y=None, axis=1) -> np.ndarray: A boolean, int or float array of length len(X), where each element indicates whether the corresponding subsequence is anomalous or its anomaly score. """ - ... + if self.get_tag("requires_y"): + if y is None: + raise ValueError("Tag requires_y is true, but fit called with y=None") + + # reset estimator at the start of fit + self.reset() + + X = self._preprocess_series(X, axis, True) + + if self.get_tag("fit_is_empty"): + self.is_fitted = True + return self._predict(X) + + if y is not None: + y = self._check_y(y) + + pred = self._fit_predict(X, y) + + # this should happen last + self.is_fitted = True + return pred + + def _fit(self, X, y): + return self + + @abstractmethod + def _predict(self, X) -> np.ndarray: ... 
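A minimal sketch (an editorial illustration, not part of this patch) of how a concrete detector plugs into the template methods above. Only the hooks shown in this diff (``_tags``, ``__init__(axis)``, ``_predict``) come from the base class; the class name and the z-score scoring logic are invented for illustration, and ``np`` refers to the module-level numpy import already present in base.py.

class _ZScoreDetector(BaseAnomalyDetector):  # hypothetical example, not in aeon
    _tags = {"fit_is_empty": True, "requires_y": False}

    def __init__(self):
        super().__init__(axis=1)

    def _predict(self, X) -> np.ndarray:
        # With axis=1 the base class presents X as a 2D array of shape
        # (n_channels, n_timepoints); score each time point by its mean
        # absolute z-score across channels.
        mean = X.mean(axis=1, keepdims=True)
        std = X.std(axis=1, keepdims=True) + 1e-8
        return np.abs((X - mean) / std).mean(axis=0)

With ``fit_is_empty=True``, calling ``fit_predict(series)`` skips ``_fit`` entirely and returns one float score per time point, matching the anomaly-score output format described in the class docstring.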
+ + def _fit_predict(self, X, y): + self._fit(X, y) + return self._predict(X) + + def _check_y(self, y: VALID_SERIES_INPUT_TYPES) -> np.ndarray: + # Remind user if y is not required for this estimator on failure + req_msg = ( + f"{self.__class__.__name__} does not require a y input." + if self.get_tag("requires_y") + else "" + ) + new_y = y + + # must be a valid input type, see VALID_SERIES_INPUT_TYPES in + # BaseSeriesEstimator + if isinstance(y, np.ndarray): + # check valid shape + if y.ndim > 1: + raise ValueError( + "Error in input type for y: y input as np.ndarray should be 1D." + + req_msg + ) + + # check valid dtype + fail = False + if issubclass(y.dtype.type, np.integer): + new_y = y.astype(bool) + fail = not np.array_equal(y, new_y) + elif not issubclass(y.dtype.type, np.bool_): + fail = True + + if fail: + raise ValueError( + "Error in input type for y: y input type must be an integer array " + "containing 0 and 1 or a boolean array." + req_msg + ) + elif isinstance(y, pd.Series): + # check series is of boolean dtype + if not pd.api.types.is_bool_dtype(y): + raise ValueError( + "Error in input type for y: y input as pd.Series must have a " + "boolean dtype." + req_msg + ) + + new_y = y.values + elif isinstance(y, pd.DataFrame): + # only accept size 1 dataframe + if y.shape[1] > 1: + raise ValueError( + "Error in input type for y: y input as pd.DataFrame should have a " + "single column series." + ) + + # check column is of boolean dtype + if not all(pd.api.types.is_bool_dtype(y[col]) for col in y.columns): + raise ValueError( + "Error in input type for y: y input as pd.DataFrame must have a " + "boolean dtype." + req_msg + ) + + new_y = y.squeeze().values + else: + raise ValueError( + f"Error in input type for y: it should be one of " + f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" + ) + + new_y = new_y.astype(bool) + return new_y diff --git a/aeon/anomaly_detection/collection/__init__.py b/aeon/anomaly_detection/collection/__init__.py deleted file mode 100644 index 4fc14ffd1f..0000000000 --- a/aeon/anomaly_detection/collection/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Whole-series anomaly detection methods.""" - -__all__ = [ - "BaseCollectionAnomalyDetector", - "ClassificationAdapter", - "OutlierDetectionAdapter", -] - -from aeon.anomaly_detection.collection._classification import ClassificationAdapter -from aeon.anomaly_detection.collection._outlier_detection import OutlierDetectionAdapter -from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector diff --git a/aeon/anomaly_detection/collection/_classification.py b/aeon/anomaly_detection/collection/_classification.py deleted file mode 100644 index d8aa34ebea..0000000000 --- a/aeon/anomaly_detection/collection/_classification.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Adapter to use classification algorithms for collection anomaly detection.""" - -__maintainer__ = [] - - -from sklearn.base import ClassifierMixin -from sklearn.ensemble import RandomForestClassifier - -from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector -from aeon.base._base import _clone_estimator -from aeon.classification.feature_based import SummaryClassifier - - -class ClassificationAdapter(BaseCollectionAnomalyDetector): - """ - Basic classifier adapter for collection anomaly detection. - - This class wraps a classification algorithm to be used as an anomaly detector. - Anomaly labels are required for training. 
- - Parameters - ---------- - classifier : aeon classifier or ClassifierMixin - The classification algorithm to be adapted. - random_state : int, RandomState instance or None, default=None - If `int`, random_state is the seed used by the random number generator; - If `RandomState` instance, random_state is the random number generator; - If `None`, the random number generator is the `RandomState` instance used - by `np.random`. - """ - - _tags = { - "X_inner_type": "numpy2D", - "requires_y": True, - } - - def __init__(self, classifier, random_state=None): - self.classifier = classifier - self.random_state = random_state - - super().__init__() - - def _fit(self, X, y=None): - if not isinstance(self.classifier, ClassifierMixin): - raise ValueError( - "The estimator must be an aeon classification algorithm " - "or class that implements the ClassifierMixin interface." - ) - - self.classifier_ = _clone_estimator( - self.classifier, random_state=self.random_state - ) - self.classifier_.fit(X, y) - return self - - def _predict(self, X): - return self.classifier_.predict(X) - - @classmethod - def _get_test_params(cls, parameter_set="default"): - return { - "classifier": SummaryClassifier( - estimator=RandomForestClassifier(n_estimators=5) - ) - } diff --git a/aeon/anomaly_detection/collection/_outlier_detection.py b/aeon/anomaly_detection/collection/_outlier_detection.py deleted file mode 100644 index db40a8e749..0000000000 --- a/aeon/anomaly_detection/collection/_outlier_detection.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Adapter to use outlier detection algorithms for collection anomaly detection.""" - -__maintainer__ = [] - -from sklearn.base import OutlierMixin -from sklearn.ensemble import IsolationForest - -from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector -from aeon.base._base import _clone_estimator - - -class OutlierDetectionAdapter(BaseCollectionAnomalyDetector): - """ - Basic outlier detection adapter for collection anomaly detection. - - This class wraps an sklearn outlier detection algorithm to be used as an anomaly - detector. - - Parameters - ---------- - detector : OutlierMixin - The outlier detection algorithm to be adapted. - random_state : int, RandomState instance or None, default=None - If `int`, random_state is the seed used by the random number generator; - If `RandomState` instance, random_state is the random number generator; - If `None`, the random number generator is the `RandomState` instance used - by `np.random`. - """ - - _tags = { - "X_inner_type": "numpy2D", - } - - def __init__(self, detector, random_state=None): - self.detector = detector - self.random_state = random_state - - super().__init__() - - def _fit(self, X, y=None): - if not isinstance(self.detector, OutlierMixin): - raise ValueError( - "The estimator must be an outlier detection algorithm " - "that implements the OutlierMixin interface." 
- ) - - self.detector_ = _clone_estimator(self.detector, random_state=self.random_state) - self.detector_.fit(X, y) - return self - - def _predict(self, X): - pred = self.detector_.predict(X) - pred[pred == -1] = 0 - return pred - - @classmethod - def _get_test_params(cls, parameter_set="default"): - return {"detector": IsolationForest(n_estimators=3)} diff --git a/aeon/anomaly_detection/collection/base.py b/aeon/anomaly_detection/collection/base.py deleted file mode 100644 index 9194ae557b..0000000000 --- a/aeon/anomaly_detection/collection/base.py +++ /dev/null @@ -1,199 +0,0 @@ -""" -Abstract base class for whole-series/collection anomaly detectors. - - class name: BaseCollectionAnomalyDetector - -Defining methods: - fitting - fit(self, X, y) - predicting - predict(self, X) - -Data validation: - data processing - _preprocess_collection(self, X, store_metadata=True) - shape verification - _check_shape(self, X) - -State: - fitted model/strategy - by convention, any attributes ending in "_" - fitted state flag - is_fitted - train input metadata - metadata_ - resetting state - reset(self) - -Tags: - default estimator tags - _tags - tag retrieval - get_tag(self, tag_name) - tag setting - set_tag(self, tag_name, value) -""" - -__maintainer__ = ["MatthewMiddlehurst"] -__all__ = ["BaseCollectionAnomalyDetector"] - -from abc import abstractmethod -from typing import final - -import numpy as np -import pandas as pd - -from aeon.base import BaseCollectionEstimator - - -class BaseCollectionAnomalyDetector(BaseCollectionEstimator): - """ - Abstract base class for collection anomaly detectors. - - The base detector specifies the methods and method signatures that all - collection anomaly detectors have to implement. Attributes with an underscore - suffix are set in the method fit. - - Attributes - ---------- - is_fitted : bool - True if the estimator has been fitted, False otherwise. - Unused if ``"fit_is_empty"`` tag is set to True. - metadata_ : dict - Dictionary containing metadata about the `fit` input data. - _tags_dynamic : dict - Dictionary containing dynamic tag values which have been set at runtime. - """ - - _tags = { - "fit_is_empty": False, - "requires_y": False, - } - - def __init__(self): - super().__init__() - - @final - def fit(self, X, y=None): - """Fit collection anomaly detector to training data. - - Parameters - ---------- - X : np.ndarray or list - Input data, any number of channels, equal length series of shape ``( - n_cases, n_channels, n_timepoints)`` - or 2D np.array (univariate, equal length series) of shape - ``(n_cases, n_timepoints)`` - or list of numpy arrays (any number of channels, unequal length series) - of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, - where ``n_timepoints_i`` is length of series ``i``. Other types are - allowed and converted into one of the above. - - Different estimators have different capabilities to handle different - types of input. If ``self.get_tag("capability:multivariate")`` is False, - they cannot handle multivariate series, so either ``n_channels == 1`` is - true or X is 2D of shape ``(n_cases, n_timepoints)``. If ``self.get_tag( - "capability:unequal_length")`` is False, they cannot handle unequal - length input. In both situations, a ``ValueError`` is raised if X has a - characteristic that the estimator does not have the capability for is - passed. - y : np.ndarray - 1D np.array of int, of shape ``(n_cases)`` - anomaly labels - (ground truth) for fitting indices corresponding to instance indices in X. 
- - Returns - ------- - self : BaseCollectionAnomalyDetector - Reference to self. - - Notes - ----- - Changes state by creating a fitted model that updates attributes - ending in "_" and sets is_fitted flag to True. - """ - if self.get_tag("fit_is_empty"): - self.is_fitted = True - return self - - if self.get_tag("requires_y"): - if y is None: - raise ValueError("Tag requires_y is true, but fit called with y=None") - - # reset estimator at the start of fit - self.reset() - - X = self._preprocess_collection(X) - if y is not None: - y = self._check_y(y, self.metadata_["n_cases"]) - - self._fit(X, y) - - # this should happen last - self.is_fitted = True - return self - - @final - def predict(self, X): - """Predicts anomalies for time series in X. - - Parameters - ---------- - X : np.ndarray or list - Input data, any number of channels, equal length series of shape ``( - n_cases, n_channels, n_timepoints)`` - or 2D np.array (univariate, equal length series) of shape - ``(n_cases, n_timepoints)`` - or list of numpy arrays (any number of channels, unequal length series) - of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, - where ``n_timepoints_i`` is length of series ``i`` - other types are allowed and converted into one of the above. - - Different estimators have different capabilities to handle different - types of input. If ``self.get_tag("capability:multivariate")`` is False, - they cannot handle multivariate series, so either ``n_channels == 1`` is - true or X is 2D of shape ``(n_cases, n_timepoints)``. If ``self.get_tag( - "capability:unequal_length")`` is False, they cannot handle unequal - length input. In both situations, a ``ValueError`` is raised if X has a - characteristic that the estimator does not have the capability for is - passed. - - Returns - ------- - predictions : np.ndarray - 1D np.array of float, of shape (n_cases) - predicted anomalies - indices correspond to instance indices in X - """ - fit_empty = self.get_tag("fit_is_empty") - if not fit_empty: - self._check_is_fitted() - - X = self._preprocess_collection(X, store_metadata=False) - # Check if X has the correct shape seen during fitting - self._check_shape(X) - - return self._predict(X) - - @abstractmethod - def _fit(self, X, y=None): ... - - @abstractmethod - def _predict(self, X): ... - - def _check_y(self, y, n_cases): - """Check y input is valid. - - Must be 1-dimensional and contain only 0s (no anomaly) and 1s (anomaly). - Must match the number of cases in X. - """ - if not isinstance(y, (pd.Series, np.ndarray)): - raise TypeError( - f"y must be a np.array or a pd.Series, but found type: {type(y)}" - ) - if isinstance(y, np.ndarray) and y.ndim > 1: - raise TypeError(f"y must be 1-dimensional, found {y.ndim} dimensions") - - if not all([x == 0 or x == 1 for x in y]): - raise ValueError( - "y input must only contain 0 (not anomalous) or 1 (anomalous) values." - ) - - # Check matching number of labels - n_labels = y.shape[0] - if n_cases != n_labels: - raise ValueError( - f"Mismatch in number of cases. 
Found X = {n_cases} and y = {n_labels}" - ) - - if isinstance(y, pd.Series): - y = pd.Series.to_numpy(y) - - return y diff --git a/aeon/anomaly_detection/distance_based/__init__.py b/aeon/anomaly_detection/distance_based/__init__.py new file mode 100644 index 0000000000..5eb342b780 --- /dev/null +++ b/aeon/anomaly_detection/distance_based/__init__.py @@ -0,0 +1,19 @@ +"""Distance based Time Series Anomaly Detection.""" + +__all__ = [ + "CBLOF", + "KMeansAD", + "LeftSTAMPi", + "LOF", + "MERLIN", + "OneClassSVM", + "STOMP", +] + +from aeon.anomaly_detection.distance_based._cblof import CBLOF +from aeon.anomaly_detection.distance_based._kmeans import KMeansAD +from aeon.anomaly_detection.distance_based._left_stampi import LeftSTAMPi +from aeon.anomaly_detection.distance_based._lof import LOF +from aeon.anomaly_detection.distance_based._merlin import MERLIN +from aeon.anomaly_detection.distance_based._one_class_svm import OneClassSVM +from aeon.anomaly_detection.distance_based._stomp import STOMP diff --git a/aeon/anomaly_detection/series/distance_based/_cblof.py b/aeon/anomaly_detection/distance_based/_cblof.py similarity index 98% rename from aeon/anomaly_detection/series/distance_based/_cblof.py rename to aeon/anomaly_detection/distance_based/_cblof.py index 016a9c18ba..18bb044c14 100644 --- a/aeon/anomaly_detection/series/distance_based/_cblof.py +++ b/aeon/anomaly_detection/distance_based/_cblof.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.series._pyodadapter import PyODAdapter +from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distance_based/_kmeans.py b/aeon/anomaly_detection/distance_based/_kmeans.py similarity index 98% rename from aeon/anomaly_detection/series/distance_based/_kmeans.py rename to aeon/anomaly_detection/distance_based/_kmeans.py index aea82ee21a..bb8f188a1d 100644 --- a/aeon/anomaly_detection/series/distance_based/_kmeans.py +++ b/aeon/anomaly_detection/distance_based/_kmeans.py @@ -8,11 +8,11 @@ import numpy as np from sklearn.cluster import KMeans -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.utils.windowing import reverse_windowing, sliding_windows -class KMeansAD(BaseSeriesAnomalyDetector): +class KMeansAD(BaseAnomalyDetector): """KMeans anomaly detector. The k-Means anomaly detector uses k-Means clustering to detect anomalies in time diff --git a/aeon/anomaly_detection/series/distance_based/_left_stampi.py b/aeon/anomaly_detection/distance_based/_left_stampi.py similarity index 97% rename from aeon/anomaly_detection/series/distance_based/_left_stampi.py rename to aeon/anomaly_detection/distance_based/_left_stampi.py index cbeba2c5a5..43078ce021 100644 --- a/aeon/anomaly_detection/series/distance_based/_left_stampi.py +++ b/aeon/anomaly_detection/distance_based/_left_stampi.py @@ -6,11 +6,11 @@ import numpy as np -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.utils.windowing import reverse_windowing -class LeftSTAMPi(BaseSeriesAnomalyDetector): +class LeftSTAMPi(BaseAnomalyDetector): """LeftSTAMPi anomaly detector.
LeftSTAMPi [1]_ calculates the left matrix profile of a time series, diff --git a/aeon/anomaly_detection/series/distance_based/_lof.py b/aeon/anomaly_detection/distance_based/_lof.py similarity index 98% rename from aeon/anomaly_detection/series/distance_based/_lof.py rename to aeon/anomaly_detection/distance_based/_lof.py index 1a914583a9..2c3615d906 100644 --- a/aeon/anomaly_detection/series/distance_based/_lof.py +++ b/aeon/anomaly_detection/distance_based/_lof.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.series._pyodadapter import PyODAdapter +from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distance_based/_merlin.py b/aeon/anomaly_detection/distance_based/_merlin.py similarity index 98% rename from aeon/anomaly_detection/series/distance_based/_merlin.py rename to aeon/anomaly_detection/distance_based/_merlin.py index be0d2a9ead..b63224acd5 100644 --- a/aeon/anomaly_detection/series/distance_based/_merlin.py +++ b/aeon/anomaly_detection/distance_based/_merlin.py @@ -8,13 +8,13 @@ import numpy as np from numba import njit -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.distances import squared_distance from aeon.utils.numba.general import AEON_NUMBA_STD_THRESHOLD from aeon.utils.numba.stats import mean, std -class MERLIN(BaseSeriesAnomalyDetector): +class MERLIN(BaseAnomalyDetector): """MERLIN anomaly detector. MERLIN is a discord discovery algorithm that uses a sliding window to find the diff --git a/aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py b/aeon/anomaly_detection/distance_based/_one_class_svm.py similarity index 98% rename from aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py rename to aeon/anomaly_detection/distance_based/_one_class_svm.py index 667ec32f9a..9e654ee326 100644 --- a/aeon/anomaly_detection/series/outlier_detection/_one_class_svm.py +++ b/aeon/anomaly_detection/distance_based/_one_class_svm.py @@ -7,11 +7,11 @@ import numpy as np from sklearn.svm import OneClassSVM as OCSVM -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.utils.windowing import reverse_windowing, sliding_windows -class OneClassSVM(BaseSeriesAnomalyDetector): +class OneClassSVM(BaseAnomalyDetector): """OneClassSVM for anomaly detection. This class implements the OneClassSVM algorithm for anomaly detection diff --git a/aeon/anomaly_detection/series/distance_based/_stomp.py b/aeon/anomaly_detection/distance_based/_stomp.py similarity index 97% rename from aeon/anomaly_detection/series/distance_based/_stomp.py rename to aeon/anomaly_detection/distance_based/_stomp.py index 7298b438c9..3f8be36432 100644 --- a/aeon/anomaly_detection/series/distance_based/_stomp.py +++ b/aeon/anomaly_detection/distance_based/_stomp.py @@ -7,11 +7,11 @@ import numpy as np -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.utils.windowing import reverse_windowing -class STOMP(BaseSeriesAnomalyDetector): +class STOMP(BaseAnomalyDetector): """STOMP anomaly detector. 
STOMP calculates the matrix profile of a time series which is the distance to the diff --git a/aeon/anomaly_detection/series/distance_based/tests/__init__.py b/aeon/anomaly_detection/distance_based/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/series/distance_based/tests/__init__.py rename to aeon/anomaly_detection/distance_based/tests/__init__.py diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_cblof.py b/aeon/anomaly_detection/distance_based/tests/test_cblof.py similarity index 97% rename from aeon/anomaly_detection/series/distance_based/tests/test_cblof.py rename to aeon/anomaly_detection/distance_based/tests/test_cblof.py index ce7579734f..d1472af6a2 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_cblof.py +++ b/aeon/anomaly_detection/distance_based/tests/test_cblof.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.series.distance_based import CBLOF +from aeon.anomaly_detection.distance_based import CBLOF from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py b/aeon/anomaly_detection/distance_based/tests/test_kmeans.py similarity index 95% rename from aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py rename to aeon/anomaly_detection/distance_based/tests/test_kmeans.py index bc966929b0..2647411b88 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_kmeans.py +++ b/aeon/anomaly_detection/distance_based/tests/test_kmeans.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.distance_based import KMeansAD +from aeon.anomaly_detection.distance_based import KMeansAD def test_kmeansad_univariate(): diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py b/aeon/anomaly_detection/distance_based/tests/test_left_stampi.py similarity index 99% rename from aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py rename to aeon/anomaly_detection/distance_based/tests/test_left_stampi.py index 2e14928625..6444bccdfe 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_left_stampi.py +++ b/aeon/anomaly_detection/distance_based/tests/test_left_stampi.py @@ -8,7 +8,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.series.distance_based._left_stampi import LeftSTAMPi +from aeon.anomaly_detection.distance_based._left_stampi import LeftSTAMPi from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_lof.py b/aeon/anomaly_detection/distance_based/tests/test_lof.py similarity index 99% rename from aeon/anomaly_detection/series/distance_based/tests/test_lof.py rename to aeon/anomaly_detection/distance_based/tests/test_lof.py index a9107705fa..033d11295b 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_lof.py +++ b/aeon/anomaly_detection/distance_based/tests/test_lof.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.series.distance_based import LOF +from aeon.anomaly_detection.distance_based import LOF from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git 
a/aeon/anomaly_detection/series/distance_based/tests/test_merlin.py b/aeon/anomaly_detection/distance_based/tests/test_merlin.py similarity index 96% rename from aeon/anomaly_detection/series/distance_based/tests/test_merlin.py rename to aeon/anomaly_detection/distance_based/tests/test_merlin.py index 0ef5aafb8a..ccf7e3300d 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_merlin.py +++ b/aeon/anomaly_detection/distance_based/tests/test_merlin.py @@ -4,7 +4,7 @@ import numpy as np -from aeon.anomaly_detection.series.distance_based import MERLIN +from aeon.anomaly_detection.distance_based import MERLIN TEST_DATA = np.array( [ diff --git a/aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py b/aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py similarity index 95% rename from aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py rename to aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py index 6395291617..7a3aca2042 100644 --- a/aeon/anomaly_detection/series/outlier_detection/tests/test_one_class_svm.py +++ b/aeon/anomaly_detection/distance_based/tests/test_one_class_svm.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.distance_based import OneClassSVM +from aeon.anomaly_detection.distance_based import OneClassSVM def test_one_class_svm_univariate(): diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_stomp.py b/aeon/anomaly_detection/distance_based/tests/test_stomp.py similarity index 95% rename from aeon/anomaly_detection/series/distance_based/tests/test_stomp.py rename to aeon/anomaly_detection/distance_based/tests/test_stomp.py index f8225d3c7b..b506c89ea0 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_stomp.py +++ b/aeon/anomaly_detection/distance_based/tests/test_stomp.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.distance_based import STOMP +from aeon.anomaly_detection.distance_based import STOMP from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/distribution_based/__init__.py b/aeon/anomaly_detection/distribution_based/__init__.py new file mode 100644 index 0000000000..e52a7512ba --- /dev/null +++ b/aeon/anomaly_detection/distribution_based/__init__.py @@ -0,0 +1,9 @@ +"""Distribution based Time Series Anomaly Detection.""" + +__all__ = [ + "COPOD", + "DWT_MLEAD", +] + +from aeon.anomaly_detection.distribution_based._copod import COPOD +from aeon.anomaly_detection.distribution_based._dwt_mlead import DWT_MLEAD diff --git a/aeon/anomaly_detection/series/distribution_based/_copod.py b/aeon/anomaly_detection/distribution_based/_copod.py similarity index 97% rename from aeon/anomaly_detection/series/distribution_based/_copod.py rename to aeon/anomaly_detection/distribution_based/_copod.py index 3e72614f17..bd2af0e084 100644 --- a/aeon/anomaly_detection/series/distribution_based/_copod.py +++ b/aeon/anomaly_detection/distribution_based/_copod.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.series._pyodadapter import PyODAdapter +from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py b/aeon/anomaly_detection/distribution_based/_dwt_mlead.py similarity index 98% rename from 
aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py rename to aeon/anomaly_detection/distribution_based/_dwt_mlead.py index 2154abee22..cb0de0c015 100644 --- a/aeon/anomaly_detection/series/distribution_based/_dwt_mlead.py +++ b/aeon/anomaly_detection/distribution_based/_dwt_mlead.py @@ -11,7 +11,7 @@ from numpy.lib.stride_tricks import sliding_window_view from sklearn.covariance import EmpiricalCovariance -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.utils.numba.wavelets import multilevel_haar_transform @@ -30,7 +30,7 @@ def _combine_alternating(xs: list[Any], ys: list[Any]) -> Iterable[Any]: yield y -class DWT_MLEAD(BaseSeriesAnomalyDetector): +class DWT_MLEAD(BaseAnomalyDetector): """DWT-MLEAD anomaly detector. DWT-MLEAD is an anomaly detection algorithm that uses the Discrete Wavelet Transform diff --git a/aeon/anomaly_detection/series/distribution_based/tests/__init__.py b/aeon/anomaly_detection/distribution_based/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/series/distribution_based/tests/__init__.py rename to aeon/anomaly_detection/distribution_based/tests/__init__.py diff --git a/aeon/anomaly_detection/series/distribution_based/tests/test_copod.py b/aeon/anomaly_detection/distribution_based/tests/test_copod.py similarity index 96% rename from aeon/anomaly_detection/series/distribution_based/tests/test_copod.py rename to aeon/anomaly_detection/distribution_based/tests/test_copod.py index 8732b33c96..40969da0e7 100644 --- a/aeon/anomaly_detection/series/distribution_based/tests/test_copod.py +++ b/aeon/anomaly_detection/distribution_based/tests/test_copod.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.anomaly_detection.series.distribution_based import COPOD +from aeon.anomaly_detection.distribution_based import COPOD from aeon.testing.data_generation import make_example_1d_numpy from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py b/aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py similarity index 95% rename from aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py rename to aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py index 3a714570b3..664d715122 100644 --- a/aeon/anomaly_detection/series/distribution_based/tests/test_dwt_mlead.py +++ b/aeon/anomaly_detection/distribution_based/tests/test_dwt_mlead.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.distribution_based import DWT_MLEAD +from aeon.anomaly_detection.distribution_based import DWT_MLEAD def test_dwt_mlead_output(): diff --git a/aeon/anomaly_detection/outlier_detection/__init__.py b/aeon/anomaly_detection/outlier_detection/__init__.py new file mode 100644 index 0000000000..ad9b7868e5 --- /dev/null +++ b/aeon/anomaly_detection/outlier_detection/__init__.py @@ -0,0 +1,11 @@ +"""Time Series Outlier Detection.""" + +__all__ = [ + "IsolationForest", + "PyODAdapter", + "STRAY", +] + +from aeon.anomaly_detection.outlier_detection._iforest import IsolationForest +from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter +from aeon.anomaly_detection.outlier_detection._stray import STRAY diff --git a/aeon/anomaly_detection/series/outlier_detection/_iforest.py b/aeon/anomaly_detection/outlier_detection/_iforest.py similarity index 98% 
rename from aeon/anomaly_detection/series/outlier_detection/_iforest.py rename to aeon/anomaly_detection/outlier_detection/_iforest.py index a0d62261e6..f13152d0e7 100644 --- a/aeon/anomaly_detection/series/outlier_detection/_iforest.py +++ b/aeon/anomaly_detection/outlier_detection/_iforest.py @@ -7,7 +7,7 @@ import numpy as np -from aeon.anomaly_detection.series._pyodadapter import PyODAdapter +from aeon.anomaly_detection.outlier_detection._pyodadapter import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/_pyodadapter.py b/aeon/anomaly_detection/outlier_detection/_pyodadapter.py similarity index 98% rename from aeon/anomaly_detection/series/_pyodadapter.py rename to aeon/anomaly_detection/outlier_detection/_pyodadapter.py index aa4e1f9779..5a068857c6 100644 --- a/aeon/anomaly_detection/series/_pyodadapter.py +++ b/aeon/anomaly_detection/outlier_detection/_pyodadapter.py @@ -10,7 +10,7 @@ import numpy as np from sklearn import clone -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.utils.validation._dependencies import _check_soft_dependencies from aeon.utils.windowing import reverse_windowing, sliding_windows @@ -18,7 +18,7 @@ from pyod.models.base import BaseDetector -class PyODAdapter(BaseSeriesAnomalyDetector): +class PyODAdapter(BaseAnomalyDetector): """Adapter for PyOD anomaly detection models to be used in the Aeon framework. This adapter allows the use of PyOD models in the Aeon framework. The adapter diff --git a/aeon/anomaly_detection/series/outlier_detection/_stray.py b/aeon/anomaly_detection/outlier_detection/_stray.py similarity index 98% rename from aeon/anomaly_detection/series/outlier_detection/_stray.py rename to aeon/anomaly_detection/outlier_detection/_stray.py index 3d78be8643..e7512e2d24 100644 --- a/aeon/anomaly_detection/series/outlier_detection/_stray.py +++ b/aeon/anomaly_detection/outlier_detection/_stray.py @@ -8,10 +8,10 @@ import numpy.typing as npt from sklearn.neighbors import NearestNeighbors -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector -class STRAY(BaseSeriesAnomalyDetector): +class STRAY(BaseAnomalyDetector): """STRAY: robust anomaly detection in data streams with concept drift. 
This is based on STRAY (Search TRace AnomalY) [1]_, which is a modification diff --git a/aeon/anomaly_detection/series/outlier_detection/tests/__init__.py b/aeon/anomaly_detection/outlier_detection/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/series/outlier_detection/tests/__init__.py rename to aeon/anomaly_detection/outlier_detection/tests/__init__.py diff --git a/aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py b/aeon/anomaly_detection/outlier_detection/tests/test_iforest.py similarity index 98% rename from aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py rename to aeon/anomaly_detection/outlier_detection/tests/test_iforest.py index 07e0a085c3..a66d1003fb 100644 --- a/aeon/anomaly_detection/series/outlier_detection/tests/test_iforest.py +++ b/aeon/anomaly_detection/outlier_detection/tests/test_iforest.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.outlier_detection import IsolationForest +from aeon.anomaly_detection.outlier_detection import IsolationForest from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/tests/test_pyod_adapter.py b/aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py similarity index 98% rename from aeon/anomaly_detection/series/tests/test_pyod_adapter.py rename to aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py index 84906c245d..ee75078133 100644 --- a/aeon/anomaly_detection/series/tests/test_pyod_adapter.py +++ b/aeon/anomaly_detection/outlier_detection/tests/test_pyod_adapter.py @@ -6,7 +6,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.outlier_detection import PyODAdapter +from aeon.anomaly_detection.outlier_detection import PyODAdapter from aeon.utils.validation._dependencies import _check_soft_dependencies diff --git a/aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py b/aeon/anomaly_detection/outlier_detection/tests/test_stray.py similarity index 98% rename from aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py rename to aeon/anomaly_detection/outlier_detection/tests/test_stray.py index 76ef9ef915..8429a8a3c5 100644 --- a/aeon/anomaly_detection/series/outlier_detection/tests/test_stray.py +++ b/aeon/anomaly_detection/outlier_detection/tests/test_stray.py @@ -5,7 +5,7 @@ import numpy as np from sklearn.preprocessing import MinMaxScaler -from aeon.anomaly_detection.series.outlier_detection import STRAY +from aeon.anomaly_detection.outlier_detection import STRAY def test_default_1D(): diff --git a/aeon/anomaly_detection/series/__init__.py b/aeon/anomaly_detection/series/__init__.py deleted file mode 100644 index a4d2052d1c..0000000000 --- a/aeon/anomaly_detection/series/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Single series Time Series Anomaly Detection.""" - -__all__ = [ - "BaseSeriesAnomalyDetector", - "PyODAdapter", -] - -from aeon.anomaly_detection.series._pyodadapter import PyODAdapter -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector diff --git a/aeon/anomaly_detection/series/base.py b/aeon/anomaly_detection/series/base.py deleted file mode 100644 index 57863d9f3d..0000000000 --- a/aeon/anomaly_detection/series/base.py +++ /dev/null @@ -1,293 +0,0 @@ -"""Abstract base class for series time series anomaly detectors.""" - -__maintainer__ = ["MatthewMiddlehurst"] -__all__ = ["BaseSeriesAnomalyDetector"] - -from 
abc import abstractmethod -from typing import final - -import numpy as np -import pandas as pd - -from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_SERIES_INPUT_TYPES - - -class BaseSeriesAnomalyDetector(BaseSeriesEstimator): - """Base class for series anomaly detection algorithms. - - Anomaly detection algorithms are used to identify anomalous subsequences in time - series data. These algorithms take a series of length m and return a boolean, int or - float array of length m, where each element indicates whether the corresponding - subsequence is anomalous or its anomaly score. - - Input and internal data format (where m is the number of time points and d is the - number of channels): - Univariate series (default): - np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis. - This is converted to a 2D np.ndarray internally. - pd.DataFrame, shape ``(m, 1)`` or ``(1, m)`` depending on axis. - pd.Series, shape ``(m,)``. - Multivariate series: - np.ndarray array, shape ``(m, d)`` or ``(d, m)`` depending on axis. - pd.DataFrame ``(m, d)`` or ``(d, m)`` depending on axis. - - Output data format (one of the following): - Anomaly scores (default): - np.ndarray, shape ``(m,)`` of type float. For each point of the input time - series, the anomaly score is a float value indicating the degree of - anomalousness. The higher the score, the more anomalous the point. - Binary classification: - np.ndarray, shape ``(m,)`` of type bool or int. For each point of the input - time series, the output is a boolean or integer value indicating whether the - point is anomalous (``True``/``1``) or not (``False``/``0``). - - Detector learning types: - Unsupervised (default): - Unsupervised detectors do not require any training data and can directly be - used on the target time series. Their tags are set to ``fit_is_empty=True`` - and ``requires_y=False``. You would usually call the ``fit_predict`` method - on these detectors. - Semi-supervised: - Semi-supervised detectors require a training step on a time series without - anomalies (normal behaving time series). The target value ``y`` would - consist of only zeros. Thus, these algorithms have logic in the ``fit`` - method, but do not require the target values. Their tags are set to - ``fit_is_empty=False`` and ``requires_y=False``. You would usually first - call the ``fit`` method on the training data and then the ``predict`` - method for your target time series. - Supervised: - Supervised detectors require a training step on a time series with known - anomalies (anomalies should be present and must be annotated). The detector - implements the ``fit`` method, and the target value ``y`` consists of zeros - and ones. Their tags are, thus, set to ``fit_is_empty=False`` and - ``requires_y=True``. You would usually first call the ``fit`` method on the - training data and then the ``predict`` method for your target time series. - - Parameters - ---------- - axis : int - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - Setting this class variable will convert the input data to the chosen axis. 
- """ - - _tags = { - "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES - "fit_is_empty": True, - "requires_y": False, - } - - def __init__(self, axis): - super().__init__(axis=axis) - - @final - def fit(self, X, y=None, axis=1): - """Fit time series anomaly detector to X. - - If the tag ``fit_is_empty`` is true, this just sets the ``is_fitted`` tag to - true. Otherwise, it checks ``self`` can handle ``X``, formats ``X`` into - the structure required by ``self`` then passes ``X`` (and possibly ``y``) to - ``_fit``. - - Parameters - ---------- - X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - The time series to fit the model to. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None - The target values for the time series. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - axis : int - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - - Returns - ------- - BaseSeriesAnomalyDetector - The fitted estimator, reference to self. - """ - if self.get_tag("fit_is_empty"): - self.is_fitted = True - return self - - if self.get_tag("requires_y"): - if y is None: - raise ValueError("Tag requires_y is true, but fit called with y=None") - - # reset estimator at the start of fit - self.reset() - - X = self._preprocess_series(X, axis, True) - if y is not None: - y = self._check_y(y) - - self._fit(X=X, y=y) - - # this should happen last - self.is_fitted = True - return self - - @final - def predict(self, X, axis=1) -> np.ndarray: - """Find anomalies in X. - - Parameters - ---------- - X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - The time series to fit the model to. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - axis : int, default=1 - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - - Returns - ------- - np.ndarray - A boolean, int or float array of length len(X), where each element indicates - whether the corresponding subsequence is anomalous or its anomaly score. - """ - fit_empty = self.get_tag("fit_is_empty") - if not fit_empty: - self._check_is_fitted() - - X = self._preprocess_series(X, axis, False) - - return self._predict(X) - - @final - def fit_predict(self, X, y=None, axis=1) -> np.ndarray: - """Fit time series anomaly detector and find anomalies for X. - - Parameters - ---------- - X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES - The time series to fit the model to. - A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. - y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None - The target values for the time series. - A valid aeon time series data structure. 
See - aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. - axis : int, default=1 - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - - Returns - ------- - np.ndarray - A boolean, int or float array of length len(X), where each element indicates - whether the corresponding subsequence is anomalous or its anomaly score. - """ - if self.get_tag("requires_y"): - if y is None: - raise ValueError("Tag requires_y is true, but fit called with y=None") - - # reset estimator at the start of fit - self.reset() - - X = self._preprocess_series(X, axis, True) - - if self.get_tag("fit_is_empty"): - self.is_fitted = True - return self._predict(X) - - if y is not None: - y = self._check_y(y) - - pred = self._fit_predict(X, y) - - # this should happen last - self.is_fitted = True - return pred - - def _fit(self, X, y): - return self - - @abstractmethod - def _predict(self, X) -> np.ndarray: ... - - def _fit_predict(self, X, y): - self._fit(X, y) - return self._predict(X) - - def _check_y(self, y: VALID_SERIES_INPUT_TYPES) -> np.ndarray: - # Remind user if y is not required for this estimator on failure - req_msg = ( - f"{self.__class__.__name__} does not require a y input." - if self.get_tag("requires_y") - else "" - ) - new_y = y - - # must be a valid input type, see VALID_SERIES_INPUT_TYPES in - # BaseSeriesEstimator - if isinstance(y, np.ndarray): - # check valid shape - if y.ndim > 1: - raise ValueError( - "Error in input type for y: y input as np.ndarray should be 1D." - + req_msg - ) - - # check valid dtype - fail = False - if issubclass(y.dtype.type, np.integer): - new_y = y.astype(bool) - fail = not np.array_equal(y, new_y) - elif not issubclass(y.dtype.type, np.bool_): - fail = True - - if fail: - raise ValueError( - "Error in input type for y: y input type must be an integer array " - "containing 0 and 1 or a boolean array." + req_msg - ) - elif isinstance(y, pd.Series): - # check series is of boolean dtype - if not pd.api.types.is_bool_dtype(y): - raise ValueError( - "Error in input type for y: y input as pd.Series must have a " - "boolean dtype." + req_msg - ) - - new_y = y.values - elif isinstance(y, pd.DataFrame): - # only accept size 1 dataframe - if y.shape[1] > 1: - raise ValueError( - "Error in input type for y: y input as pd.DataFrame should have a " - "single column series." - ) - - # check column is of boolean dtype - if not all(pd.api.types.is_bool_dtype(y[col]) for col in y.columns): - raise ValueError( - "Error in input type for y: y input as pd.DataFrame must have a " - "boolean dtype." 
+ req_msg - ) - - new_y = y.squeeze().values - else: - raise ValueError( - f"Error in input type for y: it should be one of " - f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" - ) - - new_y = new_y.astype(bool) - return new_y diff --git a/aeon/anomaly_detection/series/distance_based/__init__.py b/aeon/anomaly_detection/series/distance_based/__init__.py deleted file mode 100644 index df02c8cd92..0000000000 --- a/aeon/anomaly_detection/series/distance_based/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Distance-based Time Series Anomaly Detection.""" - -__all__ = [ - "CBLOF", - "KMeansAD", - "LeftSTAMPi", - "LOF", - "MERLIN", - "OneClassSVM", - "STOMP", - "ROCKAD", -] - -from aeon.anomaly_detection.series.distance_based._cblof import CBLOF -from aeon.anomaly_detection.series.distance_based._kmeans import KMeansAD -from aeon.anomaly_detection.series.distance_based._left_stampi import LeftSTAMPi -from aeon.anomaly_detection.series.distance_based._lof import LOF -from aeon.anomaly_detection.series.distance_based._merlin import MERLIN -from aeon.anomaly_detection.series.distance_based._rockad import ROCKAD -from aeon.anomaly_detection.series.distance_based._stomp import STOMP -from aeon.anomaly_detection.series.outlier_detection._one_class_svm import OneClassSVM diff --git a/aeon/anomaly_detection/series/distribution_based/__init__.py b/aeon/anomaly_detection/series/distribution_based/__init__.py deleted file mode 100644 index 7338cb740d..0000000000 --- a/aeon/anomaly_detection/series/distribution_based/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Distribution-based Time Series Anomaly Detection.""" - -__all__ = [ - "COPOD", - "DWT_MLEAD", -] - -from aeon.anomaly_detection.series.distribution_based._copod import COPOD -from aeon.anomaly_detection.series.distribution_based._dwt_mlead import DWT_MLEAD diff --git a/aeon/anomaly_detection/series/outlier_detection/__init__.py b/aeon/anomaly_detection/series/outlier_detection/__init__.py deleted file mode 100644 index 33a41b84c7..0000000000 --- a/aeon/anomaly_detection/series/outlier_detection/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Time Series Outlier Detection.""" - -__all__ = [ - "IsolationForest", - "STRAY", -] - -from aeon.anomaly_detection.series.outlier_detection._iforest import IsolationForest -from aeon.anomaly_detection.series.outlier_detection._stray import STRAY diff --git a/aeon/anomaly_detection/series/tests/__init__.py b/aeon/anomaly_detection/tests/__init__.py similarity index 100% rename from aeon/anomaly_detection/series/tests/__init__.py rename to aeon/anomaly_detection/tests/__init__.py diff --git a/aeon/anomaly_detection/series/tests/test_base.py b/aeon/anomaly_detection/tests/test_base.py similarity index 100% rename from aeon/anomaly_detection/series/tests/test_base.py rename to aeon/anomaly_detection/tests/test_base.py diff --git a/aeon/anomaly_detection/whole_series/__init__.py b/aeon/anomaly_detection/whole_series/__init__.py new file mode 100644 index 0000000000..7098b8cd08 --- /dev/null +++ b/aeon/anomaly_detection/whole_series/__init__.py @@ -0,0 +1,7 @@ +"""Whole Time Series Anomaly Detection.""" + +__all__ = [ + "ROCKAD", +] + +from aeon.anomaly_detection.whole_series._rockad import ROCKAD diff --git a/aeon/anomaly_detection/series/distance_based/_rockad.py b/aeon/anomaly_detection/whole_series/_rockad.py similarity index 98% rename from aeon/anomaly_detection/series/distance_based/_rockad.py rename to aeon/anomaly_detection/whole_series/_rockad.py index 15e2016e18..603a8732a3 100644 --- 
a/aeon/anomaly_detection/series/distance_based/_rockad.py +++ b/aeon/anomaly_detection/whole_series/_rockad.py @@ -10,12 +10,12 @@ from sklearn.preprocessing import PowerTransformer from sklearn.utils import resample -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.transformations.collection.convolution_based import Rocket from aeon.utils.windowing import reverse_windowing, sliding_windows -class ROCKAD(BaseSeriesAnomalyDetector): +class ROCKAD(BaseAnomalyDetector): """ ROCKET-based Anomaly Detector (ROCKAD). diff --git a/aeon/anomaly_detection/whole_series/tests/__init__.py b/aeon/anomaly_detection/whole_series/tests/__init__.py new file mode 100644 index 0000000000..9292e8d9bd --- /dev/null +++ b/aeon/anomaly_detection/whole_series/tests/__init__.py @@ -0,0 +1 @@ +"""Whole series anomaly detection tests.""" diff --git a/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py b/aeon/anomaly_detection/whole_series/tests/test_rockad.py similarity index 96% rename from aeon/anomaly_detection/series/distance_based/tests/test_rockad.py rename to aeon/anomaly_detection/whole_series/tests/test_rockad.py index 51d2425505..7d3694b2c8 100644 --- a/aeon/anomaly_detection/series/distance_based/tests/test_rockad.py +++ b/aeon/anomaly_detection/whole_series/tests/test_rockad.py @@ -4,7 +4,7 @@ import pytest from sklearn.utils import check_random_state -from aeon.anomaly_detection.series.distance_based import ROCKAD +from aeon.anomaly_detection.whole_series import ROCKAD def test_rockad_univariate(): diff --git a/aeon/classification/base.py b/aeon/classification/base.py index fa7ed89cb6..92d3b304a8 100644 --- a/aeon/classification/base.py +++ b/aeon/classification/base.py @@ -52,6 +52,8 @@ class BaseClassifier(ClassifierMixin, BaseCollectionEstimator): Number of classes (length of ``classes_``). _class_dictionary : dict Mapping of classes_ onto integers ``0 ... n_classes_-1``. + _estimator_type : string + The type of estimator. Required by some ``sklearn`` tools, set to "classifier". 
""" _tags = { diff --git a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py deleted file mode 100644 index 76c91e312e..0000000000 --- a/aeon/testing/estimator_checking/_yield_collection_anomaly_detection_checks.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Tests for all collection anomaly detectors.""" - -from functools import partial - -from aeon.base._base import _clone_estimator -from aeon.testing.testing_data import FULL_TEST_DATA_DICT -from aeon.testing.utils.estimator_checks import _assert_predict_labels -from aeon.utils.data_types import COLLECTIONS_DATA_TYPES - - -def _yield_collection_anomaly_detection_checks( - estimator_class, estimator_instances, datatypes -): - """Yield all collection anomaly detection checks for an aeon estimator.""" - # only class required - yield partial( - check_collection_detector_overrides_and_tags, estimator_class=estimator_class - ) - - # test class instances - for i, estimator in enumerate(estimator_instances): - # test all data types - for datatype in datatypes[i]: - yield partial( - check_collection_detector_output, estimator=estimator, datatype=datatype - ) - - -def check_collection_detector_overrides_and_tags(estimator_class): - """Test compliance with the detector base class contract.""" - # Test they don't override final methods, because Python does not enforce this - final_methods = [ - "fit", - "predict", - ] - for method in final_methods: - if method in estimator_class.__dict__: - raise ValueError( - f"Collection anomaly detector {estimator_class} overrides the " - f"method {method}. Override _{method} instead." - ) - - # Test valid tag for X_inner_type - X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") - if isinstance(X_inner_type, str): - assert X_inner_type in COLLECTIONS_DATA_TYPES - else: # must be a list - assert all([t in COLLECTIONS_DATA_TYPES for t in X_inner_type]) - - # one of X_inner_types must be capable of storing unequal length - if estimator_class.get_class_tag("capability:unequal_length"): - valid_unequal_types = ["np-list", "df-list", "pd-multiindex"] - if isinstance(X_inner_type, str): - assert X_inner_type in valid_unequal_types - else: # must be a list - assert any([t in valid_unequal_types for t in X_inner_type]) - - -def check_collection_detector_output(estimator, datatype): - """Test detector outputs the correct data types and values.""" - estimator = _clone_estimator(estimator) - - # run fit and predict - estimator.fit( - FULL_TEST_DATA_DICT[datatype]["train"][0], - FULL_TEST_DATA_DICT[datatype]["train"][1], - ) - y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) - _assert_predict_labels(y_pred, datatype, unique_labels=[0, 1]) diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index 643118793d..d583104e6b 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -11,8 +11,7 @@ import numpy as np from sklearn.exceptions import NotFittedError -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector -from aeon.anomaly_detection.whole_series.base import BaseCollectionAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.base._base import _clone_estimator from aeon.classification import BaseClassifier @@ -32,9 +31,6 @@ from 
aeon.testing.estimator_checking._yield_clustering_checks import ( _yield_clustering_checks, ) -from aeon.testing.estimator_checking._yield_collection_anomaly_detection_checks import ( - _yield_collection_anomaly_detection_checks, -) from aeon.testing.estimator_checking._yield_early_classification_checks import ( _yield_early_classification_checks, ) @@ -138,16 +134,11 @@ def _yield_all_aeon_checks( estimator_class, estimator_instances, datatypes ) - if issubclass(estimator_class, BaseSeriesAnomalyDetector): + if issubclass(estimator_class, BaseAnomalyDetector): yield from _yield_anomaly_detection_checks( estimator_class, estimator_instances, datatypes ) - if issubclass(estimator_class, BaseCollectionAnomalyDetector): - yield from _yield_collection_anomaly_detection_checks( - estimator_class, estimator_instances, datatypes - ) - if issubclass(estimator_class, BaseTransformer): yield from _yield_transformation_checks( estimator_class, estimator_instances, datatypes diff --git a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py index d48ebfcfb5..4ec14d35fa 100644 --- a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py +++ b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py @@ -10,10 +10,10 @@ import numpy as np -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector -class MockAnomalyDetector(BaseSeriesAnomalyDetector): +class MockAnomalyDetector(BaseAnomalyDetector): """Mock anomaly detector.""" _tags = { diff --git a/aeon/testing/testing_data.py b/aeon/testing/testing_data.py index 1458d4a0d9..eb134cddda 100644 --- a/aeon/testing/testing_data.py +++ b/aeon/testing/testing_data.py @@ -2,8 +2,7 @@ import numpy as np -from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.base import BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -1013,7 +1012,6 @@ def _get_task_for_estimator(estimator): or isinstance(estimator, BaseEarlyClassifier) or isinstance(estimator, BaseClusterer) or isinstance(estimator, BaseCollectionTransformer) - or isinstance(estimator, BaseCollectionAnomalyDetector) ): data_label = "Classification" # collection data with continuous target labels @@ -1023,7 +1021,7 @@ def _get_task_for_estimator(estimator): data_label = "SimilaritySearch" # series data with no secondary input elif ( - isinstance(estimator, BaseSeriesAnomalyDetector) + isinstance(estimator, BaseAnomalyDetector) or isinstance(estimator, BaseSegmenter) or isinstance(estimator, BaseSeriesTransformer) or isinstance(estimator, BaseForecaster) diff --git a/aeon/utils/base/_identifier.py b/aeon/utils/base/_identifier.py index 2857b45bd1..cf2722cfcb 100644 --- a/aeon/utils/base/_identifier.py +++ b/aeon/utils/base/_identifier.py @@ -47,8 +47,6 @@ def get_identifier(estimator): if len(identifiers) == 0: raise TypeError("Error, no identifiers could be determined for estimator") - if len(identifiers) > 1 and "anomaly-detector" in identifiers: - identifiers.remove("anomaly-detector") if len(identifiers) > 1 and "estimator" in identifiers: identifiers.remove("estimator") if len(identifiers) > 1 and "series-estimator" in identifiers: diff --git a/aeon/utils/base/_register.py 
b/aeon/utils/base/_register.py index 749c005e5f..1d81c2512c 100644 --- a/aeon/utils/base/_register.py +++ b/aeon/utils/base/_register.py @@ -15,9 +15,8 @@ "VALID_ESTIMATOR_BASES", ] + from aeon.anomaly_detection.base import BaseAnomalyDetector -from aeon.anomaly_detection.collection.base import BaseCollectionAnomalyDetector -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector from aeon.base import BaseAeonEstimator, BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification.base import BaseClassifier from aeon.classification.early_classification import BaseEarlyClassifier @@ -33,13 +32,12 @@ # all base classes BASE_CLASS_REGISTER = { # abstract - no estimator directly inherits from these - "anomaly-detector": BaseAnomalyDetector, "collection-estimator": BaseCollectionEstimator, "estimator": BaseAeonEstimator, "series-estimator": BaseSeriesEstimator, "transformer": BaseTransformer, # estimator types - "collection-anomaly-detector": BaseCollectionAnomalyDetector, + "anomaly-detector": BaseAnomalyDetector, "collection-transformer": BaseCollectionTransformer, "classifier": BaseClassifier, "clusterer": BaseClusterer, @@ -47,7 +45,6 @@ "regressor": BaseRegressor, "segmenter": BaseSegmenter, "similarity_searcher": BaseSimilaritySearch, - "series-anomaly-detector": BaseSeriesAnomalyDetector, "series-transformer": BaseSeriesTransformer, "forecaster": BaseForecaster, } @@ -56,11 +53,5 @@ VALID_ESTIMATOR_BASES = { k: BASE_CLASS_REGISTER[k] for k in BASE_CLASS_REGISTER.keys() - - { - "anomaly-detector", - "estimator", - "collection-estimator", - "series-estimator", - "transformer", - } + - {"estimator", "collection-estimator", "series-estimator", "transformer"} } diff --git a/aeon/utils/base/tests/test_identifier.py b/aeon/utils/base/tests/test_identifier.py index 99eed77dad..8084492599 100644 --- a/aeon/utils/base/tests/test_identifier.py +++ b/aeon/utils/base/tests/test_identifier.py @@ -34,7 +34,7 @@ def test_get_identifier(): assert ( get_identifier(MockAnomalyDetector) == get_identifier(MockAnomalyDetector()) - == "series-anomaly-detector" + == "anomaly-detector" ) assert ( get_identifier(MockSeriesTransformer) diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index 7edb50f132..e1bacdd5ad 100644 --- a/aeon/utils/tags/_tags.py +++ b/aeon/utils/tags/_tags.py @@ -138,11 +138,7 @@ class : identifier for the base class of objects this tag applies to "point belongs to.", }, "requires_y": { - "class": [ - "transformer", - "anomaly-detector", - "segmenter", - ], + "class": ["transformer", "anomaly-detector", "segmenter"], "type": "bool", "description": "Does this estimator require y to be passed in its methods?", }, diff --git a/aeon/utils/tags/tests/test_discovery.py b/aeon/utils/tags/tests/test_discovery.py index cc9dca0e36..fd3d57fa16 100644 --- a/aeon/utils/tags/tests/test_discovery.py +++ b/aeon/utils/tags/tests/test_discovery.py @@ -2,7 +2,7 @@ import pytest -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.classification import BaseClassifier from aeon.testing.mock_estimators import MockClassifier from aeon.testing.mock_estimators._mock_anomaly_detectors import MockAnomalyDetector @@ -42,8 +42,8 @@ def test_all_tags_for_estimator_anomaly_detection(): assert "capability:contractable" not in tags assert tags == all_tags_for_estimator(MockAnomalyDetector) - assert tags == all_tags_for_estimator(BaseSeriesAnomalyDetector) - assert tags == 
all_tags_for_estimator("series-anomaly-detector") + assert tags == all_tags_for_estimator(BaseAnomalyDetector) + assert tags == all_tags_for_estimator("anomaly-detector") tag_names = all_tags_for_estimator(MockAnomalyDetector(), names_only=True) assert isinstance(tag_names, list) diff --git a/aeon/utils/tests/test_discovery.py b/aeon/utils/tests/test_discovery.py index f71727320c..9a0fcd6ee7 100644 --- a/aeon/utils/tests/test_discovery.py +++ b/aeon/utils/tests/test_discovery.py @@ -3,7 +3,7 @@ import pytest from sklearn.base import BaseEstimator -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.base import BaseAeonEstimator from aeon.classification import BaseClassifier, DummyClassifier from aeon.clustering import BaseClusterer @@ -62,7 +62,7 @@ def test_all_estimators_by_type(item): [ [BaseTransformer, BaseClassifier], [BaseClassifier, "segmenter"], - [BaseClassifier, BaseSeriesAnomalyDetector, BaseClusterer], + [BaseClassifier, BaseAnomalyDetector, BaseClusterer], ], ) def test_all_estimators_by_multiple_types(input): diff --git a/docs/api_reference/anomaly_detection.rst b/docs/api_reference/anomaly_detection.rst index e50d51f3f0..3e22c445b7 100644 --- a/docs/api_reference/anomaly_detection.rst +++ b/docs/api_reference/anomaly_detection.rst @@ -34,7 +34,6 @@ Distance-based MERLIN OneClassSVM STOMP - ROCKAD Distribution-based ----------------- @@ -76,8 +75,7 @@ Reconstruction-based The algorithms for this family are not implemented yet. - -Whole-series +Whole-Series ------------ .. currentmodule:: aeon.anomaly_detection.whole_series @@ -86,9 +84,7 @@ Whole-series :toctree: auto_generated/ :template: class.rst - BaseCollectionAnomalyDetector - ClassificationAdapter - OutlierDetectionAdapter + ROCKAD Base ---- diff --git a/docs/developer_guide/adding_typehints.md b/docs/developer_guide/adding_typehints.md index ad0a9d5fa9..5f77ce119b 100644 --- a/docs/developer_guide/adding_typehints.md +++ b/docs/developer_guide/adding_typehints.md @@ -34,23 +34,17 @@ information. The `pyod` `BaseDetector` class can now be used in type hints with these additions. 
```python -"""Adapter for PyOD models""" - from __future__ import annotations -__maintainer__ = [] -__all__ = ["PyODAdapter"] - -from aeon.anomaly_detection.series.base import BaseSeriesAnomalyDetector +from aeon.anomaly_detection.base import BaseAnomalyDetector from typing import TYPE_CHECKING if TYPE_CHECKING: from pyod.models.base import BaseDetector - -class PyODAdapter(BaseSeriesAnomalyDetector): +class PyODAdapter(BaseAnomalyDetector): def __init__( - self, pyod_model: BaseDetector, window_size: int = 10, stride: int = 1 + self, pyod_model: BaseDetector, window_size: int = 10, stride: int = 1 ): self.pyod_model = pyod_model self.window_size = window_size diff --git a/examples/anomaly_detection/anomaly_detection.ipynb b/examples/anomaly_detection/anomaly_detection.ipynb index c9011d3871..7afd00aff8 100644 --- a/examples/anomaly_detection/anomaly_detection.ipynb +++ b/examples/anomaly_detection/anomaly_detection.ipynb @@ -185,7 +185,7 @@ "metadata": {}, "outputs": [], "source": [ - "from aeon.anomaly_detection.series.distance_based import STOMP\n", + "from aeon.anomaly_detection.distance_based import STOMP\n", "from aeon.benchmarking.metrics.anomaly_detection import range_roc_auc_score\n", "\n", "detector = STOMP(window_size=200)\n", @@ -203,21 +203,21 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "743fbbaa-a7d0-4f56-993a-07453f6a9442", + "metadata": {}, + "outputs": [], "source": [ "from pyod.models.ocsvm import OCSVM\n", "\n", - "from aeon.anomaly_detection.series import PyODAdapter\n", + "from aeon.anomaly_detection.outlier_detection import PyODAdapter\n", "from aeon.benchmarking.metrics.anomaly_detection import range_roc_auc_score\n", "\n", "detector = PyODAdapter(OCSVM(), window_size=3)\n", "y_scores = detector.fit_predict(X, axis=0)\n", "range_roc_auc_score(y, y_scores)" - ], - "id": "8c89d43f2e5476e6" + ] }, { "cell_type": "markdown", From c0776e6dae217c6fdc136e87d96ed5cbdee75197 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 11:45:44 +0100 Subject: [PATCH 14/16] fixes --- aeon/transformations/series/_dft.py | 2 +- aeon/transformations/series/_exp_smoothing.py | 2 +- aeon/transformations/series/_gauss.py | 2 +- .../transformations/series/_moving_average.py | 2 +- aeon/transformations/series/_sg.py | 2 +- aeon/transformations/series/_siv.py | 2 +- .../series/smoothing/_exp_smoothing.py | 6 +-- .../series/smoothing/_moving_average.py | 2 +- .../smoothing/tests/test_exp_smoothing.py | 9 ++-- .../smoothing/tests/test_moving_average.py | 8 ++-- docs/api_reference/transformations.rst | 15 +++++++ .../transformations/smoothing_filters.ipynb | 42 ++++++++----------- 12 files changed, 54 insertions(+), 40 deletions(-) diff --git a/aeon/transformations/series/_dft.py b/aeon/transformations/series/_dft.py index 24e0b27ab7..d29ff92d8f 100644 --- a/aeon/transformations/series/_dft.py +++ b/aeon/transformations/series/_dft.py @@ -50,4 +50,4 @@ class DFTSeriesTransformer(DiscreteFourierApproximation): (2, 100) """ - ... + pass diff --git a/aeon/transformations/series/_exp_smoothing.py b/aeon/transformations/series/_exp_smoothing.py index 08217a9d05..56739e4491 100644 --- a/aeon/transformations/series/_exp_smoothing.py +++ b/aeon/transformations/series/_exp_smoothing.py @@ -61,4 +61,4 @@ class ExpSmoothingSeriesTransformer(ExponentialSmoothing): [10. 9.5 8.75 7.875]] """ - ... 
+ pass diff --git a/aeon/transformations/series/_gauss.py b/aeon/transformations/series/_gauss.py index 959cddac6e..a596cf74e1 100644 --- a/aeon/transformations/series/_gauss.py +++ b/aeon/transformations/series/_gauss.py @@ -53,4 +53,4 @@ class GaussSeriesTransformer(GaussianFilter): (2, 100) """ - ... + pass diff --git a/aeon/transformations/series/_moving_average.py b/aeon/transformations/series/_moving_average.py index 61fb15e834..930e1a6ecb 100644 --- a/aeon/transformations/series/_moving_average.py +++ b/aeon/transformations/series/_moving_average.py @@ -50,4 +50,4 @@ class MovingAverageSeriesTransformer(MovingAverage): [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] """ - ... + pass diff --git a/aeon/transformations/series/_sg.py b/aeon/transformations/series/_sg.py index 0fc1de3984..1f1db1761a 100644 --- a/aeon/transformations/series/_sg.py +++ b/aeon/transformations/series/_sg.py @@ -53,4 +53,4 @@ class SGSeriesTransformer(SavitzkyGolayFilter): (2, 100) """ - ... + pass diff --git a/aeon/transformations/series/_siv.py b/aeon/transformations/series/_siv.py index b6eeb5e591..f343df8f04 100644 --- a/aeon/transformations/series/_siv.py +++ b/aeon/transformations/series/_siv.py @@ -55,4 +55,4 @@ class SIVSeriesTransformer(RecursiveMedianSieve): (2, 100) """ - ... + pass diff --git a/aeon/transformations/series/smoothing/_exp_smoothing.py b/aeon/transformations/series/smoothing/_exp_smoothing.py index cd70138fa8..7b944a7854 100644 --- a/aeon/transformations/series/smoothing/_exp_smoothing.py +++ b/aeon/transformations/series/smoothing/_exp_smoothing.py @@ -34,11 +34,11 @@ class ExponentialSmoothing(BaseSeriesTransformer): >>> X = np.array([-2, -1, 0, 1, 2]) >>> transformer = ExponentialSmoothing(0.5) >>> transformer.fit_transform(X) - [[-2. -1.5 -0.75 0.125 1.0625]] + array([[-2. , -1.5 , -0.75 , 0.125 , 1.0625]]) >>> X = np.array([[1, 2, 3, 4], [10, 9, 8, 7]]) >>> transformer.fit_transform(X) - [[ 1. 1.5 2.25 3.125] - [10. 9.5 8.75 7.875]] + array([[ 1. , 1.5 , 2.25 , 3.125], + [10. 
, 9.5 , 8.75 , 7.875]]) """ _tags = { diff --git a/aeon/transformations/series/smoothing/_moving_average.py b/aeon/transformations/series/smoothing/_moving_average.py index c4b806e6c7..612b31b32b 100644 --- a/aeon/transformations/series/smoothing/_moving_average.py +++ b/aeon/transformations/series/smoothing/_moving_average.py @@ -25,7 +25,7 @@ class MovingAverage(BaseSeriesTransformer): >>> X = np.array([-3, -2, -1, 0, 1, 2, 3]) >>> transformer = MovingAverage(2) >>> transformer.fit_transform(X) - [[-2.5 -1.5 -0.5 0.5 1.5 2.5]] + array([[-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]]) """ _tags = { diff --git a/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py b/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py index c441104e8c..e3ca4b0267 100644 --- a/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py +++ b/aeon/transformations/series/smoothing/tests/test_exp_smoothing.py @@ -43,16 +43,19 @@ def test_window_size_matches_alpha(alpha_window): def test_alpha_less_than_zero(): """Test alpha less than zero.""" with pytest.raises(ValueError): - ExponentialSmoothing(-0.5) + e = ExponentialSmoothing(-0.5) + e.fit_transform(TEST_DATA[0]) def test_alpha_greater_than_one(): """Test alpha greater than one.""" with pytest.raises(ValueError): - ExponentialSmoothing(2.0) + e = ExponentialSmoothing(2.0) + e.fit_transform(TEST_DATA[0]) def test_window_size_than_one(): """Test window_size < 0.""" with pytest.raises(ValueError): - ExponentialSmoothing(window_size=0) + e = ExponentialSmoothing(window_size=0) + e.fit_transform(TEST_DATA[0]) diff --git a/aeon/transformations/series/smoothing/tests/test_moving_average.py b/aeon/transformations/series/smoothing/tests/test_moving_average.py index 6fa1e55a74..049de5dbaa 100644 --- a/aeon/transformations/series/smoothing/tests/test_moving_average.py +++ b/aeon/transformations/series/smoothing/tests/test_moving_average.py @@ -19,7 +19,7 @@ def test_window_size_greater_than_zero(): """Test window sizes > 0.""" ma = MovingAverage(window_size=1) xt = ma.fit_transform(TEST_DATA[0]) - np.testing.assert_array_almost_equal(xt, TEST_DATA[0], decimal=2) + np.testing.assert_array_almost_equal(xt[0], TEST_DATA[0], decimal=2) ma = MovingAverage(window_size=2) for i in range(len(TEST_DATA)): @@ -30,10 +30,12 @@ def test_window_size_greater_than_zero(): def test_window_size_equal_zero(): """Test window size == 0.""" with pytest.raises(ValueError): - MovingAverage(window_size=0) + m = MovingAverage(window_size=0) + m.fit_transform(TEST_DATA[0]) def test_window_size_less_than_zero(): """Test window sizes < 0.""" with pytest.raises(ValueError): - MovingAverage(window_size=-1) + m = MovingAverage(window_size=-1) + m.fit_transform(TEST_DATA[0]) diff --git a/docs/api_reference/transformations.rst b/docs/api_reference/transformations.rst index 2a56fd847f..138ca4ffcb 100644 --- a/docs/api_reference/transformations.rst +++ b/docs/api_reference/transformations.rst @@ -180,6 +180,21 @@ Series transforms PCASeriesTransformer WarpingSeriesTransformer +Smoothing +~~~~~~~~~ + +.. currentmodule:: aeon.transformations.series.smoothing + +.. 
autosummary:: + :toctree: auto_generated/ + :template: class.rst + + DiscreteFourierApproximation + ExponentialSmoothing + GaussianFilter + MovingAverage + SavitzkyGolayFilter + RecursiveMedianSieve Base ---- diff --git a/examples/transformations/smoothing_filters.ipynb b/examples/transformations/smoothing_filters.ipynb index 6a7776f04e..d0026dc329 100644 --- a/examples/transformations/smoothing_filters.ipynb +++ b/examples/transformations/smoothing_filters.ipynb @@ -76,9 +76,11 @@ " axe.legend()\n", "\n", "\n", - "def plot_transformation(transformer=None):\n", + "def plot_transformation(transformer, title=None):\n", " \"\"\"Plot transformation for each ts.\"\"\"\n", " fig, axes = plt.subplots(2, 2, figsize=(16, 8), dpi=75)\n", + " if title is not None:\n", + " fig.suptitle(title)\n", "\n", " plot_axes(\n", " axes[0, 0], x_airline, transformer.fit_transform(x_airline)[0], \"x_airline\"\n", @@ -114,9 +116,9 @@ } ], "source": [ - "from aeon.transformations.series import GaussSeriesTransformer\n", + "from aeon.transformations.series.smoothing import GaussianFilter\n", "\n", - "t = GaussSeriesTransformer()\n", + "t = GaussianFilter()\n", "plot_transformation(t)" ] }, @@ -156,13 +158,13 @@ } ], "source": [ - "from aeon.transformations.series import DFTSeriesTransformer\n", + "from aeon.transformations.series.smoothing import DiscreteFourierApproximation\n", "\n", - "t = DFTSeriesTransformer()\n", - "plot_transformation(t)\n", + "t = DiscreteFourierApproximation()\n", + "plot_transformation(t, title=\"DFA Default\")\n", "\n", - "t = DFTSeriesTransformer(r=0.1, sort=True)\n", - "plot_transformation(t)" + "t = DiscreteFourierApproximation(r=0.1, sort=True)\n", + "plot_transformation(t, title=\"DFA Sorted\")" ] }, { @@ -191,9 +193,9 @@ } ], "source": [ - "from aeon.transformations.series import SIVSeriesTransformer\n", + "from aeon.transformations.series.smoothing import RecursiveMedianSieve\n", "\n", - "t = SIVSeriesTransformer()\n", + "t = RecursiveMedianSieve()\n", "plot_transformation(t)" ] }, @@ -223,9 +225,9 @@ } ], "source": [ - "from aeon.transformations.series import SGSeriesTransformer\n", + "from aeon.transformations.series.smoothing import SavitzkyGolayFilter\n", "\n", - "t = SGSeriesTransformer()\n", + "t = SavitzkyGolayFilter()\n", "plot_transformation(t)" ] }, @@ -255,9 +257,9 @@ } ], "source": [ - "from aeon.transformations.series._moving_average import MovingAverageSeriesTransformer\n", + "from aeon.transformations.series.smoothing import MovingAverage\n", "\n", - "t = MovingAverageSeriesTransformer()\n", + "t = MovingAverage()\n", "plot_transformation(t)" ] }, @@ -287,19 +289,11 @@ } ], "source": [ - "from aeon.transformations.series._exp_smoothing import ExpSmoothingSeriesTransformer\n", + "from aeon.transformations.series.smoothing import ExponentialSmoothing\n", "\n", - "t = ExpSmoothingSeriesTransformer()\n", + "t = ExponentialSmoothing()\n", "plot_transformation(t)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "85639aea-d14f-4b41-9b1f-b72444ecc7ae", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 5f7346d337bf791923d7ac22c6c325426edc3ae8 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 12:09:21 +0100 Subject: [PATCH 15/16] fixes --- .readthedocs.yml | 2 +- pyproject.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 9005cc0fc1..9abd56f587 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -13,7 +13,7 @@ python: build: os: ubuntu-24.04 
tools: - python: "3.10" + python: "3.11" sphinx: configuration: docs/conf.py diff --git a/pyproject.toml b/pyproject.toml index 06408e7b9d..440465b0e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,6 +120,8 @@ docs = [ "furo", "tabulate", "matplotlib", + + "snowballstemmer<3" # temporary to fix doc build bug ] [project.urls] From ebc961dd892100076dd81746b8d6caf9cb56b4f3 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 8 May 2025 17:42:58 +0100 Subject: [PATCH 16/16] no longer required --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 440465b0e4..06408e7b9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,8 +120,6 @@ docs = [ "furo", "tabulate", "matplotlib", - - "snowballstemmer<3" # temporary to fix doc build bug ] [project.urls]