Source code for dtaianomaly.anomaly_detection.PrincipalComponentAnalysis

from pyod.models.pca import PCA

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector


[docs] class PrincipalComponentAnalysis(PyODAnomalyDetector): """ Anomaly detector based on the Principal Component Analysis (PCA) :cite:`aggarwal2017linear`. PCA maps the data to a lower dimensional space through linear projections. The goal of these projections is to capture the most important information of the samples. This important information is related to the type of behaviors that occur frequently in the data. Thus, anomalies are detected by measuring the deviation of the samples in the lower dimensional space. Parameters ---------- window_size: int or str The window size to use for extracting sliding windows from the time series. This value will be passed to :py:meth:`~dtaianomaly.anomaly_detection.compute_window_size`. stride: int, default=1 The stride, i.e., the step size for extracting sliding windows from the time series. n_components: int or float, default=None The number of components to keep. - if ``int``: Use the specified number of components. - if ``float``: Use the percentage of components. - if ``None``: Use all components. **kwargs: Arguments to be passed to the PyOD PCA. Attributes ---------- window_size_: int The effectively used window size for this anomaly detector pyod_detector_ : PCA A PCA-detector of PyOD Examples -------- >>> from dtaianomaly.anomaly_detection import PrincipalComponentAnalysis >>> from dtaianomaly.data import demonstration_time_series >>> x, y = demonstration_time_series() >>> pca = PrincipalComponentAnalysis(10).fit(x) >>> pca.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE array([16286.63724327, 15951.05917741, 15613.5739773 , ..., 18596.5273311 , 18496.96613747, 18483.47985554]...) Notes ----- PCA inherets from :py:class:`~dtaianomaly.anomaly_detection.PyodAnomalyDetector`. """ n_components: int | float | None def __init__( self, window_size: int | str, stride: int = 1, n_components: int | float = None, **kwargs, ): if n_components is not None: if not isinstance(n_components, (int, float)) or isinstance( n_components, bool ): raise TypeError("`n_components` should be integer or 'auto'") if isinstance(n_components, int) and n_components < 1: raise ValueError("`n_components` should be strictly positive") if isinstance(n_components, float) and ( n_components <= 0 or n_components > 1 ): raise ValueError("`n_components` between 0 and 1") self.n_components = n_components super().__init__(window_size, stride, **kwargs) def _initialize_detector(self, **kwargs) -> PCA: return PCA(n_components=self.n_components, **kwargs) def _supervision(self): return Supervision.SEMI_SUPERVISED