Source code for dtaianomaly.anomaly_detection.KernelPrincipalComponentAnalysis

from typing import Literal

from pyod.models.kpca import KPCA

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector


[docs] class KernelPrincipalComponentAnalysis(PyODAnomalyDetector): """ Anomaly detector based on the Kernel Principal Component Analysis (KPCA) :cite:`hoffmann2007kernel`. Standard PCA maps the data to a lower dimensional space through linear projections. Deviations in this lower dimensional space are then considered to be anomalies. KPCA is a non-linear extension of PCA, which maps the data into a new kernel space, from which the principal components are learned. Parameters ---------- window_size: int or str The window size to use for extracting sliding windows from the time series. This value will be passed to :py:meth:`~dtaianomaly.anomaly_detection.compute_window_size`. stride: int, default=1 The stride, i.e., the step size for extracting sliding windows from the time series. n_components: int, default=None The number of components to use. If None, all non-zero components are kept. kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'cosine'}, default='rbf' The kernel to use for PCA. **kwargs: Arguments to be passed to the PyOD PCA. Attributes ---------- window_size_: int The effectively used window size for this anomaly detector pyod_detector_ : KPCA A KPCA-detector of PyOD Examples -------- >>> from dtaianomaly.anomaly_detection import KernelPrincipalComponentAnalysis >>> from dtaianomaly.data import demonstration_time_series >>> x, y = demonstration_time_series() >>> kpca = KernelPrincipalComponentAnalysis(10, n_components=2).fit(x) >>> kpca.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE array([0.03151377, 0.03697829, 0.04415575, ..., 0.03345565, 0.0330048 , 0.03089501]...) Notes ----- KPCA inherets from :py:class:`~dtaianomaly.anomaly_detection.PyodAnomalyDetector`. """ n_components: int | None kernel: Literal["linear", "poly", "rbf", "sigmoid", "cosine"] def __init__( self, window_size: int | str, stride: int = 1, n_components: int = None, kernel: Literal["linear", "poly", "rbf", "sigmoid", "cosine"] = "rbf", **kwargs, ): if n_components is not None: if not isinstance(n_components, int) or isinstance(n_components, bool): raise TypeError("`n_components` should be integer") if n_components < 1: raise ValueError("`n_components` should be strictly positive") if not isinstance(kernel, str): raise TypeError("`kernel` should be a string!") if kernel not in ["linear", "poly", "rbf", "sigmoid", "cosine"]: raise ValueError( "`kernel` should be one of {'linear', 'poly', 'rbf', 'sigmoid', 'cosine'}" ) self.n_components = n_components self.kernel = kernel super().__init__(window_size, stride, **kwargs) def _initialize_detector(self, **kwargs) -> KPCA: return KPCA(n_components=self.n_components, kernel=self.kernel, **kwargs) def _supervision(self): return Supervision.SEMI_SUPERVISED