Source code for dtaianomaly.anomaly_detection.HistogramBasedOutlierScore

from typing import Literal

from pyod.models.hbos import HBOS

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector


[docs] class HistogramBasedOutlierScore(PyODAnomalyDetector): """ Anomaly detector based on the Histogram Based Outlier Score (HBOS) algorithm :cite:`goldstein2012histogram`. Histogram Based Outlier Score (HBOS) constructs for each feature a univariate histogram. Bins with a small height (for static bin widths) or wider bins (for dynamic bin widths) correspond to sparse regions of the feature space. Thus, values falling in these bins lay in sparse regions of the feature space and are considered more anomalous. In this implementation, it is possible to set a window size to take the past observations into account. However, HBOS assumes feature independence. Therefore, for a time series with :math:`D` attributes and a window size :math:`w`, HBOS constructs :math:`D \\times w` independent histograms, from which the anomaly score is computed. Parameters ---------- window_size: int or str The window size to use for extracting sliding windows from the time series. This value will be passed to :py:meth:`~dtaianomaly.anomaly_detection.compute_window_size`. stride: int, default=1 The stride, i.e., the step size for extracting sliding windows from the time series. n_bins: int or 'auto', default=10 The number of bins for each feature. If ``'auto'``, the birge-rozenblac method is used for automatically selecting the number of bins for each feature. alpha: float in [0, 1], default=0.1 The regularizer for preventing overlfow. tol: float in [0, 1], default=0.5 Parameter defining the flexibility for dealing with samples that fall outside the bins. **kwargs Arguments to be passed to the PyOD histogram based outlier score. Attributes ---------- window_size_: int The effectively used window size for this anomaly detector pyod_detector_ : HBOS An HBOS detector of PyOD Examples -------- >>> from dtaianomaly.anomaly_detection import HistogramBasedOutlierScore >>> from dtaianomaly.data import demonstration_time_series >>> x, y = demonstration_time_series() >>> hbos = HistogramBasedOutlierScore(1).fit(x) >>> hbos.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE array([0.51808795, 0.51808795, 0.51808795, ..., 0.48347552, 0.48347552, 0.48347552]...) Notes ----- The HBOS detector inherets from :py:class:`~dtaianomaly.anomaly_detection.PyODAnomalyDetector`. """ n_bins: int | Literal["auto"] alpha: float tol: float def __init__( self, window_size: int | str, stride: int = 1, n_bins: int | Literal["auto"] = 10, alpha: float = 0.1, tol: float = 0.5, **kwargs, ): if isinstance(n_bins, str): if n_bins != "auto": raise ValueError("If `n_bins` is a string, it should equal 'auto'.") else: if not isinstance(n_bins, int) or isinstance(n_bins, bool): raise TypeError("`n_bins` should be integer or 'auto'") if n_bins <= 1: raise ValueError("`n_bins` should be at least 2") if not isinstance(alpha, (float, int)) or isinstance(alpha, bool): raise TypeError("`alpha` should be numeric") if alpha <= 0 or alpha >= 1: raise ValueError("`alpha` must be in [0, 1]") if not isinstance(tol, (float, int)) or isinstance(tol, bool): raise TypeError("`tol` should be numeric") if tol <= 0 or tol >= 1: raise ValueError("`tol` must be in [0, 1]") self.n_bins = n_bins self.alpha = alpha self.tol = tol super().__init__(window_size, stride, **kwargs) def _initialize_detector(self, **kwargs) -> HBOS: return HBOS(n_bins=self.n_bins, alpha=self.alpha, tol=self.tol, **kwargs) def _supervision(self): return Supervision.UNSUPERVISED