Source code for dtaianomaly.anomaly_detection._IsolationForest

from typing import Literal

from pyod.models.iforest import IForest

from dtaianomaly.anomaly_detection._BaseDetector import Supervision
from dtaianomaly.anomaly_detection._BasePyODAnomalyDetector import (
    BasePyODAnomalyDetector,
)
from dtaianomaly.type_validation import (
    FloatAttribute,
    IntegerAttribute,
    LiteralAttribute,
)
from dtaianomaly.windowing import WINDOW_SIZE_TYPE

__all__ = ["IsolationForest"]


class IsolationForest(BasePyODAnomalyDetector):
    """
    Anomaly detector based on the Isolation Forest algorithm :cite:`liu2008isolation`.

    The isolation forest generates random binary trees to split the data.
    If an instance requires fewer splits to isolate it from the other data,
    it is nearer to the root of the tree, and consequently receives a higher
    anomaly score.

    Parameters
    ----------
    window_size : int or str
        The window size to use for extracting sliding windows from the time series. This
        value will be passed to :py:meth:`~dtaianomaly.anomaly_detection.compute_window_size`.
    stride : int, default=1
        The stride, i.e., the step size for extracting sliding windows from the time series.
    n_estimators : int, default=100
        The number of base trees in the ensemble. Must be at least 1.
    max_samples : int, float or ``'auto'``, default='auto'
        The number of samples to draw for training each base estimator:

        - if ``int``: Draw at most ``max_samples`` samples (at least 1).
        - if ``float``: Draw at most ``max_samples`` fraction of the samples
          (strictly greater than 0.0).
        - if ``'auto'``: Set ``max_samples=min(256, n_windows)``.

    max_features : int or float, default=1.0
        The number of features to use for training each base estimator:

        - if ``int``: Use at most ``max_features`` features (at least 1).
        - if ``float``: Use at most ``max_features`` fraction of the features
          (strictly greater than 0.0).

    **kwargs
        Arguments to be passed to the PyOD isolation forest.

    Attributes
    ----------
    window_size_ : int
        The effectively used window size for this anomaly detector
    pyod_detector_ : IForest
        An Isolation Forest detector of PyOD

    Examples
    --------
    >>> from dtaianomaly.anomaly_detection import IsolationForest
    >>> from dtaianomaly.data import demonstration_time_series
    >>> x, y = demonstration_time_series()
    >>> isolation_forest = IsolationForest(10).fit(x)
    >>> isolation_forest.decision_function(x)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    array([-0.02301142, -0.01266304, -0.00786237, ..., -0.04561172, -0.0420979 , -0.04414417]...)
    """

    n_estimators: int
    max_samples: float | int | Literal["auto"]
    max_features: int | float

    # Declarative constraints for the hyperparameters above. Alternatives
    # combined with ``|`` accept a value if any one of them accepts it.
    attribute_validation = {
        "n_estimators": IntegerAttribute(minimum=1),
        "max_samples": IntegerAttribute(minimum=1)
        | FloatAttribute(0.0, 1.0, inclusive_minimum=False)
        | LiteralAttribute("auto"),
        "max_features": IntegerAttribute(minimum=1)
        | FloatAttribute(0.0, 1.0, inclusive_minimum=False),
    }

    def __init__(
        self,
        window_size: WINDOW_SIZE_TYPE,
        stride: int = 1,
        n_estimators: int = 100,
        max_samples: float | int | Literal["auto"] = "auto",
        max_features: int | float = 1.0,
        **kwargs,
    ) -> None:
        # NOTE(review): the hyperparameters are assigned before delegating to
        # the base constructor; presumably the base class reads them (e.g. to
        # build or validate the PyOD detector) -- confirm before reordering.
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.max_features = max_features
        super().__init__(window_size, stride, **kwargs)

    def _initialize_detector(self, **kwargs) -> IForest:
        """Build the PyOD ``IForest`` from the stored hyperparameters and ``kwargs``."""
        return IForest(
            n_estimators=self.n_estimators,
            max_samples=self.max_samples,
            max_features=self.max_features,
            **kwargs,
        )

    def _supervision(self) -> Supervision:
        """Return the supervision type of this detector (unsupervised)."""
        return Supervision.UNSUPERVISED