Source code for dtaianomaly.anomaly_detection.KNearestNeighbors

from pyod.models.knn import KNN

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector



[docs]
class KNearestNeighbors(PyODAnomalyDetector):
    """
    Anomaly detector based on K-nearest neighbors [ramaswamy2000efficient]_.

    Given some distance metric :math:`dist`, the :math:`K`-nearest neighbor of an
    instance :math:`x` is the sample :math:`y` such that there exist exactly :math:`K-1`
    other samples :math:`z` with :math:`dist(x, z) < dist(x, y)`. The :math:`K`-nearest neighbor
    distance of :math:`x` equals the distance to this :math:`K`the nearest neighbor.
    The larger this :math:`K`-nearest neighbor distance of a sample is, the further
    away it is from the other instances. :math:`K`-nearest neighbor uses this distance
    as an anomaly score, and thus detects distance-based anomalies.

    Notes
    -----
    The K-nearest neighbors inherets from :py:class:`~dtaianomaly.anomaly_detection.PyodAnomalyDetector`.

    Parameters
    ----------
    window_size: int or str
        The window size to use for extracting sliding windows from the time series. This
        value will be passed to :py:meth:`~dtaianomaly.anomaly_detection.compute_window_size`.
    stride: int, default=1
        The stride, i.e., the step size for extracting sliding windows from the time series.
    **kwargs
        Arguments to be passed to the PyOD isolation forest.

    Attributes
    ----------
    window_size_: int
        The effectively used window size for this anomaly detector
    pyod_detector_ : KNN
        A K-nearest neighbors detector of PyOD

    Examples
    --------
    >>> from dtaianomaly.anomaly_detection import KNearestNeighbors
    >>> from dtaianomaly.data import demonstration_time_series
    >>> x, y = demonstration_time_series()
    >>> knn = KNearestNeighbors(10).fit(x)
    >>> knn.decision_function(x)
    array([0.2527578 , 0.26430228, 0.2728953 , ..., 0.26269151, 0.26798469,
           0.26139759])

    References
    ----------
    .. [ramaswamy2000efficient] Ramaswamy, Sridhar, Rajeev Rastogi, and Kyuseok Shim.
       "Efficient algorithms for mining outliers from large data sets." Proceedings
       of the 2000 ACM SIGMOD international conference on Management of data. 2000,
       doi: `10.1145/342009.33543 <https://doi.org/10.1145/342009.33543>`_.
    """

    def _initialize_detector(self, **kwargs) -> KNN:
        return KNN(**kwargs)

    def _supervision(self):
        return Supervision.UNSUPERVISED