Source code for dtaianomaly.thresholding._TopNThreshold

import numpy as np

from dtaianomaly.thresholding._Thresholding import Thresholding
from dtaianomaly.type_validation import IntegerAttribute

__all__ = ["TopNThreshold"]


class TopNThreshold(Thresholding):
    """
    Thresholding based on a top N strategy. The top `n` anomaly scores
    are considered anomalous (1), Other (lower) scores are considered
    normal (0).

    Parameters
    ----------
    n : int
        The number of instances that should be flagged as an anomaly.
        If ``n`` equals 0, no instance is flagged.

    Notes
    -----
    Every score greater than or equal to the ``n``-th largest score is
    flagged. If several scores tie with that cutoff value, more than
    ``n`` instances can therefore be labeled anomalous.

    Examples
    --------
    >>> from dtaianomaly.thresholding import TopNThreshold
    >>> thresholder = TopNThreshold(3)
    >>> thresholder.threshold([0.1, 0.2, 0.3, 0.6, 0.8, 0.5, 0.3, 0.3])
    array([0, 0, 0, 1, 1, 1, 0, 0])
    """

    n: int

    attribute_validation = {"n": IntegerAttribute(minimum=0)}

    def __init__(self, n: int) -> None:
        self.n = n

    def _threshold(self, scores: np.ndarray) -> np.ndarray:
        """
        Apply the top-N thresholding.

        Parameters
        ----------
        scores : array-like (n_samples)
            Raw anomaly scores

        Returns
        -------
        anomaly_labels : array-like of shape (n_samples)
            Integer array of 1s and 0s, representing anomalous samples
            and normal samples respectively

        Raises
        ------
        ValueError
            If the number of given anomaly scores is smaller than
            :py:attr:`~dtaianomaly.thresholding.TopNThreshold.n`.
        """
        if self.n > scores.shape[0]:
            raise ValueError(
                f"There are only {scores.shape[0]} anomaly scores given, but {self.n} observations should be anomalous!"
            )

        # n == 0 must be handled explicitly: ``-0 == 0``, so the partition
        # below would select the *minimum* score as cutoff and flag every
        # observation as anomalous instead of none.
        if self.n == 0:
            return np.zeros(scores.shape, dtype=np.int8)

        # The n-th largest score ends up at index -n after partitioning;
        # everything at or above it is labeled anomalous.
        cutoff = np.partition(scores, -self.n)[-self.n]
        return np.asarray(cutoff <= scores, dtype=np.int8)