Source code for dtaianomaly.thresholding._ContaminationRateThreshold

import numpy as np

from dtaianomaly.thresholding._Thresholding import Thresholding
from dtaianomaly.type_validation import FloatAttribute

__all__ = ["ContaminationRateThreshold"]


class ContaminationRateThreshold(Thresholding):
    """
    Flag the highest-scoring fraction of samples as anomalous.

    Anomaly scores that reach the ``1 - contamination_rate`` quantile of
    the given scores are considered anomalous (1); all other (lower)
    scores are considered normal (0).

    Parameters
    ----------
    contamination_rate : float
        Fraction of the instances that is assumed to be anomalous. The
        declared validation bounds for this attribute are 0.0 and 1.0.

    Examples
    --------
    >>> from dtaianomaly.thresholding import ContaminationRateThreshold
    >>> thresholder = ContaminationRateThreshold(0.25)
    >>> thresholder.threshold([0.1, 0.2, 0.3, 0.6, 0.8, 0.5, 0.3, 0.3])
    array([0, 0, 0, 1, 1, 0, 0, 0])
    """

    contamination_rate: float

    # Declarative constraints on the constructor argument.
    attribute_validation = {
        "contamination_rate": FloatAttribute(minimum=0.0, maximum=1.0),
    }

    def __init__(self, contamination_rate: float):
        # No explicit type/range checks here: the constraints are declared
        # in ``attribute_validation`` (presumably enforced by the shared
        # validation machinery outside this class -- confirm).
        self.contamination_rate = contamination_rate

    def _threshold(self, scores: np.ndarray):
        """
        Label every score that reaches the contamination-rate quantile.

        Parameters
        ----------
        scores : array-like (n_samples)
            Raw anomaly scores.

        Returns
        -------
        anomaly_labels : array-like of shape (n_samples)
            Element-wise outcome of comparing each score against the
            cutoff: truthy for anomalous samples, falsy for normal ones.
            NOTE(review): the comparison itself yields a boolean array;
            the 0/1 integers shown in the class example presumably come
            from the public ``threshold`` wrapper -- confirm.
        """
        # A rate r marks the top r fraction, so the decision boundary is
        # the (1 - r)-quantile of the observed scores.
        quantile_level = 1.0 - self.contamination_rate
        boundary = np.quantile(scores, quantile_level)

        # Scores equal to the boundary are counted as anomalous.
        is_anomalous = scores >= boundary
        return np.asarray(is_anomalous)