Source code for dtaianomaly.evaluation.simple_proba_metrics

import numpy as np
from sklearn import metrics

from dtaianomaly.evaluation.metrics import ProbaMetric


class AreaUnderROC(ProbaMetric):
    """
    Area Under the Receiver Operating Characteristic curve (AUC-ROC).

    The ROC curve traces the true positive rate (recall) as a function
    of the false positive rate while the decision threshold sweeps over
    the predicted anomaly scores. The area under that curve condenses
    the whole curve into one number, which can be read as the
    probability that a randomly drawn anomaly receives a higher score
    than a randomly drawn normal observation.

    A score of 1.0 means anomalies and normal observations are separated
    perfectly, whereas 0.5 corresponds to random guessing. The metric
    thereby captures the trade-off between finding the true anomalies
    (high recall) and raising few false alarms.
    """

    def __init__(self) -> None:
        super().__init__()

    def _compute(self, y_true: np.ndarray, y_pred: np.ndarray, **kwargs) -> float:
        """
        Compute the AUC-ROC of the given anomaly scores.

        Parameters
        ----------
        y_true : np.ndarray
            Ground-truth labels, forwarded to ``roc_auc_score`` as
            ``y_true``.
        y_pred : np.ndarray
            Predicted anomaly scores, forwarded to ``roc_auc_score`` as
            ``y_score``.
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        float
            The area under the ROC curve as a builtin ``float``.
        """
        area = metrics.roc_auc_score(y_true, y_pred)
        # Convert the NumPy scalar into a plain Python float.
        return float(area)
class AreaUnderPR(ProbaMetric):
    """
    Area Under the Precision-Recall curve (AUC-PR).

    The Precision-Recall curve shows, for every decision threshold on
    the predicted anomaly scores, the precision that is reached at the
    corresponding recall. It therefore exposes the trade-off between
    finding the true anomalies (recall) and limiting false alarms
    (precision), which makes it well suited for heavily imbalanced
    problems such as anomaly detection, where normal observations
    vastly outnumber the anomalies.

    The area under this curve summarizes it in a single value: the
    larger the AUC-PR, the better the model detects true anomalies
    while producing few false positives.
    """

    def __init__(self) -> None:
        super().__init__()

    def _compute(self, y_true: np.ndarray, y_pred: np.ndarray, **kwargs) -> float:
        """
        Compute the AUC-PR of the given anomaly scores.

        Parameters
        ----------
        y_true : np.ndarray
            Ground-truth labels, passed as first argument to
            ``precision_recall_curve``.
        y_pred : np.ndarray
            Predicted anomaly scores, passed as second argument to
            ``precision_recall_curve``.
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        float
            The area under the Precision-Recall curve as a builtin
            ``float``.
        """
        # The thresholds (third return value) are not needed for the area.
        curve_precision, curve_recall, _ = metrics.precision_recall_curve(
            y_true, y_pred
        )
        # Recall is the x-axis and precision the y-axis of the curve.
        area = metrics.auc(x=curve_recall, y=curve_precision)
        return float(area)