Source code for dtaianomaly.data.UCRLoader

import os

import numpy as np

from dtaianomaly.data.DataSet import DataSet
from dtaianomaly.data.PathDataLoader import PathDataLoader


class UCRLoader(PathDataLoader):
    """
    Lazy dataloader for the UCR suite of anomaly detection data sets
    :cite:`wu2023current`.

    This implementation expects the file names to contain the start and
    stop time stamps of the single anomaly in the time series as:
    ``*_<train-test-split>_<start>_<stop>.txt``.
    """

    def _load(self) -> DataSet:
        """
        Load the time series at ``self.path`` and build its anomaly labels.

        The last three underscore-separated tokens of the file name (with
        the extension removed) are read as the train-test split index, the
        start index of the anomaly and the stop index of the anomaly.

        Returns
        -------
        DataSet
            ``X_train`` holds the observations before the split index,
            ``X_test`` the observations from the split index onwards, and
            ``y_test`` the matching labels, equal to 1 for the indices in
            ``[start, stop)`` of the full series and 0 elsewhere.

        Raises
        ------
        ValueError
            If the file name does not end in three integer tokens.
        """
        # Ignore the file extension. ``str.rstrip(".txt")`` would strip a
        # *set of characters* rather than the suffix, so split it off properly.
        file_name = os.path.splitext(os.path.basename(self.path))[0]

        # Extract the meta-information from the name of the file
        try:
            train_test_split, start_anomaly, end_anomaly = map(
                int, file_name.split("_")[-3:]
            )
        except ValueError as error:
            raise ValueError(
                f"Cannot parse '{self.path}': expected a file name of the form "
                "'*_<train-test-split>_<start>_<stop>.txt'."
            ) from error

        # Load time series
        X = np.loadtxt(self.path)
        X_train = X[:train_test_split]
        X_test = X[train_test_split:]

        # Label the full series first (the anomaly indices refer to the
        # complete file), then keep only the part aligned with the test data
        y = np.zeros(shape=X.shape[0], dtype=int)
        y[start_anomaly:end_anomaly] = 1
        y_test = y[train_test_split:]

        # Return a DataSet object
        return DataSet(X_test=X_test, y_test=y_test, X_train=X_train)