Source code for dtaianomaly.data.data

import abc
import os
import numpy as np
from pathlib import Path
from typing import NamedTuple, List, Type, Union

from dtaianomaly.PrettyPrintable import PrettyPrintable


[docs] class DataSet(NamedTuple): """ A class for time series anomaly detection data sets. These consist of the raw data itself and the ground truth labels. Parameters ---------- x: array-like of shape (n_samples, n_features) The time series. y: array-like of shape (n_samples) The ground truth anomaly labels. """ x: np.ndarray y: np.ndarray
[docs] class LazyDataLoader(PrettyPrintable): """ A lazy dataloader for anomaly detection workflows This is a data loading utility to point towards a specific data set (with `path`) and to load it at a later point in time during execution of a workflow. This way we limit memory usage and allow for virtually unlimited scaling of the number of data sets in a workflow. Parameters ---------- path: str Path to the relevant data set. Raises ------ FileNotFoundError If the given path does not point to an existing file or directory. """ path: str def __init__(self, path: Union[str, Path]) -> None: if not (Path(path).is_file() or Path(path).is_dir()): raise FileNotFoundError(f'No such file or directory: {path}') self.path = str(path)
[docs] @abc.abstractmethod def load(self) -> DataSet: """ Load the dataset. Returns ------- data_set: DataSet The loaded dataset. """
[docs] def from_directory(directory: Union[str, Path], dataloader: Type[LazyDataLoader]) -> List[LazyDataLoader]: """ Construct a `LazyDataLoader` instance for every file in the given `directory` Parameters ---------- directory: str or Path Path to the directory in question dataloader: LazyDataLoader **object** Class object of the data loader, called for constructing each data loader instance Returns ------- data_loaders: List[LazyDataLoader] A list of the initialized data loaders, one for each data set in the given directory. Raises ------ FileNotFoundError If `directory` cannot be found """ if not Path(directory).is_dir(): raise FileNotFoundError(f'No such directory: {directory}') all_files = [ os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)) or os.path.isdir(os.path.join(directory, f)) ] return [dataloader(file) for file in all_files]