Source code for dtaianomaly.data.LazyDataLoader

import abc

from dtaianomaly.data.DataSet import DataSet
from dtaianomaly.PrettyPrintable import PrettyPrintable


class LazyDataLoader(PrettyPrintable):
    """
    Deferred loader for a single data set in an anomaly detection workflow.

    An instance only points at a data set; the data itself is read when
    :py:meth:`load` is called during execution of the workflow. Postponing
    the read keeps memory usage low and lets a workflow reference a
    virtually unlimited number of data sets.

    Parameters
    ----------
    do_caching: bool, default=False
        Whether the loaded data should be kept in memory after the
        first call to :py:meth:`load`.

    Attributes
    ----------
    cache_ : DataSet
        The data set stored by an earlier call to :py:meth:`load`. Only
        exists if ``do_caching==True`` and the data has been loaded before.
    """

    do_caching: bool
    cache_: DataSet

    def __init__(self, do_caching: bool = False):
        self.do_caching = do_caching

    def load(self) -> DataSet:
        """
        Load the dataset.

        If ``do_caching==True``, the loaded data is stored in the cache
        when no cache is available yet, and the cached data is returned.
        Otherwise the data is loaded anew on every call.

        Returns
        -------
        data_set: DataSet
            The loaded dataset.
        """
        # Without caching, every call goes straight to the concrete loader.
        if not self.do_caching:
            return self._load()

        # ``cache_`` is only assigned here, so its absence on the instance
        # means the data has not been loaded yet.
        if not hasattr(self, "cache_"):
            self.cache_ = self._load()
        return self.cache_

    @abc.abstractmethod
    def _load(self) -> DataSet:
        """Read and return the data set; to be implemented by subclasses."""