Source code for dtaianomaly.preprocessing.Preprocessor

import abc
from typing import Optional, Tuple

import numpy as np

from dtaianomaly import utils


def check_preprocessing_inputs(X: np.ndarray, y: Optional[np.ndarray] = None) -> None:
    """
    Check if the given `X` and `y` arrays are valid.

    `X` is always validated. `y` is only validated, and compared against `X`,
    when it is provided.

    Parameters
    ----------
    X: array-like of shape (n_samples, n_attributes)
        Raw time series
    y: array-like, default=None
        Ground-truth information

    Raises
    ------
    ValueError
        If `X` or `y` is rejected by ``utils.is_valid_array_like``
    ValueError
        If inputs have a different size in the first dimension (n_samples)
    """
    if not utils.is_valid_array_like(X):
        raise ValueError("`X` is not a valid array")

    # Without ground truth there is nothing left to validate.
    if y is None:
        return

    if not utils.is_valid_array_like(y):
        raise ValueError("`y` is not a valid array")

    # Convert to arrays so that `.shape` is available for any accepted
    # array-like (e.g. plain lists); only the first dimension is compared.
    if np.asarray(X).shape[0] != np.asarray(y).shape[0]:
        raise ValueError("`X` and `y` have a different number of samples")


class Preprocessor(utils.PrettyPrintable):
    """
    Base preprocessor class.

    The public methods :py:meth:`fit`, :py:meth:`transform` and
    :py:meth:`fit_transform` validate their inputs and convert them to numpy
    arrays before delegating to the abstract hooks ``_fit`` and ``_transform``,
    which subclasses must implement.
    """

    def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "Preprocessor":
        """
        First checks the inputs with
        :py:func:`~dtaianomaly.preprocessing.Preprocessor.check_preprocessing_inputs`,
        and then fits this preprocessor.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_attributes)
            Raw time series
        y: array-like, default=None
            Ground-truth information

        Returns
        -------
        self: Preprocessor
            Returns the fitted instance self.

        Raises
        ------
        ValueError
            If the inputs are rejected by ``check_preprocessing_inputs``.
        """
        check_preprocessing_inputs(X, y)
        # The hook always receives numpy arrays (or None for a missing `y`).
        return self._fit(np.asarray(X), None if y is None else np.asarray(y))

    @abc.abstractmethod
    def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "Preprocessor":
        """Effectively fit this preprocessor, without checking the inputs."""

    def transform(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        First checks the inputs with
        :py:func:`~dtaianomaly.preprocessing.Preprocessor.check_preprocessing_inputs`,
        and then transforms (i.e., preprocesses) the given time series.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_attributes)
            Raw time series
        y: array-like of shape (n_samples), default=None
            Ground-truth information

        Returns
        -------
        X_transformed: np.ndarray of shape (n_samples, n_attributes)
            Preprocessed raw time series
        y_transformed: np.ndarray of shape (n_samples)
            The transformed ground truth. If no ground truth was provided
            (`y=None`), then None will be returned as well.

        Raises
        ------
        ValueError
            If the inputs are rejected by ``check_preprocessing_inputs``.
        """
        check_preprocessing_inputs(X, y)
        # The hook always receives numpy arrays (or None for a missing `y`).
        return self._transform(np.asarray(X), None if y is None else np.asarray(y))

    @abc.abstractmethod
    def _transform(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """Effectively transform the given data, without checking the inputs."""

    def fit_transform(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Chains the fit and transform methods on the given data, i.e., first
        fit this preprocessor on the given `X` and `y`, after which the given
        `X` and `y` will be transformed. The inputs are checked with
        :py:func:`~dtaianomaly.preprocessing.Preprocessor.check_preprocessing_inputs`
        by both :py:meth:`fit` and :py:meth:`transform`.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_attributes)
            Raw time series
        y: array-like of shape (n_samples), default=None
            Ground-truth information

        Returns
        -------
        X_transformed: np.ndarray of shape (n_samples, n_attributes)
            Preprocessed raw time series
        y_transformed: np.ndarray of shape (n_samples)
            The transformed ground truth. If no ground truth was provided
            (`y=None`), then None will be returned as well.

        Raises
        ------
        ValueError
            If the inputs are rejected by ``check_preprocessing_inputs``.
        """
        # Go through the public methods (not the hooks) so that subclasses
        # overriding `fit` or `transform` keep their behavior here.
        return self.fit(X, y).transform(X, y)
class Identity(Preprocessor):
    """
    Identity preprocessor. A dummy preprocessor which does not do any
    processing at all.
    """

    def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "Preprocessor":
        """Nothing to learn: return this instance unchanged."""
        return self

    def _transform(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """Return the given `X` and `y` as-is (the same objects, no copy)."""
        return X, y