Source code for dtaianomaly.preprocessing.Preprocessor


import abc
import numpy as np
from typing import Tuple, Optional

from dtaianomaly import utils
from dtaianomaly.PrettyPrintable import PrettyPrintable


def check_preprocessing_inputs(X: np.ndarray, y: Optional[np.ndarray] = None) -> None:
    """
    Check if the given `X` and `y` arrays are valid.

    `X` is validated first. If `y` is given, it is validated as well and
    the number of samples (size of the first dimension) of `X` and `y`
    is compared.

    Parameters
    ----------
    X: array-like of shape (n_samples, n_attributes)
        Raw time series
    y: array-like, default=None
        Ground-truth information. No checks on `y` are performed if it
        is None.

    Raises
    ------
    ValueError
        If inputs are not valid numeric arrays
    ValueError
        If inputs have a different size in the first dimension (n_samples)
    """
    if not utils.is_valid_array_like(X):
        raise ValueError('`X` is not a valid array')

    # Without ground truth there is nothing left to validate.
    if y is None:
        return

    if not utils.is_valid_array_like(y):
        raise ValueError('`y` is not a valid array')

    # The inputs are documented as array-like, so they may lack a `.shape`
    # attribute (e.g. plain lists). Convert before comparing sample counts.
    if np.asarray(X).shape[0] != np.asarray(y).shape[0]:
        raise ValueError('`X` and `y` have a different number of samples')


class Preprocessor(PrettyPrintable):
    """
    Base class for preprocessors.

    The public methods :py:meth:`fit`, :py:meth:`transform` and
    :py:meth:`fit_transform` validate their inputs and then delegate to
    the hooks ``_fit`` and ``_transform``, which are marked abstract and
    are meant to be implemented by subclasses.
    """

    def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> 'Preprocessor':
        """
        Validate the inputs and fit this preprocessor.

        The inputs are checked with
        :py:func:`~dtaianomaly.preprocessing.Preprocessor.check_preprocessing_inputs`
        before the actual fitting is delegated to ``_fit``.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_attributes)
            Raw time series
        y: array-like, default=None
            Ground-truth information

        Returns
        -------
        self: Preprocessor
            The result of ``_fit``, i.e., the fitted instance.
        """
        check_preprocessing_inputs(X, y)
        fitted = self._fit(X, y)
        return fitted

    @abc.abstractmethod
    def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> 'Preprocessor':
        """
        Hook that performs the actual fitting. Input validation has
        already happened when this is reached through the public methods.
        """

    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Validate the inputs and transform (i.e., preprocess) the time series.

        The inputs are checked with
        :py:func:`~dtaianomaly.preprocessing.Preprocessor.check_preprocessing_inputs`
        before the actual transformation is delegated to ``_transform``.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_attributes)
            Raw time series
        y: array-like of shape (n_samples), default=None
            Ground-truth information

        Returns
        -------
        X_transformed: np.ndarray of shape (n_samples, n_attributes)
            Preprocessed raw time series
        y_transformed: np.ndarray of shape (n_samples)
            The transformed ground truth. If no ground truth was provided
            (`y=None`), then None will be returned as well.
        """
        check_preprocessing_inputs(X, y)
        transformed = self._transform(X, y)
        return transformed

    @abc.abstractmethod
    def _transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Hook that performs the actual transformation. Input validation has
        already happened when this is reached through the public methods.
        """

    def fit_transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Validate the inputs, fit this preprocessor and transform the same data.

        The inputs are checked once with
        :py:func:`~dtaianomaly.preprocessing.Preprocessor.check_preprocessing_inputs`.
        Afterwards ``_fit`` is called on the given `X` and `y`, and the
        object it returns is used to ``_transform`` that same `X` and `y`.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_attributes)
            Raw time series
        y: array-like of shape (n_samples), default=None
            Ground-truth information

        Returns
        -------
        X_transformed: np.ndarray of shape (n_samples, n_attributes)
            Preprocessed raw time series
        y_transformed: np.ndarray of shape (n_samples)
            The transformed ground truth. If no ground truth was provided
            (`y=None`), then None will be returned as well.
        """
        check_preprocessing_inputs(X, y)
        # Call the hooks directly so the inputs are not validated again.
        fitted = self._fit(X, y)
        return fitted._transform(X, y)
class Identity(Preprocessor):
    """
    Identity preprocessor.

    A no-op placeholder: fitting does nothing and transforming hands the
    given data back untouched.
    """

    def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> 'Preprocessor':
        """ Nothing to fit; return this instance as-is. """
        return self

    def _transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """ Return the given `X` and `y` unchanged, as a pair. """
        unchanged = (X, y)
        return unchanged