Source code for dtaianomaly.preprocessing._Preprocessor

import abc

import numpy as np

from dtaianomaly.type_validation import AttributeValidationMixin
from dtaianomaly.utils import (
    CheckIsFittedMixin,
    PrintConstructionCallMixin,
    is_valid_array_like,
)

__all__ = ["Preprocessor"]


def _check_preprocessing_inputs(X: np.ndarray, y: np.ndarray | None = None) -> None:
    """
    Check the inputs for preprocessing.

    Check if the given `X` and `y` arrays are valid, i.e., if they
    are valid array-likes and have the same length. Nothing is returned;
    the function only raises when a check fails.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_attributes)
        Raw time series.
    y : array-like, default=None
        Ground-truth information. If None, only `X` is checked.

    Raises
    ------
    ValueError
        If `X`, or a provided `y`, is rejected by ``is_valid_array_like``.
    ValueError
        If `X` and `y` have a different size in the first dimension (n_samples).
    """
    if not is_valid_array_like(X):
        raise ValueError("`X` is not a valid array")

    # Without ground truth there is nothing left to validate.
    if y is None:
        return

    if not is_valid_array_like(y):
        raise ValueError("`y` is not a valid array")

    # Only the leading dimension (number of samples) has to agree.
    n_samples_X = np.asarray(X).shape[0]
    n_samples_y = np.asarray(y).shape[0]
    if n_samples_X != n_samples_y:
        raise ValueError("`X` and `y` have a different number of samples")


class Preprocessor(
    PrintConstructionCallMixin, AttributeValidationMixin, CheckIsFittedMixin
):
    """
    Base preprocessor class.

    Class to preprocess data. This is useful for applying transformations
    on the data such that anomalies are more clearly visible or such that
    the data has a standard form (e.g., scaling).

    Subclasses implement :py:meth:`_fit` and :py:meth:`_transform`; the
    public :py:meth:`fit` and :py:meth:`transform` methods validate the
    inputs and convert them to numpy arrays before delegating.
    """

    # NOTE(review): ``abc.abstractmethod`` is only enforced when one of the
    # base classes uses ``abc.ABCMeta`` as metaclass -- confirm in the mixins.

    def fit(self, X: np.ndarray, y: np.ndarray | None = None) -> "Preprocessor":
        """
        Fit this preprocessor.

        First checks the inputs with ``_check_preprocessing_inputs``, and
        then fits this preprocessor by delegating to :py:meth:`_fit`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_attributes)
            Raw time series.
        y : array-like, default=None
            Ground-truth information.

        Returns
        -------
        Preprocessor
            The value returned by :py:meth:`_fit`, i.e., the fitted
            instance self.

        Raises
        ------
        ValueError
            If the inputs do not pass ``_check_preprocessing_inputs``.
        """
        _check_preprocessing_inputs(X, y)
        return self._fit(np.asarray(X), y if y is None else np.asarray(y))

    @abc.abstractmethod
    def _fit(self, X: np.ndarray, y: np.ndarray | None = None) -> "Preprocessor":
        """Effectively fit this preprocessor, without checking the inputs."""

    def transform(
        self, X: np.ndarray, y: np.ndarray | None = None
    ) -> tuple[np.ndarray, np.ndarray | None]:
        """
        Transform the given time series.

        First verifies that this preprocessor is fitted (``check_is_fitted``),
        then checks the inputs with ``_check_preprocessing_inputs``, and
        finally transforms (i.e., preprocesses) the given time series by
        delegating to :py:meth:`_transform`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_attributes)
            Raw time series.
        y : array-like of shape (n_samples), default=None
            Ground-truth information.

        Returns
        -------
        X_transformed : np.ndarray of shape (n_samples, n_attributes)
            Preprocessed raw time series.
        y_transformed : np.ndarray of shape (n_samples)
            The transformed ground truth. If no ground truth was provided
            (`y=None`), then None will be returned as well.

        Raises
        ------
        ValueError
            If the inputs do not pass ``_check_preprocessing_inputs``.
        """
        # The fitted-check runs before input validation, so an unfitted
        # preprocessor is reported first.
        self.check_is_fitted()
        _check_preprocessing_inputs(X, y)
        return self._transform(np.asarray(X), y if y is None else np.asarray(y))

    @abc.abstractmethod
    def _transform(
        self, X: np.ndarray, y: np.ndarray | None = None
    ) -> tuple[np.ndarray, np.ndarray | None]:
        """Effectively transform the given data, without checking the inputs."""

    def fit_transform(
        self, X: np.ndarray, y: np.ndarray | None = None
    ) -> tuple[np.ndarray, np.ndarray | None]:
        """
        Fit this preprocessor and transform the given time series.

        Chains the fit and transform methods on the given data, i.e., first
        fit this preprocessor on the given `X` and `y`, after which the
        given `X` and `y` will be transformed. The inputs are checked with
        ``_check_preprocessing_inputs`` inside both :py:meth:`fit` and
        :py:meth:`transform`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_attributes)
            Raw time series.
        y : array-like of shape (n_samples), default=None
            Ground-truth information.

        Returns
        -------
        X_transformed : np.ndarray of shape (n_samples, n_attributes)
            Preprocessed raw time series.
        y_transformed : np.ndarray of shape (n_samples)
            The transformed ground truth. If no ground truth was provided
            (`y=None`), then None will be returned as well.

        Raises
        ------
        ValueError
            If the inputs do not pass ``_check_preprocessing_inputs``.
        """
        return self.fit(X, y).transform(X, y)

    def piped_str(self) -> str:
        """
        Return the string representation of this preprocessor.

        Returns
        -------
        str
            The result of ``self.__str__()``.
        """
        # NOTE(review): presumably overridden where a pipeline-specific
        # rendering is needed -- confirm against subclasses.
        return self.__str__()