# Source code for dtaianomaly.preprocessing._PiecewiseAggregateApproximation

import numpy as np

from dtaianomaly.preprocessing._Preprocessor import Preprocessor
from dtaianomaly.type_validation import IntegerAttribute

__all__ = ["PiecewiseAggregateApproximation"]



# [docs] (link marker left over from the rendered documentation page)
class PiecewiseAggregateApproximation(Preprocessor):
    """
    Perform piecewise aggregate approximation :cite:`keogh2001dimensionality`.

    Piecewise Aggregate Approximation (PAA) is a
    form of dimensionality reduction of time series, originally proposed for
    fast indexing of time series in large databases. Given a value for :math:`n`,
    PAA divides the time series in :math:`n` equi-sized frames. Next, each frame
    is replaced by its mean value. Specifically, for a time series :math:`x` of
    length :math:`N`, position :math:`i` in the transformed time series :math:`y`
    equals:

    .. math::

       y_i = \\frac{n}{N} \\displaystyle\\sum_{j=(N/n)(i-1)+1}^{(N/n)i} x_j

    For multivariate time series, the dimension of each attribute is reduced
    independently, but the same frames are used.

    If the time series has at most :math:`n` observations, it is returned
    unchanged (together with its labels), because there is nothing to reduce.

    If labels are given, they are aggregated over the same frames: a frame
    is labeled 1 if the mean of its labels is at least 0.5, and 0 otherwise.

    Parameters
    ----------
    n : int
        The number of equi-sized frames to generate.

    Examples
    --------
    >>> from dtaianomaly.preprocessing import PiecewiseAggregateApproximation
    >>> from dtaianomaly.data import demonstration_time_series
    >>> X, y = demonstration_time_series()
    >>> preprocessor = PiecewiseAggregateApproximation(n=8)
    >>> X_, y_ = preprocessor.fit_transform(X, y)
    """

    n: int
    attribute_validation = {"n": IntegerAttribute(minimum=1)}

    def __init__(self, n: int):
        self.n = n

    def _fit(self, X: np.ndarray, y: np.ndarray | None = None) -> "Preprocessor":
        # PAA is stateless: nothing is learned from the data.
        return self

    def _transform(
        self, X: np.ndarray, y: np.ndarray | None = None
    ) -> tuple[np.ndarray, np.ndarray | None]:
        # With no more observations than frames there is nothing to
        # aggregate, so the input is passed through untouched.
        if X.shape[0] <= self.n:
            return X, y

        X_ = paa(X, self.n)
        if y is None:
            return X_, None

        # Aggregate the labels over the same frames as the data; a frame
        # whose mean label is below 0.5 becomes 0, all other frames become 1.
        return X_, np.where(paa(y, self.n) < 0.5, 0, 1)



def paa(x: np.ndarray, n: int) -> np.ndarray:
    """
    Reduce ``x`` to ``n`` frames along its first axis by averaging each frame.

    The frame boundaries are ``n + 1`` evenly spaced positions between 0 and
    ``x.shape[0]`` (both included), converted to integers. Frame ``k`` covers
    the rows from boundary ``k`` up to, but not including, boundary ``k + 1``.

    Parameters
    ----------
    x : np.ndarray
        The data to reduce; frames are taken along axis 0.
    n : int
        The number of frames to generate.

    Returns
    -------
    np.ndarray
        An array with one entry per frame, holding the mean of that frame
        computed along axis 0.
    """
    boundaries = np.linspace(0, x.shape[0], n + 1, dtype=int, endpoint=True)

    frame_means = []
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        frame = x[start:stop]
        frame_means.append(np.mean(frame, axis=0))
    return np.array(frame_means)