Skip to content

TimeSeriesDataset

A time series dataset maps feature and time columns to a target column. Not like the TabularDataset a TimeSeries needs to contain one target and one time column, but can have empty features.

Create a time series dataset from a mapping of column names to their values.

Parameters:

Name Type Description Default
data Table | Mapping[str, Sequence[Any]]

The data.

required
target_name str

Name of the target column.

required
time_name str

Name of the time column.

required
extra_names list[str] | None

Names of the columns that are neither features nor target. If None, no extra columns are used, i.e. all but the target column are used as features.

None

Raises:

Type Description
ColumnLengthMismatchError

If columns have different lengths.

ValueError

If the target column is also an extra column.

ValueError

If no feature column remains.

Examples:

>>> from safeds.data.labeled.containers import TabularDataset
>>> dataset = TimeSeriesDataset(
...     {"id": [1, 2, 3], "feature": [4, 5, 6], "target": [1, 2, 3], "error":[0,0,1]},
...     target_name="target",
...     time_name = "id",
...     extra_names=["error"]
... )
Source code in src/safeds/data/labeled/containers/_time_series_dataset.py
class TimeSeriesDataset:
    """
    A time series dataset maps feature and time columns to a target column. Not like the TabularDataset a TimeSeries needs to contain one target and one time column, but can have empty features.

    Create a time series dataset from a mapping of column names to their values.

    Parameters
    ----------
    data:
        The data.
    target_name:
        Name of the target column.
    time_name:
        Name of the time column.
    extra_names:
        Names of the columns that are neither features nor target. If None, no extra columns are used, i.e. all but
        the target column are used as features.

    Raises
    ------
    ColumnLengthMismatchError
        If columns have different lengths.
    ValueError
        If the target column is also an extra column.
    ValueError
        If no feature column remains.

    Examples
    --------
    >>> from safeds.data.labeled.containers import TabularDataset
    >>> dataset = TimeSeriesDataset(
    ...     {"id": [1, 2, 3], "feature": [4, 5, 6], "target": [1, 2, 3], "error":[0,0,1]},
    ...     target_name="target",
    ...     time_name = "id",
    ...     extra_names=["error"]
    ... )
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------
    def __init__(
        self,
        data: Table | Mapping[str, Sequence[Any]],
        target_name: str,
        time_name: str,
        extra_names: list[str] | None = None,
    ):
        # Preprocess inputs
        if not isinstance(data, Table):
            data = Table(data)
        if extra_names is None:
            extra_names = []

        # Derive feature names
        feature_names = [name for name in data.column_names if name not in {target_name, *extra_names, time_name}]

        # Validate inputs
        if time_name in extra_names:
            raise ValueError(f"Column '{time_name}' cannot be both time and extra.")
        if target_name in extra_names:
            raise ValueError(f"Column '{target_name}' cannot be both target and extra.")
        if len(feature_names) == 0:
            feature_names = []

        # Set attributes
        self._table: Table = data
        self._features: Table = data.keep_only_columns(feature_names)
        self._target: Column = data.get_column(target_name)
        self._time: Column = data.get_column(time_name)
        self._extras: Table = data.keep_only_columns(extra_names)

    def __eq__(self, other: object) -> bool:
        """
        Compare two time series datasets.

        Returns
        -------
        equals:
            'True' if features, time, target and extras are equal, 'False' otherwise.
        """
        if not isinstance(other, TimeSeriesDataset):
            return NotImplemented
        return (self is other) or (
            self.target == other.target
            and self.features == other.features
            and self.extras == other.extras
            and self.time == other.time
        )

    def __hash__(self) -> int:
        """
        Return a deterministic hash value for this time series dataset.

        Returns
        -------
        hash:
            The hash value.
        """
        return _structural_hash(self.target, self.features, self.extras, self.time)

    def __sizeof__(self) -> int:
        """
        Return the complete size of this object.

        Returns
        -------
        size:
            Size of this object in bytes.
        """
        return (
            sys.getsizeof(self._target)
            + sys.getsizeof(self._features)
            + sys.getsizeof(self.extras)
            + sys.getsizeof(self._time)
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def features(self) -> Table:
        """The feature columns of the time series dataset."""
        return self._features

    @property
    def target(self) -> Column:
        """The target column of the time series dataset."""
        return self._target

    @property
    def time(self) -> Column:
        """The time column of the time series dataset."""
        return self._time

    @property
    def extras(self) -> Table:
        """
        Additional columns of the time series dataset that are neither features, target nor time.

        These can be used to store additional information about instances, such as IDs.
        """
        return self._extras

    # ------------------------------------------------------------------------------------------------------------------
    # Conversion
    # ------------------------------------------------------------------------------------------------------------------

    def to_table(self) -> Table:
        """
        Return a new `Table` containing the feature columns, the target column, the time column and the extra columns.

        The original `TimeSeriesDataset` is not modified.

        Returns
        -------
        table:
            A table containing the feature columns, the target column, the time column and the extra columns.
        """
        return self._table

    def _into_dataloader_with_window(self, window_size: int, forecast_horizon: int, batch_size: int) -> DataLoader:
        """
        Return a Dataloader for the data stored in this time series, used for training neural networks.

        It splits the target column into windows, uses them as feature and creates targets for the time series, by
        forecast length. The original time series dataset is not modified.

        Parameters
        ----------
        window_size:
            The size of the created windows
        forecast_horizon:
            The length of the forecast horizon, where all datapoints are collected until the given lag.
        batch_size:
            The size of data batches that should be loaded at one time.

        Raises
        ------
        OutOfBoundsError:
            If window_size or forecast_horizon is below 1
        ValueError:
            If the size is smaller or even than forecast_horizon + window_size

        Returns
        -------
        result:
            The DataLoader.
        """
        import torch
        from torch.utils.data import DataLoader

        _init_default_device()

        target_tensor = torch.tensor(self.target._data.values, dtype=torch.float32)

        x_s = []
        y_s = []

        size = target_tensor.size(0)
        if window_size < 1:
            raise OutOfBoundsError(actual=window_size, name="window_size", lower_bound=ClosedBound(1))
        if forecast_horizon < 1:
            raise OutOfBoundsError(actual=forecast_horizon, name="forecast_horizon", lower_bound=ClosedBound(1))
        if size <= forecast_horizon + window_size:
            raise ValueError("Can not create windows with window size less then forecast horizon + window_size")
        # create feature windows and for that features targets lagged by forecast len
        # every feature column wird auch gewindowed
        # -> [i, win_size],[target]
        feature_cols = self.features.to_columns()
        for i in range(size - (forecast_horizon + window_size)):
            window = target_tensor[i : i + window_size]
            label = target_tensor[i + window_size + forecast_horizon]
            for col in feature_cols:
                data = torch.tensor(col._data.values, dtype=torch.float32)
                window = torch.cat((window, data[i : i + window_size]), dim=0)
            x_s.append(window)
            y_s.append(label)
        x_s_tensor = torch.stack(x_s)
        y_s_tensor = torch.stack(y_s)
        dataset = _create_dataset(x_s_tensor, y_s_tensor)
        return DataLoader(dataset=dataset, batch_size=batch_size)

    def _into_dataloader_with_window_predict(
        self,
        window_size: int,
        forecast_horizon: int,
        batch_size: int,
    ) -> DataLoader:
        """
        Return a Dataloader for the data stored in this time series, used for training neural networks.

        It splits the target column into windows, uses them as feature and creates targets for the time series, by
        forecast length. The original time series dataset is not modified.

        Parameters
        ----------
        window_size:
            The size of the created windows
        batch_size:
            The size of data batches that should be loaded at one time.

        Raises
        ------
        OutOfBoundsError:
            If window_size or forecast_horizon is below 1
        ValueError:
            If the size is smaller or even than forecast_horizon + window_size

        Returns
        -------
        result:
            The DataLoader.
        """
        import torch
        from torch.utils.data import DataLoader

        _init_default_device()

        target_tensor = torch.tensor(self.target._data.values, dtype=torch.float32)
        x_s = []

        size = target_tensor.size(0)
        if window_size < 1:
            raise OutOfBoundsError(actual=window_size, name="window_size", lower_bound=ClosedBound(1))
        if forecast_horizon < 1:
            raise OutOfBoundsError(actual=forecast_horizon, name="forecast_horizon", lower_bound=ClosedBound(1))
        if size <= forecast_horizon + window_size:
            raise ValueError("Can not create windows with window size less then forecast horizon + window_size")

        feature_cols = self.features.to_columns()
        for i in range(size - (forecast_horizon + window_size)):
            window = target_tensor[i : i + window_size]
            for col in feature_cols:
                data = torch.tensor(col._data.values, dtype=torch.float32)
                window = torch.cat((window, data[i : i + window_size]), dim=-1)
            x_s.append(window)

        x_s_tensor = torch.stack(x_s)

        dataset = _create_dataset_predict(x_s_tensor)
        return DataLoader(dataset=dataset, batch_size=batch_size)

    # ------------------------------------------------------------------------------------------------------------------
    # IPython integration
    # ------------------------------------------------------------------------------------------------------------------

    def _repr_html_(self) -> str:
        """
        Return an HTML representation of the time series dataset.

        Returns
        -------
        output:
            The generated HTML.
        """
        return self._table._repr_html_()

extras: Table property

Additional columns of the time series dataset that are neither features, target nor time.

These can be used to store additional information about instances, such as IDs.

features: Table property

The feature columns of the time series dataset.

target: Column property

The target column of the time series dataset.

time: Column property

The time column of the time series dataset.

to_table()

Return a new Table containing the feature columns, the target column, the time column and the extra columns.

The original TimeSeriesDataset is not modified.

Returns:

Name Type Description
table Table

A table containing the feature columns, the target column, the time column and the extra columns.

Source code in src/safeds/data/labeled/containers/_time_series_dataset.py
def to_table(self) -> Table:
    """
    Return a new `Table` containing the feature columns, the target column, the time column and the extra columns.

    The original `TimeSeriesDataset` is not modified.

    Returns
    -------
    table:
        A table containing the feature columns, the target column, the time column and the extra columns.
    """
    return self._table