Skip to content

Regressor

Bases: ABC

Abstract base class for all regressors.

Source code in src/safeds/ml/classical/regression/_regressor.py
class Regressor(ABC):
    """
    Abstract base class for all regressors.

    Concrete regressors implement `fit`, `predict`, `is_fitted` and `_get_sklearn_regressor`. The metric methods
    defined here (`summarize_metrics`, `mean_absolute_error`, `mean_squared_error`) are built on top of `predict`.
    """

    def __hash__(self) -> int:
        """
        Return a deterministic hash value for a regressor.

        The hash is computed from the qualified class name and the fitted state of the regressor.

        Returns
        -------
        hash:
            The hash value.
        """
        return _structural_hash(self.__class__.__qualname__, self.is_fitted)

    @abstractmethod
    def fit(self, training_set: TabularDataset | ExperimentalTabularDataset) -> Regressor:
        """
        Create a copy of this regressor and fit it with the given training data.

        This regressor is not modified.

        Parameters
        ----------
        training_set:
            The training data containing the feature and target vectors.

        Returns
        -------
        fitted_regressor:
            The fitted regressor.

        Raises
        ------
        LearningError
            If the training data contains invalid values or if the training failed.
        """

    @abstractmethod
    def predict(self, dataset: Table | ExperimentalTable | ExperimentalTabularDataset) -> TabularDataset:
        """
        Predict a target vector using a dataset containing feature vectors. The model has to be trained first.

        Parameters
        ----------
        dataset:
            The dataset containing the feature vectors.

        Returns
        -------
        table:
            A dataset containing the given feature vectors and the predicted target vector.

        Raises
        ------
        ModelNotFittedError
            If the model has not been fitted yet.
        DatasetMissesFeaturesError
            If the dataset misses feature columns.
        PredictionError
            If predicting with the given dataset failed.
        """

    @property
    @abstractmethod
    def is_fitted(self) -> bool:
        """Whether the regressor is fitted."""

    @abstractmethod
    def _get_sklearn_regressor(self) -> RegressorMixin:
        """
        Return a new wrapped Regressor from sklearn.

        Returns
        -------
        wrapped_regressor:
            The sklearn Regressor.
        """

    # ------------------------------------------------------------------------------------------------------------------
    # Metrics
    # ------------------------------------------------------------------------------------------------------------------

    def summarize_metrics(self, validation_or_test_set: TabularDataset | ExperimentalTabularDataset) -> Table:
        """
        Summarize the regressor's metrics on the given data.

        The summary contains one row per metric: the mean absolute error and the mean squared error.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        metrics:
            A table containing the regressor's metrics, with the columns "metric" and "value".

        Raises
        ------
        TypeError
            If a table is passed instead of a tabular dataset, or if the argument is of any other unsupported type.
        """
        mean_absolute_error = self.mean_absolute_error(validation_or_test_set)
        mean_squared_error = self.mean_squared_error(validation_or_test_set)

        return Table(
            {
                "metric": ["mean_absolute_error", "mean_squared_error"],
                "value": [mean_absolute_error, mean_squared_error],
            },
        )

    def mean_absolute_error(self, validation_or_test_set: TabularDataset | ExperimentalTabularDataset) -> float:
        """
        Compute the mean absolute error (MAE) of the regressor on the given data.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        mean_absolute_error:
            The calculated mean absolute error (the average of the absolute differences between the predicted and
            the expected values over all rows).

        Raises
        ------
        TypeError
            If a table is passed instead of a tabular dataset (signalled with `PlainTableError`), or if the argument
            is of any other unsupported type.
        """
        from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error

        # Reject plain tables. The leading TabularDataset check keeps tabular datasets out of this branch should they
        # also be Table instances.
        if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
            raise PlainTableError

        if isinstance(validation_or_test_set, TabularDataset):
            expected = validation_or_test_set.target
            predicted = self.predict(validation_or_test_set.features).target

            # TODO: more efficient implementation using polars
            _check_metrics_preconditions(predicted, expected)
            return sk_mean_absolute_error(expected._data, predicted._data)
        elif isinstance(validation_or_test_set, ExperimentalTabularDataset):  # pragma: no cover
            expected_2 = validation_or_test_set.target
            predicted_2 = self.predict(validation_or_test_set.features).target

            # TODO: more efficient implementation using polars
            _check_metrics_preconditions_experimental(predicted_2, expected_2)
            return sk_mean_absolute_error(expected_2._series, predicted_2._data)

        # Any other argument type used to fall through and return None despite the `float` return annotation.
        raise TypeError(
            "Expected a TabularDataset or an ExperimentalTabularDataset, "
            f"but got {type(validation_or_test_set).__name__}.",
        )

    # noinspection PyProtectedMember
    def mean_squared_error(self, validation_or_test_set: TabularDataset | ExperimentalTabularDataset) -> float:
        """
        Compute the mean squared error (MSE) of the regressor on the given data.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        mean_squared_error:
            The calculated mean squared error (the average of the squared differences between the predicted and the
            expected values over all rows).

        Raises
        ------
        TypeError
            If a table is passed instead of a tabular dataset (signalled with `PlainTableError`), or if the argument
            is of any other unsupported type.
        """
        from sklearn.metrics import mean_squared_error as sk_mean_squared_error

        # Reject plain tables. The leading TabularDataset check keeps tabular datasets out of this branch should they
        # also be Table instances.
        if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
            raise PlainTableError

        if isinstance(validation_or_test_set, TabularDataset):
            expected = validation_or_test_set.target
            predicted = self.predict(validation_or_test_set.features).target

            # TODO: more efficient implementation using polars
            _check_metrics_preconditions(predicted, expected)
            return sk_mean_squared_error(expected._data, predicted._data)
        elif isinstance(validation_or_test_set, ExperimentalTabularDataset):  # pragma: no cover
            expected_2 = validation_or_test_set.target
            predicted_2 = self.predict(validation_or_test_set.features).target

            # TODO: more efficient implementation using polars
            _check_metrics_preconditions_experimental(predicted_2, expected_2)
            return sk_mean_squared_error(expected_2._series, predicted_2._data)

        # Any other argument type used to fall through and return None despite the `float` return annotation.
        raise TypeError(
            "Expected a TabularDataset or an ExperimentalTabularDataset, "
            f"but got {type(validation_or_test_set).__name__}.",
        )

is_fitted: bool abstractmethod property

Whether the regressor is fitted.

fit(training_set) abstractmethod

Create a copy of this regressor and fit it with the given training data.

This regressor is not modified.

Parameters:

Name Type Description Default
training_set TabularDataset | ExperimentalTabularDataset

The training data containing the feature and target vectors.

required

Returns:

Name Type Description
fitted_regressor Regressor

The fitted regressor.

Raises:

Type Description
LearningError

If the training data contains invalid values or if the training failed.

Source code in src/safeds/ml/classical/regression/_regressor.py
@abstractmethod
def fit(self, training_set: TabularDataset | ExperimentalTabularDataset) -> Regressor:
    """
    Create a copy of this regressor and fit it with the given training data.

    This regressor is not modified. The method is abstract, so every concrete regressor has to provide its own
    implementation.

    Parameters
    ----------
    training_set:
        The training data containing the feature and target vectors.

    Returns
    -------
    fitted_regressor:
        The fitted regressor.

    Raises
    ------
    LearningError
        If the training data contains invalid values or if the training failed.
    """

mean_absolute_error(validation_or_test_set)

Compute the mean absolute error (MAE) of the regressor on the given data.

Parameters:

Name Type Description Default
validation_or_test_set TabularDataset | ExperimentalTabularDataset

The validation or test set.

required

Returns:

Name Type Description
mean_absolute_error float

The calculated mean absolute error (the average of the absolute differences between the predicted and the expected values over all rows).

Raises:

Type Description
TypeError

If a table is passed instead of a tabular dataset.

Source code in src/safeds/ml/classical/regression/_regressor.py
def mean_absolute_error(self, validation_or_test_set: TabularDataset | ExperimentalTabularDataset) -> float:
    """
    Compute the mean absolute error (MAE) of the regressor on the given data.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    mean_absolute_error:
        The calculated mean absolute error (the average of the absolute differences between the predicted and the
        expected values over all rows).

    Raises
    ------
    TypeError
        If a table is passed instead of a tabular dataset (signalled with `PlainTableError`), or if the argument is
        of any other unsupported type.
    """
    from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error

    # Reject plain tables. The leading TabularDataset check keeps tabular datasets out of this branch should they
    # also be Table instances.
    if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
        raise PlainTableError

    if isinstance(validation_or_test_set, TabularDataset):
        expected = validation_or_test_set.target
        predicted = self.predict(validation_or_test_set.features).target

        # TODO: more efficient implementation using polars
        _check_metrics_preconditions(predicted, expected)
        return sk_mean_absolute_error(expected._data, predicted._data)
    elif isinstance(validation_or_test_set, ExperimentalTabularDataset):  # pragma: no cover
        expected_2 = validation_or_test_set.target
        predicted_2 = self.predict(validation_or_test_set.features).target

        # TODO: more efficient implementation using polars
        _check_metrics_preconditions_experimental(predicted_2, expected_2)
        return sk_mean_absolute_error(expected_2._series, predicted_2._data)

    # Any other argument type used to fall through and return None despite the `float` return annotation.
    raise TypeError(
        "Expected a TabularDataset or an ExperimentalTabularDataset, "
        f"but got {type(validation_or_test_set).__name__}.",
    )

mean_squared_error(validation_or_test_set)

Compute the mean squared error (MSE) of the regressor on the given data.

Parameters:

Name Type Description Default
validation_or_test_set TabularDataset | ExperimentalTabularDataset

The validation or test set.

required

Returns:

Name Type Description
mean_squared_error float

The calculated mean squared error (the average of the squared differences between the predicted and the expected values over all rows).

Raises:

Type Description
TypeError

If a table is passed instead of a tabular dataset.

Source code in src/safeds/ml/classical/regression/_regressor.py
def mean_squared_error(self, validation_or_test_set: TabularDataset | ExperimentalTabularDataset) -> float:
    """
    Compute the mean squared error (MSE) of the regressor on the given data.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    mean_squared_error:
        The calculated mean squared error (the average of the squared differences between the predicted and the
        expected values over all rows).

    Raises
    ------
    TypeError
        If a table is passed instead of a tabular dataset (signalled with `PlainTableError`), or if the argument is
        of any other unsupported type.
    """
    from sklearn.metrics import mean_squared_error as sk_mean_squared_error

    # Reject plain tables. The leading TabularDataset check keeps tabular datasets out of this branch should they
    # also be Table instances.
    if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
        raise PlainTableError

    if isinstance(validation_or_test_set, TabularDataset):
        expected = validation_or_test_set.target
        predicted = self.predict(validation_or_test_set.features).target

        # TODO: more efficient implementation using polars
        _check_metrics_preconditions(predicted, expected)
        return sk_mean_squared_error(expected._data, predicted._data)
    elif isinstance(validation_or_test_set, ExperimentalTabularDataset):  # pragma: no cover
        expected_2 = validation_or_test_set.target
        predicted_2 = self.predict(validation_or_test_set.features).target

        # TODO: more efficient implementation using polars
        _check_metrics_preconditions_experimental(predicted_2, expected_2)
        return sk_mean_squared_error(expected_2._series, predicted_2._data)

    # Any other argument type used to fall through and return None despite the `float` return annotation.
    raise TypeError(
        "Expected a TabularDataset or an ExperimentalTabularDataset, "
        f"but got {type(validation_or_test_set).__name__}.",
    )

predict(dataset) abstractmethod

Predict a target vector using a dataset containing feature vectors. The model has to be trained first.

Parameters:

Name Type Description Default
dataset Table | ExperimentalTable | ExperimentalTabularDataset

The dataset containing the feature vectors.

required

Returns:

Name Type Description
table TabularDataset

A dataset containing the given feature vectors and the predicted target vector.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

DatasetMissesFeaturesError

If the dataset misses feature columns.

PredictionError

If predicting with the given dataset failed.

Source code in src/safeds/ml/classical/regression/_regressor.py
@abstractmethod
def predict(self, dataset: Table | ExperimentalTable | ExperimentalTabularDataset) -> TabularDataset:
    """
    Predict a target vector using a dataset containing feature vectors.

    The model has to be trained first. The method is abstract, so every concrete regressor has to provide its own
    implementation.

    Parameters
    ----------
    dataset:
        The dataset containing the feature vectors.

    Returns
    -------
    table:
        A dataset containing the given feature vectors and the predicted target vector.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    DatasetMissesFeaturesError
        If the dataset misses feature columns.
    PredictionError
        If predicting with the given dataset failed.
    """

summarize_metrics(validation_or_test_set)

Summarize the regressor's metrics on the given data.

Parameters:

Name Type Description Default
validation_or_test_set TabularDataset | ExperimentalTabularDataset

The validation or test set.

required

Returns:

Name Type Description
metrics Table

A table containing the regressor's metrics.

Raises:

Type Description
TypeError

If a table is passed instead of a tabular dataset.

Source code in src/safeds/ml/classical/regression/_regressor.py
def summarize_metrics(self, validation_or_test_set: TabularDataset | ExperimentalTabularDataset) -> Table:
    """
    Collect the regressor's metrics on the given data in a single table.

    The mean absolute error is computed first, followed by the mean squared error. Each metric becomes one row.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    metrics:
        A table containing the regressor's metrics.

    Raises
    ------
    TypeError
        If a table is passed instead of a tabular dataset.
    """
    # Dict literals evaluate their values in order, so the MAE is still computed before the MSE.
    metric_values = {
        "mean_absolute_error": self.mean_absolute_error(validation_or_test_set),
        "mean_squared_error": self.mean_squared_error(validation_or_test_set),
    }

    return Table(
        {
            "metric": list(metric_values.keys()),
            "value": list(metric_values.values()),
        },
    )