Skip to content

Regressor

Bases: SupervisedModel, ABC

A model for regression tasks.

Source code in src/safeds/ml/classical/regression/_regressor.py
class Regressor(SupervisedModel, ABC):
    """A model for regression tasks."""

    # ------------------------------------------------------------------------------------------------------------------
    # Metrics
    # ------------------------------------------------------------------------------------------------------------------

    def summarize_metrics(self, validation_or_test_set: Table | TabularDataset) -> Table:
        """
        Summarize the regressor's metrics on the given data.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        metrics:
            A table containing the regressor's metrics.

        Raises
        ------
        ModelNotFittedError
            If the regressor has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return RegressionMetrics.summarize(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
        )

    def coefficient_of_determination(self, validation_or_test_set: Table | TabularDataset) -> float:
        """
        Compute the coefficient of determination (R²) of the regressor on the given data.

        The coefficient of determination compares the regressor's predictions to another model that always predicts the
        mean of the target values. It is a measure of how well the regressor explains the variance in the target values.

        The **higher** the coefficient of determination, the better the regressor. Results range from negative infinity
        to 1.0. You can interpret the coefficient of determination as follows:

        | R²         | Interpretation                                                                             |
        | ---------- | ------------------------------------------------------------------------------------------ |
        | 1.0        | The model perfectly predicts the target values. Did you overfit?                           |
        | (0.0, 1.0) | The model is better than predicting the mean of the target values. You should be here.     |
        | 0.0        | The model is as good as predicting the mean of the target values. Try something else.      |
        | (-∞, 0.0)  | The model is worse than predicting the mean of the target values. Something is very wrong. |

        **Note:** Some other libraries call this metric `r2_score`.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        coefficient_of_determination:
            The coefficient of determination of the regressor.

        Raises
        ------
        ModelNotFittedError
            If the regressor has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return RegressionMetrics.coefficient_of_determination(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
        )

    def mean_absolute_error(self, validation_or_test_set: Table | TabularDataset) -> float:
        """
        Compute the mean absolute error (MAE) of the regressor on the given data.

        The mean absolute error is the average of the absolute differences between the predicted and expected target
        values. The **lower** the mean absolute error, the better the regressor. Results range from 0.0 to positive
        infinity.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        mean_absolute_error:
            The mean absolute error of the regressor.

        Raises
        ------
        ModelNotFittedError
            If the regressor has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return RegressionMetrics.mean_absolute_error(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
        )

    def mean_directional_accuracy(self, validation_or_test_set: Table | TabularDataset) -> float:
        """
        Compute the mean directional accuracy (MDA) of the regressor on the given data.

        This metric compares two consecutive target values and checks if the predicted direction (down/unchanged/up)
        matches the expected direction. The mean directional accuracy is the proportion of correctly predicted
        directions. The **higher** the mean directional accuracy, the better the regressor. Results range from 0.0 to
        1.0.

        This metric is useful for time series data, where the order of the target values has a meaning. It is not useful
        for other types of data. Because of this, it is not included in the `summarize_metrics` method.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        mean_directional_accuracy:
            The mean directional accuracy of the regressor.

        Raises
        ------
        ModelNotFittedError
            If the regressor has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return RegressionMetrics.mean_directional_accuracy(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
        )

    def mean_squared_error(self, validation_or_test_set: Table | TabularDataset) -> float:
        """
        Compute the mean squared error (MSE) of the regressor on the given data.

        The mean squared error is the average of the squared differences between the predicted and expected target
        values. The **lower** the mean squared error, the better the regressor. Results range from 0.0 to positive
        infinity.

        **Note:** To get the root mean squared error (RMSE), take the square root of the result.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        mean_squared_error:
            The mean squared error of the regressor.

        Raises
        ------
        ModelNotFittedError
            If the regressor has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return RegressionMetrics.mean_squared_error(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
        )

    def median_absolute_deviation(self, validation_or_test_set: Table | TabularDataset) -> float:
        """
        Compute the median absolute deviation (MAD) of the regressor on the given data.

        The median absolute deviation is the median of the absolute differences between the predicted and expected
        target values. The **lower** the median absolute deviation, the better the regressor. Results range from 0.0 to
        positive infinity.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        median_absolute_deviation:
            The median absolute deviation of the regressor.

        Raises
        ------
        ModelNotFittedError
            If the regressor has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return RegressionMetrics.median_absolute_deviation(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
        )

is_fitted: bool

Whether the model is fitted.

coefficient_of_determination

Compute the coefficient of determination (R²) of the regressor on the given data.

The coefficient of determination compares the regressor's predictions to another model that always predicts the mean of the target values. It is a measure of how well the regressor explains the variance in the target values.

The higher the coefficient of determination, the better the regressor. Results range from negative infinity to 1.0. You can interpret the coefficient of determination as follows:

R² | Interpretation
1.0 The model perfectly predicts the target values. Did you overfit?
(0.0, 1.0) The model is better than predicting the mean of the target values. You should be here.
0.0 The model is as good as predicting the mean of the target values. Try something else.
(-∞, 0.0) The model is worse than predicting the mean of the target values. Something is very wrong.

Note: Some other libraries call this metric r2_score.

Parameters:

Name Type Description Default
validation_or_test_set Table | TabularDataset

The validation or test set.

required

Returns:

Name Type Description
coefficient_of_determination float

The coefficient of determination of the regressor.

Raises:

Type Description
ModelNotFittedError

If the regressor has not been fitted yet.

Source code in src/safeds/ml/classical/regression/_regressor.py
def coefficient_of_determination(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the coefficient of determination (R²) of the regressor on the given data.

    The coefficient of determination compares the regressor's predictions to another model that always predicts the
    mean of the target values. It is a measure of how well the regressor explains the variance in the target values.

    The **higher** the coefficient of determination, the better the regressor. Results range from negative infinity
    to 1.0. You can interpret the coefficient of determination as follows:

    | R²         | Interpretation                                                                             |
    | ---------- | ------------------------------------------------------------------------------------------ |
    | 1.0        | The model perfectly predicts the target values. Did you overfit?                           |
    | (0.0, 1.0) | The model is better than predicting the mean of the target values. You should be here.     |
    | 0.0        | The model is as good as predicting the mean of the target values. Try something else.      |
    | (-∞, 0.0)  | The model is worse than predicting the mean of the target values. Something is very wrong. |

    **Note:** Some other libraries call this metric `r2_score`.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    coefficient_of_determination:
        The coefficient of determination of the regressor.

    Raises
    ------
    ModelNotFittedError
        If the regressor has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return RegressionMetrics.coefficient_of_determination(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )

fit

Create a copy of this model and fit it with the given training data.

Note: This model is not modified.

Parameters:

Name Type Description Default
training_set TabularDataset

The training data containing the features and target.

required

Returns:

Name Type Description
fitted_model Self

The fitted model.

Raises:

Type Description
LearningError

If the training data contains invalid values or if the training failed.

Source code in src/safeds/ml/classical/_supervised_model.py
def fit(self, training_set: TabularDataset) -> Self:
    """
    Create a copy of this model and fit it with the given training data.

    **Note:** This model is not modified.

    Parameters
    ----------
    training_set:
        The training data containing the features and target.

    Returns
    -------
    fitted_model:
        The fitted model.

    Raises
    ------
    LearningError
        If the training data contains invalid values or if the training failed.
    """
    # A plain Table does not specify which columns are features and which is the
    # target, so it is rejected with a dedicated error instead of a generic one.
    if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table):
        raise PlainTableError
    if training_set.to_table().number_of_rows == 0:
        raise DatasetMissesDataError

    # Subclasses can add model-specific validation here.
    self._check_additional_fit_preconditions(training_set)

    wrapped_model = self._get_sklearn_model()
    _fit_sklearn_model_in_place(wrapped_model, training_set)

    # Store the fitted state on a clone so that `self` stays unmodified.
    result = self._clone()
    result._feature_schema = training_set.features.schema
    result._target_name = training_set.target.name
    result._target_type = training_set.target.type
    result._wrapped_model = wrapped_model

    return result

get_feature_names

Return the names of the feature columns.

Note: The model must be fitted.

Returns:

Name Type Description
feature_names list[str]

The names of the feature columns.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_feature_names(self) -> list[str]:
    """
    Return the names of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_names:
        The names of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Check _feature_schema directly instead of is_fitted, so the type checker can
    # narrow the attribute to a non-None value below.
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema.column_names

get_features_schema

Return the schema of the feature columns.

Note: The model must be fitted.

Returns:

Name Type Description
feature_schema Schema

The schema of the feature columns.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_features_schema(self) -> Schema:
    """
    Return the schema of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_schema:
        The schema of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Check _feature_schema directly instead of is_fitted, so the type checker can
    # narrow the attribute to a non-None value below.
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema

get_target_name

Return the name of the target column.

Note: The model must be fitted.

Returns:

Name Type Description
target_name str

The name of the target column.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_name(self) -> str:
    """
    Return the name of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_name:
        The name of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Check _target_name directly instead of is_fitted, so the type checker can
    # narrow the attribute to a non-None value below.
    if self._target_name is None:
        raise ModelNotFittedError

    return self._target_name

get_target_type

Return the type of the target column.

Note: The model must be fitted.

Returns:

Name Type Description
target_type DataType

The type of the target column.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_type(self) -> DataType:
    """
    Return the type of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_type:
        The type of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Check _target_type directly instead of is_fitted, so the type checker can
    # narrow the attribute to a non-None value below.
    if self._target_type is None:
        raise ModelNotFittedError

    return self._target_type

mean_absolute_error

Compute the mean absolute error (MAE) of the regressor on the given data.

The mean absolute error is the average of the absolute differences between the predicted and expected target values. The lower the mean absolute error, the better the regressor. Results range from 0.0 to positive infinity.

Parameters:

Name Type Description Default
validation_or_test_set Table | TabularDataset

The validation or test set.

required

Returns:

Name Type Description
mean_absolute_error float

The mean absolute error of the regressor.

Raises:

Type Description
ModelNotFittedError

If the regressor has not been fitted yet.

Source code in src/safeds/ml/classical/regression/_regressor.py
def mean_absolute_error(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the mean absolute error (MAE) of the regressor on the given data.

    The mean absolute error is the average of the absolute differences between the predicted and expected target
    values. The **lower** the mean absolute error, the better the regressor. Results range from 0.0 to positive
    infinity.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    mean_absolute_error:
        The mean absolute error of the regressor.

    Raises
    ------
    ModelNotFittedError
        If the regressor has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return RegressionMetrics.mean_absolute_error(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )

mean_directional_accuracy

Compute the mean directional accuracy (MDA) of the regressor on the given data.

This metric compares two consecutive target values and checks if the predicted direction (down/unchanged/up) matches the expected direction. The mean directional accuracy is the proportion of correctly predicted directions. The higher the mean directional accuracy, the better the regressor. Results range from 0.0 to 1.0.

This metric is useful for time series data, where the order of the target values has a meaning. It is not useful for other types of data. Because of this, it is not included in the summarize_metrics method.

Parameters:

Name Type Description Default
validation_or_test_set Table | TabularDataset

The validation or test set.

required

Returns:

Name Type Description
mean_directional_accuracy float

The mean directional accuracy of the regressor.

Raises:

Type Description
ModelNotFittedError

If the regressor has not been fitted yet.

Source code in src/safeds/ml/classical/regression/_regressor.py
def mean_directional_accuracy(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the mean directional accuracy (MDA) of the regressor on the given data.

    This metric compares two consecutive target values and checks if the predicted direction (down/unchanged/up)
    matches the expected direction. The mean directional accuracy is the proportion of correctly predicted
    directions. The **higher** the mean directional accuracy, the better the regressor. Results range from 0.0 to
    1.0.

    This metric is useful for time series data, where the order of the target values has a meaning. It is not useful
    for other types of data. Because of this, it is not included in the `summarize_metrics` method.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    mean_directional_accuracy:
        The mean directional accuracy of the regressor.

    Raises
    ------
    ModelNotFittedError
        If the regressor has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return RegressionMetrics.mean_directional_accuracy(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )

mean_squared_error

Compute the mean squared error (MSE) of the regressor on the given data.

The mean squared error is the average of the squared differences between the predicted and expected target values. The lower the mean squared error, the better the regressor. Results range from 0.0 to positive infinity.

Note: To get the root mean squared error (RMSE), take the square root of the result.

Parameters:

Name Type Description Default
validation_or_test_set Table | TabularDataset

The validation or test set.

required

Returns:

Name Type Description
mean_squared_error float

The mean squared error of the regressor.

Raises:

Type Description
ModelNotFittedError

If the regressor has not been fitted yet.

Source code in src/safeds/ml/classical/regression/_regressor.py
def mean_squared_error(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the mean squared error (MSE) of the regressor on the given data.

    The mean squared error is the average of the squared differences between the predicted and expected target
    values. The **lower** the mean squared error, the better the regressor. Results range from 0.0 to positive
    infinity.

    **Note:** To get the root mean squared error (RMSE), take the square root of the result.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    mean_squared_error:
        The mean squared error of the regressor.

    Raises
    ------
    ModelNotFittedError
        If the regressor has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return RegressionMetrics.mean_squared_error(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )

median_absolute_deviation

Compute the median absolute deviation (MAD) of the regressor on the given data.

The median absolute deviation is the median of the absolute differences between the predicted and expected target values. The lower the median absolute deviation, the better the regressor. Results range from 0.0 to positive infinity.

Parameters:

Name Type Description Default
validation_or_test_set Table | TabularDataset

The validation or test set.

required

Returns:

Name Type Description
median_absolute_deviation float

The median absolute deviation of the regressor.

Raises:

Type Description
ModelNotFittedError

If the regressor has not been fitted yet.

Source code in src/safeds/ml/classical/regression/_regressor.py
def median_absolute_deviation(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the median absolute deviation (MAD) of the regressor on the given data.

    The median absolute deviation is the median of the absolute differences between the predicted and expected
    target values. The **lower** the median absolute deviation, the better the regressor. Results range from 0.0 to
    positive infinity.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    median_absolute_deviation:
        The median absolute deviation of the regressor.

    Raises
    ------
    ModelNotFittedError
        If the regressor has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return RegressionMetrics.median_absolute_deviation(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )

predict

Predict the target values on the given dataset.

Note: The model must be fitted.

Parameters:

Name Type Description Default
dataset Table | TabularDataset

The dataset containing at least the features.

required

Returns:

Name Type Description
prediction TabularDataset

The given dataset with an additional column for the predicted target values.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

DatasetMissesFeaturesError

If the dataset misses feature columns.

PredictionError

If predicting with the given dataset failed.

Source code in src/safeds/ml/classical/_supervised_model.py
def predict(
    self,
    dataset: Table | TabularDataset,
) -> TabularDataset:
    """
    Predict the target values on the given dataset.

    **Note:** The model must be fitted.

    Parameters
    ----------
    dataset:
        The dataset containing at least the features.

    Returns
    -------
    prediction:
        The given dataset with an additional column for the predicted target values.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    DatasetMissesFeaturesError
        If the dataset misses feature columns.
    PredictionError
        If predicting with the given dataset failed.
    """
    # Subclasses can add model-specific validation here.
    self._check_additional_predict_preconditions(dataset)

    # Delegate the actual prediction to the wrapped sklearn model; the helper also
    # attaches the predicted target column to the returned dataset.
    return _predict_with_sklearn_model(
        self._wrapped_model,
        dataset,
        self.get_feature_names(),
        self.get_target_name(),
    )

summarize_metrics

Summarize the regressor's metrics on the given data.

Parameters:

Name Type Description Default
validation_or_test_set Table | TabularDataset

The validation or test set.

required

Returns:

Name Type Description
metrics Table

A table containing the regressor's metrics.

Raises:

Type Description
ModelNotFittedError

If the regressor has not been fitted yet.

Source code in src/safeds/ml/classical/regression/_regressor.py
def summarize_metrics(self, validation_or_test_set: Table | TabularDataset) -> Table:
    """
    Summarize the regressor's metrics on the given data.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    metrics:
        A table containing the regressor's metrics.

    Raises
    ------
    ModelNotFittedError
        If the regressor has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return RegressionMetrics.summarize(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )