AdaBoostClassifier

Bases: Classifier, _AdaBoostBase

AdaBoost classification.

Parameters:

learner: Classifier | None | Choice[Classifier | None] (default: None)
    The learner from which the boosted ensemble is built.

max_learner_count: int | Choice[int] (default: 50)
    The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early. Has to be greater than 0.

learning_rate: float | Choice[float] (default: 1.0)
    Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution of each classifier. Has to be greater than 0.

Raises:

OutOfBoundsError
    If max_learner_count or learning_rate are less than or equal to 0.
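
For illustration, a minimal construction sketch (assuming AdaBoostClassifier is imported from safeds.ml.classical.classification, and that Choice lives in safeds.ml.hyperparameters):

from safeds.ml.classical.classification import AdaBoostClassifier
from safeds.ml.hyperparameters import Choice  # assumed import path

# Fixed hyperparameters: this model can be passed to fit().
classifier = AdaBoostClassifier(max_learner_count=100, learning_rate=0.5)

# Hyperparameter choices: this model is meant for fit_by_exhaustive_search().
searchable = AdaBoostClassifier(
    max_learner_count=Choice(25, 50, 100),
    learning_rate=Choice(0.5, 1.0),
)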

Source code in src/safeds/ml/classical/classification/_ada_boost_classifier.py
class AdaBoostClassifier(Classifier, _AdaBoostBase):
    """
    AdaBoost classification.

    Parameters
    ----------
    learner:
        The learner from which the boosted ensemble is built.
    max_learner_count:
        The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure
        is stopped early. Has to be greater than 0.
    learning_rate:
        Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution
        of each classifier. Has to be greater than 0.

    Raises
    ------
    OutOfBoundsError
        If `max_learner_count` or `learning_rate` are less than or equal to 0.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(
        self,
        *,
        learner: Classifier | None | Choice[Classifier | None] = None,
        max_learner_count: int | Choice[int] = 50,
        learning_rate: float | Choice[float] = 1.0,
    ) -> None:
        # Initialize superclasses
        Classifier.__init__(self)
        _AdaBoostBase.__init__(
            self,
            max_learner_count=max_learner_count,
            learning_rate=learning_rate,
        )

        # Hyperparameters
        self._learner: Classifier | None | Choice[Classifier | None] = learner

    def __hash__(self) -> int:
        return _structural_hash(
            Classifier.__hash__(self),
            _AdaBoostBase.__hash__(self),
            self._learner,
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def learner(self) -> Classifier | None | Choice[Classifier | None]:  # type: ignore[override]
        """The base learner used for training the ensemble."""
        return self._learner

    # ------------------------------------------------------------------------------------------------------------------
    # Template methods
    # ------------------------------------------------------------------------------------------------------------------

    def _clone(self) -> AdaBoostClassifier:
        return AdaBoostClassifier(
            learner=self.learner,
            max_learner_count=self._max_learner_count,
            learning_rate=self._learning_rate,
        )

    def _get_sklearn_model(self) -> ClassifierMixin:
        from sklearn.ensemble import AdaBoostClassifier as SklearnAdaBoostClassifier

        assert not isinstance(self.learner, Choice)
        learner = self.learner._get_sklearn_model() if self.learner is not None else None
        return SklearnAdaBoostClassifier(
            estimator=learner,
            n_estimators=self._max_learner_count,
            learning_rate=self._learning_rate,
            algorithm="SAMME",  #   Will be the default in sklearn 1.6, remove this line then
        )

    def _check_additional_fit_preconditions(self) -> None:
        if (
            isinstance(self._max_learner_count, Choice)
            or isinstance(self._learning_rate, Choice)
            or isinstance(self._learner, Choice)
        ):
            raise FittingWithChoiceError

    def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None:
        if (
            not isinstance(self._max_learner_count, Choice)
            and not isinstance(self._learning_rate, Choice)
            and not isinstance(self._learner, Choice)
        ):
            raise FittingWithoutChoiceError

    def _get_models_for_all_choices(self) -> list[AdaBoostClassifier]:
        learner_choices = self._learner if isinstance(self._learner, Choice) else [self._learner]
        max_learner_count_choices = (
            self._max_learner_count if isinstance(self._max_learner_count, Choice) else [self._max_learner_count]
        )
        learning_rate_choices = (
            self._learning_rate if isinstance(self._learning_rate, Choice) else [self._learning_rate]
        )

        models = []
        for learner in learner_choices:
            for mlc in max_learner_count_choices:
                for lr in learning_rate_choices:
                    models.append(AdaBoostClassifier(learner=learner, max_learner_count=mlc, learning_rate=lr))
        return models

is_fitted: bool
    Whether the model is fitted.

learner: Classifier | None | Choice[Classifier | None]
    The base learner used for training the ensemble.

learning_rate: float | Choice[float]
    The learning rate.

max_learner_count: int | Choice[int]
    The maximum number of learners in the ensemble.
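
As a sketch of how the attributes behave around training (the toy table below is made up for illustration; to_tabular_dataset and fit are documented further down this page):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import AdaBoostClassifier

# Made-up toy data: predict `target` from features `a` and `b`.
training_table = Table({
    "a":      [1, 2, 3, 4, 5, 6, 7, 8],
    "b":      [8, 7, 6, 5, 4, 3, 2, 1],
    "target": [0, 0, 0, 0, 1, 1, 1, 1],
})
train_data = training_table.to_tabular_dataset(target_name="target")

classifier = AdaBoostClassifier()
assert not classifier.is_fitted       # a fresh model is not fitted

fitted_classifier = classifier.fit(train_data)
assert fitted_classifier.is_fitted    # fit() returns a fitted copy
assert fitted_classifier.max_learner_count == 50  # hyperparameters carry over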

accuracy

Compute the accuracy of the classifier on the given data.

The accuracy is the proportion of predicted target values that were correct. The higher the accuracy, the better. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set: Table | TabularDataset (required)
    The validation or test set.

Returns:

accuracy: float
    The classifier's accuracy.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.
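
For example (a sketch reusing fitted_classifier and train_data from the attribute example above; in practice you would evaluate on held-out data rather than the training set):

score = fitted_classifier.accuracy(train_data)
print(f"accuracy: {score:.2f}")  # in [0.0, 1.0]; higher is better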

Source code in src/safeds/ml/classical/classification/_classifier.py
def accuracy(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the accuracy of the classifier on the given data.

    The accuracy is the proportion of predicted target values that were correct. The **higher** the accuracy, the
    better. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    accuracy:
        The classifier's accuracy.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.accuracy(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )

f1_score

Compute the classifier's F₁ score on the given data.

The F₁ score is the harmonic mean of precision and recall. The higher the F₁ score, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set: Table | TabularDataset (required)
    The validation or test set.

positive_class: Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

f1_score: float
    The classifier's F₁ score.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.
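
For example (a sketch continuing the toy setup above, treating class 1 as positive):

f1 = fitted_classifier.f1_score(train_data, positive_class=1)
print(f"F1 score: {f1:.2f}")  # harmonic mean of precision and recall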

Source code in src/safeds/ml/classical/classification/_classifier.py
def f1_score(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> float:
    """
    Compute the classifier's F₁ score on the given data.

    The F₁ score is the harmonic mean of precision and recall. The **higher** the F₁ score, the better the
    classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    f1_score:
        The classifier's F₁ score.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.f1_score(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )

fit

Create a copy of this model and fit it with the given training data.

Note: This model is not modified.

Parameters:

training_set: TabularDataset (required)
    The training data containing the features and target.

Returns:

fitted_model: Self
    The fitted model.

Raises:

PlainTableError
    If a table is passed instead of a TabularDataset.

DatasetMissesDataError
    If the given training set contains no data.

FittingWithChoiceError
    When trying to call this method on a model with hyperparameter choices.

LearningError
    If the training data contains invalid values or if the training failed.
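
A short sketch, continuing the toy setup from the attribute example; note that fit() accepts a TabularDataset, not a plain Table:

fitted = AdaBoostClassifier(max_learner_count=25).fit(train_data)  # OK
# AdaBoostClassifier().fit(training_table)  # plain Table: raises PlainTableError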

Source code in src/safeds/ml/classical/_supervised_model.py
def fit(self, training_set: TabularDataset) -> Self:
    """
    Create a copy of this model and fit it with the given training data.

    **Note:** This model is not modified.

    Parameters
    ----------
    training_set:
        The training data containing the features and target.

    Returns
    -------
    fitted_model:
        The fitted model.

    Raises
    ------
    PlainTableError
        If a table is passed instead of a TabularDataset.
    DatasetMissesDataError
        If the given training set contains no data.
    FittingWithChoiceError
        When trying to call this method on a model with hyperparameter choices.
    LearningError
        If the training data contains invalid values or if the training failed.
    """
    if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table):
        raise PlainTableError
    if training_set.to_table().row_count == 0:
        raise DatasetMissesDataError

    self._check_additional_fit_preconditions()
    self._check_more_additional_fit_preconditions(training_set)

    wrapped_model = self._get_sklearn_model()
    _fit_sklearn_model_in_place(wrapped_model, training_set)

    result = self._clone()
    result._feature_schema = training_set.features.schema
    result._target_name = training_set.target.name
    result._target_type = training_set.target.type
    result._wrapped_model = wrapped_model

    return result

fit_by_exhaustive_search

Use the hyperparameter choices to create multiple models and fit them.

Note: This model is not modified.

Parameters:

training_set: TabularDataset (required)
    The training data containing the features and target.

optimization_metric: ClassifierMetric (required)
    The metric that should be used for determining the performance of a model.

positive_class: Any (default: None)
    The class to be considered positive. All other classes are considered negative. Must be provided when precision, recall, or F₁ score is chosen as the optimization metric.

Returns:

best_model: Self
    The model that performed best out of all possible models given the choices of hyperparameters.

Raises:

PlainTableError
    If a table is passed instead of a TabularDataset.

DatasetMissesDataError
    If the given training set contains no data.

FittingWithoutChoiceError
    When trying to call this method on a model without hyperparameter choices.

LearningError
    If the training data contains invalid values or if the training failed.
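
A sketch of a small search, continuing the toy setup above; the import paths for Choice and ClassifierMetric, and the enum member names ACCURACY and F1_SCORE, are assumptions:

from safeds.ml.hyperparameters import Choice    # assumed import path
from safeds.ml.metrics import ClassifierMetric  # assumed import path

searchable = AdaBoostClassifier(
    max_learner_count=Choice(25, 50, 100),
    learning_rate=Choice(0.5, 1.0),
)

# Fits one model per hyperparameter combination (6 here) and keeps the best.
best_model = searchable.fit_by_exhaustive_search(
    train_data,
    optimization_metric=ClassifierMetric.ACCURACY,
)

# Precision, recall, and F1 score additionally need a positive class:
best_by_f1 = searchable.fit_by_exhaustive_search(
    train_data,
    optimization_metric=ClassifierMetric.F1_SCORE,
    positive_class=1,
)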

Source code in src/safeds/ml/classical/classification/_classifier.py
def fit_by_exhaustive_search(
    self,
    training_set: TabularDataset,
    optimization_metric: ClassifierMetric,
    positive_class: Any = None,
) -> Self:
    """
    Use the hyperparameter choices to create multiple models and fit them.

    **Note:** This model is not modified.

    Parameters
    ----------
    training_set:
        The training data containing the features and target.
    optimization_metric:
        The metric that should be used for determining the performance of a model.
    positive_class:
        The class to be considered positive. All other classes are considered negative.
        Needs to be provided when choosing precision, recall, or F₁ score as the optimization metric.

    Returns
    -------
    best_model:
        The model that performed the best out of all possible models given the Choices of hyperparameters.

    Raises
    ------
    PlainTableError
        If a table is passed instead of a TabularDataset.
    DatasetMissesDataError
        If the given training set contains no data.
    FittingWithoutChoiceError
        When trying to call this method on a model without hyperparameter choices.
    LearningError
        If the training data contains invalid values or if the training failed.
    """
    if training_set.to_table().row_count == 0:
        raise DatasetMissesDataError
    if optimization_metric.value in {"precision", "recall", "f1_score"} and positive_class is None:
        raise LearningError(
            f"Please provide a positive class when using optimization metric '{optimization_metric.value}'",
        )

    self._check_additional_fit_by_exhaustive_search_preconditions()

    [train_split, test_split] = training_set.to_table().split_rows(0.75)
    train_data = train_split.to_tabular_dataset(
        target_name=training_set.target.name,
        extra_names=training_set.extras.column_names,
    )
    test_data = test_split.to_tabular_dataset(
        target_name=training_set.target.name,
        extra_names=training_set.extras.column_names,
    )

    list_of_models = self._get_models_for_all_choices()
    list_of_fitted_models = []

    with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context("spawn")) as executor:
        futures = []
        for model in list_of_models:
            futures.append(executor.submit(model.fit, train_data))
        [done, _] = wait(futures, return_when=ALL_COMPLETED)
        for future in done:
            list_of_fitted_models.append(future.result())
    best_model = None
    best_metric_value = None
    for fitted_model in list_of_fitted_models:
        if best_model is None:
            best_model = fitted_model
            match optimization_metric.value:
                case "accuracy":
                    best_metric_value = fitted_model.accuracy(test_data)
                case "precision":
                    best_metric_value = fitted_model.precision(test_data, positive_class)
                case "recall":
                    best_metric_value = fitted_model.recall(test_data, positive_class)
                case "f1_score":
                    best_metric_value = fitted_model.f1_score(test_data, positive_class)
        else:
            match optimization_metric.value:
                case "accuracy":
                    accuracy_of_fitted_model = fitted_model.accuracy(test_data)
                    if accuracy_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = accuracy_of_fitted_model  # pragma: no cover
                case "precision":
                    precision_of_fitted_model = fitted_model.precision(test_data, positive_class)
                    if precision_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = precision_of_fitted_model  # pragma: no cover
                case "recall":
                    recall_of_fitted_model = fitted_model.recall(test_data, positive_class)
                    if recall_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = recall_of_fitted_model  # pragma: no cover
                case "f1_score":
                    f1score_of_fitted_model = fitted_model.f1_score(test_data, positive_class)
                    if f1score_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = f1score_of_fitted_model  # pragma: no cover
    assert best_model is not None
    return best_model

get_feature_names

Return the names of the feature columns.

Note: The model must be fitted.

Returns:

feature_names: list[str]
    The names of the feature columns.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_feature_names(self) -> list[str]:
    """
    Return the names of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_names:
        The names of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema.column_names

get_features_schema

Return the schema of the feature columns.

Note: The model must be fitted.

Returns:

feature_schema: Schema
    The schema of the feature columns.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_features_schema(self) -> Schema:
    """
    Return the schema of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_schema:
        The schema of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema

get_target_name

Return the name of the target column.

Note: The model must be fitted.

Returns:

target_name: str
    The name of the target column.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_name(self) -> str:
    """
    Return the name of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_name:
        The name of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._target_name is None:
        raise ModelNotFittedError

    return self._target_name

get_target_type

Return the type of the target column.

Note: The model must be fitted.

Returns:

target_type: DataType
    The type of the target column.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.
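
Taken together, the getters recover the training schema. A sketch using the fitted model from the earlier examples:

fitted_classifier.get_feature_names()    # e.g. ["a", "b"]
fitted_classifier.get_features_schema()  # Schema of the feature columns
fitted_classifier.get_target_name()      # "target"
fitted_classifier.get_target_type()      # DataType of the target column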

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_type(self) -> DataType:
    """
    Return the type of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_type:
        The type of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._target_type is None:
        raise ModelNotFittedError

    return self._target_type

precision

Compute the classifier's precision on the given data.

The precision is the proportion of positive predictions that were correct. The higher the precision, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set: Table | TabularDataset (required)
    The validation or test set.

positive_class: Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

precision: float
    The classifier's precision.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def precision(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> float:
    """
    Compute the classifier's precision on the given data.

    The precision is the proportion of positive predictions that were correct. The **higher** the precision, the
    better the classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    precision:
        The classifier's precision.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.precision(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )

predict

Predict the target values on the given dataset.

Note: The model must be fitted.

Parameters:

dataset: Table | TabularDataset (required)
    The dataset containing at least the features.

Returns:

prediction: TabularDataset
    The given dataset with an additional column for the predicted target values.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

DatasetMissesFeaturesError
    If the dataset misses feature columns.

PredictionError
    If predicting with the given dataset failed.
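
For example (a sketch continuing the toy setup; the new rows only need to contain the feature columns):

new_rows = Table({"a": [2, 7], "b": [7, 2]})
prediction = fitted_classifier.predict(new_rows)
# `prediction` is a TabularDataset with an added "target" column.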

Source code in src/safeds/ml/classical/_supervised_model.py
def predict(
    self,
    dataset: Table | TabularDataset,
) -> TabularDataset:
    """
    Predict the target values on the given dataset.

    **Note:** The model must be fitted.

    Parameters
    ----------
    dataset:
        The dataset containing at least the features.

    Returns
    -------
    prediction:
        The given dataset with an additional column for the predicted target values.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    DatasetMissesFeaturesError
        If the dataset misses feature columns.
    PredictionError
        If predicting with the given dataset failed.
    """
    self._check_additional_predict_preconditions(dataset)

    return _predict_with_sklearn_model(
        self._wrapped_model,
        dataset,
        self.get_feature_names(),
        self.get_target_name(),
    )

recall

Compute the classifier's recall on the given data.

The recall is the proportion of actual positives that were predicted correctly. The higher the recall, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set: Table | TabularDataset (required)
    The validation or test set.

positive_class: Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

recall: float
    The classifier's recall.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.
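
Precision and recall are queried the same way as the F₁ score (a sketch continuing the toy setup, with class 1 as the positive class):

prec = fitted_classifier.precision(train_data, positive_class=1)
rec = fitted_classifier.recall(train_data, positive_class=1)
print(f"precision: {prec:.2f}, recall: {rec:.2f}")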

Source code in src/safeds/ml/classical/classification/_classifier.py
def recall(self, validation_or_test_set: Table | TabularDataset, positive_class: Any) -> float:
    """
    Compute the classifier's recall on the given data.

    The recall is the proportion of actual positives that were predicted correctly. The **higher** the recall, the
    better the classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    recall:
        The classifier's recall.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.recall(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )

summarize_metrics

Summarize the classifier's metrics on the given data.

Note: The model must be fitted.

Parameters:

validation_or_test_set: Table | TabularDataset (required)
    The validation or test set.

positive_class: Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

metrics: Table
    A table containing the classifier's metrics.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.
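
For example (a sketch continuing the toy setup; one call computes accuracy, precision, recall, and F₁ score at once):

metrics = fitted_classifier.summarize_metrics(train_data, positive_class=1)
print(metrics)  # a Table containing the classifier's metrics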

Source code in src/safeds/ml/classical/classification/_classifier.py
def summarize_metrics(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> Table:
    """
    Summarize the classifier's metrics on the given data.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    metrics:
        A table containing the classifier's metrics.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.summarize(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )