Skip to content

AdaBoostClassifier

Bases: Classifier

Ada Boost classification.

Parameters:

Name Type Description Default
learner Classifier | None

The learner from which the boosted ensemble is built.

None
maximum_number_of_learners int

The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early. Has to be greater than 0.

50
learning_rate float

Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution of each classifier. Has to be greater than 0.

1.0

Raises:

Type Description
OutOfBoundsError

If maximum_number_of_learners is less than 1 or learning_rate is less than or equal to 0.

Source code in src/safeds/ml/classical/classification/_ada_boost.py
class AdaBoostClassifier(Classifier):
    """
    Ada Boost classification.

    Parameters
    ----------
    learner:
        The learner from which the boosted ensemble is built. If None, no explicit base estimator is handed to the
        wrapped scikit-learn model, which then falls back to its own default.
    maximum_number_of_learners:
        The maximum number of learners at which boosting is terminated. In case of perfect fit, the learning procedure
        is stopped early. Has to be greater than 0.
    learning_rate:
        Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution
        of each classifier. Has to be greater than 0.

    Raises
    ------
    OutOfBoundsError
        If `maximum_number_of_learners` is less than 1 or `learning_rate` is less than or equal to 0.
    """

    def __hash__(self) -> int:
        """
        Compute a structural hash from the fitted state and all hyperparameters.

        Returns
        -------
        hash:
            The hash value.
        """
        # The base learner is a hyperparameter just like the learning rate: two ensembles that differ only in their
        # base learner must not be forced onto the same hash. `None` is kept as-is so the "no learner" case does not
        # depend on the interpreter's hash of `None`.
        learner_hash = None if self._learner is None else hash(self._learner)

        return _structural_hash(
            Classifier.__hash__(self),
            self._target_name,
            self._feature_names,
            self._learning_rate,
            self._maximum_number_of_learners,
            learner_hash,
        )

    def __init__(
        self,
        *,
        learner: Classifier | None = None,
        maximum_number_of_learners: int = 50,
        learning_rate: float = 1.0,
    ) -> None:
        # Validation
        if maximum_number_of_learners < 1:
            raise OutOfBoundsError(
                maximum_number_of_learners,
                name="maximum_number_of_learners",
                lower_bound=ClosedBound(1),
            )
        if learning_rate <= 0:
            raise OutOfBoundsError(learning_rate, name="learning_rate", lower_bound=OpenBound(0))

        # Hyperparameters
        self._learner = learner
        self._maximum_number_of_learners = maximum_number_of_learners
        self._learning_rate = learning_rate

        # Internal state (only populated on the copy returned by `fit`)
        self._wrapped_classifier: sk_AdaBoostClassifier | None = None
        self._feature_names: list[str] | None = None
        self._target_name: str | None = None

    @property
    def learner(self) -> Classifier | None:
        """
        Get the base learner used for training the ensemble.

        Returns
        -------
        result:
            The base learner, or None if none was specified.
        """
        return self._learner

    @property
    def maximum_number_of_learners(self) -> int:
        """
        Get the maximum number of learners in the ensemble.

        Returns
        -------
        result:
            The maximum number of learners.
        """
        return self._maximum_number_of_learners

    @property
    def learning_rate(self) -> float:
        """
        Get the learning rate.

        Returns
        -------
        result:
            The learning rate.
        """
        return self._learning_rate

    def fit(self, training_set: TabularDataset | ExperimentalTabularDataset) -> AdaBoostClassifier:
        """
        Create a copy of this classifier and fit it with the given training data.

        This classifier is not modified.

        Parameters
        ----------
        training_set:
            The training data containing the feature and target vectors.

        Returns
        -------
        fitted_classifier:
            The fitted classifier.

        Raises
        ------
        LearningError
            If the training data contains invalid values or if the training failed.
        TypeError
            If a table is passed instead of a tabular dataset.
        NonNumericColumnError
            If the training data contains non-numerical values.
        MissingValuesColumnError
            If the training data contains missing values.
        DatasetMissesDataError
            If the training data contains no rows.
        """
        # Train a freshly created sklearn estimator so that no state of `self` is touched.
        wrapped_classifier = self._get_sklearn_classifier()
        fit(wrapped_classifier, training_set)

        # The fitted model is attached to a new instance carrying the same hyperparameters.
        result = AdaBoostClassifier(
            learner=self.learner,
            maximum_number_of_learners=self.maximum_number_of_learners,
            learning_rate=self._learning_rate,
        )
        result._wrapped_classifier = wrapped_classifier
        result._feature_names = training_set.features.column_names
        result._target_name = training_set.target.name

        return result

    def predict(self, dataset: Table | ExperimentalTable | ExperimentalTabularDataset) -> TabularDataset:
        """
        Predict a target vector using a dataset containing feature vectors. The model has to be trained first.

        Parameters
        ----------
        dataset:
            The dataset containing the feature vectors.

        Returns
        -------
        table:
            A dataset containing the given feature vectors and the predicted target vector.

        Raises
        ------
        ModelNotFittedError
            If the model has not been fitted yet.
        DatasetMissesFeaturesError
            If the dataset misses feature columns.
        PredictionError
            If predicting with the given dataset failed.
        NonNumericColumnError
            If the dataset contains non-numerical values.
        MissingValuesColumnError
            If the dataset contains missing values.
        DatasetMissesDataError
            If the dataset contains no rows.
        """
        # All checks and the actual prediction are delegated to the shared module-level helper.
        return predict(self._wrapped_classifier, dataset, self._feature_names, self._target_name)

    @property
    def is_fitted(self) -> bool:
        """Whether the classifier is fitted."""
        return self._wrapped_classifier is not None

    def _get_sklearn_classifier(self) -> ClassifierMixin:
        """
        Return a new wrapped Classifier from sklearn.

        Returns
        -------
        wrapped_classifier:
            The sklearn Classifier.
        """
        # Imported inside the method, so sklearn is only loaded once a model is actually built.
        from sklearn.ensemble import AdaBoostClassifier as sk_AdaBoostClassifier

        # Translate the base learner (if any) into its sklearn counterpart; None is passed through unchanged.
        learner = self.learner._get_sklearn_classifier() if self.learner is not None else None
        return sk_AdaBoostClassifier(
            estimator=learner,
            n_estimators=self.maximum_number_of_learners,
            learning_rate=self._learning_rate,
        )

is_fitted: bool property

Whether the classifier is fitted.

learner: Classifier | None property

Get the base learner used for training the ensemble.

Returns:

Name Type Description
result Classifier | None

The base learner.

learning_rate: float property

Get the learning rate.

Returns:

Name Type Description
result float

The learning rate.

maximum_number_of_learners: int property

Get the maximum number of learners in the ensemble.

Returns:

Name Type Description
result int

The maximum number of learners.

fit(training_set)

Create a copy of this classifier and fit it with the given training data.

This classifier is not modified.

Parameters:

Name Type Description Default
training_set TabularDataset | ExperimentalTabularDataset

The training data containing the feature and target vectors.

required

Returns:

Name Type Description
fitted_classifier AdaBoostClassifier

The fitted classifier.

Raises:

Type Description
LearningError

If the training data contains invalid values or if the training failed.

TypeError

If a table is passed instead of a tabular dataset.

NonNumericColumnError

If the training data contains non-numerical values.

MissingValuesColumnError

If the training data contains missing values.

DatasetMissesDataError

If the training data contains no rows.

Source code in src/safeds/ml/classical/classification/_ada_boost.py
def fit(self, training_set: TabularDataset | ExperimentalTabularDataset) -> AdaBoostClassifier:
    """
    Train a new classifier on the given data and return it.

    The receiver itself stays unchanged; the result is a separate instance with the same hyperparameters.

    Parameters
    ----------
    training_set:
        The training data containing the feature and target vectors.

    Returns
    -------
    fitted_classifier:
        The fitted classifier.

    Raises
    ------
    LearningError
        If the training data contains invalid values or if the training failed.
    TypeError
        If a table is passed instead of a tabular dataset.
    NonNumericColumnError
        If the training data contains non-numerical values.
    MissingValuesColumnError
        If the training data contains missing values.
    DatasetMissesDataError
        If the training data contains no rows.
    """
    # Build and train a fresh sklearn estimator first.
    sklearn_model = self._get_sklearn_classifier()
    fit(sklearn_model, training_set)

    # Then wrap it in a new instance configured like this one.
    fitted = AdaBoostClassifier(
        learner=self.learner,
        maximum_number_of_learners=self.maximum_number_of_learners,
        learning_rate=self._learning_rate,
    )
    fitted._wrapped_classifier = sklearn_model
    fitted._feature_names = training_set.features.column_names
    fitted._target_name = training_set.target.name
    return fitted

predict(dataset)

Predict a target vector using a dataset containing feature vectors. The model has to be trained first.

Parameters:

Name Type Description Default
dataset Table | ExperimentalTable | ExperimentalTabularDataset

The dataset containing the feature vectors.

required

Returns:

Name Type Description
table TabularDataset

A dataset containing the given feature vectors and the predicted target vector.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

DatasetMissesFeaturesError

If the dataset misses feature columns.

PredictionError

If predicting with the given dataset failed.

NonNumericColumnError

If the dataset contains non-numerical values.

MissingValuesColumnError

If the dataset contains missing values.

DatasetMissesDataError

If the dataset contains no rows.

Source code in src/safeds/ml/classical/classification/_ada_boost.py
def predict(self, dataset: Table | ExperimentalTable | ExperimentalTabularDataset) -> TabularDataset:
    """
    Predict the target for the feature vectors in the given dataset. Requires a fitted model.

    Parameters
    ----------
    dataset:
        The dataset containing the feature vectors.

    Returns
    -------
    table:
        A dataset containing the given feature vectors and the predicted target vector.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    DatasetMissesFeaturesError
        If the dataset misses feature columns.
    PredictionError
        If predicting with the given dataset failed.
    NonNumericColumnError
        If the dataset contains non-numerical values.
    MissingValuesColumnError
        If the dataset contains missing values.
    DatasetMissesDataError
        If the dataset contains no rows.
    """
    # Collect the state recorded during fitting, then hand everything to the shared helper.
    fitted_model = self._wrapped_classifier
    feature_names = self._feature_names
    target_name = self._target_name
    return predict(fitted_model, dataset, feature_names, target_name)