Skip to content

RandomForestClassifier

Bases: Classifier

Random forest classification.

Parameters:

Name Type Description Default
number_of_trees int

The number of trees to be used in the random forest. Has to be greater than 0.

100

Raises:

Type Description
OutOfBoundsError

If number_of_trees is less than 1.

Source code in src/safeds/ml/classical/classification/_random_forest.py
class RandomForestClassifier(Classifier):
    """Classifier backed by a scikit-learn random forest.

    Parameters
    ----------
    number_of_trees : int
        How many trees the forest is built from. Must be at least 1.

    Raises
    ------
    OutOfBoundsError
        If `number_of_trees` is smaller than 1.
    """

    def __init__(self, *, number_of_trees: int = 100) -> None:
        # A forest needs at least one tree; reject anything below that up front.
        if number_of_trees < 1:
            minimum = ClosedBound(1)
            raise OutOfBoundsError(number_of_trees, name="number_of_trees", lower_bound=minimum)

        # Hyperparameter chosen by the caller.
        self._number_of_trees = number_of_trees

        # Fitting state: all three stay None until `fit` fills them in on a new instance.
        self._wrapped_classifier: sk_RandomForestClassifier | None = None
        self._feature_names: list[str] | None = None
        self._target_name: str | None = None

    def __hash__(self) -> int:
        """Combine the base-class hash with the target name, feature names and tree count."""
        base_hash = Classifier.__hash__(self)
        return _structural_hash(
            base_hash,
            self._target_name,
            self._feature_names,
            self._number_of_trees,
        )

    @property
    def number_of_trees(self) -> int:
        """
        Number of trees this random forest is configured with.

        Returns
        -------
        result: int
            The number of trees.
        """
        return self._number_of_trees

    def fit(self, training_set: TaggedTable) -> RandomForestClassifier:
        """
        Train a fresh copy of this classifier on the given data and return that copy.

        The instance this method is called on is left untouched.

        Parameters
        ----------
        training_set : TaggedTable
            Training data holding both the feature vectors and the target vector.

        Returns
        -------
        fitted_classifier : RandomForestClassifier
            A new, fitted classifier.

        Raises
        ------
        LearningError
            If the training data contains invalid values or if the training failed.
        UntaggedTableError
            If the table is untagged.
        NonNumericColumnError
            If the training data contains non-numerical values.
        MissingValuesColumnError
            If the training data contains missing values.
        DatasetMissesDataError
            If the training data contains no rows.
        """
        sklearn_model = self._get_sklearn_classifier()
        # `fit` here is the module-level helper, not this method.
        fit(sklearn_model, training_set)

        # Carry the hyperparameters over to a new instance and attach the trained state to it.
        fitted = RandomForestClassifier(number_of_trees=self._number_of_trees)
        fitted._wrapped_classifier = sklearn_model
        fitted._feature_names = training_set.features.column_names
        fitted._target_name = training_set.target.name

        return fitted

    def predict(self, dataset: Table) -> TaggedTable:
        """
        Predict the target vector for a dataset of feature vectors. Requires a fitted model.

        Parameters
        ----------
        dataset : Table
            The dataset holding the feature vectors.

        Returns
        -------
        table : TaggedTable
            The given feature vectors together with the predicted target vector.

        Raises
        ------
        ModelNotFittedError
            If the model has not been fitted yet.
        DatasetContainsTargetError
            If the dataset contains the target column already.
        DatasetMissesFeaturesError
            If the dataset misses feature columns.
        PredictionError
            If predicting with the given dataset failed.
        NonNumericColumnError
            If the dataset contains non-numerical values.
        MissingValuesColumnError
            If the dataset contains missing values.
        DatasetMissesDataError
            If the dataset contains no rows.
        """
        model = self._wrapped_classifier
        feature_names = self._feature_names
        target_name = self._target_name
        # `predict` here is the module-level helper, not this method.
        return predict(model, dataset, feature_names, target_name)

    def is_fitted(self) -> bool:
        """
        Tell whether this classifier has been fitted.

        Returns
        -------
        is_fitted : bool
            True if a wrapped classifier is present, False otherwise.
        """
        wrapped = self._wrapped_classifier
        return wrapped is not None

    def _get_sklearn_classifier(self) -> ClassifierMixin:
        """
        Build a new, unfitted scikit-learn random forest using this instance's tree count.

        Returns
        -------
        wrapped_classifier: ClassifierMixin
            The sklearn Classifier.
        """
        # Imported locally, so scikit-learn is only loaded when this method runs.
        from sklearn.ensemble import RandomForestClassifier as sk_RandomForestClassifier

        tree_count = self._number_of_trees
        # n_jobs=-1: per the scikit-learn docs, use all available processors.
        return sk_RandomForestClassifier(tree_count, n_jobs=-1)

number_of_trees: int property

Get the number of trees used in the random forest.

Returns:

Name Type Description
result int

The number of trees.

__hash__()

Source code in src/safeds/ml/classical/classification/_random_forest.py
def __hash__(self) -> int:
    """Combine the base-class hash with the target name, feature names and tree count."""
    base_hash = Classifier.__hash__(self)
    return _structural_hash(
        base_hash,
        self._target_name,
        self._feature_names,
        self._number_of_trees,
    )

__init__(*, number_of_trees=100)

Source code in src/safeds/ml/classical/classification/_random_forest.py
def __init__(self, *, number_of_trees: int = 100) -> None:
    # A forest needs at least one tree; reject anything below that up front.
    if number_of_trees < 1:
        minimum = ClosedBound(1)
        raise OutOfBoundsError(number_of_trees, name="number_of_trees", lower_bound=minimum)

    # Hyperparameter chosen by the caller.
    self._number_of_trees = number_of_trees

    # Fitting state: all three start out as None.
    self._wrapped_classifier: sk_RandomForestClassifier | None = None
    self._feature_names: list[str] | None = None
    self._target_name: str | None = None

fit(training_set)

Create a copy of this classifier and fit it with the given training data.

This classifier is not modified.

Parameters:

Name Type Description Default
training_set TaggedTable

The training data containing the feature and target vectors.

required

Returns:

Name Type Description
fitted_classifier RandomForestClassifier

The fitted classifier.

Raises:

Type Description
LearningError

If the training data contains invalid values or if the training failed.

UntaggedTableError

If the table is untagged.

NonNumericColumnError

If the training data contains non-numerical values.

MissingValuesColumnError

If the training data contains missing values.

DatasetMissesDataError

If the training data contains no rows.

Source code in src/safeds/ml/classical/classification/_random_forest.py
def fit(self, training_set: TaggedTable) -> RandomForestClassifier:
    """
    Train a fresh copy of this classifier on the given data and return that copy.

    The instance this method is called on is left untouched.

    Parameters
    ----------
    training_set : TaggedTable
        Training data holding both the feature vectors and the target vector.

    Returns
    -------
    fitted_classifier : RandomForestClassifier
        A new, fitted classifier.

    Raises
    ------
    LearningError
        If the training data contains invalid values or if the training failed.
    UntaggedTableError
        If the table is untagged.
    NonNumericColumnError
        If the training data contains non-numerical values.
    MissingValuesColumnError
        If the training data contains missing values.
    DatasetMissesDataError
        If the training data contains no rows.
    """
    sklearn_model = self._get_sklearn_classifier()
    fit(sklearn_model, training_set)

    # Carry the hyperparameters over to a new instance and attach the trained state to it.
    fitted = RandomForestClassifier(number_of_trees=self._number_of_trees)
    fitted._wrapped_classifier = sklearn_model
    fitted._feature_names = training_set.features.column_names
    fitted._target_name = training_set.target.name

    return fitted

is_fitted()

Check if the classifier is fitted.

Returns:

Name Type Description
is_fitted bool

Whether the classifier is fitted.

Source code in src/safeds/ml/classical/classification/_random_forest.py
def is_fitted(self) -> bool:
    """
    Tell whether this classifier has been fitted.

    Returns
    -------
    is_fitted : bool
        True if a wrapped classifier is present, False otherwise.
    """
    wrapped = self._wrapped_classifier
    return wrapped is not None

predict(dataset)

Predict a target vector using a dataset containing feature vectors. The model has to be trained first.

Parameters:

Name Type Description Default
dataset Table

The dataset containing the feature vectors.

required

Returns:

Name Type Description
table TaggedTable

A dataset containing the given feature vectors and the predicted target vector.

Raises:

Type Description
ModelNotFittedError

If the model has not been fitted yet.

DatasetContainsTargetError

If the dataset contains the target column already.

DatasetMissesFeaturesError

If the dataset misses feature columns.

PredictionError

If predicting with the given dataset failed.

NonNumericColumnError

If the dataset contains non-numerical values.

MissingValuesColumnError

If the dataset contains missing values.

DatasetMissesDataError

If the dataset contains no rows.

Source code in src/safeds/ml/classical/classification/_random_forest.py
def predict(self, dataset: Table) -> TaggedTable:
    """
    Predict the target vector for a dataset of feature vectors. Requires a fitted model.

    Parameters
    ----------
    dataset : Table
        The dataset holding the feature vectors.

    Returns
    -------
    table : TaggedTable
        The given feature vectors together with the predicted target vector.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    DatasetContainsTargetError
        If the dataset contains the target column already.
    DatasetMissesFeaturesError
        If the dataset misses feature columns.
    PredictionError
        If predicting with the given dataset failed.
    NonNumericColumnError
        If the dataset contains non-numerical values.
    MissingValuesColumnError
        If the dataset contains missing values.
    DatasetMissesDataError
        If the dataset contains no rows.
    """
    model = self._wrapped_classifier
    feature_names = self._feature_names
    target_name = self._target_name
    return predict(model, dataset, feature_names, target_name)