Classifier

Bases: SupervisedModel, ABC

A model for classification tasks.

Source code in src/safeds/ml/classical/classification/_classifier.py
class Classifier(SupervisedModel, ABC):
    """A model for classification tasks."""

    # ------------------------------------------------------------------------------------------------------------------
    # Metrics
    # ------------------------------------------------------------------------------------------------------------------

    def summarize_metrics(
        self,
        validation_or_test_set: Table | TabularDataset,
        positive_class: Any,
    ) -> Table:
        """
        Summarize the classifier's metrics on the given data.

        **Note:** The model must be fitted.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        metrics:
            A table containing the classifier's metrics.

        Raises
        ------
        ModelNotFittedError
            If the classifier has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return ClassificationMetrics.summarize(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
            positive_class,
        )

    def accuracy(self, validation_or_test_set: Table | TabularDataset) -> float:
        """
        Compute the accuracy of the classifier on the given data.

        The accuracy is the proportion of predicted target values that were correct. The **higher** the accuracy, the
        better. Results range from 0.0 to 1.0.

        **Note:** The model must be fitted.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.

        Returns
        -------
        accuracy:
            The classifier's accuracy.

        Raises
        ------
        ModelNotFittedError
            If the classifier has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return ClassificationMetrics.accuracy(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
        )

    def f1_score(
        self,
        validation_or_test_set: Table | TabularDataset,
        positive_class: Any,
    ) -> float:
        """
        Compute the classifier's F₁ score on the given data.

        The F₁ score is the harmonic mean of precision and recall. The **higher** the F₁ score, the better the
        classifier. Results range from 0.0 to 1.0.

        **Note:** The model must be fitted.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        f1_score:
            The classifier's F₁ score.

        Raises
        ------
        ModelNotFittedError
            If the classifier has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return ClassificationMetrics.f1_score(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
            positive_class,
        )

    def precision(
        self,
        validation_or_test_set: Table | TabularDataset,
        positive_class: Any,
    ) -> float:
        """
        Compute the classifier's precision on the given data.

        The precision is the proportion of positive predictions that were correct. The **higher** the precision, the
        better the classifier. Results range from 0.0 to 1.0.

        **Note:** The model must be fitted.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        precision:
            The classifier's precision.

        Raises
        ------
        ModelNotFittedError
            If the classifier has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return ClassificationMetrics.precision(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
            positive_class,
        )

    def recall(self, validation_or_test_set: Table | TabularDataset, positive_class: Any) -> float:
        """
        Compute the classifier's recall on the given data.

        The recall is the proportion of actual positives that were predicted correctly. The **higher** the recall, the
        better the classifier. Results range from 0.0 to 1.0.

        **Note:** The model must be fitted.

        Parameters
        ----------
        validation_or_test_set:
            The validation or test set.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        recall:
            The classifier's recall.

        Raises
        ------
        ModelNotFittedError
            If the classifier has not been fitted yet.
        """
        if not self.is_fitted:
            raise ModelNotFittedError

        validation_or_test_set = _extract_table(validation_or_test_set)

        return ClassificationMetrics.recall(
            self.predict(validation_or_test_set),
            validation_or_test_set.get_column(self.get_target_name()),
            positive_class,
        )
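
Classifier is abstract, so it is used through a concrete subclass. The following sketch shows the typical fit-then-evaluate flow; the subclass name DecisionTreeClassifier, the dict-based Table constructor, and the to_tabular_dataset helper are assumptions about the surrounding Safe-DS API, not something this page defines.

# Hypothetical usage sketch; DecisionTreeClassifier, Table({...}), and
# to_tabular_dataset(...) are assumed Safe-DS names.
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

# A tiny dataset with one feature column and one target column.
training_table = Table({"feature": [1, 2, 3, 4], "target": ["no", "no", "yes", "yes"]})
training_set = training_table.to_tabular_dataset("target")

classifier = DecisionTreeClassifier()
fitted = classifier.fit(training_set)  # returns a fitted copy; `classifier` stays unfitted

print(fitted.is_fitted)                                        # True
print(fitted.predict(Table({"feature": [2, 3]})).to_table())   # predictions for new rows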

is_fitted: bool

Whether the model is fitted.

accuracy

Compute the accuracy of the classifier on the given data.

The accuracy is the proportion of predicted target values that were correct. The higher the accuracy, the better. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set (Table | TabularDataset): The validation or test set. Required.

Returns:

accuracy (float): The classifier's accuracy.

Raises:

ModelNotFittedError: If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def accuracy(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the accuracy of the classifier on the given data.

    The accuracy is the proportion of predicted target values that were correct. The **higher** the accuracy, the
    better. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    accuracy:
        The classifier's accuracy.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.accuracy(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )
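
As a sketch, accuracy can be read directly off a fitted classifier. As in the earlier example, DecisionTreeClassifier and to_tabular_dataset are assumed names from the wider Safe-DS API.

# Minimal accuracy check (assumed Safe-DS names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1, 2, 3, 4], "y": [0, 0, 1, 1]}).to_tabular_dataset("y")
fitted = DecisionTreeClassifier().fit(train)

# The validation table must still contain the target column "y".
validation = Table({"x": [1, 4], "y": [0, 1]})
print(fitted.accuracy(validation))  # value in [0.0, 1.0]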

f1_score

Compute the classifier's F₁ score on the given data.

The F₁ score is the harmonic mean of precision and recall. The higher the F₁ score, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set (Table | TabularDataset): The validation or test set. Required.
positive_class (Any): The class to be considered positive. All other classes are considered negative. Required.

Returns:

f1_score (float): The classifier's F₁ score.

Raises:

ModelNotFittedError: If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def f1_score(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> float:
    """
    Compute the classifier's F₁ score on the given data.

    The F₁ score is the harmonic mean of precision and recall. The **higher** the F₁ score, the better the
    classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    f1_score:
        The classifier's F₁ score.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.f1_score(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )
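
Since the F₁ score is the harmonic mean of precision and recall, it can be cross-checked against those two metrics. A sketch under the same assumed names as above:

# F₁ cross-check: f1 == 2 * p * r / (p + r) when p + r > 0 (assumed Safe-DS names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1, 2, 3, 4], "y": ["a", "a", "b", "b"]}).to_tabular_dataset("y")
fitted = DecisionTreeClassifier().fit(train)

validation = Table({"x": [1, 2, 3, 4], "y": ["a", "b", "b", "b"]})
p = fitted.precision(validation, positive_class="b")
r = fitted.recall(validation, positive_class="b")
print(fitted.f1_score(validation, positive_class="b"))  # should equal 2 * p * r / (p + r)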

fit

Create a copy of this model and fit it with the given training data.

Note: This model is not modified.

Parameters:

training_set (TabularDataset): The training data containing the features and target. Required.

Returns:

fitted_model (Self): The fitted model.

Raises:

LearningError: If the training data contains invalid values or if the training failed.

Source code in src/safeds/ml/classical/_supervised_model.py
def fit(self, training_set: TabularDataset) -> Self:
    """
    Create a copy of this model and fit it with the given training data.

    **Note:** This model is not modified.

    Parameters
    ----------
    training_set:
        The training data containing the features and target.

    Returns
    -------
    fitted_model:
        The fitted model.

    Raises
    ------
    LearningError
        If the training data contains invalid values or if the training failed.
    """
    if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table):
        raise PlainTableError
    if training_set.to_table().number_of_rows == 0:
        raise DatasetMissesDataError

    self._check_additional_fit_preconditions(training_set)

    wrapped_model = self._get_sklearn_model()
    _fit_sklearn_model_in_place(wrapped_model, training_set)

    result = self._clone()
    result._feature_schema = training_set.features.schema
    result._target_name = training_set.target.name
    result._target_type = training_set.target.type
    result._wrapped_model = wrapped_model

    return result
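
Because fit returns a fitted copy and leaves the receiver untouched, the same unfitted instance can be reused for several training sets. A sketch, again assuming DecisionTreeClassifier and to_tabular_dataset:

# fit() does not mutate the model it is called on (assumed Safe-DS names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1, 2, 3, 4], "y": [0, 0, 1, 1]}).to_tabular_dataset("y")

template = DecisionTreeClassifier()
fitted = template.fit(train)

print(template.is_fitted)  # False - the original is unchanged
print(fitted.is_fitted)    # True  - only the returned copy is fitted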

get_feature_names

Return the names of the feature columns.

Note: The model must be fitted.

Returns:

feature_names (list[str]): The names of the feature columns.

Raises:

ModelNotFittedError: If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_feature_names(self) -> list[str]:
    """
    Return the names of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_names:
        The names of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema.column_names
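
After fitting, the feature and target names recorded from the training set can be queried. A sketch with the same assumed names as the earlier examples:

# Inspect what the model recorded about the dataset layout (assumed Safe-DS names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"age": [20, 30, 40, 50], "income": [1, 2, 3, 4], "buys": [0, 0, 1, 1]})
fitted = DecisionTreeClassifier().fit(train.to_tabular_dataset("buys"))

print(fitted.get_feature_names())  # ['age', 'income']
print(fitted.get_target_name())    # 'buys'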

get_features_schema

Return the schema of the feature columns.

Note: The model must be fitted.

Returns:

feature_schema (Schema): The schema of the feature columns.

Raises:

ModelNotFittedError: If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_features_schema(self) -> Schema:
    """
    Return the schema of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_schema:
        The schema of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema

get_target_name

Return the name of the target column.

Note: The model must be fitted.

Returns:

target_name (str): The name of the target column.

Raises:

ModelNotFittedError: If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_name(self) -> str:
    """
    Return the name of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_name:
        The name of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._target_name is None:
        raise ModelNotFittedError

    return self._target_name

get_target_type

Return the type of the target column.

Note: The model must be fitted.

Returns:

target_type (DataType): The type of the target column.

Raises:

ModelNotFittedError: If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_type(self) -> DataType:
    """
    Return the type of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_type:
        The type of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._target_type is None:
        raise ModelNotFittedError

    return self._target_type
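
The schema-oriented getters work the same way, returning the feature schema and the target column's data type recorded during fitting. Another sketch with the assumed names from the earlier examples:

# Schema and target type of a fitted model (assumed Safe-DS names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1.0, 2.0, 3.0, 4.0], "label": ["a", "a", "b", "b"]})
fitted = DecisionTreeClassifier().fit(train.to_tabular_dataset("label"))

print(fitted.get_features_schema())  # Schema of the feature columns only
print(fitted.get_target_type())      # DataType of the 'label' column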

precision

Compute the classifier's precision on the given data.

The precision is the proportion of positive predictions that were correct. The higher the precision, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set (Table | TabularDataset): The validation or test set. Required.
positive_class (Any): The class to be considered positive. All other classes are considered negative. Required.

Returns:

precision (float): The classifier's precision.

Raises:

ModelNotFittedError: If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def precision(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> float:
    """
    Compute the classifier's precision on the given data.

    The precision is the proportion of positive predictions that were correct. The **higher** the precision, the
    better the classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    precision:
        The classifier's precision.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.precision(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )
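
Precision only looks at the rows the classifier predicted as the positive class. A short sketch, with the same assumed names as above:

# Precision = correct positive predictions / all positive predictions (assumed names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1, 2, 3, 4], "y": ["neg", "neg", "pos", "pos"]}).to_tabular_dataset("y")
fitted = DecisionTreeClassifier().fit(train)

validation = Table({"x": [1, 2, 3, 4], "y": ["neg", "pos", "pos", "pos"]})
print(fitted.precision(validation, positive_class="pos"))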

predict

Predict the target values on the given dataset.

Note: The model must be fitted.

Parameters:

dataset (Table | TabularDataset): The dataset containing at least the features. Required.

Returns:

prediction (TabularDataset): The given dataset with an additional column for the predicted target values.

Raises:

ModelNotFittedError: If the model has not been fitted yet.
DatasetMissesFeaturesError: If the dataset misses feature columns.
PredictionError: If predicting with the given dataset failed.

Source code in src/safeds/ml/classical/_supervised_model.py
def predict(
    self,
    dataset: Table | TabularDataset,
) -> TabularDataset:
    """
    Predict the target values on the given dataset.

    **Note:** The model must be fitted.

    Parameters
    ----------
    dataset:
        The dataset containing at least the features.

    Returns
    -------
    prediction:
        The given dataset with an additional column for the predicted target values.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    DatasetMissesFeaturesError
        If the dataset misses feature columns.
    PredictionError
        If predicting with the given dataset failed.
    """
    self._check_additional_predict_preconditions(dataset)

    return _predict_with_sklearn_model(
        self._wrapped_model,
        dataset,
        self.get_feature_names(),
        self.get_target_name(),
    )
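
predict accepts a plain Table as long as it contains the feature columns; the target column is not needed. A sketch with the assumed names from the earlier examples:

# Predicting on feature-only data (assumed Safe-DS names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1, 2, 3, 4], "y": [0, 0, 1, 1]}).to_tabular_dataset("y")
fitted = DecisionTreeClassifier().fit(train)

new_data = Table({"x": [1, 3]})        # features only, no target column
prediction = fitted.predict(new_data)  # TabularDataset with an added 'y' column
print(prediction.to_table())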

recall

Compute the classifier's recall on the given data.

The recall is the proportion of actual positives that were predicted correctly. The higher the recall, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set (Table | TabularDataset): The validation or test set. Required.
positive_class (Any): The class to be considered positive. All other classes are considered negative. Required.

Returns:

recall (float): The classifier's recall.

Raises:

ModelNotFittedError: If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def recall(self, validation_or_test_set: Table | TabularDataset, positive_class: Any) -> float:
    """
    Compute the classifier's recall on the given data.

    The recall is the proportion of actual positives that were predicted correctly. The **higher** the recall, the
    better the classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    recall:
        The classifier's recall.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.recall(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )
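
Recall, by contrast, looks at all rows whose actual class is the positive class. Same assumed names as above:

# Recall = correctly predicted positives / all actual positives (assumed names as above).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1, 2, 3, 4], "y": ["neg", "neg", "pos", "pos"]}).to_tabular_dataset("y")
fitted = DecisionTreeClassifier().fit(train)

validation = Table({"x": [1, 2, 3, 4], "y": ["neg", "pos", "pos", "pos"]})
print(fitted.recall(validation, positive_class="pos"))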

summarize_metrics

Summarize the classifier's metrics on the given data.

Note: The model must be fitted.

Parameters:

validation_or_test_set (Table | TabularDataset): The validation or test set. Required.
positive_class (Any): The class to be considered positive. All other classes are considered negative. Required.

Returns:

metrics (Table): A table containing the classifier's metrics.

Raises:

ModelNotFittedError: If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def summarize_metrics(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> Table:
    """
    Summarize the classifier's metrics on the given data.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    metrics:
        A table containing the classifier's metrics.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.summarize(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )
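
summarize_metrics bundles the individual metrics into a single table, which is convenient for a quick comparison of several models. One last sketch with the same assumed names as above:

# One call producing a table of metrics (presumably the ones documented above; assumed Safe-DS names).
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier

train = Table({"x": [1, 2, 3, 4], "y": ["no", "no", "yes", "yes"]}).to_tabular_dataset("y")
fitted = DecisionTreeClassifier().fit(train)

validation = Table({"x": [1, 2, 3, 4], "y": ["no", "yes", "yes", "yes"]})
print(fitted.summarize_metrics(validation, positive_class="yes"))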