ClassificationMetrics

Bases: ABC

A collection of classification metrics.

Source code in src/safeds/ml/metrics/_classification_metrics.py
class ClassificationMetrics(ABC):
    """A collection of classification metrics."""

    @abstractmethod
    def __init__(self) -> None: ...

    @staticmethod
    def summarize(
        predicted: Column | TabularDataset | TimeSeriesDataset,
        expected: Column | TabularDataset | TimeSeriesDataset,
        positive_class: Any,
    ) -> Table:
        """
        Summarize classification metrics on the given data.

        Parameters
        ----------
        predicted:
            The predicted target values produced by the classifier.
        expected:
            The expected target values.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        metrics:
            A table containing the classification metrics.
        """
        expected = _extract_target(expected)
        predicted = _extract_target(predicted)
        _check_equal_length(predicted, expected)

        accuracy = ClassificationMetrics.accuracy(predicted, expected)
        precision = ClassificationMetrics.precision(predicted, expected, positive_class)
        recall = ClassificationMetrics.recall(predicted, expected, positive_class)
        f1_score = ClassificationMetrics.f1_score(predicted, expected, positive_class)

        return Table(
            {
                "metric": ["accuracy", "precision", "recall", "f1_score"],
                "value": [accuracy, precision, recall, f1_score],
            },
        )

    @staticmethod
    def accuracy(
        predicted: Column | TabularDataset | TimeSeriesDataset,
        expected: Column | TabularDataset | TimeSeriesDataset,
    ) -> float:
        """
        Compute the accuracy on the given data.

        The accuracy is the proportion of predicted target values that were correct. The **higher** the accuracy, the
        better. Results range from 0.0 to 1.0.

        Parameters
        ----------
        predicted:
            The predicted target values produced by the classifier.
        expected:
            The expected target values.

        Returns
        -------
        accuracy:
            The calculated accuracy.
        """
        expected = _extract_target(expected)
        predicted = _extract_target(predicted)
        _check_equal_length(predicted, expected)

        from polars.exceptions import ComputeError

        if expected.row_count == 0:
            return 1.0  # Everything was predicted correctly (since there is nothing to predict)

        try:
            return expected._series.eq_missing(predicted._series).mean()
        except ComputeError:
            return 0.0  # Types are not compatible, so no prediction can be correct

    @staticmethod
    def f1_score(
        predicted: Column | TabularDataset | TimeSeriesDataset,
        expected: Column | TabularDataset | TimeSeriesDataset,
        positive_class: Any,
    ) -> float:
        """
        Compute the F₁ score on the given data.

        The F₁ score is the harmonic mean of precision and recall. The **higher** the F₁ score, the better the
        classifier. Results range from 0.0 to 1.0.

        Parameters
        ----------
        predicted:
            The predicted target values produced by the classifier.
        expected:
            The expected target values.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        f1_score:
            The calculated F₁ score.
        """
        predicted = _extract_target(predicted)
        expected = _extract_target(expected)
        _check_equal_length(predicted, expected)

        true_positives = (expected._series.eq(positive_class) & predicted._series.eq(positive_class)).sum()
        false_positives = (expected._series.ne(positive_class) & predicted._series.eq(positive_class)).sum()
        false_negatives = (expected._series.eq(positive_class) & predicted._series.ne(positive_class)).sum()

        if true_positives + false_positives + false_negatives == 0:
            return 1.0  # Only true negatives (so all predictions are correct)

        return 2 * true_positives / (2 * true_positives + false_positives + false_negatives)

    @staticmethod
    def precision(
        predicted: Column | TabularDataset | TimeSeriesDataset,
        expected: Column | TabularDataset | TimeSeriesDataset,
        positive_class: Any,
    ) -> float:
        """
        Compute the precision on the given data.

        The precision is the proportion of positive predictions that were correct. The **higher** the precision, the
        better the classifier. Results range from 0.0 to 1.0.

        Parameters
        ----------
        predicted:
            The predicted target values produced by the classifier.
        expected:
            The expected target values.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        precision:
            The calculated precision.
        """
        expected = _extract_target(expected)
        predicted = _extract_target(predicted)
        _check_equal_length(predicted, expected)

        true_positives = (expected._series.eq(positive_class) & predicted._series.eq(positive_class)).sum()
        predicted_positives = predicted._series.eq(positive_class).sum()

        if predicted_positives == 0:
            return 1.0  # All positive predictions were correct (since there are none)

        return true_positives / predicted_positives

    @staticmethod
    def recall(
        predicted: Column | TabularDataset | TimeSeriesDataset,
        expected: Column | TabularDataset | TimeSeriesDataset,
        positive_class: Any,
    ) -> float:
        """
        Compute the recall on the given data.

        The recall is the proportion of actual positives that were predicted correctly. The **higher** the recall, the
        better the classifier. Results range from 0.0 to 1.0.

        Parameters
        ----------
        predicted:
            The predicted target values produced by the classifier.
        expected:
            The expected target values.
        positive_class:
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        recall:
            The calculated recall.
        """
        expected = _extract_target(expected)
        predicted = _extract_target(predicted)
        _check_equal_length(predicted, expected)

        true_positives = (expected._series.eq(positive_class) & predicted._series.eq(positive_class)).sum()
        actual_positives = expected._series.eq(positive_class).sum()

        if actual_positives == 0:
            return 1.0  # All actual positives were predicted correctly (since there are none)

        return true_positives / actual_positives

accuracy

Compute the accuracy on the given data.

The accuracy is the proportion of predicted target values that were correct. The higher the accuracy, the better. Results range from 0.0 to 1.0.

Parameters:

predicted : Column | TabularDataset | TimeSeriesDataset (required)
    The predicted target values produced by the classifier.
expected : Column | TabularDataset | TimeSeriesDataset (required)
    The expected target values.

Returns:

accuracy : float
    The calculated accuracy.

Source code in src/safeds/ml/metrics/_classification_metrics.py
@staticmethod
def accuracy(
    predicted: Column | TabularDataset | TimeSeriesDataset,
    expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
    """
    Compute the accuracy on the given data.

    The accuracy is the proportion of predicted target values that were correct. The **higher** the accuracy, the
    better. Results range from 0.0 to 1.0.

    Parameters
    ----------
    predicted:
        The predicted target values produced by the classifier.
    expected:
        The expected target values.

    Returns
    -------
    accuracy:
        The calculated accuracy.
    """
    expected = _extract_target(expected)
    predicted = _extract_target(predicted)
    _check_equal_length(predicted, expected)

    from polars.exceptions import ComputeError

    if expected.row_count == 0:
        return 1.0  # Everything was predicted correctly (since there is nothing to predict)

    try:
        return expected._series.eq_missing(predicted._series).mean()
    except ComputeError:
        return 0.0  # Types are not compatible, so no prediction can be correct
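
Example (a minimal usage sketch, not generated from the library docstrings; it assumes Column can be imported from safeds.data.tabular.containers and constructed as Column(name, values), and that ClassificationMetrics is re-exported from safeds.ml.metrics):

from safeds.data.tabular.containers import Column
from safeds.ml.metrics import ClassificationMetrics

predicted = Column("predicted", ["a", "b", "a", "a"])
expected = Column("expected", ["a", "b", "b", "a"])

# 3 of the 4 predictions match the expected values, so the accuracy is 0.75.
ClassificationMetrics.accuracy(predicted, expected)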

f1_score

Compute the F₁ score on the given data.

The F₁ score is the harmonic mean of precision and recall. The higher the F₁ score, the better the classifier. Results range from 0.0 to 1.0.

Parameters:

predicted : Column | TabularDataset | TimeSeriesDataset (required)
    The predicted target values produced by the classifier.
expected : Column | TabularDataset | TimeSeriesDataset (required)
    The expected target values.
positive_class : Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

f1_score : float
    The calculated F₁ score.

Source code in src/safeds/ml/metrics/_classification_metrics.py
@staticmethod
def f1_score(
    predicted: Column | TabularDataset | TimeSeriesDataset,
    expected: Column | TabularDataset | TimeSeriesDataset,
    positive_class: Any,
) -> float:
    """
    Compute the F₁ score on the given data.

    The F₁ score is the harmonic mean of precision and recall. The **higher** the F₁ score, the better the
    classifier. Results range from 0.0 to 1.0.

    Parameters
    ----------
    predicted:
        The predicted target values produced by the classifier.
    expected:
        The expected target values.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    f1_score:
        The calculated F₁ score.
    """
    predicted = _extract_target(predicted)
    expected = _extract_target(expected)
    _check_equal_length(predicted, expected)

    true_positives = (expected._series.eq(positive_class) & predicted._series.eq(positive_class)).sum()
    false_positives = (expected._series.ne(positive_class) & predicted._series.eq(positive_class)).sum()
    false_negatives = (expected._series.eq(positive_class) & predicted._series.ne(positive_class)).sum()

    if true_positives + false_positives + false_negatives == 0:
        return 1.0  # Only true negatives (so all predictions are correct)

    return 2 * true_positives / (2 * true_positives + false_positives + false_negatives)
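
Example (a minimal sketch, assuming Column is importable from safeds.data.tabular.containers with a Column(name, values) constructor and that ClassificationMetrics is re-exported from safeds.ml.metrics):

from safeds.data.tabular.containers import Column
from safeds.ml.metrics import ClassificationMetrics

predicted = Column("predicted", ["a", "b", "a", "a"])
expected = Column("expected", ["a", "b", "b", "a"])

# With positive_class "a": 2 true positives, 1 false positive, 0 false negatives,
# so the F₁ score is 2 * 2 / (2 * 2 + 1 + 0) = 0.8.
ClassificationMetrics.f1_score(predicted, expected, "a")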

precision

Compute the precision on the given data.

The precision is the proportion of positive predictions that were correct. The higher the precision, the better the classifier. Results range from 0.0 to 1.0.

Parameters:

predicted : Column | TabularDataset | TimeSeriesDataset (required)
    The predicted target values produced by the classifier.
expected : Column | TabularDataset | TimeSeriesDataset (required)
    The expected target values.
positive_class : Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

precision : float
    The calculated precision.

Source code in src/safeds/ml/metrics/_classification_metrics.py
@staticmethod
def precision(
    predicted: Column | TabularDataset | TimeSeriesDataset,
    expected: Column | TabularDataset | TimeSeriesDataset,
    positive_class: Any,
) -> float:
    """
    Compute the precision on the given data.

    The precision is the proportion of positive predictions that were correct. The **higher** the precision, the
    better the classifier. Results range from 0.0 to 1.0.

    Parameters
    ----------
    predicted:
        The predicted target values produced by the classifier.
    expected:
        The expected target values.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    precision:
        The calculated precision.
    """
    expected = _extract_target(expected)
    predicted = _extract_target(predicted)
    _check_equal_length(predicted, expected)

    true_positives = (expected._series.eq(positive_class) & predicted._series.eq(positive_class)).sum()
    predicted_positives = predicted._series.eq(positive_class).sum()

    if predicted_positives == 0:
        return 1.0  # All positive predictions were correct (since there are none)

    return true_positives / predicted_positives
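
Example (a minimal sketch under the same assumptions as the earlier examples: Column comes from safeds.data.tabular.containers and takes (name, values), and ClassificationMetrics is importable from safeds.ml.metrics):

from safeds.data.tabular.containers import Column
from safeds.ml.metrics import ClassificationMetrics

predicted = Column("predicted", ["a", "b", "a", "a"])
expected = Column("expected", ["a", "b", "b", "a"])

# With positive_class "a": 2 of the 3 positive predictions are correct,
# so the precision is 2 / 3 ≈ 0.667.
ClassificationMetrics.precision(predicted, expected, "a")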

recall

Compute the recall on the given data.

The recall is the proportion of actual positives that were predicted correctly. The higher the recall, the better the classifier. Results range from 0.0 to 1.0.

Parameters:

predicted : Column | TabularDataset | TimeSeriesDataset (required)
    The predicted target values produced by the classifier.
expected : Column | TabularDataset | TimeSeriesDataset (required)
    The expected target values.
positive_class : Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

recall : float
    The calculated recall.

Source code in src/safeds/ml/metrics/_classification_metrics.py
@staticmethod
def recall(
    predicted: Column | TabularDataset | TimeSeriesDataset,
    expected: Column | TabularDataset | TimeSeriesDataset,
    positive_class: Any,
) -> float:
    """
    Compute the recall on the given data.

    The recall is the proportion of actual positives that were predicted correctly. The **higher** the recall, the
    better the classifier. Results range from 0.0 to 1.0.

    Parameters
    ----------
    predicted:
        The predicted target values produced by the classifier.
    expected:
        The expected target values.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    recall:
        The calculated recall.
    """
    expected = _extract_target(expected)
    predicted = _extract_target(predicted)
    _check_equal_length(predicted, expected)

    true_positives = (expected._series.eq(positive_class) & predicted._series.eq(positive_class)).sum()
    actual_positives = expected._series.eq(positive_class).sum()

    if actual_positives == 0:
        return 1.0  # All actual positives were predicted correctly (since there are none)

    return true_positives / actual_positives
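
Example (a minimal sketch; as above, the Column import path and its (name, values) constructor are assumptions, as is the safeds.ml.metrics import):

from safeds.data.tabular.containers import Column
from safeds.ml.metrics import ClassificationMetrics

predicted = Column("predicted", ["a", "b", "a", "a"])
expected = Column("expected", ["a", "b", "b", "a"])

# With positive_class "a": both actual positives are predicted correctly,
# so the recall is 2 / 2 = 1.0.
ClassificationMetrics.recall(predicted, expected, "a")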

summarize

Summarize classification metrics on the given data.

Parameters:

predicted : Column | TabularDataset | TimeSeriesDataset (required)
    The predicted target values produced by the classifier.
expected : Column | TabularDataset | TimeSeriesDataset (required)
    The expected target values.
positive_class : Any (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

metrics : Table
    A table containing the classification metrics.

Source code in src/safeds/ml/metrics/_classification_metrics.py
@staticmethod
def summarize(
    predicted: Column | TabularDataset | TimeSeriesDataset,
    expected: Column | TabularDataset | TimeSeriesDataset,
    positive_class: Any,
) -> Table:
    """
    Summarize classification metrics on the given data.

    Parameters
    ----------
    predicted:
        The predicted target values produced by the classifier.
    expected:
        The expected target values.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    metrics:
        A table containing the classification metrics.
    """
    expected = _extract_target(expected)
    predicted = _extract_target(predicted)
    _check_equal_length(predicted, expected)

    accuracy = ClassificationMetrics.accuracy(predicted, expected)
    precision = ClassificationMetrics.precision(predicted, expected, positive_class)
    recall = ClassificationMetrics.recall(predicted, expected, positive_class)
    f1_score = ClassificationMetrics.f1_score(predicted, expected, positive_class)

    return Table(
        {
            "metric": ["accuracy", "precision", "recall", "f1_score"],
            "value": [accuracy, precision, recall, f1_score],
        },
    )
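
Example (a minimal sketch collecting the four metrics above in one call; it assumes Column is importable from safeds.data.tabular.containers with a Column(name, values) constructor and that ClassificationMetrics is re-exported from safeds.ml.metrics):

from safeds.data.tabular.containers import Column
from safeds.ml.metrics import ClassificationMetrics

predicted = Column("predicted", ["a", "b", "a", "a"])
expected = Column("expected", ["a", "b", "b", "a"])

# Returns a Table with one row per metric; for positive_class "a" the values are
# accuracy 0.75, precision 2/3, recall 1.0, and f1_score 0.8.
ClassificationMetrics.summarize(predicted, expected, "a")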