KNearestNeighborsClassifier

Bases: Classifier, _KNearestNeighborsBase

K-nearest-neighbors classification.

Parameters:

neighbor_count : int | Choice[int]  (required)
    The number of neighbors to use for interpolation. Has to be greater than 0 (validated in the constructor) and less than or equal to the sample size (validated when calling fit).

Raises:

OutOfBoundsError
    If neighbor_count is less than 1.
Source code in src/safeds/ml/classical/classification/_k_nearest_neighbors_classifier.py
class KNearestNeighborsClassifier(Classifier, _KNearestNeighborsBase):
    """
    K-nearest-neighbors classification.

    Parameters
    ----------
    neighbor_count:
        The number of neighbors to use for interpolation. Has to be greater than 0 (validated in the constructor) and
        less than or equal to the sample size (validated when calling `fit`).

    Raises
    ------
    OutOfBoundsError
        If `neighbor_count` is less than 1.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(
        self,
        neighbor_count: int | Choice[int],
    ) -> None:
        # Initialize superclasses
        Classifier.__init__(self)
        _KNearestNeighborsBase.__init__(
            self,
            neighbor_count=neighbor_count,
        )

    def __hash__(self) -> int:
        return _structural_hash(
            Classifier.__hash__(self),
            _KNearestNeighborsBase.__hash__(self),
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Template methods
    # ------------------------------------------------------------------------------------------------------------------

    def _clone(self) -> KNearestNeighborsClassifier:
        return KNearestNeighborsClassifier(
            neighbor_count=self._neighbor_count,
        )

    def _get_sklearn_model(self) -> ClassifierMixin:
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNeighborsClassifier

        return SklearnKNeighborsClassifier(
            n_neighbors=self._neighbor_count,
            n_jobs=-1,
        )

    def _check_more_additional_fit_preconditions(self, training_set: TabularDataset) -> None:
        if isinstance(self._neighbor_count, Choice):
            raise FittingWithChoiceError
        if self._neighbor_count > training_set._table.row_count:
            raise ValueError(
                (
                    f"The parameter 'neighbor_count' ({self._neighbor_count}) has to be less than or equal to"
                    f" the sample size ({training_set._table.row_count})."
                ),
            )

    def _check_additional_fit_by_exhaustive_search_preconditions(self) -> None:
        if not isinstance(self._neighbor_count, Choice):
            raise FittingWithoutChoiceError

    def _get_models_for_all_choices(self) -> list[KNearestNeighborsClassifier]:
        assert isinstance(self._neighbor_count, Choice)  # this is always true and just here for linting
        models = []
        for nc in self._neighbor_count:
            models.append(KNearestNeighborsClassifier(neighbor_count=nc))
        return models
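
A minimal construction sketch (hedged: the import paths safeds.ml.classical.classification and safeds.ml.hyperparameters.Choice are assumptions about the public API and may differ between Safe-DS versions):

from safeds.ml.classical.classification import KNearestNeighborsClassifier
from safeds.ml.hyperparameters import Choice  # assumed import path

# Fixed hyperparameter: must be at least 1, otherwise the constructor raises OutOfBoundsError.
classifier = KNearestNeighborsClassifier(neighbor_count=5)

# Hyperparameter choice: such a model can only be trained with fit_by_exhaustive_search, not with fit.
searchable = KNearestNeighborsClassifier(neighbor_count=Choice(3, 5, 7))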

is_fitted: bool

Whether the model is fitted.

neighbor_count: int | Choice[int]

The number of neighbors used for interpolation.

accuracy

Compute the accuracy of the classifier on the given data.

The accuracy is the proportion of predicted target values that were correct. The higher the accuracy, the better. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set : Table | TabularDataset  (required)
    The validation or test set.

Returns:

accuracy : float
    The classifier's accuracy.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def accuracy(self, validation_or_test_set: Table | TabularDataset) -> float:
    """
    Compute the accuracy of the classifier on the given data.

    The accuracy is the proportion of predicted target values that were correct. The **higher** the accuracy, the
    better. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.

    Returns
    -------
    accuracy:
        The classifier's accuracy.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.accuracy(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
    )
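
For illustration, a hedged end-to-end sketch (the Table constructor with a column dictionary and the column names are assumptions used only for this example):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import KNearestNeighborsClassifier

# Tiny hypothetical dataset with one feature column "x" and a target column "y".
training = Table({"x": [1, 2, 3, 10, 11, 12], "y": [0, 0, 0, 1, 1, 1]}).to_tabular_dataset(target_name="y")
test = Table({"x": [2, 11], "y": [0, 1]}).to_tabular_dataset(target_name="y")

fitted = KNearestNeighborsClassifier(neighbor_count=3).fit(training)
print(fitted.accuracy(test))  # proportion of correct predictions, between 0.0 and 1.0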

f1_score

Compute the classifier's F₁ score on the given data.

The F₁ score is the harmonic mean of precision and recall. The higher the F₁ score, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set : Table | TabularDataset  (required)
    The validation or test set.
positive_class : Any  (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

f1_score : float
    The classifier's F₁ score.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def f1_score(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> float:
    """
    Compute the classifier's F₁ score on the given data.

    The F₁ score is the harmonic mean of precision and recall. The **higher** the F₁ score, the better the
    classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    f1_score:
        The classifier's F₁ score.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.f1_score(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )
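
A short hedged sketch with an explicit positive class (hypothetical data; precision and recall take the same arguments):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import KNearestNeighborsClassifier

data = Table({"x": [1, 2, 3, 10, 11, 12], "y": ["a", "a", "a", "b", "b", "b"]}).to_tabular_dataset(target_name="y")
fitted = KNearestNeighborsClassifier(neighbor_count=3).fit(data)

# Treat class "b" as positive; every other class counts as negative.
# Evaluating on the training data only keeps the sketch short; use a held-out set in practice.
print(fitted.f1_score(data, positive_class="b"))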

fit

Create a copy of this model and fit it with the given training data.

Note: This model is not modified.

Parameters:

training_set : TabularDataset  (required)
    The training data containing the features and target.

Returns:

fitted_model : Self
    The fitted model.

Raises:

PlainTableError
    If a table is passed instead of a TabularDataset.
DatasetMissesDataError
    If the given training set contains no data.
FittingWithChoiceError
    When trying to call this method on a model with hyperparameter choices.
LearningError
    If the training data contains invalid values or if the training failed.

Source code in src/safeds/ml/classical/_supervised_model.py
def fit(self, training_set: TabularDataset) -> Self:
    """
    Create a copy of this model and fit it with the given training data.

    **Note:** This model is not modified.

    Parameters
    ----------
    training_set:
        The training data containing the features and target.

    Returns
    -------
    fitted_model:
        The fitted model.

    Raises
    ------
    PlainTableError
        If a table is passed instead of a TabularDataset.
    DatasetMissesDataError
        If the given training set contains no data.
    FittingWithChoiceError
        When trying to call this method on a model with hyperparameter choices.
    LearningError
        If the training data contains invalid values or if the training failed.
    """
    if not isinstance(training_set, TabularDataset) and isinstance(training_set, Table):
        raise PlainTableError
    if training_set.to_table().row_count == 0:
        raise DatasetMissesDataError

    self._check_additional_fit_preconditions()
    self._check_more_additional_fit_preconditions(training_set)

    wrapped_model = self._get_sklearn_model()
    _fit_sklearn_model_in_place(wrapped_model, training_set)

    result = self._clone()
    result._feature_schema = training_set.features.schema
    result._target_name = training_set.target.name
    result._target_type = training_set.target.type
    result._wrapped_model = wrapped_model

    return result
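
A hedged sketch highlighting that fit returns a new model and leaves the original untouched (hypothetical data):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import KNearestNeighborsClassifier

training = Table({"x": [1, 2, 3, 10, 11, 12], "y": [0, 0, 0, 1, 1, 1]}).to_tabular_dataset(target_name="y")

model = KNearestNeighborsClassifier(neighbor_count=3)
fitted = model.fit(training)

print(model.is_fitted)   # False: the original model is not modified
print(fitted.is_fitted)  # True: fit returns a fitted copy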

fit_by_exhaustive_search

Use the hyperparameter choices to create multiple models and fit them.

Note: This model is not modified.

Parameters:

training_set : TabularDataset  (required)
    The training data containing the features and target.
optimization_metric : ClassifierMetric  (required)
    The metric used to determine the performance of each candidate model.
positive_class : Any  (default: None)
    The class to be considered positive. All other classes are considered negative. Must be provided when precision, recall, or F₁ score is chosen as the optimization metric.

Returns:

best_model : Self
    The model that performed best out of all possible models given the choices of hyperparameters.

Raises:

PlainTableError
    If a table is passed instead of a TabularDataset.
DatasetMissesDataError
    If the given training set contains no data.
FittingWithoutChoiceError
    When trying to call this method on a model without hyperparameter choices.
LearningError
    If the training data contains invalid values or if the training failed.

Source code in src/safeds/ml/classical/classification/_classifier.py
def fit_by_exhaustive_search(
    self,
    training_set: TabularDataset,
    optimization_metric: ClassifierMetric,
    positive_class: Any = None,
) -> Self:
    """
    Use the hyperparameter choices to create multiple models and fit them.

    **Note:** This model is not modified.

    Parameters
    ----------
    training_set:
        The training data containing the features and target.
    optimization_metric:
        The metric that should be used for determining the performance of a model.
    positive_class:
        The class to be considered positive. All other classes are considered negative.
        Needs to be provided when choosing precision, f1score or recall as optimization metric.

    Returns
    -------
    best_model:
        The model that performed the best out of all possible models given the Choices of hyperparameters.

    Raises
    ------
    PlainTableError
        If a table is passed instead of a TabularDataset.
    DatasetMissesDataError
        If the given training set contains no data.
    FittingWithoutChoiceError
        When trying to call this method on a model without hyperparameter choices.
    LearningError
        If the training data contains invalid values or if the training failed.
    """
    if training_set.to_table().row_count == 0:
        raise DatasetMissesDataError
    if optimization_metric.value in {"precision", "recall", "f1score"} and positive_class is None:
        raise LearningError(
            f"Please provide a positive class when using optimization metric '{optimization_metric.value}'",
        )

    self._check_additional_fit_by_exhaustive_search_preconditions()

    [train_split, test_split] = training_set.to_table().split_rows(0.75)
    train_data = train_split.to_tabular_dataset(
        target_name=training_set.target.name,
        extra_names=training_set.extras.column_names,
    )
    test_data = test_split.to_tabular_dataset(
        target_name=training_set.target.name,
        extra_names=training_set.extras.column_names,
    )

    list_of_models = self._get_models_for_all_choices()
    list_of_fitted_models = []

    with ProcessPoolExecutor(max_workers=len(list_of_models), mp_context=mp.get_context("spawn")) as executor:
        futures = []
        for model in list_of_models:
            futures.append(executor.submit(model.fit, train_data))
        [done, _] = wait(futures, return_when=ALL_COMPLETED)
        for future in done:
            list_of_fitted_models.append(future.result())
    executor.shutdown()
    best_model = None
    best_metric_value = None
    for fitted_model in list_of_fitted_models:
        if best_model is None:
            best_model = fitted_model
            match optimization_metric.value:
                case "accuracy":
                    best_metric_value = fitted_model.accuracy(test_data)
                case "precision":
                    best_metric_value = fitted_model.precision(test_data, positive_class)
                case "recall":
                    best_metric_value = fitted_model.recall(test_data, positive_class)
                case "f1_score":
                    best_metric_value = fitted_model.f1_score(test_data, positive_class)
        else:
            match optimization_metric.value:
                case "accuracy":
                    accuracy_of_fitted_model = fitted_model.accuracy(test_data)
                    if accuracy_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = accuracy_of_fitted_model  # pragma: no cover
                case "precision":
                    precision_of_fitted_model = fitted_model.precision(test_data, positive_class)
                    if precision_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = precision_of_fitted_model  # pragma: no cover
                case "recall":
                    recall_of_fitted_model = fitted_model.recall(test_data, positive_class)
                    if recall_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = recall_of_fitted_model  # pragma: no cover
                case "f1_score":
                    f1score_of_fitted_model = fitted_model.f1_score(test_data, positive_class)
                    if f1score_of_fitted_model > best_metric_value:
                        best_model = fitted_model  # pragma: no cover
                        best_metric_value = f1score_of_fitted_model  # pragma: no cover
    assert best_model is not None
    return best_model
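
A hedged search sketch (the import paths for Choice and ClassifierMetric, the enum member ACCURACY, and the data are assumptions; the method internally splits the training set 75/25 for validation):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import KNearestNeighborsClassifier
from safeds.ml.hyperparameters import Choice    # assumed import path
from safeds.ml.metrics import ClassifierMetric  # assumed import path and member name below

data = Table(
    {"x": [1, 2, 3, 4, 10, 11, 12, 13], "y": [0, 0, 0, 0, 1, 1, 1, 1]},
).to_tabular_dataset(target_name="y")

searchable = KNearestNeighborsClassifier(neighbor_count=Choice(1, 3, 5))
best = searchable.fit_by_exhaustive_search(data, optimization_metric=ClassifierMetric.ACCURACY)
print(best.neighbor_count)  # the value from the Choice that scored best on the internal split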

get_feature_names

Return the names of the feature columns.

Note: The model must be fitted.

Returns:

feature_names : list[str]
    The names of the feature columns.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_feature_names(self) -> list[str]:
    """
    Return the names of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_names:
        The names of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema.column_names

get_features_schema

Return the schema of the feature columns.

Note: The model must be fitted.

Returns:

feature_schema : Schema
    The schema of the feature columns.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_features_schema(self) -> Schema:
    """
    Return the schema of the feature columns.

    **Note:** The model must be fitted.

    Returns
    -------
    feature_schema:
        The schema of the feature columns.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._feature_schema is None:
        raise ModelNotFittedError

    return self._feature_schema

get_target_name

Return the name of the target column.

Note: The model must be fitted.

Returns:

target_name : str
    The name of the target column.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_name(self) -> str:
    """
    Return the name of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_name:
        The name of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._target_name is None:
        raise ModelNotFittedError

    return self._target_name

get_target_type

Return the type of the target column.

Note: The model must be fitted.

Returns:

target_type : DataType
    The type of the target column.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.

Source code in src/safeds/ml/classical/_supervised_model.py
def get_target_type(self) -> DataType:
    """
    Return the type of the target column.

    **Note:** The model must be fitted.

    Returns
    -------
    target_type:
        The type of the target column.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    """
    # Used in favor of is_fitted, so the type checker is happy
    if self._target_type is None:
        raise ModelNotFittedError

    return self._target_type
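
A hedged inspection sketch (hypothetical data; all four getters require a fitted model and raise ModelNotFittedError otherwise):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import KNearestNeighborsClassifier

training = Table({"x": [1, 2, 3, 10, 11, 12], "y": [0, 0, 0, 1, 1, 1]}).to_tabular_dataset(target_name="y")
fitted = KNearestNeighborsClassifier(neighbor_count=3).fit(training)

print(fitted.get_feature_names())    # ["x"]
print(fitted.get_features_schema())  # Schema of the feature columns
print(fitted.get_target_name())      # "y"
print(fitted.get_target_type())      # DataType of the target column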

precision

Compute the classifier's precision on the given data.

The precision is the proportion of positive predictions that were correct. The higher the precision, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set : Table | TabularDataset  (required)
    The validation or test set.
positive_class : Any  (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

precision : float
    The classifier's precision.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def precision(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> float:
    """
    Compute the classifier's precision on the given data.

    The precision is the proportion of positive predictions that were correct. The **higher** the precision, the
    better the classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    precision:
        The classifier's precision.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.precision(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )

predict

Predict the target values on the given dataset.

Note: The model must be fitted.

Parameters:

dataset : Table | TabularDataset  (required)
    The dataset containing at least the features.

Returns:

prediction : TabularDataset
    The given dataset with an additional column for the predicted target values.

Raises:

ModelNotFittedError
    If the model has not been fitted yet.
DatasetMissesFeaturesError
    If the dataset misses feature columns.
PredictionError
    If predicting with the given dataset failed.

Source code in src/safeds/ml/classical/_supervised_model.py
def predict(
    self,
    dataset: Table | TabularDataset,
) -> TabularDataset:
    """
    Predict the target values on the given dataset.

    **Note:** The model must be fitted.

    Parameters
    ----------
    dataset:
        The dataset containing at least the features.

    Returns
    -------
    prediction:
        The given dataset with an additional column for the predicted target values.

    Raises
    ------
    ModelNotFittedError
        If the model has not been fitted yet.
    DatasetMissesFeaturesError
        If the dataset misses feature columns.
    PredictionError
        If predicting with the given dataset failed.
    """
    self._check_additional_predict_preconditions(dataset)

    return _predict_with_sklearn_model(
        self._wrapped_model,
        dataset,
        self.get_feature_names(),
        self.get_target_name(),
    )
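
A hedged prediction sketch (hypothetical data; the new rows only need the feature columns):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import KNearestNeighborsClassifier

training = Table({"x": [1, 2, 3, 10, 11, 12], "y": [0, 0, 0, 1, 1, 1]}).to_tabular_dataset(target_name="y")
fitted = KNearestNeighborsClassifier(neighbor_count=3).fit(training)

new_rows = Table({"x": [2, 11]})       # features only, no target column required
prediction = fitted.predict(new_rows)  # TabularDataset with an added "y" column of predictions
print(prediction.to_table())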

recall

Compute the classifier's recall on the given data.

The recall is the proportion of actual positives that were predicted correctly. The higher the recall, the better the classifier. Results range from 0.0 to 1.0.

Note: The model must be fitted.

Parameters:

validation_or_test_set : Table | TabularDataset  (required)
    The validation or test set.
positive_class : Any  (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

recall : float
    The classifier's recall.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def recall(self, validation_or_test_set: Table | TabularDataset, positive_class: Any) -> float:
    """
    Compute the classifier's recall on the given data.

    The recall is the proportion of actual positives that were predicted correctly. The **higher** the recall, the
    better the classifier. Results range from 0.0 to 1.0.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    recall:
        The classifier's recall.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.recall(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )

summarize_metrics

Summarize the classifier's metrics on the given data.

Note: The model must be fitted.

Parameters:

validation_or_test_set : Table | TabularDataset  (required)
    The validation or test set.
positive_class : Any  (required)
    The class to be considered positive. All other classes are considered negative.

Returns:

metrics : Table
    A table containing the classifier's metrics.

Raises:

ModelNotFittedError
    If the classifier has not been fitted yet.

Source code in src/safeds/ml/classical/classification/_classifier.py
def summarize_metrics(
    self,
    validation_or_test_set: Table | TabularDataset,
    positive_class: Any,
) -> Table:
    """
    Summarize the classifier's metrics on the given data.

    **Note:** The model must be fitted.

    Parameters
    ----------
    validation_or_test_set:
        The validation or test set.
    positive_class:
        The class to be considered positive. All other classes are considered negative.

    Returns
    -------
    metrics:
        A table containing the classifier's metrics.

    Raises
    ------
    ModelNotFittedError
        If the classifier has not been fitted yet.
    """
    if not self.is_fitted:
        raise ModelNotFittedError

    validation_or_test_set = _extract_table(validation_or_test_set)

    return ClassificationMetrics.summarize(
        self.predict(validation_or_test_set),
        validation_or_test_set.get_column(self.get_target_name()),
        positive_class,
    )
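
A hedged summary sketch (hypothetical data; the exact layout of the returned table is not specified in this reference):

from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import KNearestNeighborsClassifier

data = Table({"x": [1, 2, 3, 10, 11, 12], "y": [0, 0, 0, 1, 1, 1]}).to_tabular_dataset(target_name="y")
fitted = KNearestNeighborsClassifier(neighbor_count=3).fit(data)

# One call that collects accuracy, precision, recall, and the F₁ score for positive class 1.
print(fitted.summarize_metrics(data, positive_class=1))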