Skip to content

KNearestNeighborsImputer

Bases: TableTransformer

The KNearestNeighborsImputer replaces missing values in given Columns with the mean value of the K-nearest neighbors.

Parameters:

Name Type Description Default
neighbor_count int

The number of neighbors to consider when imputing missing values.

required
selector str | list[str] | None

The list of columns used to impute missing values. If 'None', all columns are used.

None
value_to_replace float | str | None

The placeholder for the missing values. All occurrences of value_to_replace will be imputed.

None

Methods:

Name Description
fit

Learn a transformation for a set of columns in a table.

fit_and_transform

Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.

transform

Apply the learned transformation to a table.

Attributes:

Name Type Description
is_fitted bool

Whether the transformer is fitted.

neighbor_count int

The number of neighbors to consider when imputing missing values.

value_to_replace float | str | None

The value to replace.

Source code in src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py
class KNearestNeighborsImputer(TableTransformer):
    """
    The KNearestNeighborsImputer replaces missing values in given Columns with the mean value of the K-nearest neighbors.

    Parameters
    ----------
    neighbor_count:
        The number of neighbors to consider when imputing missing values. Must be at least 1.
    selector:
        The list of columns used to impute missing values. If 'None', all columns are used.
    value_to_replace:
        The placeholder for the missing values. All occurrences of `value_to_replace` will be imputed. If 'None',
        NaN is used as the placeholder.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(
        self,
        neighbor_count: int,
        *,
        selector: str | list[str] | None = None,
        value_to_replace: float | str | None = None,
    ) -> None:
        super().__init__(selector)

        # Reject neighbor counts below 1 before storing any state.
        _check_bounds(name="neighbor_count", actual=neighbor_count, lower_bound=_ClosedBound(1))

        # parameter
        self._neighbor_count: int = neighbor_count
        self._value_to_replace: float | str | None = value_to_replace

        # attributes
        # Stays None until `fit` attaches a fitted sklearn imputer to a new instance.
        self._wrapped_transformer: sk_KNNImputer | None = None

    def __hash__(self) -> int:
        return _structural_hash(
            super().__hash__(),
            self._neighbor_count,
            self._value_to_replace,
            # Leave out the internal state for faster hashing
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def is_fitted(self) -> bool:
        """Whether the transformer is fitted."""
        return self._wrapped_transformer is not None

    @property
    def neighbor_count(self) -> int:
        """The number of neighbors to consider when imputing missing values."""
        return self._neighbor_count

    @property
    def value_to_replace(self) -> float | str | None:
        """The value to replace."""
        return self._value_to_replace

    # ------------------------------------------------------------------------------------------------------------------
    # Learning and transformation
    # ------------------------------------------------------------------------------------------------------------------

    def fit(self, table: Table) -> KNearestNeighborsImputer:
        """
        Learn a transformation for a set of columns in a table.

        **Note:** This transformer is not modified.

        Parameters
        ----------
        table:
            The table used to fit the transformer.

        Returns
        -------
        fitted_transformer:
            The fitted transformer.

        Raises
        ------
        ValueError
            If the table contains 0 rows.
        ColumnNotFoundError
            If one of the columns that should be fitted is not in the table.
        """
        from sklearn.impute import KNNImputer as sk_KNNImputer

        if table.row_count == 0:
            raise ValueError("The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows.")

        if self._selector is None:
            column_names = table.column_names
        else:
            column_names = self._selector
            _check_columns_exist(table, column_names)

        value_to_replace = self._value_to_replace

        # Without an explicit placeholder, NaN marks the missing values.
        if self._value_to_replace is None:
            from numpy import nan

            value_to_replace = nan

        wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=value_to_replace)
        wrapped_transformer.set_output(transform="polars")
        wrapped_transformer.fit(
            table.select_columns(column_names)._data_frame,
        )

        # Build a new instance instead of mutating `self`. The configured placeholder is passed on so that the
        # fitted transformer reports the same `value_to_replace` (and hash inputs) as the one it was fitted from.
        result = KNearestNeighborsImputer(
            self._neighbor_count,
            selector=column_names,
            value_to_replace=self._value_to_replace,
        )
        result._wrapped_transformer = wrapped_transformer

        return result

    def transform(self, table: Table) -> Table:
        """
        Apply the learned transformation to a table.

        **Note:** The given table is not modified.

        Parameters
        ----------
        table:
            The table to which the learned transformation is applied.

        Returns
        -------
        transformed_table:
            The transformed table.

        Raises
        ------
        NotFittedError
            If the transformer is not fitted.
        ColumnNotFoundError
            If one of the columns that should be transformed is not in the table.
        """
        if self._selector is None or self._wrapped_transformer is None:
            raise NotFittedError(kind="transformer")

        _check_columns_exist(table, self._selector)

        # Only the selected columns are handed to the wrapped imputer.
        new_data = self._wrapped_transformer.transform(
            table.select_columns(self._selector)._data_frame,
        )

        # Merge the imputed columns back into the table's lazy frame.
        return Table._from_polars_lazy_frame(
            table._lazy_frame.with_columns(new_data),
        )

is_fitted

Whether the transformer is fitted.

neighbor_count

The number of neighbors to consider when imputing missing values.

value_to_replace

The value to replace.

fit

Learn a transformation for a set of columns in a table.

Note: This transformer is not modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer.

required

Returns:

Name Type Description
fitted_transformer KNearestNeighborsImputer

The fitted transformer.

Raises:

Type Description
ColumnNotFoundError

If one of the columns that should be fitted is not in the table.

Source code in src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py
def fit(self, table: Table) -> KNearestNeighborsImputer:
    """
    Learn a transformation for a set of columns in a table.

    **Note:** This transformer is not modified.

    Parameters
    ----------
    table:
        The table used to fit the transformer.

    Returns
    -------
    fitted_transformer:
        The fitted transformer.

    Raises
    ------
    ValueError
        If the table contains 0 rows.
    ColumnNotFoundError
        If one of the columns that should be fitted is not in the table.
    """
    from sklearn.impute import KNNImputer as sk_KNNImputer

    if table.row_count == 0:
        raise ValueError("The KNearestNeighborsImputer cannot be fitted because the table contains 0 rows.")

    if self._selector is None:
        column_names = table.column_names
    else:
        column_names = self._selector
        _check_columns_exist(table, column_names)

    value_to_replace = self._value_to_replace

    # Without an explicit placeholder, NaN marks the missing values.
    if self._value_to_replace is None:
        from numpy import nan

        value_to_replace = nan

    wrapped_transformer = sk_KNNImputer(n_neighbors=self._neighbor_count, missing_values=value_to_replace)
    wrapped_transformer.set_output(transform="polars")
    wrapped_transformer.fit(
        table.select_columns(column_names)._data_frame,
    )

    # Build a new instance instead of mutating `self`. The configured placeholder is passed on so that the
    # fitted transformer reports the same `value_to_replace` (and hash inputs) as the one it was fitted from.
    result = KNearestNeighborsImputer(
        self._neighbor_count,
        selector=column_names,
        value_to_replace=self._value_to_replace,
    )
    result._wrapped_transformer = wrapped_transformer

    return result

fit_and_transform

Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.

Note: Neither this transformer nor the given table are modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer. The transformer is then applied to this table.

required

Returns:

Name Type Description
fitted_transformer Self

The fitted transformer.

transformed_table Table

The transformed table.

Source code in src/safeds/data/tabular/transformation/_table_transformer.py
def fit_and_transform(self, table: Table) -> tuple[Self, Table]:
    """
    Fit this transformer on a table, then apply the fitted transformer to that same table.

    **Note:** Neither this transformer nor the given table are modified.

    Parameters
    ----------
    table:
        The table used to fit the transformer. The fitted transformer is then applied to this table.

    Returns
    -------
    fitted_transformer:
        The fitted transformer.
    transformed_table:
        The transformed table.
    """
    # The transformation is applied by the transformer returned from `fit`, not by `self`.
    fitted = self.fit(table)
    return fitted, fitted.transform(table)

transform

Apply the learned transformation to a table.

Note: The given table is not modified.

Parameters:

Name Type Description Default
table Table

The table to which the learned transformation is applied.

required

Returns:

Name Type Description
transformed_table Table

The transformed table.

Raises:

Type Description
NotFittedError

If the transformer is not fitted.

ColumnNotFoundError

If one of the columns that should be transformed is not in the table.

Source code in src/safeds/data/tabular/transformation/_k_nearest_neighbors_imputer.py
def transform(self, table: Table) -> Table:
    """
    Impute the selected columns of a table with the previously fitted imputer.

    **Note:** The given table is not modified.

    Parameters
    ----------
    table:
        The table to which the learned transformation is applied.

    Returns
    -------
    transformed_table:
        The transformed table.

    Raises
    ------
    NotFittedError
        If the transformer is not fitted.
    ColumnNotFoundError
        If one of the columns that should be transformed is not in the table.
    """
    selector = self._selector
    imputer = self._wrapped_transformer

    # Both the selector and the wrapped imputer must be present for a fitted transformer.
    if selector is None or imputer is None:
        raise NotFittedError(kind="transformer")

    _check_columns_exist(table, selector)

    # Only the selected columns are handed to the wrapped imputer.
    selected_frame = table.select_columns(selector)._data_frame
    imputed_columns = imputer.transform(selected_frame)

    # Merge the imputed columns back into the table's lazy frame.
    updated_frame = table._lazy_frame.with_columns(imputed_columns)
    return Table._from_polars_lazy_frame(updated_frame)