Skip to content

SimpleImputer

Bases: TableTransformer

Replace missing values using the given strategy.

Parameters:

Name Type Description Default
strategy Strategy

How to replace missing values.

required
value_to_replace float | str | None

The value that should be replaced.

None
selector str | list[str] | None

The list of columns used to fit the transformer. If None, all columns are used; for the mean and median strategies, only the numeric columns are used in that case.

None

Examples:

>>> from safeds.data.tabular.containers import Column, Table
>>> from safeds.data.tabular.transformation import SimpleImputer
>>>
>>> table = Table.from_columns(
...     [
...         Column("a", [1, 3, None]),
...         Column("b", [None, 2, 3]),
...     ],
... )
>>> transformer = SimpleImputer(SimpleImputer.Strategy.constant(0))
>>> _, transformed_table = transformer.fit_and_transform(table)

Classes:

Name Description
Strategy

Various strategies to replace missing values.

Methods:

Name Description
fit

Learn a transformation for a set of columns in a table.

fit_and_transform

Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.

transform

Apply the learned transformation to a table.

Attributes:

Name Type Description
is_fitted bool

Whether the transformer is fitted.

strategy Strategy

The strategy used to replace missing values.

value_to_replace Any

The value that should be replaced.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
class SimpleImputer(TableTransformer):
    """
    Replace missing values using the given strategy.

    Parameters
    ----------
    strategy:
        How to replace missing values.
    value_to_replace:
        The value that should be replaced.
    selector:
        The list of columns used to fit the transformer. If `None`, all columns are used; for the mean and median
        strategies, only the numeric columns are used in that case.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Column, Table
    >>> from safeds.data.tabular.transformation import SimpleImputer
    >>>
    >>> table = Table.from_columns(
    ...     [
    ...         Column("a", [1, 3, None]),
    ...         Column("b", [None, 2, 3]),
    ...     ],
    ... )
    >>> transformer = SimpleImputer(SimpleImputer.Strategy.constant(0))
    >>> _, transformed_table = transformer.fit_and_transform(table)
    """

    class Strategy(ABC):
        """
        Various strategies to replace missing values.

        Use the static factory methods to create instances of this class.
        """

        @abstractmethod
        def __eq__(self, other: object) -> bool:
            """Return whether this strategy is equal to `other`."""

        @abstractmethod
        def __hash__(self) -> int:
            """Return a hash for this strategy."""

        @abstractmethod
        def __str__(self) -> str:
            """Return a string representation of this strategy."""

        @abstractmethod
        def _get_replacement(self, table: Table) -> dict[str, Any]:
            """Return the replacement value for each column of the table, keyed by column name."""

        @staticmethod
        def constant(value: Any) -> SimpleImputer.Strategy:
            """
            Replace missing values with the given constant value.

            Parameters
            ----------
            value:
                The value to replace missing values.

            Returns
            -------
            strategy:
                A strategy that replaces missing values with `value`.
            """
            return _Constant(value)

        @staticmethod
        def mean() -> SimpleImputer.Strategy:
            """Replace missing values with the mean of each column."""
            return _Mean()

        @staticmethod
        def median() -> SimpleImputer.Strategy:
            """Replace missing values with the median of each column."""
            return _Median()

        @staticmethod
        def mode() -> SimpleImputer.Strategy:
            """Replace missing values with the mode of each column."""
            return _Mode()

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(
        self,
        strategy: SimpleImputer.Strategy,
        *,
        selector: str | list[str] | None = None,
        value_to_replace: float | str | None = None,
    ) -> None:
        super().__init__(selector)

        # Parameters
        self._strategy = strategy
        self._value_to_replace = value_to_replace

        # Internal state; stays `None` until `fit` stores the learned replacement values on a copy
        self._replacement: dict[str, Any] | None = None

    def __hash__(self) -> int:
        return _structural_hash(
            super().__hash__(),
            self._strategy,
            self._value_to_replace,
            # Leave out the internal state for faster hashing
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def is_fitted(self) -> bool:
        """Whether the transformer is fitted."""
        return self._replacement is not None

    @property
    def strategy(self) -> SimpleImputer.Strategy:
        """The strategy used to replace missing values."""
        return self._strategy

    @property
    def value_to_replace(self) -> Any:
        """The value that should be replaced."""
        return self._value_to_replace

    # ------------------------------------------------------------------------------------------------------------------
    # Learning and transformation
    # ------------------------------------------------------------------------------------------------------------------

    def fit(self, table: Table) -> SimpleImputer:
        """
        Learn a transformation for a set of columns in a table.

        This transformer is not modified.

        Parameters
        ----------
        table:
            The table used to fit the transformer.

        Returns
        -------
        fitted_transformer:
            The fitted transformer.

        Raises
        ------
        ColumnNotFoundError
            If the selector contains a column name that is missing in the table.
        ValueError
            If the table contains 0 rows.
        NonNumericColumnError
            If the strategy is set to either Mean or Median and the specified columns of the table contain non-numerical
            data.
        """
        # Mean and median are only defined for numeric columns
        needs_numeric = isinstance(self._strategy, _Mean | _Median)

        column_names = self._selector
        if column_names is not None:
            # An explicit selector is validated rather than silently filtered
            _check_columns_exist(table, column_names)
            if needs_numeric:
                _check_columns_are_numeric(table, column_names, operation="fit a SimpleImputer")
        elif needs_numeric:
            column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric]
        else:
            column_names = table.column_names

        if table.row_count == 0:
            raise ValueError("The SimpleImputer cannot be fitted because the table contains 0 rows")

        # Learn the transformation
        # NOTE(review): the strategy receives the whole table, not only the selected columns - confirm this is intended
        replacement = self._strategy._get_replacement(table)

        # Create a copy with the learned transformation
        result = SimpleImputer(self._strategy, selector=column_names, value_to_replace=self._value_to_replace)
        result._replacement = replacement

        return result

    def transform(self, table: Table) -> Table:
        """
        Apply the learned transformation to a table.

        In every column the transformer was fitted on, occurrences of `value_to_replace` are replaced with the value
        learned for that column. The table is not modified.

        Parameters
        ----------
        table:
            The table to which the learned transformation is applied.

        Returns
        -------
        transformed_table:
            The transformed table.

        Raises
        ------
        NotFittedError
            If the transformer has not been fitted yet.
        ColumnNotFoundError
            If the input table does not contain all columns used to fit the transformer.
        """
        import polars as pl

        # Used in favor of is_fitted, so the type checker is happy
        if self._selector is None or self._replacement is None:
            raise NotFittedError(kind="transformer")

        _check_columns_exist(table, self._selector)

        # One replacement expression per fitted column, each using the value learned for that column
        columns = [
            pl.col(name).replace(old=self._value_to_replace, new=self._replacement[name]) for name in self._selector
        ]

        return Table._from_polars_lazy_frame(
            table._lazy_frame.with_columns(columns),
        )

is_fitted

Whether the transformer is fitted.

strategy

The strategy used to replace missing values.

value_to_replace

The value that should be replaced.

Strategy

Bases: ABC

Various strategies to replace missing values.

Use the static factory methods to create instances of this class.

Methods:

Name Description
constant

Replace missing values with the given constant value.

mean

Replace missing values with the mean of each column.

median

Replace missing values with the median of each column.

mode

Replace missing values with the mode of each column.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
class Strategy(ABC):
    """
    Base class of the strategies that decide how missing values are replaced.

    Do not instantiate this class directly; obtain instances from the static factory methods.
    """

    @abstractmethod
    def __eq__(self, other: object) -> bool:
        """Return whether this strategy is equal to `other`."""

    @abstractmethod
    def __hash__(self) -> int:
        """Return a hash for this strategy."""

    @abstractmethod
    def __str__(self) -> str:
        """Return a string representation of this strategy."""

    @abstractmethod
    def _get_replacement(self, table: Table) -> dict[str, Any]:
        """Return the replacement value for each column of the table, keyed by column name."""

    @staticmethod
    def constant(value: Any) -> SimpleImputer.Strategy:
        """
        Create a strategy that replaces missing values with a fixed value.

        Parameters
        ----------
        value:
            The value to replace missing values.

        Returns
        -------
        strategy:
            The created strategy.
        """
        return _Constant(value)

    @staticmethod
    def mean() -> SimpleImputer.Strategy:
        """Create a strategy that replaces missing values with the mean of each column."""
        return _Mean()

    @staticmethod
    def median() -> SimpleImputer.Strategy:
        """Create a strategy that replaces missing values with the median of each column."""
        return _Median()

    @staticmethod
    def mode() -> SimpleImputer.Strategy:
        """Create a strategy that replaces missing values with the mode of each column."""
        return _Mode()

constant

Replace missing values with the given constant value.

Parameters:

Name Type Description Default
value Any

The value to replace missing values.

required
Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def constant(value: Any) -> SimpleImputer.Strategy:
    """
    Replace missing values with the given constant value.

    Parameters
    ----------
    value:
        The value to replace missing values.

    Returns
    -------
    strategy:
        A strategy that replaces missing values with `value`.
    """
    return _Constant(value)

mean

Replace missing values with the mean of each column.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def mean() -> SimpleImputer.Strategy:
    """
    Replace missing values with the mean of each column.

    Returns
    -------
    strategy:
        A new `_Mean` strategy instance.
    """
    return _Mean()

median

Replace missing values with the median of each column.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def median() -> SimpleImputer.Strategy:
    """
    Replace missing values with the median of each column.

    Returns
    -------
    strategy:
        A new `_Median` strategy instance.
    """
    return _Median()

mode

Replace missing values with the mode of each column.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def mode() -> SimpleImputer.Strategy:
    """
    Replace missing values with the mode of each column.

    Returns
    -------
    strategy:
        A new `_Mode` strategy instance.
    """
    return _Mode()

fit

Learn a transformation for a set of columns in a table.

This transformer is not modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer.

required

Returns:

Name Type Description
fitted_transformer SimpleImputer

The fitted transformer.

Raises:

Type Description
ColumnNotFoundError

If the selector contains a column name that is missing in the table.

ValueError

If the table contains 0 rows

NonNumericColumnError

If the strategy is set to either Mean or Median and the specified columns of the table contain non-numerical data.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
def fit(self, table: Table) -> SimpleImputer:
    """
    Learn the replacement values for a set of columns in a table.

    This transformer is not modified; a fitted copy is returned instead.

    Parameters
    ----------
    table:
        The table used to fit the transformer.

    Returns
    -------
    fitted_transformer:
        The fitted transformer.

    Raises
    ------
    ColumnNotFoundError
        If the selector contains a column name that is missing in the table.
    ValueError
        If the table contains 0 rows.
    NonNumericColumnError
        If the strategy is set to either Mean or Median and the specified columns of the table contain non-numerical
        data.
    """
    # Mean and median are only defined for numeric columns
    needs_numeric = isinstance(self._strategy, _Mean | _Median)

    selected = self._selector
    if selected is not None:
        # An explicit selector is validated rather than silently filtered
        _check_columns_exist(table, selected)
        if needs_numeric:
            _check_columns_are_numeric(table, selected, operation="fit a SimpleImputer")
    elif needs_numeric:
        selected = [name for name in table.column_names if table.get_column_type(name).is_numeric]
    else:
        selected = table.column_names

    if table.row_count == 0:
        raise ValueError("The SimpleImputer cannot be fitted because the table contains 0 rows")

    # Learn the replacement values before building the fitted copy
    learned = self._strategy._get_replacement(table)

    fitted = SimpleImputer(self._strategy, selector=selected, value_to_replace=self._value_to_replace)
    fitted._replacement = learned
    return fitted

fit_and_transform

Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.

Note: Neither this transformer nor the given table are modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer. The transformer is then applied to this table.

required

Returns:

Name Type Description
fitted_transformer Self

The fitted transformer.

transformed_table Table

The transformed table.

Source code in src/safeds/data/tabular/transformation/_table_transformer.py
def fit_and_transform(self, table: Table) -> tuple[Self, Table]:
    """
    Fit this transformer on a table and apply the fitted transformer to that same table.

    **Note:** Neither this transformer nor the given table are modified.

    Parameters
    ----------
    table:
        The table used to fit the transformer. The fitted transformer is then applied to this table.

    Returns
    -------
    fitted_transformer:
        The fitted transformer.
    transformed_table:
        The transformed table.
    """
    fitted = self.fit(table)
    return fitted, fitted.transform(table)

transform

Apply the learned transformation to a table.

The table is not modified.

Parameters:

Name Type Description Default
table Table

The table to which the learned transformation is applied.

required

Returns:

Name Type Description
transformed_table Table

The transformed table.

Raises:

Type Description
NotFittedError

If the transformer has not been fitted yet.

ColumnNotFoundError

If the input table does not contain all columns used to fit the transformer.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
def transform(self, table: Table) -> Table:
    """
    Apply the learned replacement values to a table.

    In every column the transformer was fitted on, occurrences of `value_to_replace` are replaced with the value
    learned for that column. The table is not modified.

    Parameters
    ----------
    table:
        The table to which the learned transformation is applied.

    Returns
    -------
    transformed_table:
        The transformed table.

    Raises
    ------
    NotFittedError
        If the transformer has not been fitted yet.
    ColumnNotFoundError
        If the input table does not contain all columns used to fit the transformer.
    """
    import polars as pl

    # Bound to locals instead of using is_fitted, so the type checker can rule out `None`
    selector = self._selector
    replacement = self._replacement
    if selector is None or replacement is None:
        raise NotFittedError(kind="transformer")

    _check_columns_exist(table, selector)

    # One replacement expression per fitted column
    old_value = self._value_to_replace
    replaced_columns = [pl.col(name).replace(old=old_value, new=replacement[name]) for name in selector]

    lazy_frame = table._lazy_frame.with_columns(replaced_columns)
    return Table._from_polars_lazy_frame(lazy_frame)