Skip to content

SimpleImputer

Bases: TableTransformer

Replace missing values using the given strategy.

Parameters:

Name Type Description Default
strategy Strategy

How to replace missing values.

required
value_to_replace float | str | None

The value that should be replaced.

None
column_names str | list[str] | None

The list of columns used to fit the transformer. If None, all columns are used (for the mean and median strategies, all numeric columns).

None

Examples:

>>> from safeds.data.tabular.containers import Column, Table
>>> from safeds.data.tabular.transformation import SimpleImputer
>>>
>>> table = Table.from_columns(
...     [
...         Column("a", [1, 3, None]),
...         Column("b", [None, 2, 3]),
...     ],
... )
>>> transformer = SimpleImputer(SimpleImputer.Strategy.constant(0))
>>> fitted_transformer, transformed_table = transformer.fit_and_transform(table)
Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
class SimpleImputer(TableTransformer):
    """
    Replace missing values using the given strategy.

    Parameters
    ----------
    strategy:
        How to replace missing values.
    value_to_replace:
        The value that should be replaced.
    column_names:
        The list of columns used to fit the transformer. If `None`, all columns are used (for the mean and median
        strategies, all numeric columns).

    Examples
    --------
    >>> from safeds.data.tabular.containers import Column, Table
    >>> from safeds.data.tabular.transformation import SimpleImputer
    >>>
    >>> table = Table.from_columns(
    ...     [
    ...         Column("a", [1, 3, None]),
    ...         Column("b", [None, 2, 3]),
    ...     ],
    ... )
    >>> transformer = SimpleImputer(SimpleImputer.Strategy.constant(0))
    >>> fitted_transformer, transformed_table = transformer.fit_and_transform(table)
    """

    class Strategy(ABC):
        """
        Various strategies to replace missing values.

        Use the static factory methods to create instances of this class.
        """

        @abstractmethod
        def __eq__(self, other: object) -> bool: ...

        @abstractmethod
        def __hash__(self) -> int: ...

        @abstractmethod
        def __str__(self) -> str: ...

        @abstractmethod
        def _get_replacement(self, table: Table) -> dict[str, Any]:
            """
            Compute the replacement values for a table.

            Parameters
            ----------
            table:
                The table the replacement values are learned from.

            Returns
            -------
            replacement:
                The replacement value for each column, keyed by column name.
            """

        @staticmethod
        def constant(value: Any) -> SimpleImputer.Strategy:
            """
            Replace missing values with the given constant value.

            Parameters
            ----------
            value:
                The value that replaces missing values.

            Returns
            -------
            strategy:
                The strategy, ready to be passed to a `SimpleImputer`.
            """
            return _Constant(value)

        @staticmethod
        def mean() -> SimpleImputer.Strategy:
            """Replace missing values with the mean of each column."""
            return _Mean()

        @staticmethod
        def median() -> SimpleImputer.Strategy:
            """Replace missing values with the median of each column."""
            return _Median()

        @staticmethod
        def mode() -> SimpleImputer.Strategy:
            """Replace missing values with the mode of each column."""
            return _Mode()

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(
        self,
        strategy: SimpleImputer.Strategy,
        *,
        column_names: str | list[str] | None = None,
        value_to_replace: float | str | None = None,
    ) -> None:
        super().__init__(column_names)

        # Parameters
        self._strategy = strategy
        self._value_to_replace = value_to_replace

        # Internal state: stays None until `fit` stores the learned replacement values on the fitted copy
        self._replacement: dict[str, Any] | None = None

    def __hash__(self) -> int:
        return _structural_hash(
            super().__hash__(),
            self._strategy,
            self._value_to_replace,
            # Leave out the internal state for faster hashing
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def is_fitted(self) -> bool:
        """Whether the transformer is fitted."""
        return self._replacement is not None

    @property
    def strategy(self) -> SimpleImputer.Strategy:
        """The strategy used to replace missing values."""
        return self._strategy

    @property
    def value_to_replace(self) -> Any:
        """The value that should be replaced."""
        return self._value_to_replace

    # ------------------------------------------------------------------------------------------------------------------
    # Learning and transformation
    # ------------------------------------------------------------------------------------------------------------------

    def fit(self, table: Table) -> SimpleImputer:
        """
        Learn a transformation for a set of columns in a table.

        This transformer is not modified. If no column names were specified, all columns of the table are used; for
        the mean and median strategies, only the numeric columns are used.

        Parameters
        ----------
        table:
            The table used to fit the transformer.

        Returns
        -------
        fitted_transformer:
            The fitted transformer.

        Raises
        ------
        ColumnNotFoundError
            If column_names contains a column name that is missing in the table.
        ValueError
            If the table contains 0 rows.
        NonNumericColumnError
            If the strategy is set to either Mean or Median and the specified columns of the table contain non-numerical
            data.
        """
        needs_numeric_columns = isinstance(self._strategy, _Mean | _Median)

        if self._column_names is not None:
            # Explicitly requested columns must exist and, for mean/median, be numeric
            column_names = self._column_names
            _check_columns_exist(table, column_names)
            if needs_numeric_columns:
                _check_columns_are_numeric(table, column_names, operation="fit a SimpleImputer")
        elif needs_numeric_columns:
            # Without an explicit selection, mean/median silently restrict themselves to the numeric columns
            column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric]
        else:
            column_names = table.column_names

        if table.row_count == 0:
            raise ValueError("The SimpleImputer cannot be fitted because the table contains 0 rows")

        # Learn the transformation
        replacement = self._strategy._get_replacement(table)

        # Create a copy with the learned transformation
        result = SimpleImputer(self._strategy, column_names=column_names, value_to_replace=self._value_to_replace)
        result._replacement = replacement

        return result

    def transform(self, table: Table) -> Table:
        """
        Apply the learned transformation to a table.

        The table is not modified. Only the columns the transformer was fitted on are rewritten.

        Parameters
        ----------
        table:
            The table to which the learned transformation is applied.

        Returns
        -------
        transformed_table:
            The transformed table.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        ColumnNotFoundError
            If the input table does not contain all columns used to fit the transformer.
        """
        import polars as pl

        # Used in favor of is_fitted, so the type checker is happy
        if self._column_names is None or self._replacement is None:
            raise TransformerNotFittedError

        _check_columns_exist(table, self._column_names)

        # One expression per fitted column, swapping the configured value for that column's learned replacement
        columns = [
            (pl.col(name).replace(old=self._value_to_replace, new=self._replacement[name]))
            for name in self._column_names
        ]

        return Table._from_polars_lazy_frame(
            table._lazy_frame.with_columns(columns),
        )

is_fitted: bool

Whether the transformer is fitted.

strategy: SimpleImputer.Strategy

The strategy used to replace missing values.

value_to_replace: Any

The value that should be replaced.

Strategy

Bases: ABC

Various strategies to replace missing values.

Use the static factory methods to create instances of this class.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
class Strategy(ABC):
    """
    Various strategies to replace missing values.

    Use the static factory methods to create instances of this class.
    """

    # Concrete strategies must provide equality, hashing, and a string form themselves.
    @abstractmethod
    def __eq__(self, other: object) -> bool: ...

    @abstractmethod
    def __hash__(self) -> int: ...

    @abstractmethod
    def __str__(self) -> str: ...

    @abstractmethod
    def _get_replacement(self, table: Table) -> dict[str, Any]:
        """
        Compute the replacement values for a table.

        Parameters
        ----------
        table:
            The table the replacement values are learned from.

        Returns
        -------
        replacement:
            The replacement value for each column, keyed by column name.
        """

    @staticmethod
    def constant(value: Any) -> SimpleImputer.Strategy:
        """
        Replace missing values with the given constant value.

        Parameters
        ----------
        value:
            The value that replaces missing values.
        """
        return _Constant(value)

    @staticmethod
    def mean() -> SimpleImputer.Strategy:
        """Replace missing values with the mean of each column."""
        return _Mean()

    @staticmethod
    def median() -> SimpleImputer.Strategy:
        """Replace missing values with the median of each column."""
        return _Median()

    @staticmethod
    def mode() -> SimpleImputer.Strategy:
        """Replace missing values with the mode of each column."""
        return _Mode()

constant

Replace missing values with the given constant value.

Parameters:

Name Type Description Default
value Any

The value to replace missing values.

required
Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def constant(value: Any) -> SimpleImputer.Strategy:
    """
    Replace missing values with the given constant value.

    Parameters
    ----------
    value:
        The value that replaces missing values.

    Returns
    -------
    strategy:
        The strategy, ready to be passed to a `SimpleImputer`.
    """
    return _Constant(value)

mean

Replace missing values with the mean of each column.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def mean() -> SimpleImputer.Strategy:
    """
    Replace missing values with the mean of each column.

    Returns
    -------
    strategy:
        The strategy, ready to be passed to a `SimpleImputer`.
    """
    return _Mean()

median

Replace missing values with the median of each column.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def median() -> SimpleImputer.Strategy:
    """
    Replace missing values with the median of each column.

    Returns
    -------
    strategy:
        The strategy, ready to be passed to a `SimpleImputer`.
    """
    return _Median()

mode

Replace missing values with the mode of each column.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
@staticmethod
def mode() -> SimpleImputer.Strategy:
    """
    Replace missing values with the mode of each column.

    Returns
    -------
    strategy:
        The strategy, ready to be passed to a `SimpleImputer`.
    """
    return _Mode()

fit

Learn a transformation for a set of columns in a table.

This transformer is not modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer.

required

Returns:

Name Type Description
fitted_transformer SimpleImputer

The fitted transformer.

Raises:

Type Description
ColumnNotFoundError

If column_names contains a column name that is missing in the table.

ValueError

If the table contains 0 rows

NonNumericColumnError

If the strategy is set to either Mean or Median and the specified columns of the table contain non-numerical data.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
def fit(self, table: Table) -> SimpleImputer:
    """
    Fit a copy of this imputer to the given table.

    The receiver itself stays untouched; the learned replacement values are stored on a new `SimpleImputer`. When no
    column names were configured, every column of the table is selected, except for the mean and median strategies,
    which select the numeric columns only.

    Parameters
    ----------
    table:
        The table used to fit the transformer.

    Returns
    -------
    fitted_transformer:
        The fitted transformer.

    Raises
    ------
    ColumnNotFoundError
        If column_names contains a column name that is missing in the table.
    ValueError
        If the table contains 0 rows.
    NonNumericColumnError
        If the strategy is set to either Mean or Median and the specified columns of the table contain non-numerical
        data.
    """
    requires_numeric = isinstance(self._strategy, _Mean | _Median)
    requested = self._column_names

    if requested is not None:
        # An explicit selection is validated: columns must exist and, for mean/median, be numeric
        _check_columns_exist(table, requested)
        if requires_numeric:
            _check_columns_are_numeric(table, requested, operation="fit a SimpleImputer")
        column_names = requested
    elif requires_numeric:
        # No explicit selection: mean/median fall back to the numeric columns only
        column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric]
    else:
        column_names = table.column_names

    if table.row_count == 0:
        raise ValueError("The SimpleImputer cannot be fitted because the table contains 0 rows")

    # Learn the replacement values first, then attach them to a fresh imputer
    learned = self._strategy._get_replacement(table)

    fitted = SimpleImputer(self._strategy, column_names=column_names, value_to_replace=self._value_to_replace)
    fitted._replacement = learned
    return fitted

fit_and_transform

Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.

Note: Neither this transformer nor the given table is modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer. The transformer is then applied to this table.

required

Returns:

Name Type Description
fitted_transformer Self

The fitted transformer.

transformed_table Table

The transformed table.

Source code in src/safeds/data/tabular/transformation/_table_transformer.py
def fit_and_transform(self, table: Table) -> tuple[Self, Table]:
    """
    Fit the transformer to a table, then transform that same table with the fitted result.

    **Note:** Neither this transformer nor the given table is modified.

    Parameters
    ----------
    table:
        The table used to fit the transformer. The transformer is then applied to this table.

    Returns
    -------
    fitted_transformer:
        The fitted transformer.
    transformed_table:
        The transformed table.
    """
    fitted = self.fit(table)
    return fitted, fitted.transform(table)

transform

Apply the learned transformation to a table.

The table is not modified.

Parameters:

Name Type Description Default
table Table

The table to which the learned transformation is applied.

required

Returns:

Name Type Description
transformed_table Table

The transformed table.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

ColumnNotFoundError

If the input table does not contain all columns used to fit the transformer.

Source code in src/safeds/data/tabular/transformation/_simple_imputer.py
def transform(self, table: Table) -> Table:
    """
    Replace the configured value in the fitted columns of a table.

    The given table is left as it is; a new table is returned. Only the columns the transformer was fitted on are
    rewritten.

    Parameters
    ----------
    table:
        The table to which the learned transformation is applied.

    Returns
    -------
    transformed_table:
        The transformed table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    ColumnNotFoundError
        If the input table does not contain all columns used to fit the transformer.
    """
    import polars as pl

    fitted_columns = self._column_names
    learned = self._replacement

    # Checked on the raw attributes rather than is_fitted so the type checker can narrow both values
    if fitted_columns is None or learned is None:
        raise TransformerNotFittedError

    _check_columns_exist(table, fitted_columns)

    # One expression per fitted column, swapping the configured value for that column's learned replacement
    old_value = self._value_to_replace
    expressions = [pl.col(name).replace(old=old_value, new=learned[name]) for name in fitted_columns]

    updated_frame = table._lazy_frame.with_columns(expressions)
    return Table._from_polars_lazy_frame(updated_frame)