Skip to content

Discretizer

Bases: TableTransformer

The Discretizer bins continuous data into intervals.

Parameters:

Name Type Description Default
bin_count int

The number of bins to be created.

5
selector str | list[str] | None

The list of columns used to fit the transformer. If None, all numeric columns are used.

None

Raises:

Type Description
OutOfBoundsError

If the given bin_count is less than 2.

Methods:

Name Description
fit

Learn a transformation for a set of columns in a table.

fit_and_transform

Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.

transform

Apply the learned transformation to a table.

Attributes:

Name Type Description
bin_count int

The number of bins to be created.

is_fitted bool

Whether the transformer is fitted.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
class Discretizer(TableTransformer):
    """
    The Discretizer bins continuous data into intervals.

    Fitting does not change the transformer it is called on; `fit` returns a new, fitted `Discretizer` that wraps a
    scikit-learn `KBinsDiscretizer`.

    Parameters
    ----------
    bin_count:
        The number of bins to be created.
    selector:
        The list of columns used to fit the transformer. If `None`, all numeric columns are used.

    Raises
    ------
    OutOfBoundsError
        If the given `bin_count` is less than 2.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(
        self,
        bin_count: int = 5,
        *,
        selector: str | list[str] | None = None,
    ) -> None:
        TableTransformer.__init__(self, selector)

        # At least two bins are required.
        _check_bounds("bin_count", bin_count, lower_bound=_ClosedBound(2))

        # Stays `None` until `fit` attaches a fitted scikit-learn transformer to the returned copy.
        self._wrapped_transformer: sk_KBinsDiscretizer | None = None
        self._bin_count = bin_count

    def __hash__(self) -> int:
        # Combine the hash of the base transformer with the only additional configuration value.
        return _structural_hash(
            TableTransformer.__hash__(self),
            self._bin_count,
        )

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def is_fitted(self) -> bool:
        """Whether the transformer is fitted."""
        return self._wrapped_transformer is not None

    @property
    def bin_count(self) -> int:
        """The number of bins to be created."""
        return self._bin_count

    # ------------------------------------------------------------------------------------------------------------------
    # Learning and transformation
    # ------------------------------------------------------------------------------------------------------------------

    def fit(self, table: Table) -> Discretizer:
        """
        Learn a transformation for a set of columns in a table.

        This transformer is not modified.

        Parameters
        ----------
        table:
            The table used to fit the transformer.

        Returns
        -------
        fitted_transformer:
            The fitted transformer.

        Raises
        ------
        ValueError
            If the table is empty.
        NonNumericColumnError
            If one of the columns that should be fitted is non-numeric.
        ColumnNotFoundError
            If one of the columns that should be fitted is not in the table.
        """
        # Imported here so that scikit-learn is only loaded once a transformer is actually fitted.
        from sklearn.preprocessing import KBinsDiscretizer as sk_KBinsDiscretizer

        if table.row_count == 0:
            raise ValueError("The Discretizer cannot be fitted because the table contains 0 rows")

        if self._selector is None:
            # No explicit selection: use every numeric column of the table.
            column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric]
        else:
            column_names = self._selector
            _check_columns_exist(table, column_names)
            _check_columns_are_numeric(table, column_names, operation="fit a Discretizer")

        # "ordinal" encoding yields one bin identifier per value instead of a one-hot encoded matrix.
        wrapped_transformer = sk_KBinsDiscretizer(n_bins=self._bin_count, encode="ordinal")
        wrapped_transformer.set_output(transform="polars")
        wrapped_transformer.fit(
            table.select_columns(column_names)._data_frame,
        )

        # Return a new instance that remembers the concrete columns it was fitted on; `self` stays unfitted.
        result = Discretizer(self._bin_count, selector=column_names)
        result._wrapped_transformer = wrapped_transformer

        return result

    def transform(self, table: Table) -> Table:
        """
        Apply the learned transformation to a table.

        The table is not modified.

        Parameters
        ----------
        table:
            The table to which the learned transformation is applied.

        Returns
        -------
        transformed_table:
            The transformed table.

        Raises
        ------
        NotFittedError
            If the transformer has not been fitted yet.
        ValueError
            If the table is empty.
        ColumnNotFoundError
            If one of the columns that should be transformed is not in the table.
        NonNumericColumnError
            If one of the columns that should be transformed is non-numeric.
        """
        # Transformer has not been fitted yet
        if self._wrapped_transformer is None or self._selector is None:
            raise NotFittedError(kind="transformer")

        if table.row_count == 0:
            raise ValueError("The table cannot be transformed because it contains 0 rows")

        # Input table does not contain all columns used to fit the transformer
        _check_columns_exist(table, self._selector)

        for column in self._selector:
            # Look the column type up once and reuse it for both the check and the error message.
            column_type = table.get_column(column).type
            if not column_type.is_numeric:
                raise NonNumericColumnError(f"{column} is of type {column_type}.")

        new_data = self._wrapped_transformer.transform(
            table.select_columns(self._selector)._data_frame,
        )
        # Write the binned values back into a frame derived from the input table.
        return Table._from_polars_lazy_frame(
            table._lazy_frame.update(new_data.lazy()),
        )

bin_count

The number of bins to be created.

is_fitted

Whether the transformer is fitted.

fit

Learn a transformation for a set of columns in a table.

This transformer is not modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer.

required

Returns:

Name Type Description
fitted_transformer Discretizer

The fitted transformer.

Raises:

Type Description
ValueError

If the table is empty.

NonNumericColumnError

If one of the columns that should be fitted is non-numeric.

ColumnNotFoundError

If one of the columns that should be fitted is not in the table.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def fit(self, table: Table) -> Discretizer:
    """
    Fit a new Discretizer on the given table and return it.

    The transformer this method is called on stays unchanged.

    Parameters
    ----------
    table:
        The table whose columns are used to learn the bins.

    Returns
    -------
    fitted_transformer:
        A new, fitted transformer.

    Raises
    ------
    ValueError
        If the table has no rows.
    NonNumericColumnError
        If a selected column is non-numeric.
    ColumnNotFoundError
        If a selected column does not exist in the table.
    """
    from sklearn.preprocessing import KBinsDiscretizer as sk_KBinsDiscretizer

    if table.row_count == 0:
        raise ValueError("The Discretizer cannot be fitted because the table contains 0 rows")

    selected = self._selector
    if selected is not None:
        # An explicit selection has to exist in the table and be numeric.
        _check_columns_exist(table, selected)
        _check_columns_are_numeric(table, selected, operation="fit a Discretizer")
        column_names = selected
    else:
        # Without a selection, every numeric column is used.
        column_names = [
            column_name
            for column_name in table.column_names
            if table.get_column_type(column_name).is_numeric
        ]

    training_frame = table.select_columns(column_names)._data_frame

    sk_discretizer = sk_KBinsDiscretizer(n_bins=self._bin_count, encode="ordinal")
    sk_discretizer.set_output(transform="polars")
    sk_discretizer.fit(training_frame)

    # The fitted state is attached to a fresh instance bound to the resolved columns.
    fitted = Discretizer(self._bin_count, selector=column_names)
    fitted._wrapped_transformer = sk_discretizer
    return fitted

fit_and_transform

Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.

Note: Neither this transformer nor the given table is modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer. The transformer is then applied to this table.

required

Returns:

Name Type Description
fitted_transformer Self

The fitted transformer.

transformed_table Table

The transformed table.

Source code in src/safeds/data/tabular/transformation/_table_transformer.py
def fit_and_transform(self, table: Table) -> tuple[Self, Table]:
    """
    Fit this transformer on a table, then apply the fitted transformer to that same table.

    **Note:** Neither this transformer nor the given table are modified.

    Parameters
    ----------
    table:
        The table used for fitting; the fitted transformer is afterwards applied to it.

    Returns
    -------
    fitted_transformer:
        The transformer returned by `fit`.
    transformed_table:
        The result of calling `transform` on the fitted transformer with the same table.
    """
    fitted = self.fit(table)
    return fitted, fitted.transform(table)

transform

Apply the learned transformation to a table.

The table is not modified.

Parameters:

Name Type Description Default
table Table

The table to which the learned transformation is applied.

required

Returns:

Name Type Description
transformed_table Table

The transformed table.

Raises:

Type Description
NotFittedError

If the transformer has not been fitted yet.

ValueError

If the table is empty.

ColumnNotFoundError

If one of the columns that should be transformed is not in the table.

NonNumericColumnError

If one of the columns that should be transformed is non-numeric.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def transform(self, table: Table) -> Table:
    """
    Apply the learned transformation to a table.

    The table is not modified.

    Parameters
    ----------
    table:
        The table to which the learned transformation is applied.

    Returns
    -------
    transformed_table:
        The transformed table.

    Raises
    ------
    NotFittedError
        If the transformer has not been fitted yet.
    ValueError
        If the table is empty.
    ColumnNotFoundError
        If one of the columns that should be transformed is not in the table.
    NonNumericColumnError
        If one of the columns that should be transformed is non-numeric.
    """
    # Transformer has not been fitted yet
    if self._wrapped_transformer is None or self._selector is None:
        raise NotFittedError(kind="transformer")

    if table.row_count == 0:
        raise ValueError("The table cannot be transformed because it contains 0 rows")

    # Input table does not contain all columns used to fit the transformer
    _check_columns_exist(table, self._selector)

    for column in self._selector:
        # Look the column type up once and reuse it for both the check and the error message.
        column_type = table.get_column(column).type
        if not column_type.is_numeric:
            raise NonNumericColumnError(f"{column} is of type {column_type}.")

    new_data = self._wrapped_transformer.transform(
        table.select_columns(self._selector)._data_frame,
    )
    # Write the binned values back into a frame derived from the input table.
    return Table._from_polars_lazy_frame(
        table._lazy_frame.update(new_data.lazy()),
    )