Skip to content

Discretizer

Bases: TableTransformer

The Discretizer bins continuous data into intervals.

Parameters:

Name Type Description Default
number_of_bins int

The number of bins to be created.

5

Raises:

Type Description
OutOfBoundsError

If the given number_of_bins is less than 2.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
class Discretizer(TableTransformer):
    """
    The Discretizer bins continuous data into intervals.

    Parameters
    ----------
    number_of_bins
        The number of bins to be created.

    Raises
    ------
    OutOfBoundsError
        If the given number_of_bins is less than 2.
    """

    def __init__(self, number_of_bins: int = 5):
        self._column_names: list[str] | None = None
        self._wrapped_transformer: sk_KBinsDiscretizer | None = None

        if number_of_bins < 2:
            raise OutOfBoundsError(number_of_bins, name="number_of_bins", lower_bound=ClosedBound(2))
        self._number_of_bins = number_of_bins

    def fit(self, table: Table, column_names: list[str] | None) -> Discretizer:
        """
        Learn a transformation for a set of columns in a table.

        This transformer is not modified.

        Parameters
        ----------
        table
            The table used to fit the transformer.
        column_names
            The list of columns from the table used to fit the transformer. If `None`, all columns are used.

        Returns
        -------
        fitted_transformer :
            The fitted transformer.

        Raises
        ------
        ValueError
            If the table is empty.
        NonNumericColumnError
            If one of the columns, that should be fitted is non-numeric.
        UnknownColumnNameError
            If one of the columns, that should be fitted is not in the table.
        """
        from sklearn.preprocessing import KBinsDiscretizer as sk_KBinsDiscretizer

        if table.number_of_rows == 0:
            raise ValueError("The Discretizer cannot be fitted because the table contains 0 rows")

        if column_names is None:
            column_names = table.column_names
        else:
            missing_columns = set(column_names) - set(table.column_names)
            if len(missing_columns) > 0:
                raise UnknownColumnNameError(
                    sorted(
                        missing_columns,
                        key={val: ix for ix, val in enumerate(column_names)}.__getitem__,
                    ),
                )

            for column in column_names:
                if not table.get_column(column).type.is_numeric():
                    raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.")

        wrapped_transformer = sk_KBinsDiscretizer(n_bins=self._number_of_bins, encode="ordinal")
        wrapped_transformer.fit(table._data[column_names])

        result = Discretizer(self._number_of_bins)
        result._wrapped_transformer = wrapped_transformer
        result._column_names = column_names

        return result

    def transform(self, table: Table) -> Table:
        """
        Apply the learned transformation to a table.

        The table is not modified.

        Parameters
        ----------
        table
            The table to which the learned transformation is applied.

        Returns
        -------
        transformed_table :
            The transformed table.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        ValueError
            If the table is empty.
        UnknownColumnNameError
            If one of the columns, that should be transformed is not in the table.
        NonNumericColumnError
            If one of the columns, that should be fitted is non-numeric.
        """
        # Transformer has not been fitted yet
        if self._wrapped_transformer is None or self._column_names is None:
            raise TransformerNotFittedError

        if table.number_of_rows == 0:
            raise ValueError("The table cannot be transformed because it contains 0 rows")

        # Input table does not contain all columns used to fit the transformer
        missing_columns = set(self._column_names) - set(table.column_names)
        if len(missing_columns) > 0:
            raise UnknownColumnNameError(
                sorted(
                    missing_columns,
                    key={val: ix for ix, val in enumerate(self._column_names)}.__getitem__,
                ),
            )

        for column in self._column_names:
            if not table.get_column(column).type.is_numeric():
                raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.")

        data = table._data.reset_index(drop=True)
        data.columns = table.column_names
        data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names])
        return Table._from_pandas_dataframe(data)

    def is_fitted(self) -> bool:
        """
        Check if the transformer is fitted.

        Returns
        -------
        is_fitted :
            Whether the transformer is fitted.
        """
        return self._wrapped_transformer is not None

    def get_names_of_added_columns(self) -> list[str]:
        """
        Get the names of all new columns that have been added by the Discretizer.

        Returns
        -------
        added_columns :
            A list of names of the added columns, ordered as they will appear in the table.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        """
        if not self.is_fitted():
            raise TransformerNotFittedError
        return []

    # (Must implement abstract method, cannot instantiate class otherwise.)
    def get_names_of_changed_columns(self) -> list[str]:
        """
         Get the names of all columns that may have been changed by the Discretizer.

        Returns
        -------
        changed_columns :
             The list of (potentially) changed column names, as passed to fit.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        """
        if self._column_names is None:
            raise TransformerNotFittedError
        return self._column_names

    def get_names_of_removed_columns(self) -> list[str]:
        """
        Get the names of all columns that have been removed by the Discretizer.

        Returns
        -------
        removed_columns :
            A list of names of the removed columns, ordered as they appear in the table the Discretizer was fitted on.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        """
        if not self.is_fitted():
            raise TransformerNotFittedError
        return []

__init__(number_of_bins=5)

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def __init__(self, number_of_bins: int = 5):
    self._column_names: list[str] | None = None
    self._wrapped_transformer: sk_KBinsDiscretizer | None = None

    if number_of_bins < 2:
        raise OutOfBoundsError(number_of_bins, name="number_of_bins", lower_bound=ClosedBound(2))
    self._number_of_bins = number_of_bins

fit(table, column_names)

Learn a transformation for a set of columns in a table.

This transformer is not modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer.

required
column_names list[str] | None

The list of columns from the table used to fit the transformer. If None, all columns are used.

required

Returns:

Name Type Description
fitted_transformer Discretizer

The fitted transformer.

Raises:

Type Description
ValueError

If the table is empty.

NonNumericColumnError

If one of the columns, that should be fitted is non-numeric.

UnknownColumnNameError

If one of the columns, that should be fitted is not in the table.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def fit(self, table: Table, column_names: list[str] | None) -> Discretizer:
    """
    Learn a transformation for a set of columns in a table.

    This transformer is not modified.

    Parameters
    ----------
    table
        The table used to fit the transformer.
    column_names
        The list of columns from the table used to fit the transformer. If `None`, all columns are used.

    Returns
    -------
    fitted_transformer :
        The fitted transformer.

    Raises
    ------
    ValueError
        If the table is empty.
    NonNumericColumnError
        If one of the columns, that should be fitted is non-numeric.
    UnknownColumnNameError
        If one of the columns, that should be fitted is not in the table.
    """
    from sklearn.preprocessing import KBinsDiscretizer as sk_KBinsDiscretizer

    if table.number_of_rows == 0:
        raise ValueError("The Discretizer cannot be fitted because the table contains 0 rows")

    if column_names is None:
        column_names = table.column_names
    else:
        missing_columns = set(column_names) - set(table.column_names)
        if len(missing_columns) > 0:
            raise UnknownColumnNameError(
                sorted(
                    missing_columns,
                    key={val: ix for ix, val in enumerate(column_names)}.__getitem__,
                ),
            )

        for column in column_names:
            if not table.get_column(column).type.is_numeric():
                raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.")

    wrapped_transformer = sk_KBinsDiscretizer(n_bins=self._number_of_bins, encode="ordinal")
    wrapped_transformer.fit(table._data[column_names])

    result = Discretizer(self._number_of_bins)
    result._wrapped_transformer = wrapped_transformer
    result._column_names = column_names

    return result

get_names_of_added_columns()

Get the names of all new columns that have been added by the Discretizer.

Returns:

Name Type Description
added_columns list[str]

A list of names of the added columns, ordered as they will appear in the table.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def get_names_of_added_columns(self) -> list[str]:
    """
    Get the names of all new columns that have been added by the Discretizer.

    Returns
    -------
    added_columns :
        A list of names of the added columns, ordered as they will appear in the table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if not self.is_fitted():
        raise TransformerNotFittedError
    return []

get_names_of_changed_columns()

Get the names of all columns that may have been changed by the Discretizer.

Returns:

Name Type Description
changed_columns list[str]

The list of (potentially) changed column names, as passed to fit.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def get_names_of_changed_columns(self) -> list[str]:
    """
     Get the names of all columns that may have been changed by the Discretizer.

    Returns
    -------
    changed_columns :
         The list of (potentially) changed column names, as passed to fit.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if self._column_names is None:
        raise TransformerNotFittedError
    return self._column_names

get_names_of_removed_columns()

Get the names of all columns that have been removed by the Discretizer.

Returns:

Name Type Description
removed_columns list[str]

A list of names of the removed columns, ordered as they appear in the table the Discretizer was fitted on.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def get_names_of_removed_columns(self) -> list[str]:
    """
    Get the names of all columns that have been removed by the Discretizer.

    Returns
    -------
    removed_columns :
        A list of names of the removed columns, ordered as they appear in the table the Discretizer was fitted on.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if not self.is_fitted():
        raise TransformerNotFittedError
    return []

is_fitted()

Check if the transformer is fitted.

Returns:

Name Type Description
is_fitted bool

Whether the transformer is fitted.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def is_fitted(self) -> bool:
    """
    Check if the transformer is fitted.

    Returns
    -------
    is_fitted :
        Whether the transformer is fitted.
    """
    return self._wrapped_transformer is not None

transform(table)

Apply the learned transformation to a table.

The table is not modified.

Parameters:

Name Type Description Default
table Table

The table to which the learned transformation is applied.

required

Returns:

Name Type Description
transformed_table Table

The transformed table.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

ValueError

If the table is empty.

UnknownColumnNameError

If one of the columns, that should be transformed is not in the table.

NonNumericColumnError

If one of the columns, that should be fitted is non-numeric.

Source code in src/safeds/data/tabular/transformation/_discretizer.py
def transform(self, table: Table) -> Table:
    """
    Apply the learned transformation to a table.

    The table is not modified.

    Parameters
    ----------
    table
        The table to which the learned transformation is applied.

    Returns
    -------
    transformed_table :
        The transformed table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    ValueError
        If the table is empty.
    UnknownColumnNameError
        If one of the columns, that should be transformed is not in the table.
    NonNumericColumnError
        If one of the columns, that should be fitted is non-numeric.
    """
    # Transformer has not been fitted yet
    if self._wrapped_transformer is None or self._column_names is None:
        raise TransformerNotFittedError

    if table.number_of_rows == 0:
        raise ValueError("The table cannot be transformed because it contains 0 rows")

    # Input table does not contain all columns used to fit the transformer
    missing_columns = set(self._column_names) - set(table.column_names)
    if len(missing_columns) > 0:
        raise UnknownColumnNameError(
            sorted(
                missing_columns,
                key={val: ix for ix, val in enumerate(self._column_names)}.__getitem__,
            ),
        )

    for column in self._column_names:
        if not table.get_column(column).type.is_numeric():
            raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.")

    data = table._data.reset_index(drop=True)
    data.columns = table.column_names
    data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names])
    return Table._from_pandas_dataframe(data)