Skip to content

LabelEncoder

Bases: InvertibleTableTransformer

The LabelEncoder encodes one or more given columns into labels.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
class LabelEncoder(InvertibleTableTransformer):
    """
    The LabelEncoder encodes one or more given columns into labels.

    An encoder starts out unfitted. Calling `fit` does not change the encoder it is called on; it returns a new,
    fitted `LabelEncoder` that can then be used with `transform` and `inverse_transform`.
    """

    def __init__(self) -> None:
        # Both attributes stay `None` until `fit` produces a fitted encoder.
        self._wrapped_transformer: sk_OrdinalEncoder | None = None
        self._column_names: list[str] | None = None

    def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder:
        """
        Learn a transformation for a set of columns in a table.

        This transformer is not modified.

        Parameters
        ----------
        table : Table
            The table used to fit the transformer.
        column_names : list[str] | None
            The list of columns from the table used to fit the transformer. If `None`, all columns are used.

        Returns
        -------
        fitted_transformer : TableTransformer
            The fitted transformer.

        Raises
        ------
        UnknownColumnNameError
            If column_names contains a column name that is missing from the table.
        ValueError
            If the table contains 0 rows.

        Warns
        -----
        UserWarning
            If any of the selected columns already contain numerical data.
        """
        if column_names is None:
            column_names = list(table.column_names)
        else:
            # Take a private copy so later changes to the caller's list cannot alter the fitted encoder.
            column_names = list(column_names)
            missing_columns = sorted(set(column_names) - set(table.column_names))
            if missing_columns:
                raise UnknownColumnNameError(missing_columns)

        if table.number_of_rows == 0:
            raise ValueError("The LabelEncoder cannot be fitted because the table contains 0 rows")

        # Compute the numeric subset once; it is needed for both the check and the warning text.
        numeric_columns = table.keep_only_columns(column_names).remove_columns_with_non_numerical_values()
        if numeric_columns.number_of_columns > 0:
            warnings.warn(
                "The columns"
                f" {numeric_columns.column_names} contain"
                " numerical data. The LabelEncoder is designed to encode non-numerical values into numerical values",
                UserWarning,
                stacklevel=2,
            )

        wrapped_transformer = sk_OrdinalEncoder()
        wrapped_transformer.fit(table._data[column_names])

        # Build a fresh encoder instead of mutating `self`.
        result = LabelEncoder()
        result._wrapped_transformer = wrapped_transformer
        result._column_names = column_names

        return result

    def transform(self, table: Table) -> Table:
        """
        Apply the learned transformation to a table.

        The table is not modified.

        Parameters
        ----------
        table : Table
            The table to which the learned transformation is applied.

        Returns
        -------
        transformed_table : Table
            The transformed table.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        UnknownColumnNameError
            If the input table does not contain all columns used to fit the transformer.
        ValueError
            If the table contains 0 rows.
        """
        # Transformer has not been fitted yet
        if self._wrapped_transformer is None or self._column_names is None:
            raise TransformerNotFittedError

        # Input table does not contain all columns used to fit the transformer
        missing_columns = sorted(set(self._column_names) - set(table.column_names))
        if missing_columns:
            raise UnknownColumnNameError(missing_columns)

        if table.number_of_rows == 0:
            raise ValueError("The LabelEncoder cannot transform the table because it contains 0 rows")

        # `reset_index` returns a new frame, so the input table's data is left untouched.
        data = table._data.reset_index(drop=True)
        data.columns = table.column_names
        data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names])
        return Table._from_pandas_dataframe(data)

    def inverse_transform(self, transformed_table: Table) -> Table:
        """
        Undo the learned transformation.

        The table is not modified.

        Parameters
        ----------
        transformed_table : Table
            The table to be transformed back to the original version.

        Returns
        -------
        table : Table
            The original table.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        UnknownColumnNameError
            If the input table does not contain all columns used to fit the transformer.
        NonNumericColumnError
            If the specified columns of the input table contain non-numerical data.
        ValueError
            If the table contains 0 rows.
        """
        # Transformer has not been fitted yet
        if self._wrapped_transformer is None or self._column_names is None:
            raise TransformerNotFittedError

        missing_columns = sorted(set(self._column_names) - set(transformed_table.column_names))
        if missing_columns:
            raise UnknownColumnNameError(missing_columns)

        if transformed_table.number_of_rows == 0:
            raise ValueError("The LabelEncoder cannot inverse transform the table because it contains 0 rows")

        # Filter once and reuse the result for both the check and the error message.
        numeric_columns = transformed_table.keep_only_columns(
            self._column_names,
        ).remove_columns_with_non_numerical_values()
        if numeric_columns.number_of_columns < len(self._column_names):
            non_numeric_names = sorted(set(self._column_names) - set(numeric_columns.column_names))
            raise NonNumericColumnError(str(non_numeric_names))

        # `reset_index` returns a new frame, so the input table's data is left untouched.
        data = transformed_table._data.reset_index(drop=True)
        data.columns = transformed_table.column_names
        data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names])
        return Table._from_pandas_dataframe(data)

    def is_fitted(self) -> bool:
        """
        Check if the transformer is fitted.

        Returns
        -------
        is_fitted : bool
            Whether the transformer is fitted.
        """
        return self._wrapped_transformer is not None

    def get_names_of_added_columns(self) -> list[str]:
        """
        Get the names of all new columns that have been added by the LabelEncoder.

        Returns
        -------
        added_columns : list[str]
            A list of names of the added columns, ordered as they will appear in the table.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        """
        if not self.is_fitted():
            raise TransformerNotFittedError
        return []

    # (Must implement abstract method, cannot instantiate class otherwise.)
    def get_names_of_changed_columns(self) -> list[str]:
        """
        Get the names of all columns that may have been changed by the LabelEncoder.

        Returns
        -------
        changed_columns : list[str]
            The list of (potentially) changed column names, as passed to fit.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        """
        if self._column_names is None:
            raise TransformerNotFittedError
        # Return a copy so callers cannot modify the encoder's internal state.
        return list(self._column_names)

    def get_names_of_removed_columns(self) -> list[str]:
        """
        Get the names of all columns that have been removed by the LabelEncoder.

        Returns
        -------
        removed_columns : list[str]
            A list of names of the removed columns, ordered as they appear in the table the LabelEncoder was fitted on.

        Raises
        ------
        TransformerNotFittedError
            If the transformer has not been fitted yet.
        """
        if not self.is_fitted():
            raise TransformerNotFittedError
        return []

__init__()

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def __init__(self) -> None:
    """Create an unfitted encoder; both internal attributes start as `None`."""
    # Names of the columns the encoder was fitted on.
    self._column_names: list[str] | None = None
    # The wrapped encoder object.
    self._wrapped_transformer: sk_OrdinalEncoder | None = None

fit(table, column_names)

Learn a transformation for a set of columns in a table.

This transformer is not modified.

Parameters:

Name Type Description Default
table Table

The table used to fit the transformer.

required
column_names list[str] | None

The list of columns from the table used to fit the transformer. If None, all columns are used.

required

Returns:

Name Type Description
fitted_transformer TableTransformer

The fitted transformer.

Raises:

Type Description
UnknownColumnNameError

If column_names contains a column name that is missing from the table.

ValueError

If the table contains 0 rows.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder:
    """
    Learn a transformation for a set of columns in a table.

    This transformer is not modified.

    Parameters
    ----------
    table : Table
        The table used to fit the transformer.
    column_names : list[str] | None
        The list of columns from the table used to fit the transformer. If `None`, all columns are used.

    Returns
    -------
    fitted_transformer : TableTransformer
        The fitted transformer.

    Raises
    ------
    UnknownColumnNameError
        If column_names contains a column name that is missing from the table.
    ValueError
        If the table contains 0 rows.

    Warns
    -----
    UserWarning
        If any of the selected columns already contain numerical data.
    """
    if column_names is None:
        column_names = list(table.column_names)
    else:
        # Take a private copy so later changes to the caller's list cannot alter the fitted encoder.
        column_names = list(column_names)
        missing_columns = sorted(set(column_names) - set(table.column_names))
        if missing_columns:
            raise UnknownColumnNameError(missing_columns)

    if table.number_of_rows == 0:
        raise ValueError("The LabelEncoder cannot be fitted because the table contains 0 rows")

    # Compute the numeric subset once; it is needed for both the check and the warning text.
    numeric_columns = table.keep_only_columns(column_names).remove_columns_with_non_numerical_values()
    if numeric_columns.number_of_columns > 0:
        warnings.warn(
            "The columns"
            f" {numeric_columns.column_names} contain"
            " numerical data. The LabelEncoder is designed to encode non-numerical values into numerical values",
            UserWarning,
            stacklevel=2,
        )

    wrapped_transformer = sk_OrdinalEncoder()
    wrapped_transformer.fit(table._data[column_names])

    # Build a fresh encoder instead of mutating `self`.
    result = LabelEncoder()
    result._wrapped_transformer = wrapped_transformer
    result._column_names = column_names

    return result

get_names_of_added_columns()

Get the names of all new columns that have been added by the LabelEncoder.

Returns:

Name Type Description
added_columns list[str]

A list of names of the added columns, ordered as they will appear in the table.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def get_names_of_added_columns(self) -> list[str]:
    """
    Report which columns the LabelEncoder adds to a table.

    Returns
    -------
    added_columns : list[str]
        The names of the added columns. This implementation always reports an empty list.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if self.is_fitted():
        return []
    raise TransformerNotFittedError

get_names_of_changed_columns()

Get the names of all columns that may have been changed by the LabelEncoder.

Returns:

Name Type Description
changed_columns list[str]

The list of (potentially) changed column names, as passed to fit.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def get_names_of_changed_columns(self) -> list[str]:
    """
    Get the names of all columns that may have been changed by the LabelEncoder.

    Returns
    -------
    changed_columns : list[str]
        The list of (potentially) changed column names, as passed to fit. A new list is returned on every call.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if self._column_names is None:
        raise TransformerNotFittedError
    # Return a copy so callers cannot modify the encoder's internal state.
    return list(self._column_names)

get_names_of_removed_columns()

Get the names of all columns that have been removed by the LabelEncoder.

Returns:

Name Type Description
removed_columns list[str]

A list of names of the removed columns, ordered as they appear in the table the LabelEncoder was fitted on.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def get_names_of_removed_columns(self) -> list[str]:
    """
    Report which columns the LabelEncoder removes from a table.

    Returns
    -------
    removed_columns : list[str]
        The names of the removed columns. This implementation always reports an empty list.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if self.is_fitted():
        return []
    raise TransformerNotFittedError

inverse_transform(transformed_table)

Undo the learned transformation.

The table is not modified.

Parameters:

Name Type Description Default
transformed_table Table

The table to be transformed back to the original version.

required

Returns:

Name Type Description
table Table

The original table.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

UnknownColumnNameError

If the input table does not contain all columns used to fit the transformer.

NonNumericColumnError

If the specified columns of the input table contain non-numerical data.

ValueError

If the table contains 0 rows.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def inverse_transform(self, transformed_table: Table) -> Table:
    """
    Undo the learned transformation.

    The table is not modified.

    Parameters
    ----------
    transformed_table : Table
        The table to be transformed back to the original version.

    Returns
    -------
    table : Table
        The original table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    UnknownColumnNameError
        If the input table does not contain all columns used to fit the transformer.
    NonNumericColumnError
        If the specified columns of the input table contain non-numerical data.
    ValueError
        If the table contains 0 rows.
    """
    # Transformer has not been fitted yet
    if self._wrapped_transformer is None or self._column_names is None:
        raise TransformerNotFittedError

    missing_columns = sorted(set(self._column_names) - set(transformed_table.column_names))
    if missing_columns:
        raise UnknownColumnNameError(missing_columns)

    if transformed_table.number_of_rows == 0:
        raise ValueError("The LabelEncoder cannot inverse transform the table because it contains 0 rows")

    # Filter once and reuse the result for both the check and the error message.
    numeric_columns = transformed_table.keep_only_columns(
        self._column_names,
    ).remove_columns_with_non_numerical_values()
    if numeric_columns.number_of_columns < len(self._column_names):
        non_numeric_names = sorted(set(self._column_names) - set(numeric_columns.column_names))
        raise NonNumericColumnError(str(non_numeric_names))

    # `reset_index` returns a new frame, so the input table's data is left untouched.
    data = transformed_table._data.reset_index(drop=True)
    data.columns = transformed_table.column_names
    data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names])
    return Table._from_pandas_dataframe(data)

is_fitted()

Check if the transformer is fitted.

Returns:

Name Type Description
is_fitted bool

Whether the transformer is fitted.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def is_fitted(self) -> bool:
    """
    Tell whether this transformer has been fitted.

    Returns
    -------
    is_fitted : bool
        `True` if a wrapped transformer is set, `False` otherwise.
    """
    has_wrapped_transformer = self._wrapped_transformer is not None
    return has_wrapped_transformer

transform(table)

Apply the learned transformation to a table.

The table is not modified.

Parameters:

Name Type Description Default
table Table

The table to which the learned transformation is applied.

required

Returns:

Name Type Description
transformed_table Table

The transformed table.

Raises:

Type Description
TransformerNotFittedError

If the transformer has not been fitted yet.

UnknownColumnNameError

If the input table does not contain all columns used to fit the transformer.

ValueError

If the table contains 0 rows.

Source code in src/safeds/data/tabular/transformation/_label_encoder.py
def transform(self, table: Table) -> Table:
    """
    Encode the fitted columns of a table using the learned transformation.

    The input table is left unchanged; a new table is returned.

    Parameters
    ----------
    table : Table
        The table to which the learned transformation is applied.

    Returns
    -------
    transformed_table : Table
        The transformed table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    UnknownColumnNameError
        If the input table does not contain all columns used to fit the transformer.
    ValueError
        If the table contains 0 rows.
    """
    # Guard: both pieces of fitted state must be present.
    encoder = self._wrapped_transformer
    if encoder is None:
        raise TransformerNotFittedError
    fitted_names = self._column_names
    if fitted_names is None:
        raise TransformerNotFittedError

    # Guard: every fitted column has to exist in the input table.
    absent = sorted(set(fitted_names).difference(table.column_names))
    if absent:
        raise UnknownColumnNameError(absent)

    # Guard: there must be at least one row to encode.
    if table.number_of_rows == 0:
        raise ValueError("The LabelEncoder cannot transform the table because it contains 0 rows")

    # Work on a re-indexed copy of the data, overwrite the fitted columns, and wrap the result in a new table.
    frame = table._data.reset_index(drop=True)
    frame.columns = table.column_names
    frame[fitted_names] = encoder.transform(frame[fitted_names])
    return Table._from_pandas_dataframe(frame)