Bases: InvertibleTableTransformer
The RobustScaler transforms column values to a range by removing the median and scaling to the interquartile range.
Currently, for columns with high stability (IQR == 0), it will only subtract the median and not scale to avoid
dividing by zero.
Parameters:
| Name |
Type |
Description |
Default |
selector
|
str | list[str] | None
|
The list of columns used to fit the transformer. If None, all numeric columns are used.
|
None
|
Methods:
| Name |
Description |
fit |
Learn a transformation for a set of columns in a table.
|
fit_and_transform |
Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.
|
inverse_transform |
Undo the learned transformation.
|
transform |
Apply the learned transformation to a table.
|
Attributes:
| Name |
Type |
Description |
is_fitted |
bool
|
Whether the transformer is fitted.
|
Source code in src/safeds/data/tabular/transformation/_robust_scaler.py
| class RobustScaler(InvertibleTableTransformer):
"""
The RobustScaler transforms column values to a range by removing the median and scaling to the interquartile range.
Currently, for columns with high stability (IQR == 0), it will only subtract the median and not scale to avoid
dividing by zero.
Parameters
----------
selector:
The list of columns used to fit the transformer. If `None`, all numeric columns are used.
"""
# ------------------------------------------------------------------------------------------------------------------
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------
def __init__(self, *, selector: str | list[str] | None = None) -> None:
super().__init__(selector)
# Internal state
self._data_median: pl.DataFrame | None = None
self._data_scale: pl.DataFrame | None = None
def __hash__(self) -> int:
# Leave out the internal state for faster hashing
return super().__hash__()
# ------------------------------------------------------------------------------------------------------------------
# Properties
# ------------------------------------------------------------------------------------------------------------------
@property
def is_fitted(self) -> bool:
"""Whether the transformer is fitted."""
return self._data_median is not None and self._data_scale is not None
# ------------------------------------------------------------------------------------------------------------------
# Learning and transformation
# ------------------------------------------------------------------------------------------------------------------
def fit(self, table: Table) -> RobustScaler:
"""
Learn a transformation for a set of columns in a table.
**Note:** This transformer is not modified.
Parameters
----------
table:
The table used to fit the transformer.
Returns
-------
fitted_transformer:
The fitted transformer.
Raises
------
ColumnNotFoundError
If column_names contain a column name that is missing in the table.
ColumnTypeError
If at least one of the specified columns in the table contains non-numerical data.
ValueError
If the table contains 0 rows.
"""
import polars as pl
if self._selector is None:
column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric]
else:
column_names = self._selector
_check_columns_exist(table, column_names)
_check_columns_are_numeric(table, column_names, operation="fit a RobustScaler")
if table.row_count == 0:
raise ValueError("The RobustScaler cannot be fitted because the table contains 0 rows")
_data_median = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).median())
q1 = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).quantile(0.25))
q3 = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).quantile(0.75))
_data_scale = q3 - q1
# To make sure there is no division by zero
for col_e in column_names:
_data_scale = _data_scale.with_columns(
pl.when(pl.col(col_e) == 0).then(1).otherwise(pl.col(col_e)).alias(col_e),
)
# Create a copy with the learned transformation
result = RobustScaler(selector=column_names)
result._data_median = _data_median
result._data_scale = _data_scale
return result
def transform(self, table: Table) -> Table:
"""
Apply the learned transformation to a table.
**Note:** The given table is not modified.
Parameters
----------
table:
The table to which the learned transformation is applied.
Returns
-------
transformed_table:
The transformed table.
Raises
------
NotFittedError
If the transformer has not been fitted yet.
ColumnNotFoundError
If the input table does not contain all columns used to fit the transformer.
ColumnTypeError
If at least one of the columns in the input table that is used to fit contains non-numerical data.
"""
import polars as pl
# Used in favor of is_fitted, so the type checker is happy
if self._selector is None or self._data_median is None or self._data_scale is None:
raise NotFittedError(kind="transformer")
_check_columns_exist(table, self._selector)
_check_columns_are_numeric(table, self._selector, operation="transform with a RobustScaler")
columns = [
(pl.col(name) - self._data_median.get_column(name)) / self._data_scale.get_column(name)
for name in self._selector
]
return Table._from_polars_lazy_frame(
table._lazy_frame.with_columns(columns),
)
def inverse_transform(self, transformed_table: Table) -> Table:
"""
Undo the learned transformation.
**Note:** The given table is not modified.
Parameters
----------
transformed_table:
The table to be transformed back to the original version.
Returns
-------
original_table:
The original table.
Raises
------
NotFittedError
If the transformer has not been fitted yet.
ColumnNotFoundError
If the input table does not contain all columns used to fit the transformer.
ColumnTypeError
If the transformed columns of the input table contain non-numerical data.
"""
import polars as pl
# Used in favor of is_fitted, so the type checker is happy
if self._selector is None or self._data_median is None or self._data_scale is None:
raise NotFittedError(kind="transformer")
_check_columns_exist(transformed_table, self._selector)
_check_columns_are_numeric(
transformed_table,
self._selector,
operation="inverse-transform with a RobustScaler",
)
columns = [
pl.col(name) * self._data_scale.get_column(name) + self._data_median.get_column(name)
for name in self._selector
]
return Table._from_polars_lazy_frame(
transformed_table._lazy_frame.with_columns(columns),
)
|
Whether the transformer is fitted.
Learn a transformation for a set of columns in a table.
Note: This transformer is not modified.
Parameters:
| Name |
Type |
Description |
Default |
table
|
Table
|
The table used to fit the transformer.
|
required
|
Returns:
Raises:
| Type |
Description |
ColumnNotFoundError
|
If column_names contain a column name that is missing in the table.
|
ColumnTypeError
|
If at least one of the specified columns in the table contains non-numerical data.
|
ValueError
|
If the table contains 0 rows.
|
Source code in src/safeds/data/tabular/transformation/_robust_scaler.py
| def fit(self, table: Table) -> RobustScaler:
"""
Learn a transformation for a set of columns in a table.
**Note:** This transformer is not modified.
Parameters
----------
table:
The table used to fit the transformer.
Returns
-------
fitted_transformer:
The fitted transformer.
Raises
------
ColumnNotFoundError
If column_names contain a column name that is missing in the table.
ColumnTypeError
If at least one of the specified columns in the table contains non-numerical data.
ValueError
If the table contains 0 rows.
"""
import polars as pl
if self._selector is None:
column_names = [name for name in table.column_names if table.get_column_type(name).is_numeric]
else:
column_names = self._selector
_check_columns_exist(table, column_names)
_check_columns_are_numeric(table, column_names, operation="fit a RobustScaler")
if table.row_count == 0:
raise ValueError("The RobustScaler cannot be fitted because the table contains 0 rows")
_data_median = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).median())
q1 = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).quantile(0.25))
q3 = _safe_collect_lazy_frame(table._lazy_frame.select(column_names).quantile(0.75))
_data_scale = q3 - q1
# To make sure there is no division by zero
for col_e in column_names:
_data_scale = _data_scale.with_columns(
pl.when(pl.col(col_e) == 0).then(1).otherwise(pl.col(col_e)).alias(col_e),
)
# Create a copy with the learned transformation
result = RobustScaler(selector=column_names)
result._data_median = _data_median
result._data_scale = _data_scale
return result
|
Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.
Note: Neither this transformer nor the given table are modified.
Parameters:
| Name |
Type |
Description |
Default |
table
|
Table
|
The table used to fit the transformer. The transformer is then applied to this table.
|
required
|
Returns:
| Name | Type |
Description |
fitted_transformer |
Self
|
|
transformed_table |
Table
|
|
Source code in src/safeds/data/tabular/transformation/_table_transformer.py
| def fit_and_transform(self, table: Table) -> tuple[Self, Table]:
"""
Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.
**Note:** Neither this transformer nor the given table are modified.
Parameters
----------
table:
The table used to fit the transformer. The transformer is then applied to this table.
Returns
-------
fitted_transformer:
The fitted transformer.
transformed_table:
The transformed table.
"""
fitted_transformer = self.fit(table)
transformed_table = fitted_transformer.transform(table)
return fitted_transformer, transformed_table
|
Undo the learned transformation.
Note: The given table is not modified.
Parameters:
| Name |
Type |
Description |
Default |
transformed_table
|
Table
|
The table to be transformed back to the original version.
|
required
|
Returns:
| Name | Type |
Description |
original_table |
Table
|
|
Raises:
| Type |
Description |
NotFittedError
|
If the transformer has not been fitted yet.
|
ColumnNotFoundError
|
If the input table does not contain all columns used to fit the transformer.
|
ColumnTypeError
|
If the transformed columns of the input table contain non-numerical data.
|
Source code in src/safeds/data/tabular/transformation/_robust_scaler.py
| def inverse_transform(self, transformed_table: Table) -> Table:
"""
Undo the learned transformation.
**Note:** The given table is not modified.
Parameters
----------
transformed_table:
The table to be transformed back to the original version.
Returns
-------
original_table:
The original table.
Raises
------
NotFittedError
If the transformer has not been fitted yet.
ColumnNotFoundError
If the input table does not contain all columns used to fit the transformer.
ColumnTypeError
If the transformed columns of the input table contain non-numerical data.
"""
import polars as pl
# Used in favor of is_fitted, so the type checker is happy
if self._selector is None or self._data_median is None or self._data_scale is None:
raise NotFittedError(kind="transformer")
_check_columns_exist(transformed_table, self._selector)
_check_columns_are_numeric(
transformed_table,
self._selector,
operation="inverse-transform with a RobustScaler",
)
columns = [
pl.col(name) * self._data_scale.get_column(name) + self._data_median.get_column(name)
for name in self._selector
]
return Table._from_polars_lazy_frame(
transformed_table._lazy_frame.with_columns(columns),
)
|
Apply the learned transformation to a table.
Note: The given table is not modified.
Parameters:
| Name |
Type |
Description |
Default |
table
|
Table
|
The table to which the learned transformation is applied.
|
required
|
Returns:
| Name | Type |
Description |
transformed_table |
Table
|
|
Raises:
| Type |
Description |
NotFittedError
|
If the transformer has not been fitted yet.
|
ColumnNotFoundError
|
If the input table does not contain all columns used to fit the transformer.
|
ColumnTypeError
|
If at least one of the columns in the input table that is used to fit contains non-numerical data.
|
Source code in src/safeds/data/tabular/transformation/_robust_scaler.py
| def transform(self, table: Table) -> Table:
"""
Apply the learned transformation to a table.
**Note:** The given table is not modified.
Parameters
----------
table:
The table to which the learned transformation is applied.
Returns
-------
transformed_table:
The transformed table.
Raises
------
NotFittedError
If the transformer has not been fitted yet.
ColumnNotFoundError
If the input table does not contain all columns used to fit the transformer.
ColumnTypeError
If at least one of the columns in the input table that is used to fit contains non-numerical data.
"""
import polars as pl
# Used in favor of is_fitted, so the type checker is happy
if self._selector is None or self._data_median is None or self._data_scale is None:
raise NotFittedError(kind="transformer")
_check_columns_exist(table, self._selector)
_check_columns_are_numeric(table, self._selector, operation="transform with a RobustScaler")
columns = [
(pl.col(name) - self._data_median.get_column(name)) / self._data_scale.get_column(name)
for name in self._selector
]
return Table._from_polars_lazy_frame(
table._lazy_frame.with_columns(columns),
)
|