diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 410374c5b..693785063 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -2075,7 +2075,7 @@ def plot_histograms(self) -> Image: """ col_wrap = min(self.number_of_columns, 3) - data = pd.melt(self._data.applymap(lambda value: str(value)), value_vars=self.column_names) + data = pd.melt(self._data.map(lambda value: str(value)), value_vars=self.column_names) grid = sns.FacetGrid(data=data, col="variable", col_wrap=col_wrap, sharex=False, sharey=False) grid.map(sns.histplot, "value") grid.set_xlabels("") diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index cc045a7ec..e18bd6ce8 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -1,6 +1,5 @@ from __future__ import annotations -import copy from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table @@ -193,21 +192,6 @@ def target(self) -> Column: """ return self._target - # ------------------------------------------------------------------------------------------------------------------ - # Helpers - # ------------------------------------------------------------------------------------------------------------------ - - def _copy(self) -> TaggedTable: - """ - Return a copy of this tagged table. - - Returns - ------- - table : TaggedTable - The copy of this tagged table. - """ - return copy.deepcopy(self) - # ------------------------------------------------------------------------------------------------------------------ # Specific methods from TaggedTable class: # ------------------------------------------------------------------------------------------------------------------ diff --git a/src/safeds/data/tabular/transformation/_discretizer.py b/src/safeds/data/tabular/transformation/_discretizer.py index 581130c55..3d887c7df 100644 --- a/src/safeds/data/tabular/transformation/_discretizer.py +++ b/src/safeds/data/tabular/transformation/_discretizer.py @@ -139,7 +139,7 @@ def transform(self, table: Table) -> Table: if not table.get_column(column).type.is_numeric(): raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.") - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py index 074d99637..06e6c6d32 100644 --- a/src/safeds/data/tabular/transformation/_imputer.py +++ b/src/safeds/data/tabular/transformation/_imputer.py @@ -153,11 +153,9 @@ def fit(self, table: Table, column_names: list[str] | None) -> Imputer: multiple_most_frequent[name] = table.get_column(name).mode() if len(multiple_most_frequent) > 0: warnings.warn( - ( - "There are multiple most frequent values in a column given to the Imputer.\nThe lowest values" - " are being chosen in this cases. The following columns have multiple most frequent" - f" values:\n{multiple_most_frequent}" - ), + "There are multiple most frequent values in a column given to the Imputer.\nThe lowest values" + " are being chosen in this cases. The following columns have multiple most frequent" + f" values:\n{multiple_most_frequent}", UserWarning, stacklevel=2, ) @@ -210,7 +208,7 @@ def transform(self, table: Table) -> Table: if table.number_of_rows == 0: raise ValueError("The Imputer cannot transform the table because it contains 0 rows") - data = table._data.copy() + data = table._data.reset_index(drop=True) data[self._column_names] = pd.DataFrame( self._wrapped_transformer.transform(data[self._column_names]), columns=self._column_names, diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 7b3351d7b..26c732327 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -56,12 +56,9 @@ def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder: if table.keep_only_columns(column_names).remove_columns_with_non_numerical_values().number_of_columns > 0: warnings.warn( - ( - "The columns" - f" {table.keep_only_columns(column_names).remove_columns_with_non_numerical_values().column_names} contain" - " numerical data. The LabelEncoder is designed to encode non-numerical values into numerical" - " values" - ), + "The columns" + f" {table.keep_only_columns(column_names).remove_columns_with_non_numerical_values().column_names} contain" + " numerical data. The LabelEncoder is designed to encode non-numerical values into numerical values", UserWarning, stacklevel=2, ) @@ -112,7 +109,7 @@ def transform(self, table: Table) -> Table: if table.number_of_rows == 0: raise ValueError("The LabelEncoder cannot transform the table because it contains 0 rows") - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) @@ -171,7 +168,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ), ) - data = transformed_table._data.copy() + data = transformed_table._data.reset_index(drop=True) data.columns = transformed_table.column_names data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index ba9cda823..cf557b548 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -111,17 +111,14 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: > 0 ): warnings.warn( - ( - "The columns" - f" {table._as_table().keep_only_columns(column_names).remove_columns_with_non_numerical_values().column_names} contain" - " numerical data. The OneHotEncoder is designed to encode non-numerical values into numerical" - " values" - ), + "The columns" + f" {table._as_table().keep_only_columns(column_names).remove_columns_with_non_numerical_values().column_names} contain" + " numerical data. The OneHotEncoder is designed to encode non-numerical values into numerical values", UserWarning, stacklevel=2, ) - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names result = OneHotEncoder() @@ -223,7 +220,7 @@ def transform(self, table: Table) -> Table: # New columns may not be sorted: column_names = [] for name in table.column_names: - if name not in self._column_names.keys(): + if name not in self._column_names: column_names.append(name) else: column_names.extend( @@ -322,11 +319,11 @@ def inverse_transform(self, transformed_table: Table) -> Table: name if name not in [value for value_list in list(self._column_names.values()) for value in value_list] else list(self._column_names.keys())[ - [ + next( list(self._column_names.values()).index(value) for value in list(self._column_names.values()) if name in value - ][0] + ) ] ) for name in table.column_names diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 5c4fd3935..066c25632 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -151,7 +151,7 @@ def transform(self, table: Table) -> Table: ), ) - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) @@ -213,7 +213,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ), ) - data = transformed_table._data.copy() + data = transformed_table._data.reset_index(drop=True) data.columns = transformed_table.column_names data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 1153ef140..748209f9c 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -133,7 +133,7 @@ def transform(self, table: Table) -> Table: ), ) - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) @@ -195,7 +195,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ), ) - data = transformed_table._data.copy() + data = transformed_table._data.reset_index(drop=True) data.columns = transformed_table.column_names data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 1d801d214..9479f4391 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -52,11 +52,9 @@ def fit(model: Any, tagged_table: TaggedTable) -> None: if len(non_numerical_column_names) != 0: raise NonNumericColumnError( str(non_numerical_column_names), - ( - "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" - " data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many" - " different values\nor is ordinal, you should use the LabelEncoder." - ), + "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" + " data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many" + " different values\nor is ordinal, you should use the LabelEncoder.", ) null_containing_column_names = set(tagged_table.features.column_names) - set( @@ -65,10 +63,8 @@ def fit(model: Any, tagged_table: TaggedTable) -> None: if len(null_containing_column_names) != 0: raise MissingValuesColumnError( str(null_containing_column_names), - ( - "You can use the Imputer to replace the missing values based on different strategies.\nIf you want to" - " remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`." - ), + "You can use the Imputer to replace the missing values based on different strategies.\nIf you want to" + " remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.", ) try: @@ -138,11 +134,9 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ if len(non_numerical_column_names) != 0: raise NonNumericColumnError( str(non_numerical_column_names), - ( - "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" - " data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many" - " different values\nor is ordinal, you should use the LabelEncoder.\n" - ), + "You can use the LabelEncoder or OneHotEncoder to transform your non-numerical data to numerical" + " data.\nThe OneHotEncoder should be used if you work with nominal data. If your data contains too many" + " different values\nor is ordinal, you should use the LabelEncoder.\n", ) null_containing_column_names = set(dataset.keep_only_columns(feature_names).column_names) - set( @@ -151,16 +145,14 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ if len(null_containing_column_names) != 0: raise MissingValuesColumnError( str(null_containing_column_names), - ( - "You can use the Imputer to replace the missing values based on different strategies.\nIf you want to" - " remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`." - ), + "You can use the Imputer to replace the missing values based on different strategies.\nIf you want to" + " remove the missing values entirely you can use the method `Table.remove_rows_with_missing_values`.", ) dataset_df = dataset.keep_only_columns(feature_names)._data dataset_df.columns = feature_names - result_set = dataset._data.copy(deep=True) + result_set = dataset._data.reset_index(drop=True) result_set.columns = dataset.column_names try: diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py deleted file mode 100644 index 8819aff05..000000000 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest -from safeds.data.tabular.containers import TaggedTable - - -@pytest.mark.parametrize( - "tagged_table", - [ - TaggedTable({"a": [], "b": []}, target_name="b", feature_names=["a"]), - TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None]}, target_name="b", feature_names=["a"]), - TaggedTable( - {"a": ["a", 3, 0.1], "b": [True, False, None], "c": ["a", "b", "c"]}, - target_name="b", - feature_names=["a"], - ), - TaggedTable({"a": [], "b": [], "c": []}, target_name="b", feature_names=["a"]), - ], - ids=["empty-rows", "normal", "column_as_non_feature", "column_as_non_feature_with_empty_rows"], -) -def test_should_copy_tagged_table(tagged_table: TaggedTable) -> None: - copied = tagged_table._copy() - assert copied == tagged_table - assert copied is not tagged_table