From 57644413b876d7d1263601b8db014b46a306fb5d Mon Sep 17 00:00:00 2001
From: zogomii <160880662+zogomii@users.noreply.github.com>
Date: Fri, 19 Jul 2024 14:53:51 +0200
Subject: [PATCH] feat: join (#870)

Closes #745

### Summary of Changes

Implemented a join function for Table

---------

Co-authored-by: grefrathc
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann
---
 src/safeds/data/tabular/containers/_table.py  | 73 ++++++++++++++
 .../tabular/containers/_table/test_join.py    | 98 +++++++++++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 tests/safeds/data/tabular/containers/_table/test_join.py

diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index b4be7f697..150dbb5dd 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -1708,6 +1708,79 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer
         """
         return fitted_transformer.inverse_transform(self)
 
+    def join(
+        self,
+        right_table: Table,
+        left_names: str | list[str],
+        right_names: str | list[str],
+        *,
+        mode: Literal["inner", "left", "outer"] = "inner",
+    ) -> Table:
+        """
+        Join a table with the current table and return the result.
+
+        Parameters
+        ----------
+        right_table:
+            The other table which is to be joined to the current table.
+        left_names:
+            Name or list of names of columns from the current table on which to join right_table.
+        right_names:
+            Name or list of names of columns from right_table on which to join the current table.
+        mode:
+            Specify which type of join you want to use. Options include 'inner', 'outer', 'left'.
+
+        Returns
+        -------
+        new_table:
+            The table with the joined table.
+
+        Raises
+        ------
+        ColumnNotFoundError
+            If a name in left_names or right_names does not exist in the respective table.
+        ValueError
+            If left_names and right_names contain a different number of columns.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.containers import Table
+        >>> table1 = Table({"a": [1, 2], "b": [3, 4]})
+        >>> table2 = Table({"d": [1, 5], "e": [5, 6]})
+        >>> table1.join(table2, "a", "d", mode="left")
+        +-----+-----+------+
+        |   a |   b |    e |
+        | --- | --- |  --- |
+        | i64 | i64 |  i64 |
+        +==================+
+        |   1 |   3 |    5 |
+        |   2 |   4 | null |
+        +-----+-----+------+
+        """
+        # Preprocessing: treat a single name as a one-element list. Otherwise, `len` below would
+        # compare the character counts of the two names instead of the number of join columns.
+        if isinstance(left_names, str):
+            left_names = [left_names]
+        if isinstance(right_names, str):
+            right_names = [right_names]
+
+        # Validation
+        _check_columns_exist(self, left_names)
+        _check_columns_exist(right_table, right_names)
+
+        if len(left_names) != len(right_names):
+            raise ValueError("The number of columns to join on must be the same in both tables.")
+
+        # Implementation
+        return self._from_polars_lazy_frame(
+            self._lazy_frame.join(
+                right_table._lazy_frame,
+                left_on=left_names,
+                right_on=right_names,
+                how=mode,
+            ),
+        )
+
     def transform_table(self, fitted_transformer: TableTransformer) -> Table:
         """
         Return a new table transformed by a **fitted** transformer.
diff --git a/tests/safeds/data/tabular/containers/_table/test_join.py b/tests/safeds/data/tabular/containers/_table/test_join.py
new file mode 100644
index 000000000..1a1aec200
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_join.py
@@ -0,0 +1,98 @@
+from typing import Literal
+
+import pytest
+from safeds.data.tabular.containers import Table
+from safeds.exceptions import ColumnNotFoundError
+
+
+@pytest.mark.parametrize(
+    ("table_left", "table_right", "left_names", "right_names", "mode", "table_expected"),
+    [
+        (
+            Table({"a": [1, 2], "b": [3, 4]}),
+            Table({"d": [1, 5], "e": [5, 6]}),
+            ["a"],
+            ["d"],
+            "outer",
+            Table({"a": [1, None, 2], "b": [3, None, 4], "d": [1, 5, None], "e": [5, 6, None]}),
+        ),
+        (
+            Table({"a": [1, 2], "b": [3, 4]}),
+            Table({"d": [1, 5], "e": [5, 6]}),
+            ["a"],
+            ["d"],
+            "left",
+            Table({"a": [1, 2], "b": [3, 4], "e": [5, None]}),
+        ),
+        (
+            Table({"a": [1, 2], "b": [3, 4]}),
+            Table({"d": [1, 5], "e": [5, 6]}),
+            ["a"],
+            ["d"],
+            "inner",
+            Table({"a": [1], "b": [3], "e": [5]}),
+        ),
+        (
+            Table({"a": [1, 2], "b": [3, 4], "c": [5, 6]}),
+            Table({"d": [1, 5], "e": [5, 6], "g": [7, 9]}),
+            ["a", "c"],
+            ["d", "e"],
+            "inner",
+            Table({"a": [1], "b": [3], "c": [5], "g": [7]}),
+        ),
+        (
+            Table({"a": [1, 2], "b": [3, 4]}),
+            Table({"d": [1, 5], "e": [5, 6]}),
+            ["b"],
+            ["e"],
+            "inner",
+            Table({"a": [], "b": [], "d": []}),
+        ),
+    ],
+)
+def test_should_join_two_tables(
+    table_left: Table,
+    table_right: Table,
+    left_names: list[str],
+    right_names: list[str],
+    mode: Literal["inner", "left", "outer"],
+    table_expected: Table,
+) -> None:
+    assert table_left.join(table_right, left_names, right_names, mode=mode) == table_expected
+
+
+def test_should_raise_if_columns_are_mismatched() -> None:
+    table_left = Table({"a": [1, 2], "b": [3, 4]})
+    table_right = Table({"d": [1, 5], "e": [5, 6]})
+    left_names = ["a"]
+    right_names = ["d", "e"]
+    with pytest.raises(ValueError, match="The number of columns to join on must be the same in both tables."):
+        table_left.join(table_right, left_names, right_names)
+
+
+@pytest.mark.parametrize(
+    ("table_left", "table_right", "left_names", "right_names"),
+    [
+        (Table({"a": [1, 2], "b": [3, 4]}), Table({"d": [1, 5], "e": [5, 6]}), ["c"], ["d"]),
+        (Table({"a": [1, 2], "b": [3, 4]}), Table({"d": [1, 5], "e": [5, 6]}), ["a"], ["f"]),
+    ],
+    ids=[
+        "wrong_left_name",
+        "wrong_right_name",
+    ],
+)
+def test_should_raise_if_columns_are_missing(
+    table_left: Table,
+    table_right: Table,
+    left_names: list[str],
+    right_names: list[str],
+) -> None:
+    with pytest.raises(ColumnNotFoundError):
+        table_left.join(table_right, left_names=left_names, right_names=right_names)
+
+
+def test_should_accept_single_names_of_different_lengths() -> None:
+    table_left = Table({"ab": [1, 2], "c": [3, 4]})
+    table_right = Table({"d": [1, 5], "e": [5, 6]})
+    expected = Table({"ab": [1], "c": [3], "e": [5]})
+    assert table_left.join(table_right, "ab", "d") == expected