diff --git a/docs/tutorials/data_visualization.ipynb b/docs/tutorials/data_visualization.ipynb index 00c309193..1304d2cdb 100644 --- a/docs/tutorials/data_visualization.ipynb +++ b/docs/tutorials/data_visualization.ipynb @@ -332,6 +332,64 @@ "* The last boxplot, `survived`, depicts the survival rate of all passengers between 0.0 and 1.0. It is clear that box plot is not a good graphic display for such a variable because it is either a passenger survived (1) or died (0)." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Violinplot\n", + "\n", + "Like boxplots, violinplots are also univariate but can offer a more precise visualisation of the underlying distribution of your data." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_numerical.get_column(\"age\").plot.violin_plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Violinplots of all numerical columns" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic_numerical.plot.violin_plots()" + ] + }, { "cell_type": "markdown", "metadata": { @@ -348,7 +406,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2024-06-20T18:48:43.336423700Z", @@ -364,7 +422,7 @@ "" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -393,7 +451,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2024-06-20T18:48:43.660060600Z", @@ -409,7 +467,7 @@ "" ] }, - 
def violin_plot(self, *, theme: Literal["dark", "light"] = "light") -> Image:
    """
    Draw the values of the column as a violin plot. This is only possible for numeric columns.

    Missing values are dropped before plotting. If no values remain afterwards, two NaN placeholders are plotted
    instead.

    Parameters
    ----------
    theme:
        The color theme of the plot. Default is "light".

    Returns
    -------
    plot:
        The violin plot as an image.

    Raises
    ------
    ColumnTypeError
        If the column is not numeric.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Column
    >>> column = Column("test", [1, 2, 3])
    >>> violinplot = column.plot.violin_plot()
    """
    # The numeric check is skipped for columns without any rows
    if self._column.row_count > 0:
        _check_column_is_numeric(self._column, operation="create a violin plot")
    from math import nan

    import matplotlib.pyplot as plt

    # Pick the matplotlib style and the rcParams overrides that belong to the requested theme
    if theme == "dark":
        style = "dark_background"
        rc_overrides = {
            "text.color": "white",
            "axes.labelcolor": "white",
            "axes.edgecolor": "white",
            "xtick.color": "white",
            "ytick.color": "white",
            "grid.color": "gray",
            "grid.linewidth": 0.5,
        }
    else:
        style = "default"
        rc_overrides = {
            "grid.linewidth": 0.5,
        }

    with plt.style.context(style):
        plt.rcParams.update(rc_overrides)

        figure, axes = plt.subplots()
        values = self._column._series.drop_nulls()
        if len(values) == 0:
            # Two NaN values stand in for a column without any non-missing values
            values = [nan, nan]
        axes.violinplot(values)

        axes.set(title=self._column.name)
        axes.yaxis.grid(visible=True)
        figure.tight_layout()

        return _figure_to_image(figure)
def violin_plots(self, *, theme: Literal["dark", "light"] = "light") -> Image:
    """
    Create a violin plot for every numerical column.

    The plots are arranged in a grid with at most three plots per row. Missing values are dropped before plotting.
    A column that has no values left afterwards is plotted from two NaN placeholders instead of being passed to
    matplotlib as an empty dataset.

    Parameters
    ----------
    theme:
        The color theme of the plot. Default is "light".

    Returns
    -------
    plot:
        The violin plot(s) as an image.

    Raises
    ------
    NonNumericColumnError
        If the table contains only non-numerical columns.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table = Table({"a": [1, 2], "b": [3, 42]})
    >>> image = table.plot.violin_plots()
    """
    numerical_table = self._table.remove_non_numeric_columns()
    if numerical_table.column_count == 0:
        raise NonNumericColumnError("This table contains only non-numerical columns.")
    from math import ceil, nan

    import matplotlib.pyplot as plt

    style = "dark_background" if theme == "dark" else "default"
    with plt.style.context(style):
        if theme == "dark":
            plt.rcParams.update(
                {
                    "text.color": "white",
                    "axes.labelcolor": "white",
                    "axes.edgecolor": "white",
                    "xtick.color": "white",
                    "ytick.color": "white",
                    "grid.color": "gray",
                    "grid.linewidth": 0.5,
                },
            )
        else:
            plt.rcParams.update(
                {
                    "grid.linewidth": 0.5,
                },
            )

        # Pair every column name with its non-missing values
        datasets = [
            (name, column._series.drop_nulls().to_list())
            for name, column in zip(numerical_table.column_names, numerical_table.to_columns())
        ]

        max_width = 3
        number_of_columns = min(len(datasets), max_width)
        number_of_rows = ceil(len(datasets) / number_of_columns)

        # squeeze=False always returns a 2-D array of axes, so the same indexing works for every grid shape
        fig, axs = plt.subplots(nrows=number_of_rows, ncols=number_of_columns, squeeze=False)
        for i, (name, data) in enumerate(datasets):
            if not data:
                # An empty dataset must not reach violinplot; use the same NaN placeholder as the
                # single-column violin plot
                data = [nan, nan]

            row, col = divmod(i, number_of_columns)
            axs[row, col].violinplot(data)
            axs[row, col].set_title(name)

        # Remove the unused axes of the last row, so there won't be empty plots
        plots_in_last_row = len(datasets) % number_of_columns
        if plots_in_last_row != 0:
            for unused_ax in axs[-1, plots_in_last_row:]:
                fig.delaxes(unused_ax)

        fig.tight_layout()
        return _figure_to_image(fig)
import pytest
from safeds.data.tabular.containers import Column
from safeds.exceptions import ColumnTypeError
from syrupy import SnapshotAssertion


def _column_cases() -> list:
    """Build the parametrized columns for the snapshot tests: no rows, one row, and multiple rows."""
    return [
        pytest.param(Column("a", []), id="empty"),
        pytest.param(Column("a", [0]), id="one row"),
        pytest.param(Column("a", [0, 1]), id="multiple rows"),
    ]


@pytest.mark.parametrize("column", _column_cases())
def test_should_match_snapshot(column: Column, snapshot_png_image: SnapshotAssertion) -> None:
    """The violin plot drawn with the default theme must equal the stored snapshot."""
    actual_plot = column.plot.violin_plot()
    assert actual_plot == snapshot_png_image


@pytest.mark.parametrize("column", _column_cases())
def test_should_match_dark_snapshot(column: Column, snapshot_png_image: SnapshotAssertion) -> None:
    """The violin plot drawn with the dark theme must equal the stored snapshot."""
    actual_plot = column.plot.violin_plot(theme="dark")
    assert actual_plot == snapshot_png_image


def test_should_raise_if_column_contains_non_numerical_values() -> None:
    """Plotting a column of strings must raise a ColumnTypeError."""
    text_column = Column("a", ["A", "B", "C"])
    with pytest.raises(ColumnTypeError):
        text_column.plot.violin_plot()
b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_dark_snapshot[one column].png new file mode 100644 index 000000000..977583dbb Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_dark_snapshot[one column].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[four columns (all numeric)].png b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[four columns (all numeric)].png new file mode 100644 index 000000000..fb87951ef Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[four columns (all numeric)].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[four columns (some non-numeric)].png b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[four columns (some non-numeric)].png new file mode 100644 index 000000000..7f73a81ac Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[four columns (some non-numeric)].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[one column].png b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[one column].png new file mode 100644 index 000000000..ecf84738b Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violin_plots/test_should_match_snapshot[one column].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[four columns (all numeric)].png 
b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[four columns (all numeric)].png new file mode 100644 index 000000000..6a53a4db0 Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[four columns (all numeric)].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[four columns (some non-numeric)].png b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[four columns (some non-numeric)].png new file mode 100644 index 000000000..c8d31170f Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[four columns (some non-numeric)].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[one column].png b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[one column].png new file mode 100644 index 000000000..977583dbb Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_dark_snapshot[one column].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_snapshot[four columns (all numeric)].png b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_snapshot[four columns (all numeric)].png new file mode 100644 index 000000000..fb87951ef Binary files /dev/null and b/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_snapshot[four columns (all numeric)].png differ diff --git a/tests/safeds/data/tabular/plotting/__snapshots__/test_plot_violinplots/test_should_match_snapshot[four columns (some non-numeric)].png 
import pytest
from safeds.data.tabular.containers import Table
from safeds.exceptions import NonNumericColumnError
from syrupy import SnapshotAssertion


def _table_cases() -> list:
    """Build the parametrized tables for the snapshot tests: one column and two four-column tables."""
    return [
        pytest.param(
            Table({"A": [1, 2, 3]}),
            id="one column",
        ),
        pytest.param(
            Table({"A": [1, 2, 3], "B": ["A", "A", "Bla"], "C": [True, True, False], "D": [1.0, 2.1, 4.5]}),
            id="four columns (some non-numeric)",
        ),
        pytest.param(
            Table({"A": [1, 2, 3], "B": [1.0, 2.1, 4.5], "C": [1, 2, 3], "D": [1.0, 2.1, 4.5]}),
            id="four columns (all numeric)",
        ),
    ]


@pytest.mark.parametrize("table", _table_cases())
def test_should_match_snapshot(table: Table, snapshot_png_image: SnapshotAssertion) -> None:
    """The violin plots drawn with the default theme must equal the stored snapshot."""
    actual_plots = table.plot.violin_plots()
    assert actual_plots == snapshot_png_image


@pytest.mark.parametrize("table", _table_cases())
def test_should_match_dark_snapshot(table: Table, snapshot_png_image: SnapshotAssertion) -> None:
    """The violin plots drawn with the dark theme must equal the stored snapshot."""
    actual_plots = table.plot.violin_plots(theme="dark")
    assert actual_plots == snapshot_png_image


def test_should_raise_if_column_contains_non_numerical_values() -> None:
    """A table that holds only string columns must raise a NonNumericColumnError with the expected message."""
    text_table = Table.from_dict({"A": ["1", "2", "3.5"], "B": ["0.2", "4", "77"]})
    expected_message = (
        r"Tried to do a numerical operation on one or multiple non-numerical columns: \nThis table contains only"
        r" non-numerical columns."
    )
    with pytest.raises(NonNumericColumnError, match=expected_message):
        text_table.plot.violin_plots()


def test_should_fail_on_empty_table() -> None:
    """A table without any columns must raise a NonNumericColumnError."""
    with pytest.raises(NonNumericColumnError):
        Table().plot.violin_plots()