FIX handle outlier detector in _get_response_values (scikit-learn#27565)
glemaitre authored Oct 13, 2023
1 parent 5444030 commit 8912619
Showing 3 changed files with 71 additions and 4 deletions.
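Editor's note — the user-visible effect of this fix: estimators routed through `_get_response_values` (for instance via `DecisionBoundaryDisplay.from_estimator`) now accept outlier detectors. A minimal sketch of the enabled usage, assuming an `IsolationForest` on a toy 2D dataset (the dataset and variable names are illustrative, not part of the patch):

# Illustrative sketch, not part of the patch. Before this fix, an outlier
# detector fell through to the regressor branch of _get_response_values,
# which raises for any response_method other than "predict".
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay

X, _ = make_blobs(n_samples=100, centers=2, random_state=0)
detector = IsolationForest(random_state=0).fit(X)

# With the fix, "decision_function" is a valid response method here.
disp = DecisionBoundaryDisplay.from_estimator(
    detector, X, response_method="decision_function", plot_method="contourf"
)
disp.ax_.scatter(X[:, 0], X[:, 1], edgecolor="k")
plt.show()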
34 changes: 34 additions & 0 deletions sklearn/inspection/_plot/tests/test_boundary_decision_display.py
@@ -10,6 +10,7 @@
     make_classification,
     make_multilabel_classification,
 )
+from sklearn.ensemble import IsolationForest
 from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
 from sklearn.linear_model import LogisticRegression
@@ -240,6 +241,39 @@ def test_decision_boundary_display_classifier(
     assert disp.figure_ == fig2
 
 
+@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
+@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
+def test_decision_boundary_display_outlier_detector(
+    pyplot, response_method, plot_method
+):
+    """Check that decision boundary is correct for outlier detector."""
+    fig, ax = pyplot.subplots()
+    eps = 2.0
+    outlier_detector = IsolationForest(random_state=0).fit(X, y)
+    disp = DecisionBoundaryDisplay.from_estimator(
+        outlier_detector,
+        X,
+        grid_resolution=5,
+        response_method=response_method,
+        plot_method=plot_method,
+        eps=eps,
+        ax=ax,
+    )
+    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
+    assert disp.ax_ == ax
+    assert disp.figure_ == fig
+
+    x0, x1 = X[:, 0], X[:, 1]
+
+    x0_min, x0_max = x0.min() - eps, x0.max() + eps
+    x1_min, x1_max = x1.min() - eps, x1.max() + eps
+
+    assert disp.xx0.min() == pytest.approx(x0_min)
+    assert disp.xx0.max() == pytest.approx(x0_max)
+    assert disp.xx1.min() == pytest.approx(x1_min)
+    assert disp.xx1.max() == pytest.approx(x1_max)
+
+
 @pytest.mark.parametrize("response_method", ["auto", "predict"])
 @pytest.mark.parametrize("plot_method", ["contourf", "contour"])
 def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
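Editor's note — as context for the parametrization in the test above, `IsolationForest` exposes both response methods it exercises. A small sketch of what each returns (the dataset is illustrative):

from sklearn.datasets import make_blobs
from sklearn.ensemble import IsolationForest

X, _ = make_blobs(n_samples=20, random_state=0)
detector = IsolationForest(random_state=0).fit(X)

# predict returns hard labels: 1 for inliers, -1 for outliers.
print(detector.predict(X[:3]))
# decision_function returns a continuous score; negative means outlier.
print(detector.decision_function(X[:3]))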
13 changes: 9 additions & 4 deletions sklearn/utils/_response.py
@@ -116,13 +116,14 @@ def _get_response_values(
     pos_label=None,
     return_response_method_used=False,
 ):
-    """Compute the response values of a classifier or a regressor.
+    """Compute the response values of a classifier, an outlier detector, or a regressor.
 
     The response values are predictions such that it follows the following shape:
 
     - for binary classification, it is a 1d array of shape `(n_samples,)`;
     - for multiclass classification, it is a 2d array of shape `(n_samples, n_classes)`;
     - for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
+    - for outlier detection, it is a 1d array of shape `(n_samples,)`;
     - for regression, it is a 1d array of shape `(n_samples,)`.
 
     If `estimator` is a binary classifier, also return the label for the
@@ -135,8 +136,9 @@
     Parameters
     ----------
     estimator : estimator instance
-        Fitted classifier or regressor or a fitted :class:`~sklearn.pipeline.Pipeline`
-        in which the last estimator is a classifier or a regressor.
+        Fitted classifier, outlier detector, or regressor or a
+        fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a
+        classifier, an outlier detector, or a regressor.
 
     X : {array-like, sparse matrix} of shape (n_samples, n_features)
         Input values.
@@ -188,7 +190,7 @@
         If the response method can be applied to a classifier only and
         `estimator` is a regressor.
     """
-    from sklearn.base import is_classifier  # noqa
+    from sklearn.base import is_classifier, is_outlier_detector  # noqa
 
     if is_classifier(estimator):
         prediction_method = _check_response_method(estimator, response_method)
@@ -220,6 +222,9 @@
                 classes=classes,
                 pos_label=pos_label,
             )
+    elif is_outlier_detector(estimator):
+        prediction_method = _check_response_method(estimator, response_method)
+        y_pred, pos_label = prediction_method(X), None
     else:  # estimator is a regressor
         if response_method != "predict":
             raise ValueError(
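Editor's note — a simplified sketch (not the actual scikit-learn source) of the dispatch order the patch establishes in `_get_response_values`; the function name and the use of `getattr` in place of `_check_response_method` are simplifications:

from sklearn.base import is_classifier, is_outlier_detector

def _get_response_values_sketch(estimator, X, response_method):
    """Simplified dispatch mirroring the patched _get_response_values."""
    if is_classifier(estimator):
        ...  # unchanged: predict_proba / decision_function / predict handling
    elif is_outlier_detector(estimator):  # branch added by this commit
        # An outlier detector has no positive class, hence pos_label=None.
        prediction_method = getattr(estimator, response_method)
        return prediction_method(X), None
    else:  # regressor path: only "predict" is valid
        if response_method != "predict":
            raise ValueError("response_method must be 'predict' for a regressor")
        return estimator.predict(X), None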
28 changes: 28 additions & 0 deletions sklearn/utils/tests/test_response.py
@@ -7,6 +7,7 @@
     make_multilabel_classification,
     make_regression,
 )
+from sklearn.ensemble import IsolationForest
 from sklearn.linear_model import (
     LinearRegression,
     LogisticRegression,
@@ -52,6 +53,33 @@ def test_get_response_values_regressor(return_response_method_used):
     assert results[2] == "predict"
 
 
+@pytest.mark.parametrize(
+    "response_method",
+    ["predict", "decision_function", ["decision_function", "predict"]],
+)
+@pytest.mark.parametrize("return_response_method_used", [True, False])
+def test_get_response_values_outlier_detection(
+    response_method, return_response_method_used
+):
+    """Check the behaviour of `_get_response_values` with outlier detector."""
+    X, y = make_classification(n_samples=50, random_state=0)
+    outlier_detector = IsolationForest(random_state=0).fit(X, y)
+    results = _get_response_values(
+        outlier_detector,
+        X,
+        response_method=response_method,
+        return_response_method_used=return_response_method_used,
+    )
+    chosen_response_method = (
+        response_method[0] if isinstance(response_method, list) else response_method
+    )
+    prediction_method = getattr(outlier_detector, chosen_response_method)
+    assert_array_equal(results[0], prediction_method(X))
+    assert results[1] is None
+    if return_response_method_used:
+        assert results[2] == chosen_response_method
+
+
 @pytest.mark.parametrize(
     "response_method",
     ["predict_proba", "decision_function", "predict"],
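Editor's note — the list variant in the parametrization above (`["decision_function", "predict"]`) checks the priority behaviour: given a list, `_get_response_values` uses the first method the estimator implements. A usage sketch against the private API (an internal sklearn utility, subject to change):

from sklearn.datasets import make_classification
from sklearn.ensemble import IsolationForest
from sklearn.utils._response import _get_response_values  # private API

X, _ = make_classification(n_samples=50, random_state=0)
detector = IsolationForest(random_state=0).fit(X)

y_pred, pos_label, method_used = _get_response_values(
    detector,
    X,
    response_method=["decision_function", "predict"],
    return_response_method_used=True,
)
assert method_used == "decision_function"  # first implemented method wins
assert pos_label is None                   # no positive class for a detector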
