FIX handle outlier detector in _get_response_values (scikit-learn#27565)
glemaitre authored Oct 13, 2023
1 parent 5444030 commit 8912619
Showing 3 changed files with 71 additions and 4 deletions.
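Editor's note — the user-visible effect of this fix: estimators routed through `_get_response_values` (for instance via `DecisionBoundaryDisplay.from_estimator`) now accept outlier detectors. A minimal sketch of the enabled usage, assuming an `IsolationForest` on a toy 2D dataset (the dataset and variable names are illustrative, not part of the patch):

# Illustrative sketch, not part of the patch. Before this fix, an outlier
# detector fell through to the regressor branch of _get_response_values,
# which raises for any response_method other than "predict".
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay

X, _ = make_blobs(n_samples=100, centers=2, random_state=0)
detector = IsolationForest(random_state=0).fit(X)

# With the fix, "decision_function" is a valid response method here.
disp = DecisionBoundaryDisplay.from_estimator(
    detector, X, response_method="decision_function", plot_method="contourf"
)
disp.ax_.scatter(X[:, 0], X[:, 1], edgecolor="k")
plt.show()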
34 changes: 34 additions & 0 deletions sklearn/inspection/_plot/tests/test_boundary_decision_display.py
@@ -10,6 +10,7 @@
     make_classification,
     make_multilabel_classification,
 )
+from sklearn.ensemble import IsolationForest
 from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
 from sklearn.linear_model import LogisticRegression
@@ -240,6 +241,39 @@ def test_decision_boundary_display_classifier(
     assert disp.figure_ == fig2
 
 
+@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
+@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
+def test_decision_boundary_display_outlier_detector(
+    pyplot, response_method, plot_method
+):
+    """Check that decision boundary is correct for outlier detector."""
+    fig, ax = pyplot.subplots()
+    eps = 2.0
+    outlier_detector = IsolationForest(random_state=0).fit(X, y)
+    disp = DecisionBoundaryDisplay.from_estimator(
+        outlier_detector,
+        X,
+        grid_resolution=5,
+        response_method=response_method,
+        plot_method=plot_method,
+        eps=eps,
+        ax=ax,
+    )
+    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
+    assert disp.ax_ == ax
+    assert disp.figure_ == fig
+
+    x0, x1 = X[:, 0], X[:, 1]
+
+    x0_min, x0_max = x0.min() - eps, x0.max() + eps
+    x1_min, x1_max = x1.min() - eps, x1.max() + eps
+
+    assert disp.xx0.min() == pytest.approx(x0_min)
+    assert disp.xx0.max() == pytest.approx(x0_max)
+    assert disp.xx1.min() == pytest.approx(x1_min)
+    assert disp.xx1.max() == pytest.approx(x1_max)
+
+
 @pytest.mark.parametrize("response_method", ["auto", "predict"])
 @pytest.mark.parametrize("plot_method", ["contourf", "contour"])
 def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
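Editor's note — as context for the parametrization in the test above, `IsolationForest` exposes both response methods it exercises. A small sketch of what each returns (the dataset is illustrative):

from sklearn.datasets import make_blobs
from sklearn.ensemble import IsolationForest

X, _ = make_blobs(n_samples=20, random_state=0)
detector = IsolationForest(random_state=0).fit(X)

# predict returns hard labels: 1 for inliers, -1 for outliers.
print(detector.predict(X[:3]))
# decision_function returns a continuous score; negative means outlier.
print(detector.decision_function(X[:3]))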
13 changes: 9 additions & 4 deletions sklearn/utils/_response.py
@@ -116,13 +116,14 @@ def _get_response_values(
     pos_label=None,
     return_response_method_used=False,
 ):
-    """Compute the response values of a classifier or a regressor.
+    """Compute the response values of a classifier, an outlier detector, or a regressor.
 
     The response values are predictions such that it follows the following shape:
 
     - for binary classification, it is a 1d array of shape `(n_samples,)`;
     - for multiclass classification, it is a 2d array of shape `(n_samples, n_classes)`;
     - for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
+    - for outlier detection, it is a 1d array of shape `(n_samples,)`;
     - for regression, it is a 1d array of shape `(n_samples,)`.
 
     If `estimator` is a binary classifier, also return the label for the
@@ -135,8 +136,9 @@
     Parameters
     ----------
     estimator : estimator instance
-        Fitted classifier or regressor or a fitted :class:`~sklearn.pipeline.Pipeline`
-        in which the last estimator is a classifier or a regressor.
+        Fitted classifier, outlier detector, or regressor or a
+        fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a
+        classifier, an outlier detector, or a regressor.
 
     X : {array-like, sparse matrix} of shape (n_samples, n_features)
         Input values.
@@ -188,7 +190,7 @@
         If the response method can be applied to a classifier only and
         `estimator` is a regressor.
     """
-    from sklearn.base import is_classifier  # noqa
+    from sklearn.base import is_classifier, is_outlier_detector  # noqa
 
     if is_classifier(estimator):
         prediction_method = _check_response_method(estimator, response_method)
@@ -220,6 +222,9 @@
                 classes=classes,
                 pos_label=pos_label,
             )
+    elif is_outlier_detector(estimator):
+        prediction_method = _check_response_method(estimator, response_method)
+        y_pred, pos_label = prediction_method(X), None
     else:  # estimator is a regressor
         if response_method != "predict":
             raise ValueError(
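Editor's note — a simplified sketch (not the actual scikit-learn source) of the dispatch order the patch establishes in `_get_response_values`; the function name and the use of `getattr` in place of `_check_response_method` are simplifications:

from sklearn.base import is_classifier, is_outlier_detector

def _get_response_values_sketch(estimator, X, response_method):
    """Simplified dispatch mirroring the patched _get_response_values."""
    if is_classifier(estimator):
        ...  # unchanged: predict_proba / decision_function / predict handling
    elif is_outlier_detector(estimator):  # branch added by this commit
        # An outlier detector has no positive class, hence pos_label=None.
        prediction_method = getattr(estimator, response_method)
        return prediction_method(X), None
    else:  # regressor path: only "predict" is valid
        if response_method != "predict":
            raise ValueError("response_method must be 'predict' for a regressor")
        return estimator.predict(X), None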
28 changes: 28 additions & 0 deletions sklearn/utils/tests/test_response.py
@@ -7,6 +7,7 @@
     make_multilabel_classification,
     make_regression,
 )
+from sklearn.ensemble import IsolationForest
 from sklearn.linear_model import (
     LinearRegression,
     LogisticRegression,
@@ -52,6 +53,33 @@ def test_get_response_values_regressor(return_response_method_used):
     assert results[2] == "predict"
 
 
+@pytest.mark.parametrize(
+    "response_method",
+    ["predict", "decision_function", ["decision_function", "predict"]],
+)
+@pytest.mark.parametrize("return_response_method_used", [True, False])
+def test_get_response_values_outlier_detection(
+    response_method, return_response_method_used
+):
+    """Check the behaviour of `_get_response_values` with outlier detector."""
+    X, y = make_classification(n_samples=50, random_state=0)
+    outlier_detector = IsolationForest(random_state=0).fit(X, y)
+    results = _get_response_values(
+        outlier_detector,
+        X,
+        response_method=response_method,
+        return_response_method_used=return_response_method_used,
+    )
+    chosen_response_method = (
+        response_method[0] if isinstance(response_method, list) else response_method
+    )
+    prediction_method = getattr(outlier_detector, chosen_response_method)
+    assert_array_equal(results[0], prediction_method(X))
+    assert results[1] is None
+    if return_response_method_used:
+        assert results[2] == chosen_response_method
+
+
 @pytest.mark.parametrize(
     "response_method",
     ["predict_proba", "decision_function", "predict"],
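Editor's note — the list variant in the parametrization above (`["decision_function", "predict"]`) checks the priority behaviour: given a list, `_get_response_values` uses the first method the estimator implements. A usage sketch against the private API (an internal sklearn utility, subject to change):

from sklearn.datasets import make_classification
from sklearn.ensemble import IsolationForest
from sklearn.utils._response import _get_response_values  # private API

X, _ = make_classification(n_samples=50, random_state=0)
detector = IsolationForest(random_state=0).fit(X)

y_pred, pos_label, method_used = _get_response_values(
    detector,
    X,
    response_method=["decision_function", "predict"],
    return_response_method_used=True,
)
assert method_used == "decision_function"  # first implemented method wins
assert pos_label is None                   # no positive class for a detector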
