From d43d7d61c159a63fd2c8ffebca505b9b3ae41a4e Mon Sep 17 00:00:00 2001
From: Linus Sommer <95619282+linus-md@users.noreply.github.com>
Date: Thu, 18 Jan 2024 23:22:43 +0100
Subject: [PATCH 01/32] DOC: Added drop down menus to `1.8` Cross
 Decomposition (#27916)

---
 doc/modules/cross_decomposition.rst | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/doc/modules/cross_decomposition.rst b/doc/modules/cross_decomposition.rst
index 337a7bcd250bb..8f8d217f87144 100644
--- a/doc/modules/cross_decomposition.rst
+++ b/doc/modules/cross_decomposition.rst
@@ -92,9 +92,9 @@ Step *a)* may be performed in two ways: either by computing the whole SVD of
 values, or by directly computing the singular vectors using the power method
 (cf section 11.3 in [1]_), which corresponds to the `'nipals'` option of the
 `algorithm` parameter.
-
-Transforming data
-^^^^^^^^^^^^^^^^^
+|details-start|
+**Transforming data**
+|details-split|
 
 To transform :math:`X` into :math:`\bar{X}`, we need to find a projection
 matrix :math:`P` such that :math:`\bar{X} = XP`. We know that for the
@@ -106,9 +106,11 @@ training data, :math:`\Xi = XP`, and :math:`X = \Xi \Gamma^T`. Setting
 Similarly, :math:`Y` can be transformed using the rotation matrix
 :math:`V(\Delta^T V)^{-1}`, accessed via the `y_rotations_` attribute.
 
+|details-end|
 
-Predicting the targets Y
-^^^^^^^^^^^^^^^^^^^^^^^^
+|details-start|
+**Predicting the targets Y**
+|details-split|
 
 To predict the targets of some data :math:`X`, we are looking for a
 coefficient matrix :math:`\beta \in R^{d \times t}` such that :math:`Y =
@@ -125,6 +127,8 @@ P \Delta^T`, and as a result the coefficient matrix :math:`\beta = \alpha P
 
 :math:`\beta` can be accessed through the `coef_` attribute.
 
+|details-end|
+
 PLSSVD
 ------
 
@@ -180,14 +184,17 @@ Since :class:`CCA` involves the inversion of :math:`X_k^TX_k` and
 :math:`Y_k^TY_k`, this estimator can be unstable if the number of features or
 targets is greater than the number of samples.
 
-
-.. topic:: Reference:
+|details-start|
+**Reference**
+|details-split|
 
    .. [1] `A survey of Partial Least Squares (PLS) methods, with emphasis on
       the two-block case
      `_
      JA Wegelin
 
+|details-end|
+
 .. topic:: Examples:
 
    * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`
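The rotation and coefficient identities this patch folds into dropdowns can be
checked numerically. A minimal sketch, assuming scikit-learn >= 1.3 (where
`coef_` has shape `(n_targets, n_features)`) and `scale=False` so that only
centering is involved; none of this code is part of the patch:

    import numpy as np
    from sklearn.cross_decomposition import PLSRegression

    rng = np.random.RandomState(0)
    X, Y = rng.randn(50, 4), rng.randn(50, 2)
    pls = PLSRegression(n_components=2, scale=False).fit(X, Y)

    # "Transforming data": X_bar is the centered X times the rotation matrix.
    assert np.allclose(pls.transform(X), (X - X.mean(0)) @ pls.x_rotations_)

    # "Predicting the targets Y": predictions come from the coefficient matrix.
    assert np.allclose(
        pls.predict(X), (X - X.mean(0)) @ pls.coef_.T + pls.intercept_
    )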
From 26dfe833aa5122997a6b66197df0e03629a45e3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?=
 <34657725+jeremiedbb@users.noreply.github.com>
Date: Fri, 19 Jan 2024 06:51:23 +0100
Subject: [PATCH 02/32] Fix prevent infinite loop in KMeans (#28165)

---
 doc/whats_new/v1.4.rst                |  3 +++
 sklearn/cluster/_k_means_common.pyx   | 16 ++++++++++++++++
 sklearn/cluster/tests/test_k_means.py | 18 ++++++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index c674a8619e076..ee47bae7b1f5b 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -20,6 +20,9 @@ Changelog
   :pr:`28121` by :user:`Pietro Peterlongo ` and
   :user:`Yao Xiao `.
 
+- |Fix| Avoid infinite loop in :class:`cluster.KMeans` when the number of clusters is
+  larger than the number of non-duplicate samples.
+  :pr:`28165` by :user:`Jérémie du Boisberranger `.
 
 .. _changes_1_4:

diff --git a/sklearn/cluster/_k_means_common.pyx b/sklearn/cluster/_k_means_common.pyx
index 151af55076b7b..7c9c1bb54eaae 100644
--- a/sklearn/cluster/_k_means_common.pyx
+++ b/sklearn/cluster/_k_means_common.pyx
@@ -192,6 +192,11 @@ cpdef void _relocate_empty_clusters_dense(
         int new_cluster_id, old_cluster_id, far_idx, idx, k
         floating weight
 
+    if np.max(distances) == 0:
+        # Happens when there are more clusters than non-duplicate samples. Relocating
+        # is pointless in this case.
+        return
+
     for idx in range(n_empty):
         new_cluster_id = empty_clusters[idx]
 
@@ -241,6 +246,11 @@ cpdef void _relocate_empty_clusters_sparse(
                 X_indices[X_indptr[i]: X_indptr[i + 1]],
                 centers_old[j], centers_squared_norms[j], True)
 
+    if np.max(distances) == 0:
+        # Happens when there are more clusters than non-duplicate samples. Relocating
+        # is pointless in this case.
+        return
+
     cdef:
         int[::1] far_from_centers = np.argpartition(distances, -n_empty)[:-n_empty-1:-1].astype(np.int32)
 
@@ -274,12 +284,18 @@ cdef void _average_centers(
         int n_features = centers.shape[1]
         int j, k
         floating alpha
+        int argmax_weight = np.argmax(weight_in_clusters)
 
     for j in range(n_clusters):
         if weight_in_clusters[j] > 0:
             alpha = 1.0 / weight_in_clusters[j]
             for k in range(n_features):
                 centers[j, k] *= alpha
+        else:
+            # For convenience, we avoid setting empty clusters at the origin but place
+            # them at the location of the biggest cluster.
+            for k in range(n_features):
+                centers[j, k] = centers[argmax_weight, k]
 
 
 cdef void _center_shift(
diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
index 5b0c7ab9aace8..4a112a30b29ed 100644
--- a/sklearn/cluster/tests/test_k_means.py
+++ b/sklearn/cluster/tests/test_k_means.py
@@ -1352,3 +1352,21 @@ def test_sample_weight_zero(init, global_random_seed):
     # (i.e. be at a distance=0 from it)
     d = euclidean_distances(X[::2], clusters_weighted)
     assert not np.any(np.isclose(d, 0))
+
+
+@pytest.mark.parametrize("array_constr", data_containers, ids=data_containers_ids)
+@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+def test_relocating_with_duplicates(algorithm, array_constr):
+    """Check that kmeans stops when there are more centers than non-duplicate samples.
+
+    Non-regression test for issue:
+    https://github.com/scikit-learn/scikit-learn/issues/28055
+    """
+    X = np.array([[0, 0], [1, 1], [1, 1], [1, 0], [0, 1]])
+    km = KMeans(n_clusters=5, init=X, algorithm=algorithm)
+
+    msg = r"Number of distinct clusters \(4\) found smaller than n_clusters \(5\)"
+    with pytest.warns(ConvergenceWarning, match=msg):
+        km.fit(array_constr(X))
+
+    assert km.n_iter_ == 1
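Seen from user code, the fix turns a hang into a warning plus a usable model.
A minimal sketch, assuming a build that includes this patch; the 5-sample,
4-distinct-point setup mirrors issue #28055:

    import numpy as np
    from sklearn.cluster import KMeans

    # 5 requested clusters, but only 4 distinct samples.
    X = np.array([[0, 0], [1, 1], [1, 1], [1, 0], [0, 1]])
    km = KMeans(n_clusters=5, init=X, n_init=1)

    km.fit(X)  # emits a ConvergenceWarning instead of looping forever
    print(km.n_iter_)  # 1: relocation is skipped, so Lloyd stops immediately

The empty fifth center ends up at the location of the biggest cluster rather
than at the origin, per the `_average_centers` change above.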
From 2da6d17bb472524b883d81afa4a85bd7a1c89d60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Fri, 19 Jan 2024 07:32:04 +0100
Subject: [PATCH 03/32] CI Remove temporary work-around related to scipy and
 pandas development wheel installing numpy<2 (#28163)

---
 build_tools/azure/install.sh | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh
index 5bd4112a1820b..df20e27b3c068 100755
--- a/build_tools/azure/install.sh
+++ b/build_tools/azure/install.sh
@@ -47,6 +47,16 @@ pre_python_environment_install() {
 
 }
 
+check_packages_dev_version() {
+    for package in $@; do
+        package_version=$(python -c "import $package; print($package.__version__)")
+        if ! [[ $package_version =~ "dev" ]]; then
+            echo "$package is not a development version: $package_version"
+            exit 1
+        fi
+    done
+}
+
 python_environment_install_and_activate() {
     if [[ "$DISTRIB" == "conda"* ]]; then
         # Install/update conda with the libmamba solver because the legacy
@@ -71,12 +81,10 @@ python_environment_install_and_activate() {
     if [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then
         echo "Installing development dependency wheels"
         dev_anaconda_url=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
-        pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url numpy pandas scipy
+        dev_packages="numpy scipy pandas"
+        pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url $dev_packages
 
-        # XXX: at the time of writing, installing scipy or pandas from the dev
-        # wheels forces the numpy dependency to be < 2.0.0. Let's force the
-        # installation of numpy dev wheels instead.
-        pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url numpy
+        check_packages_dev_version $dev_packages
 
         echo "Installing Cython from latest sources"
         pip install https://github.com/cython/cython/archive/master.zip
From 21fcab7223257d01dab5397424de9057128d5467 Mon Sep 17 00:00:00 2001
From: Andrei Dzis
Date: Fri, 19 Jan 2024 13:11:23 +0300
Subject: [PATCH 04/32] DOC Added relation between ROC-AUC and Gini in
 docstring of roc_auc_score (#28156)

Co-authored-by: Guillaume Lemaitre
---
 sklearn/metrics/_ranking.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 4a2e7aa1b78a3..a117a5427a996 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -538,6 +538,21 @@ class scores must correspond to the order of ``labels``,
     RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic
         (ROC) curve given the true and predicted values.
 
+    Notes
+    -----
+    The Gini Coefficient is a summary measure of the ranking ability of binary
+    classifiers. It is expressed using the area under the ROC curve as follows:
+
+    G = 2 * AUC - 1
+
+    where G is the Gini coefficient and AUC is the ROC-AUC score. This normalisation
+    will ensure that random guessing will yield a score of 0 in expectation, and it is
+    upper bounded by 1.
+
+    Note that there is another version of the Gini coefficient for regressors of a
+    continuous positive target variable. In this case, AUC is taken over the Lorenz
+    curve instead of the ROC [6]_.
+
     References
     ----------
     .. [1] `Wikipedia entry for the Receiver operating characteristic
@@ -558,6 +573,8 @@ class scores must correspond to the order of ``labels``,
            Under the ROC Curve for Multiple Class Classification Problems.
            Machine Learning, 45(2), 171-186.
            `_
+    .. [6] `Wikipedia entry for the Gini coefficient
+           `_
 
     Examples
     --------
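The identity added in the Notes section can be exercised directly. A short
sketch with made-up scores (illustrative only, not part of the patch):

    from sklearn.metrics import roc_auc_score

    y_true = [0, 0, 1, 1]
    y_score = [0.1, 0.4, 0.35, 0.8]

    auc = roc_auc_score(y_true, y_score)  # 0.75
    gini = 2 * auc - 1                    # 0.5
    # A random-guessing classifier has AUC 0.5, hence Gini 0; a perfect
    # ranker has AUC 1.0, hence Gini 1.
    print(auc, gini)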
From a3c8da18af46da0d0e32027dacb20501647b078a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?=
 <34657725+jeremiedbb@users.noreply.github.com>
Date: Fri, 19 Jan 2024 13:01:11 +0100
Subject: [PATCH 05/32] MAINT Update SECURITY.md for 1.4.0 (#28182)

---
 SECURITY.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 721f2041c2b85..3f291e7a566f8 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -4,8 +4,8 @@
 
 | Version   | Supported          |
 | --------- | ------------------ |
-| 1.3.2     | :white_check_mark: |
-| < 1.3.2   | :x:                |
+| 1.4.0     | :white_check_mark: |
+| < 1.4.0   | :x:                |
 
 ## Reporting a Vulnerability

From 5c7e831306e0a087c2b6af6913fa5b3c402f6d67 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 19 Jan 2024 13:58:02 +0100
Subject: [PATCH 06/32] DOC use list for the ridge_regression docstring
 (#28168)

---
 sklearn/linear_model/_ridge.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index c4f52c68e697e..5ce4a8c2fd3b8 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -552,14 +552,15 @@ def ridge_regression(
 
     Examples
     --------
+    >>> import numpy as np
     >>> from sklearn.datasets import make_regression
     >>> from sklearn.linear_model import ridge_regression
-    >>> X, y = make_regression(
-    ...     n_features=4, n_informative=2, shuffle=False, random_state=0
-    ... )
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.randn(100, 4)
+    >>> y = 2.0 * X[:, 0] - 1.0 * X[:, 1] + 0.1 * rng.standard_normal(100)
     >>> coef, intercept = ridge_regression(X, y, alpha=1.0, return_intercept=True)
-    >>> coef
-    array([20.2..., 33.7..., 0.1..., 0.0...])
+    >>> list(coef)
+    [1.97..., -1.00..., -0.0..., -0.0...]
     >>> intercept
     -0.0...

From 66a6551786c3d257a7b4f0b23a705f52f868c235 Mon Sep 17 00:00:00 2001
From: Andrei Dzis
Date: Fri, 19 Jan 2024 23:15:37 +0300
Subject: [PATCH 07/32] DOC Fix for roc_auc_score documentation (#28190)

---
 sklearn/metrics/_ranking.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index a117a5427a996..4a960a2f4402a 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -549,10 +549,6 @@ class scores must correspond to the order of ``labels``,
     will ensure that random guessing will yield a score of 0 in expectation, and it is
     upper bounded by 1.
 
-    Note that there is another version of the Gini coefficient for regressors of a
-    continuous positive target variable. In this case, AUC is taken over the Lorenz
-    curve instead of the ROC [6]_.
-
     References
     ----------
     .. [1] `Wikipedia entry for the Receiver operating characteristic

From 2020648edfdbdeb4797465434ed4afd6e79ce2ed Mon Sep 17 00:00:00 2001
From: 101AlexMartin <101071686+101AlexMartin@users.noreply.github.com>
Date: Sat, 20 Jan 2024 10:53:07 +0100
Subject: [PATCH 08/32] MNT changed order pre-commits hooks following ruff
 recommendation (#28062)

Co-authored-by: Alejandro Martin
---
 .pre-commit-config.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index abffbbe149f2c..506e3ab4fe64e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,16 +5,16 @@ repos:
       - id: check-yaml
       - id: end-of-file-fixer
       - id: trailing-whitespace
-- repo: https://github.com/psf/black
-  rev: 23.3.0
-  hooks:
-    - id: black
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
   rev: v0.0.272
   hooks:
     - id: ruff
       args: ["--fix", "--show-source"]
+- repo: https://github.com/psf/black
+  rev: 23.3.0
+  hooks:
+    - id: black
 - repo: https://github.com/pre-commit/mirrors-mypy
   rev: v1.3.0
   hooks:

From 6a1022353103cefb93258f503b087d821262a1b6 Mon Sep 17 00:00:00 2001
From: Rodrigo Romero <69991220+rromer07@users.noreply.github.com>
Date: Sat, 20 Jan 2024 06:48:55 -0500
Subject: [PATCH 09/32] DOC add docstring example to
 `sklearn.metrics.consensus_score` (#28193)

---
 sklearn/metrics/cluster/_bicluster.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py
index b9ca47c9b91aa..713d0bee8fa2e 100644
--- a/sklearn/metrics/cluster/_bicluster.py
+++ b/sklearn/metrics/cluster/_bicluster.py
@@ -89,6 +89,14 @@ def consensus_score(a, b, *, similarity="jaccard"):
     * Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis
       for bicluster acquisition
       `__.
+
+    Examples
+    --------
+    >>> from sklearn.metrics import consensus_score
+    >>> a = ([[True, False], [False, True]], [[False, True], [True, False]])
+    >>> b = ([[False, True], [True, False]], [[True, False], [False, True]])
+    >>> consensus_score(a, b, similarity='jaccard')
+    1.0
     """
     if similarity == "jaccard":
         similarity = _jaccard

From 836690a401057572ef7d3478a9a3aa78dfa1447b Mon Sep 17 00:00:00 2001
From: Rodrigo Romero <69991220+rromer07@users.noreply.github.com>
Date: Sat, 20 Jan 2024 14:42:16 -0500
Subject: [PATCH 10/32] DOC add docstring example to
 `sklearn.metrics.coverage_error` (#28196)

Co-authored-by: Guillaume Lemaitre
---
 sklearn/metrics/_ranking.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 4a960a2f4402a..74ae6dcf04299 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -1300,6 +1300,14 @@ def coverage_error(y_true, y_score, *, sample_weight=None):
     .. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).
            Mining multi-label data. In Data mining and knowledge
           discovery handbook (pp. 667-685). Springer US.
+
+    Examples
+    --------
+    >>> from sklearn.metrics import coverage_error
+    >>> y_true = [[1, 0, 0], [0, 1, 1]]
+    >>> y_score = [[1, 0, 0], [0, 1, 1]]
+    >>> coverage_error(y_true, y_score)
+    1.5
     """
     y_true = check_array(y_true, ensure_2d=True)
     y_score = check_array(y_score, ensure_2d=True)

From 897c0c570511be4b7912a335052ed479ac5ca1f3 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Sat, 20 Jan 2024 21:08:36 +0100
Subject: [PATCH 11/32] ENH improve HGBT predict classes (#27844)

Co-authored-by: Guillaume Lemaitre
---
 doc/whats_new/v1.4.rst                                |  4 ++++
 .../_hist_gradient_boosting/gradient_boosting.py      | 16 +++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index ee47bae7b1f5b..d832e4b508359 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -494,6 +494,10 @@ Changelog
   which allows to retrieve the training sample indices used for each tree estimator.
   :pr:`26736` by :user:`Adam Li `.
 
+- |Efficiency| Improves runtime of `predict` of
+  :class:`ensemble.HistGradientBoostingClassifier` by avoiding calling `predict_proba`.
+  :pr:`27844` by :user:`Christian Lorentzen `.
+
 - |Fix| Fixes :class:`ensemble.IsolationForest` when the input is a sparse matrix
   and `contamination` is set to a float value.
   :pr:`27645` by :user:`Guillaume Lemaitre `.

diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index 0837d19407030..698fd0629d02e 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -2137,7 +2137,13 @@ def predict(self, X):
             The predicted classes.
         """
         # TODO: This could be done in parallel
-        encoded_classes = np.argmax(self.predict_proba(X), axis=1)
+        raw_predictions = self._raw_predict(X)
+        if raw_predictions.shape[1] == 1:
+            # np.argmax([0.5, 0.5]) is 0, not 1. Therefore "> 0" not ">= 0" to be
+            # consistent with the multiclass case.
+            encoded_classes = (raw_predictions.ravel() > 0).astype(int)
+        else:
+            encoded_classes = np.argmax(raw_predictions, axis=1)
         return self.classes_[encoded_classes]
 
     def staged_predict(self, X):
@@ -2158,8 +2164,12 @@ def staged_predict(self, X):
         y : generator of ndarray of shape (n_samples,)
             The predicted classes of the input samples, for each iteration.
         """
-        for proba in self.staged_predict_proba(X):
-            encoded_classes = np.argmax(proba, axis=1)
+        for raw_predictions in self._staged_raw_predict(X):
+            if raw_predictions.shape[1] == 1:
+                # np.argmax([0, 0]) is 0, not 1, therefore "> 0" not ">= 0"
+                encoded_classes = (raw_predictions.ravel() > 0).astype(int)
+            else:
+                encoded_classes = np.argmax(raw_predictions, axis=1)
             yield self.classes_.take(encoded_classes, axis=0)
 
     def predict_proba(self, X):
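The refactoring is pure bookkeeping: classes are still chosen exactly as an
argmax over `predict_proba` would choose them, only without materialising the
probabilities. A minimal consistency sketch (illustrative code, not from the
patch):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import HistGradientBoostingClassifier

    X, y = make_classification(n_samples=200, random_state=0)
    clf = HistGradientBoostingClassifier(max_iter=20, random_state=0).fit(X, y)

    # predict now thresholds the raw scores directly, but must keep matching
    # the argmax of predict_proba.
    expected = clf.classes_[np.argmax(clf.predict_proba(X), axis=1)]
    assert np.array_equal(clf.predict(X), expected)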
From b4754ba7eeacf1519fb827392d99207d38011627 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Mon, 22 Jan 2024 02:31:13 -0500
Subject: [PATCH 12/32] ENH Checks pandas and polars directly (#28195)

---
 doc/whats_new/v1.4.rst      |  3 +++
 sklearn/utils/validation.py | 26 ++++++++++----------------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index d832e4b508359..98bfcd2d96f54 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -24,6 +24,9 @@ Changelog
   larger than the number of non-duplicate samples.
   :pr:`28165` by :user:`Jérémie du Boisberranger `.
 
+- |Enhancement| Pandas and Polars dataframes are validated directly without ducktyping
+  checks. :pr:`28195` by `Thomas Fan`_.
+
 .. _changes_1_4:

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 6531a9da3404b..43f553eb2d2d5 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -2070,26 +2070,20 @@ def _check_method_params(X, params, indices=None):
 
 def _is_pandas_df(X):
     """Return True if the X is a pandas dataframe."""
-    if hasattr(X, "columns") and hasattr(X, "iloc"):
-        # Likely a pandas DataFrame, we explicitly check the type to confirm.
-        try:
-            pd = sys.modules["pandas"]
-        except KeyError:
-            return False
-        return isinstance(X, pd.DataFrame)
-    return False
+    try:
+        pd = sys.modules["pandas"]
+    except KeyError:
+        return False
+    return isinstance(X, pd.DataFrame)
 
 
 def _is_polars_df(X):
     """Return True if the X is a polars dataframe."""
-    if hasattr(X, "columns") and hasattr(X, "schema"):
-        # Likely a polars DataFrame, we explicitly check the type to confirm.
-        try:
-            pl = sys.modules["polars"]
-        except KeyError:
-            return False
-        return isinstance(X, pl.DataFrame)
-    return False
+    try:
+        pl = sys.modules["polars"]
+    except KeyError:
+        return False
+    return isinstance(X, pl.DataFrame)
 
 
 def _get_feature_names(X):
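The simplified helpers rely on `sys.modules`: if a library has never been
imported, none of its objects can exist, so the lookup short-circuits without
importing anything. The helpers themselves are private scikit-learn API; the
sketch below re-implements the same pattern for illustration only:

    import sys

    def is_pandas_df(obj):
        # Cheap check: if pandas was never imported, obj cannot be a pandas
        # DataFrame, and we avoid triggering a costly import ourselves.
        pd = sys.modules.get("pandas")
        return pd is not None and isinstance(obj, pd.DataFrame)

    print(is_pandas_df([1, 2, 3]))  # False, without importing pandas

An isinstance check is also stricter than the old duck typing: objects that
merely expose `columns` and `iloc` attributes no longer pass as dataframes.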
From 69cef4adc1d689828958328598712e8b2937971d Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 22 Jan 2024 10:53:04 +0100
Subject: [PATCH 13/32] FIX _convert_container should be able to convert from
 sparse to sparse (#28185)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Loïc Estève
---
 sklearn/utils/_testing.py           | 40 ++++++++++++++++++--------------
 sklearn/utils/tests/test_testing.py | 29 +++++++++++++++++++++
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py
index b49622627c7ae..bb4da452712d2 100644
--- a/sklearn/utils/_testing.py
+++ b/sklearn/utils/_testing.py
@@ -775,8 +775,6 @@ def _convert_container(
         return tuple(np.asarray(container, dtype=dtype).tolist())
     elif constructor_name == "array":
         return np.asarray(container, dtype=dtype)
-    elif constructor_name == "sparse":
-        return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype)
     elif constructor_name in ("pandas", "dataframe"):
         pd = pytest.importorskip("pandas", minversion=minversion)
         result = pd.DataFrame(container, columns=columns_name, dtype=dtype, copy=False)
@@ -813,22 +811,28 @@ def _convert_container(
         return pd.Index(container, dtype=dtype)
     elif constructor_name == "slice":
         return slice(container[0], container[1])
-    elif constructor_name == "sparse_csr":
-        return sp.sparse.csr_matrix(np.atleast_2d(container), dtype=dtype)
-    elif constructor_name == "sparse_csr_array":
-        if sp_version >= parse_version("1.8"):
-            return sp.sparse.csr_array(np.atleast_2d(container), dtype=dtype)
-        raise ValueError(
-            f"sparse_csr_array is only available with scipy>=1.8.0, got {sp_version}"
-        )
-    elif constructor_name == "sparse_csc":
-        return sp.sparse.csc_matrix(np.atleast_2d(container), dtype=dtype)
-    elif constructor_name == "sparse_csc_array":
-        if sp_version >= parse_version("1.8"):
-            return sp.sparse.csc_array(np.atleast_2d(container), dtype=dtype)
-        raise ValueError(
-            f"sparse_csc_array is only available with scipy>=1.8.0, got {sp_version}"
-        )
+    elif "sparse" in constructor_name:
+        if not sp.sparse.issparse(container):
+            # For scipy >= 1.13, sparse array constructed from 1d array may be
+            # 1d or raise an exception. To avoid this, we make sure that the
+            # input container is 2d. For more details, see
+            # https://github.com/scipy/scipy/pull/18530#issuecomment-1878005149
+            container = np.atleast_2d(container)
+
+        if "array" in constructor_name and sp_version < parse_version("1.8"):
+            raise ValueError(
+                f"{constructor_name} is only available with scipy>=1.8.0, got "
+                f"{sp_version}"
+            )
+        if constructor_name in ("sparse", "sparse_csr"):
+            # sparse and sparse_csr are equivalent for legacy reasons
+            return sp.sparse.csr_matrix(container, dtype=dtype)
+        elif constructor_name == "sparse_csr_array":
+            return sp.sparse.csr_array(container, dtype=dtype)
+        elif constructor_name == "sparse_csc":
+            return sp.sparse.csc_matrix(container, dtype=dtype)
+        elif constructor_name == "sparse_csc_array":
+            return sp.sparse.csc_array(container, dtype=dtype)
 
 
 def raises(expected_exc_type, match=None, may_pass=False, err_msg=None):
diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py
index f24b4de928201..c6132afd0c1d4 100644
--- a/sklearn/utils/tests/test_testing.py
+++ b/sklearn/utils/tests/test_testing.py
@@ -845,3 +845,32 @@ def test_assert_run_python_script_without_output():
         match="output was not supposed to match.+got.+something to stderr",
     ):
         assert_run_python_script_without_output(code, pattern="to.+stderr")
+
+
+@pytest.mark.parametrize(
+    "constructor_name",
+    [
+        "sparse_csr",
+        "sparse_csc",
+        pytest.param(
+            "sparse_csr_array",
+            marks=pytest.mark.skipif(
+                sp_version < parse_version("1.8"),
+                reason="sparse arrays are available as of scipy 1.8.0",
+            ),
+        ),
+        pytest.param(
+            "sparse_csc_array",
+            marks=pytest.mark.skipif(
+                sp_version < parse_version("1.8"),
+                reason="sparse arrays are available as of scipy 1.8.0",
+            ),
+        ),
+    ],
+)
+def test_convert_container_sparse_to_sparse(constructor_name):
+    """Non-regression test to check that we can still convert a sparse container
+    from a given format to another format.
+    """
+    X_sparse = sparse.random(10, 10, density=0.1, format="csr")
+    _convert_container(X_sparse, constructor_name)
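The `np.atleast_2d` guard addresses the scipy behaviour referenced in the
comment above: depending on the scipy version, building a sparse *array* from
a 1d input may produce a 1d sparse array or raise. A small sketch of the
work-around in isolation (illustrative; the exact 1d behaviour varies across
scipy releases):

    import numpy as np
    import scipy.sparse as sp

    data = [0, 1, 2]

    # Promoting to 2d first gives a predictable (1, 3) sparse container
    # regardless of scipy version.
    X = sp.csr_matrix(np.atleast_2d(data))
    print(X.shape)  # (1, 3)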
From 1df773fe12d54beaed1136d7b040571e51f17205 Mon Sep 17 00:00:00 2001
From: Anderson Nelson
Date: Mon, 22 Jan 2024 05:16:30 -0500
Subject: [PATCH 14/32] DOC Add docstring examples for covariance module
 (#28192)

Co-authored-by: Guillaume Lemaitre
---
 sklearn/covariance/_shrunk_covariance.py | 37 ++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index 3a79afa30729f..5df229260b03c 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -134,6 +134,18 @@ def shrunk_covariance(emp_cov, shrinkage=0.1):
         (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
 
     where `mu = trace(cov) / n_features`.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.datasets import make_gaussian_quantiles
+    >>> from sklearn.covariance import empirical_covariance, shrunk_covariance
+    >>> real_cov = np.array([[.8, .3], [.3, .4]])
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=500)
+    >>> shrunk_covariance(empirical_covariance(X))
+    array([[0.73..., 0.25...],
+           [0.25..., 0.41...]])
     """
     emp_cov = check_array(emp_cov, allow_nd=True)
     n_features = emp_cov.shape[-1]
@@ -316,6 +328,17 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
         (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
 
     where mu = trace(cov) / n_features
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.covariance import ledoit_wolf_shrinkage
+    >>> real_cov = np.array([[.4, .2], [.2, .8]])
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=50)
+    >>> shrinkage_coefficient = ledoit_wolf_shrinkage(X)
+    >>> shrinkage_coefficient
+    0.23...
     """
     X = check_array(X)
     # for only one feature, the result is the same whatever the shrinkage
@@ -419,6 +442,20 @@ def ledoit_wolf(X, *, assume_centered=False, block_size=1000):
         (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
 
     where mu = trace(cov) / n_features
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.covariance import empirical_covariance, ledoit_wolf
+    >>> real_cov = np.array([[.4, .2], [.2, .8]])
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=50)
+    >>> covariance, shrinkage = ledoit_wolf(X)
+    >>> covariance
+    array([[0.44..., 0.16...],
+           [0.16..., 0.80...]])
+    >>> shrinkage
+    0.23...
     """
     estimator = LedoitWolf(
         assume_centered=assume_centered,

From 55eb8900b44d62cf665444258adf4a3ae29926a1 Mon Sep 17 00:00:00 2001
From: Shubham <134207725+shubhamparmar1@users.noreply.github.com>
Date: Mon, 22 Jan 2024 15:51:08 +0530
Subject: [PATCH 15/32] DOC Add docstring examples for utils functions
 (#28181)

Co-authored-by: Guillaume Lemaitre
---
 sklearn/utils/_estimator_html_repr.py |  7 ++++++
 sklearn/utils/estimator_checks.py     |  7 ++++++
 sklearn/utils/extmath.py              | 33 +++++++++++++++++++++++++--
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py
index dd51a8bbb71de..5e465234f516b 100644
--- a/sklearn/utils/_estimator_html_repr.py
+++ b/sklearn/utils/_estimator_html_repr.py
@@ -329,6 +329,13 @@ def estimator_html_repr(estimator):
     -------
     html: str
         HTML representation of estimator.
+
+    Examples
+    --------
+    >>> from sklearn.utils._estimator_html_repr import estimator_html_repr
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> estimator_html_repr(LogisticRegression())
+    '<style>