Merge branch 'scikit-learn:main' into submodulev3

neurodata · Sep 26, 2023 · 6086c9c · 6086c9c
2 parents 679c9a2 + 457b02c
commit 6086c9c
Show file tree

Hide file tree

Showing 96 changed files with 1,803 additions and 647 deletions.
diff --git a/.gitignore b/.gitignore
@@ -99,6 +99,7 @@ sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd
 sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx
 sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd
 sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx
+sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors_classmode.pyx
 sklearn/neighbors/_ball_tree.pyx
 sklearn/neighbors/_binary_tree.pxi
 sklearn/neighbors/_kd_tree.pyx

diff --git a/SECURITY.md b/SECURITY.md
@@ -4,8 +4,8 @@
 
 | Version   | Supported          |
 | --------- | ------------------ |
-| 1.3.0     | :white_check_mark: |
-| < 1.3.0   | :x:                |
+| 1.3.1     | :white_check_mark: |
+| < 1.3.1   | :x:                |
 
 ## Reporting a Vulnerability
 

diff --git a/asv_benchmarks/asv.conf.json b/asv_benchmarks/asv.conf.json
@@ -72,13 +72,13 @@
     // followed by the pip installed packages).
     //
     // The versions of the dependencies should be bumped in a dedicated commit
-    // to easily identify regressions/imrovements due to code changes from
+    // to easily identify regressions/improvements due to code changes from
     // those due to dependency changes.
     //
     "matrix": {
         "numpy": ["1.25.2"],
         "scipy": ["1.11.2"],
-        "cython": ["0.29.36"],
+        "cython": ["3.0.2"],
         "joblib": ["1.3.2"],
         "threadpoolctl": ["3.2.0"],
         "pandas": ["2.1.0"]

diff --git a/build_tools/circle/doc_environment.yml b/build_tools/circle/doc_environment.yml
@@ -12,7 +12,7 @@ dependencies:
   - joblib
   - threadpoolctl
   - matplotlib
-  - pandas
+  - pandas<2.1
   - pyamg
   - pytest
   - pytest-xdist=2.5.0

diff --git a/build_tools/circle/doc_linux-64_conda.lock b/build_tools/circle/doc_linux-64_conda.lock
diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py
@@ -89,11 +89,23 @@ def get_contributors():
         core_devs  # remove ogrisel from contributor_experience_team
     )
 
-    emeritus = members - core_devs - contributor_experience_team - comm_team
+    emeritus = (
+        members
+        - core_devs
+        - contributor_experience_team
+        - comm_team
+        - documentation_team
+    )
 
     # hard coded
+    emeritus_contributor_experience_team = {
+        "cmarmo",
+    }
     emeritus_comm_team = {"reshamas"}
 
+    # For now, we can simply subtract the team emeritus from the original emeritus
+    emeritus -= emeritus_contributor_experience_team | emeritus_comm_team
+
     comm_team -= {"reshamas"}  # in the comm team but not on the web page
 
     # get profiles from GitHub
@@ -102,6 +114,9 @@ def get_contributors():
     contributor_experience_team = [
         get_profile(login) for login in contributor_experience_team
     ]
+    emeritus_contributor_experience_team = [
+        get_profile(login) for login in emeritus_contributor_experience_team
+    ]
     comm_team = [get_profile(login) for login in comm_team]
     emeritus_comm_team = [get_profile(login) for login in emeritus_comm_team]
     documentation_team = [get_profile(login) for login in documentation_team]
@@ -110,6 +125,9 @@ def get_contributors():
     core_devs = sorted(core_devs, key=key)
     emeritus = sorted(emeritus, key=key)
     contributor_experience_team = sorted(contributor_experience_team, key=key)
+    emeritus_contributor_experience_team = sorted(
+        emeritus_contributor_experience_team, key=key
+    )
     documentation_team = sorted(documentation_team, key=key)
     comm_team = sorted(comm_team, key=key)
     emeritus_comm_team = sorted(emeritus_comm_team, key=key)
@@ -118,6 +136,7 @@ def get_contributors():
         core_devs,
         emeritus,
         contributor_experience_team,
+        emeritus_contributor_experience_team,
         comm_team,
         emeritus_comm_team,
         documentation_team,
@@ -188,6 +207,7 @@ def generate_list(contributors):
         core_devs,
         emeritus,
         contributor_experience_team,
+        emeritus_contributor_experience_team,
         comm_team,
         emeritus_comm_team,
         documentation_team,
@@ -206,6 +226,13 @@ def generate_list(contributors):
     ) as rst_file:
         rst_file.write(generate_table(contributor_experience_team))
 
+    with open(
+        REPO_FOLDER / "doc" / "contributor_experience_team_emeritus.rst",
+        "w+",
+        encoding="utf-8",
+    ) as rst_file:
+        rst_file.write(generate_list(emeritus_contributor_experience_team))
+
     with open(
         REPO_FOLDER / "doc" / "communication_team.rst", "w+", encoding="utf-8"
     ) as rst_file:

diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py
@@ -323,6 +323,9 @@ def remove_from(alist, to_remove):
             # Regressions have been observed with Cython>=3.0.0.
             # See: https://github.com/scikit-learn/scikit-learn/issues/27086
             "cython": "<3.0.0",
+            # seaborn 0.12.2 raises deprecation warnings appearing in the documentation
+            # We should remove this constraint when seaborn 0.13 is released
+            "pandas": "<2.1",
         },
     },
     {

diff --git a/doc/authors_emeritus.rst b/doc/authors_emeritus.rst
@@ -20,7 +20,6 @@
 - Wei Li
 - Paolo Losi
 - Gilles Louppe
-- Chiara Marmo
 - Vincent Michel
 - Jarrod Millman
 - Alexandre Passos

diff --git a/doc/computing/parallelism.rst b/doc/computing/parallelism.rst
@@ -114,7 +114,7 @@ using environment variables, namely:
 Note that BLAS & LAPACK implementations can also be impacted by
 `OMP_NUM_THREADS`. To check whether this is the case in your environment,
 you can inspect how the number of threads effectively used by those libraries
-is affected when running the the following command in a bash or zsh terminal
+is affected when running the following command in a bash or zsh terminal
 for different values of `OMP_NUM_THREADS`::
 
 .. prompt:: bash $

diff --git a/doc/conf.py b/doc/conf.py
@@ -701,7 +701,6 @@ def setup(app):
     ),
 )
 
-
 # maps functions with a class name that is indistinguishable when case is
 # ignored to another filename
 autosummary_filename_map = {

diff --git a/doc/contributor_experience_team.rst b/doc/contributor_experience_team.rst
@@ -6,10 +6,6 @@
       img.avatar {border-radius: 10px;}
     </style>
     <div>
-    <a href='https://github.com/ArturoAmorQ'><img src='https://avatars.githubusercontent.com/u/86408019?v=4' class='avatar' /></a> <br />
-    <p>Arturo Amor</p>
-    </div>
-    <div>
     <a href='https://github.com/alfaro96'><img src='https://avatars.githubusercontent.com/u/32649176?v=4' class='avatar' /></a> <br />
     <p>Juan Carlos Alfaro Jiménez</p>
     </div>

diff --git a/doc/jupyter-lite.json b/doc/jupyter-lite.json
@@ -3,7 +3,7 @@
   "jupyter-config-data": {
     "litePluginSettings": {
       "@jupyterlite/pyodide-kernel-extension:kernel": {
-        "pyodideUrl": "https://cdn.jsdelivr.net/pyodide/v0.23.4/full/pyodide.js"
+        "pyodideUrl": "https://cdn.jsdelivr.net/pyodide/v0.24.0/full/pyodide.js"
       }
     }
   }

diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst
@@ -10,9 +10,11 @@ Metadata Routing
 
 .. note::
   The Metadata Routing API is experimental, and is not implemented yet for many
-  estimators. It may change without the usual deprecation cycle. By default
-  this feature is not enabled. You can enable this feature  by setting the
-  ``enable_metadata_routing`` flag to ``True``:
+  estimators. Please refer to the :ref:`list of supported and unsupported
+  models <metadata_routing_models>` for more information. It may change without
+  the usual deprecation cycle. By default this feature is not enabled. You can
+  enable this feature by setting the ``enable_metadata_routing`` flag to
+  ``True``::
 
     >>> import sklearn
     >>> sklearn.set_config(enable_metadata_routing=True)
@@ -230,3 +232,72 @@ The issue can be fixed by explicitly setting the request value::
     >>> lr = LogisticRegression().set_fit_request(
     ...     sample_weight=True
     ... ).set_score_request(sample_weight=False)
+
+At the end we disable the configuration flag for metadata routing::
+
+    >>> sklearn.set_config(enable_metadata_routing=False)
+
+.. _metadata_routing_models:
+
+Metadata Routing Support Status
+*******************************
+All consumers (i.e. simple estimators which only consume metadata and don't
+route them) support metadata routing, meaning they can be used inside
+meta-estimators which support metadata routing. However, development of support
+for metadata routing for meta-estimators is in progress, and here is a list of
+meta-estimators and tools which support and don't yet support metadata routing.
+
+
+Meta-estimators and functions supporting metadata routing:
+
+- :class:`sklearn.calibration.CalibratedClassifierCV`
+- :class:`sklearn.compose.ColumnTransformer`
+- :class:`sklearn.linear_model.LogisticRegressionCV`
+- :class:`sklearn.model_selection.GridSearchCV`
+- :class:`sklearn.model_selection.HalvingGridSearchCV`
+- :class:`sklearn.model_selection.HalvingRandomSearchCV`
+- :class:`sklearn.model_selection.RandomizedSearchCV`
+- :func:`sklearn.model_selection.cross_validate`
+- :func:`sklearn.model_selection.cross_val_score`
+- :func:`sklearn.model_selection.cross_val_predict`
+- :class:`sklearn.multioutput.ClassifierChain`
+- :class:`sklearn.multioutput.MultiOutputClassifier`
+- :class:`sklearn.multioutput.MultiOutputRegressor`
+- :class:`sklearn.multioutput.RegressorChain`
+- :class:`sklearn.pipeline.Pipeline`
+
+Meta-estimators and tools not supporting metadata routing yet:
+
+- :class:`sklearn.compose.TransformedTargetRegressor`
+- :class:`sklearn.covariance.GraphicalLassoCV`
+- :class:`sklearn.ensemble.AdaBoostClassifier`
+- :class:`sklearn.ensemble.AdaBoostRegressor`
+- :class:`sklearn.ensemble.BaggingClassifier`
+- :class:`sklearn.ensemble.BaggingRegressor`
+- :class:`sklearn.ensemble.StackingClassifier`
+- :class:`sklearn.ensemble.StackingRegressor`
+- :class:`sklearn.ensemble.VotingClassifier`
+- :class:`sklearn.ensemble.VotingRegressor`
+- :class:`sklearn.feature_selection.RFE`
+- :class:`sklearn.feature_selection.RFECV`
+- :class:`sklearn.feature_selection.SelectFromModel`
+- :class:`sklearn.feature_selection.SequentialFeatureSelector`
+- :class:`sklearn.impute.IterativeImputer`
+- :class:`sklearn.linear_model.ElasticNetCV`
+- :class:`sklearn.linear_model.LarsCV`
+- :class:`sklearn.linear_model.LassoCV`
+- :class:`sklearn.linear_model.LassoLarsCV`
+- :class:`sklearn.linear_model.MultiTaskElasticNetCV`
+- :class:`sklearn.linear_model.MultiTaskLassoCV`
+- :class:`sklearn.linear_model.OrthogonalMatchingPursuitCV`
+- :class:`sklearn.linear_model.RANSACRegressor`
+- :class:`sklearn.linear_model.RidgeClassifierCV`
+- :class:`sklearn.linear_model.RidgeCV`
+- :func:`sklearn.model_selection.learning_curve`
+- :func:`sklearn.model_selection.permutation_test_score`
+- :func:`sklearn.model_selection.validation_curve`
+- :class:`sklearn.multiclass.OneVsOneClassifier`
+- :class:`sklearn.multiclass.OneVsRestClassifier`
+- :class:`sklearn.multiclass.OutputCodeClassifier`
+- :class:`sklearn.pipeline.FeatureUnion`
+- :class:`sklearn.semi_supervised.SelfTrainingClassifier`
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
@@ -1780,7 +1780,7 @@ mean of homogeneity and completeness**:
    measure <https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
    Andrew Rosenberg and Julia Hirschberg, 2007
 
- .. [B2011] `Identication and Characterization of Events in Social Media
+ .. [B2011] `Identification and Characterization of Events in Social Media
    <http://www.cs.columbia.edu/~hila/hila-thesis-distributed.pdf>`_, Hila
    Becker, PhD Thesis.
 
@@ -1800,7 +1800,7 @@ Where ``TP`` is the number of **True Positive** (i.e. the number of pair
 of points that belong to the same clusters in both the true labels and the
 predicted labels), ``FP`` is the number of **False Positive** (i.e. the number
 of pair of points that belong to the same clusters in the true labels and not
-in the predicted labels) and ``FN`` is the number of **False Negative** (i.e the
+in the predicted labels) and ``FN`` is the number of **False Negative** (i.e. the
 number of pair of points that belong to the same clusters in the predicted
 labels and not in the true labels).
-Original file line number
+Diff line change
@@ Expand Up / @@ -701,7 +701,6 @@ def setup(app): @@
         ),
     )
     # maps functions with a class name that is indistinguishable when case is
     # ignore to another filename
     autosummary_filename_map = {
@@ Expand Down @@