From deefca2f18bc6c22b2e8700f364a1c9d4bd34cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Wed, 27 Sep 2023 15:32:06 +0200 Subject: [PATCH 01/15] MAINT Explicit cython options for better discovery (#27480) --- doc/developers/cython.rst | 2 +- sklearn/_build_utils/__init__.py | 2 ++ sklearn/_loss/_loss.pxd | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/developers/cython.rst b/doc/developers/cython.rst index 0c319eda4a08d..8558169848052 100644 --- a/doc/developers/cython.rst +++ b/doc/developers/cython.rst @@ -63,7 +63,7 @@ Tips to ease development # You might want to add this alias to your shell script config. alias cythonX="cython -X language_level=3 -X boundscheck=False -X wraparound=False -X initializedcheck=False -X nonecheck=False -X cdivision=True" - # This generates `source.c` as as if you had recompiled scikit-learn entirely. + # This generates `source.c` as if you had recompiled scikit-learn entirely. cythonX --annotate source.pyx * Using the ``--annotate`` option with this flag allows generating a HTML report of code annotation. diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index 056215e162647..a8ced8aa9d292 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -75,12 +75,14 @@ def cythonize_extensions(extension): "initializedcheck": False, "nonecheck": False, "cdivision": True, + "profile": False, } return cythonize( extension, nthreads=n_jobs, compiler_directives=compiler_directives, + annotate=False, ) diff --git a/sklearn/_loss/_loss.pxd b/sklearn/_loss/_loss.pxd index 69bef42b9ed6e..f38cbe0badc96 100644 --- a/sklearn/_loss/_loss.pxd +++ b/sklearn/_loss/_loss.pxd @@ -1,5 +1,3 @@ -# cython: language_level=3 - # Fused types for input like y_true, raw_prediction, sample_weights. ctypedef fused floating_in: double From 32f8bdacfdfdb77d1bc5e5106524bbd0199213f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 27 Sep 2023 15:33:22 +0200 Subject: [PATCH 02/15] MAINT Update Pyodide to 0.24.1 for JupyterLite button (#27474) --- doc/jupyter-lite.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jupyter-lite.json b/doc/jupyter-lite.json index d5fb63ceb4a84..05a02b1080fa4 100644 --- a/doc/jupyter-lite.json +++ b/doc/jupyter-lite.json @@ -3,7 +3,7 @@ "jupyter-config-data": { "litePluginSettings": { "@jupyterlite/pyodide-kernel-extension:kernel": { - "pyodideUrl": "https://cdn.jsdelivr.net/pyodide/v0.24.0/full/pyodide.js" + "pyodideUrl": "https://cdn.jsdelivr.net/pyodide/v0.24.1/full/pyodide.js" } } } From 38a1e64bbe42a7b66a77835e46e5f7725bccd18b Mon Sep 17 00:00:00 2001 From: Naoise Holohan <51835109+naoise-h@users.noreply.github.com> Date: Wed, 27 Sep 2023 17:07:49 +0100 Subject: [PATCH 03/15] FIX Param validation Interval error for large integers (#26648) Co-authored-by: jeremie du boisberranger --- doc/whats_new/v1.4.rst | 4 +++ sklearn/utils/__init__.py | 6 +++- sklearn/utils/_param_validation.py | 6 ++-- sklearn/utils/tests/test_param_validation.py | 34 +++++++++++++++++--- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 92cf1af0523ae..d4f92548ba0ac 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -29,6 +29,10 @@ Changes impacting all modules to work with our estimators and functions. :pr:`26464` by `Thomas Fan`_. 
+- |Fix| Fixed a bug in most estimators and functions where setting a parameter to
+  a large integer would cause a `TypeError`.
+  :pr:`26648` by :user:`Naoise Holohan <naoise-h>`.
+
 Metadata Routing
 ----------------
 
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index f8b4d2042223c..44d49abb1304b 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -1094,7 +1094,11 @@ def is_scalar_nan(x):
     >>> is_scalar_nan([np.nan])
     False
     """
-    return isinstance(x, numbers.Real) and math.isnan(x)
+    return (
+        not isinstance(x, numbers.Integral)
+        and isinstance(x, numbers.Real)
+        and math.isnan(x)
+    )
 
 
 def _approximate_mode(class_counts, n_draws, rng):
diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py
index 0e30627ab06cc..11c1d60481992 100644
--- a/sklearn/utils/_param_validation.py
+++ b/sklearn/utils/_param_validation.py
@@ -311,7 +311,9 @@ class _NanConstraint(_Constraint):
     """Constraint representing the indicator `np.nan`."""
 
     def is_satisfied_by(self, val):
-        return isinstance(val, Real) and math.isnan(val)
+        return (
+            not isinstance(val, Integral) and isinstance(val, Real) and math.isnan(val)
+        )
 
     def __str__(self):
         return "numpy.nan"
@@ -475,7 +477,7 @@ def _check_params(self):
             )
 
     def __contains__(self, val):
-        if np.isnan(val):
+        if not isinstance(val, Integral) and np.isnan(val):
             return False
 
         left_cmp = operator.lt if self.closed in ("left", "both") else operator.le
diff --git a/sklearn/utils/tests/test_param_validation.py b/sklearn/utils/tests/test_param_validation.py
index 3fd46a5d568f6..3894a24297cd9 100644
--- a/sklearn/utils/tests/test_param_validation.py
+++ b/sklearn/utils/tests/test_param_validation.py
@@ -74,16 +74,41 @@ def fit(self, X=None, y=None):
 def test_interval_range(interval_type):
     """Check the range of values depending on closed."""
     interval = Interval(interval_type, -2, 2, closed="left")
-    assert -2 in interval and 2 not in interval
+    assert -2 in interval
+    assert 2 not in interval
 
     interval = Interval(interval_type, -2, 2, closed="right")
-    assert -2 not in interval and 2 in interval
+    assert -2 not in interval
+    assert 2 in interval
 
     interval = Interval(interval_type, -2, 2, closed="both")
-    assert -2 in interval and 2 in interval
+    assert -2 in interval
+    assert 2 in interval
 
     interval = Interval(interval_type, -2, 2, closed="neither")
-    assert -2 not in interval and 2 not in interval
+    assert -2 not in interval
+    assert 2 not in interval
+
+
+@pytest.mark.parametrize("interval_type", [Integral, Real])
+def test_interval_large_integers(interval_type):
+    """Check that the Interval constraint works with large integers.
+
+    Non-regression test for #26648.
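+    Such integers do not fit in a 64-bit dtype, so `np.isnan` used to raise a
+    `TypeError` on them before the `Integral` check was added.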
+ """ + interval = Interval(interval_type, 0, 2, closed="neither") + assert 2**65 not in interval + assert 2**128 not in interval + assert float(2**65) not in interval + assert float(2**128) not in interval + + interval = Interval(interval_type, 0, 2**128, closed="neither") + assert 2**65 in interval + assert 2**128 not in interval + assert float(2**65) in interval + assert float(2**128) not in interval + + assert 2**1024 not in interval def test_interval_inf_in_bounds(): @@ -389,6 +414,7 @@ def test_generate_valid_param(constraint): ("verbose", 1), (MissingValues(), -1), (MissingValues(), -1.0), + (MissingValues(), 2**1028), (MissingValues(), None), (MissingValues(), float("nan")), (MissingValues(), np.nan), From b06a099aad48d175591c2a1cfed75fb94b74d010 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 28 Sep 2023 03:43:35 -0400 Subject: [PATCH 04/15] TST Improve `assert_argkmin_results_quasi_equality` error message (#27281) Co-authored-by: Olivier Grisel Co-authored-by: Guillaume Lemaitre --- .../test_pairwise_distances_reduction.py | 749 ++++++++++-------- sklearn/neighbors/tests/test_neighbors.py | 37 +- 2 files changed, 464 insertions(+), 322 deletions(-) diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index f9b4b9fb242fe..75f497315ff01 100644 --- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py +++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py @@ -1,15 +1,14 @@ import itertools import re import warnings -from collections import defaultdict -from math import floor, log10 +from functools import partial import numpy as np import pytest import threadpoolctl from scipy.spatial.distance import cdist -from sklearn.metrics import euclidean_distances +from sklearn.metrics import euclidean_distances, pairwise_distances from sklearn.metrics._pairwise_distances_reduction import ( ArgKmin, ArgKminClassMode, @@ -66,144 +65,194 @@ def _get_metric_params_list(metric: str, n_features: int, seed: int = 1): return [{}] -def assert_argkmin_results_equality(ref_dist, dist, ref_indices, indices, rtol=1e-7): - assert_array_equal( - ref_indices, - indices, - err_msg="Query vectors have different neighbors' indices", - ) - assert_allclose( - ref_dist, - dist, - err_msg="Query vectors have different neighbors' distances", - rtol=rtol, - ) - +def assert_same_distances_for_common_neighbors( + query_idx, + dist_row_a, + dist_row_b, + indices_row_a, + indices_row_b, + rtol, + atol, +): + """Check that the distances of common neighbors are equal up to tolerance. -def relative_rounding(scalar, n_significant_digits): - """Round a scalar to a number of significant digits relatively to its value.""" - if scalar == 0: - return 0.0 - magnitude = int(floor(log10(abs(scalar)))) + 1 - return round(scalar, n_significant_digits - magnitude) + This does not check if there are missing neighbors in either result set. + Missingness is handled by assert_no_missing_neighbors. + """ + # Compute a mapping from indices to distances for each result set and + # check that the computed neighbors with matching indices are within + # the expected distance tolerance. 
+    indices_to_dist_a = dict(zip(indices_row_a, dist_row_a))
+    indices_to_dist_b = dict(zip(indices_row_b, dist_row_b))
+
+    common_indices = set(indices_row_a).intersection(set(indices_row_b))
+    for idx in common_indices:
+        dist_a = indices_to_dist_a[idx]
+        dist_b = indices_to_dist_b[idx]
+        try:
+            assert_allclose(dist_a, dist_b, rtol=rtol, atol=atol)
+        except AssertionError as e:
+            # Wrap exception to provide more context while also including
+            # the original exception with the computed absolute and
+            # relative differences.
+            raise AssertionError(
+                f"Query vector with index {query_idx} led to different distances"
+                f" for common neighbor with index {idx}:"
+                f" dist_a={dist_a} vs dist_b={dist_b} (with atol={atol} and"
+                f" rtol={rtol})"
+            ) from e
+
+
+def assert_no_missing_neighbors(
+    query_idx,
+    dist_row_a,
+    dist_row_b,
+    indices_row_a,
+    indices_row_b,
+    threshold,
+):
+    """Compare the indices of neighbors in two result sets.
+
+    Any neighbor index with a distance below the precision threshold should
+    match one in the other result set. We ignore the last few neighbors beyond
+    the threshold as those can typically be missing due to rounding errors.
+
+    For radius queries, the threshold is just the radius minus the expected
+    precision level.
+
+    For k-NN queries, it is the maximum distance to the k-th neighbor minus the
+    expected precision level.
+    """
+    mask_a = dist_row_a < threshold
+    mask_b = dist_row_b < threshold
+    missing_from_b = np.setdiff1d(indices_row_a[mask_a], indices_row_b)
+    missing_from_a = np.setdiff1d(indices_row_b[mask_b], indices_row_a)
+    if len(missing_from_a) > 0 or len(missing_from_b) > 0:
+        raise AssertionError(
+            f"Query vector with index {query_idx} led to mismatched result indices:\n"
+            f"neighbors in b missing from a: {missing_from_a}\n"
+            f"neighbors in a missing from b: {missing_from_b}\n"
+            f"dist_row_a={dist_row_a}\n"
+            f"dist_row_b={dist_row_b}\n"
+            f"indices_row_a={indices_row_a}\n"
+            f"indices_row_b={indices_row_b}\n"
+        )
+
+
+def assert_compatible_argkmin_results(
+    neighbors_dists_a,
+    neighbors_dists_b,
+    neighbors_indices_a,
+    neighbors_indices_b,
+    rtol=1e-5,
+    atol=1e-6,
+):
+    """Assert that argkmin results are valid up to rounding errors.
+
+    This function asserts that the results of argkmin queries are valid up to:
+    - rounding error tolerance on distance values;
+    - permutations of indices for distance values that differ up to the
+      expected precision level.
+
+    Furthermore, the distances must be sorted.
-    To be used for testing neighbors queries on float32 datasets: we
-    accept neighbors rank swaps only if they are caused by small
-    rounding errors on the distance computations.
+    To be used for testing neighbors queries on float32 datasets: we accept
+    neighbors rank swaps only if they are caused by small rounding errors on
+    the distance computations.
     """
     is_sorted = lambda a: np.all(a[:-1] <= a[1:])
 
-    n_significant_digits = -(int(floor(log10(abs(rtol)))) + 1)
-
     assert (
-        ref_dist.shape == dist.shape == ref_indices.shape == indices.shape
-    ), "Arrays of results have various shapes."
+        neighbors_dists_a.shape
+        == neighbors_dists_b.shape
+        == neighbors_indices_a.shape
+        == neighbors_indices_b.shape
+    ), "Arrays of results have incompatible shapes."
 
-    n_queries, n_neighbors = ref_dist.shape
+    n_queries, _ = neighbors_dists_a.shape
 
     # Asserting equality of results one row at a time
     for query_idx in range(n_queries):
-        ref_dist_row = ref_dist[query_idx]
-        dist_row = dist[query_idx]
-
-        assert is_sorted(
-            ref_dist_row
-        ), f"Reference distances aren't sorted on row {query_idx}"
-        assert is_sorted(dist_row), f"Distances aren't sorted on row {query_idx}"
-
-        assert_allclose(ref_dist_row, dist_row, rtol=rtol)
-
-        ref_indices_row = ref_indices[query_idx]
-        indices_row = indices[query_idx]
-
-        # Grouping indices by distances using sets on a rounded distances up
-        # to a given number of decimals of significant digits derived from rtol.
-        reference_neighbors_groups = defaultdict(set)
-        effective_neighbors_groups = defaultdict(set)
+        dist_row_a = neighbors_dists_a[query_idx]
+        dist_row_b = neighbors_dists_b[query_idx]
+        indices_row_a = neighbors_indices_a[query_idx]
+        indices_row_b = neighbors_indices_b[query_idx]
+
+        assert is_sorted(dist_row_a), f"Distances aren't sorted on row {query_idx}"
+        assert is_sorted(dist_row_b), f"Distances aren't sorted on row {query_idx}"
+
+        assert_same_distances_for_common_neighbors(
+            query_idx,
+            dist_row_a,
+            dist_row_b,
+            indices_row_a,
+            indices_row_b,
+            rtol,
+            atol,
+        )
 
-        for neighbor_rank in range(n_neighbors):
-            rounded_dist = relative_rounding(
-                ref_dist_row[neighbor_rank],
-                n_significant_digits=n_significant_digits,
-            )
-            reference_neighbors_groups[rounded_dist].add(ref_indices_row[neighbor_rank])
-            effective_neighbors_groups[rounded_dist].add(indices_row[neighbor_rank])
-
-        # Asserting equality of groups (sets) for each distance
-        msg = (
-            f"Neighbors indices for query {query_idx} are not matching "
-            f"when rounding distances at {n_significant_digits} significant digits "
-            f"derived from rtol={rtol:.1e}"
+        # Check that any neighbors with distances below the rounding error
+        # threshold have matching indices. The threshold is the distance to the
+        # k-th neighbor minus the expected precision level:
+        #
+        # (1 - rtol) * dist_k - atol
+        #
+        # where dist_k is defined as the maximum distance to the k-th neighbor
+        # among the two result sets. This way of defining the threshold is
+        # stricter than taking the minimum of the two.
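+        #
+        # For instance, with rtol=1e-5, atol=1e-6 and dist_k=6.1, any neighbor
+        # closer than (1 - 1e-5) * 6.1 - 1e-6 (about 6.09994) has to be
+        # reported in both result sets.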
+ threshold = (1 - rtol) * np.maximum( + np.max(dist_row_a), np.max(dist_row_b) + ) - atol + assert_no_missing_neighbors( + query_idx, + dist_row_a, + dist_row_b, + indices_row_a, + indices_row_b, + threshold, ) - for rounded_distance in reference_neighbors_groups.keys(): - assert ( - reference_neighbors_groups[rounded_distance] - == effective_neighbors_groups[rounded_distance] - ), msg -def assert_radius_neighbors_results_equality( - ref_dist, dist, ref_indices, indices, radius +def _non_trivial_radius( + *, + X=None, + Y=None, + metric=None, + precomputed_dists=None, + expected_n_neighbors=10, + n_subsampled_queries=10, + **metric_kwargs, ): - # We get arrays of arrays and we need to check for individual pairs - for i in range(ref_dist.shape[0]): - assert (ref_dist[i] <= radius).all() - assert_array_equal( - ref_indices[i], - indices[i], - err_msg=f"Query vector #{i} has different neighbors' indices", - ) - assert_allclose( - ref_dist[i], - dist[i], - err_msg=f"Query vector #{i} has different neighbors' distances", - rtol=1e-7, - ) + # Find a non-trivial radius using a small subsample of the pairwise + # distances between X and Y: we want to return around expected_n_neighbors + # on average. Yielding too many results would make the test slow (because + # checking the results is expensive for large result sets), yielding 0 most + # of the time would make the test useless. + if precomputed_dists is None and metric is None: + raise ValueError("Either metric or dists must be provided") + if precomputed_dists is None: + assert X is not None + assert Y is not None + sampled_dists = pairwise_distances(X, Y, metric=metric, **metric_kwargs) + else: + sampled_dists = precomputed_dists[:n_subsampled_queries].copy() + sampled_dists.sort(axis=1) + return sampled_dists[:, expected_n_neighbors].mean() -def assert_radius_neighbors_results_quasi_equality( - ref_dist, - dist, - ref_indices, - indices, +def assert_compatible_radius_results( + neighbors_dists_a, + neighbors_dists_b, + neighbors_indices_a, + neighbors_indices_b, radius, - rtol=1e-4, + check_sorted=True, + rtol=1e-5, + atol=1e-6, ): """Assert that radius neighborhood results are valid up to: - - relative tolerance on computed distance values + + - relative and absolute tolerance on computed distance values - permutations of indices for distances values that differ up to a precision level - missing or extra last elements if their distance is @@ -217,101 +266,92 @@ def assert_radius_neighbors_results_quasi_equality( """ is_sorted = lambda a: np.all(a[:-1] <= a[1:]) - n_significant_digits = -(int(floor(log10(abs(rtol)))) + 1) - assert ( - len(ref_dist) == len(dist) == len(ref_indices) == len(indices) - ), "Arrays of results have various lengths." + len(neighbors_dists_a) + == len(neighbors_dists_b) + == len(neighbors_indices_a) + == len(neighbors_indices_b) + ) - n_queries = len(ref_dist) + n_queries = len(neighbors_dists_a) # Asserting equality of results one vector at a time for query_idx in range(n_queries): - ref_dist_row = ref_dist[query_idx] - dist_row = dist[query_idx] - - assert is_sorted( - ref_dist_row - ), f"Reference distances aren't sorted on row {query_idx}" - assert is_sorted(dist_row), f"Distances aren't sorted on row {query_idx}" - - # Vectors' lengths might be different due to small - # numerical differences of distance w.r.t the `radius` threshold. 
- largest_row = ref_dist_row if len(ref_dist_row) > len(dist_row) else dist_row - - # For the longest distances vector, we check that last extra elements - # that aren't present in the other vector are all in: [radius ± rtol] - min_length = min(len(ref_dist_row), len(dist_row)) - last_extra_elements = largest_row[min_length:] - if last_extra_elements.size > 0: - assert np.all(radius - rtol <= last_extra_elements <= radius + rtol), ( - f"The last extra elements ({last_extra_elements}) aren't in [radius ±" - f" rtol]=[{radius} ± {rtol}]" + dist_row_a = neighbors_dists_a[query_idx] + dist_row_b = neighbors_dists_b[query_idx] + indices_row_a = neighbors_indices_a[query_idx] + indices_row_b = neighbors_indices_b[query_idx] + + if check_sorted: + assert is_sorted(dist_row_a), f"Distances aren't sorted on row {query_idx}" + assert is_sorted(dist_row_b), f"Distances aren't sorted on row {query_idx}" + + assert len(dist_row_a) == len(indices_row_a) + assert len(dist_row_b) == len(indices_row_b) + + # Check that all distances are within the requested radius + if len(dist_row_a) > 0: + max_dist_a = np.max(dist_row_a) + assert max_dist_a <= radius, ( + f"Largest returned distance {max_dist_a} not within requested" + f" radius {radius} on row {query_idx}" + ) + if len(dist_row_b) > 0: + max_dist_b = np.max(dist_row_b) + assert max_dist_b <= radius, ( + f"Largest returned distance {max_dist_b} not within requested" + f" radius {radius} on row {query_idx}" ) - # We truncate the neighbors results list on the smallest length to - # be able to compare them, ignoring the elements checked above. - ref_dist_row = ref_dist_row[:min_length] - dist_row = dist_row[:min_length] - - assert_allclose(ref_dist_row, dist_row, rtol=rtol) - - ref_indices_row = ref_indices[query_idx] - indices_row = indices[query_idx] - - # Grouping indices by distances using sets on a rounded distances up - # to a given number of significant digits derived from rtol. - reference_neighbors_groups = defaultdict(set) - effective_neighbors_groups = defaultdict(set) + assert_same_distances_for_common_neighbors( + query_idx, + dist_row_a, + dist_row_b, + indices_row_a, + indices_row_b, + rtol, + atol, + ) - for neighbor_rank in range(min_length): - rounded_dist = relative_rounding( - ref_dist_row[neighbor_rank], - n_significant_digits=n_significant_digits, - ) - reference_neighbors_groups[rounded_dist].add(ref_indices_row[neighbor_rank]) - effective_neighbors_groups[rounded_dist].add(indices_row[neighbor_rank]) - - # Asserting equality of groups (sets) for each distance - msg = ( - f"Neighbors indices for query {query_idx} are not matching " - f"when rounding distances at {n_significant_digits} significant digits " - f"derived from rtol={rtol:.1e}" + threshold = (1 - rtol) * radius - atol + assert_no_missing_neighbors( + query_idx, + dist_row_a, + dist_row_b, + indices_row_a, + indices_row_b, + threshold, ) - for rounded_distance in reference_neighbors_groups.keys(): - assert ( - reference_neighbors_groups[rounded_distance] - == effective_neighbors_groups[rounded_distance] - ), msg +FLOAT32_TOLS = { + "atol": 1e-7, + "rtol": 1e-5, +} +FLOAT64_TOLS = { + "atol": 1e-9, + "rtol": 1e-7, +} ASSERT_RESULT = { - # In the case of 64bit, we test for exact equality of the results rankings - # and standard tolerance levels for the computed distance values. 
- # - # XXX: Note that in the future we might be interested in using quasi equality - # checks also for float64 data (with a larger number of significant digits) - # as the tests could be unstable because of numerically tied distances on - # some datasets (e.g. uniform grids). - (ArgKmin, np.float64): assert_argkmin_results_equality, + (ArgKmin, np.float64): partial(assert_compatible_argkmin_results, **FLOAT64_TOLS), + (ArgKmin, np.float32): partial(assert_compatible_argkmin_results, **FLOAT32_TOLS), ( RadiusNeighbors, np.float64, - ): assert_radius_neighbors_results_equality, - # In the case of 32bit, indices can be permuted due to small difference - # in the computations of their associated distances, hence we test equality of - # results up to valid permutations. - (ArgKmin, np.float32): assert_argkmin_results_quasi_equality, + ): partial(assert_compatible_radius_results, **FLOAT64_TOLS), ( RadiusNeighbors, np.float32, - ): assert_radius_neighbors_results_quasi_equality, + ): partial(assert_compatible_radius_results, **FLOAT32_TOLS), } -def test_assert_argkmin_results_quasi_equality(): - rtol = 1e-7 - eps = 1e-7 +def test_assert_compatible_argkmin_results(): + atol = 1e-7 + rtol = 0.0 + tols = dict(atol=atol, rtol=rtol) + + eps = atol / 3 _1m = 1.0 - eps _1p = 1.0 + eps @@ -332,72 +372,128 @@ def test_assert_argkmin_results_quasi_equality(): ) # Sanity check: compare the reference results to themselves. - assert_argkmin_results_quasi_equality( + assert_compatible_argkmin_results( ref_dist, ref_dist, ref_indices, ref_indices, rtol ) - # Apply valid permutation on indices: the last 3 points are - # all very close to one another so we accept any permutation - # on their rankings. - assert_argkmin_results_quasi_equality( + # Apply valid permutation on indices: the last 3 points are all very close + # to one another so we accept any permutation on their rankings. + assert_compatible_argkmin_results( + np.array([[1.2, 2.5, _6_1m, 6.1, _6_1p]]), np.array([[1.2, 2.5, _6_1m, 6.1, _6_1p]]), - np.array([[1.2, 2.5, 6.1, 6.1, 6.1]]), np.array([[1, 2, 3, 4, 5]]), - np.array([[1, 2, 4, 5, 3]]), - rtol=rtol, + np.array([[1, 2, 5, 4, 3]]), + **tols, ) - # All points are have close distances so any ranking permutation + + # The last few indices do not necessarily have to match because of the rounding + # errors on the distances: there could be tied results at the boundary. + assert_compatible_argkmin_results( + np.array([[1.2, 2.5, 3.0, 6.1, _6_1p]]), + np.array([[1.2, 2.5, 3.0, _6_1m, 6.1]]), + np.array([[1, 2, 3, 4, 5]]), + np.array([[1, 2, 3, 6, 7]]), + **tols, + ) + + # All points have close distances so any ranking permutation # is valid for this query result. - assert_argkmin_results_quasi_equality( - np.array([[_1m, _1m, 1, _1p, _1p]]), - np.array([[_1m, _1m, 1, _1p, _1p]]), - np.array([[6, 7, 8, 9, 10]]), + assert_compatible_argkmin_results( + np.array([[_1m, 1, _1p, _1p, _1p]]), + np.array([[1, 1, 1, 1, _1p]]), + np.array([[7, 6, 8, 10, 9]]), np.array([[6, 9, 7, 8, 10]]), - rtol=rtol, + **tols, ) - # Apply invalid permutation on indices: permuting the ranks - # of the 2 nearest neighbors is invalid because the distance - # values are too different. 
- msg = "Neighbors indices for query 0 are not matching" + # They could also be nearly truncation of very large nearly tied result + # sets hence all indices can also be distinct in this case: + assert_compatible_argkmin_results( + np.array([[_1m, 1, _1p, _1p, _1p]]), + np.array([[_1m, 1, 1, 1, _1p]]), + np.array([[34, 30, 8, 12, 24]]), + np.array([[42, 1, 21, 13, 3]]), + **tols, + ) + + # Apply invalid permutation on indices: permuting the ranks of the 2 + # nearest neighbors is invalid because the distance values are too + # different. + msg = re.escape( + "Query vector with index 0 lead to different distances for common neighbor with" + " index 1: dist_a=1.2 vs dist_b=2.5" + ) with pytest.raises(AssertionError, match=msg): - assert_argkmin_results_quasi_equality( + assert_compatible_argkmin_results( np.array([[1.2, 2.5, _6_1m, 6.1, _6_1p]]), np.array([[1.2, 2.5, _6_1m, 6.1, _6_1p]]), np.array([[1, 2, 3, 4, 5]]), np.array([[2, 1, 3, 4, 5]]), - rtol=rtol, + **tols, ) - # Indices aren't properly sorted w.r.t their distances - msg = "Neighbors indices for query 0 are not matching" + # Detect missing indices within the expected precision level, even when the + # distances match exactly. + msg = re.escape( + "neighors in b missing from a: [12]\nneighors in a missing from b: [1]" + ) with pytest.raises(AssertionError, match=msg): - assert_argkmin_results_quasi_equality( + assert_compatible_argkmin_results( np.array([[1.2, 2.5, _6_1m, 6.1, _6_1p]]), np.array([[1.2, 2.5, _6_1m, 6.1, _6_1p]]), np.array([[1, 2, 3, 4, 5]]), - np.array([[2, 1, 4, 5, 3]]), - rtol=rtol, + np.array([[12, 2, 4, 11, 3]]), + **tols, + ) + + # Detect missing indices outside the expected precision level. + msg = re.escape( + "neighors in b missing from a: []\nneighors in a missing from b: [3]" + ) + with pytest.raises(AssertionError, match=msg): + assert_compatible_argkmin_results( + np.array([[_1m, 1.0, _6_1m, 6.1, _6_1p]]), + np.array([[1.0, 1.0, _6_1m, 6.1, 7]]), + np.array([[1, 2, 3, 4, 5]]), + np.array([[2, 1, 4, 5, 12]]), + **tols, + ) + + # Detect missing indices outside the expected precision level, in the other + # direction: + msg = re.escape( + "neighors in b missing from a: [5]\nneighors in a missing from b: []" + ) + with pytest.raises(AssertionError, match=msg): + assert_compatible_argkmin_results( + np.array([[_1m, 1.0, _6_1m, 6.1, 7]]), + np.array([[1.0, 1.0, _6_1m, 6.1, _6_1p]]), + np.array([[1, 2, 3, 4, 12]]), + np.array([[2, 1, 5, 3, 4]]), + **tols, ) # Distances aren't properly sorted msg = "Distances aren't sorted on row 0" with pytest.raises(AssertionError, match=msg): - assert_argkmin_results_quasi_equality( + assert_compatible_argkmin_results( np.array([[1.2, 2.5, _6_1m, 6.1, _6_1p]]), np.array([[2.5, 1.2, _6_1m, 6.1, _6_1p]]), np.array([[1, 2, 3, 4, 5]]), np.array([[2, 1, 4, 5, 3]]), - rtol=rtol, + **tols, ) -def test_assert_radius_neighbors_results_quasi_equality(): - rtol = 1e-7 - eps = 1e-7 +@pytest.mark.parametrize("check_sorted", [True, False]) +def test_assert_compatible_radius_results(check_sorted): + atol = 1e-7 + rtol = 0.0 + tols = dict(atol=atol, rtol=rtol) + + eps = atol / 3 _1m = 1.0 - eps _1p = 1.0 + eps - _6_1m = 6.1 - eps _6_1p = 6.1 + eps @@ -412,91 +508,143 @@ def test_assert_radius_neighbors_results_quasi_equality(): ] # Sanity check: compare the reference results to themselves. 
-    assert_radius_neighbors_results_quasi_equality(
+    assert_compatible_radius_results(
         ref_dist,
         ref_dist,
         ref_indices,
         ref_indices,
-        radius=6.1,
-        rtol=rtol,
+        radius=7.0,
+        check_sorted=check_sorted,
+        **tols,
     )
 
     # Apply valid permutation on indices
-    assert_radius_neighbors_results_quasi_equality(
+    assert_compatible_radius_results(
         np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
         np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
         np.array([np.array([1, 2, 3, 4, 5])]),
         np.array([np.array([1, 2, 4, 5, 3])]),
-        radius=6.1,
-        rtol=rtol,
+        radius=7.0,
+        check_sorted=check_sorted,
+        **tols,
    )
-    assert_radius_neighbors_results_quasi_equality(
+    assert_compatible_radius_results(
         np.array([np.array([_1m, _1m, 1, _1p, _1p])]),
         np.array([np.array([_1m, _1m, 1, _1p, _1p])]),
         np.array([np.array([6, 7, 8, 9, 10])]),
         np.array([np.array([6, 9, 7, 8, 10])]),
-        radius=6.1,
-        rtol=rtol,
+        radius=7.0,
+        check_sorted=check_sorted,
+        **tols,
     )
 
     # Apply invalid permutation on indices
-    msg = "Neighbors indices for query 0 are not matching"
+    msg = re.escape(
+        "Query vector with index 0 led to different distances for common neighbor with"
+        " index 1: dist_a=1.2 vs dist_b=2.5"
+    )
     with pytest.raises(AssertionError, match=msg):
-        assert_radius_neighbors_results_quasi_equality(
+        assert_compatible_radius_results(
             np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
             np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
             np.array([np.array([1, 2, 3, 4, 5])]),
             np.array([np.array([2, 1, 3, 4, 5])]),
-            radius=6.1,
-            rtol=rtol,
+            radius=7.0,
+            check_sorted=check_sorted,
+            **tols,
         )
 
-    # Having extra last elements is valid if they are in: [radius ± rtol]
-    assert_radius_neighbors_results_quasi_equality(
-        np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
+    # Having extra last or missing elements is valid if they are in the
+    # tolerated rounding error range: [(1 - rtol) * radius - atol, radius]
+    assert_compatible_radius_results(
+        np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p, _6_1p])]),
         np.array([np.array([1.2, 2.5, _6_1m, 6.1])]),
-        np.array([np.array([1, 2, 3, 4, 5])]),
-        np.array([np.array([1, 2, 3, 4])]),
-        radius=6.1,
-        rtol=rtol,
+        np.array([np.array([1, 2, 3, 4, 5, 7])]),
+        np.array([np.array([1, 2, 3, 6])]),
+        radius=_6_1p,
+        check_sorted=check_sorted,
+        **tols,
     )
 
-    # Having extra last elements is invalid if they are lesser than radius - rtol
+    # Any discrepancy outside the tolerated rounding error range is invalid and
+    # indicates a missing neighbor in one of the result sets.
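+    # In the first case below, the distance 6 is below the threshold
+    # (1 - rtol) * 6.1 - atol, so the missing index 3 cannot be explained by
+    # rounding errors and the check has to fail.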
     msg = re.escape(
-        "The last extra elements ([6.]) aren't in [radius ± rtol]=[6.1 ± 1e-07]"
+        "Query vector with index 0 led to mismatched result indices:\nneighbors in b"
+        " missing from a: []\nneighbors in a missing from b: [3]"
     )
     with pytest.raises(AssertionError, match=msg):
-        assert_radius_neighbors_results_quasi_equality(
+        assert_compatible_radius_results(
             np.array([np.array([1.2, 2.5, 6])]),
             np.array([np.array([1.2, 2.5])]),
             np.array([np.array([1, 2, 3])]),
             np.array([np.array([1, 2])]),
             radius=6.1,
-            rtol=rtol,
+            check_sorted=check_sorted,
+            **tols,
+        )
+    msg = re.escape(
+        "Query vector with index 0 led to mismatched result indices:\nneighbors in b"
+        " missing from a: [4]\nneighbors in a missing from b: [2]"
+    )
+    with pytest.raises(AssertionError, match=msg):
+        assert_compatible_radius_results(
+            np.array([np.array([1.2, 2.1, 2.5])]),
+            np.array([np.array([1.2, 2, 2.5])]),
+            np.array([np.array([1, 2, 3])]),
+            np.array([np.array([1, 4, 3])]),
+            radius=6.1,
+            check_sorted=check_sorted,
+            **tols,
        )
 
-    # Indices aren't properly sorted w.r.t their distances
-    msg = "Neighbors indices for query 0 are not matching"
+    # Radius upper bound is strictly checked
+    msg = re.escape(
+        "Largest returned distance 6.100000033333333 not within requested radius 6.1 on"
+        " row 0"
+    )
     with pytest.raises(AssertionError, match=msg):
-        assert_radius_neighbors_results_quasi_equality(
+        assert_compatible_radius_results(
             np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
+            np.array([np.array([1.2, 2.5, _6_1m, 6.1, 6.1])]),
+            np.array([np.array([1, 2, 3, 4, 5])]),
+            np.array([np.array([2, 1, 4, 5, 3])]),
+            radius=6.1,
+            check_sorted=check_sorted,
+            **tols,
+        )
+    with pytest.raises(AssertionError, match=msg):
+        assert_compatible_radius_results(
+            np.array([np.array([1.2, 2.5, _6_1m, 6.1, 6.1])]),
             np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
             np.array([np.array([1, 2, 3, 4, 5])]),
             np.array([np.array([2, 1, 4, 5, 3])]),
             radius=6.1,
-            rtol=rtol,
+            check_sorted=check_sorted,
+            **tols,
         )
 
-    # Distances aren't properly sorted
-    msg = "Distances aren't sorted on row 0"
-    with pytest.raises(AssertionError, match=msg):
-        assert_radius_neighbors_results_quasi_equality(
+    if check_sorted:
+        # Distances aren't properly sorted
+        msg = "Distances aren't sorted on row 0"
+        with pytest.raises(AssertionError, match=msg):
+            assert_compatible_radius_results(
+                np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
+                np.array([np.array([2.5, 1.2, _6_1m, 6.1, _6_1p])]),
+                np.array([np.array([1, 2, 3, 4, 5])]),
+                np.array([np.array([2, 1, 4, 5, 3])]),
+                radius=_6_1p,
+                check_sorted=True,
+                **tols,
+            )
    else:
+        assert_compatible_radius_results(
             np.array([np.array([1.2, 2.5, _6_1m, 6.1, _6_1p])]),
             np.array([np.array([2.5, 1.2, _6_1m, 6.1, _6_1p])]),
             np.array([np.array([1, 2, 3, 4, 5])]),
             np.array([np.array([2, 1, 4, 5, 3])]),
-            radius=6.1,
-            rtol=rtol,
+            radius=_6_1p,
+            check_sorted=False,
+            **tols,
         )
 
 
@@ -963,22 +1111,18 @@ def test_radius_neighbors_classmode_factory_method_wrong_usages():
     )
 
 
-@pytest.mark.parametrize(
-    "n_samples_X, n_samples_Y", [(100, 100), (500, 100), (100, 500)]
-)
 @pytest.mark.parametrize("Dispatcher", [ArgKmin, RadiusNeighbors])
 @pytest.mark.parametrize("dtype", [np.float64, np.float32])
 def test_chunk_size_agnosticism(
     global_random_seed,
     Dispatcher,
-    n_samples_X,
-    n_samples_Y,
     dtype,
     n_features=100,
 ):
     """Check that results do not depend on the chunk size."""
     rng = np.random.RandomState(global_random_seed)
     spread = 100
+    n_samples_X, n_samples_Y = rng.choice([97, 100, 101, 500], size=2, replace=False)
     X = rng.rand(n_samples_X, n_features).astype(dtype) * spread
     Y = rng.rand(n_samples_Y, n_features).astype(dtype) * spread
 
@@ -987,8 +1131,7 @@ def test_chunk_size_agnosticism(
         check_parameters = {}
         compute_parameters = {}
     else:
-        # Scaling the radius slightly with the numbers of dimensions
-        radius = 10 ** np.log(n_features)
+        radius = _non_trivial_radius(X=X, Y=Y, metric="euclidean")
         parameter = radius
         check_parameters = {"radius": radius}
         compute_parameters = {"sort_results": True}
@@ -1018,21 +1161,17 @@ def test_chunk_size_agnosticism(
     )
 
 
-@pytest.mark.parametrize(
-    "n_samples_X, n_samples_Y", [(100, 100), (500, 100), (100, 500)]
-)
 @pytest.mark.parametrize("Dispatcher", [ArgKmin, RadiusNeighbors])
 @pytest.mark.parametrize("dtype", [np.float64, np.float32])
 def test_n_threads_agnosticism(
     global_random_seed,
     Dispatcher,
-    n_samples_X,
-    n_samples_Y,
     dtype,
     n_features=100,
 ):
     """Check that results do not depend on the number of threads."""
     rng = np.random.RandomState(global_random_seed)
+    n_samples_X, n_samples_Y = rng.choice([97, 100, 101, 500], size=2, replace=False)
     spread = 100
     X = rng.rand(n_samples_X, n_features).astype(dtype) * spread
     Y = rng.rand(n_samples_Y, n_features).astype(dtype) * spread
@@ -1042,8 +1181,7 @@ def test_n_threads_agnosticism(
         check_parameters = {}
         compute_parameters = {}
     else:
-        # Scaling the radius slightly with the numbers of dimensions
-        radius = 10 ** np.log(n_features)
+        radius = _non_trivial_radius(X=X, Y=Y, metric="euclidean")
         parameter = radius
         check_parameters = {"radius": radius}
         compute_parameters = {"sort_results": True}
@@ -1104,8 +1242,9 @@ def test_format_agnosticism(
         check_parameters = {}
         compute_parameters = {}
     else:
-        # Scaling the radius slightly with the numbers of dimensions
-        radius = 10 ** np.log(n_features)
+        # Adjusting the radius to ensure that the expected result set is neither
+        # trivially empty nor too large.
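+        # By default, _non_trivial_radius targets around 10 neighbors per
+        # query by sampling pairwise distances between X and Y.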
+ radius = _non_trivial_radius(X=X, Y=Y, metric="euclidean") parameter = radius check_parameters = {"radius": radius} compute_parameters = {"sort_results": True} @@ -1139,29 +1278,30 @@ def test_format_agnosticism( ) -@pytest.mark.parametrize( - "n_samples_X, n_samples_Y", [(100, 100), (100, 500), (500, 100)] -) -@pytest.mark.parametrize( - "metric", - ["euclidean", "minkowski", "manhattan", "infinity", "seuclidean", "haversine"], -) @pytest.mark.parametrize("Dispatcher", [ArgKmin, RadiusNeighbors]) -@pytest.mark.parametrize("dtype", [np.float64, np.float32]) def test_strategies_consistency( global_random_seed, + global_dtype, Dispatcher, - metric, - n_samples_X, - n_samples_Y, - dtype, n_features=10, ): """Check that the results do not depend on the strategy used.""" rng = np.random.RandomState(global_random_seed) + metric = rng.choice( + np.array( + [ + "euclidean", + "minkowski", + "manhattan", + "haversine", + ], + dtype=object, + ) + ) + n_samples_X, n_samples_Y = rng.choice([97, 100, 101, 500], size=2, replace=False) spread = 100 - X = rng.rand(n_samples_X, n_features).astype(dtype) * spread - Y = rng.rand(n_samples_Y, n_features).astype(dtype) * spread + X = rng.rand(n_samples_X, n_features).astype(global_dtype) * spread + Y = rng.rand(n_samples_Y, n_features).astype(global_dtype) * spread # Haversine distance only accepts 2D data if metric == "haversine": @@ -1173,8 +1313,7 @@ def test_strategies_consistency( check_parameters = {} compute_parameters = {} else: - # Scaling the radius slightly with the numbers of dimensions - radius = 10 ** np.log(n_features) + radius = _non_trivial_radius(X=X, Y=Y, metric=metric) parameter = radius check_parameters = {"radius": radius} compute_parameters = {"sort_results": True} @@ -1211,7 +1350,7 @@ def test_strategies_consistency( **compute_parameters, ) - ASSERT_RESULT[(Dispatcher, dtype)]( + ASSERT_RESULT[(Dispatcher, global_dtype)]( dist_par_X, dist_par_Y, indices_par_X, indices_par_Y, **check_parameters ) @@ -1219,34 +1358,25 @@ def test_strategies_consistency( # "Concrete Dispatchers"-specific tests -@pytest.mark.parametrize("n_features", [50, 500]) -@pytest.mark.parametrize("translation", [0, 1e6]) @pytest.mark.parametrize("metric", CDIST_PAIRWISE_DISTANCES_REDUCTION_COMMON_METRICS) @pytest.mark.parametrize("strategy", ("parallel_on_X", "parallel_on_Y")) @pytest.mark.parametrize("dtype", [np.float64, np.float32]) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_pairwise_distances_argkmin( global_random_seed, - n_features, - translation, metric, strategy, dtype, csr_container, + n_queries=5, n_samples=100, k=10, ): - # TODO: can we easily fix this discrepancy? 
- edge_cases = [ - (np.float32, "chebyshev", 1000000.0), - (np.float32, "cityblock", 1000000.0), - ] - if (dtype, metric, translation) in edge_cases: - pytest.xfail("Numerical differences lead to small differences in results.") - rng = np.random.RandomState(global_random_seed) + n_features = rng.choice([50, 500]) + translation = rng.choice([0, 1e6]) spread = 1000 - X = translation + rng.rand(n_samples, n_features).astype(dtype) * spread + X = translation + rng.rand(n_queries, n_features).astype(dtype) * spread Y = translation + rng.rand(n_samples, n_features).astype(dtype) * spread X_csr = csr_container(X) @@ -1295,24 +1425,22 @@ def test_pairwise_distances_argkmin( ) -@pytest.mark.parametrize("n_features", [50, 500]) -@pytest.mark.parametrize("translation", [0, 1e6]) @pytest.mark.parametrize("metric", CDIST_PAIRWISE_DISTANCES_REDUCTION_COMMON_METRICS) @pytest.mark.parametrize("strategy", ("parallel_on_X", "parallel_on_Y")) @pytest.mark.parametrize("dtype", [np.float64, np.float32]) def test_pairwise_distances_radius_neighbors( global_random_seed, - n_features, - translation, metric, strategy, dtype, + n_queries=5, n_samples=100, ): rng = np.random.RandomState(global_random_seed) + n_features = rng.choice([50, 500]) + translation = rng.choice([0, 1e6]) spread = 1000 - radius = spread * np.log(n_features) - X = translation + rng.rand(n_samples, n_features).astype(dtype) * spread + X = translation + rng.rand(n_queries, n_features).astype(dtype) * spread Y = translation + rng.rand(n_samples, n_features).astype(dtype) * spread metric_kwargs = _get_metric_params_list( @@ -1326,6 +1454,8 @@ def test_pairwise_distances_radius_neighbors( else: dist_matrix = cdist(X, Y, metric=metric, **metric_kwargs) + radius = _non_trivial_radius(precomputed_dists=dist_matrix) + # Getting the neighbors for a given radius neigh_indices_ref = [] neigh_distances_ref = [] @@ -1410,21 +1540,18 @@ def test_memmap_backed_data( ) -@pytest.mark.parametrize("n_samples", [100, 1000]) -@pytest.mark.parametrize("n_features", [5, 10, 100]) -@pytest.mark.parametrize("num_threads", [1, 2, 8]) @pytest.mark.parametrize("dtype", [np.float64, np.float32]) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_sqeuclidean_row_norms( global_random_seed, - n_samples, - n_features, - num_threads, dtype, csr_container, ): rng = np.random.RandomState(global_random_seed) spread = 100 + n_samples = rng.choice([97, 100, 101, 1000]) + n_features = rng.choice([5, 10, 100]) + num_threads = rng.choice([1, 2, 8]) X = rng.rand(n_samples, n_features).astype(dtype) * spread X_csr = csr_container(X) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 2d8fb8c69c599..ac312144ae968 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -18,10 +18,11 @@ from sklearn.metrics._dist_metrics import ( DistanceMetric, ) -from sklearn.metrics.pairwise import pairwise_distances +from sklearn.metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS, pairwise_distances from sklearn.metrics.tests.test_dist_metrics import BOOL_METRICS from sklearn.metrics.tests.test_pairwise_distances_reduction import ( - assert_radius_neighbors_results_equality, + assert_compatible_argkmin_results, + assert_compatible_radius_results, ) from sklearn.model_selection import cross_val_score, train_test_split from sklearn.neighbors import ( @@ -1712,8 +1713,15 @@ def test_neighbors_metrics( "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"])) ) def 
test_kneighbors_brute_backend( - global_dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5 + metric, + global_dtype, + global_random_seed, + n_samples=2000, + n_features=30, + n_query_pts=5, + n_neighbors=5, ): + rng = np.random.RandomState(global_random_seed) # Both backend for the 'brute' algorithm of kneighbors must give identical results. X_train = rng.rand(n_samples, n_features).astype(global_dtype, copy=False) X_test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False) @@ -1724,6 +1732,10 @@ def test_kneighbors_brute_backend( X_train = np.ascontiguousarray(X_train[:, feature_sl]) X_test = np.ascontiguousarray(X_test[:, feature_sl]) + if metric in PAIRWISE_BOOLEAN_FUNCTIONS: + X_train = X_train > 0.5 + X_test = X_test > 0.5 + metric_params_list = _generate_test_params_for(metric, n_features) for metric_params in metric_params_list: @@ -1750,8 +1762,9 @@ def test_kneighbors_brute_backend( X_test, return_distance=True ) - assert_allclose(legacy_brute_dst, pdr_brute_dst) - assert_array_equal(legacy_brute_idx, pdr_brute_idx) + assert_compatible_argkmin_results( + legacy_brute_dst, pdr_brute_dst, legacy_brute_idx, pdr_brute_idx + ) def test_callable_metric(): @@ -2223,16 +2236,18 @@ def test_auto_algorithm(X, metric, metric_params, expected_algo): ) def test_radius_neighbors_brute_backend( metric, + global_random_seed, + global_dtype, n_samples=2000, n_features=30, - n_query_pts=100, - n_neighbors=5, + n_query_pts=5, radius=1.0, ): + rng = np.random.RandomState(global_random_seed) # Both backends for the 'brute' algorithm of radius_neighbors # must give identical results. - X_train = rng.rand(n_samples, n_features) - X_test = rng.rand(n_query_pts, n_features) + X_train = rng.rand(n_samples, n_features).astype(global_dtype, copy=False) + X_test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False) # Haversine distance only accepts 2D data if metric == "haversine": @@ -2246,7 +2261,6 @@ def test_radius_neighbors_brute_backend( p = metric_params.pop("p", 2) neigh = neighbors.NearestNeighbors( - n_neighbors=n_neighbors, radius=radius, algorithm="brute", metric=metric, @@ -2267,12 +2281,13 @@ def test_radius_neighbors_brute_backend( X_test, return_distance=True ) - assert_radius_neighbors_results_equality( + assert_compatible_radius_results( legacy_brute_dst, pdr_brute_dst, legacy_brute_idx, pdr_brute_idx, radius=radius, + check_sorted=False, ) From e9b3d1c8369cdab14b641fa04d4ed703b4af3aad Mon Sep 17 00:00:00 2001 From: Eitan Hemed <37670372+EitanHemed@users.noreply.github.com> Date: Thu, 28 Sep 2023 11:17:31 +0300 Subject: [PATCH 05/15] Minor correction to the FastICA 2D cloud point example (#27370) --- examples/decomposition/plot_ica_vs_pca.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/decomposition/plot_ica_vs_pca.py b/examples/decomposition/plot_ica_vs_pca.py index e5ab3b0ee1ca2..07f6327e9922f 100644 --- a/examples/decomposition/plot_ica_vs_pca.py +++ b/examples/decomposition/plot_ica_vs_pca.py @@ -54,8 +54,6 @@ ica = FastICA(random_state=rng, whiten="arbitrary-variance") S_ica_ = ica.fit(X).transform(X) # Estimate the sources -S_ica_ /= S_ica_.std(axis=0) - # %% # Plot results @@ -113,4 +111,5 @@ def plot_samples(S, axis_list=None): plt.title("ICA recovered signals") plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.36) +plt.tight_layout() plt.show() From 4567245cc86b9e950199ff382f92c306f588ccd5 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 28 Sep 2023 10:46:13 +0200 Subject: [PATCH 
06/15] MAINT cython typedefs in _quad_tree (#27351) --- sklearn/neighbors/_quad_tree.pxd | 72 +++++++++++++++----------------- sklearn/neighbors/_quad_tree.pyx | 58 ++++++++++++------------- 2 files changed, 63 insertions(+), 67 deletions(-) diff --git a/sklearn/neighbors/_quad_tree.pxd b/sklearn/neighbors/_quad_tree.pxd index 71c4c3071344c..9ed033e747314 100644 --- a/sklearn/neighbors/_quad_tree.pxd +++ b/sklearn/neighbors/_quad_tree.pxd @@ -4,11 +4,7 @@ # See quad_tree.pyx for details. cimport numpy as cnp - -ctypedef cnp.npy_float32 DTYPE_t # Type of X -ctypedef cnp.npy_intp SIZE_t # Type for indices and counters -ctypedef cnp.npy_int32 INT32_t # Signed 32 bit integer -ctypedef cnp.npy_uint32 UINT32_t # Unsigned 32 bit integer +from ..utils._typedefs cimport float32_t, intp_t # This is effectively an ifdef statement in Cython # It allows us to write printf debugging lines @@ -25,26 +21,26 @@ cdef struct Cell: # Base storage structure for cells in a QuadTree object # Tree structure - SIZE_t parent # Parent cell of this cell - SIZE_t[8] children # Array pointing to children of this cell + intp_t parent # Parent cell of this cell + intp_t[8] children # Array pointing to children of this cell # Cell description - SIZE_t cell_id # Id of the cell in the cells array in the Tree - SIZE_t point_index # Index of the point at this cell (only defined - # # in non empty leaf) - bint is_leaf # Does this cell have children? - DTYPE_t squared_max_width # Squared value of the maximum width w - SIZE_t depth # Depth of the cell in the tree - SIZE_t cumulative_size # Number of points included in the subtree with - # # this cell as a root. + intp_t cell_id # Id of the cell in the cells array in the Tree + intp_t point_index # Index of the point at this cell (only defined + # # in non empty leaf) + bint is_leaf # Does this cell have children? + float32_t squared_max_width # Squared value of the maximum width w + intp_t depth # Depth of the cell in the tree + intp_t cumulative_size # Number of points included in the subtree with + # # this cell as a root. # Internal constants - DTYPE_t[3] center # Store the center for quick split of cells - DTYPE_t[3] barycenter # Keep track of the center of mass of the cell + float32_t[3] center # Store the center for quick split of cells + float32_t[3] barycenter # Keep track of the center of mass of the cell # Cell boundaries - DTYPE_t[3] min_bounds # Inferior boundaries of this cell (inclusive) - DTYPE_t[3] max_bounds # Superior boundaries of this cell (exclusive) + float32_t[3] min_bounds # Inferior boundaries of this cell (inclusive) + float32_t[3] max_bounds # Superior boundaries of this cell (exclusive) cdef class _QuadTree: @@ -57,40 +53,40 @@ cdef class _QuadTree: # Parameters of the tree cdef public int n_dimensions # Number of dimensions in X cdef public int verbose # Verbosity of the output - cdef SIZE_t n_cells_per_cell # Number of children per node. (2 ** n_dimension) + cdef intp_t n_cells_per_cell # Number of children per node. 
(2 ** n_dimension) # Tree inner structure - cdef public SIZE_t max_depth # Max depth of the tree - cdef public SIZE_t cell_count # Counter for node IDs - cdef public SIZE_t capacity # Capacity of tree, in terms of nodes - cdef public SIZE_t n_points # Total number of points + cdef public intp_t max_depth # Max depth of the tree + cdef public intp_t cell_count # Counter for node IDs + cdef public intp_t capacity # Capacity of tree, in terms of nodes + cdef public intp_t n_points # Total number of points cdef Cell* cells # Array of nodes # Point insertion methods - cdef int insert_point(self, DTYPE_t[3] point, SIZE_t point_index, - SIZE_t cell_id=*) except -1 nogil - cdef SIZE_t _insert_point_in_new_child(self, DTYPE_t[3] point, Cell* cell, - SIZE_t point_index, SIZE_t size=* + cdef int insert_point(self, float32_t[3] point, intp_t point_index, + intp_t cell_id=*) except -1 nogil + cdef intp_t _insert_point_in_new_child(self, float32_t[3] point, Cell* cell, + intp_t point_index, intp_t size=* ) noexcept nogil - cdef SIZE_t _select_child(self, DTYPE_t[3] point, Cell* cell) noexcept nogil - cdef bint _is_duplicate(self, DTYPE_t[3] point1, DTYPE_t[3] point2) noexcept nogil + cdef intp_t _select_child(self, float32_t[3] point, Cell* cell) noexcept nogil + cdef bint _is_duplicate(self, float32_t[3] point1, float32_t[3] point2) noexcept nogil # Create a summary of the Tree compare to a query point - cdef long summarize(self, DTYPE_t[3] point, DTYPE_t* results, - float squared_theta=*, SIZE_t cell_id=*, long idx=* + cdef long summarize(self, float32_t[3] point, float32_t* results, + float squared_theta=*, intp_t cell_id=*, long idx=* ) noexcept nogil # Internal cell initialization methods - cdef void _init_cell(self, Cell* cell, SIZE_t parent, SIZE_t depth) noexcept nogil - cdef void _init_root(self, DTYPE_t[3] min_bounds, DTYPE_t[3] max_bounds + cdef void _init_cell(self, Cell* cell, intp_t parent, intp_t depth) noexcept nogil + cdef void _init_root(self, float32_t[3] min_bounds, float32_t[3] max_bounds ) noexcept nogil # Private methods - cdef int _check_point_in_cell(self, DTYPE_t[3] point, Cell* cell + cdef int _check_point_in_cell(self, float32_t[3] point, Cell* cell ) except -1 nogil # Private array manipulation to manage the ``cells`` array - cdef int _resize(self, SIZE_t capacity) except -1 nogil - cdef int _resize_c(self, SIZE_t capacity=*) except -1 nogil - cdef int _get_cell(self, DTYPE_t[3] point, SIZE_t cell_id=*) except -1 nogil + cdef int _resize(self, intp_t capacity) except -1 nogil + cdef int _resize_c(self, intp_t capacity=*) except -1 nogil + cdef int _get_cell(self, float32_t[3] point, intp_t cell_id=*) except -1 nogil cdef Cell[:] _get_cell_ndarray(self) diff --git a/sklearn/neighbors/_quad_tree.pyx b/sklearn/neighbors/_quad_tree.pyx index 1da59c9f29206..e481e41ca65e4 100644 --- a/sklearn/neighbors/_quad_tree.pyx +++ b/sklearn/neighbors/_quad_tree.pyx @@ -80,11 +80,11 @@ cdef class _QuadTree: """Build a tree from an array of points X.""" cdef: int i - DTYPE_t[3] pt - DTYPE_t[3] min_bounds, max_bounds + float32_t[3] pt + float32_t[3] min_bounds, max_bounds # validate X and prepare for query - # X = check_array(X, dtype=DTYPE_t, order='C') + # X = check_array(X, dtype=float32_t, order='C') n_samples = X.shape[0] capacity = 100 @@ -113,13 +113,13 @@ cdef class _QuadTree: # Shrink the cells array to reduce memory usage self._resize(capacity=self.cell_count) - cdef int insert_point(self, DTYPE_t[3] point, SIZE_t point_index, - SIZE_t cell_id=0) except -1 nogil: + cdef int 
insert_point(self, float32_t[3] point, intp_t point_index, + intp_t cell_id=0) except -1 nogil: """Insert a point in the QuadTree.""" cdef int ax - cdef SIZE_t selected_child + cdef intp_t selected_child cdef Cell* cell = &self.cells[cell_id] - cdef SIZE_t n_point = cell.cumulative_size + cdef intp_t n_point = cell.cumulative_size if self.verbose > 10: printf("[QuadTree] Inserting depth %li\n", cell.depth) @@ -177,16 +177,16 @@ cdef class _QuadTree: return self.insert_point(point, point_index, cell_id) # XXX: This operation is not Thread safe - cdef SIZE_t _insert_point_in_new_child( - self, DTYPE_t[3] point, Cell* cell, SIZE_t point_index, SIZE_t size=1 + cdef intp_t _insert_point_in_new_child( + self, float32_t[3] point, Cell* cell, intp_t point_index, intp_t size=1 ) noexcept nogil: """Create a child of cell which will contain point.""" # Local variable definition cdef: - SIZE_t cell_id, cell_child_id, parent_id - DTYPE_t[3] save_point - DTYPE_t width + intp_t cell_id, cell_child_id, parent_id + float32_t[3] save_point + float32_t width Cell* child int i @@ -247,7 +247,7 @@ cdef class _QuadTree: return cell_id - cdef bint _is_duplicate(self, DTYPE_t[3] point1, DTYPE_t[3] point2) noexcept nogil: + cdef bint _is_duplicate(self, float32_t[3] point1, float32_t[3] point2) noexcept nogil: """Check if the two given points are equals.""" cdef int i cdef bint res = True @@ -256,11 +256,11 @@ cdef class _QuadTree: res &= fabsf(point1[i] - point2[i]) <= EPSILON return res - cdef SIZE_t _select_child(self, DTYPE_t[3] point, Cell* cell) noexcept nogil: + cdef intp_t _select_child(self, float32_t[3] point, Cell* cell) noexcept nogil: """Select the child of cell which contains the given query point.""" cdef: int i - SIZE_t selected_child = 0 + intp_t selected_child = 0 for i in range(self.n_dimensions): # Select the correct child cell to insert the point by comparing @@ -270,7 +270,7 @@ cdef class _QuadTree: selected_child += 1 return cell.children[selected_child] - cdef void _init_cell(self, Cell* cell, SIZE_t parent, SIZE_t depth) noexcept nogil: + cdef void _init_cell(self, Cell* cell, intp_t parent, intp_t depth) noexcept nogil: """Initialize a cell structure with some constants.""" cell.parent = parent cell.is_leaf = True @@ -280,12 +280,12 @@ cdef class _QuadTree: for i in range(self.n_cells_per_cell): cell.children[i] = SIZE_MAX - cdef void _init_root(self, DTYPE_t[3] min_bounds, DTYPE_t[3] max_bounds + cdef void _init_root(self, float32_t[3] min_bounds, float32_t[3] max_bounds ) noexcept nogil: """Initialize the root node with the given space boundaries""" cdef: int i - DTYPE_t width + float32_t width Cell* root = &self.cells[0] self._init_cell(root, -1, 0) @@ -299,7 +299,7 @@ cdef class _QuadTree: self.cell_count += 1 - cdef int _check_point_in_cell(self, DTYPE_t[3] point, Cell* cell + cdef int _check_point_in_cell(self, float32_t[3] point, Cell* cell ) except -1 nogil: """Check that the given point is in the cell boundaries.""" @@ -366,8 +366,8 @@ cdef class _QuadTree: "in children." .format(self.n_points, self.cells[0].cumulative_size)) - cdef long summarize(self, DTYPE_t[3] point, DTYPE_t* results, - float squared_theta=.5, SIZE_t cell_id=0, long idx=0 + cdef long summarize(self, float32_t[3] point, float32_t* results, + float squared_theta=.5, intp_t cell_id=0, long idx=0 ) noexcept nogil: """Summarize the tree compared to a query point. @@ -429,7 +429,7 @@ cdef class _QuadTree: # Otherwise, we go a higher level of resolution and into the leaves. 
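         # A cell is summarized by its barycenter when it is far enough from
         # the query point, i.e. when squared_max_width / squared_dist is
         # smaller than squared_theta (the Barnes-Hut criterion).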
if cell.is_leaf or ( (cell.squared_max_width / results[idx_d]) < squared_theta): - results[idx_d + 1] = cell.cumulative_size + results[idx_d + 1] = cell.cumulative_size return idx + self.n_dimensions + 2 else: @@ -446,7 +446,7 @@ cdef class _QuadTree: """return the id of the cell containing the query point or raise ValueError if the point is not in the tree """ - cdef DTYPE_t[3] query_pt + cdef float32_t[3] query_pt cdef int i assert len(point) == self.n_dimensions, ( @@ -458,14 +458,14 @@ cdef class _QuadTree: return self._get_cell(query_pt, 0) - cdef int _get_cell(self, DTYPE_t[3] point, SIZE_t cell_id=0 + cdef int _get_cell(self, float32_t[3] point, intp_t cell_id=0 ) except -1 nogil: """guts of get_cell. Return the id of the cell containing the query point or raise ValueError if the point is not in the tree""" cdef: - SIZE_t selected_child + intp_t selected_child Cell* cell = &self.cells[cell_id] if cell.is_leaf: @@ -562,7 +562,7 @@ cdef class _QuadTree: raise ValueError("Can't initialize array!") return arr - cdef int _resize(self, SIZE_t capacity) except -1 nogil: + cdef int _resize(self, intp_t capacity) except -1 nogil: """Resize all inner arrays to `capacity`, if `capacity` == -1, then double the size of the inner arrays. @@ -574,7 +574,7 @@ cdef class _QuadTree: with gil: raise MemoryError() - cdef int _resize_c(self, SIZE_t capacity=SIZE_MAX) except -1 nogil: + cdef int _resize_c(self, intp_t capacity=SIZE_MAX) except -1 nogil: """Guts of _resize Returns -1 in case of failure to allocate memory (and raise MemoryError) @@ -598,10 +598,10 @@ cdef class _QuadTree: self.capacity = capacity return 0 - def _py_summarize(self, DTYPE_t[:] query_pt, DTYPE_t[:, :] X, float angle): + def _py_summarize(self, float32_t[:] query_pt, float32_t[:, :] X, float angle): # Used for testing summarize cdef: - DTYPE_t[:] summary + float32_t[:] summary int n_samples n_samples = X.shape[0] From cdcfcbeb571ef662c9d3b7993a9e3b57a4099415 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 28 Sep 2023 12:52:08 +0200 Subject: [PATCH 07/15] DOC add narrative description and remove deprecation warning (#27440) --- README.rst | 2 +- ...38_conda_defaults_openblas_environment.yml | 2 +- ...onda_defaults_openblas_linux-64_conda.lock | 23 +- .../doc_min_dependencies_environment.yml | 2 +- .../doc_min_dependencies_linux-64_conda.lock | 182 ++++++---- examples/classification/plot_lda_qda.py | 334 ++++++++++-------- sklearn/_min_dependencies.py | 2 +- 7 files changed, 316 insertions(+), 231 deletions(-) diff --git a/README.rst b/README.rst index 4042420ba2977..ebd8388b857ed 100644 --- a/README.rst +++ b/README.rst @@ -37,7 +37,7 @@ .. |SciPyMinVersion| replace:: 1.5.0 .. |JoblibMinVersion| replace:: 1.1.1 .. |ThreadpoolctlMinVersion| replace:: 2.0.0 -.. |MatplotlibMinVersion| replace:: 3.1.3 +.. |MatplotlibMinVersion| replace:: 3.3.4 .. |Scikit-ImageMinVersion| replace:: 0.16.2 .. |PandasMinVersion| replace:: 1.0.5 .. 
|SeabornMinVersion| replace:: 0.9.0 diff --git a/build_tools/azure/py38_conda_defaults_openblas_environment.yml b/build_tools/azure/py38_conda_defaults_openblas_environment.yml index 2493606135454..1d0b5f29ed787 100644 --- a/build_tools/azure/py38_conda_defaults_openblas_environment.yml +++ b/build_tools/azure/py38_conda_defaults_openblas_environment.yml @@ -11,7 +11,7 @@ dependencies: - cython<3.0.0 - joblib - threadpoolctl=2.2.0 - - matplotlib=3.1.3 # min + - matplotlib=3.3.4 # min - pandas - pyamg - pytest diff --git a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock index bc261623f5757..ca1cae54fadf9 100644 --- a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock +++ b/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock @@ -1,10 +1,10 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: cc5492b4677e6d5132ab4ab70eda13c942bdf5f6dd53af977e801c42d5f48132 +# input_hash: 8ab29d5bd10968567d77fd3563ef1c61a10907318ccab81f9e3703588aa4dfd6 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d79366c90128f5dc444be8 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.05.30-h06a4308_0.conda#979be8dd2368decd342b13e01540d297 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.08.22-h06a4308_0.conda#243d5065a09a3e85ab888c05f5b6445a https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b https://repo.anaconda.com/pkgs/main/linux-64/libgfortran4-7.5.0-ha8ba4b0_17.conda#e3883581cbf0a98672250c3e80d292bf https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.5.0-ha8ba4b0_17.conda#ecb35c8952579d5c8dc56c6e076ba948 @@ -21,12 +21,12 @@ https://repo.anaconda.com/pkgs/main/linux-64/libdeflate-1.17-h5eee18b_0.conda#b4 https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.18-hf726d26_0.conda#10422bb3b9b022e27798fc368cda69ba https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 -https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.2.4-h5eee18b_1.conda#a65a20c48061ecf2a6f4f02eae9f2366 +https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.3.2-h5eee18b_0.conda#9179fc7baefa1e027f572edbc519d805 https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518dcadd6aaee9aae47ba9a671553 https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.4-h6a678d5_0.conda#53915e9402180a7f22ea619c41089520 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c https://repo.anaconda.com/pkgs/main/linux-64/nspr-4.35-h6a678d5_0.conda#208fff5d60133bcff6998a70c9f5203b -https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.10-h7f8727e_2.conda#066a828cc9dcd120af8c503381d6a1b8 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.11-h7f8727e_2.conda#6cad6f2dcde73f8625d729c6db1272d0 https://repo.anaconda.com/pkgs/main/linux-64/pcre-8.45-h295c915_0.conda#b32ccc24d1d9808618c1e898da60f68d https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda#bcd31de48a0dcb44bc5b99675800c5cc https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 @@ -56,9 +56,10 @@ 
https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1. https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff https://repo.anaconda.com/pkgs/main/linux-64/libclang-14.0.6-default_hc6dbbc7_1.conda#8f12583c4027b2861cff470f6b8837c4 https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.15-hdbd6064_1.conda#218227d255f6056b6f49f52dd0d1731f -https://repo.anaconda.com/pkgs/main/linux-64/libwebp-1.2.4-h11a3e52_1.conda#9f9153b30e58e9ce896f74634622cbf1 +https://repo.anaconda.com/pkgs/main/linux-64/libwebp-1.3.2-h11a3e52_0.conda#9e0d6c9abdd97b076c66d4cf488589ee https://repo.anaconda.com/pkgs/main/linux-64/nss-3.89.1-h6a678d5_0.conda#4d9d28fc3a0ca4916f281d2f5429ac50 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.8.17-h955ad1f_0.conda#f901f4fd76d24a2d598788a24e4d7246 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.8.18-h955ad1f_0.conda#fa35c1028f48db26df051ee75dd9422f +https://repo.anaconda.com/pkgs/main/linux-64/certifi-2023.7.22-py38h06a4308_0.conda#59416ad8979a654bb8f5184b62d8a9e7 https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab https://repo.anaconda.com/pkgs/main/linux-64/cython-0.29.36-py38h5eee18b_0.conda#0465e461450c86b395da9ccc3853d7dc https://repo.anaconda.com/pkgs/main/linux-64/exceptiongroup-1.0.4-py38h06a4308_0.conda#db954e73dca6076c64a1004d71b45784 @@ -69,12 +70,12 @@ https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.4-py38h6a678d5_0.con https://repo.anaconda.com/pkgs/main/linux-64/mysql-5.7.24-h721c034_2.conda#dfc19ca2466d275c4c1f73b62c57f37b https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.17.3-py38h2f8d375_0.conda#40edbb76ecacefb1e6ab639b514822b1 https://repo.anaconda.com/pkgs/main/linux-64/packaging-23.1-py38h06a4308_0.conda#9ec9b6ee22dad7f49806c51218befd5b -https://repo.anaconda.com/pkgs/main/linux-64/pillow-9.4.0-py38h6a678d5_0.conda#8afd1f4f8b23a1c44fca4975253b17f7 +https://repo.anaconda.com/pkgs/main/linux-64/pillow-9.4.0-py38h6a678d5_1.conda#3cc4f7f7c7ca5d7a5c5f26ad5425d8ef https://repo.anaconda.com/pkgs/main/linux-64/pluggy-1.0.0-py38h06a4308_1.conda#87bb1d3f6cf3e409a1dac38cee99918e https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py38_0.conda#d6a69c576c6e4d19e3074eaae3d149f2 https://repo.anaconda.com/pkgs/main/noarch/py-1.11.0-pyhd3eb1b0_0.conda#7205a898ed2abbf6e9b903dff6abe08e https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-3.0.9-py38h06a4308_0.conda#becbbf51d2b05de228eed968e20f963d -https://repo.anaconda.com/pkgs/main/linux-64/pytz-2022.7-py38h06a4308_0.conda#19c9f6a24d5c6f779c645d00f646666b +https://repo.anaconda.com/pkgs/main/linux-64/pytz-2023.3.post1-py38h06a4308_0.conda#351d59ddfed216ab9b05481d3bb63106 https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.0.0-py38h06a4308_0.conda#24f9c895455f3992d6b04957fd0e7546 https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 https://repo.anaconda.com/pkgs/main/noarch/threadpoolctl-2.2.0-pyh0d69192_0.conda#bbfdbae4934150b902f97daaf287efe2 @@ -87,10 +88,10 @@ https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.4.0-py38h06a4308_0.conda#b https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h7358343_9.conda#d3eac069d7e4e93b866a07c2274c9ee7 https://repo.anaconda.com/pkgs/main/linux-64/sip-6.6.2-py38h6a678d5_0.conda#cb3f0d10f7f79870945f4dbbe0000f92 
-https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.1.3-py38hef1b27d_0.conda#a7ad7d097c25b7beeb76f370d51687a1 +https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.3.4-py38h62a2d02_0.conda#7156fafe3362d0b6a2de43e0002febb3 https://repo.anaconda.com/pkgs/main/linux-64/pandas-1.2.4-py38ha9443f7_0.conda#5bd3fd807a294f387feabc65821b75d0 https://repo.anaconda.com/pkgs/main/linux-64/pyqt5-sip-12.11.0-py38h6a678d5_1.conda#7bc403c7d55f1465e922964d293d2186 -https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.0.0-py38h06a4308_0.conda#54035e39255f285f98ca1141b7f098e7 +https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.1.0-py38h06a4308_0.conda#ef981a8b88a9ecf7a84bf50516211e0c https://repo.anaconda.com/pkgs/main/noarch/pytest-forked-1.3.0-pyhd3eb1b0_0.tar.bz2#07970bffdc78f417d7f8f1c7e620f5c4 https://repo.anaconda.com/pkgs/main/linux-64/qt-webengine-5.15.9-h9ab4d14_7.conda#907aa480f11eabd16bd6c72c81720ef2 https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.5.0-py38habc2bb6_0.conda#a27a97fc2377ab74cbd33ce22d3c3353 @@ -98,4 +99,4 @@ https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py38h79cecc1_0.conda#6e https://repo.anaconda.com/pkgs/main/noarch/pytest-xdist-2.5.0-pyhd3eb1b0_0.conda#d15cdc4207bcf8ca920822597f1d138d https://repo.anaconda.com/pkgs/main/linux-64/qtwebkit-5.212-h3fafdc1_5.conda#e811bbc0456e3d3a02cab199492153ee https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.7-py38h6a678d5_1.conda#62232dc285be8e7e85ae9596d89b3b95 -https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.1.3-py38_0.conda#70d5f6df438d469dc78f082389ada23d +https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.3.4-py38h06a4308_0.conda#96033fd3465abc467ae394c6852930de diff --git a/build_tools/circle/doc_min_dependencies_environment.yml b/build_tools/circle/doc_min_dependencies_environment.yml index 3f3ba57eae8c6..b9e2c19bf3737 100644 --- a/build_tools/circle/doc_min_dependencies_environment.yml +++ b/build_tools/circle/doc_min_dependencies_environment.yml @@ -11,7 +11,7 @@ dependencies: - cython=0.29.33 # min - joblib - threadpoolctl - - matplotlib=3.1.3 # min + - matplotlib=3.3.4 # min - pandas=1.0.5 # min - pyamg - pytest diff --git a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock index 78b2081a07b0f..d4e97e06695d5 100644 --- a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock +++ b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock @@ -1,76 +1,108 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 65e3ed797c4adee3f15a519d65e7dd7ed27340eb62a52cd1d1b597c377bc77f5 +# input_hash: 89bff8490bf2ceb18906f6ddd4607d9246575b235172ddd0f03716ae6cb21a3d @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_16.conda#7ca122655873935e02c91279c5b03c8c https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.36.1-hea4e1c9_2.tar.bz2#bd4f2e711b39af170e7ff15163fe87ee https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-7.5.0-hda03d7c_20.tar.bz2#2146b25eb2a762a44fab709338a7b6d9 https://conda.anaconda.org/conda-forge/linux-64/libgfortran4-7.5.0-h14aa051_20.tar.bz2#a072eab836c3a9578ce72b5640ce592d https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-7.5.0-hb016644_20.tar.bz2#31d5500f621954679ee41d7f5d1089fb -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.8-3_cp38.conda#2f3f7af062b42d664117662612022204 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_2.conda#9172c297304f2a20134fc56c97fbe229 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.8-4_cp38.conda#ea6b353536f42246cd130c7fef1285cf +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-7.5.0-h14aa051_20.tar.bz2#c3b2ad091c043c08689e64b10741484b -https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.1.0-he5830b7_0.conda#56ca14d57ac29a75d23a39eb3ee0ddeb +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_2.conda#e2042154faafe61969556f28bade94b9 https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_16.conda#071ea8dceff4d30ac511f4a2f8437cd1 https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.36.1-h193b22a_2.tar.bz2#32aae4265554a47ea77f7c09f86aeb3b +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/binutils-2.36.1-hdd6e379_2.tar.bz2#3111f86041b5b6863545ca49130cca95 https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.36-hf3e587d_33.tar.bz2#72b245322c589284f1b92a5c971e5cb6 https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_2.conda#c28003b0be0494f9a7664389146716ff 
+https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.10-hd590300_0.conda#75dae9a4201732aa78a530b826ee5fe0 +https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-7.5.0-habd7529_20.tar.bz2#42140612518a7ce78f571d64b6a50ba3 https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/icu-64.2-he1b5a44_1.tar.bz2#8e881214a23508f1541eb7a3135d6fcb -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h0b41bf4_3.conda#c7a069243e1fbe9a556ed2ec030e6407 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.17-h0b41bf4_0.conda#5cc781fd91968b11a8a7fdbee0982676 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.19-hd590300_0.conda#1635570038840ee3f9c71d22aa5b8b6d https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.2.1-he1b5a44_1007.tar.bz2#11389072d7d6036fd811c3d9460475cd +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-hd590300_1.conda#323e90742f0f48fc22bea908735f55e6 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 +https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 +https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.1-hd590300_0.conda#82bf6f63eb15ef719b556b63feec3a77 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-0.10.0-he1b5a44_0.tar.bz2#78ccac2098edcd3673af2ceb3e95f932 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.2-hd590300_0.conda#30de3fd9b3b602f7473f30e684eeea8c https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 
-https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1v-hd590300_0.conda#b1701dc29287ef4638ccc7f12cf73405 -https://conda.anaconda.org/conda-forge/linux-64/pcre-8.45-h9c3ff4c_0.tar.bz2#c05d1820a6d34ff07aaaab7a9b7eddaa +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.3-hd590300_0.conda#7bb88ce04c8deb9f7d763ae04a1da72f +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-7.5.0-h47867f9_33.tar.bz2#3a31c3f430a31184a5d07e67d3b24e2c https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-7.5.0-h56cb351_20.tar.bz2#8f897b30195bd3a2251b4c51c3cc91cf https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-7.5.0-hd0bb8aa_20.tar.bz2#dbe78fc5fb9c339f8e55426559e12f7b -https://conda.anaconda.org/conda-forge/linux-64/libllvm9-9.0.1-default_hc23dcda_7.tar.bz2#9f4686a2c319355fe8636ca13783c3b4 +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.0-h2797004_0.conda#903fa782a9067d5934210df6d79220f6 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 +https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c 
+https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.5-h232c23b_1.conda#f3858448893839820d4bcfb14ad3ecdf +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.33-hf1915f5_4.conda#f6f0ac5665849afc0716213a6cff224d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.1.1-h516909a_0.tar.bz2#d98aa4948ec35f52907e2d6152e2b255 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-7.5.0-h78c8a43_33.tar.bz2#b2879010fb369f4012040f7a27657cd8 https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-7.5.0-h555fc39_33.tar.bz2#5cf979793d2c5130a012cb6480867adc -https://conda.anaconda.org/conda-forge/linux-64/libclang-9.0.1-default_hb4e5071_5.tar.bz2#9dde69aa2a8ecd575a16e44987bdc9f7 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.66.3-hbe7bbb4_0.tar.bz2#d5a09a9e981849b751cb75656b7302a0 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-h6adf6a1_2.conda#2e648a34072eb39d7c4fc2a9981c5f0c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.10-hee79883_0.tar.bz2#0217b0926808b1adf93247bba489d733 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.0-hebfc3b9_0.conda#e618003da3547216310088478e475945 +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_3.conda#9efe82d44b76a7529a1d702e5a37752e +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hbc2eb40_0.conda#38f84d395629e48b7c7b48a8ca740341 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h29866fb_1.conda#4e9afd30f4ccb2f98645e51005f82236 https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.6-h4dfa4b3_0.conda#b096c85c415519259e731d8fb719a3ef +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_4.conda#db7f2c877209ac620fcd1c3ce7407cf0 https://conda.anaconda.org/conda-forge/linux-64/nss-3.92-h1d7d5a4_0.conda#22c89a3d87828fe925b310b9cdf0f574 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.43.0-h2c6b66d_0.conda#713f9eac95d051abe14c3774376854fe -https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.1.1-hc9558a2_0.tar.bz2#1eb7c67eb11eab0c98a87f84174fdde1 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.1.1-he991be0_0.tar.bz2#e38ac82cc517b9e245c1ae99f9f140da 
-https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-hfd0df8a_0.conda#aa8840cdf17ef0c6084d1e24abc7a28b -https://conda.anaconda.org/conda-forge/linux-64/mkl-2020.4-h726a3e6_304.tar.bz2#b9b35a50e5377b19da6ec0709ae77fc3 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/linux-64/python-3.8.6-h852b56e_0_cpython.tar.bz2#dd65401dfb61ac030edc0dc4d15c2c51 +https://conda.anaconda.org/conda-forge/linux-64/python-3.8.17-he550d4f_0_cpython.conda#72d038de0a228e4f0ef4011940641293 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.6-h8ee46fc_0.conda#7590b76c3d11d21caa44f3fc38ac584a https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.13-pyhd8ed1ab_0.conda#06006184e203b61d3525f90de394471e https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py38h17151c0_0.conda#5b332445993432e76df706fe1ebe776d https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 @@ -78,32 +110,41 @@ https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1a https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda#f3ad426304898027fc619827ff428eca https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.2.1-pyhd8ed1ab_0.conda#b325bfc4cff7d7f8a868f1f7ecc4ed16 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/linux-64/compilers-1.1.1-0.tar.bz2#1ba267e19dbaf3db9dd0404e6fb9cdb9 +https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.1.1-hc9558a2_0.tar.bz2#1eb7c67eb11eab0c98a87f84174fdde1 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.33-py38h8dc9893_0.conda#5d50cd654981f0ccc7c878ac297afaa7 +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py38h578d9bd_1.tar.bz2#3746b24949251f1a00ae0d616d4cdc1b https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 -https://conda.anaconda.org/conda-forge/noarch/fsspec-2023.6.0-pyh1a96a4e_0.conda#50ea2067ec92dfcc38b4f07992d7e235 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.66.3-h58526e2_0.tar.bz2#62c2e5c84f6cdc7ded2307ef9c30dc8c +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d +https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.1.1-he991be0_0.tar.bz2#e38ac82cc517b9e245c1ae99f9f140da +https://conda.anaconda.org/conda-forge/noarch/fsspec-2023.9.1-pyh1a96a4e_0.conda#d69753ff6ee3c84a6638921dd95db662 
+https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.0-hfc55251_0.conda#e10134de3558dd95abda6987b5548f4f https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py38h7f3f72f_0.conda#eec56ac40315e360dd57c2de6604a325 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.8.0-20_mkl.tar.bz2#8fbce60932c01d0e193a1a814f2002be +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py38h7f3f72f_1.conda#b66dcd4f710628fc5563ad56f02ca89b +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-h7f713cb_2.conda#9ab79924a3760f85a799f21bc99bd655 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_3.conda#1720df000b48e31842500323cb7be18c +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libpq-15.4-hfc447b1_0.conda#b9ce311e7aba8b5fc3122254f0a6e97e +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-254-h3516f8a_0.conda#df4b1cd0c91b4234fb02b5701a4cdddc https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.3-py38h01eb140_0.conda#17d2a5314adf0f25220eeebb312d00a4 +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.3-py38h01eb140_1.conda#2dabf287937cd631e292096cc6d0867e +https://conda.anaconda.org/conda-forge/linux-64/mkl-2020.4-h726a3e6_304.tar.bz2#b9b35a50e5377b19da6ec0709ae77fc3 https://conda.anaconda.org/conda-forge/noarch/networkx-3.1-pyhd8ed1ab_0.conda#254f787d5068bc89f578bf63893ce8b4 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h488ebb8_3.conda#128c25b7fe6a25286a48f3a6a9b5b6f3 https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.4.0-py38hde6dc18_1.conda#3de5619d3f556f966189e5251a266125 https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d -https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py38h1de0b5d_0.conda#92e899e7b0ed27c793014d1fa54f9b7b +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 +https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py38h01eb140_1.conda#89cb08bb523adf12fed3829558638d84 https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 https://conda.anaconda.org/conda-forge/noarch/pygments-2.16.1-pyhd8ed1ab_0.conda#40e5cb18165466773619e5c963f00a7b https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.1-py38h01eb140_0.conda#ece207648b63c36c16a2caa201509e51 +https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3.post1-pyhd8ed1ab_0.conda#c93346b446cd08c169d843ae5fc0da97 
+https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.1-py38h01eb140_1.conda#5f05353ae9a6c37e1b4aebc9f7834d23 https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py38h578d9bd_1.tar.bz2#da023e4a9c777abc28434d7a6473dcc2 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e @@ -115,57 +156,72 @@ https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.ta https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.3-pyhd8ed1ab_0.conda#1482e77f87c6a702a7e05ef22c9b197b https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py38h01eb140_0.conda#465bbfc0eb2022837d957d045b6b627a -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py38h01eb140_1.conda#660cfc2fc5bd9e3b458ad394976652cf +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.8.0-pyha770c72_0.conda#5b1be40a26d10a06f6d4f1f9e19fa0c7 https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.2-pyhd8ed1ab_0.conda#1ccd092478b3e0ee10d7a891adbf8a4f -https://conda.anaconda.org/conda-forge/noarch/zipp-3.16.2-pyhd8ed1ab_0.conda#2da0451b54c4563c32490cb1b7cf68a1 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.39-hd590300_0.conda#d88c7fc8a11858fb14761832e4da1954 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a https://conda.anaconda.org/conda-forge/noarch/babel-2.12.1-pyhd8ed1ab_1.conda#ac432e732804a81ddcf29c92ead57cde +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-h0c91306_1017.conda#3db543896d34fc6804ddfb9239dcb125 +https://conda.anaconda.org/conda-forge/linux-64/compilers-1.1.1-0.tar.bz2#1ba267e19dbaf3db9dd0404e6fb9cdb9 https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.2-py38h01eb140_0.conda#e9d465b78d0b41beeb6bcceb6714520d -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-hfdff14a_1.tar.bz2#4caaca6356992ee545080c7d7193b5a3 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.14.5-h36ae1b5_2.tar.bz2#00084ab2657be5bf0ba0757ccde797ef +https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.0-hfc55251_0.conda#2f55a36b549f51a7e0c2b1e3c3f0ccd4 https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.8.0-pyha770c72_0.conda#4e9f59a060c3be52bc4ddc46ee9b6946 
https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.2-pyhd8ed1ab_1.tar.bz2#c8490ed5c70966d232fdd389d0dbed37 https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.8.0-20_mkl.tar.bz2#14b25490fdcc44e879ac6c10fe764f68 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.8.0-20_mkl.tar.bz2#52c0ae3606eeae7e1d493f37f336f4f5 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.8.0-20_mkl.tar.bz2#8fbce60932c01d0e193a1a814f2002be +https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_3.conda#0922208521c0463e690bbaebba7eb551 +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b -https://conda.anaconda.org/conda-forge/noarch/partd-1.4.0-pyhd8ed1ab_0.conda#721dab5803ea92ce02ddc4ee50aa0c48 +https://conda.anaconda.org/conda-forge/noarch/partd-1.4.0-pyhd8ed1ab_1.conda#6ceb4e000cbe0b56b290180aea8520e8 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.0.1-py38h71741d6_1.conda#461be46dfe70775d0041a65ad7db000e https://conda.anaconda.org/conda-forge/noarch/pip-23.2.1-pyhd8ed1ab_0.conda#e2783aa3f9235225eec92f9081c5b801 https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.0-pyhd8ed1ab_0.conda#6a7bcc42ef58dd6cf3da9333ea102433 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_5.conda#ac902ff3c1c6d750dd0dfc93a974ab74 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.2-pyhd8ed1ab_0.conda#6dd662ff5ac9a783e5c940ce9f3fe649 https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.14.5-h0935bb2_2.tar.bz2#eb125ee86480e00a4a1ed45a577c3311 +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.11-py38h17151c0_0.conda#f05f0120127bac812e948b02997e4374 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.8.0-hd8ed1ab_0.conda#384462e63262a527bda564fa2d9126c0 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.5-pyhd8ed1ab_0.conda#3bda70bbeb2920f44db5375af2e5fe38 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.6-h98fc4e7_0.conda#882a66517c52cae2719ac25308f61316 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.2.1-h3d44ed6_0.conda#98db5f8813f45e2b29766aff0e4a499c https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-6.8.0-hd8ed1ab_0.conda#b279b07ce18058034e5b3606ba103a8b -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.8.0-20_mkl.tar.bz2#8274dc30518af9df1de47f5d9e73165c -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.3-py38h95a1406_0.tar.bz2#bc0cbf611fe2f86eab29b98e51404f5e +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.8.0-20_mkl.tar.bz2#14b25490fdcc44e879ac6c10fe764f68 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.8.0-20_mkl.tar.bz2#52c0ae3606eeae7e1d493f37f336f4f5 
https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py38h17151c0_4.conda#95447fd7bd5b420df7e7eb405f19f463 https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/blas-2.20-mkl.tar.bz2#e7d09a07f5413e53dca5282b8fa50bed https://conda.anaconda.org/conda-forge/noarch/dask-core-2023.5.0-pyhd8ed1ab_0.conda#03ed2d040648a5ba1063bf1cb0d87b78 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.31.1-pyh24c5eb1_0.conda#1051cc0376612ba101d4f59e954a1ff4 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.1.3-py38h250f245_0.tar.bz2#eb182969d8ed019d4de6939f393270d2 -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.0.5-py38hcb8c335_0.tar.bz2#1e1b4382170fd26cf722ef008ffb651e +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.6-h8e1006c_0.conda#cd758f0e1d30ada1c320be50767dd55e +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.8.0-20_mkl.tar.bz2#8274dc30518af9df1de47f5d9e73165c +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.3-py38h95a1406_0.tar.bz2#bc0cbf611fe2f86eab29b98e51404f5e https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.1.1-py38h5c078b8_3.tar.bz2#dafeef887e68bd18ec84681747ca0fd5 -https://conda.anaconda.org/conda-forge/linux-64/qt-5.12.5-hd8c4c69_1.tar.bz2#0e105d4afe0c3c81c4fbd9937ec4f359 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.5.0-py38h18bccfc_0.tar.bz2#b6fda3b4ee494afef756621daa115d4d https://conda.anaconda.org/conda-forge/noarch/sphinx-6.0.0-pyhd8ed1ab_2.conda#ac1d3b55da1669ee3a56973054fd7efb +https://conda.anaconda.org/conda-forge/linux-64/blas-2.20-mkl.tar.bz2#e7d09a07f5413e53dca5282b8fa50bed +https://conda.anaconda.org/conda-forge/noarch/imageio-2.31.1-pyh24c5eb1_0.conda#1051cc0376612ba101d4f59e954a1ff4 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.3.4-py38h0efea84_0.tar.bz2#9818b095ff2ddceadb7553b0d56d219f https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb +https://conda.anaconda.org/conda-forge/linux-64/pandas-1.0.5-py38hcb8c335_0.tar.bz2#1e1b4382170fd26cf722ef008ffb651e +https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.1.1-py38h5c078b8_3.tar.bz2#dafeef887e68bd18ec84681747ca0fd5 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc47bfe8_16.conda#a8dd2dfcd570e3965c73be6c5e03e74f +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.5.0-py38h18bccfc_0.tar.bz2#b6fda3b4ee494afef756621daa115d4d +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 +https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.3-pyhd8ed1ab_0.tar.bz2#50ef6b29b1fb0768ca82c5aeb4fb2d96 https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.0.0-py38hf6732f7_1003.tar.bz2#44e00bf7a4b6a564e9313181aaea2615 
-https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38ha8c2ead_3.tar.bz2#242c206b0c30fdc4c18aea16f04c4262
+https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py38hffdaa6c_4.conda#8a230666b1e346b9bc995a8eef0c732e
 https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.16.2-py38hb3f55d8_0.tar.bz2#468b398fefac8884cd6e6513af66549b
 https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42
-https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995
 https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.10.1-pyhd8ed1ab_0.tar.bz2#4918585fe5e5341740f7e63c61743efb
-https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.1.3-py38_0.tar.bz2#1992ab91bbff86ded8d99d1f488d8e8b
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.3.4-py38h578d9bd_0.tar.bz2#2ad11624aec829f58f86a231bbdf3990
 https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.12.2-py38h5c078b8_0.tar.bz2#33787719ad03d33cffc4e2e3ea82bc9e
 https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.2-hd8ed1ab_0.conda#50847a47c07812f88581081c620f5160
 # pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357
diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py
index 71230d0a9bcd9..0691f52390a06 100644
--- a/examples/classification/plot_lda_qda.py
+++ b/examples/classification/plot_lda_qda.py
@@ -3,135 +3,94 @@
 Linear and Quadratic Discriminant Analysis with covariance ellipsoid
 ====================================================================
-This example plots the covariance ellipsoids of each class and
-decision boundary learned by LDA and QDA. The ellipsoids display
-the double standard deviation for each class. With LDA, the
-standard deviation is the same for all the classes, while each
-class has its own standard deviation with QDA.
-
+This example plots the covariance ellipsoids of each class and the decision boundary
+learned by :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis` (LDA) and
+:class:`~sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis` (QDA). The
+ellipsoids display twice the standard deviation for each class. With LDA, the standard
+deviation is the same for all the classes, while each class has its own standard
+deviation with QDA.
 """
 # %%
-# Colormap
-# --------
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-from matplotlib import colors
-
-cmap = colors.LinearSegmentedColormap(
-    "red_blue_classes",
-    {
-        "red": [(0, 1, 1), (1, 0.7, 0.7)],
-        "green": [(0, 0.7, 0.7), (1, 0.7, 0.7)],
-        "blue": [(0, 0.7, 0.7), (1, 1, 1)],
-    },
-)
-plt.cm.register_cmap(cmap=cmap)
-
-
-# %%
-# Datasets generation functions
-# -----------------------------
-
+# Data generation
+# ---------------
+#
+# First, we define a function to generate synthetic data. It creates two blobs centered
+# at `(0, 0)` and `(1, 1)`. Each blob is assigned a specific class. The dispersion of
+# each blob is controlled by the parameters `cov_class_1` and `cov_class_2`, which are
+# the covariance matrices used when generating the samples from the Gaussian distributions.
import numpy as np -def dataset_fixed_cov(): - """Generate 2 Gaussians samples with the same covariance matrix""" - n, dim = 300, 2 - np.random.seed(0) - C = np.array([[0.0, -0.23], [0.83, 0.23]]) - X = np.r_[ - np.dot(np.random.randn(n, dim), C), - np.dot(np.random.randn(n, dim), C) + np.array([1, 1]), - ] - y = np.hstack((np.zeros(n), np.ones(n))) +def make_data(n_samples, n_features, cov_class_1, cov_class_2, seed=0): + rng = np.random.RandomState(seed) + X = np.concatenate( + [ + rng.randn(n_samples, n_features) @ cov_class_1, + rng.randn(n_samples, n_features) @ cov_class_2 + np.array([1, 1]), + ] + ) + y = np.concatenate([np.zeros(n_samples), np.ones(n_samples)]) return X, y -def dataset_cov(): - """Generate 2 Gaussians samples with different covariance matrices""" - n, dim = 300, 2 - np.random.seed(0) - C = np.array([[0.0, -1.0], [2.5, 0.7]]) * 2.0 - X = np.r_[ - np.dot(np.random.randn(n, dim), C), - np.dot(np.random.randn(n, dim), C.T) + np.array([1, 4]), - ] - y = np.hstack((np.zeros(n), np.ones(n))) - return X, y +# %% +# We generate three datasets. In the first dataset, the two classes share the same +# covariance matrix, and this covariance matrix has the specificity of being spherical +# (isotropic). The second dataset is similar to the first one but does not enforce the +# covariance to be spherical. Finally, the third dataset has a non-spherical covariance +# matrix for each class. +covariance = np.array([[1, 0], [0, 1]]) +X_isotropic_covariance, y_isotropic_covariance = make_data( + n_samples=1_000, + n_features=2, + cov_class_1=covariance, + cov_class_2=covariance, + seed=0, +) +covariance = np.array([[0.0, -0.23], [0.83, 0.23]]) +X_shared_covariance, y_shared_covariance = make_data( + n_samples=300, + n_features=2, + cov_class_1=covariance, + cov_class_2=covariance, + seed=0, +) +cov_class_1 = np.array([[0.0, -1.0], [2.5, 0.7]]) * 2.0 +cov_class_2 = cov_class_1.T +X_different_covariance, y_different_covariance = make_data( + n_samples=300, + n_features=2, + cov_class_1=cov_class_1, + cov_class_2=cov_class_2, + seed=0, +) # %% -# Plot functions -# -------------- - -from scipy import linalg - - -def plot_data(lda, X, y, y_pred, fig_index): - splot = plt.subplot(2, 2, fig_index) - if fig_index == 1: - plt.title("Linear Discriminant Analysis") - plt.ylabel("Data with\n fixed covariance") - elif fig_index == 2: - plt.title("Quadratic Discriminant Analysis") - elif fig_index == 3: - plt.ylabel("Data with\n varying covariances") - - tp = y == y_pred # True Positive - tp0, tp1 = tp[y == 0], tp[y == 1] - X0, X1 = X[y == 0], X[y == 1] - X0_tp, X0_fp = X0[tp0], X0[~tp0] - X1_tp, X1_fp = X1[tp1], X1[~tp1] - - # class 0: dots - plt.scatter(X0_tp[:, 0], X0_tp[:, 1], marker=".", color="red") - plt.scatter(X0_fp[:, 0], X0_fp[:, 1], marker="x", s=20, color="#990000") # dark red - - # class 1: dots - plt.scatter(X1_tp[:, 0], X1_tp[:, 1], marker=".", color="blue") - plt.scatter( - X1_fp[:, 0], X1_fp[:, 1], marker="x", s=20, color="#000099" - ) # dark blue - - # class 0 and 1 : areas - nx, ny = 200, 100 - x_min, x_max = plt.xlim() - y_min, y_max = plt.ylim() - xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx), np.linspace(y_min, y_max, ny)) - Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()]) - Z = Z[:, 1].reshape(xx.shape) - plt.pcolormesh( - xx, yy, Z, cmap="red_blue_classes", norm=colors.Normalize(0.0, 1.0), zorder=0 - ) - plt.contour(xx, yy, Z, [0.5], linewidths=2.0, colors="white") - - # means - plt.plot( - lda.means_[0][0], - lda.means_[0][1], - "*", - color="yellow", - 
markersize=15, - markeredgecolor="grey", - ) - plt.plot( - lda.means_[1][0], - lda.means_[1][1], - "*", - color="yellow", - markersize=15, - markeredgecolor="grey", - ) +# Plotting Functions +# ------------------ +# +# The code below is used to plot several pieces of information from the estimators used, +# i.e., :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis` (LDA) and +# :class:`~sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis` (QDA). The +# displayed information includes: +# +# - the decision boundary based on the probability estimate of the estimator; +# - a scatter plot with circles representing the well-classified samples; +# - a scatter plot with crosses representing the misclassified samples; +# - the mean of each class, estimated by the estimator, marked with a star; +# - the estimated covariance represented by an ellipse at 2 standard deviations from the +# mean. +import matplotlib as mpl +from matplotlib import colors - return splot +from sklearn.inspection import DecisionBoundaryDisplay -def plot_ellipse(splot, mean, cov, color): - v, w = linalg.eigh(cov) - u = w[0] / linalg.norm(w[0]) +def plot_ellipse(mean, cov, color, ax): + v, w = np.linalg.eigh(cov) + u = w[0] / np.linalg.norm(w[0]) angle = np.arctan(u[1] / u[0]) angle = 180 * angle / np.pi # convert to degrees # filled Gaussian at 2 standard deviation @@ -144,54 +103,123 @@ def plot_ellipse(splot, mean, cov, color): edgecolor="black", linewidth=2, ) - ell.set_clip_box(splot.bbox) - ell.set_alpha(0.2) - splot.add_artist(ell) - splot.set_xticks(()) - splot.set_yticks(()) - - -def plot_lda_cov(lda, splot): - plot_ellipse(splot, lda.means_[0], lda.covariance_, "red") - plot_ellipse(splot, lda.means_[1], lda.covariance_, "blue") + ell.set_clip_box(ax.bbox) + ell.set_alpha(0.4) + ax.add_artist(ell) + + +def plot_result(estimator, X, y, ax): + cmap = colors.ListedColormap(["tab:red", "tab:blue"]) + DecisionBoundaryDisplay.from_estimator( + estimator, + X, + response_method="predict_proba", + plot_method="pcolormesh", + ax=ax, + cmap="RdBu", + alpha=0.3, + ) + DecisionBoundaryDisplay.from_estimator( + estimator, + X, + response_method="predict_proba", + plot_method="contour", + ax=ax, + alpha=1.0, + levels=[0.5], + ) + y_pred = estimator.predict(X) + X_right, y_right = X[y == y_pred], y[y == y_pred] + X_wrong, y_wrong = X[y != y_pred], y[y != y_pred] + ax.scatter(X_right[:, 0], X_right[:, 1], c=y_right, s=20, cmap=cmap, alpha=0.5) + ax.scatter( + X_wrong[:, 0], + X_wrong[:, 1], + c=y_wrong, + s=30, + cmap=cmap, + alpha=0.9, + marker="x", + ) + ax.scatter( + estimator.means_[:, 0], + estimator.means_[:, 1], + c="yellow", + s=200, + marker="*", + edgecolor="black", + ) + if isinstance(estimator, LinearDiscriminantAnalysis): + covariance = [estimator.covariance_] * 2 + else: + covariance = estimator.covariance_ + plot_ellipse(estimator.means_[0], covariance[0], "tab:red", ax) + plot_ellipse(estimator.means_[1], covariance[1], "tab:blue", ax) -def plot_qda_cov(qda, splot): - plot_ellipse(splot, qda.means_[0], qda.covariance_[0], "red") - plot_ellipse(splot, qda.means_[1], qda.covariance_[1], "blue") + ax.set_box_aspect(1) + ax.spines["top"].set_visible(False) + ax.spines["bottom"].set_visible(False) + ax.spines["left"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.set(xticks=[], yticks=[]) # %% -# Plot -# ---- - -plt.figure(figsize=(10, 8), facecolor="white") -plt.suptitle( - "Linear Discriminant Analysis vs Quadratic Discriminant Analysis", - y=0.98, - fontsize=15, -) +# Comparison of 
LDA and QDA +# ------------------------- +# +# We compare the two estimators LDA and QDA on all three datasets. +import matplotlib.pyplot as plt from sklearn.discriminant_analysis import ( LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis, ) -for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): - # Linear Discriminant Analysis - lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True) - y_pred = lda.fit(X, y).predict(X) - splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1) - plot_lda_cov(lda, splot) - plt.axis("tight") - - # Quadratic Discriminant Analysis - qda = QuadraticDiscriminantAnalysis(store_covariance=True) - y_pred = qda.fit(X, y).predict(X) - splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2) - plot_qda_cov(qda, splot) - plt.axis("tight") - -plt.tight_layout() -plt.subplots_adjust(top=0.92) +fig, axs = plt.subplots(nrows=3, ncols=2, sharex="row", sharey="row", figsize=(8, 12)) + +lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True) +qda = QuadraticDiscriminantAnalysis(store_covariance=True) + +for ax_row, X, y in zip( + axs, + (X_isotropic_covariance, X_shared_covariance, X_different_covariance), + (y_isotropic_covariance, y_shared_covariance, y_different_covariance), +): + lda.fit(X, y) + plot_result(lda, X, y, ax_row[0]) + qda.fit(X, y) + plot_result(qda, X, y, ax_row[1]) + +axs[0, 0].set_title("Linear Discriminant Analysis") +axs[0, 0].set_ylabel("Data with fixed and spherical covariance") +axs[1, 0].set_ylabel("Data with fixed covariance") +axs[0, 1].set_title("Quadratic Discriminant Analysis") +axs[2, 0].set_ylabel("Data with varying covariances") +fig.suptitle( + "Linear Discriminant Analysis vs Quadratic Discriminant Analysis", + y=0.94, + fontsize=15, +) plt.show() + +# %% +# The first important thing to notice is that LDA and QDA are equivalent for the +# first and second datasets. Indeed, the major difference is that LDA assumes +# that the covariance matrix of each class is equal, while QDA estimates a +# covariance matrix per class. Since in these cases the data generative process +# has the same covariance matrix for both classes, QDA estimates two covariance +# matrices that are (almost) equal and therefore equivalent to the covariance +# matrix estimated by LDA. +# +# In the first dataset the covariance matrix used to generate the dataset is +# spherical, which results in a discriminant boundary that aligns with the +# perpendicular bisector between the two means. This is no longer the case for +# the second dataset. The discriminant boundary only passes through the middle +# of the two means. +# +# Finally, in the third dataset, we observe the real difference between LDA and +# QDA. QDA fits two covariance matrices and provides a non-linear discriminant +# boundary, whereas LDA underfits since it assumes that both classes share a +# single covariance matrix. 
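A note on the closing narrative of this example: the equivalence of LDA and QDA under a truly shared covariance matrix is easy to check numerically. Below is a minimal, self-contained sketch of that check; it is illustrative only (not part of the patch) and rebuilds the shared-covariance blobs inline instead of reusing the example's `make_data` helper.

    import numpy as np

    from sklearn.discriminant_analysis import (
        LinearDiscriminantAnalysis,
        QuadraticDiscriminantAnalysis,
    )

    # Two classes drawn with the same covariance matrix; only the means differ.
    rng = np.random.RandomState(0)
    cov = np.array([[0.0, -0.23], [0.83, 0.23]])
    X = np.concatenate([rng.randn(300, 2) @ cov, rng.randn(300, 2) @ cov + [1.0, 1.0]])
    y = np.concatenate([np.zeros(300), np.ones(300)])

    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True).fit(X, y)
    qda = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X, y)

    # With a genuinely shared covariance, QDA's two estimated covariance
    # matrices are nearly equal, so the two models should agree on almost
    # every point; the printed agreement is expected to be close to 1.0.
    print(np.mean(lda.predict(X) == qda.predict(X)))
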
diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py
index e68b086993f4a..703419ac97767 100644
--- a/sklearn/_min_dependencies.py
+++ b/sklearn/_min_dependencies.py
@@ -29,7 +29,7 @@
     "joblib": (JOBLIB_MIN_VERSION, "install"),
     "threadpoolctl": (THREADPOOLCTL_MIN_VERSION, "install"),
     "cython": (CYTHON_MIN_VERSION, "build"),
-    "matplotlib": ("3.1.3", "benchmark, docs, examples, tests"),
+    "matplotlib": ("3.3.4", "benchmark, docs, examples, tests"),
     "scikit-image": ("0.16.2", "docs, examples, tests"),
     "pandas": ("1.0.5", "benchmark, docs, examples, tests"),
     "seaborn": ("0.9.0", "docs, examples"),

From 8477d63e1f823c212936d5c60473cd85021f53fb Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Thu, 28 Sep 2023 13:02:14 +0200
Subject: [PATCH 08/15] MAINT cosmetic improvement in _non_trivial_radius test helper (#27486)

---
 sklearn/metrics/tests/test_pairwise_distances_reduction.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
index 75f497315ff01..fee8f1ab2c27d 100644
--- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py
+++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
@@ -228,8 +228,10 @@ def _non_trivial_radius(
     # on average. Yielding too many results would make the test slow (because
     # checking the results is expensive for large result sets), yielding 0 most
     # of the time would make the test useless.
-    if precomputed_dists is None and metric is None:
-        raise ValueError("Either metric or dists must be provided")
+    assert (
+        precomputed_dists is not None or metric is not None
+    ), "Either metric or precomputed_dists must be provided."
+
     if precomputed_dists is None:
         assert X is not None
         assert Y is not None

From 2a548da593ab7a8f1c7b0af8985de8935bc3be98 Mon Sep 17 00:00:00 2001
From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com>
Date: Thu, 28 Sep 2023 19:19:36 +0800
Subject: [PATCH 09/15] FIX make dataset fetchers accept `os.Pathlike` for `data_home` (#27468)

Co-authored-by: Guillaume Lemaitre
---
 doc/whats_new/v1.4.rst                     |  4 ++++
 sklearn/datasets/_base.py                  |  4 ++--
 sklearn/datasets/_california_housing.py    |  6 +++---
 sklearn/datasets/_covtype.py               |  4 ++--
 sklearn/datasets/_kddcup99.py              |  4 ++--
 sklearn/datasets/_lfw.py                   | 10 +++++-----
 sklearn/datasets/_olivetti_faces.py        |  6 +++---
 sklearn/datasets/_openml.py                |  6 +++---
 sklearn/datasets/_rcv1.py                  |  6 +++---
 sklearn/datasets/_species_distributions.py |  6 +++---
 sklearn/datasets/_twenty_newsgroups.py     |  8 ++++----
 sklearn/datasets/tests/test_base.py        | 18 +++++++++++++++++-
 12 files changed, 51 insertions(+), 31 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index d4f92548ba0ac..a9ea738beca91 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -182,6 +182,10 @@ Changelog
   which returns a dense numpy ndarray as before.
   :pr:`27438` by :user:`Yao Xiao <Charlie-XIAO>`.

+- |Fix| All dataset fetchers now accept `data_home` as any object that implements
+  the :class:`os.PathLike` interface, for instance, :class:`pathlib.Path`.
+  :pr:`27468` by :user:`Yao Xiao <Charlie-XIAO>`.
+
 :mod:`sklearn.decomposition`
 ............................
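Concretely, the changelog entry above means that a call like the following
sketch now passes parameter validation; the cache directory is hypothetical and
the first call downloads the data::

    from pathlib import Path

    from sklearn.datasets import fetch_california_housing

    # Any os.PathLike object is now accepted where previously only a plain
    # `str` passed validation.
    housing = fetch_california_housing(data_home=Path("/tmp/sklearn_cache"))
    print(housing.data.shape)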
diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index b2d198ecf8c2f..5675798137824 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -57,7 +57,7 @@ def get_data_home(data_home=None) -> str: ---------- data_home : str or path-like, default=None The path to scikit-learn data directory. If `None`, the default path - is `~/sklearn_learn_data`. + is `~/scikit_learn_data`. Returns ------- @@ -84,7 +84,7 @@ def clear_data_home(data_home=None): ---------- data_home : str or path-like, default=None The path to scikit-learn data directory. If `None`, the default path - is `~/sklearn_learn_data`. + is `~/scikit_learn_data`. """ data_home = get_data_home(data_home) shutil.rmtree(data_home) diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py index b48e7e10bdc4b..3153f0dd03f72 100644 --- a/sklearn/datasets/_california_housing.py +++ b/sklearn/datasets/_california_housing.py @@ -23,7 +23,7 @@ import logging import tarfile -from os import makedirs, remove +from os import PathLike, makedirs, remove from os.path import exists import joblib @@ -53,7 +53,7 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], "as_frame": ["boolean"], @@ -76,7 +76,7 @@ def fetch_california_housing( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py index 557899bc88e97..7620e08c5ec92 100644 --- a/sklearn/datasets/_covtype.py +++ b/sklearn/datasets/_covtype.py @@ -65,7 +65,7 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "download_if_missing": ["boolean"], "random_state": ["random_state"], "shuffle": ["boolean"], @@ -98,7 +98,7 @@ def fetch_covtype( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py index 17c49161c3bc2..444bd01737901 100644 --- a/sklearn/datasets/_kddcup99.py +++ b/sklearn/datasets/_kddcup99.py @@ -50,7 +50,7 @@ @validate_params( { "subset": [StrOptions({"SA", "SF", "http", "smtp"}), None], - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "shuffle": ["boolean"], "random_state": ["random_state"], "percent10": ["boolean"], @@ -92,7 +92,7 @@ def fetch_kddcup99( To return the corresponding classical subsets of kddcup 99. If None, return the entire kddcup 99 dataset. - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. 
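The same two-line change repeats in the remaining fetchers below: ``PathLike``
joins the ``os`` imports, and the ``validate_params`` constraint list for
``data_home`` gains it. A rough sketch of how such a constraint behaves
(hypothetical fetcher, simplified constraint set, and the private
``sklearn.utils._param_validation`` module, so illustrative only)::

    import os

    from sklearn.utils._param_validation import validate_params


    @validate_params(
        {"data_home": [str, os.PathLike, None]},
        prefer_skip_nested_validation=True,
    )
    def fetch_something(*, data_home=None):
        # A stand-in for a real fetcher; only the validation matters here.
        return data_home


    fetch_something(data_home=None)          # accepted
    fetch_something(data_home="/tmp/cache")  # accepted
    # fetch_something(data_home=123)         # raises InvalidParameterError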
diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index 345f56e89a03b..d06d29f21d0a5 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -10,7 +10,7 @@ import logging from numbers import Integral, Real -from os import listdir, makedirs, remove +from os import PathLike, listdir, makedirs, remove from os.path import exists, isdir, join import numpy as np @@ -234,7 +234,7 @@ def _fetch_lfw_people( @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "funneled": ["boolean"], "resize": [Interval(Real, 0, None, closed="neither"), None], "min_faces_per_person": [Interval(Integral, 0, None, closed="left"), None], @@ -272,7 +272,7 @@ def fetch_lfw_people( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. @@ -431,7 +431,7 @@ def _fetch_lfw_pairs( @validate_params( { "subset": [StrOptions({"train", "test", "10_folds"})], - "data_home": [str, None], + "data_home": [str, PathLike, None], "funneled": ["boolean"], "resize": [Interval(Real, 0, None, closed="neither"), None], "color": ["boolean"], @@ -480,7 +480,7 @@ def fetch_lfw_pairs( official evaluation set that is meant to be used with a 10-folds cross validation. - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py index 51710faccc417..8e1b3c91e254b 100644 --- a/sklearn/datasets/_olivetti_faces.py +++ b/sklearn/datasets/_olivetti_faces.py @@ -13,7 +13,7 @@ # Copyright (c) 2011 David Warde-Farley # License: BSD 3 clause -from os import makedirs, remove +from os import PathLike, makedirs, remove from os.path import exists import joblib @@ -36,7 +36,7 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "shuffle": ["boolean"], "random_state": ["random_state"], "download_if_missing": ["boolean"], @@ -67,7 +67,7 @@ def fetch_olivetti_faces( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py index 1c36dc8a25ce1..c9d09dc3ce46a 100644 --- a/sklearn/datasets/_openml.py +++ b/sklearn/datasets/_openml.py @@ -749,7 +749,7 @@ def _valid_data_column_names(features_list, target_columns): "name": [str, None], "version": [Interval(Integral, 1, None, closed="left"), StrOptions({"active"})], "data_id": [Interval(Integral, 1, None, closed="left"), None], - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "target_column": [str, list, None], "cache": [bool], "return_X_y": [bool], @@ -769,7 +769,7 @@ def fetch_openml( *, version: Union[str, int] = "active", data_id: Optional[int] = None, - data_home: Optional[str] = None, + data_home: Optional[Union[str, os.PathLike]] = None, target_column: Optional[Union[str, List]] = "default-target", cache: bool = True, return_X_y: bool = False, @@ -815,7 +815,7 @@ def fetch_openml( dataset. If data_id is not given, name (and potential version) are used to obtain a dataset. 
- data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the data sets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py index a807d8e311466..d9f392d872216 100644 --- a/sklearn/datasets/_rcv1.py +++ b/sklearn/datasets/_rcv1.py @@ -10,7 +10,7 @@ import logging from gzip import GzipFile -from os import makedirs, remove +from os import PathLike, makedirs, remove from os.path import exists, join import joblib @@ -74,7 +74,7 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "subset": [StrOptions({"train", "test", "all"})], "download_if_missing": ["boolean"], "random_state": ["random_state"], @@ -111,7 +111,7 @@ def fetch_rcv1( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py index 0bfc4bb0fdaf5..a1e654d41e071 100644 --- a/sklearn/datasets/_species_distributions.py +++ b/sklearn/datasets/_species_distributions.py @@ -39,7 +39,7 @@ import logging from io import BytesIO -from os import makedirs, remove +from os import PathLike, makedirs, remove from os.path import exists import joblib @@ -136,7 +136,7 @@ def construct_grids(batch): @validate_params( - {"data_home": [str, None], "download_if_missing": ["boolean"]}, + {"data_home": [str, PathLike, None], "download_if_missing": ["boolean"]}, prefer_skip_nested_validation=True, ) def fetch_species_distributions(*, data_home=None, download_if_missing=True): @@ -146,7 +146,7 @@ def fetch_species_distributions(*, data_home=None, download_if_missing=True): Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py index 637cf8e4fc8d4..5973e998c34b9 100644 --- a/sklearn/datasets/_twenty_newsgroups.py +++ b/sklearn/datasets/_twenty_newsgroups.py @@ -153,7 +153,7 @@ def strip_newsgroup_footer(text): @validate_params( { - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "subset": [StrOptions({"train", "test", "all"})], "categories": ["array-like", None], "shuffle": ["boolean"], @@ -191,7 +191,7 @@ def fetch_20newsgroups( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify a download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders. @@ -351,7 +351,7 @@ def fetch_20newsgroups( { "subset": [StrOptions({"train", "test", "all"})], "remove": [tuple], - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], "normalize": ["boolean"], @@ -411,7 +411,7 @@ def fetch_20newsgroups_vectorized( ends of posts that look like signatures, and 'quotes' removes lines that appear to be quoting another post. - data_home : str, default=None + data_home : str or path-like, default=None Specify an download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders. 
diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index f31f20636c0c1..f84c275d67cf9 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -3,6 +3,7 @@ import tempfile import warnings from functools import partial +from pathlib import Path from pickle import dumps, loads import numpy as np @@ -31,6 +32,16 @@ from sklearn.utils.fixes import _is_resource +class _DummyPath: + """Minimal class that implements the os.PathLike interface.""" + + def __init__(self, path): + self.path = path + + def __fspath__(self): + return self.path + + def _remove_dir(path): if os.path.isdir(path): shutil.rmtree(path) @@ -67,13 +78,18 @@ def test_category_dir_2(load_files_root): _remove_dir(test_category_dir2) -def test_data_home(data_home): +@pytest.mark.parametrize("path_container", [None, Path, _DummyPath]) +def test_data_home(path_container, data_home): # get_data_home will point to a pre-existing folder + if path_container is not None: + data_home = path_container(data_home) data_home = get_data_home(data_home=data_home) assert data_home == data_home assert os.path.exists(data_home) # clear_data_home will delete both the content and the folder it-self + if path_container is not None: + data_home = path_container(data_home) clear_data_home(data_home=data_home) assert not os.path.exists(data_home) From 751ccc0a4d4ca832ca2f1fa5aa45eff390fd8027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 28 Sep 2023 23:57:20 +0200 Subject: [PATCH 10/15] CI Run test suite inside Pyodide (#27346) Co-authored-by: Thomas J. Fan --- azure-pipelines.yml | 10 ++-- build_tools/azure/install_pyodide.sh | 8 ++- build_tools/azure/pytest-pyodide.js | 53 +++++++++++++++++++ build_tools/azure/test_script_pyodide.sh | 16 ++---- sklearn/_loss/tests/test_loss.py | 5 +- .../test_enable_hist_gradient_boosting.py | 4 ++ .../tests/test_enable_iterative_imputer.py | 4 ++ .../tests/test_enable_successive_halving.py | 4 ++ sklearn/feature_extraction/tests/test_text.py | 9 +++- sklearn/tests/test_common.py | 3 +- sklearn/tests/test_config.py | 2 + sklearn/tests/test_discriminant_analysis.py | 9 +++- sklearn/utils/__init__.py | 1 + sklearn/utils/tests/test_sparsefuncs.py | 13 +++++ sklearn/utils/tests/test_testing.py | 2 + 15 files changed, 118 insertions(+), 25 deletions(-) create mode 100644 build_tools/azure/pytest-pyodide.js diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 464096fb69c29..0304df24b8504 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -127,11 +127,11 @@ jobs: vmImage: ubuntu-22.04 variables: # Need to match Python version and Emscripten version for the correct - # Pyodide version. For example, for Pyodide version 0.23.4, see - # https://github.com/pyodide/pyodide/blob/0.23.4/Makefile.envs - PYODIDE_VERSION: '0.23.4' - EMSCRIPTEN_VERSION: '3.1.32' - PYTHON_VERSION: '3.11.2' + # Pyodide version. 
For example, for Pyodide version 0.24.1, see + # https://github.com/pyodide/pyodide/blob/0.24.1/Makefile.envs + PYODIDE_VERSION: '0.24.1' + EMSCRIPTEN_VERSION: '3.1.45' + PYTHON_VERSION: '3.11.3' dependsOn: [git_commit, linting] condition: | diff --git a/build_tools/azure/install_pyodide.sh b/build_tools/azure/install_pyodide.sh index 8bcfe45ef4152..58d0348a53202 100644 --- a/build_tools/azure/install_pyodide.sh +++ b/build_tools/azure/install_pyodide.sh @@ -15,8 +15,6 @@ pyodide build ls -ltrh dist -pyodide venv pyodide-venv -source pyodide-venv/bin/activate - -pip install dist/*.whl -pip list +# The Pyodide js library is needed by build_tools/azure/test_script_pyodide.sh +# to run tests inside Pyodide +npm install pyodide@$PYODIDE_VERSION diff --git a/build_tools/azure/pytest-pyodide.js b/build_tools/azure/pytest-pyodide.js new file mode 100644 index 0000000000000..c195940ce3b5b --- /dev/null +++ b/build_tools/azure/pytest-pyodide.js @@ -0,0 +1,53 @@ +const { opendir } = require('node:fs/promises'); +const { loadPyodide } = require("pyodide"); + +async function main() { + let exit_code = 0; + try { + global.pyodide = await loadPyodide(); + let pyodide = global.pyodide; + const FS = pyodide.FS; + const NODEFS = FS.filesystems.NODEFS; + + let mountDir = "/mnt"; + pyodide.FS.mkdir(mountDir); + pyodide.FS.mount(pyodide.FS.filesystems.NODEFS, { root: "." }, mountDir); + + await pyodide.loadPackage(["micropip"]); + await pyodide.runPythonAsync(` + import glob + import micropip + + wheels = glob.glob('/mnt/dist/*.whl') + wheels = [f'emfs://{wheel}' for wheel in wheels] + print(f'installing wheels: {wheels}') + await micropip.install(wheels); + + pkg_list = micropip.list() + print(pkg_list) + `); + + // Pyodide is built without OpenMP, need to set environment variable to + // skip related test + await pyodide.runPythonAsync(` + import os + os.environ['SKLEARN_SKIP_OPENMP_TEST'] = 'true' + `); + + await pyodide.runPythonAsync("import micropip; micropip.install('pytest')"); + let pytest = pyodide.pyimport("pytest"); + let args = process.argv.slice(2); + console.log('pytest args:', args); + exit_code = pytest.main(pyodide.toPy(args)); + } catch (e) { + console.error(e); + // Arbitrary exit code here. I have seen this code reached instead of a + // Pyodide fatal error sometimes + exit_code = 66; + + } finally { + process.exit(exit_code); + } +} + +main(); diff --git a/build_tools/azure/test_script_pyodide.sh b/build_tools/azure/test_script_pyodide.sh index 69dea9c41eaf5..d1aa207f864a2 100644 --- a/build_tools/azure/test_script_pyodide.sh +++ b/build_tools/azure/test_script_pyodide.sh @@ -2,14 +2,8 @@ set -e -source pyodide-venv/bin/activate - -pip list - -# Need to be outside of the git clone otherwise finds non build sklearn folder -cd /tmp - -# TODO for now only testing sklearn import to make sure the wheel is not badly -# broken. When Pyodide 0.24 is released we should run the full test suite and -# xfail tests that fail due to Pyodide limitations -python -c 'import sklearn' +# We are using a pytest js wrapper script to run tests inside Pyodide. Maybe +# one day we can use a Pyodide venv instead but at the time of writing +# (2023-09-27) there is an issue with scipy.linalg in a Pyodide venv, see +# https://github.com/pyodide/pyodide/issues/3865 for more details. 
+node build_tools/azure/pytest-pyodide.js --pyargs sklearn --durations 20 --showlocals diff --git a/sklearn/_loss/tests/test_loss.py b/sklearn/_loss/tests/test_loss.py index d279a2f06a182..444b5e1d65406 100644 --- a/sklearn/_loss/tests/test_loss.py +++ b/sklearn/_loss/tests/test_loss.py @@ -27,7 +27,7 @@ HuberLoss, PinballLoss, ) -from sklearn.utils import assert_all_finite +from sklearn.utils import _IS_WASM, assert_all_finite from sklearn.utils._testing import create_memmap_backed_data, skip_if_32bit ALL_LOSSES = list(_LOSSES.values()) @@ -286,6 +286,9 @@ def test_loss_dtype( Also check that input arrays can be readonly, e.g. memory mapped. """ + if _IS_WASM and readonly_memmap: # pragma: nocover + pytest.xfail(reason="memmap not fully supported") + loss = loss() # generate a y_true and raw_prediction in valid range n_samples = 5 diff --git a/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py b/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py index 8ea365fed6e59..6e0b50c18e0ae 100644 --- a/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py +++ b/sklearn/experimental/tests/test_enable_hist_gradient_boosting.py @@ -2,9 +2,13 @@ import textwrap +import pytest + +from sklearn.utils import _IS_WASM from sklearn.utils._testing import assert_run_python_script +@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess") def test_import_raises_warning(): code = """ import pytest diff --git a/sklearn/experimental/tests/test_enable_iterative_imputer.py b/sklearn/experimental/tests/test_enable_iterative_imputer.py index 3f4ce37f7afcc..3044a52daf0ce 100644 --- a/sklearn/experimental/tests/test_enable_iterative_imputer.py +++ b/sklearn/experimental/tests/test_enable_iterative_imputer.py @@ -2,9 +2,13 @@ import textwrap +import pytest + +from sklearn.utils import _IS_WASM from sklearn.utils._testing import assert_run_python_script +@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess") def test_imports_strategies(): # Make sure different import strategies work or fail as expected. diff --git a/sklearn/experimental/tests/test_enable_successive_halving.py b/sklearn/experimental/tests/test_enable_successive_halving.py index 4aa695e654ccc..8c0d5ef869680 100644 --- a/sklearn/experimental/tests/test_enable_successive_halving.py +++ b/sklearn/experimental/tests/test_enable_successive_halving.py @@ -2,9 +2,13 @@ import textwrap +import pytest + +from sklearn.utils import _IS_WASM from sklearn.utils._testing import assert_run_python_script +@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess") def test_imports_strategies(): # Make sure different import strategies work or fail as expected. 
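The ``xfail`` markers added above all follow one idiom: a module-level platform
flag passed as the condition of ``pytest.mark.xfail``. A generic sketch of the
idiom, with a hypothetical test standing in for the ``assert_run_python_script``
helpers::

    import platform
    import subprocess
    import sys

    import pytest

    # Mirrors the `_IS_WASM` constant added to `sklearn.utils` later in
    # this series.
    _IS_WASM = platform.machine() in ["wasm32", "wasm64"]


    @pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
    def test_something_in_a_subprocess():
        # Spawning processes is impossible under WASM, hence the xfail.
        assert subprocess.run([sys.executable, "--version"]).returncode == 0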
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 291e34e4b94fd..7c7cac85ccc6b 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -26,7 +26,7 @@ from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC -from sklearn.utils import IS_PYPY +from sklearn.utils import _IS_WASM, IS_PYPY from sklearn.utils._testing import ( assert_allclose_dense_sparse, assert_almost_equal, @@ -475,6 +475,13 @@ def test_tf_idf_smoothing(): assert (tfidf >= 0).all() +@pytest.mark.xfail( + _IS_WASM, + reason=( + "no floating point exceptions, see" + " https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881" + ), +) def test_tfidf_no_smoothing(): X = [[1, 1, 1], [1, 1, 0], [1, 0, 0]] tr = TfidfTransformer(smooth_idf=False, norm="l2") diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 8b407d18f90d8..74543d46ec63f 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -60,7 +60,7 @@ StandardScaler, ) from sklearn.semi_supervised import LabelPropagation, LabelSpreading -from sklearn.utils import IS_PYPY, all_estimators +from sklearn.utils import _IS_WASM, IS_PYPY, all_estimators from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags from sklearn.utils._testing import ( SkipTest, @@ -206,6 +206,7 @@ def test_class_weight_balanced_linear_classifiers(name, Classifier): check_class_weight_balanced_linear_classifier(name, Classifier) +@pytest.mark.xfail(_IS_WASM, reason="importlib not supported for Pyodide packages") @ignore_warnings def test_import_all_consistency(): # Smoke test to check that any name in a __all__ list is actually defined diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py index 1b92d58a5f28e..18155aaa2cf69 100644 --- a/sklearn/tests/test_config.py +++ b/sklearn/tests/test_config.py @@ -6,6 +6,7 @@ import sklearn from sklearn import config_context, get_config, set_config +from sklearn.utils import _IS_WASM from sklearn.utils.parallel import Parallel, delayed @@ -138,6 +139,7 @@ def test_config_threadsafe_joblib(backend): assert items == [False, True, False, True] +@pytest.mark.xfail(_IS_WASM, reason="cannot start threads") def test_config_threadsafe(): """Uses threads directly to test that the global config does not change between threads. Same test as `test_config_threadsafe_joblib` but with diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 27e183fde43e0..b60053e04b25b 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -11,7 +11,7 @@ _cov, ) from sklearn.preprocessing import StandardScaler -from sklearn.utils import check_random_state +from sklearn.utils import _IS_WASM, check_random_state from sklearn.utils._testing import ( _convert_container, assert_allclose, @@ -591,6 +591,13 @@ def test_qda_store_covariance(): ) +@pytest.mark.xfail( + _IS_WASM, + reason=( + "no floating point exceptions, see" + " https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881" + ), +) def test_qda_regularization(): # The default is reg_param=0. and will cause issues when there is a # constant variable. 
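The two ``xfail`` reasons above refer to the same WASM limitation: NumPy built
for Pyodide cannot turn floating point exceptions into Python errors. A sketch
of the behaviour these tests rely on (not code from this patch)::

    import numpy as np

    # Outside WASM this raises FloatingPointError; under Pyodide the
    # divide-by-zero exception is never delivered and -inf is returned.
    with np.errstate(divide="raise"):
        np.log(np.array(0.0))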
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 44d49abb1304b..8b97fcf8ebfcb 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -77,6 +77,7 @@ IS_PYPY = platform.python_implementation() == "PyPy" _IS_32BIT = 8 * struct.calcsize("P") == 32 +_IS_WASM = platform.machine() in ["wasm32", "wasm64"] def _in_unstable_openblas_configuration(): diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index c1ea6d97bbd3b..019f413a276d1 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -6,6 +6,7 @@ from scipy import linalg from sklearn.datasets import make_classification +from sklearn.utils import _IS_WASM from sklearn.utils._testing import assert_allclose from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS, LIL_CONTAINERS from sklearn.utils.sparsefuncs import ( @@ -794,6 +795,18 @@ def test_min_max( dtype=dtype, ) X_sparse = sparse_format(X) + + if ( + _IS_WASM and large_indices and isinstance(X_sparse, sp.sparray) + ): # pragma: nocover + pytest.xfail( + reason=( + "temporary xfailing test until it is fixed in main, see" + " https://github.com/scikit-learn/scikit-learn/issues/27470 for more" + " details." + ) + ) + if large_indices: X_sparse.indices = X_sparse.indices.astype("int64") X_sparse.indptr = X_sparse.indptr.astype("int64") diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 2b0db8f2e675c..a62b844653612 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -9,6 +9,7 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.tree import DecisionTreeClassifier +from sklearn.utils import _IS_WASM from sklearn.utils._testing import ( TempMemmap, _convert_container, @@ -607,6 +608,7 @@ def test_tempmemmap(monkeypatch): assert registration_counter.nb_calls == 2 +@pytest.mark.xfail(_IS_WASM, reason="memmap not fully supported") @pytest.mark.parametrize("aligned", [False, True]) def test_create_memmap_backed_data(monkeypatch, aligned): registration_counter = RegistrationCounter() From 3aa105426365fe7aa6ce1e4fc10edb4256bd5c89 Mon Sep 17 00:00:00 2001 From: Kushan Sharma <50699013+greyisbetter@users.noreply.github.com> Date: Fri, 29 Sep 2023 12:25:10 +0530 Subject: [PATCH 11/15] DOC Add link to Early Stopping example in Gradient Boosting (#27025) Co-authored-by: Adrin Jalali --- doc/modules/ensemble.rst | 6 ++++-- examples/ensemble/plot_gradient_boosting_early_stopping.py | 2 +- sklearn/ensemble/_gb.py | 4 ++++ sklearn/linear_model/_passive_aggressive.py | 6 +++--- sklearn/linear_model/_perceptron.py | 6 +++--- sklearn/neural_network/_multilayer_perceptron.py | 2 +- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 6047cd5bc7511..2d1f05f230ed4 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -740,8 +740,10 @@ of ``learning_rate`` require larger numbers of weak learners to maintain a constant training error. Empirical evidence suggests that small values of ``learning_rate`` favor better test error. [HTF]_ recommend to set the learning rate to a small constant -(e.g. ``learning_rate <= 0.1``) and choose ``n_estimators`` by early -stopping. For a more detailed discussion of the interaction between +(e.g. 
``learning_rate <= 0.1``) and choose ``n_estimators`` large enough
+that early stopping applies; see
+:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_early_stopping.py`.
+For a more detailed discussion of the interaction between
 ``learning_rate`` and ``n_estimators`` see [R2007]_.

 Subsampling
diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py
index f271f80a07c55..e8514fe2aff87 100644
--- a/examples/ensemble/plot_gradient_boosting_early_stopping.py
+++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -1,6 +1,6 @@
 """
 ===================================
-Early stopping of Gradient Boosting
+Early stopping in Gradient Boosting
 ===================================

 Gradient boosting is an ensembling technique where several weak learners
diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 057d1e2e1e054..5982f8a7fb952 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -1282,6 +1282,8 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         improving in all of the previous ``n_iter_no_change`` numbers of
         iterations. The split is stratified.
         Values must be in the range `[1, inf)`.
+        See
+        :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_early_stopping.py`.

         .. versionadded:: 0.20

@@ -1891,6 +1893,8 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         improving in all of the previous ``n_iter_no_change`` numbers of
         iterations.
         Values must be in the range `[1, inf)`.
+        See
+        :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_early_stopping.py`.

         .. versionadded:: 0.20

diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py
index d27cc928ca056..68237ade18bb5 100644
--- a/sklearn/linear_model/_passive_aggressive.py
+++ b/sklearn/linear_model/_passive_aggressive.py
@@ -35,11 +35,11 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
         .. versionadded:: 0.19

     early_stopping : bool, default=False
-        Whether to use early stopping to terminate training when validation.
+        Whether to use early stopping to terminate training when validation
         score is not improving. If set to True, it will automatically set aside
         a stratified fraction of training data as validation and terminate
-        training when validation score is not improving by at least tol for
-        n_iter_no_change consecutive epochs.
+        training when validation score is not improving by at least `tol` for
+        `n_iter_no_change` consecutive epochs.

         .. versionadded:: 0.20

diff --git a/sklearn/linear_model/_perceptron.py b/sklearn/linear_model/_perceptron.py
index 30e781983365e..eaf3da556b24a 100644
--- a/sklearn/linear_model/_perceptron.py
+++ b/sklearn/linear_model/_perceptron.py
@@ -68,11 +68,11 @@ class Perceptron(BaseSGDClassifier):
         See :term:`Glossary <random_state>`.

     early_stopping : bool, default=False
-        Whether to use early stopping to terminate training when validation.
+        Whether to use early stopping to terminate training when validation
         score is not improving. If set to True, it will automatically set aside
         a stratified fraction of training data as validation and terminate
-        training when validation score is not improving by at least tol for
-        n_iter_no_change consecutive epochs.
+        training when validation score is not improving by at least `tol` for
+        `n_iter_no_change` consecutive epochs.

         ..
versionadded:: 0.20

diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index d64593c27d6f5..02303006dd91c 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -887,7 +887,7 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron):
         Whether to use early stopping to terminate training when validation
         score is not improving. If set to true, it will automatically set
         aside 10% of training data as validation and terminate training when
-        validation score is not improving by at least tol for
+        validation score is not improving by at least ``tol`` for
         ``n_iter_no_change`` consecutive epochs. The split is stratified,
         except in a multilabel setting.
         If early stopping is False, then the training stops when the training

From 58687610afefd46524d4e7486a19960cf6960391 Mon Sep 17 00:00:00 2001
From: Iwona Zdzieblo <66373675+IwonaZ@users.noreply.github.com>
Date: Fri, 29 Sep 2023 11:12:39 +0200
Subject: [PATCH 12/15] DOC add link to plot_isolation_forest.py (#26975)

Co-authored-by: Guillaume Lemaitre
Co-authored-by: Adrin Jalali
---
 sklearn/ensemble/_iforest.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py
index 9371d2e4e6c5b..16d5215b7e0a8 100644
--- a/sklearn/ensemble/_iforest.py
+++ b/sklearn/ensemble/_iforest.py
@@ -199,6 +199,9 @@ class IsolationForest(OutlierMixin, BaseBagging):
     >>> clf = IsolationForest(random_state=0).fit(X)
     >>> clf.predict([[0.1], [0], [90]])
     array([ 1,  1, -1])
+
+    For an example of using isolation forest for anomaly detection, see
+    :ref:`sphx_glr_auto_examples_ensemble_plot_isolation_forest.py`.
     """

     _parameter_constraints: dict = {

From bafd46d853f2b93aa7b04cd9f9daa8a6a764faa1 Mon Sep 17 00:00:00 2001
From: Adrin Jalali
Date: Fri, 29 Sep 2023 14:58:36 +0200
Subject: [PATCH 13/15] DOC add link to sklearn_is_fitted example in check_is_fitted (#26926)

Co-authored-by: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
---
 sklearn/utils/validation.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index ac75c8afe5e5b..42b635557d2f4 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -1410,8 +1410,10 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all):
     raises a NotFittedError with the given message.

     If an estimator does not set any attributes with a trailing underscore, it
-    can define a ``__sklearn_is_fitted__`` method returning a boolean to specify if the
-    estimator is fitted or not.
+    can define a ``__sklearn_is_fitted__`` method returning a boolean to
+    specify if the estimator is fitted or not. See
+    :ref:`sphx_glr_auto_examples_developing_estimators_sklearn_is_fitted.py`
+    for an example on how to use the API.
Parameters ---------- From 0856062d1410e55eb7b463958ea23117f1ca09e6 Mon Sep 17 00:00:00 2001 From: Christos Aridas Date: Fri, 29 Sep 2023 17:12:11 +0300 Subject: [PATCH 14/15] MAINT Remove a redundant line in multilayer perceptron (#27497) --- sklearn/neural_network/_multilayer_perceptron.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index 02303006dd91c..5175247204fb8 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -701,7 +701,6 @@ def _fit_stochastic( # restore best weights self.coefs_ = self._best_coefs self.intercepts_ = self._best_intercepts - self.validation_scores_ = self.validation_scores_ def _update_no_improvement_count(self, early_stopping, X_val, y_val): if early_stopping: From bdf66d048c2113e94397b11ff17b7b5c03938ab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 29 Sep 2023 19:08:17 +0200 Subject: [PATCH 15/15] MAINT Fix Pyodide step description (#27496) --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0304df24b8504..92ab89d1d4f64 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -150,7 +150,7 @@ jobs: addToPath: true - bash: bash build_tools/azure/install_pyodide.sh - displayName: Build Pyodide wheel and install it in a Pyodide venv + displayName: Build Pyodide wheel - bash: bash build_tools/azure/test_script_pyodide.sh displayName: Test Pyodide wheel
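For reference, the ``__sklearn_is_fitted__`` protocol that the
``check_is_fitted`` docstring change above links to can be implemented along
the following lines. This is a sketch of a custom estimator, not the code of
the linked example::

    from sklearn.base import BaseEstimator
    from sklearn.utils.validation import check_is_fitted


    class CustomEstimator(BaseEstimator):
        def fit(self, X, y=None):
            # No attribute with a trailing underscore is set, so fittedness
            # is tracked with a private flag instead.
            self._fitted = True
            return self

        def __sklearn_is_fitted__(self):
            # check_is_fitted calls this instead of looking for fitted
            # attributes.
            return getattr(self, "_fitted", False)


    est = CustomEstimator()
    # check_is_fitted(est) would raise NotFittedError at this point.
    check_is_fitted(est.fit(X=[[1.0]], y=[0]))  # passes silently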